diff options
Diffstat (limited to 'apps')
-rw-r--r-- | apps/plugins/lib/gray_core.c | 501 | ||||
-rw-r--r-- | apps/plugins/lib/gray_draw.c | 494 |
2 files changed, 471 insertions, 524 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c index 809e88dba1..413b66c65d 100644 --- a/apps/plugins/lib/gray_core.c +++ b/apps/plugins/lib/gray_core.c | |||
@@ -348,7 +348,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
348 | long plane_size, buftaken; | 348 | long plane_size, buftaken; |
349 | unsigned data; | 349 | unsigned data; |
350 | #ifndef SIMULATOR | 350 | #ifndef SIMULATOR |
351 | int j; | 351 | int j, bitfill; |
352 | #endif | 352 | #endif |
353 | 353 | ||
354 | _gray_rb = newrb; | 354 | _gray_rb = newrb; |
@@ -439,6 +439,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
439 | _gray_info.cur_plane = 0; | 439 | _gray_info.cur_plane = 0; |
440 | _gray_info.plane_size = plane_size; | 440 | _gray_info.plane_size = plane_size; |
441 | _gray_info.plane_data = gbuf; | 441 | _gray_info.plane_data = gbuf; |
442 | _gray_rb->memset(gbuf, 0, depth * plane_size); | ||
442 | gbuf += depth * plane_size; | 443 | gbuf += depth * plane_size; |
443 | _gray_info.bitpattern = (unsigned long *)gbuf; | 444 | _gray_info.bitpattern = (unsigned long *)gbuf; |
444 | 445 | ||
@@ -449,7 +450,8 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
449 | i >>= 1; | 450 | i >>= 1; |
450 | j--; | 451 | j--; |
451 | } | 452 | } |
452 | _gray_info.randmask = 0xFFu >> j; | 453 | _gray_info.randmask = 0xFFu >> j; |
454 | bitfill = (-depth) & 7; | ||
453 | 455 | ||
454 | /* Precalculate the bit patterns for all possible pixel values */ | 456 | /* Precalculate the bit patterns for all possible pixel values */ |
455 | for (i = 0; i <= depth; i++) | 457 | for (i = 0; i <= depth; i++) |
@@ -469,7 +471,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
469 | } | 471 | } |
470 | /* now the lower <depth> bits contain the pattern */ | 472 | /* now the lower <depth> bits contain the pattern */ |
471 | 473 | ||
472 | _gray_info.bitpattern[i] = pattern; | 474 | _gray_info.bitpattern[i] = pattern << bitfill; |
473 | } | 475 | } |
474 | #endif | 476 | #endif |
475 | 477 | ||
@@ -797,101 +799,93 @@ void gray_update_rect(int x, int y, int width, int height) | |||
797 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ | 799 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ |
798 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 800 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
799 | 801 | ||
802 | "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/ | ||
803 | "and r0, r0, #7 \n" | ||
804 | "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */ | ||
805 | "mov r8, r8, lsr #8 \n" /* r8: never reached */ | ||
806 | "mov r7, r7, lsr #8 \n" | ||
807 | "mov r6, r6, lsr #8 \n" | ||
808 | "mov r5, r5, lsr #8 \n" | ||
809 | "mov r4, r4, lsr #8 \n" | ||
810 | "mov r3, r3, lsr #8 \n" | ||
811 | "mov r2, r2, lsr #8 \n" | ||
812 | "mov r1, r1, lsr #8 \n" | ||
813 | |||
800 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | 814 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ |
801 | "ands %[mask], %[mask], #0xff \n" | 815 | "ands %[mask], %[mask], #0xff \n" |
802 | "beq .ur_sloop \n" /* short loop if no bits to keep */ | 816 | "beq .ur_sstart \n" /* short loop if no bits to keep */ |
803 | |||
804 | ".ur_floop: \n" /** full loop (bits to keep)**/ | ||
805 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | ||
806 | "bhs .ur_f8 \n" | ||
807 | 817 | ||
808 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | 818 | "ldrb r0, [pc, r0] \n" /* jump into full loop */ |
809 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
810 | |||
811 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
812 | "add pc, pc, r0 \n" | 819 | "add pc, pc, r0 \n" |
813 | ".ur_ftable: \n" | 820 | ".ur_ftable: \n" |
814 | ".byte .ur_f0 - .ur_ftable - 4 \n" /* [jump tables are tricky] */ | 821 | ".byte .ur_f1 - .ur_ftable - 4 \n" /* [jump tables are tricky] */ |
815 | ".byte .ur_f1 - .ur_ftable - 4 \n" | ||
816 | ".byte .ur_f2 - .ur_ftable - 4 \n" | 822 | ".byte .ur_f2 - .ur_ftable - 4 \n" |
817 | ".byte .ur_f3 - .ur_ftable - 4 \n" | 823 | ".byte .ur_f3 - .ur_ftable - 4 \n" |
818 | ".byte .ur_f4 - .ur_ftable - 4 \n" | 824 | ".byte .ur_f4 - .ur_ftable - 4 \n" |
819 | ".byte .ur_f5 - .ur_ftable - 4 \n" | 825 | ".byte .ur_f5 - .ur_ftable - 4 \n" |
820 | ".byte .ur_f6 - .ur_ftable - 4 \n" | 826 | ".byte .ur_f6 - .ur_ftable - 4 \n" |
821 | ".byte .ur_f7 - .ur_ftable - 4 \n" | 827 | ".byte .ur_f7 - .ur_ftable - 4 \n" |
828 | ".byte .ur_f8 - .ur_ftable - 4 \n" | ||
822 | 829 | ||
830 | ".ur_floop: \n" /** full loop (bits to keep)**/ | ||
823 | ".ur_f8: \n" | 831 | ".ur_f8: \n" |
824 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 832 | "ldrb r0, [%[addr]] \n" /* load old byte */ |
825 | /* Point behind the last plane for this round. Note: We're using the | ||
826 | * registers backwards in order to reuse the streak for the last round. | ||
827 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
828 | * the bit order would be destroyed which results in more flicker. */ | ||
829 | "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */ | ||
830 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ | 833 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ |
831 | "orr r0, r0, r8 \n" /* set new bits */ | 834 | "orr r0, r0, r1 \n" /* set new bits */ |
832 | "strb r0, [%[addr]] \n" /* store byte */ | 835 | "strb r0, [%[addr]], %[psiz] \n" /* store byte */ |
833 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | 836 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
834 | ".ur_f7: \n" | 837 | ".ur_f7: \n" |
835 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 838 | "ldrb r0, [%[addr]] \n" |
836 | "and r0, r0, %[mask] \n" | 839 | "and r0, r0, %[mask] \n" |
837 | "orr r0, r0, r7 \n" | 840 | "orr r0, r0, r2 \n" |
838 | "strb r0, [%[addr]] \n" | 841 | "strb r0, [%[addr]], %[psiz] \n" |
839 | "mov r7, r7, lsr #8 \n" | 842 | "mov r2, r2, lsr #8 \n" |
840 | ".ur_f6: \n" | 843 | ".ur_f6: \n" |
841 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 844 | "ldrb r0, [%[addr]] \n" |
842 | "and r0, r0, %[mask] \n" | 845 | "and r0, r0, %[mask] \n" |
843 | "orr r0, r0, r6 \n" | 846 | "orr r0, r0, r3 \n" |
844 | "strb r0, [%[addr]] \n" | 847 | "strb r0, [%[addr]], %[psiz] \n" |
845 | "mov r6, r6, lsr #8 \n" | 848 | "mov r3, r3, lsr #8 \n" |
846 | ".ur_f5: \n" | 849 | ".ur_f5: \n" |
847 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 850 | "ldrb r0, [%[addr]] \n" |
848 | "and r0, r0, %[mask] \n" | ||
849 | "orr r0, r0, r5 \n" | ||
850 | "strb r0, [%[addr]] \n" | ||
851 | "mov r5, r5, lsr #8 \n" | ||
852 | ".ur_f4: \n" | ||
853 | "ldrb r0, [%[addr], -%[psiz]]! \n" | ||
854 | "and r0, r0, %[mask] \n" | 851 | "and r0, r0, %[mask] \n" |
855 | "orr r0, r0, r4 \n" | 852 | "orr r0, r0, r4 \n" |
856 | "strb r0, [%[addr]] \n" | 853 | "strb r0, [%[addr]], %[psiz] \n" |
857 | "mov r4, r4, lsr #8 \n" | 854 | "mov r4, r4, lsr #8 \n" |
855 | ".ur_f4: \n" | ||
856 | "ldrb r0, [%[addr]] \n" | ||
857 | "and r0, r0, %[mask] \n" | ||
858 | "orr r0, r0, r5 \n" | ||
859 | "strb r0, [%[addr]], %[psiz] \n" | ||
860 | "mov r5, r5, lsr #8 \n" | ||
858 | ".ur_f3: \n" | 861 | ".ur_f3: \n" |
859 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 862 | "ldrb r0, [%[addr]] \n" |
860 | "and r0, r0, %[mask] \n" | 863 | "and r0, r0, %[mask] \n" |
861 | "orr r0, r0, r3 \n" | 864 | "orr r0, r0, r6 \n" |
862 | "strb r0, [%[addr]] \n" | 865 | "strb r0, [%[addr]], %[psiz] \n" |
863 | "mov r3, r3, lsr #8 \n" | 866 | "mov r6, r6, lsr #8 \n" |
864 | ".ur_f2: \n" | 867 | ".ur_f2: \n" |
865 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 868 | "ldrb r0, [%[addr]] \n" |
866 | "and r0, r0, %[mask] \n" | 869 | "and r0, r0, %[mask] \n" |
867 | "orr r0, r0, r2 \n" | 870 | "orr r0, r0, r7 \n" |
868 | "strb r0, [%[addr]] \n" | 871 | "strb r0, [%[addr]], %[psiz] \n" |
869 | "mov r2, r2, lsr #8 \n" | 872 | "mov r7, r7, lsr #8 \n" |
870 | ".ur_f1: \n" | 873 | ".ur_f1: \n" |
871 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 874 | "ldrb r0, [%[addr]] \n" |
872 | "and r0, r0, %[mask] \n" | 875 | "and r0, r0, %[mask] \n" |
873 | "orr r0, r0, r1 \n" | 876 | "orr r0, r0, r8 \n" |
874 | "strb r0, [%[addr]] \n" | 877 | "strb r0, [%[addr]], %[psiz] \n" |
875 | "mov r1, r1, lsr #8 \n" | 878 | "mov r8, r8, lsr #8 \n" |
876 | ".ur_f0: \n" | ||
877 | 879 | ||
878 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
879 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 880 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
880 | "bhi .ur_floop \n" | 881 | "bhi .ur_floop \n" |
881 | 882 | ||
882 | "b .ur_end \n" | 883 | "b .ur_end \n" |
883 | 884 | ||
884 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 885 | ".ur_sstart: \n" |
885 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | 886 | "ldrb r0, [pc, r0] \n" /* jump into short loop*/ |
886 | "bhs .ur_s8 \n" | ||
887 | |||
888 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | ||
889 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
890 | |||
891 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
892 | "add pc, pc, r0 \n" | 887 | "add pc, pc, r0 \n" |
893 | ".ur_stable: \n" | 888 | ".ur_stable: \n" |
894 | ".byte .ur_s0 - .ur_stable - 4 \n" | ||
895 | ".byte .ur_s1 - .ur_stable - 4 \n" | 889 | ".byte .ur_s1 - .ur_stable - 4 \n" |
896 | ".byte .ur_s2 - .ur_stable - 4 \n" | 890 | ".byte .ur_s2 - .ur_stable - 4 \n" |
897 | ".byte .ur_s3 - .ur_stable - 4 \n" | 891 | ".byte .ur_s3 - .ur_stable - 4 \n" |
@@ -899,36 +893,34 @@ void gray_update_rect(int x, int y, int width, int height) | |||
899 | ".byte .ur_s5 - .ur_stable - 4 \n" | 893 | ".byte .ur_s5 - .ur_stable - 4 \n" |
900 | ".byte .ur_s6 - .ur_stable - 4 \n" | 894 | ".byte .ur_s6 - .ur_stable - 4 \n" |
901 | ".byte .ur_s7 - .ur_stable - 4 \n" | 895 | ".byte .ur_s7 - .ur_stable - 4 \n" |
896 | ".byte .ur_s8 - .ur_stable - 4 \n" | ||
902 | 897 | ||
898 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
903 | ".ur_s8: \n" | 899 | ".ur_s8: \n" |
904 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 900 | "strb r1, [%[addr]], %[psiz] \n" /* store byte */ |
905 | /* Point behind the last plane for this round. See above. */ | 901 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
906 | "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */ | ||
907 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | ||
908 | ".ur_s7: \n" | 902 | ".ur_s7: \n" |
909 | "strb r7, [%[addr], -%[psiz]]! \n" | 903 | "strb r2, [%[addr]], %[psiz] \n" |
910 | "mov r7, r7, lsr #8 \n" | 904 | "mov r2, r2, lsr #8 \n" |
911 | ".ur_s6: \n" | 905 | ".ur_s6: \n" |
912 | "strb r6, [%[addr], -%[psiz]]! \n" | 906 | "strb r3, [%[addr]], %[psiz] \n" |
913 | "mov r6, r6, lsr #8 \n" | 907 | "mov r3, r3, lsr #8 \n" |
914 | ".ur_s5: \n" | 908 | ".ur_s5: \n" |
915 | "strb r5, [%[addr], -%[psiz]]! \n" | 909 | "strb r4, [%[addr]], %[psiz] \n" |
916 | "mov r5, r5, lsr #8 \n" | ||
917 | ".ur_s4: \n" | ||
918 | "strb r4, [%[addr], -%[psiz]]! \n" | ||
919 | "mov r4, r4, lsr #8 \n" | 910 | "mov r4, r4, lsr #8 \n" |
911 | ".ur_s4: \n" | ||
912 | "strb r5, [%[addr]], %[psiz] \n" | ||
913 | "mov r5, r5, lsr #8 \n" | ||
920 | ".ur_s3: \n" | 914 | ".ur_s3: \n" |
921 | "strb r3, [%[addr], -%[psiz]]! \n" | 915 | "strb r6, [%[addr]], %[psiz] \n" |
922 | "mov r3, r3, lsr #8 \n" | 916 | "mov r6, r6, lsr #8 \n" |
923 | ".ur_s2: \n" | 917 | ".ur_s2: \n" |
924 | "strb r2, [%[addr], -%[psiz]]! \n" | 918 | "strb r7, [%[addr]], %[psiz] \n" |
925 | "mov r2, r2, lsr #8 \n" | 919 | "mov r7, r7, lsr #8 \n" |
926 | ".ur_s1: \n" | 920 | ".ur_s1: \n" |
927 | "strb r1, [%[addr], -%[psiz]]! \n" | 921 | "strb r8, [%[addr]], %[psiz] \n" |
928 | "mov r1, r1, lsr #8 \n" | 922 | "mov r8, r8, lsr #8 \n" |
929 | ".ur_s0: \n" | ||
930 | 923 | ||
931 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
932 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 924 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
933 | "bhi .ur_sloop \n" | 925 | "bhi .ur_sloop \n" |
934 | 926 | ||
@@ -956,7 +948,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
956 | { | 948 | { |
957 | unsigned char *addr, *end; | 949 | unsigned char *addr, *end; |
958 | unsigned mask = 0; | 950 | unsigned mask = 0; |
959 | unsigned test = 1; | 951 | unsigned test = 1 << ((-_gray_info.depth) & 7); |
960 | int i; | 952 | int i; |
961 | 953 | ||
962 | /* precalculate the bit patterns with random shifts | 954 | /* precalculate the bit patterns with random shifts |
@@ -1287,25 +1279,37 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1287 | "shlr r0 \n" | 1279 | "shlr r0 \n" |
1288 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 1280 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
1289 | 1281 | ||
1290 | "tst %[mask], %[mask] \n" | 1282 | "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/ |
1291 | "bt .ur_sloop \n" /* short loop if nothing to keep */ | 1283 | "add #-1, %[rx] \n" |
1284 | "mov #7, r0 \n" | ||
1285 | "and r0, %[rx] \n" | ||
1286 | "mova .ur_pshift, r0 \n" | ||
1287 | "add %[rx], r0 \n" | ||
1288 | "add %[rx], r0 \n" | ||
1289 | "jmp @r0 \n" /* jump into shift streak */ | ||
1290 | "nop \n" | ||
1292 | 1291 | ||
1293 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1292 | ".align 2 \n" |
1294 | "mov #8, r0 \n" | 1293 | ".ur_pshift: \n" |
1295 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | 1294 | "shlr8 r7 \n" |
1296 | "bt .ur_f8 \n" | 1295 | "shlr8 r6 \n" |
1296 | "shlr8 r5 \n" | ||
1297 | "shlr8 r4 \n" | ||
1298 | "shlr8 r3 \n" | ||
1299 | "shlr8 r2 \n" | ||
1300 | "shlr8 r1 \n" | ||
1301 | |||
1302 | "tst %[mask], %[mask] \n" | ||
1303 | "bt .ur_sstart \n" /* short loop if nothing to keep */ | ||
1297 | 1304 | ||
1298 | "mulu %[psiz], %[dpth] \n" | 1305 | "mova .ur_ftable, r0 \n" /* jump into full loop */ |
1299 | "mova .ur_ftable, r0 \n" | 1306 | "mov.b @(r0, %[rx]), %[rx] \n" |
1300 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1301 | "add %[rx], r0 \n" | 1307 | "add %[rx], r0 \n" |
1302 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1308 | "jmp @r0 \n" |
1303 | "jmp @r0 \n" /* jump into streak */ | 1309 | "nop \n" |
1304 | "add %[rx], %[addr] \n" /* ..for this round */ | 1310 | |
1305 | |||
1306 | ".align 2 \n" | 1311 | ".align 2 \n" |
1307 | ".ur_ftable: \n" | 1312 | ".ur_ftable: \n" |
1308 | ".byte .ur_f0 - .ur_ftable \n" | ||
1309 | ".byte .ur_f1 - .ur_ftable \n" | 1313 | ".byte .ur_f1 - .ur_ftable \n" |
1310 | ".byte .ur_f2 - .ur_ftable \n" | 1314 | ".byte .ur_f2 - .ur_ftable \n" |
1311 | ".byte .ur_f3 - .ur_ftable \n" | 1315 | ".byte .ur_f3 - .ur_ftable \n" |
@@ -1313,74 +1317,66 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1313 | ".byte .ur_f5 - .ur_ftable \n" | 1317 | ".byte .ur_f5 - .ur_ftable \n" |
1314 | ".byte .ur_f6 - .ur_ftable \n" | 1318 | ".byte .ur_f6 - .ur_ftable \n" |
1315 | ".byte .ur_f7 - .ur_ftable \n" | 1319 | ".byte .ur_f7 - .ur_ftable \n" |
1320 | ".byte .ur_f8 - .ur_ftable \n" | ||
1316 | 1321 | ||
1322 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | ||
1317 | ".ur_f8: \n" | 1323 | ".ur_f8: \n" |
1318 | "mov %[psiz], %[rx] \n" | ||
1319 | "shll2 %[rx] \n" | ||
1320 | "add %[rx], %[rx] \n" | ||
1321 | "add %[rx], %[addr] \n" | ||
1322 | /* Point behind the last plane for this round. Note: We're using the | ||
1323 | * registers backwards in order to reuse the streak for the last round. | ||
1324 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1325 | * the bit order would be destroyed which results in more flicker. */ | ||
1326 | "sub %[psiz], %[addr] \n" | ||
1327 | "mov.b @%[addr], r0 \n" /* load old byte */ | 1324 | "mov.b @%[addr], r0 \n" /* load old byte */ |
1328 | "and %[mask], r0 \n" /* mask out replaced bits */ | 1325 | "and %[mask], r0 \n" /* mask out replaced bits */ |
1329 | "or r8, r0 \n" /* set new bits */ | 1326 | "or r1, r0 \n" /* set new bits */ |
1330 | "mov.b r0, @%[addr] \n" /* store byte */ | 1327 | "mov.b r0, @%[addr] \n" /* store byte */ |
1331 | "shlr8 r8 \n" /* shift out used-up byte */ | 1328 | "add %[psiz], %[addr] \n" |
1329 | "shlr8 r1 \n" /* shift out used-up byte */ | ||
1332 | ".ur_f7: \n" | 1330 | ".ur_f7: \n" |
1333 | "sub %[psiz], %[addr] \n" | ||
1334 | "mov.b @%[addr], r0 \n" | 1331 | "mov.b @%[addr], r0 \n" |
1335 | "and %[mask], r0 \n" | 1332 | "and %[mask], r0 \n" |
1336 | "or r7, r0 \n" | 1333 | "or r2, r0 \n" |
1337 | "mov.b r0, @%[addr] \n" | 1334 | "mov.b r0, @%[addr] \n" |
1338 | "shlr8 r7 \n" | 1335 | "add %[psiz], %[addr] \n" |
1336 | "shlr8 r2 \n" | ||
1339 | ".ur_f6: \n" | 1337 | ".ur_f6: \n" |
1340 | "sub %[psiz], %[addr] \n" | ||
1341 | "mov.b @%[addr], r0 \n" | 1338 | "mov.b @%[addr], r0 \n" |
1342 | "and %[mask], r0 \n" | 1339 | "and %[mask], r0 \n" |
1343 | "or r6, r0 \n" | 1340 | "or r3, r0 \n" |
1344 | "mov.b r0, @%[addr] \n" | 1341 | "mov.b r0, @%[addr] \n" |
1345 | "shlr8 r6 \n" | 1342 | "add %[psiz], %[addr] \n" |
1343 | "shlr8 r3 \n" | ||
1346 | ".ur_f5: \n" | 1344 | ".ur_f5: \n" |
1347 | "sub %[psiz], %[addr] \n" | ||
1348 | "mov.b @%[addr], r0 \n" | 1345 | "mov.b @%[addr], r0 \n" |
1349 | "and %[mask], r0 \n" | 1346 | "and %[mask], r0 \n" |
1350 | "or r5, r0 \n" | 1347 | "or r4, r0 \n" |
1351 | "mov.b r0, @%[addr] \n" | 1348 | "mov.b r0, @%[addr] \n" |
1352 | "shlr8 r5 \n" | 1349 | "add %[psiz], %[addr] \n" |
1350 | "shlr8 r4 \n" | ||
1353 | ".ur_f4: \n" | 1351 | ".ur_f4: \n" |
1354 | "sub %[psiz], %[addr] \n" | ||
1355 | "mov.b @%[addr], r0 \n" | 1352 | "mov.b @%[addr], r0 \n" |
1356 | "and %[mask], r0 \n" | 1353 | "and %[mask], r0 \n" |
1357 | "or r4, r0 \n" | 1354 | "or r5, r0 \n" |
1358 | "mov.b r0, @%[addr] \n" | 1355 | "mov.b r0, @%[addr] \n" |
1359 | "shlr8 r4 \n" | 1356 | "add %[psiz], %[addr] \n" |
1357 | "shlr8 r5 \n" | ||
1360 | ".ur_f3: \n" | 1358 | ".ur_f3: \n" |
1361 | "sub %[psiz], %[addr] \n" | ||
1362 | "mov.b @%[addr], r0 \n" | 1359 | "mov.b @%[addr], r0 \n" |
1363 | "and %[mask], r0 \n" | 1360 | "and %[mask], r0 \n" |
1364 | "or r3, r0 \n" | 1361 | "or r6, r0 \n" |
1365 | "mov.b r0, @%[addr] \n" | 1362 | "mov.b r0, @%[addr] \n" |
1366 | "shlr8 r3 \n" | 1363 | "add %[psiz], %[addr] \n" |
1364 | "shlr8 r6 \n" | ||
1367 | ".ur_f2: \n" | 1365 | ".ur_f2: \n" |
1368 | "sub %[psiz], %[addr] \n" | ||
1369 | "mov.b @%[addr], r0 \n" | 1366 | "mov.b @%[addr], r0 \n" |
1370 | "and %[mask], r0 \n" | 1367 | "and %[mask], r0 \n" |
1371 | "or r2, r0 \n" | 1368 | "or r7, r0 \n" |
1372 | "mov.b r0, @%[addr] \n" | 1369 | "mov.b r0, @%[addr] \n" |
1373 | "shlr8 r2 \n" | 1370 | "add %[psiz], %[addr] \n" |
1371 | "shlr8 r7 \n" | ||
1374 | ".ur_f1: \n" | 1372 | ".ur_f1: \n" |
1375 | "sub %[psiz], %[addr] \n" | ||
1376 | "mov.b @%[addr], r0 \n" | 1373 | "mov.b @%[addr], r0 \n" |
1377 | "and %[mask], r0 \n" | 1374 | "and %[mask], r0 \n" |
1378 | "or r1, r0 \n" | 1375 | "or r8, r0 \n" |
1379 | "mov.b r0, @%[addr] \n" | 1376 | "mov.b r0, @%[addr] \n" |
1380 | "shlr8 r1 \n" | 1377 | "add %[psiz], %[addr] \n" |
1381 | ".ur_f0: \n" | 1378 | "shlr8 r8 \n" |
1382 | 1379 | ||
1383 | "add %[rx], %[addr] \n" /* correct address */ | ||
1384 | "add #-8, %[dpth] \n" | 1380 | "add #-8, %[dpth] \n" |
1385 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1381 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1386 | "bt .ur_floop \n" | 1382 | "bt .ur_floop \n" |
@@ -1404,22 +1400,15 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1404 | ".ur_mask1: \n" | 1400 | ".ur_mask1: \n" |
1405 | ".long 0xAAAAAAAA \n" | 1401 | ".long 0xAAAAAAAA \n" |
1406 | 1402 | ||
1407 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1403 | ".ur_sstart: \n" |
1408 | "mov #8, r0 \n" | 1404 | "mova .ur_stable, r0 \n" /* jump into short loop */ |
1409 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | 1405 | "mov.b @(r0, %[rx]), %[rx] \n" |
1410 | "bt .ur_s8 \n" | ||
1411 | |||
1412 | "mulu %[psiz], %[dpth] \n" | ||
1413 | "mova .ur_stable, r0 \n" | ||
1414 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1415 | "add %[rx], r0 \n" | 1406 | "add %[rx], r0 \n" |
1416 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1407 | "jmp @r0 \n" |
1417 | "jmp @r0 \n" /* jump into streak */ | 1408 | "nop \n" |
1418 | "add %[rx], %[addr] \n" /* ..for this round */ | ||
1419 | 1409 | ||
1420 | ".align 2 \n" | 1410 | ".align 2 \n" |
1421 | ".ur_stable: \n" | 1411 | ".ur_stable: \n" |
1422 | ".byte .ur_s0 - .ur_stable \n" | ||
1423 | ".byte .ur_s1 - .ur_stable \n" | 1412 | ".byte .ur_s1 - .ur_stable \n" |
1424 | ".byte .ur_s2 - .ur_stable \n" | 1413 | ".byte .ur_s2 - .ur_stable \n" |
1425 | ".byte .ur_s3 - .ur_stable \n" | 1414 | ".byte .ur_s3 - .ur_stable \n" |
@@ -1427,47 +1416,42 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1427 | ".byte .ur_s5 - .ur_stable \n" | 1416 | ".byte .ur_s5 - .ur_stable \n" |
1428 | ".byte .ur_s6 - .ur_stable \n" | 1417 | ".byte .ur_s6 - .ur_stable \n" |
1429 | ".byte .ur_s7 - .ur_stable \n" | 1418 | ".byte .ur_s7 - .ur_stable \n" |
1419 | ".byte .ur_s8 - .ur_stable \n" | ||
1430 | 1420 | ||
1421 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
1431 | ".ur_s8: \n" | 1422 | ".ur_s8: \n" |
1432 | "mov %[psiz], %[rx] \n" /* Point behind the last plane */ | 1423 | "mov.b r1, @%[addr] \n" /* store byte */ |
1433 | "shll2 %[rx] \n" /* for this round. */ | 1424 | "add %[psiz], %[addr] \n" |
1434 | "add %[rx], %[rx] \n" /* See above. */ | 1425 | "shlr8 r1 \n" /* shift out used-up byte */ |
1435 | "add %[rx], %[addr] \n" | ||
1436 | |||
1437 | "sub %[psiz], %[addr] \n" | ||
1438 | "mov.b r8, @%[addr] \n" /* store byte */ | ||
1439 | "shlr8 r8 \n" /* shift out used-up byte */ | ||
1440 | ".ur_s7: \n" | 1426 | ".ur_s7: \n" |
1441 | "sub %[psiz], %[addr] \n" | 1427 | "mov.b r2, @%[addr] \n" |
1442 | "mov.b r7, @%[addr] \n" | 1428 | "add %[psiz], %[addr] \n" |
1443 | "shlr8 r7 \n" | 1429 | "shlr8 r2 \n" |
1444 | ".ur_s6: \n" | 1430 | ".ur_s6: \n" |
1445 | "sub %[psiz], %[addr] \n" | 1431 | "mov.b r3, @%[addr] \n" |
1446 | "mov.b r6, @%[addr] \n" | 1432 | "add %[psiz], %[addr] \n" |
1447 | "shlr8 r6 \n" | 1433 | "shlr8 r3 \n" |
1448 | ".ur_s5: \n" | 1434 | ".ur_s5: \n" |
1449 | "sub %[psiz], %[addr] \n" | ||
1450 | "mov.b r5, @%[addr] \n" | ||
1451 | "shlr8 r5 \n" | ||
1452 | ".ur_s4: \n" | ||
1453 | "sub %[psiz], %[addr] \n" | ||
1454 | "mov.b r4, @%[addr] \n" | 1435 | "mov.b r4, @%[addr] \n" |
1436 | "add %[psiz], %[addr] \n" | ||
1455 | "shlr8 r4 \n" | 1437 | "shlr8 r4 \n" |
1438 | ".ur_s4: \n" | ||
1439 | "mov.b r5, @%[addr] \n" | ||
1440 | "add %[psiz], %[addr] \n" | ||
1441 | "shlr8 r5 \n" | ||
1456 | ".ur_s3: \n" | 1442 | ".ur_s3: \n" |
1457 | "sub %[psiz], %[addr] \n" | 1443 | "mov.b r6, @%[addr] \n" |
1458 | "mov.b r3, @%[addr] \n" | 1444 | "add %[psiz], %[addr] \n" |
1459 | "shlr8 r3 \n" | 1445 | "shlr8 r6 \n" |
1460 | ".ur_s2: \n" | 1446 | ".ur_s2: \n" |
1461 | "sub %[psiz], %[addr] \n" | 1447 | "mov.b r7, @%[addr] \n" |
1462 | "mov.b r2, @%[addr] \n" | 1448 | "add %[psiz], %[addr] \n" |
1463 | "shlr8 r2 \n" | 1449 | "shlr8 r7 \n" |
1464 | ".ur_s1: \n" | 1450 | ".ur_s1: \n" |
1465 | "sub %[psiz], %[addr] \n" | 1451 | "mov.b r8, @%[addr] \n" |
1466 | "mov.b r1, @%[addr] \n" | 1452 | "add %[psiz], %[addr] \n" |
1467 | "shlr8 r1 \n" | 1453 | "shlr8 r8 \n" |
1468 | ".ur_s0: \n" | ||
1469 | 1454 | ||
1470 | "add %[rx], %[addr] \n" /* correct address */ | ||
1471 | "add #-8, %[dpth] \n" | 1455 | "add #-8, %[dpth] \n" |
1472 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1456 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1473 | "bt .ur_sloop \n" | 1457 | "bt .ur_sloop \n" |
@@ -1677,172 +1661,163 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1677 | "move.l %%a0, %%d5 \n" | 1661 | "move.l %%a0, %%d5 \n" |
1678 | "eor.l %%d5, %%d0 \n" | 1662 | "eor.l %%d5, %%d0 \n" |
1679 | "and.l #0xAAAAAAAA, %%d0 \n" | 1663 | "and.l #0xAAAAAAAA, %%d0 \n" |
1680 | "eor.l %%d0, %%d5 \n" | 1664 | "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */ |
1681 | "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ | 1665 | /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */ |
1682 | "lsr.l #1, %%d0 \n" | 1666 | "lsr.l #1, %%d0 \n" |
1683 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ | 1667 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ |
1668 | |||
1669 | "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/ | ||
1670 | "subq.l #1, %%d0 \n" | ||
1671 | "and.l #7, %%d0 \n" | ||
1672 | "move.l %%d0, %%a0 \n" | ||
1673 | "move.l %[ax], %%d0 \n" /* all data in D registers */ | ||
1674 | "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */ | ||
1675 | "lsr.l #8, %%d2 \n" | ||
1676 | "lsr.l #8, %%d3 \n" | ||
1677 | "lsr.l #8, %%d4 \n" | ||
1678 | "lsr.l #8, %%d0 \n" | ||
1679 | "lsr.l #8, %%d6 \n" | ||
1680 | "lsr.l #8, %%d7 \n" | ||
1681 | "lsr.l #8, %%d5 \n" | ||
1682 | "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */ | ||
1683 | "move.l %%a0, %%d0 \n" /* keep the value for later */ | ||
1684 | "move.l %%d5, %%a0 \n" /* ..into their A registers */ | ||
1684 | 1685 | ||
1685 | "tst.l %[mask] \n" | 1686 | "tst.l %[mask] \n" |
1686 | "jeq .ur_sloop \n" /* short loop if nothing to keep */ | 1687 | "jeq .ur_sstart \n" /* short loop if nothing to keep */ |
1687 | 1688 | ||
1688 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ | 1689 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ |
1689 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ | 1690 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ |
1690 | 1691 | ||
1691 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1692 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */ |
1692 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1693 | "bra.s .ur_f1 \n" |
1693 | "bhs.s .ur_f8 \n" | ||
1694 | |||
1695 | "move.l %[psiz], %%d0 \n" | ||
1696 | "move.l %[dpth], %%d1 \n" | ||
1697 | "mulu.w %%d1, %%d0 \n" /* point behind the last plane */ | ||
1698 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1699 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1700 | "bra.s .ur_f1 \n" /* dpth == 0 should never happen */ | ||
1701 | "bra.s .ur_f2 \n" | 1694 | "bra.s .ur_f2 \n" |
1702 | "bra.s .ur_f3 \n" | 1695 | "bra.s .ur_f3 \n" |
1703 | "bra.s .ur_f4 \n" | 1696 | "bra.s .ur_f4 \n" |
1704 | "bra.s .ur_f5 \n" | 1697 | "bra.s .ur_f5 \n" |
1705 | "bra.s .ur_f6 \n" | 1698 | "bra.s .ur_f6 \n" |
1706 | "bra.s .ur_f7 \n" | 1699 | "bra.s .ur_f7 \n" |
1700 | /* bra.s .ur_f8 */ /* identical with target */ | ||
1707 | 1701 | ||
1702 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | ||
1708 | ".ur_f8: \n" | 1703 | ".ur_f8: \n" |
1709 | "move.l %[psiz], %%d0 \n" | ||
1710 | "lsl.l #3, %%d0 \n" | ||
1711 | "add.l %%d0, %[addr] \n" | ||
1712 | /* Point behind the last plane for this round. Note: We're using the | ||
1713 | * registers backwards in order to reuse the streak for the last round. | ||
1714 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1715 | * the bit order would be destroyed which results in more flicker. */ | ||
1716 | "sub.l %[psiz], %[addr] \n" | ||
1717 | "move.b (%[addr]), %%d0 \n" /* load old byte */ | 1704 | "move.b (%[addr]), %%d0 \n" /* load old byte */ |
1718 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ | 1705 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ |
1719 | "move.l %[mask], %%d1 \n" | 1706 | "move.l %%a0, %%d1 \n" |
1720 | "or.l %%d1, %%d0 \n" /* set new bits */ | 1707 | "or.l %%d1, %%d0 \n" /* set new bits */ |
1721 | "move.b %%d0, (%[addr]) \n" /* store byte */ | 1708 | "move.b %%d0, (%[addr]) \n" /* store byte */ |
1709 | "add.l %[psiz], %[addr] \n" | ||
1722 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | 1710 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ |
1723 | "move.l %%d1, %[mask] \n" | 1711 | "move.l %%d1, %%a0 \n" |
1724 | ".ur_f7: \n" | 1712 | ".ur_f7: \n" |
1725 | "sub.l %[psiz], %[addr] \n" | ||
1726 | "move.b (%[addr]), %%d0 \n" | 1713 | "move.b (%[addr]), %%d0 \n" |
1727 | "and.l %%d5, %%d0 \n" | 1714 | "and.l %%d5, %%d0 \n" |
1728 | "or.l %%d2, %%d0 \n" | 1715 | "or.l %%d7, %%d0 \n" |
1729 | "move.b %%d0, (%[addr]) \n" | 1716 | "move.b %%d0, (%[addr]) \n" |
1730 | "lsr.l #8, %%d2 \n" | 1717 | "add.l %[psiz], %[addr] \n" |
1718 | "lsr.l #8, %%d7 \n" | ||
1731 | ".ur_f6: \n" | 1719 | ".ur_f6: \n" |
1732 | "sub.l %[psiz], %[addr] \n" | ||
1733 | "move.b (%[addr]), %%d0 \n" | 1720 | "move.b (%[addr]), %%d0 \n" |
1734 | "and.l %%d5, %%d0 \n" | 1721 | "and.l %%d5, %%d0 \n" |
1735 | "or.l %%d3, %%d0 \n" | 1722 | "or.l %%d6, %%d0 \n" |
1736 | "move.b %%d0, (%[addr]) \n" | 1723 | "move.b %%d0, (%[addr]) \n" |
1737 | "lsr.l #8, %%d3 \n" | 1724 | "add.l %[psiz], %[addr] \n" |
1725 | "lsr.l #8, %%d6 \n" | ||
1738 | ".ur_f5: \n" | 1726 | ".ur_f5: \n" |
1739 | "sub.l %[psiz], %[addr] \n" | ||
1740 | "move.b (%[addr]), %%d0 \n" | ||
1741 | "and.l %%d5, %%d0 \n" | ||
1742 | "or.l %%d4, %%d0 \n" | ||
1743 | "move.b %%d0, (%[addr]) \n" | ||
1744 | "lsr.l #8, %%d4 \n" | ||
1745 | ".ur_f4: \n" | ||
1746 | "sub.l %[psiz], %[addr] \n" | ||
1747 | "move.b (%[addr]), %%d0 \n" | 1727 | "move.b (%[addr]), %%d0 \n" |
1748 | "and.l %%d5, %%d0 \n" | 1728 | "and.l %%d5, %%d0 \n" |
1749 | "move.l %[ax], %%d1 \n" | 1729 | "move.l %[ax], %%d1 \n" |
1750 | "or.l %%d1, %%d0 \n" | 1730 | "or.l %%d1, %%d0 \n" |
1751 | "move.b %%d0, (%[addr]) \n" | 1731 | "move.b %%d0, (%[addr]) \n" |
1732 | "add.l %[psiz], %[addr] \n" | ||
1752 | "lsr.l #8, %%d1 \n" | 1733 | "lsr.l #8, %%d1 \n" |
1753 | "move.l %%d1, %[ax] \n" | 1734 | "move.l %%d1, %[ax] \n" |
1735 | ".ur_f4: \n" | ||
1736 | "move.b (%[addr]), %%d0 \n" | ||
1737 | "and.l %%d5, %%d0 \n" | ||
1738 | "or.l %%d4, %%d0 \n" | ||
1739 | "move.b %%d0, (%[addr]) \n" | ||
1740 | "add.l %[psiz], %[addr] \n" | ||
1741 | "lsr.l #8, %%d4 \n" | ||
1754 | ".ur_f3: \n" | 1742 | ".ur_f3: \n" |
1755 | "sub.l %[psiz], %[addr] \n" | ||
1756 | "move.b (%[addr]), %%d0 \n" | 1743 | "move.b (%[addr]), %%d0 \n" |
1757 | "and.l %%d5, %%d0 \n" | 1744 | "and.l %%d5, %%d0 \n" |
1758 | "or.l %%d6, %%d0 \n" | 1745 | "or.l %%d3, %%d0 \n" |
1759 | "move.b %%d0, (%[addr]) \n" | 1746 | "move.b %%d0, (%[addr]) \n" |
1760 | "lsr.l #8, %%d6 \n" | 1747 | "add.l %[psiz], %[addr] \n" |
1748 | "lsr.l #8, %%d3 \n" | ||
1761 | ".ur_f2: \n" | 1749 | ".ur_f2: \n" |
1762 | "sub.l %[psiz], %[addr] \n" | ||
1763 | "move.b (%[addr]), %%d0 \n" | 1750 | "move.b (%[addr]), %%d0 \n" |
1764 | "and.l %%d5, %%d0 \n" | 1751 | "and.l %%d5, %%d0 \n" |
1765 | "or.l %%d7, %%d0 \n" | 1752 | "or.l %%d2, %%d0 \n" |
1766 | "move.b %%d0, (%[addr]) \n" | 1753 | "move.b %%d0, (%[addr]) \n" |
1767 | "lsr.l #8, %%d7 \n" | 1754 | "add.l %[psiz], %[addr] \n" |
1755 | "lsr.l #8, %%d2 \n" | ||
1768 | ".ur_f1: \n" | 1756 | ".ur_f1: \n" |
1769 | "sub.l %[psiz], %[addr] \n" | ||
1770 | "move.b (%[addr]), %%d0 \n" | 1757 | "move.b (%[addr]), %%d0 \n" |
1771 | "and.l %%d5, %%d0 \n" | 1758 | "and.l %%d5, %%d0 \n" |
1772 | "move.l %%a0, %%d1 \n" | 1759 | "move.l %[mask], %%d1 \n" |
1773 | "or.l %%d1, %%d0 \n" | 1760 | "or.l %%d1, %%d0 \n" |
1774 | "move.b %%d0, (%[addr]) \n" | 1761 | "move.b %%d0, (%[addr]) \n" |
1762 | "add.l %[psiz], %[addr] \n" | ||
1775 | "lsr.l #8, %%d1 \n" | 1763 | "lsr.l #8, %%d1 \n" |
1776 | "move.l %%d1, %%a0 \n" | 1764 | "move.l %%d1, %[mask] \n" |
1777 | 1765 | ||
1778 | "move.l %[psiz], %%d0 \n" | ||
1779 | "lsl.l #3, %%d0 \n" | ||
1780 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
1781 | "subq.l #8, %[dpth] \n" | 1766 | "subq.l #8, %[dpth] \n" |
1782 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1767 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
1783 | "jgt .ur_floop \n" /* next round if anything left */ | 1768 | "jgt .ur_floop \n" /* next round if anything left */ |
1784 | 1769 | ||
1785 | "jra .ur_end \n" | 1770 | "jra .ur_end \n" |
1786 | 1771 | ||
1787 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1772 | ".ur_sstart: \n" |
1788 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1773 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */ |
1789 | "bhs.s .ur_s8 \n" | 1774 | "bra.s .ur_s1 \n" |
1790 | |||
1791 | "move.l %[psiz], %%d0 \n" | ||
1792 | "move.l %[dpth], %%d5 \n" | ||
1793 | "mulu.w %%d5, %%d0 \n" /* point behind the last plane */ | ||
1794 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1795 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1796 | "bra.s .ur_s1 \n" /* dpth == 0 should never happen */ | ||
1797 | "bra.s .ur_s2 \n" | 1775 | "bra.s .ur_s2 \n" |
1798 | "bra.s .ur_s3 \n" | 1776 | "bra.s .ur_s3 \n" |
1799 | "bra.s .ur_s4 \n" | 1777 | "bra.s .ur_s4 \n" |
1800 | "bra.s .ur_s5 \n" | 1778 | "bra.s .ur_s5 \n" |
1801 | "bra.s .ur_s6 \n" | 1779 | "bra.s .ur_s6 \n" |
1802 | "bra.s .ur_s7 \n" | 1780 | "bra.s .ur_s7 \n" |
1781 | /* bra.s .ur_s8 */ /* identical with target */ | ||
1803 | 1782 | ||
1783 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
1804 | ".ur_s8: \n" | 1784 | ".ur_s8: \n" |
1805 | "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ | 1785 | "move.l %%a0, %%d5 \n" |
1806 | "lsl.l #3, %%d0 \n" /* for this round. */ | 1786 | "move.b %%d5, (%[addr]) \n" /* store byte */ |
1807 | "add.l %%d0, %[addr] \n" /* See above. */ | 1787 | "add.l %[psiz], %[addr] \n" |
1808 | 1788 | "lsr.l #8, %%d5 \n" /* shift out used-up byte */ | |
1809 | "sub.l %[psiz], %[addr] \n" | 1789 | "move.l %%d5, %%a0 \n" |
1810 | "move.b %%d1, (%[addr]) \n" /* store byte */ | ||
1811 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | ||
1812 | ".ur_s7: \n" | 1790 | ".ur_s7: \n" |
1813 | "sub.l %[psiz], %[addr] \n" | 1791 | "move.b %%d7, (%[addr]) \n" |
1814 | "move.b %%d2, (%[addr]) \n" | 1792 | "add.l %[psiz], %[addr] \n" |
1815 | "lsr.l #8, %%d2 \n" | 1793 | "lsr.l #8, %%d7 \n" |
1816 | ".ur_s6: \n" | 1794 | ".ur_s6: \n" |
1817 | "sub.l %[psiz], %[addr] \n" | 1795 | "move.b %%d6, (%[addr]) \n" |
1818 | "move.b %%d3, (%[addr]) \n" | 1796 | "add.l %[psiz], %[addr] \n" |
1819 | "lsr.l #8, %%d3 \n" | 1797 | "lsr.l #8, %%d6 \n" |
1820 | ".ur_s5: \n" | 1798 | ".ur_s5: \n" |
1821 | "sub.l %[psiz], %[addr] \n" | ||
1822 | "move.b %%d4, (%[addr]) \n" | ||
1823 | "lsr.l #8, %%d4 \n" | ||
1824 | ".ur_s4: \n" | ||
1825 | "sub.l %[psiz], %[addr] \n" | ||
1826 | "move.l %[ax], %%d5 \n" | 1799 | "move.l %[ax], %%d5 \n" |
1827 | "move.b %%d5, (%[addr]) \n" | 1800 | "move.b %%d5, (%[addr]) \n" |
1801 | "add.l %[psiz], %[addr] \n" | ||
1828 | "lsr.l #8, %%d5 \n" | 1802 | "lsr.l #8, %%d5 \n" |
1829 | "move.l %%d5, %[ax] \n" | 1803 | "move.l %%d5, %[ax] \n" |
1804 | ".ur_s4: \n" | ||
1805 | "move.b %%d4, (%[addr]) \n" | ||
1806 | "add.l %[psiz], %[addr] \n" | ||
1807 | "lsr.l #8, %%d4 \n" | ||
1830 | ".ur_s3: \n" | 1808 | ".ur_s3: \n" |
1831 | "sub.l %[psiz], %[addr] \n" | 1809 | "move.b %%d3, (%[addr]) \n" |
1832 | "move.b %%d6, (%[addr]) \n" | 1810 | "add.l %[psiz], %[addr] \n" |
1833 | "lsr.l #8, %%d6 \n" | 1811 | "lsr.l #8, %%d3 \n" |
1834 | ".ur_s2: \n" | 1812 | ".ur_s2: \n" |
1835 | "sub.l %[psiz], %[addr] \n" | 1813 | "move.b %%d2, (%[addr]) \n" |
1836 | "move.b %%d7, (%[addr]) \n" | 1814 | "add.l %[psiz], %[addr] \n" |
1837 | "lsr.l #8, %%d7 \n" | 1815 | "lsr.l #8, %%d2 \n" |
1838 | ".ur_s1: \n" | 1816 | ".ur_s1: \n" |
1839 | "sub.l %[psiz], %[addr] \n" | 1817 | "move.b %%d1, (%[addr]) \n" |
1840 | "move.l %%a0, %%d5 \n" | 1818 | "add.l %[psiz], %[addr] \n" |
1841 | "move.b %%d5, (%[addr]) \n" | 1819 | "lsr.l #8, %%d1 \n" |
1842 | "lsr.l #8, %%d5 \n" | ||
1843 | "move.l %%d5, %%a0 \n" | ||
1844 | 1820 | ||
1845 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
1846 | "subq.l #8, %[dpth] \n" | 1821 | "subq.l #8, %[dpth] \n" |
1847 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1822 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
1848 | "jgt .ur_sloop \n" /* next round if anything left */ | 1823 | "jgt .ur_sloop \n" /* next round if anything left */ |
@@ -1871,7 +1846,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1871 | { | 1846 | { |
1872 | unsigned char *addr, *end; | 1847 | unsigned char *addr, *end; |
1873 | unsigned mask = 0; | 1848 | unsigned mask = 0; |
1874 | unsigned test = 1; | 1849 | unsigned test = 1 << ((-_gray_info.depth) & 7); |
1875 | int i; | 1850 | int i; |
1876 | 1851 | ||
1877 | /* precalculate the bit patterns with random shifts | 1852 | /* precalculate the bit patterns with random shifts |
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c index dcc65bdd09..bd8ea4f1ce 100644 --- a/apps/plugins/lib/gray_draw.c +++ b/apps/plugins/lib/gray_draw.c | |||
@@ -1002,103 +1002,94 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1002 | "and r0, r0, %[rx] \n" | 1002 | "and r0, r0, %[rx] \n" |
1003 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ | 1003 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ |
1004 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 1004 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
1005 | |||
1006 | "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/ | ||
1007 | "and r0, r0, #7 \n" | ||
1008 | "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */ | ||
1009 | "mov r8, r8, lsr #8 \n" /* r8: never reached */ | ||
1010 | "mov r7, r7, lsr #8 \n" | ||
1011 | "mov r6, r6, lsr #8 \n" | ||
1012 | "mov r5, r5, lsr #8 \n" | ||
1013 | "mov r4, r4, lsr #8 \n" | ||
1014 | "mov r3, r3, lsr #8 \n" | ||
1015 | "mov r2, r2, lsr #8 \n" | ||
1016 | "mov r1, r1, lsr #8 \n" | ||
1005 | 1017 | ||
1006 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | 1018 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ |
1007 | "ands %[mask], %[mask], #0xff \n" | 1019 | "ands %[mask], %[mask], #0xff \n" |
1008 | "beq .wa_sloop \n" /* short loop if no bits to keep */ | 1020 | "beq .wa_sstart \n" /* short loop if no bits to keep */ |
1009 | |||
1010 | ".wa_floop: \n" /** full loop (bits to keep)**/ | ||
1011 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | ||
1012 | "bhs .wa_f8 \n" | ||
1013 | 1021 | ||
1014 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | 1022 | "ldrb r0, [pc, r0] \n" /* jump into full loop */ |
1015 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
1016 | |||
1017 | |||
1018 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
1019 | "add pc, pc, r0 \n" | 1023 | "add pc, pc, r0 \n" |
1020 | ".wa_ftable: \n" | 1024 | ".wa_ftable: \n" |
1021 | ".byte .wa_f0 - .wa_ftable - 4 \n" /* [jump tables are tricky] */ | 1025 | ".byte .wa_f1 - .wa_ftable - 4 \n" /* [jump tables are tricky] */ |
1022 | ".byte .wa_f1 - .wa_ftable - 4 \n" | ||
1023 | ".byte .wa_f2 - .wa_ftable - 4 \n" | 1026 | ".byte .wa_f2 - .wa_ftable - 4 \n" |
1024 | ".byte .wa_f3 - .wa_ftable - 4 \n" | 1027 | ".byte .wa_f3 - .wa_ftable - 4 \n" |
1025 | ".byte .wa_f4 - .wa_ftable - 4 \n" | 1028 | ".byte .wa_f4 - .wa_ftable - 4 \n" |
1026 | ".byte .wa_f5 - .wa_ftable - 4 \n" | 1029 | ".byte .wa_f5 - .wa_ftable - 4 \n" |
1027 | ".byte .wa_f6 - .wa_ftable - 4 \n" | 1030 | ".byte .wa_f6 - .wa_ftable - 4 \n" |
1028 | ".byte .wa_f7 - .wa_ftable - 4 \n" | 1031 | ".byte .wa_f7 - .wa_ftable - 4 \n" |
1032 | ".byte .wa_f8 - .wa_ftable - 4 \n" | ||
1029 | 1033 | ||
1034 | ".wa_floop: \n" /** full loop (bits to keep)**/ | ||
1030 | ".wa_f8: \n" | 1035 | ".wa_f8: \n" |
1031 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 1036 | "ldrb r0, [%[addr]] \n" /* load old byte */ |
1032 | /* Point behind the last plane for this round. Note: We're using the | ||
1033 | * registers backwards in order to reuse the streak for the last round. | ||
1034 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1035 | * the bit order would be destroyed which results in more flicker. */ | ||
1036 | "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */ | ||
1037 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ | 1037 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ |
1038 | "orr r0, r0, r8 \n" /* set new bits */ | 1038 | "orr r0, r0, r1 \n" /* set new bits */ |
1039 | "strb r0, [%[addr]] \n" /* store byte */ | 1039 | "strb r0, [%[addr]], %[psiz] \n" /* store byte */ |
1040 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | 1040 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
1041 | ".wa_f7: \n" | 1041 | ".wa_f7: \n" |
1042 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1042 | "ldrb r0, [%[addr]] \n" |
1043 | "and r0, r0, %[mask] \n" | 1043 | "and r0, r0, %[mask] \n" |
1044 | "orr r0, r0, r7 \n" | 1044 | "orr r0, r0, r2 \n" |
1045 | "strb r0, [%[addr]] \n" | 1045 | "strb r0, [%[addr]], %[psiz] \n" |
1046 | "mov r7, r7, lsr #8 \n" | 1046 | "mov r2, r2, lsr #8 \n" |
1047 | ".wa_f6: \n" | 1047 | ".wa_f6: \n" |
1048 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1048 | "ldrb r0, [%[addr]] \n" |
1049 | "and r0, r0, %[mask] \n" | 1049 | "and r0, r0, %[mask] \n" |
1050 | "orr r0, r0, r6 \n" | 1050 | "orr r0, r0, r3 \n" |
1051 | "strb r0, [%[addr]] \n" | 1051 | "strb r0, [%[addr]], %[psiz] \n" |
1052 | "mov r6, r6, lsr #8 \n" | 1052 | "mov r3, r3, lsr #8 \n" |
1053 | ".wa_f5: \n" | 1053 | ".wa_f5: \n" |
1054 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1054 | "ldrb r0, [%[addr]] \n" |
1055 | "and r0, r0, %[mask] \n" | ||
1056 | "orr r0, r0, r5 \n" | ||
1057 | "strb r0, [%[addr]] \n" | ||
1058 | "mov r5, r5, lsr #8 \n" | ||
1059 | ".wa_f4: \n" | ||
1060 | "ldrb r0, [%[addr], -%[psiz]]! \n" | ||
1061 | "and r0, r0, %[mask] \n" | 1055 | "and r0, r0, %[mask] \n" |
1062 | "orr r0, r0, r4 \n" | 1056 | "orr r0, r0, r4 \n" |
1063 | "strb r0, [%[addr]] \n" | 1057 | "strb r0, [%[addr]], %[psiz] \n" |
1064 | "mov r4, r4, lsr #8 \n" | 1058 | "mov r4, r4, lsr #8 \n" |
1059 | ".wa_f4: \n" | ||
1060 | "ldrb r0, [%[addr]] \n" | ||
1061 | "and r0, r0, %[mask] \n" | ||
1062 | "orr r0, r0, r5 \n" | ||
1063 | "strb r0, [%[addr]], %[psiz] \n" | ||
1064 | "mov r5, r5, lsr #8 \n" | ||
1065 | ".wa_f3: \n" | 1065 | ".wa_f3: \n" |
1066 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1066 | "ldrb r0, [%[addr]] \n" |
1067 | "and r0, r0, %[mask] \n" | 1067 | "and r0, r0, %[mask] \n" |
1068 | "orr r0, r0, r3 \n" | 1068 | "orr r0, r0, r6 \n" |
1069 | "strb r0, [%[addr]] \n" | 1069 | "strb r0, [%[addr]], %[psiz] \n" |
1070 | "mov r3, r3, lsr #8 \n" | 1070 | "mov r6, r6, lsr #8 \n" |
1071 | ".wa_f2: \n" | 1071 | ".wa_f2: \n" |
1072 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1072 | "ldrb r0, [%[addr]] \n" |
1073 | "and r0, r0, %[mask] \n" | 1073 | "and r0, r0, %[mask] \n" |
1074 | "orr r0, r0, r2 \n" | 1074 | "orr r0, r0, r7 \n" |
1075 | "strb r0, [%[addr]] \n" | 1075 | "strb r0, [%[addr]], %[psiz] \n" |
1076 | "mov r2, r2, lsr #8 \n" | 1076 | "mov r7, r7, lsr #8 \n" |
1077 | ".wa_f1: \n" | 1077 | ".wa_f1: \n" |
1078 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1078 | "ldrb r0, [%[addr]] \n" |
1079 | "and r0, r0, %[mask] \n" | 1079 | "and r0, r0, %[mask] \n" |
1080 | "orr r0, r0, r1 \n" | 1080 | "orr r0, r0, r8 \n" |
1081 | "strb r0, [%[addr]] \n" | 1081 | "strb r0, [%[addr]], %[psiz] \n" |
1082 | "mov r1, r1, lsr #8 \n" | 1082 | "mov r8, r8, lsr #8 \n" |
1083 | ".wa_f0: \n" | ||
1084 | 1083 | ||
1085 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
1086 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 1084 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
1087 | "bhi .wa_floop \n" | 1085 | "bhi .wa_floop \n" |
1088 | 1086 | ||
1089 | "b .wa_end \n" | 1087 | "b .wa_end \n" |
1090 | 1088 | ||
1091 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1089 | ".wa_sstart: \n" |
1092 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | 1090 | "ldrb r0, [pc, r0] \n" /* jump into short loop*/ |
1093 | "bhs .wa_s8 \n" | ||
1094 | |||
1095 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | ||
1096 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
1097 | |||
1098 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
1099 | "add pc, pc, r0 \n" | 1091 | "add pc, pc, r0 \n" |
1100 | ".wa_stable: \n" | 1092 | ".wa_stable: \n" |
1101 | ".byte .wa_s0 - .wa_stable - 4 \n" | ||
1102 | ".byte .wa_s1 - .wa_stable - 4 \n" | 1093 | ".byte .wa_s1 - .wa_stable - 4 \n" |
1103 | ".byte .wa_s2 - .wa_stable - 4 \n" | 1094 | ".byte .wa_s2 - .wa_stable - 4 \n" |
1104 | ".byte .wa_s3 - .wa_stable - 4 \n" | 1095 | ".byte .wa_s3 - .wa_stable - 4 \n" |
@@ -1106,36 +1097,34 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1106 | ".byte .wa_s5 - .wa_stable - 4 \n" | 1097 | ".byte .wa_s5 - .wa_stable - 4 \n" |
1107 | ".byte .wa_s6 - .wa_stable - 4 \n" | 1098 | ".byte .wa_s6 - .wa_stable - 4 \n" |
1108 | ".byte .wa_s7 - .wa_stable - 4 \n" | 1099 | ".byte .wa_s7 - .wa_stable - 4 \n" |
1100 | ".byte .wa_s8 - .wa_stable - 4 \n" | ||
1109 | 1101 | ||
1102 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
1110 | ".wa_s8: \n" | 1103 | ".wa_s8: \n" |
1111 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 1104 | "strb r1, [%[addr]], %[psiz] \n" /* store byte */ |
1112 | /* Point behind the last plane for this round. See above. */ | 1105 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
1113 | "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */ | ||
1114 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | ||
1115 | ".wa_s7: \n" | 1106 | ".wa_s7: \n" |
1116 | "strb r7, [%[addr], -%[psiz]]! \n" | 1107 | "strb r2, [%[addr]], %[psiz] \n" |
1117 | "mov r7, r7, lsr #8 \n" | 1108 | "mov r2, r2, lsr #8 \n" |
1118 | ".wa_s6: \n" | 1109 | ".wa_s6: \n" |
1119 | "strb r6, [%[addr], -%[psiz]]! \n" | 1110 | "strb r3, [%[addr]], %[psiz] \n" |
1120 | "mov r6, r6, lsr #8 \n" | 1111 | "mov r3, r3, lsr #8 \n" |
1121 | ".wa_s5: \n" | 1112 | ".wa_s5: \n" |
1122 | "strb r5, [%[addr], -%[psiz]]! \n" | 1113 | "strb r4, [%[addr]], %[psiz] \n" |
1123 | "mov r5, r5, lsr #8 \n" | ||
1124 | ".wa_s4: \n" | ||
1125 | "strb r4, [%[addr], -%[psiz]]! \n" | ||
1126 | "mov r4, r4, lsr #8 \n" | 1114 | "mov r4, r4, lsr #8 \n" |
1115 | ".wa_s4: \n" | ||
1116 | "strb r5, [%[addr]], %[psiz] \n" | ||
1117 | "mov r5, r5, lsr #8 \n" | ||
1127 | ".wa_s3: \n" | 1118 | ".wa_s3: \n" |
1128 | "strb r3, [%[addr], -%[psiz]]! \n" | 1119 | "strb r6, [%[addr]], %[psiz] \n" |
1129 | "mov r3, r3, lsr #8 \n" | 1120 | "mov r6, r6, lsr #8 \n" |
1130 | ".wa_s2: \n" | 1121 | ".wa_s2: \n" |
1131 | "strb r2, [%[addr], -%[psiz]]! \n" | 1122 | "strb r7, [%[addr]], %[psiz] \n" |
1132 | "mov r2, r2, lsr #8 \n" | 1123 | "mov r7, r7, lsr #8 \n" |
1133 | ".wa_s1: \n" | 1124 | ".wa_s1: \n" |
1134 | "strb r1, [%[addr], -%[psiz]]! \n" | 1125 | "strb r8, [%[addr]], %[psiz] \n" |
1135 | "mov r1, r1, lsr #8 \n" | 1126 | "mov r8, r8, lsr #8 \n" |
1136 | ".wa_s0: \n" | ||
1137 | 1127 | ||
1138 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
1139 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 1128 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
1140 | "bhi .wa_sloop \n" | 1129 | "bhi .wa_sloop \n" |
1141 | 1130 | ||
@@ -1187,7 +1176,7 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1187 | 1176 | ||
1188 | /* set the bits for all 8 pixels in all bytes according to the | 1177 | /* set the bits for all 8 pixels in all bytes according to the |
1189 | * precalculated patterns on the pattern stack */ | 1178 | * precalculated patterns on the pattern stack */ |
1190 | test = 1; | 1179 | test = 1 << ((-_gray_info.depth) & 7); |
1191 | mask = (~mask & 0xff); | 1180 | mask = (~mask & 0xff); |
1192 | if (mask == 0) | 1181 | if (mask == 0) |
1193 | { | 1182 | { |
@@ -1483,28 +1472,40 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1483 | "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ | 1472 | "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ |
1484 | "shlr r0 \n" | 1473 | "shlr r0 \n" |
1485 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 1474 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
1475 | |||
1476 | "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/ | ||
1477 | "add #-1, %[rx] \n" | ||
1478 | "mov #7, r0 \n" | ||
1479 | "and r0, %[rx] \n" | ||
1480 | "mova .wa_pshift, r0 \n" | ||
1481 | "add %[rx], r0 \n" | ||
1482 | "add %[rx], r0 \n" | ||
1483 | "jmp @r0 \n" /* jump into shift streak */ | ||
1484 | "nop \n" | ||
1485 | |||
1486 | ".align 2 \n" | ||
1487 | ".wa_pshift: \n" | ||
1488 | "shlr8 r7 \n" | ||
1489 | "shlr8 r6 \n" | ||
1490 | "shlr8 r5 \n" | ||
1491 | "shlr8 r4 \n" | ||
1492 | "shlr8 r3 \n" | ||
1493 | "shlr8 r2 \n" | ||
1494 | "shlr8 r1 \n" | ||
1486 | 1495 | ||
1487 | "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | 1496 | "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ |
1488 | "extu.b %[mask], %[mask] \n" /* mask out high bits */ | 1497 | "extu.b %[mask], %[mask] \n" /* mask out high bits */ |
1489 | "tst %[mask], %[mask] \n" | 1498 | "tst %[mask], %[mask] \n" |
1490 | "bt .wa_sloop \n" /* short loop if nothing to keep */ | 1499 | "bt .wa_sstart \n" /* short loop if nothing to keep */ |
1491 | |||
1492 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
1493 | "mov #8, r0 \n" | ||
1494 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | ||
1495 | "bt .wa_f8 \n" | ||
1496 | 1500 | ||
1497 | "mulu %[psiz], %[dpth] \n" | 1501 | "mova .wa_ftable, r0 \n" /* jump into full loop */ |
1498 | "mova .wa_ftable, r0 \n" | 1502 | "mov.b @(r0, %[rx]), %[rx] \n" |
1499 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1500 | "add %[rx], r0 \n" | 1503 | "add %[rx], r0 \n" |
1501 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1504 | "jmp @r0 \n" |
1502 | "jmp @r0 \n" /* jump into streak */ | 1505 | "nop \n" |
1503 | "add %[rx], %[addr] \n" /* ..for this round */ | 1506 | |
1504 | |||
1505 | ".align 2 \n" | 1507 | ".align 2 \n" |
1506 | ".wa_ftable: \n" | 1508 | ".wa_ftable: \n" |
1507 | ".byte .wa_f0 - .wa_ftable \n" | ||
1508 | ".byte .wa_f1 - .wa_ftable \n" | 1509 | ".byte .wa_f1 - .wa_ftable \n" |
1509 | ".byte .wa_f2 - .wa_ftable \n" | 1510 | ".byte .wa_f2 - .wa_ftable \n" |
1510 | ".byte .wa_f3 - .wa_ftable \n" | 1511 | ".byte .wa_f3 - .wa_ftable \n" |
@@ -1512,74 +1513,66 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1512 | ".byte .wa_f5 - .wa_ftable \n" | 1513 | ".byte .wa_f5 - .wa_ftable \n" |
1513 | ".byte .wa_f6 - .wa_ftable \n" | 1514 | ".byte .wa_f6 - .wa_ftable \n" |
1514 | ".byte .wa_f7 - .wa_ftable \n" | 1515 | ".byte .wa_f7 - .wa_ftable \n" |
1516 | ".byte .wa_f8 - .wa_ftable \n" | ||
1515 | 1517 | ||
1518 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
1516 | ".wa_f8: \n" | 1519 | ".wa_f8: \n" |
1517 | "mov %[psiz], %[rx] \n" | ||
1518 | "shll2 %[rx] \n" | ||
1519 | "add %[rx], %[rx] \n" | ||
1520 | "add %[rx], %[addr] \n" | ||
1521 | /* Point behind the last plane for this round. Note: We're using the | ||
1522 | * registers backwards in order to reuse the streak for the last round. | ||
1523 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1524 | * the bit order would be destroyed which results in more flicker. */ | ||
1525 | "sub %[psiz], %[addr] \n" | ||
1526 | "mov.b @%[addr], r0 \n" /* load old byte */ | 1520 | "mov.b @%[addr], r0 \n" /* load old byte */ |
1527 | "and %[mask], r0 \n" /* mask out replaced bits */ | 1521 | "and %[mask], r0 \n" /* mask out replaced bits */ |
1528 | "or r8, r0 \n" /* set new bits */ | 1522 | "or r1, r0 \n" /* set new bits */ |
1529 | "mov.b r0, @%[addr] \n" /* store byte */ | 1523 | "mov.b r0, @%[addr] \n" /* store byte */ |
1530 | "shlr8 r8 \n" /* shift out used-up byte */ | 1524 | "add %[psiz], %[addr] \n" |
1525 | "shlr8 r1 \n" /* shift out used-up byte */ | ||
1531 | ".wa_f7: \n" | 1526 | ".wa_f7: \n" |
1532 | "sub %[psiz], %[addr] \n" | ||
1533 | "mov.b @%[addr], r0 \n" | 1527 | "mov.b @%[addr], r0 \n" |
1534 | "and %[mask], r0 \n" | 1528 | "and %[mask], r0 \n" |
1535 | "or r7, r0 \n" | 1529 | "or r2, r0 \n" |
1536 | "mov.b r0, @%[addr] \n" | 1530 | "mov.b r0, @%[addr] \n" |
1537 | "shlr8 r7 \n" | 1531 | "add %[psiz], %[addr] \n" |
1532 | "shlr8 r2 \n" | ||
1538 | ".wa_f6: \n" | 1533 | ".wa_f6: \n" |
1539 | "sub %[psiz], %[addr] \n" | ||
1540 | "mov.b @%[addr], r0 \n" | 1534 | "mov.b @%[addr], r0 \n" |
1541 | "and %[mask], r0 \n" | 1535 | "and %[mask], r0 \n" |
1542 | "or r6, r0 \n" | 1536 | "or r3, r0 \n" |
1543 | "mov.b r0, @%[addr] \n" | 1537 | "mov.b r0, @%[addr] \n" |
1544 | "shlr8 r6 \n" | 1538 | "add %[psiz], %[addr] \n" |
1539 | "shlr8 r3 \n" | ||
1545 | ".wa_f5: \n" | 1540 | ".wa_f5: \n" |
1546 | "sub %[psiz], %[addr] \n" | ||
1547 | "mov.b @%[addr], r0 \n" | 1541 | "mov.b @%[addr], r0 \n" |
1548 | "and %[mask], r0 \n" | 1542 | "and %[mask], r0 \n" |
1549 | "or r5, r0 \n" | 1543 | "or r4, r0 \n" |
1550 | "mov.b r0, @%[addr] \n" | 1544 | "mov.b r0, @%[addr] \n" |
1551 | "shlr8 r5 \n" | 1545 | "add %[psiz], %[addr] \n" |
1546 | "shlr8 r4 \n" | ||
1552 | ".wa_f4: \n" | 1547 | ".wa_f4: \n" |
1553 | "sub %[psiz], %[addr] \n" | ||
1554 | "mov.b @%[addr], r0 \n" | 1548 | "mov.b @%[addr], r0 \n" |
1555 | "and %[mask], r0 \n" | 1549 | "and %[mask], r0 \n" |
1556 | "or r4, r0 \n" | 1550 | "or r5, r0 \n" |
1557 | "mov.b r0, @%[addr] \n" | 1551 | "mov.b r0, @%[addr] \n" |
1558 | "shlr8 r4 \n" | 1552 | "add %[psiz], %[addr] \n" |
1553 | "shlr8 r5 \n" | ||
1559 | ".wa_f3: \n" | 1554 | ".wa_f3: \n" |
1560 | "sub %[psiz], %[addr] \n" | ||
1561 | "mov.b @%[addr], r0 \n" | 1555 | "mov.b @%[addr], r0 \n" |
1562 | "and %[mask], r0 \n" | 1556 | "and %[mask], r0 \n" |
1563 | "or r3, r0 \n" | 1557 | "or r6, r0 \n" |
1564 | "mov.b r0, @%[addr] \n" | 1558 | "mov.b r0, @%[addr] \n" |
1565 | "shlr8 r3 \n" | 1559 | "add %[psiz], %[addr] \n" |
1560 | "shlr8 r6 \n" | ||
1566 | ".wa_f2: \n" | 1561 | ".wa_f2: \n" |
1567 | "sub %[psiz], %[addr] \n" | ||
1568 | "mov.b @%[addr], r0 \n" | 1562 | "mov.b @%[addr], r0 \n" |
1569 | "and %[mask], r0 \n" | 1563 | "and %[mask], r0 \n" |
1570 | "or r2, r0 \n" | 1564 | "or r7, r0 \n" |
1571 | "mov.b r0, @%[addr] \n" | 1565 | "mov.b r0, @%[addr] \n" |
1572 | "shlr8 r2 \n" | 1566 | "add %[psiz], %[addr] \n" |
1567 | "shlr8 r7 \n" | ||
1573 | ".wa_f1: \n" | 1568 | ".wa_f1: \n" |
1574 | "sub %[psiz], %[addr] \n" | ||
1575 | "mov.b @%[addr], r0 \n" | 1569 | "mov.b @%[addr], r0 \n" |
1576 | "and %[mask], r0 \n" | 1570 | "and %[mask], r0 \n" |
1577 | "or r1, r0 \n" | 1571 | "or r8, r0 \n" |
1578 | "mov.b r0, @%[addr] \n" | 1572 | "mov.b r0, @%[addr] \n" |
1579 | "shlr8 r1 \n" | 1573 | "add %[psiz], %[addr] \n" |
1580 | ".wa_f0: \n" | 1574 | "shlr8 r8 \n" |
1581 | 1575 | ||
1582 | "add %[rx], %[addr] \n" /* correct address */ | ||
1583 | "add #-8, %[dpth] \n" | 1576 | "add #-8, %[dpth] \n" |
1584 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1577 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1585 | "bt .wa_floop \n" | 1578 | "bt .wa_floop \n" |
@@ -1603,22 +1596,15 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1603 | ".wa_mask1: \n" | 1596 | ".wa_mask1: \n" |
1604 | ".long 0xAAAAAAAA \n" | 1597 | ".long 0xAAAAAAAA \n" |
1605 | 1598 | ||
1606 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1599 | ".wa_sstart: \n" |
1607 | "mov #8, r0 \n" | 1600 | "mova .wa_stable, r0 \n" /* jump into short loop */ |
1608 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | 1601 | "mov.b @(r0, %[rx]), %[rx] \n" |
1609 | "bt .wa_s8 \n" | ||
1610 | |||
1611 | "mulu %[psiz], %[dpth] \n" | ||
1612 | "mova .wa_stable, r0 \n" | ||
1613 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1614 | "add %[rx], r0 \n" | 1602 | "add %[rx], r0 \n" |
1615 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1603 | "jmp @r0 \n" |
1616 | "jmp @r0 \n" /* jump into streak */ | 1604 | "nop \n" |
1617 | "add %[rx], %[addr] \n" /* ..for this round */ | ||
1618 | 1605 | ||
1619 | ".align 2 \n" | 1606 | ".align 2 \n" |
1620 | ".wa_stable: \n" | 1607 | ".wa_stable: \n" |
1621 | ".byte .wa_s0 - .wa_stable \n" | ||
1622 | ".byte .wa_s1 - .wa_stable \n" | 1608 | ".byte .wa_s1 - .wa_stable \n" |
1623 | ".byte .wa_s2 - .wa_stable \n" | 1609 | ".byte .wa_s2 - .wa_stable \n" |
1624 | ".byte .wa_s3 - .wa_stable \n" | 1610 | ".byte .wa_s3 - .wa_stable \n" |
@@ -1626,47 +1612,42 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1626 | ".byte .wa_s5 - .wa_stable \n" | 1612 | ".byte .wa_s5 - .wa_stable \n" |
1627 | ".byte .wa_s6 - .wa_stable \n" | 1613 | ".byte .wa_s6 - .wa_stable \n" |
1628 | ".byte .wa_s7 - .wa_stable \n" | 1614 | ".byte .wa_s7 - .wa_stable \n" |
1615 | ".byte .wa_s8 - .wa_stable \n" | ||
1629 | 1616 | ||
1617 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
1630 | ".wa_s8: \n" | 1618 | ".wa_s8: \n" |
1631 | "mov %[psiz], %[rx] \n" /* Point behind the last plane */ | 1619 | "mov.b r1, @%[addr] \n" /* store byte */ |
1632 | "shll2 %[rx] \n" /* for this round. */ | 1620 | "add %[psiz], %[addr] \n" |
1633 | "add %[rx], %[rx] \n" /* See above. */ | 1621 | "shlr8 r1 \n" /* shift out used-up byte */ |
1634 | "add %[rx], %[addr] \n" | ||
1635 | |||
1636 | "sub %[psiz], %[addr] \n" | ||
1637 | "mov.b r8, @%[addr] \n" /* store byte */ | ||
1638 | "shlr8 r8 \n" /* shift out used-up byte */ | ||
1639 | ".wa_s7: \n" | 1622 | ".wa_s7: \n" |
1640 | "sub %[psiz], %[addr] \n" | 1623 | "mov.b r2, @%[addr] \n" |
1641 | "mov.b r7, @%[addr] \n" | 1624 | "add %[psiz], %[addr] \n" |
1642 | "shlr8 r7 \n" | 1625 | "shlr8 r2 \n" |
1643 | ".wa_s6: \n" | 1626 | ".wa_s6: \n" |
1644 | "sub %[psiz], %[addr] \n" | 1627 | "mov.b r3, @%[addr] \n" |
1645 | "mov.b r6, @%[addr] \n" | 1628 | "add %[psiz], %[addr] \n" |
1646 | "shlr8 r6 \n" | 1629 | "shlr8 r3 \n" |
1647 | ".wa_s5: \n" | 1630 | ".wa_s5: \n" |
1648 | "sub %[psiz], %[addr] \n" | ||
1649 | "mov.b r5, @%[addr] \n" | ||
1650 | "shlr8 r5 \n" | ||
1651 | ".wa_s4: \n" | ||
1652 | "sub %[psiz], %[addr] \n" | ||
1653 | "mov.b r4, @%[addr] \n" | 1631 | "mov.b r4, @%[addr] \n" |
1632 | "add %[psiz], %[addr] \n" | ||
1654 | "shlr8 r4 \n" | 1633 | "shlr8 r4 \n" |
1634 | ".wa_s4: \n" | ||
1635 | "mov.b r5, @%[addr] \n" | ||
1636 | "add %[psiz], %[addr] \n" | ||
1637 | "shlr8 r5 \n" | ||
1655 | ".wa_s3: \n" | 1638 | ".wa_s3: \n" |
1656 | "sub %[psiz], %[addr] \n" | 1639 | "mov.b r6, @%[addr] \n" |
1657 | "mov.b r3, @%[addr] \n" | 1640 | "add %[psiz], %[addr] \n" |
1658 | "shlr8 r3 \n" | 1641 | "shlr8 r6 \n" |
1659 | ".wa_s2: \n" | 1642 | ".wa_s2: \n" |
1660 | "sub %[psiz], %[addr] \n" | 1643 | "mov.b r7, @%[addr] \n" |
1661 | "mov.b r2, @%[addr] \n" | 1644 | "add %[psiz], %[addr] \n" |
1662 | "shlr8 r2 \n" | 1645 | "shlr8 r7 \n" |
1663 | ".wa_s1: \n" | 1646 | ".wa_s1: \n" |
1664 | "sub %[psiz], %[addr] \n" | 1647 | "mov.b r8, @%[addr] \n" |
1665 | "mov.b r1, @%[addr] \n" | 1648 | "add %[psiz], %[addr] \n" |
1666 | "shlr8 r1 \n" | 1649 | "shlr8 r8 \n" |
1667 | ".wa_s0: \n" | ||
1668 | 1650 | ||
1669 | "add %[rx], %[addr] \n" /* correct address */ | ||
1670 | "add #-8, %[dpth] \n" | 1651 | "add #-8, %[dpth] \n" |
1671 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1652 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1672 | "bt .wa_sloop \n" | 1653 | "bt .wa_sloop \n" |
@@ -1853,172 +1834,163 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1853 | "move.l %%a0, %%d5 \n" | 1834 | "move.l %%a0, %%d5 \n" |
1854 | "eor.l %%d5, %%d0 \n" | 1835 | "eor.l %%d5, %%d0 \n" |
1855 | "and.l #0xAAAAAAAA, %%d0 \n" | 1836 | "and.l #0xAAAAAAAA, %%d0 \n" |
1856 | "eor.l %%d0, %%d5 \n" | 1837 | "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */ |
1857 | "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ | 1838 | /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */ |
1858 | "lsr.l #1, %%d0 \n" | 1839 | "lsr.l #1, %%d0 \n" |
1859 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ | 1840 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ |
1841 | |||
1842 | "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/ | ||
1843 | "subq.l #1, %%d0 \n" | ||
1844 | "and.l #7, %%d0 \n" | ||
1845 | "move.l %%d0, %%a0 \n" | ||
1846 | "move.l %[ax], %%d0 \n" /* all data in D registers */ | ||
1847 | "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */ | ||
1848 | "lsr.l #8, %%d2 \n" | ||
1849 | "lsr.l #8, %%d3 \n" | ||
1850 | "lsr.l #8, %%d4 \n" | ||
1851 | "lsr.l #8, %%d0 \n" | ||
1852 | "lsr.l #8, %%d6 \n" | ||
1853 | "lsr.l #8, %%d7 \n" | ||
1854 | "lsr.l #8, %%d5 \n" | ||
1855 | "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */ | ||
1856 | "move.l %%a0, %%d0 \n" /* keep the value for later */ | ||
1857 | "move.l %%d5, %%a0 \n" /* ..into their A registers */ | ||
1860 | 1858 | ||
1861 | "tst.l %[mask] \n" | 1859 | "tst.l %[mask] \n" |
1862 | "jeq .wa_sloop \n" /* short loop if nothing to keep */ | 1860 | "jeq .wa_sstart \n" /* short loop if nothing to keep */ |
1863 | 1861 | ||
1864 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ | 1862 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ |
1865 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ | 1863 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ |
1866 | 1864 | ||
1867 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | 1865 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */ |
1868 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1866 | "bra.s .wa_f1 \n" |
1869 | "bhs.s .wa_f8 \n" | ||
1870 | |||
1871 | "move.l %[psiz], %%d0 \n" | ||
1872 | "move.l %[dpth], %%d1 \n" | ||
1873 | "mulu.w %%d1, %%d0 \n" /* point behind the last plane */ | ||
1874 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1875 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1876 | "bra.s .wa_f1 \n" /* dpth == 0 should never happen */ | ||
1877 | "bra.s .wa_f2 \n" | 1867 | "bra.s .wa_f2 \n" |
1878 | "bra.s .wa_f3 \n" | 1868 | "bra.s .wa_f3 \n" |
1879 | "bra.s .wa_f4 \n" | 1869 | "bra.s .wa_f4 \n" |
1880 | "bra.s .wa_f5 \n" | 1870 | "bra.s .wa_f5 \n" |
1881 | "bra.s .wa_f6 \n" | 1871 | "bra.s .wa_f6 \n" |
1882 | "bra.s .wa_f7 \n" | 1872 | "bra.s .wa_f7 \n" |
1873 | /* bra.s .wa_f8 */ /* identical with target */ | ||
1883 | 1874 | ||
1875 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
1884 | ".wa_f8: \n" | 1876 | ".wa_f8: \n" |
1885 | "move.l %[psiz], %%d0 \n" | ||
1886 | "lsl.l #3, %%d0 \n" | ||
1887 | "add.l %%d0, %[addr] \n" | ||
1888 | /* Point behind the last plane for this round. Note: We're using the | ||
1889 | * registers backwards in order to reuse the streak for the last round. | ||
1890 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1891 | * the bit order would be destroyed which results in more flicker. */ | ||
1892 | "sub.l %[psiz], %[addr] \n" | ||
1893 | "move.b (%[addr]), %%d0 \n" /* load old byte */ | 1877 | "move.b (%[addr]), %%d0 \n" /* load old byte */ |
1894 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ | 1878 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ |
1895 | "move.l %[mask], %%d1 \n" | 1879 | "move.l %%a0, %%d1 \n" |
1896 | "or.l %%d1, %%d0 \n" /* set new bits */ | 1880 | "or.l %%d1, %%d0 \n" /* set new bits */ |
1897 | "move.b %%d0, (%[addr]) \n" /* store byte */ | 1881 | "move.b %%d0, (%[addr]) \n" /* store byte */ |
1882 | "add.l %[psiz], %[addr] \n" | ||
1898 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | 1883 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ |
1899 | "move.l %%d1, %[mask] \n" | 1884 | "move.l %%d1, %%a0 \n" |
1900 | ".wa_f7: \n" | 1885 | ".wa_f7: \n" |
1901 | "sub.l %[psiz], %[addr] \n" | ||
1902 | "move.b (%[addr]), %%d0 \n" | 1886 | "move.b (%[addr]), %%d0 \n" |
1903 | "and.l %%d5, %%d0 \n" | 1887 | "and.l %%d5, %%d0 \n" |
1904 | "or.l %%d2, %%d0 \n" | 1888 | "or.l %%d7, %%d0 \n" |
1905 | "move.b %%d0, (%[addr]) \n" | 1889 | "move.b %%d0, (%[addr]) \n" |
1906 | "lsr.l #8, %%d2 \n" | 1890 | "add.l %[psiz], %[addr] \n" |
1891 | "lsr.l #8, %%d7 \n" | ||
1907 | ".wa_f6: \n" | 1892 | ".wa_f6: \n" |
1908 | "sub.l %[psiz], %[addr] \n" | ||
1909 | "move.b (%[addr]), %%d0 \n" | 1893 | "move.b (%[addr]), %%d0 \n" |
1910 | "and.l %%d5, %%d0 \n" | 1894 | "and.l %%d5, %%d0 \n" |
1911 | "or.l %%d3, %%d0 \n" | 1895 | "or.l %%d6, %%d0 \n" |
1912 | "move.b %%d0, (%[addr]) \n" | 1896 | "move.b %%d0, (%[addr]) \n" |
1913 | "lsr.l #8, %%d3 \n" | 1897 | "add.l %[psiz], %[addr] \n" |
1898 | "lsr.l #8, %%d6 \n" | ||
1914 | ".wa_f5: \n" | 1899 | ".wa_f5: \n" |
1915 | "sub.l %[psiz], %[addr] \n" | ||
1916 | "move.b (%[addr]), %%d0 \n" | ||
1917 | "and.l %%d5, %%d0 \n" | ||
1918 | "or.l %%d4, %%d0 \n" | ||
1919 | "move.b %%d0, (%[addr]) \n" | ||
1920 | "lsr.l #8, %%d4 \n" | ||
1921 | ".wa_f4: \n" | ||
1922 | "sub.l %[psiz], %[addr] \n" | ||
1923 | "move.b (%[addr]), %%d0 \n" | 1900 | "move.b (%[addr]), %%d0 \n" |
1924 | "and.l %%d5, %%d0 \n" | 1901 | "and.l %%d5, %%d0 \n" |
1925 | "move.l %[ax], %%d1 \n" | 1902 | "move.l %[ax], %%d1 \n" |
1926 | "or.l %%d1, %%d0 \n" | 1903 | "or.l %%d1, %%d0 \n" |
1927 | "move.b %%d0, (%[addr]) \n" | 1904 | "move.b %%d0, (%[addr]) \n" |
1905 | "add.l %[psiz], %[addr] \n" | ||
1928 | "lsr.l #8, %%d1 \n" | 1906 | "lsr.l #8, %%d1 \n" |
1929 | "move.l %%d1, %[ax] \n" | 1907 | "move.l %%d1, %[ax] \n" |
1908 | ".wa_f4: \n" | ||
1909 | "move.b (%[addr]), %%d0 \n" | ||
1910 | "and.l %%d5, %%d0 \n" | ||
1911 | "or.l %%d4, %%d0 \n" | ||
1912 | "move.b %%d0, (%[addr]) \n" | ||
1913 | "add.l %[psiz], %[addr] \n" | ||
1914 | "lsr.l #8, %%d4 \n" | ||
1930 | ".wa_f3: \n" | 1915 | ".wa_f3: \n" |
1931 | "sub.l %[psiz], %[addr] \n" | ||
1932 | "move.b (%[addr]), %%d0 \n" | 1916 | "move.b (%[addr]), %%d0 \n" |
1933 | "and.l %%d5, %%d0 \n" | 1917 | "and.l %%d5, %%d0 \n" |
1934 | "or.l %%d6, %%d0 \n" | 1918 | "or.l %%d3, %%d0 \n" |
1935 | "move.b %%d0, (%[addr]) \n" | 1919 | "move.b %%d0, (%[addr]) \n" |
1936 | "lsr.l #8, %%d6 \n" | 1920 | "add.l %[psiz], %[addr] \n" |
1921 | "lsr.l #8, %%d3 \n" | ||
1937 | ".wa_f2: \n" | 1922 | ".wa_f2: \n" |
1938 | "sub.l %[psiz], %[addr] \n" | ||
1939 | "move.b (%[addr]), %%d0 \n" | 1923 | "move.b (%[addr]), %%d0 \n" |
1940 | "and.l %%d5, %%d0 \n" | 1924 | "and.l %%d5, %%d0 \n" |
1941 | "or.l %%d7, %%d0 \n" | 1925 | "or.l %%d2, %%d0 \n" |
1942 | "move.b %%d0, (%[addr]) \n" | 1926 | "move.b %%d0, (%[addr]) \n" |
1943 | "lsr.l #8, %%d7 \n" | 1927 | "add.l %[psiz], %[addr] \n" |
1928 | "lsr.l #8, %%d2 \n" | ||
1944 | ".wa_f1: \n" | 1929 | ".wa_f1: \n" |
1945 | "sub.l %[psiz], %[addr] \n" | ||
1946 | "move.b (%[addr]), %%d0 \n" | 1930 | "move.b (%[addr]), %%d0 \n" |
1947 | "and.l %%d5, %%d0 \n" | 1931 | "and.l %%d5, %%d0 \n" |
1948 | "move.l %%a0, %%d1 \n" | 1932 | "move.l %[mask], %%d1 \n" |
1949 | "or.l %%d1, %%d0 \n" | 1933 | "or.l %%d1, %%d0 \n" |
1950 | "move.b %%d0, (%[addr]) \n" | 1934 | "move.b %%d0, (%[addr]) \n" |
1935 | "add.l %[psiz], %[addr] \n" | ||
1951 | "lsr.l #8, %%d1 \n" | 1936 | "lsr.l #8, %%d1 \n" |
1952 | "move.l %%d1, %%a0 \n" | 1937 | "move.l %%d1, %[mask] \n" |
1953 | 1938 | ||
1954 | "move.l %[psiz], %%d0 \n" | ||
1955 | "lsl.l #3, %%d0 \n" | ||
1956 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
1957 | "subq.l #8, %[dpth] \n" | 1939 | "subq.l #8, %[dpth] \n" |
1958 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1940 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
1959 | "jgt .wa_floop \n" /* next round if anything left */ | 1941 | "jgt .wa_floop \n" /* next round if anything left */ |
1960 | 1942 | ||
1961 | "jra .wa_end \n" | 1943 | "jra .wa_end \n" |
1962 | 1944 | ||
1963 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1945 | ".wa_sstart: \n" |
1964 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1946 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */ |
1965 | "bhs.s .wa_s8 \n" | 1947 | "bra.s .wa_s1 \n" |
1966 | |||
1967 | "move.l %[psiz], %%d0 \n" | ||
1968 | "move.l %[dpth], %%d5 \n" | ||
1969 | "mulu.w %%d5, %%d0 \n" /* point behind the last plane */ | ||
1970 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1971 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1972 | "bra.s .wa_s1 \n" /* dpth == 0 should never happen */ | ||
1973 | "bra.s .wa_s2 \n" | 1948 | "bra.s .wa_s2 \n" |
1974 | "bra.s .wa_s3 \n" | 1949 | "bra.s .wa_s3 \n" |
1975 | "bra.s .wa_s4 \n" | 1950 | "bra.s .wa_s4 \n" |
1976 | "bra.s .wa_s5 \n" | 1951 | "bra.s .wa_s5 \n" |
1977 | "bra.s .wa_s6 \n" | 1952 | "bra.s .wa_s6 \n" |
1978 | "bra.s .wa_s7 \n" | 1953 | "bra.s .wa_s7 \n" |
1954 | /* bra.s .wa_s8 */ /* identical with target */ | ||
1979 | 1955 | ||
1956 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
1980 | ".wa_s8: \n" | 1957 | ".wa_s8: \n" |
1981 | "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ | 1958 | "move.l %%a0, %%d5 \n" |
1982 | "lsl.l #3, %%d0 \n" /* for this round. */ | 1959 | "move.b %%d5, (%[addr]) \n" /* store byte */ |
1983 | "add.l %%d0, %[addr] \n" /* See above. */ | 1960 | "add.l %[psiz], %[addr] \n" |
1984 | 1961 | "lsr.l #8, %%d5 \n" /* shift out used-up byte */ | |
1985 | "sub.l %[psiz], %[addr] \n" | 1962 | "move.l %%d5, %%a0 \n" |
1986 | "move.b %%d1, (%[addr]) \n" /* store byte */ | ||
1987 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | ||
1988 | ".wa_s7: \n" | 1963 | ".wa_s7: \n" |
1989 | "sub.l %[psiz], %[addr] \n" | 1964 | "move.b %%d7, (%[addr]) \n" |
1990 | "move.b %%d2, (%[addr]) \n" | 1965 | "add.l %[psiz], %[addr] \n" |
1991 | "lsr.l #8, %%d2 \n" | 1966 | "lsr.l #8, %%d7 \n" |
1992 | ".wa_s6: \n" | 1967 | ".wa_s6: \n" |
1993 | "sub.l %[psiz], %[addr] \n" | 1968 | "move.b %%d6, (%[addr]) \n" |
1994 | "move.b %%d3, (%[addr]) \n" | 1969 | "add.l %[psiz], %[addr] \n" |
1995 | "lsr.l #8, %%d3 \n" | 1970 | "lsr.l #8, %%d6 \n" |
1996 | ".wa_s5: \n" | 1971 | ".wa_s5: \n" |
1997 | "sub.l %[psiz], %[addr] \n" | ||
1998 | "move.b %%d4, (%[addr]) \n" | ||
1999 | "lsr.l #8, %%d4 \n" | ||
2000 | ".wa_s4: \n" | ||
2001 | "sub.l %[psiz], %[addr] \n" | ||
2002 | "move.l %[ax], %%d5 \n" | 1972 | "move.l %[ax], %%d5 \n" |
2003 | "move.b %%d5, (%[addr]) \n" | 1973 | "move.b %%d5, (%[addr]) \n" |
1974 | "add.l %[psiz], %[addr] \n" | ||
2004 | "lsr.l #8, %%d5 \n" | 1975 | "lsr.l #8, %%d5 \n" |
2005 | "move.l %%d5, %[ax] \n" | 1976 | "move.l %%d5, %[ax] \n" |
1977 | ".wa_s4: \n" | ||
1978 | "move.b %%d4, (%[addr]) \n" | ||
1979 | "add.l %[psiz], %[addr] \n" | ||
1980 | "lsr.l #8, %%d4 \n" | ||
2006 | ".wa_s3: \n" | 1981 | ".wa_s3: \n" |
2007 | "sub.l %[psiz], %[addr] \n" | 1982 | "move.b %%d3, (%[addr]) \n" |
2008 | "move.b %%d6, (%[addr]) \n" | 1983 | "add.l %[psiz], %[addr] \n" |
2009 | "lsr.l #8, %%d6 \n" | 1984 | "lsr.l #8, %%d3 \n" |
2010 | ".wa_s2: \n" | 1985 | ".wa_s2: \n" |
2011 | "sub.l %[psiz], %[addr] \n" | 1986 | "move.b %%d2, (%[addr]) \n" |
2012 | "move.b %%d7, (%[addr]) \n" | 1987 | "add.l %[psiz], %[addr] \n" |
2013 | "lsr.l #8, %%d7 \n" | 1988 | "lsr.l #8, %%d2 \n" |
2014 | ".wa_s1: \n" | 1989 | ".wa_s1: \n" |
2015 | "sub.l %[psiz], %[addr] \n" | 1990 | "move.b %%d1, (%[addr]) \n" |
2016 | "move.l %%a0, %%d5 \n" | 1991 | "add.l %[psiz], %[addr] \n" |
2017 | "move.b %%d5, (%[addr]) \n" | 1992 | "lsr.l #8, %%d1 \n" |
2018 | "lsr.l #8, %%d5 \n" | ||
2019 | "move.l %%d5, %%a0 \n" | ||
2020 | 1993 | ||
2021 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
2022 | "subq.l #8, %[dpth] \n" | 1994 | "subq.l #8, %[dpth] \n" |
2023 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1995 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
2024 | "jgt .wa_sloop \n" /* next round if anything left */ | 1996 | "jgt .wa_sloop \n" /* next round if anything left */ |
@@ -2071,7 +2043,7 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
2071 | 2043 | ||
2072 | /* set the bits for all 8 pixels in all bytes according to the | 2044 | /* set the bits for all 8 pixels in all bytes according to the |
2073 | * precalculated patterns on the pattern stack */ | 2045 | * precalculated patterns on the pattern stack */ |
2074 | test = 1; | 2046 | test = 1 << ((-_gray_info.depth) & 7); |
2075 | mask = (~mask & 0xff); | 2047 | mask = (~mask & 0xff); |
2076 | if (mask == 0) | 2048 | if (mask == 0) |
2077 | { | 2049 | { |