diff options
author | Jens Arnold <amiconn@rockbox.org> | 2006-08-11 23:40:05 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2006-08-11 23:40:05 +0000 |
commit | e6ed58f6c545719ed804c9ad513496f8d0fa0286 (patch) | |
tree | db096eb64de3ec7c33682c5bb50579784c5ca633 /apps/plugins/lib/gray_core.c | |
parent | 22860a910a7ddbb9b811fce4cf971e097c8e6eac (diff) | |
download | rockbox-e6ed58f6c545719ed804c9ad513496f8d0fa0286.tar.gz rockbox-e6ed58f6c545719ed804c9ad513496f8d0fa0286.zip |
Better implementation of the new greyscale algorithm. A bit faster (+2% on iPod and Archos, +7% on Iriver H1x0), and more straightforward. * Fixed garbage display when a plugin switches on the overlay before drawing anything.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10534 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins/lib/gray_core.c')
-rw-r--r-- | apps/plugins/lib/gray_core.c | 501 |
1 files changed, 238 insertions, 263 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c index 809e88dba1..413b66c65d 100644 --- a/apps/plugins/lib/gray_core.c +++ b/apps/plugins/lib/gray_core.c | |||
@@ -348,7 +348,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
348 | long plane_size, buftaken; | 348 | long plane_size, buftaken; |
349 | unsigned data; | 349 | unsigned data; |
350 | #ifndef SIMULATOR | 350 | #ifndef SIMULATOR |
351 | int j; | 351 | int j, bitfill; |
352 | #endif | 352 | #endif |
353 | 353 | ||
354 | _gray_rb = newrb; | 354 | _gray_rb = newrb; |
@@ -439,6 +439,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
439 | _gray_info.cur_plane = 0; | 439 | _gray_info.cur_plane = 0; |
440 | _gray_info.plane_size = plane_size; | 440 | _gray_info.plane_size = plane_size; |
441 | _gray_info.plane_data = gbuf; | 441 | _gray_info.plane_data = gbuf; |
442 | _gray_rb->memset(gbuf, 0, depth * plane_size); | ||
442 | gbuf += depth * plane_size; | 443 | gbuf += depth * plane_size; |
443 | _gray_info.bitpattern = (unsigned long *)gbuf; | 444 | _gray_info.bitpattern = (unsigned long *)gbuf; |
444 | 445 | ||
@@ -449,7 +450,8 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
449 | i >>= 1; | 450 | i >>= 1; |
450 | j--; | 451 | j--; |
451 | } | 452 | } |
452 | _gray_info.randmask = 0xFFu >> j; | 453 | _gray_info.randmask = 0xFFu >> j; |
454 | bitfill = (-depth) & 7; | ||
453 | 455 | ||
454 | /* Precalculate the bit patterns for all possible pixel values */ | 456 | /* Precalculate the bit patterns for all possible pixel values */ |
455 | for (i = 0; i <= depth; i++) | 457 | for (i = 0; i <= depth; i++) |
@@ -469,7 +471,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size, | |||
469 | } | 471 | } |
470 | /* now the lower <depth> bits contain the pattern */ | 472 | /* now the lower <depth> bits contain the pattern */ |
471 | 473 | ||
472 | _gray_info.bitpattern[i] = pattern; | 474 | _gray_info.bitpattern[i] = pattern << bitfill; |
473 | } | 475 | } |
474 | #endif | 476 | #endif |
475 | 477 | ||
@@ -797,101 +799,93 @@ void gray_update_rect(int x, int y, int width, int height) | |||
797 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ | 799 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ |
798 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 800 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
799 | 801 | ||
802 | "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/ | ||
803 | "and r0, r0, #7 \n" | ||
804 | "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */ | ||
805 | "mov r8, r8, lsr #8 \n" /* r8: never reached */ | ||
806 | "mov r7, r7, lsr #8 \n" | ||
807 | "mov r6, r6, lsr #8 \n" | ||
808 | "mov r5, r5, lsr #8 \n" | ||
809 | "mov r4, r4, lsr #8 \n" | ||
810 | "mov r3, r3, lsr #8 \n" | ||
811 | "mov r2, r2, lsr #8 \n" | ||
812 | "mov r1, r1, lsr #8 \n" | ||
813 | |||
800 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | 814 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ |
801 | "ands %[mask], %[mask], #0xff \n" | 815 | "ands %[mask], %[mask], #0xff \n" |
802 | "beq .ur_sloop \n" /* short loop if no bits to keep */ | 816 | "beq .ur_sstart \n" /* short loop if no bits to keep */ |
803 | |||
804 | ".ur_floop: \n" /** full loop (bits to keep)**/ | ||
805 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | ||
806 | "bhs .ur_f8 \n" | ||
807 | 817 | ||
808 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | 818 | "ldrb r0, [pc, r0] \n" /* jump into full loop */ |
809 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
810 | |||
811 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
812 | "add pc, pc, r0 \n" | 819 | "add pc, pc, r0 \n" |
813 | ".ur_ftable: \n" | 820 | ".ur_ftable: \n" |
814 | ".byte .ur_f0 - .ur_ftable - 4 \n" /* [jump tables are tricky] */ | 821 | ".byte .ur_f1 - .ur_ftable - 4 \n" /* [jump tables are tricky] */ |
815 | ".byte .ur_f1 - .ur_ftable - 4 \n" | ||
816 | ".byte .ur_f2 - .ur_ftable - 4 \n" | 822 | ".byte .ur_f2 - .ur_ftable - 4 \n" |
817 | ".byte .ur_f3 - .ur_ftable - 4 \n" | 823 | ".byte .ur_f3 - .ur_ftable - 4 \n" |
818 | ".byte .ur_f4 - .ur_ftable - 4 \n" | 824 | ".byte .ur_f4 - .ur_ftable - 4 \n" |
819 | ".byte .ur_f5 - .ur_ftable - 4 \n" | 825 | ".byte .ur_f5 - .ur_ftable - 4 \n" |
820 | ".byte .ur_f6 - .ur_ftable - 4 \n" | 826 | ".byte .ur_f6 - .ur_ftable - 4 \n" |
821 | ".byte .ur_f7 - .ur_ftable - 4 \n" | 827 | ".byte .ur_f7 - .ur_ftable - 4 \n" |
828 | ".byte .ur_f8 - .ur_ftable - 4 \n" | ||
822 | 829 | ||
830 | ".ur_floop: \n" /** full loop (bits to keep)**/ | ||
823 | ".ur_f8: \n" | 831 | ".ur_f8: \n" |
824 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 832 | "ldrb r0, [%[addr]] \n" /* load old byte */ |
825 | /* Point behind the last plane for this round. Note: We're using the | ||
826 | * registers backwards in order to reuse the streak for the last round. | ||
827 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
828 | * the bit order would be destroyed which results in more flicker. */ | ||
829 | "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */ | ||
830 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ | 833 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ |
831 | "orr r0, r0, r8 \n" /* set new bits */ | 834 | "orr r0, r0, r1 \n" /* set new bits */ |
832 | "strb r0, [%[addr]] \n" /* store byte */ | 835 | "strb r0, [%[addr]], %[psiz] \n" /* store byte */ |
833 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | 836 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
834 | ".ur_f7: \n" | 837 | ".ur_f7: \n" |
835 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 838 | "ldrb r0, [%[addr]] \n" |
836 | "and r0, r0, %[mask] \n" | 839 | "and r0, r0, %[mask] \n" |
837 | "orr r0, r0, r7 \n" | 840 | "orr r0, r0, r2 \n" |
838 | "strb r0, [%[addr]] \n" | 841 | "strb r0, [%[addr]], %[psiz] \n" |
839 | "mov r7, r7, lsr #8 \n" | 842 | "mov r2, r2, lsr #8 \n" |
840 | ".ur_f6: \n" | 843 | ".ur_f6: \n" |
841 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 844 | "ldrb r0, [%[addr]] \n" |
842 | "and r0, r0, %[mask] \n" | 845 | "and r0, r0, %[mask] \n" |
843 | "orr r0, r0, r6 \n" | 846 | "orr r0, r0, r3 \n" |
844 | "strb r0, [%[addr]] \n" | 847 | "strb r0, [%[addr]], %[psiz] \n" |
845 | "mov r6, r6, lsr #8 \n" | 848 | "mov r3, r3, lsr #8 \n" |
846 | ".ur_f5: \n" | 849 | ".ur_f5: \n" |
847 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 850 | "ldrb r0, [%[addr]] \n" |
848 | "and r0, r0, %[mask] \n" | ||
849 | "orr r0, r0, r5 \n" | ||
850 | "strb r0, [%[addr]] \n" | ||
851 | "mov r5, r5, lsr #8 \n" | ||
852 | ".ur_f4: \n" | ||
853 | "ldrb r0, [%[addr], -%[psiz]]! \n" | ||
854 | "and r0, r0, %[mask] \n" | 851 | "and r0, r0, %[mask] \n" |
855 | "orr r0, r0, r4 \n" | 852 | "orr r0, r0, r4 \n" |
856 | "strb r0, [%[addr]] \n" | 853 | "strb r0, [%[addr]], %[psiz] \n" |
857 | "mov r4, r4, lsr #8 \n" | 854 | "mov r4, r4, lsr #8 \n" |
855 | ".ur_f4: \n" | ||
856 | "ldrb r0, [%[addr]] \n" | ||
857 | "and r0, r0, %[mask] \n" | ||
858 | "orr r0, r0, r5 \n" | ||
859 | "strb r0, [%[addr]], %[psiz] \n" | ||
860 | "mov r5, r5, lsr #8 \n" | ||
858 | ".ur_f3: \n" | 861 | ".ur_f3: \n" |
859 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 862 | "ldrb r0, [%[addr]] \n" |
860 | "and r0, r0, %[mask] \n" | 863 | "and r0, r0, %[mask] \n" |
861 | "orr r0, r0, r3 \n" | 864 | "orr r0, r0, r6 \n" |
862 | "strb r0, [%[addr]] \n" | 865 | "strb r0, [%[addr]], %[psiz] \n" |
863 | "mov r3, r3, lsr #8 \n" | 866 | "mov r6, r6, lsr #8 \n" |
864 | ".ur_f2: \n" | 867 | ".ur_f2: \n" |
865 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 868 | "ldrb r0, [%[addr]] \n" |
866 | "and r0, r0, %[mask] \n" | 869 | "and r0, r0, %[mask] \n" |
867 | "orr r0, r0, r2 \n" | 870 | "orr r0, r0, r7 \n" |
868 | "strb r0, [%[addr]] \n" | 871 | "strb r0, [%[addr]], %[psiz] \n" |
869 | "mov r2, r2, lsr #8 \n" | 872 | "mov r7, r7, lsr #8 \n" |
870 | ".ur_f1: \n" | 873 | ".ur_f1: \n" |
871 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 874 | "ldrb r0, [%[addr]] \n" |
872 | "and r0, r0, %[mask] \n" | 875 | "and r0, r0, %[mask] \n" |
873 | "orr r0, r0, r1 \n" | 876 | "orr r0, r0, r8 \n" |
874 | "strb r0, [%[addr]] \n" | 877 | "strb r0, [%[addr]], %[psiz] \n" |
875 | "mov r1, r1, lsr #8 \n" | 878 | "mov r8, r8, lsr #8 \n" |
876 | ".ur_f0: \n" | ||
877 | 879 | ||
878 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
879 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 880 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
880 | "bhi .ur_floop \n" | 881 | "bhi .ur_floop \n" |
881 | 882 | ||
882 | "b .ur_end \n" | 883 | "b .ur_end \n" |
883 | 884 | ||
884 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 885 | ".ur_sstart: \n" |
885 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | 886 | "ldrb r0, [pc, r0] \n" /* jump into short loop*/ |
886 | "bhs .ur_s8 \n" | ||
887 | |||
888 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | ||
889 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
890 | |||
891 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
892 | "add pc, pc, r0 \n" | 887 | "add pc, pc, r0 \n" |
893 | ".ur_stable: \n" | 888 | ".ur_stable: \n" |
894 | ".byte .ur_s0 - .ur_stable - 4 \n" | ||
895 | ".byte .ur_s1 - .ur_stable - 4 \n" | 889 | ".byte .ur_s1 - .ur_stable - 4 \n" |
896 | ".byte .ur_s2 - .ur_stable - 4 \n" | 890 | ".byte .ur_s2 - .ur_stable - 4 \n" |
897 | ".byte .ur_s3 - .ur_stable - 4 \n" | 891 | ".byte .ur_s3 - .ur_stable - 4 \n" |
@@ -899,36 +893,34 @@ void gray_update_rect(int x, int y, int width, int height) | |||
899 | ".byte .ur_s5 - .ur_stable - 4 \n" | 893 | ".byte .ur_s5 - .ur_stable - 4 \n" |
900 | ".byte .ur_s6 - .ur_stable - 4 \n" | 894 | ".byte .ur_s6 - .ur_stable - 4 \n" |
901 | ".byte .ur_s7 - .ur_stable - 4 \n" | 895 | ".byte .ur_s7 - .ur_stable - 4 \n" |
896 | ".byte .ur_s8 - .ur_stable - 4 \n" | ||
902 | 897 | ||
898 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
903 | ".ur_s8: \n" | 899 | ".ur_s8: \n" |
904 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 900 | "strb r1, [%[addr]], %[psiz] \n" /* store byte */ |
905 | /* Point behind the last plane for this round. See above. */ | 901 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
906 | "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */ | ||
907 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | ||
908 | ".ur_s7: \n" | 902 | ".ur_s7: \n" |
909 | "strb r7, [%[addr], -%[psiz]]! \n" | 903 | "strb r2, [%[addr]], %[psiz] \n" |
910 | "mov r7, r7, lsr #8 \n" | 904 | "mov r2, r2, lsr #8 \n" |
911 | ".ur_s6: \n" | 905 | ".ur_s6: \n" |
912 | "strb r6, [%[addr], -%[psiz]]! \n" | 906 | "strb r3, [%[addr]], %[psiz] \n" |
913 | "mov r6, r6, lsr #8 \n" | 907 | "mov r3, r3, lsr #8 \n" |
914 | ".ur_s5: \n" | 908 | ".ur_s5: \n" |
915 | "strb r5, [%[addr], -%[psiz]]! \n" | 909 | "strb r4, [%[addr]], %[psiz] \n" |
916 | "mov r5, r5, lsr #8 \n" | ||
917 | ".ur_s4: \n" | ||
918 | "strb r4, [%[addr], -%[psiz]]! \n" | ||
919 | "mov r4, r4, lsr #8 \n" | 910 | "mov r4, r4, lsr #8 \n" |
911 | ".ur_s4: \n" | ||
912 | "strb r5, [%[addr]], %[psiz] \n" | ||
913 | "mov r5, r5, lsr #8 \n" | ||
920 | ".ur_s3: \n" | 914 | ".ur_s3: \n" |
921 | "strb r3, [%[addr], -%[psiz]]! \n" | 915 | "strb r6, [%[addr]], %[psiz] \n" |
922 | "mov r3, r3, lsr #8 \n" | 916 | "mov r6, r6, lsr #8 \n" |
923 | ".ur_s2: \n" | 917 | ".ur_s2: \n" |
924 | "strb r2, [%[addr], -%[psiz]]! \n" | 918 | "strb r7, [%[addr]], %[psiz] \n" |
925 | "mov r2, r2, lsr #8 \n" | 919 | "mov r7, r7, lsr #8 \n" |
926 | ".ur_s1: \n" | 920 | ".ur_s1: \n" |
927 | "strb r1, [%[addr], -%[psiz]]! \n" | 921 | "strb r8, [%[addr]], %[psiz] \n" |
928 | "mov r1, r1, lsr #8 \n" | 922 | "mov r8, r8, lsr #8 \n" |
929 | ".ur_s0: \n" | ||
930 | 923 | ||
931 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
932 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 924 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
933 | "bhi .ur_sloop \n" | 925 | "bhi .ur_sloop \n" |
934 | 926 | ||
@@ -956,7 +948,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
956 | { | 948 | { |
957 | unsigned char *addr, *end; | 949 | unsigned char *addr, *end; |
958 | unsigned mask = 0; | 950 | unsigned mask = 0; |
959 | unsigned test = 1; | 951 | unsigned test = 1 << ((-_gray_info.depth) & 7); |
960 | int i; | 952 | int i; |
961 | 953 | ||
962 | /* precalculate the bit patterns with random shifts | 954 | /* precalculate the bit patterns with random shifts |
@@ -1287,25 +1279,37 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1287 | "shlr r0 \n" | 1279 | "shlr r0 \n" |
1288 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 1280 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
1289 | 1281 | ||
1290 | "tst %[mask], %[mask] \n" | 1282 | "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/ |
1291 | "bt .ur_sloop \n" /* short loop if nothing to keep */ | 1283 | "add #-1, %[rx] \n" |
1284 | "mov #7, r0 \n" | ||
1285 | "and r0, %[rx] \n" | ||
1286 | "mova .ur_pshift, r0 \n" | ||
1287 | "add %[rx], r0 \n" | ||
1288 | "add %[rx], r0 \n" | ||
1289 | "jmp @r0 \n" /* jump into shift streak */ | ||
1290 | "nop \n" | ||
1292 | 1291 | ||
1293 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1292 | ".align 2 \n" |
1294 | "mov #8, r0 \n" | 1293 | ".ur_pshift: \n" |
1295 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | 1294 | "shlr8 r7 \n" |
1296 | "bt .ur_f8 \n" | 1295 | "shlr8 r6 \n" |
1296 | "shlr8 r5 \n" | ||
1297 | "shlr8 r4 \n" | ||
1298 | "shlr8 r3 \n" | ||
1299 | "shlr8 r2 \n" | ||
1300 | "shlr8 r1 \n" | ||
1301 | |||
1302 | "tst %[mask], %[mask] \n" | ||
1303 | "bt .ur_sstart \n" /* short loop if nothing to keep */ | ||
1297 | 1304 | ||
1298 | "mulu %[psiz], %[dpth] \n" | 1305 | "mova .ur_ftable, r0 \n" /* jump into full loop */ |
1299 | "mova .ur_ftable, r0 \n" | 1306 | "mov.b @(r0, %[rx]), %[rx] \n" |
1300 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1301 | "add %[rx], r0 \n" | 1307 | "add %[rx], r0 \n" |
1302 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1308 | "jmp @r0 \n" |
1303 | "jmp @r0 \n" /* jump into streak */ | 1309 | "nop \n" |
1304 | "add %[rx], %[addr] \n" /* ..for this round */ | 1310 | |
1305 | |||
1306 | ".align 2 \n" | 1311 | ".align 2 \n" |
1307 | ".ur_ftable: \n" | 1312 | ".ur_ftable: \n" |
1308 | ".byte .ur_f0 - .ur_ftable \n" | ||
1309 | ".byte .ur_f1 - .ur_ftable \n" | 1313 | ".byte .ur_f1 - .ur_ftable \n" |
1310 | ".byte .ur_f2 - .ur_ftable \n" | 1314 | ".byte .ur_f2 - .ur_ftable \n" |
1311 | ".byte .ur_f3 - .ur_ftable \n" | 1315 | ".byte .ur_f3 - .ur_ftable \n" |
@@ -1313,74 +1317,66 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1313 | ".byte .ur_f5 - .ur_ftable \n" | 1317 | ".byte .ur_f5 - .ur_ftable \n" |
1314 | ".byte .ur_f6 - .ur_ftable \n" | 1318 | ".byte .ur_f6 - .ur_ftable \n" |
1315 | ".byte .ur_f7 - .ur_ftable \n" | 1319 | ".byte .ur_f7 - .ur_ftable \n" |
1320 | ".byte .ur_f8 - .ur_ftable \n" | ||
1316 | 1321 | ||
1322 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | ||
1317 | ".ur_f8: \n" | 1323 | ".ur_f8: \n" |
1318 | "mov %[psiz], %[rx] \n" | ||
1319 | "shll2 %[rx] \n" | ||
1320 | "add %[rx], %[rx] \n" | ||
1321 | "add %[rx], %[addr] \n" | ||
1322 | /* Point behind the last plane for this round. Note: We're using the | ||
1323 | * registers backwards in order to reuse the streak for the last round. | ||
1324 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1325 | * the bit order would be destroyed which results in more flicker. */ | ||
1326 | "sub %[psiz], %[addr] \n" | ||
1327 | "mov.b @%[addr], r0 \n" /* load old byte */ | 1324 | "mov.b @%[addr], r0 \n" /* load old byte */ |
1328 | "and %[mask], r0 \n" /* mask out replaced bits */ | 1325 | "and %[mask], r0 \n" /* mask out replaced bits */ |
1329 | "or r8, r0 \n" /* set new bits */ | 1326 | "or r1, r0 \n" /* set new bits */ |
1330 | "mov.b r0, @%[addr] \n" /* store byte */ | 1327 | "mov.b r0, @%[addr] \n" /* store byte */ |
1331 | "shlr8 r8 \n" /* shift out used-up byte */ | 1328 | "add %[psiz], %[addr] \n" |
1329 | "shlr8 r1 \n" /* shift out used-up byte */ | ||
1332 | ".ur_f7: \n" | 1330 | ".ur_f7: \n" |
1333 | "sub %[psiz], %[addr] \n" | ||
1334 | "mov.b @%[addr], r0 \n" | 1331 | "mov.b @%[addr], r0 \n" |
1335 | "and %[mask], r0 \n" | 1332 | "and %[mask], r0 \n" |
1336 | "or r7, r0 \n" | 1333 | "or r2, r0 \n" |
1337 | "mov.b r0, @%[addr] \n" | 1334 | "mov.b r0, @%[addr] \n" |
1338 | "shlr8 r7 \n" | 1335 | "add %[psiz], %[addr] \n" |
1336 | "shlr8 r2 \n" | ||
1339 | ".ur_f6: \n" | 1337 | ".ur_f6: \n" |
1340 | "sub %[psiz], %[addr] \n" | ||
1341 | "mov.b @%[addr], r0 \n" | 1338 | "mov.b @%[addr], r0 \n" |
1342 | "and %[mask], r0 \n" | 1339 | "and %[mask], r0 \n" |
1343 | "or r6, r0 \n" | 1340 | "or r3, r0 \n" |
1344 | "mov.b r0, @%[addr] \n" | 1341 | "mov.b r0, @%[addr] \n" |
1345 | "shlr8 r6 \n" | 1342 | "add %[psiz], %[addr] \n" |
1343 | "shlr8 r3 \n" | ||
1346 | ".ur_f5: \n" | 1344 | ".ur_f5: \n" |
1347 | "sub %[psiz], %[addr] \n" | ||
1348 | "mov.b @%[addr], r0 \n" | 1345 | "mov.b @%[addr], r0 \n" |
1349 | "and %[mask], r0 \n" | 1346 | "and %[mask], r0 \n" |
1350 | "or r5, r0 \n" | 1347 | "or r4, r0 \n" |
1351 | "mov.b r0, @%[addr] \n" | 1348 | "mov.b r0, @%[addr] \n" |
1352 | "shlr8 r5 \n" | 1349 | "add %[psiz], %[addr] \n" |
1350 | "shlr8 r4 \n" | ||
1353 | ".ur_f4: \n" | 1351 | ".ur_f4: \n" |
1354 | "sub %[psiz], %[addr] \n" | ||
1355 | "mov.b @%[addr], r0 \n" | 1352 | "mov.b @%[addr], r0 \n" |
1356 | "and %[mask], r0 \n" | 1353 | "and %[mask], r0 \n" |
1357 | "or r4, r0 \n" | 1354 | "or r5, r0 \n" |
1358 | "mov.b r0, @%[addr] \n" | 1355 | "mov.b r0, @%[addr] \n" |
1359 | "shlr8 r4 \n" | 1356 | "add %[psiz], %[addr] \n" |
1357 | "shlr8 r5 \n" | ||
1360 | ".ur_f3: \n" | 1358 | ".ur_f3: \n" |
1361 | "sub %[psiz], %[addr] \n" | ||
1362 | "mov.b @%[addr], r0 \n" | 1359 | "mov.b @%[addr], r0 \n" |
1363 | "and %[mask], r0 \n" | 1360 | "and %[mask], r0 \n" |
1364 | "or r3, r0 \n" | 1361 | "or r6, r0 \n" |
1365 | "mov.b r0, @%[addr] \n" | 1362 | "mov.b r0, @%[addr] \n" |
1366 | "shlr8 r3 \n" | 1363 | "add %[psiz], %[addr] \n" |
1364 | "shlr8 r6 \n" | ||
1367 | ".ur_f2: \n" | 1365 | ".ur_f2: \n" |
1368 | "sub %[psiz], %[addr] \n" | ||
1369 | "mov.b @%[addr], r0 \n" | 1366 | "mov.b @%[addr], r0 \n" |
1370 | "and %[mask], r0 \n" | 1367 | "and %[mask], r0 \n" |
1371 | "or r2, r0 \n" | 1368 | "or r7, r0 \n" |
1372 | "mov.b r0, @%[addr] \n" | 1369 | "mov.b r0, @%[addr] \n" |
1373 | "shlr8 r2 \n" | 1370 | "add %[psiz], %[addr] \n" |
1371 | "shlr8 r7 \n" | ||
1374 | ".ur_f1: \n" | 1372 | ".ur_f1: \n" |
1375 | "sub %[psiz], %[addr] \n" | ||
1376 | "mov.b @%[addr], r0 \n" | 1373 | "mov.b @%[addr], r0 \n" |
1377 | "and %[mask], r0 \n" | 1374 | "and %[mask], r0 \n" |
1378 | "or r1, r0 \n" | 1375 | "or r8, r0 \n" |
1379 | "mov.b r0, @%[addr] \n" | 1376 | "mov.b r0, @%[addr] \n" |
1380 | "shlr8 r1 \n" | 1377 | "add %[psiz], %[addr] \n" |
1381 | ".ur_f0: \n" | 1378 | "shlr8 r8 \n" |
1382 | 1379 | ||
1383 | "add %[rx], %[addr] \n" /* correct address */ | ||
1384 | "add #-8, %[dpth] \n" | 1380 | "add #-8, %[dpth] \n" |
1385 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1381 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1386 | "bt .ur_floop \n" | 1382 | "bt .ur_floop \n" |
@@ -1404,22 +1400,15 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1404 | ".ur_mask1: \n" | 1400 | ".ur_mask1: \n" |
1405 | ".long 0xAAAAAAAA \n" | 1401 | ".long 0xAAAAAAAA \n" |
1406 | 1402 | ||
1407 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1403 | ".ur_sstart: \n" |
1408 | "mov #8, r0 \n" | 1404 | "mova .ur_stable, r0 \n" /* jump into short loop */ |
1409 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | 1405 | "mov.b @(r0, %[rx]), %[rx] \n" |
1410 | "bt .ur_s8 \n" | ||
1411 | |||
1412 | "mulu %[psiz], %[dpth] \n" | ||
1413 | "mova .ur_stable, r0 \n" | ||
1414 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1415 | "add %[rx], r0 \n" | 1406 | "add %[rx], r0 \n" |
1416 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1407 | "jmp @r0 \n" |
1417 | "jmp @r0 \n" /* jump into streak */ | 1408 | "nop \n" |
1418 | "add %[rx], %[addr] \n" /* ..for this round */ | ||
1419 | 1409 | ||
1420 | ".align 2 \n" | 1410 | ".align 2 \n" |
1421 | ".ur_stable: \n" | 1411 | ".ur_stable: \n" |
1422 | ".byte .ur_s0 - .ur_stable \n" | ||
1423 | ".byte .ur_s1 - .ur_stable \n" | 1412 | ".byte .ur_s1 - .ur_stable \n" |
1424 | ".byte .ur_s2 - .ur_stable \n" | 1413 | ".byte .ur_s2 - .ur_stable \n" |
1425 | ".byte .ur_s3 - .ur_stable \n" | 1414 | ".byte .ur_s3 - .ur_stable \n" |
@@ -1427,47 +1416,42 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1427 | ".byte .ur_s5 - .ur_stable \n" | 1416 | ".byte .ur_s5 - .ur_stable \n" |
1428 | ".byte .ur_s6 - .ur_stable \n" | 1417 | ".byte .ur_s6 - .ur_stable \n" |
1429 | ".byte .ur_s7 - .ur_stable \n" | 1418 | ".byte .ur_s7 - .ur_stable \n" |
1419 | ".byte .ur_s8 - .ur_stable \n" | ||
1430 | 1420 | ||
1421 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
1431 | ".ur_s8: \n" | 1422 | ".ur_s8: \n" |
1432 | "mov %[psiz], %[rx] \n" /* Point behind the last plane */ | 1423 | "mov.b r1, @%[addr] \n" /* store byte */ |
1433 | "shll2 %[rx] \n" /* for this round. */ | 1424 | "add %[psiz], %[addr] \n" |
1434 | "add %[rx], %[rx] \n" /* See above. */ | 1425 | "shlr8 r1 \n" /* shift out used-up byte */ |
1435 | "add %[rx], %[addr] \n" | ||
1436 | |||
1437 | "sub %[psiz], %[addr] \n" | ||
1438 | "mov.b r8, @%[addr] \n" /* store byte */ | ||
1439 | "shlr8 r8 \n" /* shift out used-up byte */ | ||
1440 | ".ur_s7: \n" | 1426 | ".ur_s7: \n" |
1441 | "sub %[psiz], %[addr] \n" | 1427 | "mov.b r2, @%[addr] \n" |
1442 | "mov.b r7, @%[addr] \n" | 1428 | "add %[psiz], %[addr] \n" |
1443 | "shlr8 r7 \n" | 1429 | "shlr8 r2 \n" |
1444 | ".ur_s6: \n" | 1430 | ".ur_s6: \n" |
1445 | "sub %[psiz], %[addr] \n" | 1431 | "mov.b r3, @%[addr] \n" |
1446 | "mov.b r6, @%[addr] \n" | 1432 | "add %[psiz], %[addr] \n" |
1447 | "shlr8 r6 \n" | 1433 | "shlr8 r3 \n" |
1448 | ".ur_s5: \n" | 1434 | ".ur_s5: \n" |
1449 | "sub %[psiz], %[addr] \n" | ||
1450 | "mov.b r5, @%[addr] \n" | ||
1451 | "shlr8 r5 \n" | ||
1452 | ".ur_s4: \n" | ||
1453 | "sub %[psiz], %[addr] \n" | ||
1454 | "mov.b r4, @%[addr] \n" | 1435 | "mov.b r4, @%[addr] \n" |
1436 | "add %[psiz], %[addr] \n" | ||
1455 | "shlr8 r4 \n" | 1437 | "shlr8 r4 \n" |
1438 | ".ur_s4: \n" | ||
1439 | "mov.b r5, @%[addr] \n" | ||
1440 | "add %[psiz], %[addr] \n" | ||
1441 | "shlr8 r5 \n" | ||
1456 | ".ur_s3: \n" | 1442 | ".ur_s3: \n" |
1457 | "sub %[psiz], %[addr] \n" | 1443 | "mov.b r6, @%[addr] \n" |
1458 | "mov.b r3, @%[addr] \n" | 1444 | "add %[psiz], %[addr] \n" |
1459 | "shlr8 r3 \n" | 1445 | "shlr8 r6 \n" |
1460 | ".ur_s2: \n" | 1446 | ".ur_s2: \n" |
1461 | "sub %[psiz], %[addr] \n" | 1447 | "mov.b r7, @%[addr] \n" |
1462 | "mov.b r2, @%[addr] \n" | 1448 | "add %[psiz], %[addr] \n" |
1463 | "shlr8 r2 \n" | 1449 | "shlr8 r7 \n" |
1464 | ".ur_s1: \n" | 1450 | ".ur_s1: \n" |
1465 | "sub %[psiz], %[addr] \n" | 1451 | "mov.b r8, @%[addr] \n" |
1466 | "mov.b r1, @%[addr] \n" | 1452 | "add %[psiz], %[addr] \n" |
1467 | "shlr8 r1 \n" | 1453 | "shlr8 r8 \n" |
1468 | ".ur_s0: \n" | ||
1469 | 1454 | ||
1470 | "add %[rx], %[addr] \n" /* correct address */ | ||
1471 | "add #-8, %[dpth] \n" | 1455 | "add #-8, %[dpth] \n" |
1472 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1456 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1473 | "bt .ur_sloop \n" | 1457 | "bt .ur_sloop \n" |
@@ -1677,172 +1661,163 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1677 | "move.l %%a0, %%d5 \n" | 1661 | "move.l %%a0, %%d5 \n" |
1678 | "eor.l %%d5, %%d0 \n" | 1662 | "eor.l %%d5, %%d0 \n" |
1679 | "and.l #0xAAAAAAAA, %%d0 \n" | 1663 | "and.l #0xAAAAAAAA, %%d0 \n" |
1680 | "eor.l %%d0, %%d5 \n" | 1664 | "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */ |
1681 | "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ | 1665 | /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */ |
1682 | "lsr.l #1, %%d0 \n" | 1666 | "lsr.l #1, %%d0 \n" |
1683 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ | 1667 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ |
1668 | |||
1669 | "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/ | ||
1670 | "subq.l #1, %%d0 \n" | ||
1671 | "and.l #7, %%d0 \n" | ||
1672 | "move.l %%d0, %%a0 \n" | ||
1673 | "move.l %[ax], %%d0 \n" /* all data in D registers */ | ||
1674 | "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */ | ||
1675 | "lsr.l #8, %%d2 \n" | ||
1676 | "lsr.l #8, %%d3 \n" | ||
1677 | "lsr.l #8, %%d4 \n" | ||
1678 | "lsr.l #8, %%d0 \n" | ||
1679 | "lsr.l #8, %%d6 \n" | ||
1680 | "lsr.l #8, %%d7 \n" | ||
1681 | "lsr.l #8, %%d5 \n" | ||
1682 | "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */ | ||
1683 | "move.l %%a0, %%d0 \n" /* keep the value for later */ | ||
1684 | "move.l %%d5, %%a0 \n" /* ..into their A registers */ | ||
1684 | 1685 | ||
1685 | "tst.l %[mask] \n" | 1686 | "tst.l %[mask] \n" |
1686 | "jeq .ur_sloop \n" /* short loop if nothing to keep */ | 1687 | "jeq .ur_sstart \n" /* short loop if nothing to keep */ |
1687 | 1688 | ||
1688 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ | 1689 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ |
1689 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ | 1690 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ |
1690 | 1691 | ||
1691 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1692 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */ |
1692 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1693 | "bra.s .ur_f1 \n" |
1693 | "bhs.s .ur_f8 \n" | ||
1694 | |||
1695 | "move.l %[psiz], %%d0 \n" | ||
1696 | "move.l %[dpth], %%d1 \n" | ||
1697 | "mulu.w %%d1, %%d0 \n" /* point behind the last plane */ | ||
1698 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1699 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1700 | "bra.s .ur_f1 \n" /* dpth == 0 should never happen */ | ||
1701 | "bra.s .ur_f2 \n" | 1694 | "bra.s .ur_f2 \n" |
1702 | "bra.s .ur_f3 \n" | 1695 | "bra.s .ur_f3 \n" |
1703 | "bra.s .ur_f4 \n" | 1696 | "bra.s .ur_f4 \n" |
1704 | "bra.s .ur_f5 \n" | 1697 | "bra.s .ur_f5 \n" |
1705 | "bra.s .ur_f6 \n" | 1698 | "bra.s .ur_f6 \n" |
1706 | "bra.s .ur_f7 \n" | 1699 | "bra.s .ur_f7 \n" |
1700 | /* bra.s .ur_f8 */ /* identical with target */ | ||
1707 | 1701 | ||
1702 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | ||
1708 | ".ur_f8: \n" | 1703 | ".ur_f8: \n" |
1709 | "move.l %[psiz], %%d0 \n" | ||
1710 | "lsl.l #3, %%d0 \n" | ||
1711 | "add.l %%d0, %[addr] \n" | ||
1712 | /* Point behind the last plane for this round. Note: We're using the | ||
1713 | * registers backwards in order to reuse the streak for the last round. | ||
1714 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1715 | * the bit order would be destroyed which results in more flicker. */ | ||
1716 | "sub.l %[psiz], %[addr] \n" | ||
1717 | "move.b (%[addr]), %%d0 \n" /* load old byte */ | 1704 | "move.b (%[addr]), %%d0 \n" /* load old byte */ |
1718 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ | 1705 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ |
1719 | "move.l %[mask], %%d1 \n" | 1706 | "move.l %%a0, %%d1 \n" |
1720 | "or.l %%d1, %%d0 \n" /* set new bits */ | 1707 | "or.l %%d1, %%d0 \n" /* set new bits */ |
1721 | "move.b %%d0, (%[addr]) \n" /* store byte */ | 1708 | "move.b %%d0, (%[addr]) \n" /* store byte */ |
1709 | "add.l %[psiz], %[addr] \n" | ||
1722 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | 1710 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ |
1723 | "move.l %%d1, %[mask] \n" | 1711 | "move.l %%d1, %%a0 \n" |
1724 | ".ur_f7: \n" | 1712 | ".ur_f7: \n" |
1725 | "sub.l %[psiz], %[addr] \n" | ||
1726 | "move.b (%[addr]), %%d0 \n" | 1713 | "move.b (%[addr]), %%d0 \n" |
1727 | "and.l %%d5, %%d0 \n" | 1714 | "and.l %%d5, %%d0 \n" |
1728 | "or.l %%d2, %%d0 \n" | 1715 | "or.l %%d7, %%d0 \n" |
1729 | "move.b %%d0, (%[addr]) \n" | 1716 | "move.b %%d0, (%[addr]) \n" |
1730 | "lsr.l #8, %%d2 \n" | 1717 | "add.l %[psiz], %[addr] \n" |
1718 | "lsr.l #8, %%d7 \n" | ||
1731 | ".ur_f6: \n" | 1719 | ".ur_f6: \n" |
1732 | "sub.l %[psiz], %[addr] \n" | ||
1733 | "move.b (%[addr]), %%d0 \n" | 1720 | "move.b (%[addr]), %%d0 \n" |
1734 | "and.l %%d5, %%d0 \n" | 1721 | "and.l %%d5, %%d0 \n" |
1735 | "or.l %%d3, %%d0 \n" | 1722 | "or.l %%d6, %%d0 \n" |
1736 | "move.b %%d0, (%[addr]) \n" | 1723 | "move.b %%d0, (%[addr]) \n" |
1737 | "lsr.l #8, %%d3 \n" | 1724 | "add.l %[psiz], %[addr] \n" |
1725 | "lsr.l #8, %%d6 \n" | ||
1738 | ".ur_f5: \n" | 1726 | ".ur_f5: \n" |
1739 | "sub.l %[psiz], %[addr] \n" | ||
1740 | "move.b (%[addr]), %%d0 \n" | ||
1741 | "and.l %%d5, %%d0 \n" | ||
1742 | "or.l %%d4, %%d0 \n" | ||
1743 | "move.b %%d0, (%[addr]) \n" | ||
1744 | "lsr.l #8, %%d4 \n" | ||
1745 | ".ur_f4: \n" | ||
1746 | "sub.l %[psiz], %[addr] \n" | ||
1747 | "move.b (%[addr]), %%d0 \n" | 1727 | "move.b (%[addr]), %%d0 \n" |
1748 | "and.l %%d5, %%d0 \n" | 1728 | "and.l %%d5, %%d0 \n" |
1749 | "move.l %[ax], %%d1 \n" | 1729 | "move.l %[ax], %%d1 \n" |
1750 | "or.l %%d1, %%d0 \n" | 1730 | "or.l %%d1, %%d0 \n" |
1751 | "move.b %%d0, (%[addr]) \n" | 1731 | "move.b %%d0, (%[addr]) \n" |
1732 | "add.l %[psiz], %[addr] \n" | ||
1752 | "lsr.l #8, %%d1 \n" | 1733 | "lsr.l #8, %%d1 \n" |
1753 | "move.l %%d1, %[ax] \n" | 1734 | "move.l %%d1, %[ax] \n" |
1735 | ".ur_f4: \n" | ||
1736 | "move.b (%[addr]), %%d0 \n" | ||
1737 | "and.l %%d5, %%d0 \n" | ||
1738 | "or.l %%d4, %%d0 \n" | ||
1739 | "move.b %%d0, (%[addr]) \n" | ||
1740 | "add.l %[psiz], %[addr] \n" | ||
1741 | "lsr.l #8, %%d4 \n" | ||
1754 | ".ur_f3: \n" | 1742 | ".ur_f3: \n" |
1755 | "sub.l %[psiz], %[addr] \n" | ||
1756 | "move.b (%[addr]), %%d0 \n" | 1743 | "move.b (%[addr]), %%d0 \n" |
1757 | "and.l %%d5, %%d0 \n" | 1744 | "and.l %%d5, %%d0 \n" |
1758 | "or.l %%d6, %%d0 \n" | 1745 | "or.l %%d3, %%d0 \n" |
1759 | "move.b %%d0, (%[addr]) \n" | 1746 | "move.b %%d0, (%[addr]) \n" |
1760 | "lsr.l #8, %%d6 \n" | 1747 | "add.l %[psiz], %[addr] \n" |
1748 | "lsr.l #8, %%d3 \n" | ||
1761 | ".ur_f2: \n" | 1749 | ".ur_f2: \n" |
1762 | "sub.l %[psiz], %[addr] \n" | ||
1763 | "move.b (%[addr]), %%d0 \n" | 1750 | "move.b (%[addr]), %%d0 \n" |
1764 | "and.l %%d5, %%d0 \n" | 1751 | "and.l %%d5, %%d0 \n" |
1765 | "or.l %%d7, %%d0 \n" | 1752 | "or.l %%d2, %%d0 \n" |
1766 | "move.b %%d0, (%[addr]) \n" | 1753 | "move.b %%d0, (%[addr]) \n" |
1767 | "lsr.l #8, %%d7 \n" | 1754 | "add.l %[psiz], %[addr] \n" |
1755 | "lsr.l #8, %%d2 \n" | ||
1768 | ".ur_f1: \n" | 1756 | ".ur_f1: \n" |
1769 | "sub.l %[psiz], %[addr] \n" | ||
1770 | "move.b (%[addr]), %%d0 \n" | 1757 | "move.b (%[addr]), %%d0 \n" |
1771 | "and.l %%d5, %%d0 \n" | 1758 | "and.l %%d5, %%d0 \n" |
1772 | "move.l %%a0, %%d1 \n" | 1759 | "move.l %[mask], %%d1 \n" |
1773 | "or.l %%d1, %%d0 \n" | 1760 | "or.l %%d1, %%d0 \n" |
1774 | "move.b %%d0, (%[addr]) \n" | 1761 | "move.b %%d0, (%[addr]) \n" |
1762 | "add.l %[psiz], %[addr] \n" | ||
1775 | "lsr.l #8, %%d1 \n" | 1763 | "lsr.l #8, %%d1 \n" |
1776 | "move.l %%d1, %%a0 \n" | 1764 | "move.l %%d1, %[mask] \n" |
1777 | 1765 | ||
1778 | "move.l %[psiz], %%d0 \n" | ||
1779 | "lsl.l #3, %%d0 \n" | ||
1780 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
1781 | "subq.l #8, %[dpth] \n" | 1766 | "subq.l #8, %[dpth] \n" |
1782 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1767 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
1783 | "jgt .ur_floop \n" /* next round if anything left */ | 1768 | "jgt .ur_floop \n" /* next round if anything left */ |
1784 | 1769 | ||
1785 | "jra .ur_end \n" | 1770 | "jra .ur_end \n" |
1786 | 1771 | ||
1787 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1772 | ".ur_sstart: \n" |
1788 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1773 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */ |
1789 | "bhs.s .ur_s8 \n" | 1774 | "bra.s .ur_s1 \n" |
1790 | |||
1791 | "move.l %[psiz], %%d0 \n" | ||
1792 | "move.l %[dpth], %%d5 \n" | ||
1793 | "mulu.w %%d5, %%d0 \n" /* point behind the last plane */ | ||
1794 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1795 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1796 | "bra.s .ur_s1 \n" /* dpth == 0 should never happen */ | ||
1797 | "bra.s .ur_s2 \n" | 1775 | "bra.s .ur_s2 \n" |
1798 | "bra.s .ur_s3 \n" | 1776 | "bra.s .ur_s3 \n" |
1799 | "bra.s .ur_s4 \n" | 1777 | "bra.s .ur_s4 \n" |
1800 | "bra.s .ur_s5 \n" | 1778 | "bra.s .ur_s5 \n" |
1801 | "bra.s .ur_s6 \n" | 1779 | "bra.s .ur_s6 \n" |
1802 | "bra.s .ur_s7 \n" | 1780 | "bra.s .ur_s7 \n" |
1781 | /* bra.s .ur_s8 */ /* identical with target */ | ||
1803 | 1782 | ||
1783 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
1804 | ".ur_s8: \n" | 1784 | ".ur_s8: \n" |
1805 | "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ | 1785 | "move.l %%a0, %%d5 \n" |
1806 | "lsl.l #3, %%d0 \n" /* for this round. */ | 1786 | "move.b %%d5, (%[addr]) \n" /* store byte */ |
1807 | "add.l %%d0, %[addr] \n" /* See above. */ | 1787 | "add.l %[psiz], %[addr] \n" |
1808 | 1788 | "lsr.l #8, %%d5 \n" /* shift out used-up byte */ | |
1809 | "sub.l %[psiz], %[addr] \n" | 1789 | "move.l %%d5, %%a0 \n" |
1810 | "move.b %%d1, (%[addr]) \n" /* store byte */ | ||
1811 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | ||
1812 | ".ur_s7: \n" | 1790 | ".ur_s7: \n" |
1813 | "sub.l %[psiz], %[addr] \n" | 1791 | "move.b %%d7, (%[addr]) \n" |
1814 | "move.b %%d2, (%[addr]) \n" | 1792 | "add.l %[psiz], %[addr] \n" |
1815 | "lsr.l #8, %%d2 \n" | 1793 | "lsr.l #8, %%d7 \n" |
1816 | ".ur_s6: \n" | 1794 | ".ur_s6: \n" |
1817 | "sub.l %[psiz], %[addr] \n" | 1795 | "move.b %%d6, (%[addr]) \n" |
1818 | "move.b %%d3, (%[addr]) \n" | 1796 | "add.l %[psiz], %[addr] \n" |
1819 | "lsr.l #8, %%d3 \n" | 1797 | "lsr.l #8, %%d6 \n" |
1820 | ".ur_s5: \n" | 1798 | ".ur_s5: \n" |
1821 | "sub.l %[psiz], %[addr] \n" | ||
1822 | "move.b %%d4, (%[addr]) \n" | ||
1823 | "lsr.l #8, %%d4 \n" | ||
1824 | ".ur_s4: \n" | ||
1825 | "sub.l %[psiz], %[addr] \n" | ||
1826 | "move.l %[ax], %%d5 \n" | 1799 | "move.l %[ax], %%d5 \n" |
1827 | "move.b %%d5, (%[addr]) \n" | 1800 | "move.b %%d5, (%[addr]) \n" |
1801 | "add.l %[psiz], %[addr] \n" | ||
1828 | "lsr.l #8, %%d5 \n" | 1802 | "lsr.l #8, %%d5 \n" |
1829 | "move.l %%d5, %[ax] \n" | 1803 | "move.l %%d5, %[ax] \n" |
1804 | ".ur_s4: \n" | ||
1805 | "move.b %%d4, (%[addr]) \n" | ||
1806 | "add.l %[psiz], %[addr] \n" | ||
1807 | "lsr.l #8, %%d4 \n" | ||
1830 | ".ur_s3: \n" | 1808 | ".ur_s3: \n" |
1831 | "sub.l %[psiz], %[addr] \n" | 1809 | "move.b %%d3, (%[addr]) \n" |
1832 | "move.b %%d6, (%[addr]) \n" | 1810 | "add.l %[psiz], %[addr] \n" |
1833 | "lsr.l #8, %%d6 \n" | 1811 | "lsr.l #8, %%d3 \n" |
1834 | ".ur_s2: \n" | 1812 | ".ur_s2: \n" |
1835 | "sub.l %[psiz], %[addr] \n" | 1813 | "move.b %%d2, (%[addr]) \n" |
1836 | "move.b %%d7, (%[addr]) \n" | 1814 | "add.l %[psiz], %[addr] \n" |
1837 | "lsr.l #8, %%d7 \n" | 1815 | "lsr.l #8, %%d2 \n" |
1838 | ".ur_s1: \n" | 1816 | ".ur_s1: \n" |
1839 | "sub.l %[psiz], %[addr] \n" | 1817 | "move.b %%d1, (%[addr]) \n" |
1840 | "move.l %%a0, %%d5 \n" | 1818 | "add.l %[psiz], %[addr] \n" |
1841 | "move.b %%d5, (%[addr]) \n" | 1819 | "lsr.l #8, %%d1 \n" |
1842 | "lsr.l #8, %%d5 \n" | ||
1843 | "move.l %%d5, %%a0 \n" | ||
1844 | 1820 | ||
1845 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
1846 | "subq.l #8, %[dpth] \n" | 1821 | "subq.l #8, %[dpth] \n" |
1847 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1822 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
1848 | "jgt .ur_sloop \n" /* next round if anything left */ | 1823 | "jgt .ur_sloop \n" /* next round if anything left */ |
@@ -1871,7 +1846,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1871 | { | 1846 | { |
1872 | unsigned char *addr, *end; | 1847 | unsigned char *addr, *end; |
1873 | unsigned mask = 0; | 1848 | unsigned mask = 0; |
1874 | unsigned test = 1; | 1849 | unsigned test = 1 << ((-_gray_info.depth) & 7); |
1875 | int i; | 1850 | int i; |
1876 | 1851 | ||
1877 | /* precalculate the bit patterns with random shifts | 1852 | /* precalculate the bit patterns with random shifts |