author     Jens Arnold <amiconn@rockbox.org>  2006-08-11 23:40:05 +0000
committer  Jens Arnold <amiconn@rockbox.org>  2006-08-11 23:40:05 +0000
commit     e6ed58f6c545719ed804c9ad513496f8d0fa0286 (patch)
tree       db096eb64de3ec7c33682c5bb50579784c5ca633 /apps/plugins/lib/gray_core.c
parent     22860a910a7ddbb9b811fce4cf971e097c8e6eac (diff)
Better implementation of the new greyscale algorithm. A bit faster (+2% on iPod and Archos, +7% on Iriver H1x0), and more straightforward. * Fixed garbage display when a plugin switches on the overlay before drawing anything.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10534 a1c6a512-1295-4272-9138-f99709370657
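
The speed win comes from how partial rounds of plane writes are handled: instead of walking the bitplanes backwards and recomputing an entry point on every pass, the new code pre-shifts each precalculated pattern left by bitfill = (-depth) & 7, discards the unused low bytes once with an up-front shift streak, and then always walks forward through the planes. A standalone C check of the bitfill arithmetic (illustrative demo only, not part of the commit):

    #include <assert.h>

    int main(void)
    {
        /* (-depth) & 7 == (8 - depth % 8) % 8: the number of filler
         * bits that rounds `depth` up to a whole number of bytes. */
        for (int depth = 1; depth <= 32; depth++) {
            int bitfill = (-depth) & 7;
            assert(bitfill == (8 - depth % 8) % 8);
            assert((depth + bitfill) % 8 == 0);  /* whole bytes per pattern */
        }
        return 0;
    }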
Diffstat (limited to 'apps/plugins/lib/gray_core.c')
-rw-r--r--  apps/plugins/lib/gray_core.c | 501
1 file changed, 238 insertions(+), 263 deletions(-)
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c
index 809e88dba1..413b66c65d 100644
--- a/apps/plugins/lib/gray_core.c
+++ b/apps/plugins/lib/gray_core.c
@@ -348,7 +348,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
     long plane_size, buftaken;
     unsigned data;
 #ifndef SIMULATOR
-    int j;
+    int j, bitfill;
 #endif
 
     _gray_rb = newrb;
@@ -439,6 +439,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
     _gray_info.cur_plane = 0;
     _gray_info.plane_size = plane_size;
     _gray_info.plane_data = gbuf;
+    _gray_rb->memset(gbuf, 0, depth * plane_size);
     gbuf += depth * plane_size;
     _gray_info.bitpattern = (unsigned long *)gbuf;
 
@@ -449,7 +450,8 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
         i >>= 1;
         j--;
     }
     _gray_info.randmask = 0xFFu >> j;
+    bitfill = (-depth) & 7;
 
     /* Precalculate the bit patterns for all possible pixel values */
     for (i = 0; i <= depth; i++)
@@ -469,7 +471,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
         }
         /* now the lower <depth> bits contain the pattern */
 
-        _gray_info.bitpattern[i] = pattern;
+        _gray_info.bitpattern[i] = pattern << bitfill;
     }
 #endif
 
@@ -797,101 +799,93 @@ void gray_update_rect(int x, int y, int width, int height)
797 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ 799 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
798 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 800 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
799 801
802 "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/
803 "and r0, r0, #7 \n"
804 "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */
805 "mov r8, r8, lsr #8 \n" /* r8: never reached */
806 "mov r7, r7, lsr #8 \n"
807 "mov r6, r6, lsr #8 \n"
808 "mov r5, r5, lsr #8 \n"
809 "mov r4, r4, lsr #8 \n"
810 "mov r3, r3, lsr #8 \n"
811 "mov r2, r2, lsr #8 \n"
812 "mov r1, r1, lsr #8 \n"
813
800 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ 814 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
801 "ands %[mask], %[mask], #0xff \n" 815 "ands %[mask], %[mask], #0xff \n"
802 "beq .ur_sloop \n" /* short loop if no bits to keep */ 816 "beq .ur_sstart \n" /* short loop if no bits to keep */
803
804 ".ur_floop: \n" /** full loop (bits to keep)**/
805 "cmp %[dpth], #8 \n" /* 8 planes or more left? */
806 "bhs .ur_f8 \n"
807 817
808 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ 818 "ldrb r0, [pc, r0] \n" /* jump into full loop */
809 "add %[addr], %[addr], r0 \n" /* for this round */
810
811 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
812 "add pc, pc, r0 \n" 819 "add pc, pc, r0 \n"
813 ".ur_ftable: \n" 820 ".ur_ftable: \n"
814 ".byte .ur_f0 - .ur_ftable - 4 \n" /* [jump tables are tricky] */ 821 ".byte .ur_f1 - .ur_ftable - 4 \n" /* [jump tables are tricky] */
815 ".byte .ur_f1 - .ur_ftable - 4 \n"
816 ".byte .ur_f2 - .ur_ftable - 4 \n" 822 ".byte .ur_f2 - .ur_ftable - 4 \n"
817 ".byte .ur_f3 - .ur_ftable - 4 \n" 823 ".byte .ur_f3 - .ur_ftable - 4 \n"
818 ".byte .ur_f4 - .ur_ftable - 4 \n" 824 ".byte .ur_f4 - .ur_ftable - 4 \n"
819 ".byte .ur_f5 - .ur_ftable - 4 \n" 825 ".byte .ur_f5 - .ur_ftable - 4 \n"
820 ".byte .ur_f6 - .ur_ftable - 4 \n" 826 ".byte .ur_f6 - .ur_ftable - 4 \n"
821 ".byte .ur_f7 - .ur_ftable - 4 \n" 827 ".byte .ur_f7 - .ur_ftable - 4 \n"
828 ".byte .ur_f8 - .ur_ftable - 4 \n"
822 829
830 ".ur_floop: \n" /** full loop (bits to keep)**/
823 ".ur_f8: \n" 831 ".ur_f8: \n"
824 "add %[addr], %[addr], %[psiz], lsl #3 \n" 832 "ldrb r0, [%[addr]] \n" /* load old byte */
825 /* Point behind the last plane for this round. Note: We're using the
826 * registers backwards in order to reuse the streak for the last round.
827 * Therefore we need to go thru the bitplanes backwards too, otherwise
828 * the bit order would be destroyed which results in more flicker. */
829 "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */
830 "and r0, r0, %[mask] \n" /* mask out replaced bits */ 833 "and r0, r0, %[mask] \n" /* mask out replaced bits */
831 "orr r0, r0, r8 \n" /* set new bits */ 834 "orr r0, r0, r1 \n" /* set new bits */
832 "strb r0, [%[addr]] \n" /* store byte */ 835 "strb r0, [%[addr]], %[psiz] \n" /* store byte */
833 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ 836 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
834 ".ur_f7: \n" 837 ".ur_f7: \n"
835 "ldrb r0, [%[addr], -%[psiz]]! \n" 838 "ldrb r0, [%[addr]] \n"
836 "and r0, r0, %[mask] \n" 839 "and r0, r0, %[mask] \n"
837 "orr r0, r0, r7 \n" 840 "orr r0, r0, r2 \n"
838 "strb r0, [%[addr]] \n" 841 "strb r0, [%[addr]], %[psiz] \n"
839 "mov r7, r7, lsr #8 \n" 842 "mov r2, r2, lsr #8 \n"
840 ".ur_f6: \n" 843 ".ur_f6: \n"
841 "ldrb r0, [%[addr], -%[psiz]]! \n" 844 "ldrb r0, [%[addr]] \n"
842 "and r0, r0, %[mask] \n" 845 "and r0, r0, %[mask] \n"
843 "orr r0, r0, r6 \n" 846 "orr r0, r0, r3 \n"
844 "strb r0, [%[addr]] \n" 847 "strb r0, [%[addr]], %[psiz] \n"
845 "mov r6, r6, lsr #8 \n" 848 "mov r3, r3, lsr #8 \n"
846 ".ur_f5: \n" 849 ".ur_f5: \n"
847 "ldrb r0, [%[addr], -%[psiz]]! \n" 850 "ldrb r0, [%[addr]] \n"
848 "and r0, r0, %[mask] \n"
849 "orr r0, r0, r5 \n"
850 "strb r0, [%[addr]] \n"
851 "mov r5, r5, lsr #8 \n"
852 ".ur_f4: \n"
853 "ldrb r0, [%[addr], -%[psiz]]! \n"
854 "and r0, r0, %[mask] \n" 851 "and r0, r0, %[mask] \n"
855 "orr r0, r0, r4 \n" 852 "orr r0, r0, r4 \n"
856 "strb r0, [%[addr]] \n" 853 "strb r0, [%[addr]], %[psiz] \n"
857 "mov r4, r4, lsr #8 \n" 854 "mov r4, r4, lsr #8 \n"
855 ".ur_f4: \n"
856 "ldrb r0, [%[addr]] \n"
857 "and r0, r0, %[mask] \n"
858 "orr r0, r0, r5 \n"
859 "strb r0, [%[addr]], %[psiz] \n"
860 "mov r5, r5, lsr #8 \n"
858 ".ur_f3: \n" 861 ".ur_f3: \n"
859 "ldrb r0, [%[addr], -%[psiz]]! \n" 862 "ldrb r0, [%[addr]] \n"
860 "and r0, r0, %[mask] \n" 863 "and r0, r0, %[mask] \n"
861 "orr r0, r0, r3 \n" 864 "orr r0, r0, r6 \n"
862 "strb r0, [%[addr]] \n" 865 "strb r0, [%[addr]], %[psiz] \n"
863 "mov r3, r3, lsr #8 \n" 866 "mov r6, r6, lsr #8 \n"
864 ".ur_f2: \n" 867 ".ur_f2: \n"
865 "ldrb r0, [%[addr], -%[psiz]]! \n" 868 "ldrb r0, [%[addr]] \n"
866 "and r0, r0, %[mask] \n" 869 "and r0, r0, %[mask] \n"
867 "orr r0, r0, r2 \n" 870 "orr r0, r0, r7 \n"
868 "strb r0, [%[addr]] \n" 871 "strb r0, [%[addr]], %[psiz] \n"
869 "mov r2, r2, lsr #8 \n" 872 "mov r7, r7, lsr #8 \n"
870 ".ur_f1: \n" 873 ".ur_f1: \n"
871 "ldrb r0, [%[addr], -%[psiz]]! \n" 874 "ldrb r0, [%[addr]] \n"
872 "and r0, r0, %[mask] \n" 875 "and r0, r0, %[mask] \n"
873 "orr r0, r0, r1 \n" 876 "orr r0, r0, r8 \n"
874 "strb r0, [%[addr]] \n" 877 "strb r0, [%[addr]], %[psiz] \n"
875 "mov r1, r1, lsr #8 \n" 878 "mov r8, r8, lsr #8 \n"
876 ".ur_f0: \n"
877 879
878 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
879 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 880 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
880 "bhi .ur_floop \n" 881 "bhi .ur_floop \n"
881 882
882 "b .ur_end \n" 883 "b .ur_end \n"
883 884
884 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 885 ".ur_sstart: \n"
885 "cmp %[dpth], #8 \n" /* 8 planes or more left? */ 886 "ldrb r0, [pc, r0] \n" /* jump into short loop*/
886 "bhs .ur_s8 \n"
887
888 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */
889 "add %[addr], %[addr], r0 \n" /* for this round */
890
891 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
892 "add pc, pc, r0 \n" 887 "add pc, pc, r0 \n"
893 ".ur_stable: \n" 888 ".ur_stable: \n"
894 ".byte .ur_s0 - .ur_stable - 4 \n"
895 ".byte .ur_s1 - .ur_stable - 4 \n" 889 ".byte .ur_s1 - .ur_stable - 4 \n"
896 ".byte .ur_s2 - .ur_stable - 4 \n" 890 ".byte .ur_s2 - .ur_stable - 4 \n"
897 ".byte .ur_s3 - .ur_stable - 4 \n" 891 ".byte .ur_s3 - .ur_stable - 4 \n"
@@ -899,36 +893,34 @@ void gray_update_rect(int x, int y, int width, int height)
899 ".byte .ur_s5 - .ur_stable - 4 \n" 893 ".byte .ur_s5 - .ur_stable - 4 \n"
900 ".byte .ur_s6 - .ur_stable - 4 \n" 894 ".byte .ur_s6 - .ur_stable - 4 \n"
901 ".byte .ur_s7 - .ur_stable - 4 \n" 895 ".byte .ur_s7 - .ur_stable - 4 \n"
896 ".byte .ur_s8 - .ur_stable - 4 \n"
902 897
898 ".ur_sloop: \n" /** short loop (nothing to keep) **/
903 ".ur_s8: \n" 899 ".ur_s8: \n"
904 "add %[addr], %[addr], %[psiz], lsl #3 \n" 900 "strb r1, [%[addr]], %[psiz] \n" /* store byte */
905 /* Point behind the last plane for this round. See above. */ 901 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
906 "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */
907 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */
908 ".ur_s7: \n" 902 ".ur_s7: \n"
909 "strb r7, [%[addr], -%[psiz]]! \n" 903 "strb r2, [%[addr]], %[psiz] \n"
910 "mov r7, r7, lsr #8 \n" 904 "mov r2, r2, lsr #8 \n"
911 ".ur_s6: \n" 905 ".ur_s6: \n"
912 "strb r6, [%[addr], -%[psiz]]! \n" 906 "strb r3, [%[addr]], %[psiz] \n"
913 "mov r6, r6, lsr #8 \n" 907 "mov r3, r3, lsr #8 \n"
914 ".ur_s5: \n" 908 ".ur_s5: \n"
915 "strb r5, [%[addr], -%[psiz]]! \n" 909 "strb r4, [%[addr]], %[psiz] \n"
916 "mov r5, r5, lsr #8 \n"
917 ".ur_s4: \n"
918 "strb r4, [%[addr], -%[psiz]]! \n"
919 "mov r4, r4, lsr #8 \n" 910 "mov r4, r4, lsr #8 \n"
911 ".ur_s4: \n"
912 "strb r5, [%[addr]], %[psiz] \n"
913 "mov r5, r5, lsr #8 \n"
920 ".ur_s3: \n" 914 ".ur_s3: \n"
921 "strb r3, [%[addr], -%[psiz]]! \n" 915 "strb r6, [%[addr]], %[psiz] \n"
922 "mov r3, r3, lsr #8 \n" 916 "mov r6, r6, lsr #8 \n"
923 ".ur_s2: \n" 917 ".ur_s2: \n"
924 "strb r2, [%[addr], -%[psiz]]! \n" 918 "strb r7, [%[addr]], %[psiz] \n"
925 "mov r2, r2, lsr #8 \n" 919 "mov r7, r7, lsr #8 \n"
926 ".ur_s1: \n" 920 ".ur_s1: \n"
927 "strb r1, [%[addr], -%[psiz]]! \n" 921 "strb r8, [%[addr]], %[psiz] \n"
928 "mov r1, r1, lsr #8 \n" 922 "mov r8, r8, lsr #8 \n"
929 ".ur_s0: \n"
930 923
931 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
932 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 924 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
933 "bhi .ur_sloop \n" 925 "bhi .ur_sloop \n"
934 926
@@ -956,7 +948,7 @@ void gray_update_rect(int x, int y, int width, int height)
 {
     unsigned char *addr, *end;
     unsigned mask = 0;
-    unsigned test = 1;
+    unsigned test = 1 << ((-_gray_info.depth) & 7);
     int i;
 
     /* precalculate the bit patterns with random shifts
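
The same padding offset shows up in the portable C fallback: test now starts at bit bitfill instead of bit 0, so the plane loop skips the filler bits exactly as the assembly's initial shift streak does. A minimal sketch of that loop shape (names and body are placeholders, not the shipped code):

    /* Hypothetical sketch: walk `depth` bitplanes of pre-shifted patterns. */
    void walk_planes(int depth)
    {
        unsigned test = 1u << ((-depth) & 7);  /* skip the filler bits */
        int plane;

        for (plane = 0; plane < depth; plane++) {
            /* ...a pixel contributes to this plane where
             * (its pattern & test) is non-zero... */
            test <<= 1;                        /* advance to the next bitplane */
        }
    }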
@@ -1287,25 +1279,37 @@ void gray_update_rect(int x, int y, int width, int height)
1287 "shlr r0 \n" 1279 "shlr r0 \n"
1288 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 1280 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
1289 1281
1290 "tst %[mask], %[mask] \n" 1282 "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/
1291 "bt .ur_sloop \n" /* short loop if nothing to keep */ 1283 "add #-1, %[rx] \n"
1284 "mov #7, r0 \n"
1285 "and r0, %[rx] \n"
1286 "mova .ur_pshift, r0 \n"
1287 "add %[rx], r0 \n"
1288 "add %[rx], r0 \n"
1289 "jmp @r0 \n" /* jump into shift streak */
1290 "nop \n"
1292 1291
1293 ".ur_floop: \n" /** full loop (there are bits to keep)**/ 1292 ".align 2 \n"
1294 "mov #8, r0 \n" 1293 ".ur_pshift: \n"
1295 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ 1294 "shlr8 r7 \n"
1296 "bt .ur_f8 \n" 1295 "shlr8 r6 \n"
1296 "shlr8 r5 \n"
1297 "shlr8 r4 \n"
1298 "shlr8 r3 \n"
1299 "shlr8 r2 \n"
1300 "shlr8 r1 \n"
1301
1302 "tst %[mask], %[mask] \n"
1303 "bt .ur_sstart \n" /* short loop if nothing to keep */
1297 1304
1298 "mulu %[psiz], %[dpth] \n" 1305 "mova .ur_ftable, r0 \n" /* jump into full loop */
1299 "mova .ur_ftable, r0 \n" 1306 "mov.b @(r0, %[rx]), %[rx] \n"
1300 "mov.b @(r0, %[dpth]), %[rx] \n"
1301 "add %[rx], r0 \n" 1307 "add %[rx], r0 \n"
1302 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1308 "jmp @r0 \n"
1303 "jmp @r0 \n" /* jump into streak */ 1309 "nop \n"
1304 "add %[rx], %[addr] \n" /* ..for this round */ 1310
1305
1306 ".align 2 \n" 1311 ".align 2 \n"
1307 ".ur_ftable: \n" 1312 ".ur_ftable: \n"
1308 ".byte .ur_f0 - .ur_ftable \n"
1309 ".byte .ur_f1 - .ur_ftable \n" 1313 ".byte .ur_f1 - .ur_ftable \n"
1310 ".byte .ur_f2 - .ur_ftable \n" 1314 ".byte .ur_f2 - .ur_ftable \n"
1311 ".byte .ur_f3 - .ur_ftable \n" 1315 ".byte .ur_f3 - .ur_ftable \n"
@@ -1313,74 +1317,66 @@ void gray_update_rect(int x, int y, int width, int height)
1313 ".byte .ur_f5 - .ur_ftable \n" 1317 ".byte .ur_f5 - .ur_ftable \n"
1314 ".byte .ur_f6 - .ur_ftable \n" 1318 ".byte .ur_f6 - .ur_ftable \n"
1315 ".byte .ur_f7 - .ur_ftable \n" 1319 ".byte .ur_f7 - .ur_ftable \n"
1320 ".byte .ur_f8 - .ur_ftable \n"
1316 1321
1322 ".ur_floop: \n" /** full loop (there are bits to keep)**/
1317 ".ur_f8: \n" 1323 ".ur_f8: \n"
1318 "mov %[psiz], %[rx] \n"
1319 "shll2 %[rx] \n"
1320 "add %[rx], %[rx] \n"
1321 "add %[rx], %[addr] \n"
1322 /* Point behind the last plane for this round. Note: We're using the
1323 * registers backwards in order to reuse the streak for the last round.
1324 * Therefore we need to go thru the bitplanes backwards too, otherwise
1325 * the bit order would be destroyed which results in more flicker. */
1326 "sub %[psiz], %[addr] \n"
1327 "mov.b @%[addr], r0 \n" /* load old byte */ 1324 "mov.b @%[addr], r0 \n" /* load old byte */
1328 "and %[mask], r0 \n" /* mask out replaced bits */ 1325 "and %[mask], r0 \n" /* mask out replaced bits */
1329 "or r8, r0 \n" /* set new bits */ 1326 "or r1, r0 \n" /* set new bits */
1330 "mov.b r0, @%[addr] \n" /* store byte */ 1327 "mov.b r0, @%[addr] \n" /* store byte */
1331 "shlr8 r8 \n" /* shift out used-up byte */ 1328 "add %[psiz], %[addr] \n"
1329 "shlr8 r1 \n" /* shift out used-up byte */
1332 ".ur_f7: \n" 1330 ".ur_f7: \n"
1333 "sub %[psiz], %[addr] \n"
1334 "mov.b @%[addr], r0 \n" 1331 "mov.b @%[addr], r0 \n"
1335 "and %[mask], r0 \n" 1332 "and %[mask], r0 \n"
1336 "or r7, r0 \n" 1333 "or r2, r0 \n"
1337 "mov.b r0, @%[addr] \n" 1334 "mov.b r0, @%[addr] \n"
1338 "shlr8 r7 \n" 1335 "add %[psiz], %[addr] \n"
1336 "shlr8 r2 \n"
1339 ".ur_f6: \n" 1337 ".ur_f6: \n"
1340 "sub %[psiz], %[addr] \n"
1341 "mov.b @%[addr], r0 \n" 1338 "mov.b @%[addr], r0 \n"
1342 "and %[mask], r0 \n" 1339 "and %[mask], r0 \n"
1343 "or r6, r0 \n" 1340 "or r3, r0 \n"
1344 "mov.b r0, @%[addr] \n" 1341 "mov.b r0, @%[addr] \n"
1345 "shlr8 r6 \n" 1342 "add %[psiz], %[addr] \n"
1343 "shlr8 r3 \n"
1346 ".ur_f5: \n" 1344 ".ur_f5: \n"
1347 "sub %[psiz], %[addr] \n"
1348 "mov.b @%[addr], r0 \n" 1345 "mov.b @%[addr], r0 \n"
1349 "and %[mask], r0 \n" 1346 "and %[mask], r0 \n"
1350 "or r5, r0 \n" 1347 "or r4, r0 \n"
1351 "mov.b r0, @%[addr] \n" 1348 "mov.b r0, @%[addr] \n"
1352 "shlr8 r5 \n" 1349 "add %[psiz], %[addr] \n"
1350 "shlr8 r4 \n"
1353 ".ur_f4: \n" 1351 ".ur_f4: \n"
1354 "sub %[psiz], %[addr] \n"
1355 "mov.b @%[addr], r0 \n" 1352 "mov.b @%[addr], r0 \n"
1356 "and %[mask], r0 \n" 1353 "and %[mask], r0 \n"
1357 "or r4, r0 \n" 1354 "or r5, r0 \n"
1358 "mov.b r0, @%[addr] \n" 1355 "mov.b r0, @%[addr] \n"
1359 "shlr8 r4 \n" 1356 "add %[psiz], %[addr] \n"
1357 "shlr8 r5 \n"
1360 ".ur_f3: \n" 1358 ".ur_f3: \n"
1361 "sub %[psiz], %[addr] \n"
1362 "mov.b @%[addr], r0 \n" 1359 "mov.b @%[addr], r0 \n"
1363 "and %[mask], r0 \n" 1360 "and %[mask], r0 \n"
1364 "or r3, r0 \n" 1361 "or r6, r0 \n"
1365 "mov.b r0, @%[addr] \n" 1362 "mov.b r0, @%[addr] \n"
1366 "shlr8 r3 \n" 1363 "add %[psiz], %[addr] \n"
1364 "shlr8 r6 \n"
1367 ".ur_f2: \n" 1365 ".ur_f2: \n"
1368 "sub %[psiz], %[addr] \n"
1369 "mov.b @%[addr], r0 \n" 1366 "mov.b @%[addr], r0 \n"
1370 "and %[mask], r0 \n" 1367 "and %[mask], r0 \n"
1371 "or r2, r0 \n" 1368 "or r7, r0 \n"
1372 "mov.b r0, @%[addr] \n" 1369 "mov.b r0, @%[addr] \n"
1373 "shlr8 r2 \n" 1370 "add %[psiz], %[addr] \n"
1371 "shlr8 r7 \n"
1374 ".ur_f1: \n" 1372 ".ur_f1: \n"
1375 "sub %[psiz], %[addr] \n"
1376 "mov.b @%[addr], r0 \n" 1373 "mov.b @%[addr], r0 \n"
1377 "and %[mask], r0 \n" 1374 "and %[mask], r0 \n"
1378 "or r1, r0 \n" 1375 "or r8, r0 \n"
1379 "mov.b r0, @%[addr] \n" 1376 "mov.b r0, @%[addr] \n"
1380 "shlr8 r1 \n" 1377 "add %[psiz], %[addr] \n"
1381 ".ur_f0: \n" 1378 "shlr8 r8 \n"
1382 1379
1383 "add %[rx], %[addr] \n" /* correct address */
1384 "add #-8, %[dpth] \n" 1380 "add #-8, %[dpth] \n"
1385 "cmp/pl %[dpth] \n" /* next round if anything left */ 1381 "cmp/pl %[dpth] \n" /* next round if anything left */
1386 "bt .ur_floop \n" 1382 "bt .ur_floop \n"
@@ -1404,22 +1400,15 @@ void gray_update_rect(int x, int y, int width, int height)
1404 ".ur_mask1: \n" 1400 ".ur_mask1: \n"
1405 ".long 0xAAAAAAAA \n" 1401 ".long 0xAAAAAAAA \n"
1406 1402
1407 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 1403 ".ur_sstart: \n"
1408 "mov #8, r0 \n" 1404 "mova .ur_stable, r0 \n" /* jump into short loop */
1409 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ 1405 "mov.b @(r0, %[rx]), %[rx] \n"
1410 "bt .ur_s8 \n"
1411
1412 "mulu %[psiz], %[dpth] \n"
1413 "mova .ur_stable, r0 \n"
1414 "mov.b @(r0, %[dpth]), %[rx] \n"
1415 "add %[rx], r0 \n" 1406 "add %[rx], r0 \n"
1416 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1407 "jmp @r0 \n"
1417 "jmp @r0 \n" /* jump into streak */ 1408 "nop \n"
1418 "add %[rx], %[addr] \n" /* ..for this round */
1419 1409
1420 ".align 2 \n" 1410 ".align 2 \n"
1421 ".ur_stable: \n" 1411 ".ur_stable: \n"
1422 ".byte .ur_s0 - .ur_stable \n"
1423 ".byte .ur_s1 - .ur_stable \n" 1412 ".byte .ur_s1 - .ur_stable \n"
1424 ".byte .ur_s2 - .ur_stable \n" 1413 ".byte .ur_s2 - .ur_stable \n"
1425 ".byte .ur_s3 - .ur_stable \n" 1414 ".byte .ur_s3 - .ur_stable \n"
@@ -1427,47 +1416,42 @@ void gray_update_rect(int x, int y, int width, int height)
1427 ".byte .ur_s5 - .ur_stable \n" 1416 ".byte .ur_s5 - .ur_stable \n"
1428 ".byte .ur_s6 - .ur_stable \n" 1417 ".byte .ur_s6 - .ur_stable \n"
1429 ".byte .ur_s7 - .ur_stable \n" 1418 ".byte .ur_s7 - .ur_stable \n"
1419 ".byte .ur_s8 - .ur_stable \n"
1430 1420
1421 ".ur_sloop: \n" /** short loop (nothing to keep) **/
1431 ".ur_s8: \n" 1422 ".ur_s8: \n"
1432 "mov %[psiz], %[rx] \n" /* Point behind the last plane */ 1423 "mov.b r1, @%[addr] \n" /* store byte */
1433 "shll2 %[rx] \n" /* for this round. */ 1424 "add %[psiz], %[addr] \n"
1434 "add %[rx], %[rx] \n" /* See above. */ 1425 "shlr8 r1 \n" /* shift out used-up byte */
1435 "add %[rx], %[addr] \n"
1436
1437 "sub %[psiz], %[addr] \n"
1438 "mov.b r8, @%[addr] \n" /* store byte */
1439 "shlr8 r8 \n" /* shift out used-up byte */
1440 ".ur_s7: \n" 1426 ".ur_s7: \n"
1441 "sub %[psiz], %[addr] \n" 1427 "mov.b r2, @%[addr] \n"
1442 "mov.b r7, @%[addr] \n" 1428 "add %[psiz], %[addr] \n"
1443 "shlr8 r7 \n" 1429 "shlr8 r2 \n"
1444 ".ur_s6: \n" 1430 ".ur_s6: \n"
1445 "sub %[psiz], %[addr] \n" 1431 "mov.b r3, @%[addr] \n"
1446 "mov.b r6, @%[addr] \n" 1432 "add %[psiz], %[addr] \n"
1447 "shlr8 r6 \n" 1433 "shlr8 r3 \n"
1448 ".ur_s5: \n" 1434 ".ur_s5: \n"
1449 "sub %[psiz], %[addr] \n"
1450 "mov.b r5, @%[addr] \n"
1451 "shlr8 r5 \n"
1452 ".ur_s4: \n"
1453 "sub %[psiz], %[addr] \n"
1454 "mov.b r4, @%[addr] \n" 1435 "mov.b r4, @%[addr] \n"
1436 "add %[psiz], %[addr] \n"
1455 "shlr8 r4 \n" 1437 "shlr8 r4 \n"
1438 ".ur_s4: \n"
1439 "mov.b r5, @%[addr] \n"
1440 "add %[psiz], %[addr] \n"
1441 "shlr8 r5 \n"
1456 ".ur_s3: \n" 1442 ".ur_s3: \n"
1457 "sub %[psiz], %[addr] \n" 1443 "mov.b r6, @%[addr] \n"
1458 "mov.b r3, @%[addr] \n" 1444 "add %[psiz], %[addr] \n"
1459 "shlr8 r3 \n" 1445 "shlr8 r6 \n"
1460 ".ur_s2: \n" 1446 ".ur_s2: \n"
1461 "sub %[psiz], %[addr] \n" 1447 "mov.b r7, @%[addr] \n"
1462 "mov.b r2, @%[addr] \n" 1448 "add %[psiz], %[addr] \n"
1463 "shlr8 r2 \n" 1449 "shlr8 r7 \n"
1464 ".ur_s1: \n" 1450 ".ur_s1: \n"
1465 "sub %[psiz], %[addr] \n" 1451 "mov.b r8, @%[addr] \n"
1466 "mov.b r1, @%[addr] \n" 1452 "add %[psiz], %[addr] \n"
1467 "shlr8 r1 \n" 1453 "shlr8 r8 \n"
1468 ".ur_s0: \n"
1469 1454
1470 "add %[rx], %[addr] \n" /* correct address */
1471 "add #-8, %[dpth] \n" 1455 "add #-8, %[dpth] \n"
1472 "cmp/pl %[dpth] \n" /* next round if anything left */ 1456 "cmp/pl %[dpth] \n" /* next round if anything left */
1473 "bt .ur_sloop \n" 1457 "bt .ur_sloop \n"
@@ -1677,172 +1661,163 @@ void gray_update_rect(int x, int y, int width, int height)
1677 "move.l %%a0, %%d5 \n" 1661 "move.l %%a0, %%d5 \n"
1678 "eor.l %%d5, %%d0 \n" 1662 "eor.l %%d5, %%d0 \n"
1679 "and.l #0xAAAAAAAA, %%d0 \n" 1663 "and.l #0xAAAAAAAA, %%d0 \n"
1680 "eor.l %%d0, %%d5 \n" 1664 "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */
1681 "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ 1665 /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */
1682 "lsr.l #1, %%d0 \n" 1666 "lsr.l #1, %%d0 \n"
1683 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ 1667 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */
1668
1669 "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/
1670 "subq.l #1, %%d0 \n"
1671 "and.l #7, %%d0 \n"
1672 "move.l %%d0, %%a0 \n"
1673 "move.l %[ax], %%d0 \n" /* all data in D registers */
1674 "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */
1675 "lsr.l #8, %%d2 \n"
1676 "lsr.l #8, %%d3 \n"
1677 "lsr.l #8, %%d4 \n"
1678 "lsr.l #8, %%d0 \n"
1679 "lsr.l #8, %%d6 \n"
1680 "lsr.l #8, %%d7 \n"
1681 "lsr.l #8, %%d5 \n"
1682 "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */
1683 "move.l %%a0, %%d0 \n" /* keep the value for later */
1684 "move.l %%d5, %%a0 \n" /* ..into their A registers */
1684 1685
1685 "tst.l %[mask] \n" 1686 "tst.l %[mask] \n"
1686 "jeq .ur_sloop \n" /* short loop if nothing to keep */ 1687 "jeq .ur_sstart \n" /* short loop if nothing to keep */
1687 1688
1688 "move.l %[mask], %%d5 \n" /* need mask in data reg. */ 1689 "move.l %[mask], %%d5 \n" /* need mask in data reg. */
1689 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ 1690 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */
1690 1691
1691 ".ur_floop: \n" /** full loop (there are bits to keep)**/ 1692 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */
1692 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1693 "bra.s .ur_f1 \n"
1693 "bhs.s .ur_f8 \n"
1694
1695 "move.l %[psiz], %%d0 \n"
1696 "move.l %[dpth], %%d1 \n"
1697 "mulu.w %%d1, %%d0 \n" /* point behind the last plane */
1698 "add.l %%d0, %[addr] \n" /* for this round */
1699 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1700 "bra.s .ur_f1 \n" /* dpth == 0 should never happen */
1701 "bra.s .ur_f2 \n" 1694 "bra.s .ur_f2 \n"
1702 "bra.s .ur_f3 \n" 1695 "bra.s .ur_f3 \n"
1703 "bra.s .ur_f4 \n" 1696 "bra.s .ur_f4 \n"
1704 "bra.s .ur_f5 \n" 1697 "bra.s .ur_f5 \n"
1705 "bra.s .ur_f6 \n" 1698 "bra.s .ur_f6 \n"
1706 "bra.s .ur_f7 \n" 1699 "bra.s .ur_f7 \n"
1700 /* bra.s .ur_f8 */ /* identical with target */
1707 1701
1702 ".ur_floop: \n" /** full loop (there are bits to keep)**/
1708 ".ur_f8: \n" 1703 ".ur_f8: \n"
1709 "move.l %[psiz], %%d0 \n"
1710 "lsl.l #3, %%d0 \n"
1711 "add.l %%d0, %[addr] \n"
1712 /* Point behind the last plane for this round. Note: We're using the
1713 * registers backwards in order to reuse the streak for the last round.
1714 * Therefore we need to go thru the bitplanes backwards too, otherwise
1715 * the bit order would be destroyed which results in more flicker. */
1716 "sub.l %[psiz], %[addr] \n"
1717 "move.b (%[addr]), %%d0 \n" /* load old byte */ 1704 "move.b (%[addr]), %%d0 \n" /* load old byte */
1718 "and.l %%d5, %%d0 \n" /* mask out replaced bits */ 1705 "and.l %%d5, %%d0 \n" /* mask out replaced bits */
1719 "move.l %[mask], %%d1 \n" 1706 "move.l %%a0, %%d1 \n"
1720 "or.l %%d1, %%d0 \n" /* set new bits */ 1707 "or.l %%d1, %%d0 \n" /* set new bits */
1721 "move.b %%d0, (%[addr]) \n" /* store byte */ 1708 "move.b %%d0, (%[addr]) \n" /* store byte */
1709 "add.l %[psiz], %[addr] \n"
1722 "lsr.l #8, %%d1 \n" /* shift out used-up byte */ 1710 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1723 "move.l %%d1, %[mask] \n" 1711 "move.l %%d1, %%a0 \n"
1724 ".ur_f7: \n" 1712 ".ur_f7: \n"
1725 "sub.l %[psiz], %[addr] \n"
1726 "move.b (%[addr]), %%d0 \n" 1713 "move.b (%[addr]), %%d0 \n"
1727 "and.l %%d5, %%d0 \n" 1714 "and.l %%d5, %%d0 \n"
1728 "or.l %%d2, %%d0 \n" 1715 "or.l %%d7, %%d0 \n"
1729 "move.b %%d0, (%[addr]) \n" 1716 "move.b %%d0, (%[addr]) \n"
1730 "lsr.l #8, %%d2 \n" 1717 "add.l %[psiz], %[addr] \n"
1718 "lsr.l #8, %%d7 \n"
1731 ".ur_f6: \n" 1719 ".ur_f6: \n"
1732 "sub.l %[psiz], %[addr] \n"
1733 "move.b (%[addr]), %%d0 \n" 1720 "move.b (%[addr]), %%d0 \n"
1734 "and.l %%d5, %%d0 \n" 1721 "and.l %%d5, %%d0 \n"
1735 "or.l %%d3, %%d0 \n" 1722 "or.l %%d6, %%d0 \n"
1736 "move.b %%d0, (%[addr]) \n" 1723 "move.b %%d0, (%[addr]) \n"
1737 "lsr.l #8, %%d3 \n" 1724 "add.l %[psiz], %[addr] \n"
1725 "lsr.l #8, %%d6 \n"
1738 ".ur_f5: \n" 1726 ".ur_f5: \n"
1739 "sub.l %[psiz], %[addr] \n"
1740 "move.b (%[addr]), %%d0 \n"
1741 "and.l %%d5, %%d0 \n"
1742 "or.l %%d4, %%d0 \n"
1743 "move.b %%d0, (%[addr]) \n"
1744 "lsr.l #8, %%d4 \n"
1745 ".ur_f4: \n"
1746 "sub.l %[psiz], %[addr] \n"
1747 "move.b (%[addr]), %%d0 \n" 1727 "move.b (%[addr]), %%d0 \n"
1748 "and.l %%d5, %%d0 \n" 1728 "and.l %%d5, %%d0 \n"
1749 "move.l %[ax], %%d1 \n" 1729 "move.l %[ax], %%d1 \n"
1750 "or.l %%d1, %%d0 \n" 1730 "or.l %%d1, %%d0 \n"
1751 "move.b %%d0, (%[addr]) \n" 1731 "move.b %%d0, (%[addr]) \n"
1732 "add.l %[psiz], %[addr] \n"
1752 "lsr.l #8, %%d1 \n" 1733 "lsr.l #8, %%d1 \n"
1753 "move.l %%d1, %[ax] \n" 1734 "move.l %%d1, %[ax] \n"
1735 ".ur_f4: \n"
1736 "move.b (%[addr]), %%d0 \n"
1737 "and.l %%d5, %%d0 \n"
1738 "or.l %%d4, %%d0 \n"
1739 "move.b %%d0, (%[addr]) \n"
1740 "add.l %[psiz], %[addr] \n"
1741 "lsr.l #8, %%d4 \n"
1754 ".ur_f3: \n" 1742 ".ur_f3: \n"
1755 "sub.l %[psiz], %[addr] \n"
1756 "move.b (%[addr]), %%d0 \n" 1743 "move.b (%[addr]), %%d0 \n"
1757 "and.l %%d5, %%d0 \n" 1744 "and.l %%d5, %%d0 \n"
1758 "or.l %%d6, %%d0 \n" 1745 "or.l %%d3, %%d0 \n"
1759 "move.b %%d0, (%[addr]) \n" 1746 "move.b %%d0, (%[addr]) \n"
1760 "lsr.l #8, %%d6 \n" 1747 "add.l %[psiz], %[addr] \n"
1748 "lsr.l #8, %%d3 \n"
1761 ".ur_f2: \n" 1749 ".ur_f2: \n"
1762 "sub.l %[psiz], %[addr] \n"
1763 "move.b (%[addr]), %%d0 \n" 1750 "move.b (%[addr]), %%d0 \n"
1764 "and.l %%d5, %%d0 \n" 1751 "and.l %%d5, %%d0 \n"
1765 "or.l %%d7, %%d0 \n" 1752 "or.l %%d2, %%d0 \n"
1766 "move.b %%d0, (%[addr]) \n" 1753 "move.b %%d0, (%[addr]) \n"
1767 "lsr.l #8, %%d7 \n" 1754 "add.l %[psiz], %[addr] \n"
1755 "lsr.l #8, %%d2 \n"
1768 ".ur_f1: \n" 1756 ".ur_f1: \n"
1769 "sub.l %[psiz], %[addr] \n"
1770 "move.b (%[addr]), %%d0 \n" 1757 "move.b (%[addr]), %%d0 \n"
1771 "and.l %%d5, %%d0 \n" 1758 "and.l %%d5, %%d0 \n"
1772 "move.l %%a0, %%d1 \n" 1759 "move.l %[mask], %%d1 \n"
1773 "or.l %%d1, %%d0 \n" 1760 "or.l %%d1, %%d0 \n"
1774 "move.b %%d0, (%[addr]) \n" 1761 "move.b %%d0, (%[addr]) \n"
1762 "add.l %[psiz], %[addr] \n"
1775 "lsr.l #8, %%d1 \n" 1763 "lsr.l #8, %%d1 \n"
1776 "move.l %%d1, %%a0 \n" 1764 "move.l %%d1, %[mask] \n"
1777 1765
1778 "move.l %[psiz], %%d0 \n"
1779 "lsl.l #3, %%d0 \n"
1780 "add.l %%d0, %[addr] \n" /* correct address */
1781 "subq.l #8, %[dpth] \n" 1766 "subq.l #8, %[dpth] \n"
1782 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1767 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
1783 "jgt .ur_floop \n" /* next round if anything left */ 1768 "jgt .ur_floop \n" /* next round if anything left */
1784 1769
1785 "jra .ur_end \n" 1770 "jra .ur_end \n"
1786 1771
1787 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 1772 ".ur_sstart: \n"
1788 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1773 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */
1789 "bhs.s .ur_s8 \n" 1774 "bra.s .ur_s1 \n"
1790
1791 "move.l %[psiz], %%d0 \n"
1792 "move.l %[dpth], %%d5 \n"
1793 "mulu.w %%d5, %%d0 \n" /* point behind the last plane */
1794 "add.l %%d0, %[addr] \n" /* for this round */
1795 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1796 "bra.s .ur_s1 \n" /* dpth == 0 should never happen */
1797 "bra.s .ur_s2 \n" 1775 "bra.s .ur_s2 \n"
1798 "bra.s .ur_s3 \n" 1776 "bra.s .ur_s3 \n"
1799 "bra.s .ur_s4 \n" 1777 "bra.s .ur_s4 \n"
1800 "bra.s .ur_s5 \n" 1778 "bra.s .ur_s5 \n"
1801 "bra.s .ur_s6 \n" 1779 "bra.s .ur_s6 \n"
1802 "bra.s .ur_s7 \n" 1780 "bra.s .ur_s7 \n"
1781 /* bra.s .ur_s8 */ /* identical with target */
1803 1782
1783 ".ur_sloop: \n" /** short loop (nothing to keep) **/
1804 ".ur_s8: \n" 1784 ".ur_s8: \n"
1805 "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ 1785 "move.l %%a0, %%d5 \n"
1806 "lsl.l #3, %%d0 \n" /* for this round. */ 1786 "move.b %%d5, (%[addr]) \n" /* store byte */
1807 "add.l %%d0, %[addr] \n" /* See above. */ 1787 "add.l %[psiz], %[addr] \n"
1808 1788 "lsr.l #8, %%d5 \n" /* shift out used-up byte */
1809 "sub.l %[psiz], %[addr] \n" 1789 "move.l %%d5, %%a0 \n"
1810 "move.b %%d1, (%[addr]) \n" /* store byte */
1811 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1812 ".ur_s7: \n" 1790 ".ur_s7: \n"
1813 "sub.l %[psiz], %[addr] \n" 1791 "move.b %%d7, (%[addr]) \n"
1814 "move.b %%d2, (%[addr]) \n" 1792 "add.l %[psiz], %[addr] \n"
1815 "lsr.l #8, %%d2 \n" 1793 "lsr.l #8, %%d7 \n"
1816 ".ur_s6: \n" 1794 ".ur_s6: \n"
1817 "sub.l %[psiz], %[addr] \n" 1795 "move.b %%d6, (%[addr]) \n"
1818 "move.b %%d3, (%[addr]) \n" 1796 "add.l %[psiz], %[addr] \n"
1819 "lsr.l #8, %%d3 \n" 1797 "lsr.l #8, %%d6 \n"
1820 ".ur_s5: \n" 1798 ".ur_s5: \n"
1821 "sub.l %[psiz], %[addr] \n"
1822 "move.b %%d4, (%[addr]) \n"
1823 "lsr.l #8, %%d4 \n"
1824 ".ur_s4: \n"
1825 "sub.l %[psiz], %[addr] \n"
1826 "move.l %[ax], %%d5 \n" 1799 "move.l %[ax], %%d5 \n"
1827 "move.b %%d5, (%[addr]) \n" 1800 "move.b %%d5, (%[addr]) \n"
1801 "add.l %[psiz], %[addr] \n"
1828 "lsr.l #8, %%d5 \n" 1802 "lsr.l #8, %%d5 \n"
1829 "move.l %%d5, %[ax] \n" 1803 "move.l %%d5, %[ax] \n"
1804 ".ur_s4: \n"
1805 "move.b %%d4, (%[addr]) \n"
1806 "add.l %[psiz], %[addr] \n"
1807 "lsr.l #8, %%d4 \n"
1830 ".ur_s3: \n" 1808 ".ur_s3: \n"
1831 "sub.l %[psiz], %[addr] \n" 1809 "move.b %%d3, (%[addr]) \n"
1832 "move.b %%d6, (%[addr]) \n" 1810 "add.l %[psiz], %[addr] \n"
1833 "lsr.l #8, %%d6 \n" 1811 "lsr.l #8, %%d3 \n"
1834 ".ur_s2: \n" 1812 ".ur_s2: \n"
1835 "sub.l %[psiz], %[addr] \n" 1813 "move.b %%d2, (%[addr]) \n"
1836 "move.b %%d7, (%[addr]) \n" 1814 "add.l %[psiz], %[addr] \n"
1837 "lsr.l #8, %%d7 \n" 1815 "lsr.l #8, %%d2 \n"
1838 ".ur_s1: \n" 1816 ".ur_s1: \n"
1839 "sub.l %[psiz], %[addr] \n" 1817 "move.b %%d1, (%[addr]) \n"
1840 "move.l %%a0, %%d5 \n" 1818 "add.l %[psiz], %[addr] \n"
1841 "move.b %%d5, (%[addr]) \n" 1819 "lsr.l #8, %%d1 \n"
1842 "lsr.l #8, %%d5 \n"
1843 "move.l %%d5, %%a0 \n"
1844 1820
1845 "add.l %%d0, %[addr] \n" /* correct address */
1846 "subq.l #8, %[dpth] \n" 1821 "subq.l #8, %[dpth] \n"
1847 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1822 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
1848 "jgt .ur_sloop \n" /* next round if anything left */ 1823 "jgt .ur_sloop \n" /* next round if anything left */
@@ -1871,7 +1846,7 @@ void gray_update_rect(int x, int y, int width, int height)
 {
     unsigned char *addr, *end;
     unsigned mask = 0;
-    unsigned test = 1;
+    unsigned test = 1 << ((-_gray_info.depth) & 7);
     int i;
 
     /* precalculate the bit patterns with random shifts
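
All three assembly versions share the same control flow, which is easier to see in C. The sketch below (a hypothetical helper for illustration; the real routine works on the transposed registers prepared earlier) mirrors the reworked short loop: a computed jump enters the unrolled eight-plane streak part-way, so the first round writes depth % 8 bytes (or 8 when depth is a multiple of 8) and every later round writes a full 8, always stepping forward by one plane size per byte. This is the classic Duff's device:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical C analogue of the ".ur_sloop" streak (illustration only). */
    static void store_planes(unsigned char *addr, long psiz, int depth,
                             unsigned long data[8])
    {
        switch ((depth - 1) & 7) {      /* computed entry into the streak */
            do {
        case 7: *addr = (unsigned char)data[0]; addr += psiz; data[0] >>= 8;
                /* fall through */
        case 6: *addr = (unsigned char)data[1]; addr += psiz; data[1] >>= 8;
                /* fall through */
        case 5: *addr = (unsigned char)data[2]; addr += psiz; data[2] >>= 8;
                /* fall through */
        case 4: *addr = (unsigned char)data[3]; addr += psiz; data[3] >>= 8;
                /* fall through */
        case 3: *addr = (unsigned char)data[4]; addr += psiz; data[4] >>= 8;
                /* fall through */
        case 2: *addr = (unsigned char)data[5]; addr += psiz; data[5] >>= 8;
                /* fall through */
        case 1: *addr = (unsigned char)data[6]; addr += psiz; data[6] >>= 8;
                /* fall through */
        case 0: *addr = (unsigned char)data[7]; addr += psiz; data[7] >>= 8;
                depth -= 8;             /* one round consumed */
            } while (depth > 0);
        }
    }

    int main(void)
    {
        unsigned char planes[16];
        unsigned long data[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};

        memset(planes, 0, sizeof planes);
        store_planes(planes, 1, 13, data);  /* 13 planes: rounds of 5 + 8 */
        for (int i = 0; i < 13; i++)
            printf("%02x ", planes[i]);     /* first round wrote data[3..7] */
        putchar('\n');
        return 0;
    }

The mapping of cases to data slots here is illustrative; in the shipped code the transpose and the bitfill pre-shift together guarantee that whichever register the entry point selects holds the byte belonging to the first remaining plane.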