author    Jens Arnold <amiconn@rockbox.org>  2010-05-31 19:56:21 +0000
committer Jens Arnold <amiconn@rockbox.org>  2010-05-31 19:56:21 +0000
commit    85fd2d8be90ab3eb9f134180357725a60f988243 (patch)
tree      0149ffdf7110b70d5077d566c0366906d861b03d
parent    c5e14b5835114faae78997f7e0b14c19966b187d (diff)
download  rockbox-85fd2d8be90ab3eb9f134180357725a60f988243.tar.gz
          rockbox-85fd2d8be90ab3eb9f134180357725a60f988243.zip
Smaller & faster greylib blitting on iriver H1x0 and iAudio M5, based on the ARM version but using mulu.l for the bit shuffling. ISR speedup is ~10%.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26434 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--  firmware/target/coldfire/iaudio/m5/lcd-as-m5.S     | 233
-rw-r--r--  firmware/target/coldfire/iriver/h100/lcd-as-h100.S | 229
2 files changed, 212 insertions, 250 deletions
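
The heart of the change is the bit shuffling in lcd_grey_data: the four phase MSBs are isolated with a mask and gathered into one output byte by a single mulu.l with the factor 0xc30c3, replacing the old per-bit bclr/seq/lsl chain. Below is a minimal C sketch of that step, not part of the commit; the mask, the factor and the shift count are taken from the new assembly, while the function and variable names are invented for illustration.

    /* Illustrative C model of the new bit-shuffle step. */
    #include <stdint.h>
    #include <stdio.h>

    /* 'phases' packs four 8-bit phase accumulators into one long word;
     * the MSB of each byte decides whether that pixel is drawn. */
    static uint8_t shuffle_msbs(uint32_t phases)
    {
        uint32_t b = phases & 0x80808080u; /* isolate the four MSBs                  */
        b >>= 1;                           /* .0.......1.......2.......3......       */
        b *= 0xC30C3u;                     /* gather them: bits 31..24 = 00112233    */
        b = ~b;                            /* negate, like the not.l in the assembly */
        return (uint8_t)(b >> 24);         /* 00112233 in the low byte               */
    }

    int main(void)
    {
        /* Pixels 0 and 2 have their MSB set, pixels 1 and 3 do not; each
         * pixel occupies two identical output bits, so the negated result
         * is 00 11 00 11 = 0x33. */
        printf("%#x\n", (unsigned)shuffle_msbs(0x80008000u));
        return 0;
    }

Shifting right by one before the multiply keeps every useful partial product inside the 32-bit result; the final shift by 24 discards everything else.
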
diff --git a/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S b/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S
index d42ee1c888..12d0c670e9 100644
--- a/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S
+++ b/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S
@@ -94,151 +94,132 @@ lcd_write_data:
lcd_grey_data, as rewritten by this commit (new file lines 94-225):

lcd_grey_data:
    lea.l   (-11*4, %sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)      /* free some registers */
    movem.l (11*4+4, %sp), %a0-%a2      /* values, phases, length */
    lea.l   (%a1, %a2.l*4), %a2         /* end address */
    lea     0xf0008002, %a3             /* LCD data port */
    moveq.l #24, %d6                    /* shift count */
    move.l  #0xc30c3, %d7               /* bit shuffle factor */

    moveq.l #12, %d2
    add.l   %a1, %d2
    and.l   #0xfffffff0, %d2            /* first line bound */
    cmp.l   %d2, %a2                    /* end address lower than first line bound? */
    bhs.s   1f
    move.l  %a2, %d2                    /* -> adjust end address of head loop */
1:
    cmp.l   %a1, %d2
    bls.s   .g_hend

.g_hloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1            /* separate MSBs of the 4 phases */
    eor.l   %d1, %d0                    /* clear them in %d0 */
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1                     /* %d1 = .0.......1.......2.......3...... */
    mulu.l  %d7, %d1                    /* %d1 = 00112233112233..2233....33...... */
    not.l   %d1                         /* negate bits */
    lsr.l   %d6, %d1                    /* %d1 = ........................00112233 */
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %d2                    /* go up to first line bound */
    bhi.s   .g_hloop

.g_hend:
    cmp.l   %a1, %a2
    bls.w   .g_tend
    lea.l   (-12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_lend

.g_lloop:
    movem.l (%a1), %d0-%d3              /* fetch 4 blocks of 4 pixel phases each */

    move.l  %d0, %d4                    /* calculate first pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d0
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d4, (%a3)                  /* write first pixel block to LCD */

    move.l  %d1, %d5                    /* calculate second pixel block */
    and.l   #0x80808080, %d5
    eor.l   %d5, %d1
    lsr.l   #1, %d5
    mulu.l  %d7, %d5
    not.l   %d5
    lsr.l   %d6, %d5

    move.l  %d2, %d4                    /* calculate third pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d2
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d5, (%a3)                  /* write second pixel block to LCD */

    movem.l (%a0), %d5/%a4-%a6          /* fetch 4 blocks of 4 pixel values each */
    lea.l   (16, %a0), %a0

    move.w  %d4, (%a3)                  /* write third pixel block to LCD */

    move.l  %d3, %d4                    /* calculate fourth pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d3
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    add.l   %d5, %d0                    /* calculate 4*4 new pixel phases */
    add.l   %a4, %d1                    /* (packed addition) */
    add.l   %a5, %d2
    add.l   %a6, %d3

    movem.l %d0-%d3, (%a1)              /* store 4*4 new pixel phases */
    lea.l   (16, %a1), %a1

    move.w  %d4, (%a3)                  /* write fourth pixel block to LCD */

    cmp.l   %a1, %a2                    /* go up to last line bound */
    bhi.s   .g_lloop

.g_lend:
    lea.l   (12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_tend

.g_tloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1
    eor.l   %d1, %d0
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1
    mulu.l  %d7, %d1
    not.l   %d1
    lsr.l   %d6, %d1
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %a2                    /* go up to end address */
    bhi.s   .g_tloop

.g_tend:
    movem.l (%sp), %d2-%d7/%a2-%a6      /* restore registers */
    lea.l   (11*4, %sp), %sp
    rts

.gd_end:
diff --git a/firmware/target/coldfire/iriver/h100/lcd-as-h100.S b/firmware/target/coldfire/iriver/h100/lcd-as-h100.S
index 9ebb5752aa..b13d5146b9 100644
--- a/firmware/target/coldfire/iriver/h100/lcd-as-h100.S
+++ b/firmware/target/coldfire/iriver/h100/lcd-as-h100.S
@@ -106,153 +106,134 @@ lcd_write_data:
lcd_grey_data, as rewritten by this commit (new file lines 106-239):

lcd_grey_data:
    lea.l   (-11*4, %sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)      /* free some registers */
    movem.l (11*4+4, %sp), %a0-%a2      /* values, phases, length */
    lea.l   (%a1, %a2.l*4), %a2         /* end address */
    moveq   #8, %d1
    or.l    %d1, (MBAR2+0xb4)           /* A0 = 1 (data) */
    lea     0xf0000000, %a3             /* LCD data port */
    moveq.l #24, %d6                    /* shift count */
    move.l  #0xc30c3, %d7               /* bit shuffle factor */

    moveq.l #12, %d2
    add.l   %a1, %d2
    and.l   #0xfffffff0, %d2            /* first line bound */
    cmp.l   %d2, %a2                    /* end address lower than first line bound? */
    bhs.s   1f
    move.l  %a2, %d2                    /* -> adjust end address of head loop */
1:
    cmp.l   %a1, %d2
    bls.s   .g_hend

.g_hloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1            /* separate MSBs of the 4 phases */
    eor.l   %d1, %d0                    /* clear them in %d0 */
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1                     /* %d1 = .0.......1.......2.......3...... */
    mulu.l  %d7, %d1                    /* %d1 = 00112233112233..2233....33...... */
    not.l   %d1                         /* negate bits */
    lsr.l   %d6, %d1                    /* %d1 = ........................00112233 */
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %d2                    /* go up to first line bound */
    bhi.s   .g_hloop

.g_hend:
    cmp.l   %a1, %a2
    bls.w   .g_tend
    lea.l   (-12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_lend

.g_lloop:
    movem.l (%a1), %d0-%d3              /* fetch 4 blocks of 4 pixel phases each */

    move.l  %d0, %d4                    /* calculate first pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d0
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d4, (%a3)                  /* write first pixel block to LCD */

    move.l  %d1, %d5                    /* calculate second pixel block */
    and.l   #0x80808080, %d5
    eor.l   %d5, %d1
    lsr.l   #1, %d5
    mulu.l  %d7, %d5
    not.l   %d5
    lsr.l   %d6, %d5

    move.l  %d2, %d4                    /* calculate third pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d2
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d5, (%a3)                  /* write second pixel block to LCD */

    movem.l (%a0), %d5/%a4-%a6          /* fetch 4 blocks of 4 pixel values each */
    lea.l   (16, %a0), %a0

    move.w  %d4, (%a3)                  /* write third pixel block to LCD */

    move.l  %d3, %d4                    /* calculate fourth pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d3
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    add.l   %d5, %d0                    /* calculate 4*4 new pixel phases */
    add.l   %a4, %d1                    /* (packed addition) */
    add.l   %a5, %d2
    add.l   %a6, %d3

    movem.l %d0-%d3, (%a1)              /* store 4*4 new pixel phases */
    lea.l   (16, %a1), %a1

    move.w  %d4, (%a3)                  /* write fourth pixel block to LCD */

    cmp.l   %a1, %a2                    /* go up to last line bound */
    bhi.s   .g_lloop

.g_lend:
    lea.l   (12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_tend

.g_tloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1
    eor.l   %d1, %d0
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1
    mulu.l  %d7, %d1
    not.l   %d1
    lsr.l   %d6, %d1
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %a2                    /* go up to end address */
    bhi.s   .g_tloop

.g_tend:
    movem.l (%sp), %d2-%d7/%a2-%a6      /* restore registers */
    lea.l   (11*4, %sp), %sp
    rts

.gd_end: