From f053b0d6066291bf865a622bd4969cd782a2deee Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Fri, 11 Jun 2010 19:53:17 +0000 Subject: Port greylib blitting optimisation to MPIO HD200. ISR speedup is ~10%; further speedup should be possible by using line transfers for accessing the greylib buffers. Thanks to Marcin Bukat for testing. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26793 a1c6a512-1295-4272-9138-f99709370657 --- firmware/target/coldfire/mpio/hd200/lcd-as-hd200.S | 71 ++++++++++------------ 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/firmware/target/coldfire/mpio/hd200/lcd-as-hd200.S b/firmware/target/coldfire/mpio/hd200/lcd-as-hd200.S index 7ea0b8f921..9709e210e2 100644 --- a/firmware/target/coldfire/mpio/hd200/lcd-as-hd200.S +++ b/firmware/target/coldfire/mpio/hd200/lcd-as-hd200.S @@ -125,54 +125,45 @@ lcd_mono_data: .type lcd_grey_data,@function lcd_grey_data: - lea.l (-2*4, %sp), %sp - movem.l %a2-%a3, (%sp) - movem.l (2*4+4, %sp), %a0-%a2 /* values, phases, length */ + lea.l (-6*4, %sp), %sp /* reserve stack space for 6 registers */ + movem.l %d2-%d5/%a2-%a3, (%sp) /* save the registers used below */ + movem.l (6*4+4, %sp), %a0-%a2 /* load arguments: values, phases, length */ add.l %a2, %a2 lea.l (%a1, %a2.l*4), %a2 /* end address */ lea.l LCD_BASE_ADDRESS+2, %a3 /* LCD data port address */ + moveq.l #24, %d4 /* shift count: brings bits 31..24 down to bits 7..0 */ + move.l #0x204081, %d5 /* bit shuffle factor: 1 + 2^7 + 2^14 + 2^21 */ + .ph_loop: - clr.l %d1 - move.l (%a1), %d0 /* fetch 4 pixel phases */ - bclr.l #31, %d0 /* Z = !(p0 & 0x80); p0 &= ~0x80; */ - seq.b %d1 /* %d1 = ........................00000000 */ - lsl.l #1, %d1 /* %d1 = .......................00000000. */ - bclr.l #23, %d0 /* Z = !(p1 & 0x80); p1 &= ~0x80; */ - seq.b %d1 /* %d1 = .......................011111111 */ - lsl.l #1, %d1 /* %d1 = ......................011111111. */ - bclr.l #15, %d0 /* Z = !(p2 & 0x80); p2 &= ~0x80; */ - seq.b %d1 /* %d1 = ......................0122222222 */ - lsl.l #1, %d1 /* %d1 = .....................0122222222. 
*/ - bclr.l #7, %d0 /* Z = !(p3 & 0x80); p3 &= ~0x80; */ - seq.b %d1 /* %d1 = .....................01233333333 */ - lsl.l #1, %d1 /* %d1 = ....................01233333333. */ - add.l (%a0)+, %d0 /* add 4 pixel values to the phases */ - move.l %d0, (%a1)+ /* store new phases, advance pointer */ - - move.l (%a1), %d0 /* fetch 4 pixel phases */ - bclr.l #31, %d0 /* Z = !(p0 & 0x80); p0 &= ~0x80; */ - seq.b %d1 /* %d1 = ....................012344444444 */ - lsl.l #1, %d1 /* %d1 = ...................012344444444. */ - bclr.l #23, %d0 /* Z = !(p1 & 0x80); p1 &= ~0x80; */ - seq.b %d1 /* %d1 = ...................0123455555555 */ - lsl.l #1, %d1 /* %d1 = ..................0123455555555. */ - bclr.l #15, %d0 /* Z = !(p2 & 0x80); p2 &= ~0x80; */ - seq.b %d1 /* %d1 = ..................01234566666666 */ - lsl.l #1, %d1 /* %d1 = .................01234566666666. */ - bclr.l #7, %d0 /* Z = !(p3 & 0x80); p3 &= ~0x80; */ - seq.b %d1 /* %d1 = .................012345677777777 */ - lsr.l #7, %d1 /* %d1 = ........................01234567 */ - add.l (%a0)+, %d0 /* add 4 pixel values to the phases */ - move.l %d0, (%a1)+ /* store new phases, advance pointer */ - - move.w %d1, (%a3) /* transfer to lcd */ - move.w %d1, (%a3) /* transfer to lcd */ + movem.l (%a1), %d0-%d1 /* fetch 8 pixel phases */ + + move.l %d0, %d2 /* copy phases 0..3 to extract their bit 7 flags */ + and.l #0x80808080, %d2 /* %d2 = 0.......1.......2.......3....... */ + eor.l %d2, %d0 /* clear bit 7 in each of phases 0..3 */ + add.l (%a0)+, %d0 /* add values to first 4 phases */ + + move.l %d1, %d3 /* copy phases 4..7 to extract their bit 7 flags */ + and.l #0x80808080, %d3 /* %d3 = 4.......5.......6.......7....... */ + eor.l %d3, %d1 /* clear bit 7 in each of phases 4..7 */ + add.l (%a0)+, %d1 /* add values to second 4 phases */ + + lsr.l #4, %d3 /* %d3 = ....4.......5.......6.......7... */ + or.l %d3, %d2 /* %d2 = 0...4...1...5...2...6...3...7... */ + mulu.l %d5, %d2 /* %d2 = 01234567123.567.23..67..3...7... 
*/ + not.l %d2 /* invert bits: a result bit is 1 where the phase's bit 7 was 0 */ + lsr.l %d4, %d2 /* %d2 = ........................01234567 */ + + move.w %d2, (%a3) /* transfer first LCD byte */ + + movem.l %d0-%d1, (%a1) /* store 8 new pixel phases */ + addq.l #8, %a1 /* advance phase pointer past these 8 phases */ + move.w %d2, (%a3) /* transfer second LCD byte (same value as the first) */ cmp.l %a2, %a1 bls.s .ph_loop - movem.l (%sp), %a2-%a3 - lea.l (2*4, %sp), %sp + movem.l (%sp), %d2-%d5/%a2-%a3 /* restore the saved registers */ + lea.l (6*4, %sp), %sp /* release the save area */ rts .grey_end: .size lcd_grey_data,.grey_end-lcd_grey_data -- cgit v1.2.3