From 6a56c14e17f6ba113ec0d4d40e75bffd61b293cc Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Wed, 9 Jan 2008 23:48:26 +0000 Subject: Greyscale library: Changed the internal data format once more (separated pixel values and phases), allowing for further optimisation of drawing, scrolling etc. * Optimised grey phase blitting in the core reduces CPU load on all architectures, most significantly on coldfire. Previous version was too slow to keep up at 45MHz, leading to unwanted graininess (update frequency was halved). Also fixed screendump on 2bpp targets with vertical pixel packing. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16043 a1c6a512-1295-4272-9138-f99709370657 --- firmware/target/arm/ipod/lcd-gray.c | 134 ++++++--------- firmware/target/coldfire/iaudio/m5/lcd-as-m5.S | 189 +++++++++++++++------ firmware/target/coldfire/iaudio/m5/lcd-m5.c | 12 +- firmware/target/coldfire/iriver/h100/lcd-as-h100.S | 189 +++++++++++++++------ firmware/target/coldfire/iriver/h100/lcd-h100.c | 12 +- firmware/target/sh/archos/lcd-archos-bitmap.c | 12 +- firmware/target/sh/archos/lcd-as-archos-bitmap.S | 125 +++++++------- 7 files changed, 412 insertions(+), 261 deletions(-) (limited to 'firmware/target') diff --git a/firmware/target/arm/ipod/lcd-gray.c b/firmware/target/arm/ipod/lcd-gray.c index c7f4074c0b..5734480bee 100644 --- a/firmware/target/arm/ipod/lcd-gray.c +++ b/firmware/target/arm/ipod/lcd-gray.c @@ -301,118 +301,88 @@ void lcd_blit(const unsigned char* data, int bx, int y, int bwidth, /* Performance function that works with an external buffer note that bx and bwidth are in 8-pixel units! */ -void lcd_grey_phase_blit(const struct grey_data *data, int bx, int y, - int bwidth, int height, int stride) +void lcd_grey_phase_blit(unsigned char *values, unsigned char *phases, + int bx, int y, int bwidth, int height, int stride) { - const struct grey_data *addr; - int width; - + unsigned char *val, *ph; + int bw; + while (height--) { lcd_cmd_and_data(R_RAM_ADDR_SET, (y++ << 5) + addr_offset - bx); lcd_prepare_cmd(R_RAM_DATA); - addr = data; - width = bwidth; + val = values; + ph = phases; + bw = bwidth; asm volatile ( "10: \n" - "ldmia %[addr]!, {r0-r3} \n" /* r0 = v1p1v0p0 ... */ + "ldmia %[ph], {r0-r1} \n" /* Fetch 8 pixel phases */ + "ldmia %[val]!, {r2-r3} \n" /* Fetch 8 pixel values */ #ifdef IPOD_MINI2G - "mov r5, #0x7600 \n" + "mov r4, #0x7600 \n" #else - "mov r5, #0 \n" + "mov r4, #0 \n" #endif - - "and r4, r0, %[mask] \n" /* r4 = --p1--p0 */ - "and r0, %[mask], r0, lsr #8 \n" /* r0 = --v1--v0 */ - - "tst r4, #0x80 \n" - "orreq r5, r5, #0xc0 \n" - "tst r4, #0x800000 \n" - "orreq r5, r5, #0x30 \n" - "bic r4, r4, %[clbt] \n" - - "add r4, r0, r4 \n" /* p0 += v0; p1 += v1; */ - "strb r4, [%[addr], #-16] \n" - "mov r4, r4, lsr #16 \n" - "strb r4, [%[addr], #-14] \n" - - "and r4, r1, %[mask] \n" - "and r1, %[mask], r1, lsr #8 \n" - - "tst r4, #0x80 \n" - "orreq r5, r5, #0x0c \n" - "tst r4, #0x800000 \n" - "orreq r5, r5, #0x03 \n" - "bic r4, r4, %[clbt] \n" - - "add r4, r1, r4 \n" - "strb r4, [%[addr], #-12] \n" - "mov r4, r4, lsr #16 \n" - "strb r4, [%[addr], #-10] \n" + "tst r0, #0x80 \n" + "orreq r4, r4, #0xc0 \n" + "tst r0, #0x8000 \n" + "orreq r4, r4, #0x30 \n" + "tst r0, #0x800000 \n" + "orreq r4, r4, #0x0c \n" + "tst r0, #0x80000000 \n" + "orreq r4, r4, #0x03 \n" + "bic r0, r0, %[clbt] \n" + "add r0, r0, r2 \n" #ifdef IPOD_MINI2G - "mov r5, r5, lsl #8 \n" + "mov r4, r4, lsl #8 \n" #else "1: \n" - "ldr r4, [%[lcdb]] \n" - "tst r4, #0x8000 \n" + "ldr r2, [%[lcdb]] \n" + "tst r2, #0x8000 \n" "bne 1b \n" - "str r5, [%[lcdb], #0x10] \n" - "mov r5, #0 \n" + "str r4, [%[lcdb], #0x10] \n" + "mov r4, #0 \n" #endif - "and r4, r2, %[mask] \n" - "and r2, %[mask], r2, lsr #8 \n" - - "tst r4, #0x80 \n" - "orreq r5, r5, #0xc0 \n" - "tst r4, #0x800000 \n" - "orreq r5, r5, #0x30 \n" - "bic r4, r4, %[clbt] \n" - - "add r4, r2, r4 \n" - "strb r4, [%[addr], #-8] \n" - "mov r4, r4, lsr #16 \n" - "strb r4, [%[addr], #-6] \n" - - "and r4, r3, %[mask] \n" - "and r3, %[mask], r3, lsr #8 \n" - - "tst r4, #0x80 \n" - "orreq r5, r5, #0x0c \n" - "tst r4, #0x800000 \n" - "orreq r5, r5, #0x03 \n" - "bic r4, r4, %[clbt] \n" - - "add r4, r3, r4 \n" - "strb r4, [%[addr], #-4] \n" - "mov r4, r4, lsr #16 \n" - "strb r4, [%[addr], #-2] \n" - + "tst r1, #0x80 \n" + "orreq r4, r4, #0xc0 \n" + "tst r1, #0x8000 \n" + "orreq r4, r4, #0x30 \n" + "tst r1, #0x800000 \n" + "orreq r4, r4, #0x0c \n" + "tst r1, #0x80000000 \n" + "orreq r4, r4, #0x03 \n" + "bic r1, r1, %[clbt] \n" + "add r1, r1, r3 \n" + + "stmia %[ph]!, {r0-r1} \n" + "1: \n" - "ldr r4, [%[lcdb]] \n" - "tst r4, #0x8000 \n" + "ldr r2, [%[lcdb]] \n" + "tst r2, #0x8000 \n" "bne 1b \n" #ifdef IPOD_MINI2G - "str r5, [%[lcdb], #0x08] \n" + "str r4, [%[lcdb], #0x08] \n" #else - "str r5, [%[lcdb], #0x10] \n" + "str r4, [%[lcdb], #0x10] \n" #endif - "subs %[wdth], %[wdth], #1 \n" + "subs %[bw], %[bw], #1 \n" "bne 10b \n" : /* outputs */ - [addr]"+r"(addr), - [wdth]"+r"(width) + [val]"+r"(val), + [ph] "+r"(ph), + [bw] "+r"(bw) : /* inputs */ - [mask]"r"(0x00ff00ff), - [clbt]"r"(0x00800080), + [clbt]"r"(0x80808080), [lcdb]"r"(LCD1_BASE) : /* clobbers */ - "r0", "r1", "r2", "r3", "r4", "r5" + "r0", "r1", "r2", "r3", "r4" ); - data += stride; + values += stride; + phases += stride; } } diff --git a/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S b/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S index 7e89815ec8..0ec98e4589 100644 --- a/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S +++ b/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S @@ -88,57 +88,146 @@ lcd_write_data: .type lcd_grey_data,@function lcd_grey_data: - lea.l (-4*4, %sp), %sp - movem.l %d2-%d5, (%sp) - movem.l (4*4+4, %sp), %a0-%a1 /* Data pointer */ - move.l %a1, %d0 /* Length */ - lea 0xf0008002, %a1 /* LCD data port */ - move.l #0xff00ff00, %d2 /* mask for splitting value/phase pairs */ - -.greyloop: - movem.l (%a0), %d4-%d5 /* fetch 4 pixel phase/value pairs at once */ - /* %d4 = p0v0p1v1, %d5 = p2v2p3v3 */ - move.l %d2, %d3 /* copy mask */ - and.l %d4, %d3 /* %d3 = p0--p1-- */ - eor.l %d3, %d4 /* %d4 = --v0--v1 */ - lsr.l #8, %d3 /* %d3 = --p0--p1 */ - - bclr.l #23, %d3 /* Z = !(p0 & 0x80); p0 &= ~0x80; */ - seq.b %d1 /* %d1 = ........................00000000 */ - lsl.l #2, %d1 /* %d1 = ......................00000000.. */ - bclr.l #7, %d3 /* Z = !(p1 & 0x80); p1 &= ~0x80; */ - seq.b %d1 /* %d1 = ......................0011111111 */ - lsl.l #2, %d1 /* %d1 = ....................0011111111.. */ + lea.l (-9*4, %sp), %sp + movem.l %d2-%d5/%a2-%a6, (%sp) /* free some registers */ + movem.l (9*4+4, %sp), %a0-%a2 /* values, phases, length */ + lea.l (%a1, %a2.l*4), %a2 /* end address */ + lea 0xf0008002, %a3 /* LCD data port */ + + moveq.l #15, %d3 + add.l %a1, %d3 + and.l #0xfffffff0, %d3 /* first line bound */ + move.l %a2, %d1 + and.l #0xfffffff0, %d1 /* last line bound */ + cmp.l %d3, %d1 + bls.w .g_tloop /* no lines to copy - jump to tail loop */ + cmp.l %a1, %d0 + bls.s .g_lloop /* no head blocks - jump to line loop */ + +.g_hloop: + move.l (%a1), %d2 /* fetch 4 pixel phases */ + + bclr.l #31, %d2 /* Z = !(p0 & 0x80); p0 &= ~0x80; */ + seq.b %d0 /* %d0 = ........................00000000 */ + lsl.l #2, %d0 /* %d0 = ......................00000000.. */ + bclr.l #23, %d2 /* Z = !(p1 & 0x80); p1 &= ~0x80; */ + seq.b %d0 /* %d0 = ......................0011111111 */ + lsl.l #2, %d0 /* %d0 = ....................0011111111.. */ + bclr.l #15, %d2 /* Z = !(p2 & 0x80); p2 &= ~0x80; */ + seq.b %d0 /* %d0 = ....................001122222222 */ + lsl.l #2, %d0 /* %d0 = ..................001122222222.. */ + bclr.l #7, %d2 /* Z = !(p3 & 0x80); p3 &= ~0x80; */ + seq.b %d0 /* %d0 = ..................00112233333333 */ + lsr.l #6, %d0 /* %d0 = ........................00112233 */ + move.w %d0, (%a3) /* write pixel block */ + + add.l (%a0)+, %d2 /* add 4 pixel values to the phases */ + move.l %d2, (%a1)+ /* store new phases, advance pointer */ + + cmp.l %a1, %d3 /* go up to first line bound */ + bhi.s .g_hloop + +.g_lloop: + movem.l (%a1), %d2-%d5 + + bclr.l #31, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d2 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + bclr.l #31, %d3 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d3 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d3 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d3 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) - add.l %d4, %d3 /* p0 += v0; p1 += v1; */ - move.b %d3, (2, %a0) /* store p1 */ - swap %d3 - move.b %d3, (%a0) /* store p0 */ - - move.l %d2, %d3 /* copy mask */ - and.l %d5, %d3 /* %d3 = p2--p3-- */ - eor.l %d3, %d5 /* %d5 = --v2--v3 */ - lsr.l #8, %d3 /* %d3 = --p2--p3 */ - - bclr.l #23, %d3 /* Z = !(p2 & 0x80); p2 &= ~0x80; */ - seq.b %d1 /* %d1 = ....................001122222222 */ - lsl.l #2, %d1 /* %d1 = ..................001122222222.. */ - bclr.l #7, %d3 /* Z = !(p3 & 0x80); p3 &= ~0x80; */ - seq.b %d1 /* %d1 = ..................00112233333333 */ - lsr.l #6, %d1 /* %d1 = ........................00112233 */ - - add.l %d5, %d3 /* p2 += v2; p3 += v3; */ - move.b %d3, (6, %a0) /* store p3 */ - swap %d3 - move.b %d3, (4, %a0) /* store p2 */ - - move.w %d1, (%a1) /* write pixel block */ - addq.l #8, %a0 /* advance address pointer */ - subq.l #1, %d0 /* any blocks left? */ - bne.b .greyloop - - movem.l (%sp), %d2-%d5 - lea.l (4*4, %sp), %sp + bclr.l #31, %d4 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d4 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d4 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d4 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + bclr.l #31, %d5 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d5 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d5 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d5 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + movem.l (%a0), %d0/%a4-%a6 + lea.l (16, %a0), %a0 + add.l %d0, %d2 + add.l %a4, %d3 + add.l %a5, %d4 + add.l %a6, %d5 + movem.l %d2-%d5, (%a1) + lea.l (16, %a1), %a1 + + cmp.l %a1, %d1 /* go up to last line bound */ + bhi.w .g_lloop + + cmp.l %a1, %a2 + bls.s .g_no_tail + +.g_tloop: + move.l (%a1), %d2 + + bclr.l #31, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d2 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + add.l (%a0)+, %d2 /* go up to end address */ + move.l %d2, (%a1)+ + + cmp.l %a1, %a2 + bhi.s .g_tloop + +.g_no_tail: + movem.l (%sp), %d2-%d5/%a2-%a6 /* restore registers */ + lea.l (9*4, %sp), %sp rts + .gd_end: .size lcd_grey_data,.gd_end-lcd_grey_data diff --git a/firmware/target/coldfire/iaudio/m5/lcd-m5.c b/firmware/target/coldfire/iaudio/m5/lcd-m5.c index 4f963795c7..be9d5a39b8 100644 --- a/firmware/target/coldfire/iaudio/m5/lcd-m5.c +++ b/firmware/target/coldfire/iaudio/m5/lcd-m5.c @@ -171,10 +171,13 @@ void lcd_blit(const unsigned char* data, int x, int by, int width, } } +/* Helper function for lcd_grey_phase_blit(). */ +void lcd_grey_data(unsigned char *values, unsigned char *phases, int count); + /* Performance function that works with an external buffer note that by and bheight are in 4-pixel units! */ -void lcd_grey_phase_blit(const struct grey_data *data, int x, int by, - int width, int bheight, int stride) +void lcd_grey_phase_blit(unsigned char *values, unsigned char *phases, + int x, int by, int width, int bheight, int stride) { stride <<= 2; /* 4 pixels per block */ while (bheight--) @@ -182,8 +185,9 @@ void lcd_grey_phase_blit(const struct grey_data *data, int x, int by, lcd_write_command_ex(LCD_CNTL_PAGE, by++, -1); lcd_write_command_ex(LCD_CNTL_COLUMN, x, -1); lcd_write_command(LCD_CNTL_DATA_WRITE); - lcd_grey_data(data, width); - data += stride; + lcd_grey_data(values, phases, width); + values += stride; + phases += stride; } } diff --git a/firmware/target/coldfire/iriver/h100/lcd-as-h100.S b/firmware/target/coldfire/iriver/h100/lcd-as-h100.S index df410fa379..776e22a6c4 100644 --- a/firmware/target/coldfire/iriver/h100/lcd-as-h100.S +++ b/firmware/target/coldfire/iriver/h100/lcd-as-h100.S @@ -100,59 +100,148 @@ lcd_write_data: .type lcd_grey_data,@function lcd_grey_data: - lea.l (-4*4, %sp), %sp - movem.l %d2-%d5, (%sp) - movem.l (4*4+4, %sp), %a0-%a1 /* Data pointer */ - move.l %a1, %d0 /* Length */ + lea.l (-9*4, %sp), %sp + movem.l %d2-%d5/%a2-%a6, (%sp) /* free some registers */ + movem.l (9*4+4, %sp), %a0-%a2 /* values, phases, length */ + lea.l (%a1, %a2.l*4), %a2 /* end address */ moveq #8, %d1 or.l %d1, (MBAR2+0xb4) /* A0 = 1 (data) */ - lea 0xf0000000, %a1 /* LCD data port */ - move.l #0xff00ff00, %d2 /* mask for splitting value/phase pairs */ - -.greyloop: - movem.l (%a0), %d4-%d5 /* fetch 4 pixel phase/value pairs at once */ - /* %d4 = p0v0p1v1, %d5 = p2v2p3v3 */ - move.l %d2, %d3 /* copy mask */ - and.l %d4, %d3 /* %d3 = p0--p1-- */ - eor.l %d3, %d4 /* %d4 = --v0--v1 */ - lsr.l #8, %d3 /* %d3 = --p0--p1 */ - - bclr.l #23, %d3 /* Z = !(p0 & 0x80); p0 &= ~0x80; */ - seq.b %d1 /* %d1 = ........................00000000 */ - lsl.l #2, %d1 /* %d1 = ......................00000000.. */ - bclr.l #7, %d3 /* Z = !(p1 & 0x80); p1 &= ~0x80; */ - seq.b %d1 /* %d1 = ......................0011111111 */ - lsl.l #2, %d1 /* %d1 = ....................0011111111.. */ + lea 0xf0000000, %a3 /* LCD data port */ + + moveq.l #15, %d3 + add.l %a1, %d3 + and.l #0xfffffff0, %d3 /* first line bound */ + move.l %a2, %d1 + and.l #0xfffffff0, %d1 /* last line bound */ + cmp.l %d3, %d1 + bls.w .g_tloop /* no lines to copy - jump to tail loop */ + cmp.l %a1, %d0 + bls.s .g_lloop /* no head blocks - jump to line loop */ + +.g_hloop: + move.l (%a1), %d2 /* fetch 4 pixel phases */ + + bclr.l #31, %d2 /* Z = !(p0 & 0x80); p0 &= ~0x80; */ + seq.b %d0 /* %d0 = ........................00000000 */ + lsl.l #2, %d0 /* %d0 = ......................00000000.. */ + bclr.l #23, %d2 /* Z = !(p1 & 0x80); p1 &= ~0x80; */ + seq.b %d0 /* %d0 = ......................0011111111 */ + lsl.l #2, %d0 /* %d0 = ....................0011111111.. */ + bclr.l #15, %d2 /* Z = !(p2 & 0x80); p2 &= ~0x80; */ + seq.b %d0 /* %d0 = ....................001122222222 */ + lsl.l #2, %d0 /* %d0 = ..................001122222222.. */ + bclr.l #7, %d2 /* Z = !(p3 & 0x80); p3 &= ~0x80; */ + seq.b %d0 /* %d0 = ..................00112233333333 */ + lsr.l #6, %d0 /* %d0 = ........................00112233 */ + move.w %d0, (%a3) /* write pixel block */ + + add.l (%a0)+, %d2 /* add 4 pixel values to the phases */ + move.l %d2, (%a1)+ /* store new phases, advance pointer */ + + cmp.l %a1, %d3 /* go up to first line bound */ + bhi.s .g_hloop + +.g_lloop: + movem.l (%a1), %d2-%d5 - add.l %d4, %d3 /* p0 += v0; p1 += v1; */ - move.b %d3, (2, %a0) /* store p1 */ - swap %d3 - move.b %d3, (%a0) /* store p0 */ - - move.l %d2, %d3 /* copy mask */ - and.l %d5, %d3 /* %d3 = p2--p3-- */ - eor.l %d3, %d5 /* %d5 = --v2--v3 */ - lsr.l #8, %d3 /* %d3 = --p2--p3 */ - - bclr.l #23, %d3 /* Z = !(p2 & 0x80); p2 &= ~0x80; */ - seq.b %d1 /* %d1 = ....................001122222222 */ - lsl.l #2, %d1 /* %d1 = ..................001122222222.. */ - bclr.l #7, %d3 /* Z = !(p3 & 0x80); p3 &= ~0x80; */ - seq.b %d1 /* %d1 = ..................00112233333333 */ - lsr.l #6, %d1 /* %d1 = ........................00112233 */ - - add.l %d5, %d3 /* p2 += v2; p3 += v3; */ - move.b %d3, (6, %a0) /* store p3 */ - swap %d3 - move.b %d3, (4, %a0) /* store p2 */ - - move.w %d1, (%a1) /* write pixel block */ - addq.l #8, %a0 /* advance address pointer */ - subq.l #1, %d0 /* any blocks left? */ - bne.b .greyloop - - movem.l (%sp), %d2-%d5 - lea.l (4*4, %sp), %sp + bclr.l #31, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d2 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + bclr.l #31, %d3 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d3 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d3 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d3 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + bclr.l #31, %d4 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d4 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d4 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d4 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + bclr.l #31, %d5 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d5 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d5 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d5 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + movem.l (%a0), %d0/%a4-%a6 + lea.l (16, %a0), %a0 + add.l %d0, %d2 + add.l %a4, %d3 + add.l %a5, %d4 + add.l %a6, %d5 + movem.l %d2-%d5, (%a1) + lea.l (16, %a1), %a1 + + cmp.l %a1, %d1 /* go up to last line bound */ + bhi.w .g_lloop + + cmp.l %a1, %a2 + bls.s .g_no_tail + +.g_tloop: + move.l (%a1), %d2 + + bclr.l #31, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #23, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #15, %d2 + seq.b %d0 + lsl.l #2, %d0 + bclr.l #7, %d2 + seq.b %d0 + lsr.l #6, %d0 + move.w %d0, (%a3) + + add.l (%a0)+, %d2 + move.l %d2, (%a1)+ + + cmp.l %a1, %a2 /* go up to end address */ + bhi.s .g_tloop + +.g_no_tail: + movem.l (%sp), %d2-%d5/%a2-%a6 /* restore registers */ + lea.l (9*4, %sp), %sp rts + .gd_end: .size lcd_grey_data,.gd_end-lcd_grey_data diff --git a/firmware/target/coldfire/iriver/h100/lcd-h100.c b/firmware/target/coldfire/iriver/h100/lcd-h100.c index c17de952c3..2ba19255da 100644 --- a/firmware/target/coldfire/iriver/h100/lcd-h100.c +++ b/firmware/target/coldfire/iriver/h100/lcd-h100.c @@ -180,10 +180,13 @@ void lcd_blit(const unsigned char* data, int x, int by, int width, } } +/* Helper function for lcd_grey_phase_blit(). */ +void lcd_grey_data(unsigned char *values, unsigned char *phases, int count); + /* Performance function that works with an external buffer note that by and bheight are in 4-pixel units! */ -void lcd_grey_phase_blit(const struct grey_data *data, int x, int by, - int width, int bheight, int stride) +void lcd_grey_phase_blit(unsigned char *values, unsigned char *phases, + int x, int by, int width, int bheight, int stride) { stride <<= 2; /* 4 pixels per block */ while (bheight--) @@ -191,8 +194,9 @@ void lcd_grey_phase_blit(const struct grey_data *data, int x, int by, lcd_write_command_ex(LCD_CNTL_PAGE, by++, -1); lcd_write_command_ex(LCD_CNTL_COLUMN, x, -1); lcd_write_command(LCD_CNTL_DATA_WRITE); - lcd_grey_data(data, width); - data += stride; + lcd_grey_data(values, phases, width); + values += stride; + phases += stride; } } diff --git a/firmware/target/sh/archos/lcd-archos-bitmap.c b/firmware/target/sh/archos/lcd-archos-bitmap.c index 17c4d76092..18d48f4ead 100644 --- a/firmware/target/sh/archos/lcd-archos-bitmap.c +++ b/firmware/target/sh/archos/lcd-archos-bitmap.c @@ -155,10 +155,13 @@ void lcd_blit(const unsigned char* data, int x, int by, int width, } } +/* Helper function for lcd_grey_phase_blit(). */ +void lcd_grey_data(unsigned char *values, unsigned char *phases, int count); + /* Performance function that works with an external buffer note that by and bheight are in 8-pixel units! */ -void lcd_grey_phase_blit(const struct grey_data *data, int x, int by, - int width, int bheight, int stride) +void lcd_grey_phase_blit(unsigned char *values, unsigned char *phases, + int x, int by, int width, int bheight, int stride) { stride <<= 3; /* 8 pixels per block */ while (bheight--) @@ -167,8 +170,9 @@ void lcd_grey_phase_blit(const struct grey_data *data, int x, int by, lcd_write_command (LCD_CNTL_HIGHCOL | (((x+xoffset)>>4) & 0xf)); lcd_write_command (LCD_CNTL_LOWCOL | ((x+xoffset) & 0xf)); - lcd_grey_data(data, width); - data += stride; + lcd_grey_data(values, phases, width); + values += stride; + phases += stride; } } diff --git a/firmware/target/sh/archos/lcd-as-archos-bitmap.S b/firmware/target/sh/archos/lcd-as-archos-bitmap.S index 492413fdb7..a60ea9aad1 100644 --- a/firmware/target/sh/archos/lcd-as-archos-bitmap.S +++ b/firmware/target/sh/archos/lcd-as-archos-bitmap.S @@ -210,28 +210,35 @@ _lcd_write_data: * one or multiple pixels. * * Arguments: - * r4 - data address, (phase,value)-pairs - * r5 - pixel block count + * r4 - pixel value data address + * r5 - pixel phase data address + * r6 - pixel block count * * Register usage: - * r0 - current pixel value + * r0 - scratch / phase signs mask * r1 - scratch * r2 - precalculated port value (CS and SC low, DS and SD high), * negated (neg)! * r3 - lcd port address - * r5 - end address - * r6/r7 - current/next pixel phase - * r8 - current block address (for writing back phase) - * r9 - 0x80 (for phase modification) + * r4 - current value address + * r5 - current phase address + * r6 - end address + * r7/r8 - current/next pixel phase + * r9 - current pixel value + * r10 - 0x00000080 \ + * r11 - 0x00008000 > for phase sign check + * r12 - 0x00800000 / */ _lcd_grey_data: mov.l r8, @-r15 /* save r8 */ - shll2 r5 /* v */ + shll2 r6 /* v */ mov.l r9, @-r15 /* save r9 */ - shll2 r5 /* r5 *= 16; (8 pixel per block * 2 bytes/pixel) */ + shll r6 /* r6 *= 8; (8 pixels per block) */ + mov.l r10, @-r15 /* save r10 */ + add r4, r6 /* end address */ mov.l .lcdr, r3 /* put lcd data port address in r3 */ - add r4, r5 /* end address */ + nop /* keep alignment */ /* This code will fail if an interrupt changes the contents of PBDRL. * If so, we must disable the interrupt here. If disabling interrupts @@ -240,110 +247,90 @@ _lcd_grey_data: * this would significantly decrease performance. */ mov.b @r3, r0 /* r0 = PBDRL */ - mov #0x80, r9 /* for phase modification - "or #imm,xx" only allows r0 */ - mov.b @r4+, r6 /* fetch first pixel phase */ or #(LCD_DS|LCD_SD), r0 /* r0 |= LCD_DS|LCD_SD */ + mov.l r11, @-r15 /* save r11 */ and #(~(LCD_CS|LCD_SC)), r0 /* r0 &= ~(LCD_CS|LCD_SC) */ + mov.l r12, @-r15 /* save r12 */ neg r0, r2 /* r2 = 0 - r0 */ - mov #-3, r0 /* offset for storing phase */ /* loop exploits that SD is on bit 0 for recorders and Ondios */ -.greyloop: - cmp/pz r6 /* phase non-negative? */ - mov.b @r4+, r8 /* fetch pixel value */ - negc r2, r1 /* T -> SD, SC low */ - mov.b r1, @r3 /* set port */ - or r9, r6 /* r6 -= (r6 >= 0) ? 128 : 0; */ - mov.b @r4+, r7 /* fetch next pixel phase */ - add #(LCD_SC), r1 /* rise SC */ - mov.b r1, @r3 /* set port */ - add r8, r6 /* calculate new phase */ - mov.b r6, @(r0,r4) /* store phase */ + mov.w .ptest, r10 + swap.b r10, r11 + mov.l @r5, r7 + swap.w r10, r12 + mov.l .pmask, r0 +.greyloop: cmp/pz r7 - mov.b @r4+, r8 + mov.l @r4+, r9 negc r2, r1 mov.b r1, @r3 - or r9, r7 - mov.b @r4+, r6 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r7 - mov.b r7, @(r0,r4) - cmp/pz r6 - mov.b @r4+, r8 + tst r12, r7 + mov.l @(4,r5), r8 negc r2, r1 mov.b r1, @r3 - or r9, r6 - mov.b @r4+, r7 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r6 - mov.b r6, @(r0,r4) - cmp/pz r7 - mov.b @r4+, r8 + tst r11, r7 negc r2, r1 + tst r10, r7 mov.b r1, @r3 - or r9, r7 - mov.b @r4+, r6 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r7 - mov.b r7, @(r0,r4) - cmp/pz r6 - mov.b @r4+, r8 + or r0, r7 + sub r9, r7 negc r2, r1 mov.b r1, @r3 - or r9, r6 - mov.b @r4+, r7 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r6 - mov.b r6, @(r0,r4) - cmp/pz r7 - mov.b @r4+, r8 + cmp/pz r8 + mov.l r7, @r5 negc r2, r1 mov.b r1, @r3 - or r9, r7 - mov.b @r4+, r6 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r7 - mov.b r7, @(r0,r4) - cmp/pz r6 - mov.b @r4+, r8 + tst r12, r8 + mov.l @r4+, r9 negc r2, r1 mov.b r1, @r3 - or r9, r6 - mov.b @r4+, r7 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r6 - mov.b r6, @(r0,r4) - cmp/pz r7 - mov.b @r4+, r8 + tst r11, r8 + mov.l @(8,r5), r7 negc r2, r1 mov.b r1, @r3 - or r9, r7 - mov.b @r4+, r6 add #(LCD_SC), r1 mov.b r1, @r3 - add r8, r7 - mov.b r7, @(r0,r4) - cmp/hi r4, r5 /* some blocks left? */ + tst r10, r8 + or r0, r8 + negc r2, r1 + mov.b r1, @r3 + add #(LCD_SC), r1 + mov.b r1, @r3 + + sub r9, r8 + mov.l r8, @(4,r5) + + add #8, r5 + cmp/hi r4, r6 bt .greyloop + mov.l @r15+, r12 /* restore r12 */ mov #(LCD_CS|LCD_DS|LCD_SD|LCD_SC), r0 - mov.l @r15+, r9 /* restore r9 */ + mov.l @r15+, r11 /* restore r11 */ or r0, r1 /* restore port */ + mov.l @r15+, r10 /* restore r10 */ + mov.l @r15+, r9 /* restore r9 */ mov.l @r15+, r8 /* restore r8 */ rts mov.b r1, @r3 @@ -351,7 +338,11 @@ _lcd_grey_data: /* This is the place to reenable the interrupts, if we have disabled * them. See above. */ +.ptest: + .short 0x0080 .align 2 .lcdr: .long LCDR +.pmask: + .long 0x80808080 -- cgit v1.2.3