From 013ab3dd3d49059a4896bce4498a48b91ade2bdc Mon Sep 17 00:00:00 2001 From: Michael Sevakis Date: Tue, 15 Aug 2006 23:55:31 +0000 Subject: Added assembly lcd_yuv_blit for iAudio X5 and misc. display related changes git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10599 a1c6a512-1295-4272-9138-f99709370657 --- firmware/target/coldfire/iaudio/x5/lcd-as-x5.S | 334 ++++++++++++++++++++++++- firmware/target/coldfire/iaudio/x5/lcd-x5.c | 135 +++------- 2 files changed, 364 insertions(+), 105 deletions(-) diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S index 323710cc57..1a527bb8f3 100644 --- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S +++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S @@ -23,10 +23,342 @@ .section .icode,"ax",@progbits +/* begin lcd_write_yuv420_lines + * + * See http://en.wikipedia.org/wiki/YCbCr + * ITU-R BT.601 (formerly CCIR 601): + * |Y'| | 0.299000 0.587000 0.114000| |R| + * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y') + * |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y') + * Scaled, normalized and rounded: + * |Y'| | 65 129 25| |R| + 16 : 16->235 + * |Cb| = |-38 -74 112| |G| + 128 : 16->240 + * |Cr| |112 -94 -18| |B| + 128 : 16->240 + * + * The inverse: + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 666: + * |R| |74 0 102| |Y' - 16| / 256 + * |G| = |74 -25 -52| |Cb - 128| / 256 + * |B| |74 129 0| |Cr - 128| / 256 + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines,@function +lcd_write_yuv420_lines: + lea.l (-40,%sp),%sp /* free up some registers */ + movem.l %d2-%d7/%a2-%a5,(%sp) + + lea.l 0xf0008002,%a0 /* LCD data port */ + move.l (40+4,%sp),%a1 /* Y data */ + move.l (40+8,%sp),%a2 /* Cb data */ + move.l (40+12,%sp),%a3 /* guv storage */ + move.l (40+16,%sp),%a4 /* Cr data */ + move.l 
(40+20,%sp),%d0 /* width */ + lea.l (%a1,%d0.l),%a5 /* end address */ + +.yuv_line_loop1: + /** Write first pixel **/ + clr.l %d1 /* get y component */ + move.b (%a1)+,%d1 + subq.l #8,%d1 + subq.l #8,%d1 + moveq.l #74,%d6 + muls.w %d6,%d1 + asr.l #8,%d1 + + clr %d2 /* get bu component */ + move.b (%a2),%d2 + moveq.l #-128,%d6 + add.l %d6,%d2 + move.l %d2,%d3 /* %d3 = cb component for guv */ + move.w #129,%d6 + muls.w %d6,%d2 + asr.l #8,%d2 + move.b %d2,(%a2)+ /* save bu for next line */ + + moveq.l #-25,%d6 /* multiply first term of guv */ + muls.w %d6,%d3 + + clr %d4 /* get rv component */ + move.b (%a4),%d4 + moveq.l #-128,%d6 + add.l %d6,%d4 + move.l %d4,%d7 /* %d7 = cr component for guv */ + moveq.l #102,%d6 + muls.w %d6,%d4 + asr.l #8,%d4 + move.b %d4,(%a4)+ /* save rv for next line */ + + moveq.l #-52,%d6 /* multiply second term of guv */ + muls.w %d6,%d7 + add.l %d7,%d3 + asr.l #8,%d3 + move.b %d3,(%a3)+ /* save guv for next line */ + /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */ + + move.l %d1,%d5 /* get r */ + add.l %d4,%d5 + move.l %d1,%d6 /* get g */ + add.l %d3,%d6 + move.l %d1,%d7 /* get b */ + add.l %d2,%d7 + + move.l %d7,%d1 /* is clamping needed? 
*/ + or.l %d6,%d1 + or.l %d5,%d1 + asr.l #6,%d1 + beq.b .yuv_no_clamp1 /* values in range: skip clamping */ + bpl.b .yuv_r63_test1 /* no negative values: skip to high bounds checks */ +.yuv_r0_test1: + clr.l %d1 /* check for any values < 0 */ + cmp.l %d1,%d5 + bgt.b .yuv_g0_test1 + clr.l %d5 +.yuv_g0_test1: + cmp.l %d1,%d6 + bgt.b .yuv_b0_test1 + clr.l %d6 +.yuv_b0_test1: + cmp.l %d1,%d7 + bgt.b .yuv_r63_test1 + clr.l %d7 +.yuv_r63_test1: /* check for any values > 63 */ + moveq.l #63,%d1 + cmp.l %d1,%d5 + blt.b .yuv_g63_test1 + move.l %d1,%d5 +.yuv_g63_test1: + cmp.l %d1,%d6 + blt.b .yuv_b63_test1 + move.l %d1,%d6 +.yuv_b63_test1: + cmp.l %d1,%d7 + blt.b .yuv_no_clamp1 + move.l %d1,%d7 +.yuv_no_clamp1: + /* : %d5 = R, %d6 = G, %d7 = B */ + + move.l %d6,%d1 /* save g for lower 9 bits */ + lsl.l #3,%d5 /* R << 3 */ + lsr.l #3,%d1 /* G >> 3 */ + or.l %d5,%d1 + move.w %d1,(%a0) /* |00000000|00000000|0000000r|rrrrrggg| */ + lsl.l #6,%d6 /* G << 6 */ + or.l %d6,%d7 /* |00000000|00000000|0000gggg|ggbbbbbb| */ + move.w %d7,(%a0) + + /** Write second pixel **/ + clr %d1 + move.b (%a1)+,%d1 /* get y component */ + subq.l #8,%d1 + subq.l #8,%d1 + moveq.l #74,%d6 + muls.w %d6,%d1 + asr.l #8,%d1 + /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */ + + /* Add Y + each chroma component (can clobber %d2-%d4 values now) */ + add.l %d1,%d4 /* get r */ + add.l %d1,%d3 /* get g */ + add.l %d1,%d2 /* get b */ + + move.l %d2,%d1 /* is clamping needed? 
*/ + or.l %d3,%d1 + or.l %d4,%d1 + asr.l #6,%d1 + beq.b .yuv_no_clamp2 /* values in range: skip clamping */ + bpl.b .yuv_r63_test2 /* no negative values: skip to high bounds checks */ +.yuv_r0_test2: + clr.l %d1 /* check for any values < 0 */ + cmp.l %d1,%d4 + bgt.b .yuv_g0_test2 + clr.l %d4 +.yuv_g0_test2: + cmp.l %d1,%d3 + bgt.b .yuv_b0_test2 + clr.l %d3 +.yuv_b0_test2: + cmp.l %d1,%d2 + bgt.b .yuv_r63_test2 + clr.l %d2 +.yuv_r63_test2: /* check for any values > 63 */ + moveq.l #63,%d1 + cmp.l %d1,%d4 + blt.b .yuv_g63_test2 + move.l %d1,%d4 +.yuv_g63_test2: + cmp.l %d1,%d3 + blt.b .yuv_b63_test2 + move.l %d1,%d3 +.yuv_b63_test2: + cmp.l %d1,%d2 + blt.b .yuv_no_clamp2 + move.l %d1,%d2 +.yuv_no_clamp2: + /* : %d4 = R, %d3 = G, %d2 = B */ + + move.l %d3,%d1 /* save g for lower 9 bits */ + lsl.l #3,%d4 /* R << 3 */ + lsr.l #3,%d1 /* G >> 3 */ + or.l %d4,%d1 /* |00000000|00000000|0000000r|rrrrrggg| */ + move.w %d1,(%a0) + lsl.l #6,%d3 /* G << 6 */ + or.l %d3,%d2 /* |00000000|00000000|0000gggg|ggbbbbbb| */ + move.w %d2,(%a0) + + cmp.l %a1,%a5 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop1 + + /* Rewind chroma pointers */ + move.l (40+8,%sp),%a2 /* bu data */ + move.l (40+12,%sp),%a3 /* guv data */ + move.l (40+16,%sp),%a4 /* rv data */ + lea.l (%a5,%d0),%a5 /* next end address */ + +.yuv_line_loop2: + clr %d1 + move.b (%a1)+,%d1 /* get y component */ + subq.l #8,%d1 + subq.l #8,%d1 + moveq.l #74,%d6 + muls.w %d6,%d1 + asr.l #8,%d1 + + move.b (%a2)+,%d2 /* read saved chromas and sign extend */ + extb.l %d2 + move.b (%a3)+,%d3 + extb.l %d3 + move.b (%a4)+,%d4 + extb.l %d4 + /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */ + + move.l %d1,%d5 /* get r */ + add.l %d4,%d5 + move.l %d1,%d6 /* get g */ + add.l %d3,%d6 + move.l %d1,%d7 /* get b */ + add.l %d2,%d7 + + move.l %d7,%d1 /* is clamping needed? 
*/ + or.l %d6,%d1 + or.l %d5,%d1 + asr.l #6,%d1 + beq.b .yuv_no_clamp3 /* values in range: skip clamping */ + bpl.b .yuv_r63_test3 /* no negative values: skip to high bounds checks */ +.yuv_r0_test3: + clr.l %d1 /* check for any values < 0 */ + cmp.l %d1,%d5 + bgt.b .yuv_g0_test3 + clr.l %d5 +.yuv_g0_test3: + cmp.l %d1,%d6 + bgt.b .yuv_b0_test3 + clr.l %d6 +.yuv_b0_test3: + cmp.l %d1,%d7 + bgt.b .yuv_r63_test3 + clr.l %d7 +.yuv_r63_test3: /* check for any values > 63 */ + moveq.l #63,%d1 + cmp.l %d1,%d5 + blt.b .yuv_g63_test3 + move.l %d1,%d5 +.yuv_g63_test3: + cmp.l %d1,%d6 + blt.b .yuv_b63_test3 + move.l %d1,%d6 +.yuv_b63_test3: + cmp.l %d1,%d7 + blt.b .yuv_no_clamp3 + move.l %d1,%d7 +.yuv_no_clamp3: + /* : %d5 = R, %d6 = G, %d7 = B */ + + move.l %d6,%d1 /* save g for lower 9 bits */ + lsl.l #3,%d5 /* R << 3 */ + lsr.l #3,%d1 /* G >> 3 */ + or.l %d5,%d1 + move.w %d1,(%a0) /* |00000000|00000000|0000000r|rrrrrggg| */ + lsl.l #6,%d6 /* G << 6 */ + or.l %d6,%d7 /* |00000000|00000000|0000gggg|ggbbbbbb| */ + move.w %d7,(%a0) + + /** Write second pixel **/ + clr %d1 + move.b (%a1)+,%d1 /* get y component */ + subq.l #8,%d1 + subq.l #8,%d1 + moveq.l #74,%d6 + muls.w %d6,%d1 + asr.l #8,%d1 + /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */ + + /* Add Y + each chroma component (can clobber %d2-%d4 values now) */ + add.l %d1,%d4 /* get r */ + add.l %d1,%d3 /* get g */ + add.l %d1,%d2 /* get b */ + + move.l %d2,%d1 /* is clamping needed? 
*/ + or.l %d3,%d1 + or.l %d4,%d1 + asr.l #6,%d1 + beq.b .yuv_no_clamp4 /* values in range: skip clamping */ + bpl.b .yuv_r63_test4 /* no negative values: skip to high bounds checks */ +.yuv_r0_test4: + clr.l %d1 /* check for any values < 0 */ + cmp.l %d1,%d4 + bgt.b .yuv_g0_test4 + clr.l %d4 +.yuv_g0_test4: + cmp.l %d1,%d3 + bgt.b .yuv_b0_test4 + clr.l %d3 +.yuv_b0_test4: + cmp.l %d1,%d2 + bgt.b .yuv_r63_test4 + clr.l %d2 +.yuv_r63_test4: /* check for any values > 63 */ + moveq.l #63,%d1 + cmp.l %d1,%d4 + blt.b .yuv_g63_test4 + move.l %d1,%d4 +.yuv_g63_test4: + cmp.l %d1,%d3 + blt.b .yuv_b63_test4 + move.l %d1,%d3 +.yuv_b63_test4: + cmp.l %d1,%d2 + blt.b .yuv_no_clamp4 + move.l %d1,%d2 +.yuv_no_clamp4: + /* : %d4 = R, %d3 = G, %d2 = B */ + + move.l %d3,%d1 /* save g for lower 9 bits */ + lsl.l #3,%d4 /* R << 3 */ + lsr.l #3,%d1 /* G >> 3 */ + or.l %d4,%d1 /* |00000000|00000000|0000000r|rrrrrggg| */ + move.w %d1,(%a0) + lsl.l #6,%d3 /* G << 6 */ + or.l %d3,%d2 /* |00000000|00000000|0000gggg|ggbbbbbb| */ + move.w %d2,(%a0) + + cmp.l %a1,%a5 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop2 + + movem.l (%sp),%d2-%d7/%a2-%a5 + lea.l (40,%sp),%sp /* restore registers */ + + rts +/* end lcd_write_yuv420_lines */ + + +/* begin lcd_write_data */ .align 2 .global lcd_write_data .type lcd_write_data,@function - lcd_write_data: move.l (4,%sp),%a0 /* data pointer */ move.l (8,%sp),%d0 /* length in words */ diff --git a/firmware/target/coldfire/iaudio/x5/lcd-x5.c b/firmware/target/coldfire/iaudio/x5/lcd-x5.c index 240b1db586..e1fe23a442 100755 --- a/firmware/target/coldfire/iaudio/x5/lcd-x5.c +++ b/firmware/target/coldfire/iaudio/x5/lcd-x5.c @@ -100,25 +100,6 @@ static inline void lcd_begin_write_gram(void) LCD_CMD = R_WRITE_DATA_2_GRAM << 1; } -static inline void lcd_write_one(unsigned short px) -{ - unsigned short pxsr = px >> 8; - LCD_DATA = pxsr + (pxsr & 0x1F8); - LCD_DATA = px << 1; -} - -/* Write two pixels to gram from a long */ -/* called very frequently - 
inline! */ -static inline void lcd_write_two(unsigned long px2) -{ - unsigned short px2sr = px2 >> 24; - LCD_DATA = px2sr + (px2sr & 0x1F8); - LCD_DATA = px2 >> 15; - px2sr = px2 >> 8; - LCD_DATA = px2sr + (px2sr & 0x1F8); - LCD_DATA = px2 << 1; -} - /*** hardware configuration ***/ int lcd_default_contrast(void) @@ -423,45 +404,38 @@ void lcd_blit(const fb_data* data, int x, int by, int width, /*if(display_on)*/ } -/* Performance function to blit a YUV bitmap directly to the LCD */ -/* Assumes YCrCb 4:2:0. */ -/* - See http://en.wikipedia.org/wiki/YCbCr - ITU-R BT.601 (formerly CCIR 601): - |Y'| | 0.299000 0.587000 0.114000| |R| - |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334 * (B - Y') - |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267 * (R - Y') - Scaled, normalized and rounded: - |Y'| | 65 129 25| |R| + 16 : 16->235 - |Cb| = |-38 -74 112| |G| + 128 : 16->240 - |Cr| |112 -94 -18| |B| + 128 : 16->240 - - The inverse: - |R| |1.000000 -0.000001 1.402000| |Y'| - |G| = |1.000000 -0.334136 -0.714136| |Pb| - |B| |1.000000 1.772000 0.000000| |Pr| - Scaled, normalized, rounded and tweaked to yield RGB 666: - |R| |298 0 409| |Y' - 16| / 1024 - |G| = |298 -100 -208| |Cb - 128| / 1024 - |B| |298 516 0| |Cr - 128| / 1024 -*/ -void lcd_yuv_blit(unsigned char * const [3], int, int, int, - int, int, int, int) ICODE_ATTR; +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. + * y should have two lines of Y back to back. + * bu and rv should contain the Cb and Cr data for the two lines of Y. + * Stores bu, guv and rv in respective buffers for use in second line. + */ +extern void lcd_write_yuv420_lines(const unsigned char *y, + unsigned char *bu, unsigned char *guv, unsigned char *rv, + int width); + +/* Performance function to blit a YUV bitmap directly to the LCD + * src_x, src_y, width and height should be even and within the LCD's + * boundaries. 
+ */ void lcd_yuv_blit(unsigned char * const src[3], int src_x, int src_y, int stride, int x, int y, int width, int height) { + /* IRAM Y, Cb/bu, guv and Cr/rv buffers. */ + unsigned char y_ibuf[LCD_WIDTH*2]; + unsigned char bu_ibuf[LCD_WIDTH/2]; + unsigned char guv_ibuf[LCD_WIDTH/2]; + unsigned char rv_ibuf[LCD_WIDTH/2]; const unsigned char *ysrc, *usrc, *vsrc; - int uv_stepper, uv_step, y_end; + const unsigned char *ysrc_max; if (!display_on) return; width = (width + 1) & ~1; height = (height + 1) & ~1; - y_end = y + height; - /* Set start position and window */ + /* Set start position and window */ lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | (((y + roll_offset) & 127) + y_offset)); lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x); @@ -471,72 +445,26 @@ void lcd_yuv_blit(unsigned char * const src[3], ysrc = src[0] + src_y*stride + src_x; usrc = src[1] + (src_y*stride >> 2) + (src_x >> 1); vsrc = src[2] + (usrc - src[1]); - - stride = stride - width; /* Use end of current line->start of next */ - uv_stepper = (stride >> 1) - (width >> 1); - uv_step = uv_stepper - (stride >> 1); + ysrc_max = ysrc + height*stride; do { - const unsigned char *ysrc_end = ysrc + width; - - do - { - int lum, cb, cr; - int rv, guv, bu; - int r, g, b; - - lum = 298* *ysrc++ - 4768; /* 298*16 */ - cb = *usrc++ - 128; - cr = *vsrc++ - 128; - bu = 516*cb; - guv = -100*cb - 208*cr; - rv = 409*cr; - - r = (lum + rv) >> 10; - g = (lum + guv) >> 10; - b = (lum + bu) >> 10; - - if ((unsigned)r > 63) - r = (r < 0) ? 0 : 63; - if ((unsigned)g > 63) - g = (g < 0) ? 0 : 63; - if ((unsigned)b > 63) - b = (b < 0) ? 0 : 63; - - LCD_DATA = (r << 3) | (g >> 3); - LCD_DATA = (g << 6) | b; - - lum = 298* *ysrc++ - 4768; /* 298*16 */ - r = (lum + rv) >> 10; - g = (lum + guv) >> 10; - b = (lum + bu) >> 10; - - if ((unsigned)r > 63) - r = (r < 0) ? 0 : 63; - if ((unsigned)g > 63) - g = (g < 0) ? 0 : 63; - if ((unsigned)b > 63) - b = (b < 0) ? 
0 : 63; - - LCD_DATA = (r << 3) | (g >> 3); - LCD_DATA = (g << 6) | b; - } - while (ysrc < ysrc_end); - - usrc += uv_step; - vsrc += uv_step; - uv_step = uv_stepper - uv_step; - - ysrc += stride; + memcpy(y_ibuf, ysrc, width); + memcpy(&y_ibuf[width], &ysrc[stride], width); + memcpy(bu_ibuf, usrc, width >> 1); + memcpy(rv_ibuf, vsrc, width >> 1); + lcd_write_yuv420_lines(y_ibuf, bu_ibuf, guv_ibuf, rv_ibuf, width); + ysrc += stride << 1; + usrc += stride >> 1; + vsrc += stride >> 1; } - while (++y < y_end); + while (ysrc < ysrc_max); } /* lcd_yuv_blit */ + /* Update the display. This must be called after all other LCD functions that change the lcd frame buffer. */ -void lcd_update(void) ICODE_ATTR; void lcd_update(void) { if (!display_on) @@ -554,7 +482,6 @@ void lcd_update(void) } /* lcd_update */ /* Update a fraction of the display. */ -void lcd_update_rect(int, int, int, int) ICODE_ATTR; void lcd_update_rect(int x, int y, int width, int height) { int ymax; -- cgit v1.2.3