From 9af13c327e3a086f8015aec8135e01018a7ffb2b Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Wed, 23 Aug 2006 17:30:51 +0000 Subject: H300: * Assembler optimised lcd_yuv_blit(), based on the X5 version. 23..31% speedup in tests depending on video size. * Don't set HORIZ_RAM_ADDR_POS for LCD updates, it's unnecessary. * Started the target tree move for H300 with this. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10724 a1c6a512-1295-4272-9138-f99709370657 --- firmware/SOURCES | 6 +- firmware/drivers/lcd-h300.c | 179 +++------- firmware/drivers/lcd.S | 95 ----- firmware/target/coldfire/iriver/h300/lcd-as-h300.S | 385 +++++++++++++++++++++ 4 files changed, 438 insertions(+), 227 deletions(-) create mode 100755 firmware/target/coldfire/iriver/h300/lcd-as-h300.S diff --git a/firmware/SOURCES b/firmware/SOURCES index 63cbf9c56f..e56199658c 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -179,7 +179,7 @@ thread.c timer.c crt0.S drivers/lcd.S -#endif +#endif mp3_playback.c mp3data.c #if CONFIG_CODEC != SWCODEC @@ -251,6 +251,10 @@ target/coldfire/iaudio/x5/lcd-remote-x5.c #endif #endif +#ifdef IRIVER_H300_SERIES +target/coldfire/iriver/h300/lcd-as-h300.S +#endif + #ifdef IRIVER_H10 #ifndef SIMULATOR target/arm/iriver/h10/backlight-h10.c diff --git a/firmware/drivers/lcd-h300.c b/firmware/drivers/lcd-h300.c index c3f5d48f87..41d2d11509 100644 --- a/firmware/drivers/lcd-h300.c +++ b/firmware/drivers/lcd-h300.c @@ -301,134 +301,59 @@ void lcd_blit(const fb_data* data, int x, int by, int width, /*if(display_on)*/ } -#define CSUB_X 2 -#define CSUB_Y 2 - -#define RYFAC (31*257) -#define GYFAC (63*257) -#define BYFAC (31*257) -#define RVFAC 11170 /* 31 * 257 * 1.402 */ -#define GVFAC (-11563) /* 63 * 257 * -0.714136 */ -#define GUFAC (-5572) /* 63 * 257 * -0.344136 */ -#define BUFAC 14118 /* 31 * 257 * 1.772 */ - -#define ROUNDOFFS (127*257) - -/* Performance function to blit a YUV bitmap directly to the LCD */ +/* Line write helper function for lcd_yuv_blit. 
Write two lines of yuv420. + * y should have two lines of Y back to back. + * bu and rv should contain the Cb and Cr data for the two lines of Y. + * Stores bu, guv and rv in respective buffers for use in second line. + */ +extern void lcd_write_yuv420_lines(const unsigned char *y, + unsigned char *bu, unsigned char *guv, unsigned char *rv, int width); + +/* Performance function to blit a YUV bitmap directly to the LCD + * src_x, src_y, width and height should be even + * x, y, width and height have to be within LCD bounds + */ void lcd_yuv_blit(unsigned char * const src[3], int src_x, int src_y, int stride, int x, int y, int width, int height) { - if (display_on) - { - int ymax; - - width = (width + 1) & ~1; - height = (height + 1) & ~1; - ymax = y + height - 1; - - /* set update window */ - - /* horiz ram addr */ - lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (ymax << 8) | y); - - /* vert ram addr */ - lcd_write_reg(R_VERT_RAM_ADDR_POS,((x+xoffset+width-1) << 8) | (x+xoffset)); - lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y); - lcd_begin_write_gram(); + /* IRAM Y, Cb/bu, guv and Cr/rv buffers. 
*/ + unsigned char y_ibuf[LCD_WIDTH*2]; + unsigned char bu_ibuf[LCD_WIDTH/2]; + unsigned char guv_ibuf[LCD_WIDTH/2]; + unsigned char rv_ibuf[LCD_WIDTH/2]; + const unsigned char *ysrc, *usrc, *vsrc; + const unsigned char *ysrc_max; - for (; y <= ymax; y++) - { - /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ - const unsigned char *ysrc = src[0] + stride * src_y + src_x; - const unsigned char *usrc = src[1] + (stride/CSUB_X) * (src_y/CSUB_Y) - + (src_x/CSUB_X); - const unsigned char *vsrc = src[2] + (stride/CSUB_X) * (src_y/CSUB_Y) - + (src_x/CSUB_X); - const unsigned char *row_end = ysrc + width; - - int y, u, v; - int rc, gc, bc; - int red, green, blue; - unsigned rbits, gbits, bbits; - - do - { - u = *usrc++ - 128; - v = *vsrc++ - 128; - rc = RVFAC * v + ROUNDOFFS; - gc = GVFAC * v + GUFAC * u + ROUNDOFFS; - bc = BUFAC * u + ROUNDOFFS; - - y = *ysrc++; - red = RYFAC * y + rc; - green = GYFAC * y + gc; - blue = BYFAC * y + bc; - - if ((unsigned)red > (RYFAC*255+ROUNDOFFS)) - { - if (red < 0) - red = 0; - else - red = (RYFAC*255+ROUNDOFFS); - } - if ((unsigned)green > (GYFAC*255+ROUNDOFFS)) - { - if (green < 0) - green = 0; - else - green = (GYFAC*255+ROUNDOFFS); - } - if ((unsigned)blue > (BYFAC*255+ROUNDOFFS)) - { - if (blue < 0) - blue = 0; - else - blue = (BYFAC*255+ROUNDOFFS); - } - rbits = ((unsigned)red) >> 16 ; - gbits = ((unsigned)green) >> 16 ; - bbits = ((unsigned)blue) >> 16 ; - - LCD_DATA = (rbits << 11) | (gbits << 5) | bbits; - - y = *ysrc++; - red = RYFAC * y + rc; - green = GYFAC * y + gc; - blue = BYFAC * y + bc; - - if ((unsigned)red > (RYFAC*255+ROUNDOFFS)) - { - if (red < 0) - red = 0; - else - red = (RYFAC*255+ROUNDOFFS); - } - if ((unsigned)green > (GYFAC*255+ROUNDOFFS)) - { - if (green < 0) - green = 0; - else - green = (GYFAC*255+ROUNDOFFS); - } - if ((unsigned)blue > (BYFAC*255+ROUNDOFFS)) - { - if (blue < 0) - blue = 0; - else - blue = (BYFAC*255+ROUNDOFFS); - } - rbits = ((unsigned)red) >> 16 ; - gbits = 
((unsigned)green) >> 16 ; - bbits = ((unsigned)blue) >> 16 ; - - LCD_DATA = (rbits << 11) | (gbits << 5) | bbits; - } - while (ysrc < row_end); - - src_y++; - } + if (!display_on) + return; + + width &= ~1; /* stay on the safe side */ + height &= ~1; + + /* Set start position and window */ + lcd_write_reg(R_VERT_RAM_ADDR_POS,((x+xoffset+width-1) << 8) | (x+xoffset)); + lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y); + + lcd_begin_write_gram(); + + ysrc = src[0] + src_y * stride + src_x; + usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1); + vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1); + ysrc_max = ysrc + height * stride; + + do + { + memcpy(y_ibuf, ysrc, width); + memcpy(y_ibuf + width, ysrc + stride, width); + memcpy(bu_ibuf, usrc, width >> 1); + memcpy(rv_ibuf, vsrc, width >> 1); + lcd_write_yuv420_lines(y_ibuf, bu_ibuf, guv_ibuf, rv_ibuf, width); + ysrc += 2 * stride; + usrc += stride >> 1; + vsrc += stride >> 1; } + while (ysrc < ysrc_max); } /* Update the display. 
@@ -438,10 +363,6 @@ void lcd_update(void) { if(display_on){ /* reset update window */ - /* horiz ram addr: 0 - 175 */ - lcd_write_reg(R_HORIZ_RAM_ADDR_POS, 0xaf00); - - /* vert ram addr: 0 - 219 */ lcd_write_reg(R_VERT_RAM_ADDR_POS,((xoffset+219)<<8) | xoffset); /* Copy display bitmap to hardware */ @@ -467,15 +388,11 @@ void lcd_update_rect(int x, int y, int width, int height) /* set update window */ - /* horiz ram addr */ - lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (ymax << 8) | y); - - /* vert ram addr */ lcd_write_reg(R_VERT_RAM_ADDR_POS,((x+xoffset+width-1) << 8) | (x+xoffset)); lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y); lcd_begin_write_gram(); - /* Copy specified rectangle bitmap to hardware */ + /* Copy specified rectangle bitmap to hardware */ for (; y <= ymax; y++) { lcd_write_data ((unsigned short *)&lcd_framebuffer[y][x], width); diff --git a/firmware/drivers/lcd.S b/firmware/drivers/lcd.S index 8764d793c3..d9e7092c94 100755 --- a/firmware/drivers/lcd.S +++ b/firmware/drivers/lcd.S @@ -371,99 +371,4 @@ lcd_write_data: nop /* 1(0/0) */ bne .loop /* 2(0/0) */ rts -#elif defined(IRIVER_H300_SERIES) - .section .icode,"ax",@progbits - - .align 2 - .global lcd_write_data - .type lcd_write_data,@function - -lcd_write_data: - move.l (4,%sp),%a0 /* data pointer */ - move.l (8,%sp),%d0 /* length in words */ - add.l %d0,%d0 /* words -> bytes */ - add.l %a0,%d0 /* -> end address */ - lea.l 0xf0000002,%a1 /* LCD data port */ - - move.l %a0,%d1 - btst.l #1,%d1 /* already longword aligned? */ - beq.b .word1_end /* yes: skip initial word copy */ - - move.w (%a0)+,(%a1) /* transfer initial word */ - -.word1_end: /* now longword aligned */ - moveq.l #28,%d1 - add.l %a0,%d1 - and.l #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */ - cmp.l %d1,%d0 /* at least one full line to send? 
*/ - blo.b .long2_start /* no: skip to trailing longword handling */ - - lea.l (-16,%sp),%sp /* free up some registers */ - movem.l %d2-%d4/%a2,(%sp) - - subq.l #8,%d1 - subq.l #8,%d1 /* %d1 = first line bound */ - - cmp.l %a0,%d1 /* any leading longwords? */ - bls.b .long1_end /* no: skip leading long loop */ - -.long1_loop: - move.l (%a0)+,%d2 /* read longword */ - swap %d2 /* send data to LCD in correct order...*/ - move.w %d2,(%a1) - swap %d2 - move.w %d2,(%a1) - cmp.l %a0,%d1 /* run %a0 up to first line bound */ - bhi.b .long1_loop - -.long1_end: - move.l %d0,%a2 - lea.l (-14,%a2),%a2 /* %a2 = end address - 14 (one line/pass) */ - - /* burst-optimised line transfers */ -.line_loop: - movem.l (%a0),%d1-%d4 /* burst-read line */ - lea.l (16,%a0),%a0 /* increment address */ - swap %d1 /* send data to LCD in correct order... */ - move.w %d1,(%a1) - swap %d1 - move.w %d1,(%a1) - swap %d2 - move.w %d2,(%a1) - swap %d2 - move.w %d2,(%a1) - swap %d3 - move.w %d3,(%a1) - swap %d3 - move.w %d3,(%a1) - swap %d4 - move.w %d4,(%a1) - swap %d4 - move.w %d4,(%a1) - cmp.l %a0,%a2 /* run %a0 up to last line bound */ - bhi.b .line_loop - - movem.l (%sp),%d2-%d4/%a2 - lea.l (16,%sp),%sp /* restore registers */ - -.long2_start: - subq.l #2,%d0 /* account for handling 2 words per loop */ - cmp.l %a0,%d0 /* any (trailing longwords? 
*/ - bls.b .long2_end /* no: skip trailing longword loop */ - -.long2_loop: - move.l (%a0)+,%d1 /* read longword */ - swap %d1 /* send data to LCD in correct order */ - move.w %d1,(%a1) - swap %d1 - move.w %d1,(%a1) - cmp.l %a0,%d0 /* run %a0 up to last long bound */ - bhi.b .long2_loop - -.long2_end: - blo.b .word2_end /* no final word: skip */ - move.w (%a0)+,(%a1) /* transfer final word */ - -.word2_end: - rts #endif diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S new file mode 100755 index 0000000000..c6c1c76136 --- /dev/null +++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S @@ -0,0 +1,385 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 by Jens Arnold + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + + .section .icode, "ax", @progbits + + .align 2 + .global lcd_write_data + .type lcd_write_data, @function + +lcd_write_data: + move.l (4, %sp), %a0 /* data pointer */ + move.l (8, %sp), %d0 /* length in words */ + add.l %d0, %d0 /* words -> bytes */ + add.l %a0, %d0 /* -> end address */ + lea.l 0xf0000002, %a1 /* LCD data port */ + + move.l %a0, %d1 + btst.l #1, %d1 /* already longword aligned? 
*/ + beq.s .word1_end /* yes: skip initial word copy */ + + move.w (%a0)+, (%a1) /* transfer initial word */ + +.word1_end: /* now longword aligned */ + moveq.l #28, %d1 + add.l %a0, %d1 + and.l #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */ + cmp.l %d1, %d0 /* at least one full line to send? */ + blo.s .long2_start /* no: skip to trailing longword handling */ + + lea.l (-16, %sp), %sp /* free up some registers */ + movem.l %d2-%d4/%a2, (%sp) + + subq.l #8, %d1 + subq.l #8, %d1 /* %d1 = first line bound */ + + cmp.l %a0, %d1 /* any leading longwords? */ + bls.s .long1_end /* no: skip leading long loop */ + +.long1_loop: + move.l (%a0)+, %d2 /* read longword */ + swap %d2 /* send data to LCD in correct order...*/ + move.w %d2, (%a1) + swap %d2 + move.w %d2, (%a1) + cmp.l %a0, %d1 /* run %a0 up to first line bound */ + bhi.s .long1_loop + +.long1_end: + move.l %d0, %a2 + lea.l (-14, %a2), %a2 /* %a2 = end address - 14 (one line/pass) */ + + /* burst-optimised line transfers */ +.line_loop: + movem.l (%a0), %d1-%d4 /* burst-read line */ + lea.l (16, %a0), %a0 /* increment address */ + swap %d1 /* send data to LCD in correct order... */ + move.w %d1, (%a1) + swap %d1 + move.w %d1, (%a1) + swap %d2 + move.w %d2, (%a1) + swap %d2 + move.w %d2, (%a1) + swap %d3 + move.w %d3, (%a1) + swap %d3 + move.w %d3, (%a1) + swap %d4 + move.w %d4, (%a1) + swap %d4 + move.w %d4, (%a1) + cmp.l %a0, %a2 /* run %a0 up to last line bound */ + bhi.s .line_loop + + movem.l (%sp), %d2-%d4/%a2 + lea.l (16, %sp), %sp /* restore registers */ + +.long2_start: + subq.l #2, %d0 /* account for handling 2 words per loop */ + cmp.l %a0, %d0 /* any (trailing longwords? 
*/ + bls.s .long2_end /* no: skip trailing longword loop */ + +.long2_loop: + move.l (%a0)+, %d1 /* read longword */ + swap %d1 /* send data to LCD in correct order */ + move.w %d1, (%a1) + swap %d1 + move.w %d1, (%a1) + cmp.l %a0, %d0 /* run %a0 up to last long bound */ + bhi.s .long2_loop + +.long2_end: + blo.s .word2_end /* no final word: skip */ + move.w (%a0)+, (%a1) /* transfer final word */ + +.word2_end: + rts +.lcd_write_data_end: + .size lcd_write_data, .lcd_write_data_end - lcd_write_data + + +/* lcd_write_yuv420_lines(), based on lcd-as-x5.S + * + * See http://en.wikipedia.org/wiki/YCbCr + * ITU-R BT.601 (formerly CCIR 601): + * |Y'| | 0.299000 0.587000 0.114000| |R| + * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y') + * |Pr| | 0.500000 -0.418688 -0.081312| |B| or 0.713267*(R - Y') + * Scaled, normalized and rounded: + * |Y'| | 65 129 25| |R| + 16 : 16->235 + * |Cb| = |-38 -74 112| |G| + 128 : 16->240 + * |Cr| |112 -94 -18| |B| + 128 : 16->240 + * + * The inverse: + * |R| |1.000000 0.000000 1.402000| |Y'| + * |G| = |1.000000 -0.344136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB666, as converting + * directly to RGB565 gives too much roundoff error: + * |R| |74 0 101| |Y' - 16| / 256 + * |G| = |74 -24 -51| |Cb - 128| / 256 + * |B| |74 128 0| |Cr - 128| / 256 + */ + + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, @function + +lcd_write_yuv420_lines: + lea.l (-36, %sp), %sp /* free up some registers */ + movem.l %d2-%d6/%a2-%a5, (%sp) + + lea.l 0xf0000002, %a0 /* LCD data port */ + movem.l (36+4, %sp), %a1-%a5 /* Y data, Cb data, guv storage, Cr data, width */ + lea.l (%a1, %a5), %a5 /* end address */ + +.yuv_line_loop1: + /* chroma for first & second pixel */ + clr.l %d1 /* load bu component */ + move.b (%a2), %d1 + clr.l %d3 /* load rv component */ + move.b (%a4), %d3 + moveq.l #-128, %d0 + add.l %d0, %d1 + add.l %d0, %d3 + + move.l %d1, %d2 
/* %d2 = cb component for guv */ + asr.l #1, %d1 /* %d1 = 128 * (Cb - 128) / 256 */ + move.b %d1, (%a2)+ /* save bu for next line */ + moveq.l #-24, %d0 + muls.w %d0, %d2 /* %d2 = -24 * (Cb - 128)*/ + moveq.l #-51, %d0 + muls.w %d3, %d0 + add.l %d0, %d2 /* %d2 = -24 * (Cb - 128) - 51 * (Cr - 128) */ + asr.l #8, %d2 + move.b %d2, (%a3)+ /* save guv for next line */ + moveq.l #101, %d0 + muls.w %d0, %d3 /* %d3 = 101 * (Cr - 128) */ + asr.l #8, %d3 + move.b %d3, (%a4)+ /* save rv for next line */ + + /* luma for first pixel */ + clr.l %d4 /* load y component */ + move.b (%a1)+, %d4 + moveq.l #74, %d0 + muls.w %d0, %d4 /* %d4 = 74 * Y */ + asr.l #8, %d4 + subq.l #4, %d4 /* correction for (Y - 16) and rounding */ + move.l %d4, %d5 + move.l %d4, %d6 + + /* combine & write first pixel */ + add.l %d1, %d4 /* %d4 = blue */ + add.l %d2, %d5 /* %d5 = green */ + add.l %d3, %d6 /* %d6 = red */ + + move.l %d4, %d0 /* clamping */ + or.l %d5, %d0 + or.l %d6, %d0 + asr.l #6, %d0 + beq.s .yuv_all_ok1 + moveq.l #63, %d0 + cmp.l %d0, %d4 + bls.s .yuv_blue_ok1 + spl.b %d4 + and.l %d0, %d4 +.yuv_blue_ok1: + cmp.l %d0, %d5 + bls.s .yuv_green_ok1 + spl.b %d5 + and.l %d0, %d5 +.yuv_green_ok1: + cmp.l %d0, %d6 + bls.s .yuv_red_ok1 + spl.b %d6 + and.l %d0, %d6 +.yuv_red_ok1: +.yuv_all_ok1: + + lsr.l #1, %d6 /* pack, convert to RGB565 and output */ + lsr.l #1, %d4 + lsl.l #6, %d6 + or.l %d6, %d5 + lsl.l #5, %d5 + or.l %d5, %d4 + move.w %d4, (%a0) + + /* luma for second pixel */ + clr.l %d4 /* load y component */ + move.b (%a1)+, %d4 + moveq.l #74, %d0 + muls.w %d0, %d4 /* %d4 = 74 * Y */ + asr.l #8, %d4 + subq.l #4, %d4 /* correction for (Y - 16) and rounding */ + + /* combine & write second pixel */ + add.l %d4, %d1 /* %d1 = blue */ + add.l %d4, %d2 /* %d2 = green */ + add.l %d4, %d3 /* %d3 = red */ + + move.l %d1, %d0 /* clamping */ + or.l %d2, %d0 + or.l %d3, %d0 + asr.l #6, %d0 + beq.s .yuv_all_ok2 + moveq.l #63, %d0 + cmp.l %d0, %d1 + bls.s .yuv_blue_ok2 + spl.b %d1 + and.l %d0, %d1 
+.yuv_blue_ok2: + cmp.l %d0, %d2 + bls.s .yuv_green_ok2 + spl.b %d2 + and.l %d0, %d2 +.yuv_green_ok2: + cmp.l %d0, %d3 + bls.s .yuv_red_ok2 + spl.b %d3 + and.l %d0, %d3 +.yuv_red_ok2: +.yuv_all_ok2: + + lsr.l #1, %d3 /* pack, convert to RGB565 and output */ + lsr.l #1, %d1 + lsl.l #6, %d3 + or.l %d3, %d2 + lsl.l #5, %d2 + or.l %d2, %d1 + move.w %d1, (%a0) + + cmp.l %a1,%a5 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop1 + + /* Rewind chroma pointers */ + movem.l (36+8, %sp), %a2-%a5 /* bu data, guv data, rv data, width */ + lea.l (%a1, %a5), %a5 /* next end address */ + +.yuv_line_loop2: + /* read saved chromas and sign extend */ + move.b (%a2)+, %d1 + extb.l %d1 + move.b (%a3)+, %d2 + extb.l %d2 + move.b (%a4)+, %d3 + extb.l %d3 + + /* luma for first pixel */ + clr.l %d4 /* load y component */ + move.b (%a1)+, %d4 + moveq.l #74, %d0 + muls.w %d0, %d4 /* %d4 = 74 * Y */ + asr.l #8, %d4 + subq.l #4, %d4 /* correction for (Y - 16) and rounding */ + move.l %d4, %d5 + move.l %d4, %d6 + + /* combine & write first pixel */ + add.l %d1, %d4 /* %d4 = blue */ + add.l %d2, %d5 /* %d5 = green */ + add.l %d3, %d6 /* %d6 = red */ + + move.l %d4, %d0 /* clamping */ + or.l %d5, %d0 + or.l %d6, %d0 + asr.l #6, %d0 + beq.s .yuv_all_ok3 + moveq.l #63, %d0 + cmp.l %d0, %d4 + bls.s .yuv_blue_ok3 + spl.b %d4 + and.l %d0, %d4 +.yuv_blue_ok3: + cmp.l %d0, %d5 + bls.s .yuv_green_ok3 + spl.b %d5 + and.l %d0, %d5 +.yuv_green_ok3: + cmp.l %d0, %d6 + bls.s .yuv_red_ok3 + spl.b %d6 + and.l %d0, %d6 +.yuv_red_ok3: +.yuv_all_ok3: + + lsr.l #1, %d6 /* pack, convert to RGB565 and output */ + lsr.l #1, %d4 + lsl.l #6, %d6 + or.l %d6, %d5 + lsl.l #5, %d5 + or.l %d5, %d4 + move.w %d4, (%a0) + + /* luma for second pixel */ + clr.l %d4 /* load y component */ + move.b (%a1)+, %d4 + moveq.l #74, %d0 + muls.w %d0, %d4 /* %d4 = 74 * Y */ + asr.l #8, %d4 + subq.l #4, %d4 /* correction for (Y - 16) and rounding */ + + /* combine & write second pixel */ + add.l %d4, %d1 /* %d1 = blue */ + add.l %d4, 
%d2 /* %d2 = green */ + add.l %d4, %d3 /* %d3 = red */ + + move.l %d1, %d0 /* clamping */ + or.l %d2, %d0 + or.l %d3, %d0 + asr.l #6, %d0 + beq.s .yuv_all_ok4 + moveq.l #63, %d0 + cmp.l %d0, %d1 + bls.s .yuv_blue_ok4 + spl.b %d1 + and.l %d0, %d1 +.yuv_blue_ok4: + cmp.l %d0, %d2 + bls.s .yuv_green_ok4 + spl.b %d2 + and.l %d0, %d2 +.yuv_green_ok4: + cmp.l %d0, %d3 + bls.s .yuv_red_ok4 + spl.b %d3 + and.l %d0, %d3 +.yuv_red_ok4: +.yuv_all_ok4: + + lsr.l #1, %d3 /* pack, convert to RGB565 and output */ + lsr.l #1, %d1 + lsl.l #6, %d3 + or.l %d3, %d2 + lsl.l #5, %d2 + or.l %d2, %d1 + move.w %d1, (%a0) + + cmp.l %a1, %a5 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop2 + + movem.l (%sp), %d2-%d6/%a2-%a5 + lea.l (36, %sp), %sp /* restore registers */ + + rts +.lcd_write_yuv420_lines_end: + .size lcd_write_yuv420_lines, .lcd_write_yuv420_lines_end - lcd_write_yuv420_lines -- cgit v1.2.3