From 418169aff8faf2cf90124cd95dba0af821cea73d Mon Sep 17 00:00:00 2001 From: Solomon Peachy Date: Thu, 13 Oct 2022 11:03:53 -0400 Subject: Revert "Remove YUV blitting functions and LCD modes" This reverts commit fe6aa21e9eb88f49005863efd2003d0982920048. Change-Id: I8bb1e5d6c52ed1478002d2140ef494ec5d62b8e3 --- firmware/target/coldfire/iaudio/x5/lcd-as-x5.S | 242 +++++++++++++++++++++++++ 1 file changed, 242 insertions(+) (limited to 'firmware/target/coldfire/iaudio/x5/lcd-as-x5.S') diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S index b319d745ca..e6621e1dea 100644 --- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S +++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S @@ -25,6 +25,248 @@ .section .icode,"ax",@progbits +/* begin lcd_write_yuv420_lines + * + * See http://en.wikipedia.org/wiki/YCbCr + * ITU-R BT.601 (formerly CCIR 601): + * |Y'| | 0.299000 0.587000 0.114000| |R| + * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y') + * |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y') + * Scaled, normalized and rounded: + * |Y'| | 65 129 25| |R| + 16 : 16->235 + * |Cb| = |-38 -74 112| |G| + 128 : 16->240 + * |Cr| |112 -94 -18| |B| + 128 : 16->240 + * + * The inverse: + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 666: + * |R| |19611723 0 26881894| |Y' - 16| >> 26 + * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26 + * |B| |19611723 33976259 0| |Cr - 128| >> 26 + * + * Needs EMAC set to saturated, signed integer mode. + * + * register usage: + * %a0 - LCD data port + * %a1 - Y pointer + * %a2 - C pointer + * %a3 - C width + * %a4 - Y end address + * %a5 - Y factor + * %a6 - BU factor + * %d0 - scratch + * %d1 - B, previous Y \ alternating + * %d2 - U / B, previous Y / + * %d3 - V / G + * %d4 - R / output pixel + * %d5 - GU factor + * %d6 - GV factor + * %d7 - RGB signed -> unsigned conversion mask + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, @function + +lcd_write_yuv420_lines: + lea.l (-44, %sp), %sp /* free up some registers */ + movem.l %d2-%d7/%a2-%a6, (%sp) + + lea.l 0xf0008002, %a0 /* LCD data port */ + movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */ + lea.l (%a1, %a3*2), %a4 /* Y end address */ + + move.l #19611723, %a5 /* y factor */ + move.l #33976259, %a6 /* bu factor */ + move.l #-6406711, %d5 /* gu factor */ + move.l #-13692816, %d6 /* gv factor */ + move.l #0x01040820, %d7 /* bitmask for signed->unsigned conversion + * of R, G and B within RGGB6666 at once */ + + /* chroma for first 2x2 block */ + clr.l %d3 /* load v component */ + move.b (%a2, %a3), %d3 + clr.l %d2 /* load u component */ + move.b (%a2)+, %d2 + moveq.l #-128, %d0 + add.l %d0, %d2 + add.l %d0, %d3 + + mac.l %a6, %d2, %acc0 /* bu */ + mac.l %d5, %d2, %acc1 /* gu */ + mac.l %d6, %d3, %acc1 /* gv */ + move.l #26881894, %d0 /* rv factor */ + mac.l %d0, %d3, %acc2 /* rv */ + + /* luma for very first pixel (top left) */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + moveq.l #-126, %d0 + add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + bra.b .yuv_line_entry + +.yuv_line_loop: + /* chroma for 2x2 pixel block */ + clr.l %d3 /* load v component */ + move.b (%a2, %a3), %d3 + clr.l %d2 /* load u component */ + move.b (%a2)+, %d2 + moveq.l #-128, %d0 + add.l %d0, %d2 + add.l %d0, %d3 + + mac.l %a6, %d2, %acc0 /* bu */ + mac.l %d5, %d2, %acc1 /* gu */ + mac.l %d6, %d3, %acc1 /* gv */ + move.l #26881894, %d0 /* rv factor */ + mac.l %d0, %d3, %acc2 /* rv */ + + /* luma for first pixel (top left) */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + moveq.l #-126, %d0 + add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ +.yuv_line_entry: + moveq.l #26, %d0 + move.l %acc0, %d4 + move.l %acc1, %d3 + move.l %acc2, %d2 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d2 + + lsl.l #6, %d2 + or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d2 + or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + /* luma for second pixel (bottom left) as delta from the first */ + clr.l %d2 + move.b (%a1)+, %d2 + move.l %d2, %d0 + sub.l %d1, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ + moveq.l #26, %d0 + move.l %acc0, %d4 + move.l %acc1, %d3 + move.l %acc2, %d1 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d1 + + lsl.l #6, %d1 + or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d1 + or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + /* luma for third pixel (top right) as delta from the second */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + move.l %d1, %d0 + sub.l %d2, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ + moveq.l #26, %d0 + move.l %acc0, %d4 + move.l %acc1, %d3 + move.l %acc2, %d2 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d2 + + lsl.l #6, %d2 + or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d2 + or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + /* luma for fourth pixel (bottom right) as delta from the thrid */ + clr.l %d2 + move.b (%a1)+, %d2 + move.l %d2, %d0 + sub.l %d1, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ + moveq.l #26, %d0 + movclr.l %acc0, %d4 + movclr.l %acc1, %d3 + movclr.l %acc2, %d1 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d1 + + lsl.l #6, %d1 + or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d1 + or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + cmp.l %a1, %a4 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop + + move.w %d4, (%a0) /* write (very) last 2nd word */ + + movem.l (%sp), %d2-%d7/%a2-%a6 + lea.l (44, %sp), %sp /* restore registers */ + rts +.yuv_end: + .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines + + /* begin lcd_write_data */ .align 2 .global lcd_write_data -- cgit v1.2.3