From f8709ac7e25386017cee762666c12e2ac6d71a7a Mon Sep 17 00:00:00 2001 From: Michael Sevakis Date: Sat, 26 Jan 2008 14:24:09 +0000 Subject: H10 20GB: Assembly YUV blitting and dithering. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16171 a1c6a512-1295-4272-9138-f99709370657 --- apps/plugin.c | 3 +- apps/plugin.h | 7 +- apps/plugins/mpegplayer/mpeg_settings.h | 3 +- firmware/SOURCES | 1 + firmware/target/arm/iriver/h10/lcd-as-h10.S | 528 ++++++++++++++++++++++++++ firmware/target/arm/iriver/h10/lcd-h10_20gb.c | 203 +++------- 6 files changed, 595 insertions(+), 150 deletions(-) create mode 100644 firmware/target/arm/iriver/h10/lcd-as-h10.S diff --git a/apps/plugin.c b/apps/plugin.c index 2149fdaee8..1f773cfd2d 100644 --- a/apps/plugin.c +++ b/apps/plugin.c @@ -187,7 +187,8 @@ static const struct plugin_api rockbox_api = { #if defined(HAVE_LCD_COLOR) lcd_yuv_blit, #endif -#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) +#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \ + || defined (IRIVER_H10) lcd_yuv_set_options, #endif /* list */ diff --git a/apps/plugin.h b/apps/plugin.h index a291e2b9aa..f91d803854 100644 --- a/apps/plugin.h +++ b/apps/plugin.h @@ -119,12 +119,12 @@ #define PLUGIN_MAGIC 0x526F634B /* RocK */ /* increase this every time the api struct changes */ -#define PLUGIN_API_VERSION 97 +#define PLUGIN_API_VERSION 98 /* update this to latest version if a change to the api struct breaks backwards compatibility (and please take the opportunity to sort in any new function which are "waiting" at the end of the function table) */ -#define PLUGIN_MIN_API_VERSION 97 +#define PLUGIN_MIN_API_VERSION 98 /* plugin return codes */ enum plugin_status { @@ -278,7 +278,8 @@ struct plugin_api { int x, int y, int width, int height); #endif -#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) +#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \ + || defined 
(IRIVER_H10) void (*lcd_yuv_set_options)(unsigned options); #endif diff --git a/apps/plugins/mpegplayer/mpeg_settings.h b/apps/plugins/mpegplayer/mpeg_settings.h index a55b2c33a5..4d6da478ea 100644 --- a/apps/plugins/mpegplayer/mpeg_settings.h +++ b/apps/plugins/mpegplayer/mpeg_settings.h @@ -5,7 +5,8 @@ #define SETTINGS_MIN_VERSION 1 #define SETTINGS_FILENAME "mpegplayer.cfg" -#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) +#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \ + || defined (IRIVER_H10) #define MPEG_OPTION_DITHERING_ENABLED 1 #endif diff --git a/firmware/SOURCES b/firmware/SOURCES index bfc34e55c8..eaa4cb32cb 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -555,6 +555,7 @@ target/arm/iriver/h10/backlight-h10.c target/arm/iriver/h10/button-h10.c target/arm/iriver/h10/fmradio_i2c-h10.c target/arm/iriver/h10/lcd-h10_20gb.c +target/arm/iriver/h10/lcd-as-h10.S target/arm/iriver/h10/power-h10.c target/arm/iriver/h10/powermgmt-h10.c target/arm/usb-fw-pp502x.c diff --git a/firmware/target/arm/iriver/h10/lcd-as-h10.S b/firmware/target/arm/iriver/h10/lcd-as-h10.S new file mode 100644 index 0000000000..f4f7b9398e --- /dev/null +++ b/firmware/target/arm/iriver/h10/lcd-as-h10.S @@ -0,0 +1,528 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007-2008 by Michael Sevakis + * + * H10 20GB LCD assembly routines + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.344136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r12, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + mov r14, #LCD2_DATA_MASK @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add r11, r9, r9, asl #2 @ r9 = Cr*101 + add r11, r11, r9, asl #5 @ + add r9, r11, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb 
r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5) + orr r3, r3, r11, lsl #11 @ r3 |= (r << 11) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmfd sp!, { r4-r12, pc } @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.344136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. 
+ * |R| |298 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|use order): + * 5 3 4 2 row0 row2 > down + * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left + * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/ + * 0 6 1 7 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r12, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, [sp, #40] @ Line up pattern and kernel quadrant + eor r14, r3, r0 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 
= 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + stmia r0, { r7, r11 } @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmfd sp!, { r4-r12, pc } @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/iriver/h10/lcd-h10_20gb.c b/firmware/target/arm/iriver/h10/lcd-h10_20gb.c index 2c527fa4d9..892adffea9 100644 --- a/firmware/target/arm/iriver/h10/lcd-h10_20gb.c +++ b/firmware/target/arm/iriver/h10/lcd-h10_20gb.c @@ -34,6 +34,8 @@ static unsigned short disp_control_rev; /* Contrast setting << 8 */ static int lcd_contrast; +static unsigned lcd_yuv_options NOCACHEBSS_ATTR = 0; + /* Forward declarations */ static void lcd_display_off(void); @@ -166,8 +168,6 @@ void lcd_set_flip(bool yesno) lcd_write_reg(R_GATE_SCAN_START_POS, yesno ? 0x0002 : 0x0000); /* SM=0, GS=x, SS=x, NL4-0=10011 (G1-G160) */ lcd_write_reg(R_DRV_OUTPUT_CONTROL, yesno ? 0x0213 : 0x0113); - /* HEA7-0=0xxx, HSA7-0=0xxx */ - lcd_write_reg(R_HORIZ_RAM_ADDR_POS, y_offset ? 
0x8304 : 0x7f00); } /* LCD init */ @@ -239,8 +239,8 @@ static void lcd_power_on(void) lcd_write_reg(R_1ST_SCR_DRV_POS, 0x9f00); /* SE27-20(End)=0x5c (92), SS27-20(Start)=0x00 */ lcd_write_reg(R_2ND_SCR_DRV_POS, 0x5c00); - /* HEA7-0=0xxx, HSA7-0=0xxx */ - lcd_write_reg(R_HORIZ_RAM_ADDR_POS, y_offset ? 0x8304 : 0x7f00); + /* HEA7-0=7f, HSA7-0=00 */ + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, 0x7f00); /* PKP12-10=0x0, PKP02-00=0x0 */ lcd_write_reg(R_GAMMA_FINE_ADJ_POS1, 0x0003); /* PKP32-30=0x4, PKP22-20=0x0 */ @@ -395,177 +395,91 @@ void lcd_blit(const fb_data* data, int x, int by, int width, (void)stride; } -#define CSUB_X 2 -#define CSUB_Y 2 - -#define RYFAC (31*257) -#define GYFAC (31*257) -#define BYFAC (31*257) -#define RVFAC 11170 /* 31 * 257 * 1.402 */ -#define GVFAC (-5690) /* 31 * 257 * -0.714136 */ -#define GUFAC (-2742) /* 31 * 257 * -0.344136 */ -#define BUFAC 14118 /* 31 * 257 * 1.772 */ +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} -#define ROUNDOFFS (127*257) -#define ROUNDOFFSG (63*257) +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); /* Performance function to blit a YUV bitmap directly to the LCD */ void lcd_yuv_blit(unsigned char * const src[3], int src_x, int src_y, int stride, int x, int y, int width, int height) { - int y0, x0, y1, x1; - int ymax; + const unsigned char *yuv_src[3]; + const unsigned char *ysrc_max; + int y0; + int options; if (!display_on) return; - width = (width + 1) & ~1; + width &= ~1; + height &= ~1; /* calculate the drawing region */ - x0 = x; - x1 = x + width - 1; - y0 = y; - y1 = y + height - 1; /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin * is actually the bottom left and horizontal and vertical are swapped. * Rockbox expects the origin to be the top left so we need to use * 127 - y instead of just y */ - - /* max horiz << 8 | start horiz */ - lcd_send_cmd(R_HORIZ_RAM_ADDR_POS); - lcd_send_data( (((LCD_HEIGHT-1)-y0+y_offset) << 8) | ((LCD_HEIGHT-1)-y1+y_offset) ); /* max vert << 8 | start vert */ - lcd_send_cmd(R_VERT_RAM_ADDR_POS); - lcd_send_data((x1 << 8) | x0); - - /* position cursor (set AD0-AD15) */ - /* start vert << 8 | start horiz */ - lcd_send_cmd(R_RAM_ADDR_SET); - lcd_send_data( (x0 << 8) | ((LCD_HEIGHT-1)-y0+y_offset) ); - - /* start drawing */ - lcd_send_cmd(R_WRITE_DATA_2_GRAM); + lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x); - ymax = y + height - 1 ; + y0 = LCD_HEIGHT - 1 - y + y_offset; - const int stride_div_csub_x = stride/CSUB_X; + /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */ + lcd_write_reg(R_ENTRY_MODE, 0x1020); - for (; y <= ymax ; y++) - { - /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ - const unsigned char *ysrc = src[0] + stride * src_y + src_x; + yuv_src[0] = src[0] + src_y * stride + 
src_x; + yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + ysrc_max = yuv_src[0] + height * stride; - const int uvoffset = stride_div_csub_x * (src_y/CSUB_Y) + - (src_x/CSUB_X); + options = lcd_yuv_options; - const unsigned char *usrc = src[1] + uvoffset; - const unsigned char *vsrc = src[2] + uvoffset; - const unsigned char *row_end = ysrc + width; + do + { + /* max horiz << 8 | start horiz */ + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1)); - int y, u, v; - int red1, green1, blue1; - int red2, green2, blue2; - unsigned rbits, gbits, bbits; + /* position cursor (set AD0-AD15) */ + /* start vert << 8 | start horiz */ + lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y0); - int rc, gc, bc; + /* start drawing */ + lcd_send_cmd(R_WRITE_DATA_2_GRAM); - do + if (options & LCD_YUV_DITHER) + { + lcd_write_yuv420_lines_odither(yuv_src, width, stride, + x, y); + y -= 2; + } + else { - u = *usrc++ - 128; - v = *vsrc++ - 128; - rc = RVFAC * v + ROUNDOFFS; - gc = GVFAC * v + GUFAC * u + ROUNDOFFSG; - bc = BUFAC * u + ROUNDOFFS; - - /* Pixel 1 */ - y = *ysrc++; - - red1 = RYFAC * y + rc; - green1 = GYFAC * y + gc; - blue1 = BYFAC * y + bc; - - /* Pixel 2 */ - y = *ysrc++; - red2 = RYFAC * y + rc; - green2 = GYFAC * y + gc; - blue2 = BYFAC * y + bc; - - /* Since out of bounds errors are relatively rare, we check two - pixels at once to see if any components are out of bounds, and - then fix whichever is broken. This works due to high values and - negative values both becoming larger than the cutoff when - casted to unsigned. And ORing them together checks all of them - simultaneously. 
*/ - if (((unsigned)(red1 | green1 | blue1 | - red2 | green2 | blue2)) > (RYFAC*255+ROUNDOFFS)) { - if (((unsigned)(red1 | green1 | blue1)) > - (RYFAC*255+ROUNDOFFS)) { - if ((unsigned)red1 > (RYFAC*255+ROUNDOFFS)) - { - if (red1 < 0) - red1 = 0; - else - red1 = (RYFAC*255+ROUNDOFFS); - } - if ((unsigned)green1 > (GYFAC*255+ROUNDOFFSG)) - { - if (green1 < 0) - green1 = 0; - else - green1 = (GYFAC*255+ROUNDOFFSG); - } - if ((unsigned)blue1 > (BYFAC*255+ROUNDOFFS)) - { - if (blue1 < 0) - blue1 = 0; - else - blue1 = (BYFAC*255+ROUNDOFFS); - } - } - - if (((unsigned)(red2 | green2 | blue2)) > - (RYFAC*255+ROUNDOFFS)) { - if ((unsigned)red2 > (RYFAC*255+ROUNDOFFS)) - { - if (red2 < 0) - red2 = 0; - else - red2 = (RYFAC*255+ROUNDOFFS); - } - if ((unsigned)green2 > (GYFAC*255+ROUNDOFFSG)) - { - if (green2 < 0) - green2 = 0; - else - green2 = (GYFAC*255+ROUNDOFFSG); - } - if ((unsigned)blue2 > (BYFAC*255+ROUNDOFFS)) - { - if (blue2 < 0) - blue2 = 0; - else - blue2 = (BYFAC*255+ROUNDOFFS); - } - } - } - - rbits = red1 >> 16 ; - gbits = green1 >> 15 ; - bbits = blue1 >> 16 ; - lcd_send_data((rbits << 11) | (gbits << 5) | bbits); - - rbits = red2 >> 16 ; - gbits = green2 >> 15 ; - bbits = blue2 >> 16 ; - lcd_send_data((rbits << 11) | (gbits << 5) | bbits); + lcd_write_yuv420_lines(yuv_src, width, stride); } - while (ysrc < row_end); - src_y++; + y0 -= 2; + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; + yuv_src[2] += stride >> 1; } + while (yuv_src[0] < ysrc_max); + + /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */ + lcd_write_reg(R_ENTRY_MODE, 0x1028); } @@ -573,8 +487,7 @@ void lcd_yuv_blit(unsigned char * const src[3], void lcd_update_rect(int x0, int y0, int width, int height) { int x1, y1; - - unsigned short *addr = (unsigned short *)lcd_framebuffer; + unsigned short *addr; if (!display_on) return; -- cgit v1.2.3