From 418169aff8faf2cf90124cd95dba0af821cea73d Mon Sep 17 00:00:00 2001 From: Solomon Peachy Date: Thu, 13 Oct 2022 11:03:53 -0400 Subject: Revert "Remove YUV blitting functions and LCD modes" This reverts commit fe6aa21e9eb88f49005863efd2003d0982920048. Change-Id: I8bb1e5d6c52ed1478002d2140ef494ec5d62b8e3 --- apps/plugin.c | 9 + apps/plugin.h | 11 + apps/plugins/test_fps.c | 91 ++ docs/PLUGIN_API | 19 + firmware/SOURCES | 15 + firmware/asm/arm/lcd-as-memframe.S | 591 ++++++++++++ firmware/asm/lcd-as-memframe.c | 168 ++++ firmware/drivers/lcd-color-common.c | 189 ++++ firmware/drivers/lcd-memframe.c | 98 ++ firmware/export/config/mrobe500.h | 2 +- firmware/export/lcd.h | 11 +- .../target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S | 550 +++++++++++ firmware/target/arm/as3525/lcd-fuze.c | 80 ++ .../target/arm/as3525/sansa-e200v2/lcd-e200v2.c | 98 ++ firmware/target/arm/ipod/lcd-as-color-nano.S | 287 ++++++ firmware/target/arm/ipod/lcd-color_nano.c | 56 ++ firmware/target/arm/ipod/video/lcd-as-video.S | 237 +++++ firmware/target/arm/ipod/video/lcd-video.c | 47 + firmware/target/arm/iriver/h10/lcd-as-h10.S | 538 +++++++++++ firmware/target/arm/iriver/h10/lcd-h10_20gb.c | 90 ++ firmware/target/arm/iriver/h10/lcd-h10_5gb.c | 162 ++++ firmware/target/arm/lcd-c200_c200v2.c | 77 ++ firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S | 556 +++++++++++ firmware/target/arm/pbell/vibe500/lcd-vibe500.c | 75 ++ .../target/arm/philips/hdd1630/lcd-as-hdd1630.S | 570 +++++++++++ firmware/target/arm/philips/hdd1630/lcd-hdd1630.c | 81 ++ .../target/arm/philips/hdd6330/lcd-as-hdd6330.S | 140 +++ firmware/target/arm/philips/hdd6330/lcd-hdd6330.c | 98 ++ firmware/target/arm/philips/sa9200/lcd-as-sa9200.S | 590 ++++++++++++ firmware/target/arm/philips/sa9200/lcd-sa9200.c | 82 ++ firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c | 15 + firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c | 15 + firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c | 15 + firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c | 15 + firmware/target/arm/rk27xx/lcd-hifiman.c | 19 + firmware/target/arm/rk27xx/ma/lcd-ma.c | 15 + .../arm/rk27xx/rk27generic/lcd-rk27generic.c | 19 + .../target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S | 228 +++++ .../target/arm/s5l8700/ipodnano2g/lcd-nano2g.c | 33 + firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c | 8 + firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c | 16 + firmware/target/arm/s5l8702/ipod6g/lcd-6g.c | 46 + firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S | 1013 ++++++++++++++++++++ firmware/target/arm/samsung/yh820/lcd-as-yh820.S | 550 +++++++++++ firmware/target/arm/samsung/yh820/lcd-yh820.c | 74 ++ firmware/target/arm/samsung/yh925/lcd-as-yh925.S | 538 +++++++++++ firmware/target/arm/samsung/yh925/lcd-yh925.c | 94 ++ .../target/arm/sandisk/sansa-c200/lcd-as-c200.S | 550 +++++++++++ .../target/arm/tms320dm320/mrobe-500/lcd-mr500.c | 86 +- firmware/target/coldfire/iaudio/x5/lcd-as-x5.S | 242 +++++ firmware/target/coldfire/iaudio/x5/lcd-x5.c | 63 ++ firmware/target/coldfire/iriver/h300/lcd-as-h300.S | 246 +++++ firmware/target/coldfire/iriver/h300/lcd-h300.c | 61 ++ firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c | 62 ++ 54 files changed, 9638 insertions(+), 3 deletions(-) create mode 100644 firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S create mode 100644 firmware/target/arm/ipod/lcd-as-color-nano.S create mode 100644 firmware/target/arm/iriver/h10/lcd-as-h10.S create mode 100644 firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S create mode 100644 firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S create 
mode 100644 firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S create mode 100644 firmware/target/arm/philips/sa9200/lcd-as-sa9200.S create mode 100644 firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S create mode 100644 firmware/target/arm/samsung/yh820/lcd-as-yh820.S create mode 100644 firmware/target/arm/samsung/yh925/lcd-as-yh925.S create mode 100644 firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S create mode 100644 firmware/target/coldfire/iriver/h300/lcd-as-h300.S diff --git a/apps/plugin.c b/apps/plugin.c index 2b3b58a654..888a9e109c 100644 --- a/apps/plugin.c +++ b/apps/plugin.c @@ -222,6 +222,15 @@ static const struct plugin_api rockbox_api = { #if LCD_DEPTH >= 16 lcd_bitmap_transparent_part, lcd_bitmap_transparent, +#if MEMORYSIZE > 2 + lcd_blit_yuv, +#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \ + || defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) \ + || defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) \ + || defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200) + lcd_yuv_set_options, +#endif +#endif /* MEMORYSIZE > 2 */ #elif (LCD_DEPTH < 4) && (CONFIG_PLATFORM & PLATFORM_NATIVE) lcd_blit_mono, lcd_blit_grey_phase, diff --git a/apps/plugin.h b/apps/plugin.h index 681200608c..2ac333a19f 100644 --- a/apps/plugin.h +++ b/apps/plugin.h @@ -242,6 +242,17 @@ struct plugin_api { int x, int y, int width, int height); void (*lcd_bitmap_transparent)(const fb_data *src, int x, int y, int width, int height); +#if MEMORYSIZE > 2 + void (*lcd_blit_yuv)(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height); +#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \ + || defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) \ + || defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) \ + || defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200) + void (*lcd_yuv_set_options)(unsigned options); +#endif +#endif /* MEMORYSIZE > 2 */ #elif (LCD_DEPTH < 4) && (CONFIG_PLATFORM & PLATFORM_NATIVE) void (*lcd_blit_mono)(const unsigned char *data, int x, int by, int width, int bheight, int stride); diff --git a/apps/plugins/test_fps.c b/apps/plugins/test_fps.c index b2fc957dc1..ddf938ac25 100644 --- a/apps/plugins/test_fps.c +++ b/apps/plugins/test_fps.c @@ -123,6 +123,94 @@ static void time_main_update(void) log_text(str); } +#if defined(HAVE_LCD_COLOR) && (MEMORYSIZE > 2) + +#if LCD_WIDTH >= LCD_HEIGHT +#define YUV_WIDTH LCD_WIDTH +#define YUV_HEIGHT LCD_HEIGHT +#else /* Assume the screen is rotated on portrait LCDs */ +#define YUV_WIDTH LCD_HEIGHT +#define YUV_HEIGHT LCD_WIDTH +#endif + +static unsigned char ydata[YUV_HEIGHT][YUV_WIDTH]; +static unsigned char udata[YUV_HEIGHT/2][YUV_WIDTH/2]; +static unsigned char vdata[YUV_HEIGHT/2][YUV_WIDTH/2]; + +static unsigned char * const yuvbuf[3] = { + (void*)ydata, + (void*)udata, + (void*)vdata +}; + +static void make_gradient_rect(int width, int height) +{ + unsigned char vline[YUV_WIDTH/2]; + int x, y; + + width /= 2; + height /= 2; + + for (x = 0; x < width; x++) + vline[x] = (x << 8) / width; + for (y = 0; y < height; y++) + { + rb->memset(udata[y], (y << 8) / height, width); + rb->memcpy(vdata[y], vline, width); + } +} + +static void time_main_yuv(void) +{ + char str[32]; /* text buffer */ + long time_start; /* start tickcount */ + long time_end; /* end tickcount */ + int frame_count; + int fps; + + const int part14_x = YUV_WIDTH/4; /* x-offset for 1/4 update test */ + 
const int part14_w = YUV_WIDTH/2; /* x-size for 1/4 update test */ + const int part14_y = YUV_HEIGHT/4; /* y-offset for 1/4 update test */ + const int part14_h = YUV_HEIGHT/2; /* y-size for 1/4 update test */ + + log_text("Main LCD YUV"); + + rb->memset(ydata, 128, sizeof(ydata)); /* medium grey */ + + /* Test 1: full LCD update */ + make_gradient_rect(YUV_WIDTH, YUV_HEIGHT); + + frame_count = 0; + rb->sleep(0); /* sync to tick */ + time_start = *rb->current_tick; + while((time_end = *rb->current_tick) - time_start < DURATION) + { + rb->lcd_blit_yuv(yuvbuf, 0, 0, YUV_WIDTH, + 0, 0, YUV_WIDTH, YUV_HEIGHT); + frame_count++; + } + fps = calc_tenth_fps(frame_count, time_end - time_start); + rb->snprintf(str, sizeof(str), "1/1: %d.%d fps", fps / 10, fps % 10); + log_text(str); + + /* Test 2: quarter LCD update */ + make_gradient_rect(YUV_WIDTH/2, YUV_HEIGHT/2); + + frame_count = 0; + rb->sleep(0); /* sync to tick */ + time_start = *rb->current_tick; + while((time_end = *rb->current_tick) - time_start < DURATION) + { + rb->lcd_blit_yuv(yuvbuf, 0, 0, YUV_WIDTH, + part14_x, part14_y, part14_w, part14_h); + frame_count++; + } + fps = calc_tenth_fps(frame_count, time_end - time_start); + rb->snprintf(str, sizeof(str), "1/4: %d.%d fps", fps / 10, fps % 10); + log_text(str); +} +#endif + #ifdef HAVE_REMOTE_LCD static void time_remote_update(void) { @@ -318,6 +406,9 @@ enum plugin_status plugin_start(const void* parameter) #endif time_main_update(); rb->sleep(HZ); +#if defined(HAVE_LCD_COLOR) && (MEMORYSIZE > 2) + time_main_yuv(); +#endif #if LCD_DEPTH < 4 time_greyscale(); #endif diff --git a/docs/PLUGIN_API b/docs/PLUGIN_API index 0a256ff147..834b3522be 100644 --- a/docs/PLUGIN_API +++ b/docs/PLUGIN_API @@ -832,6 +832,19 @@ void lcd_blit_mono(const unsigned char *data, int x, int by, int width, int bhei \param stride \description +void lcd_blit_yuv(unsigned char * const src[3], int src_x, int src_y, int stride, int x, int y, int width, int height) + \group lcd + \conditions (LCD_DEPTH >= 16) + \param src[3] + \param src_x + \param src_y + \param stride + \param x + \param y + \param width + \param height + \description + void lcd_clear_display(void) \group lcd \description Clears the LCD and the framebuffer @@ -1226,6 +1239,12 @@ void lcd_vline(int x, int y1, int y2) \param y2 Y end coordinate \description Draws a vertical line at (=x=, =y1=) -> (=x=, =y2=) within current drawing mode +void lcd_yuv_set_options(unsigned options) + \group lcd + \conditions (LCD_DEPTH >= 16) && (defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) || defined(IRIVER_H10) || defined(COWON_D2)) + \param options + \description + void led(bool on) \param on \description diff --git a/firmware/SOURCES b/firmware/SOURCES index 9c273246be..abce59fe04 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -947,6 +947,7 @@ target/arm/sandisk/sansa-e200/powermgmt-e200.c #endif /* SANSA_E200 */ #ifdef SANSA_C200 +target/arm/sandisk/sansa-c200/lcd-as-c200.S target/arm/sandisk/sansa-c200/button-c200.c target/arm/sandisk/sansa-c200/powermgmt-c200.c #endif /* SANSA_C200 */ @@ -971,6 +972,7 @@ target/arm/philips/piezo.c target/arm/philips/sa9200/backlight-sa9200.c target/arm/philips/sa9200/button-sa9200.c target/arm/philips/sa9200/lcd-sa9200.c +target/arm/philips/sa9200/lcd-as-sa9200.S target/arm/philips/sa9200/power-sa9200.c target/arm/philips/sa9200/powermgmt-sa9200.c #endif /* PHILIPS_SA9200 */ @@ -988,6 +990,7 @@ target/arm/philips/fmradio_i2c-hdd.c target/arm/philips/hdd1630/backlight-hdd1630.c 
target/arm/philips/hdd1630/button-hdd1630.c target/arm/philips/hdd1630/lcd-hdd1630.c +target/arm/philips/hdd1630/lcd-as-hdd1630.S target/arm/philips/hdd1630/powermgmt-hdd1630.c #endif /* PHILIPS_HDD1630 */ @@ -995,6 +998,7 @@ target/arm/philips/hdd1630/powermgmt-hdd1630.c target/arm/philips/hdd6330/backlight-hdd6330.c target/arm/philips/hdd6330/button-hdd6330.c target/arm/philips/hdd6330/lcd-hdd6330.c +target/arm/philips/hdd6330/lcd-as-hdd6330.S target/arm/philips/hdd6330/powermgmt-hdd6330.c #endif /* PHILIPS_HDD6330 */ @@ -1073,6 +1077,7 @@ target/coldfire/iriver/h300/adc-h300.c target/coldfire/iriver/h300/backlight-h300.c target/coldfire/iriver/h300/button-h300.c target/coldfire/iriver/h300/pcf50606-h300.c +target/coldfire/iriver/h300/lcd-as-h300.S target/coldfire/iriver/h300/lcd-h300.c target/coldfire/iriver/h300/power-h300.c target/coldfire/iriver/h300/powermgmt-h300.c @@ -1105,6 +1110,7 @@ target/arm/iriver/h10/powermgmt-h10.c #ifdef IRIVER_H10 target/arm/iriver/h10/lcd-h10_20gb.c +target/arm/iriver/h10/lcd-as-h10.S #endif /* IRIVER_H10 */ #ifdef IRIVER_H10_5GB @@ -1308,11 +1314,13 @@ target/arm/ipod/button-clickwheel.c #ifdef IPOD_COLOR target/arm/ipod/backlight-4g_color.c target/arm/ipod/button-clickwheel.c +target/arm/ipod/lcd-as-color-nano.S #endif /* IPOD_COLOR */ #ifdef IPOD_NANO target/arm/ipod/backlight-nano_video.c target/arm/ipod/button-clickwheel.c +target/arm/ipod/lcd-as-color-nano.S #endif /* IPOD_NANO */ #ifdef IPOD_VIDEO @@ -1384,6 +1392,7 @@ target/arm/as3525/backlight-e200v2-fuze.c target/arm/as3525/dbop-as3525.c #ifndef BOOTLOADER target/arm/as3525/sansa-e200v2/powermgmt-e200v2.c +target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S #endif /* !BOOTLOADER */ #endif /* SANSA_E200V2 */ @@ -1410,6 +1419,7 @@ target/arm/as3525/backlight-e200v2-fuze.c target/arm/as3525/dbop-as3525.c #ifndef BOOTLOADER target/arm/as3525/sansa-fuze/powermgmt-fuze.c +target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S #endif /* !BOOTLOADER */ #endif /* SANSA_FUZE */ @@ -1420,6 +1430,7 @@ target/arm/as3525/sansa-fuzev2/button-fuzev2.c target/arm/as3525/dbop-as3525.c #ifndef BOOTLOADER target/arm/as3525/sansa-fuzev2/powermgmt-fuzev2.c +target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S #endif /* !BOOTLOADER */ #endif /* SANSA_FUZEV2 */ @@ -1540,6 +1551,7 @@ target/arm/s5l8702/pl080.c target/arm/s5l8702/dma-s5l8702.c target/arm/s5l8702/clocking-s5l8702.c target/arm/s5l8702/ipod6g/lcd-6g.c +target/arm/s5l8702/ipod6g/lcd-asm-6g.S target/arm/s5l8702/ipod6g/piezo-6g.c #if 0 //TODO target/arm/s5l8702/postmortemstub.S @@ -1756,6 +1768,7 @@ target/arm/samsung/power-yh82x_yh92x.c #ifdef SAMSUNG_YH820 target/arm/samsung/yh820/backlight-yh820.c target/arm/samsung/yh820/lcd-yh820.c +target/arm/samsung/yh820/lcd-as-yh820.S target/arm/samsung/yh820/powermgmt-yh820.c #endif /* SAMSUNG_YH820 */ @@ -1770,6 +1783,7 @@ target/arm/samsung/fmradio-yh92x.c #ifdef SAMSUNG_YH925 target/arm/samsung/yh925/backlight-yh925.c target/arm/samsung/yh925/lcd-yh925.c +target/arm/samsung/yh925/lcd-as-yh925.S target/arm/samsung/yh925/powermgmt-yh925.c #endif /* SAMSUNG_YH925 */ @@ -1788,6 +1802,7 @@ target/arm/pbell/vibe500/lcd-vibe500.c target/arm/pbell/vibe500/button-vibe500.c target/arm/pbell/vibe500/power-vibe500.c target/arm/pbell/vibe500/backlight-vibe500.c +target/arm/pbell/vibe500/lcd-as-vibe500.S target/arm/pbell/vibe500/powermgmt-vibe500.c #endif diff --git a/firmware/asm/arm/lcd-as-memframe.S b/firmware/asm/arm/lcd-as-memframe.S index 4bbae6fc0a..52ab0447c2 100644 --- a/firmware/asm/arm/lcd-as-memframe.S +++ 
b/firmware/asm/arm/lcd-as-memframe.S @@ -99,3 +99,594 @@ lcd_copy_buffer_rect: @ bgt 10b @ copy line @ ldmpc regs=r4-r11 @ restore regs and return .size lcd_copy_buffer_rect, .-lcd_copy_buffer_rect + + +/**************************************************************************** + * void lcd_write_yuv420_lines(fb_data *dst, + * unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode.lcd_write_yuv420_lines, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = dst + @ r1 = yuv_src + @ r2 = width + @ r3 = stride + stmfd sp!, { r4-r10, lr } @ save non-scratch + ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ r1 = scratch + sub r3, r3, #1 @ +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add lr, r9, r9, asl #2 @ r9 = Cr*101 + add lr, lr, r9, asl #5 @ + add r9, lr, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ +#if ARM_ARCH >= 6 + usat r1, #5, r1 @ clamp b + usat lr, #5, lr @ clamp r + usat r7, #6, r7 @ clamp g +#else + orr r12, r1, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r1, #31 @ clamp b + mvnhi r1, r1, asr #31 @ + andhi r1, r1, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ +#endif + @ + ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride) + @ + orr r1, r1, r7, lsl #5 @ r4 |= (g << 5) + orr r1, r1, lr, lsl #11 @ r4 = b | (r << 11) + +#if LCD_WIDTH >= LCD_HEIGHT + strh r1, [r0] @ +#elif LCD_WIDTH < 256 + strh r1, [r0], #LCD_WIDTH @ store pixel +#else + strh r1, [r0] @ +#endif + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ +#if ARM_ARCH >= 6 + usat r1, #5, r1 @ clamp b + usat lr, #5, lr @ clamp r + usat r7, #6, r7 @ clamp g +#else + orr r12, r1, lr @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r1, #31 @ clamp b + mvnhi r1, r1, asr #31 @ + andhi r1, r1, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ +#endif + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r1, r1, lr, lsl #11 @ r1 = b | (r << 11) + orr r1, r1, r7, lsl #5 @ r1 |= (g << 5) + +#if LCD_WIDTH >= LCD_HEIGHT + add r0, r0, #2*LCD_WIDTH @ + strh r1, [r0] @ store pixel + sub r0, r0, #2*LCD_WIDTH @ +#elif LCD_WIDTH < 256 + strh r1, [r0, #-LCD_WIDTH-2] @ store pixel +#else + strh r1, [r0, #-2] @ + add r0, r0, #LCD_WIDTH @ +#endif + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ +#if ARM_ARCH >= 6 + usat r1, #5, r1 @ clamp b + usat lr, #5, lr @ clamp r + usat r7, #6, r7 @ clamp g +#else + orr r12, r1, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r1, #31 @ clamp b + mvnhi r1, r1, asr #31 @ + andhi r1, r1, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ +#endif + @ + ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride) + @ + orr r1, r1, r7, lsl #5 @ r1 = b | (g << 5) + orr r1, r1, lr, lsl #11 @ r1 |= (r << 11) + +#if LCD_WIDTH >= LCD_HEIGHT + strh r1, [r0, #2] +#elif LCD_WIDTH < 256 + strh r1, [r0, #LCD_WIDTH]! @ store pixel +#else + strh r1, [r0] @ +#endif + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ +#if ARM_ARCH >= 6 + usat r1, #5, r1 @ clamp b + usat lr, #5, lr @ clamp r + usat r7, #6, r7 @ clamp g +#else + orr r12, r1, lr @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r1, #31 @ clamp b + mvnhi r1, r1, asr #31 @ + andhi r1, r1, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ +#endif + @ + orr r12, r1, lr, lsl #11 @ r12 = b | (r << 11) + orr r12, r12, r7, lsl #5 @ r12 |= (g << 5) + +#if LCD_WIDTH >= LCD_HEIGHT + add r0, r0, #2*LCD_WIDTH + strh r12, [r0, #2] +#if LCD_WIDTH <= 512 + sub r0, r0, #(2*LCD_WIDTH)-4 +#else + sub r0, r0, #(2*LCD_WIDTH) + add r0, r0, #4 +#endif +#else + strh r12, [r0, #-2] @ store pixel +#if LCD_WIDTH < 256 + add r0, r0, #2*LCD_WIDTH @ +#else + add r0, r0, #LCD_WIDTH @ +#endif +#endif + @ + subs r2, r2, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r10 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(fb_data *dst, + * unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. + * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|rotated|use order): + * 5 3 4 2 2 6 3 7 row0 row2 > down + * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left + * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/ + * 0 6 1 7 5 1 4 0 + */ + .section .icode.lcd_write_yuv420_lines_odither, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = dst + @ r1 = yuv_src + @ r2 = width + @ r3 = stride + @ [sp] = x_screen + @ [sp+4] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + sub r3, r3, #1 @ + add r1, sp, #36 @ Line up pattern and kernel quadrant + ldmia r1, { r12, r14 } @ + eor r14, r14, r12 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r1, r8, r7 @ r1 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256 + add r1, r12, r1, lsr #8 @ + @ + sub r12, 
r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r1, r1, r12 @ b = r1 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ +#if ARM_ARCH >= 6 + usat r11, #5, r11, asr #11 @ clamp r + usat r7, #6, r7, asr #9 @ clamp g + usat r1, #5, r1, asr #10 @ clamp b + @ + ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride) + @ + orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11) + orr r1, r1, r7, lsl #5 @ r1 |= (g << 5) +#else + orr r12, r1, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r1, asr #15 @ clamp b + mvnne r1, r12, lsr #15 @ + andne r1, r1, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r1, r11, r1, lsr #10 @ (b >> 10) +#endif + @ +#if LCD_WIDTH >= LCD_HEIGHT + strh r1, [r0] @ +#elif LCD_WIDTH < 256 + strh r1, [r0], #LCD_WIDTH @ store pixel +#else + strh r1, [r0] @ +#endif + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r1, r8, r7 @ r1 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256 + add r1, r12, r1, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r1, r1, r12 @ b = r1 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ +#if ARM_ARCH >= 6 + usat r11, #5, r11, asr #11 @ clamp r + usat r7, #6, r7, asr #9 @ clamp g + usat r1, #5, r1, asr #10 @ clamp b + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11) + orr r1, r1, r7, lsl #5 @ r1 |= (g << 5) +#else + orr r12, r1, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r1, asr #15 @ clamp b + mvnne r1, r12, lsr #15 @ + andne r1, r1, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r1, r11, r1, lsr #10 @ (b >> 10) +#endif + @ +#if LCD_WIDTH >= LCD_HEIGHT + add r0, r0, #2*LCD_WIDTH @ + strh r1, [r0] @ store pixel + sub r0, r0, #2*LCD_WIDTH @ +#elif LCD_WIDTH < 256 + strh r1, [r0, #-LCD_WIDTH-2] @ store pixel +#else + strh r1, [r0, #-2] @ store pixel + add r0, r0, #LCD_WIDTH @ +#endif + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r1, r8, r7 @ r1 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256 + add r1, r12, r1, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r1, r1, r12 @ b = r1 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ +#if ARM_ARCH >= 6 + usat r11, #5, r11, asr #11 @ clamp r + usat r7, #6, r7, asr #9 @ clamp g + usat r1, #5, r1, asr #10 @ clamp b + @ + ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride) + @ + orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11) + orr r1, r1, r7, lsl #5 @ r1 |= (g << 5) +#else + orr r12, r1, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r1, asr #15 @ clamp b + mvnne r1, r12, lsr #15 @ + andne r1, r1, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r1, r11, r1, lsr #10 @ (b >> 10) +#endif + @ +#if LCD_WIDTH >= LCD_HEIGHT + strh r1, [r0, #2] +#elif LCD_WIDTH < 256 + strh r1, [r0, #LCD_WIDTH]! 
@ store pixel +#else + strh r1, [r0] @ +#endif + + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r1, r8, r7 @ r1 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256 + add r1, r12, r1, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r1, r1, r14 @ b = r1 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ +#if ARM_ARCH >= 6 + usat r11, #5, r11, asr #11 @ clamp r + usat r7, #6, r7, asr #9 @ clamp g + usat r1, #5, r1, asr #10 @ clamp b + @ + orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11) + orr r1, r1, r7, lsl #5 @ r1 |= (g << 5) +#else + orr r12, r1, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r1, asr #15 @ clamp b + mvnne r1, r12, lsr #15 @ + andne r1, r1, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r1, r11, r1, lsr #10 @ (b >> 10) +#endif + @ +#if LCD_WIDTH >= LCD_HEIGHT + add r0, r0, #2*LCD_WIDTH + strh r1, [r0, #2] @ store pixel +#if LCD_WIDTH <= 512 + sub r0, r0, #(2*LCD_WIDTH)-4 +#else + sub r0, r0, #(2*LCD_WIDTH) + add r0, r0, #4 +#endif +#else + strh r1, [r0, #-2] @ store pixel +#if LCD_WIDTH < 256 + add r0, r0, #2*LCD_WIDTH @ +#else + add r0, r0, #LCD_WIDTH @ +#endif +#endif + @ + subs r2, r2, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/asm/lcd-as-memframe.c b/firmware/asm/lcd-as-memframe.c index f7f3473fad..fb31fa1953 100644 --- a/firmware/asm/lcd-as-memframe.c +++ b/firmware/asm/lcd-as-memframe.c @@ -9,3 +9,171 @@ void lcd_copy_buffer_rect(fb_data *dst, fb_data *src, int width, int height) dst += LCD_WIDTH; } while (--height); } + +#define YFAC (74) +#define RVFAC (101) +#define GUFAC (-24) +#define GVFAC (-51) +#define BUFAC (128) + +static inline int clamp(int val, int min, int max) +{ + if (val < min) + val = min; + else if (val > max) + val = max; + return val; +} + +extern void lcd_write_yuv420_lines(fb_data *dst, + unsigned char const * const src[3], + int width, + int stride) +{ + /* Draw a partial YUV colour bitmap - similiar behavior to lcd_blit_yuv + in the core */ + const unsigned char *ysrc, *usrc, *vsrc; + fb_data *row_end; + + /* width and height must be >= 2 and an even number */ + width &= ~1; + +#if LCD_WIDTH >= LCD_HEIGHT + row_end = dst + width; +#else + row_end = dst + LCD_WIDTH * width; +#endif + + ysrc = src[0]; + usrc = src[1]; + vsrc = src[2]; + + /* stride => amount to jump from end of last row to start of next */ + stride -= width; + + /* upsampling, YUV->RGB conversion and reduction to RGB in one go */ + + do + { + int y, cb, cr, rv, guv, bu, r, g, b; + + y = YFAC*(*ysrc++ - 16); + cb = *usrc++ - 128; + cr = *vsrc++ - 128; + + rv = RVFAC*cr; + guv = GUFAC*cb + 
GVFAC*cr; + bu = BUFAC*cb; + + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + + y = YFAC*(*ysrc++ - 16); + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + } + while (dst < row_end); + + ysrc += stride; + usrc -= width >> 1; + vsrc -= width >> 1; + +#if LCD_WIDTH >= LCD_HEIGHT + row_end += LCD_WIDTH; + dst += LCD_WIDTH - width; +#else + row_end -= 1; + dst -= LCD_WIDTH*width + 1; +#endif + + do + { + int y, cb, cr, rv, guv, bu, r, g, b; + + y = YFAC*(*ysrc++ - 16); + cb = *usrc++ - 128; + cr = *vsrc++ - 128; + + rv = RVFAC*cr; + guv = GUFAC*cb + GVFAC*cr; + bu = BUFAC*cb; + + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + + y = YFAC*(*ysrc++ - 16); + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + } + while (dst < row_end); +} + +void lcd_write_yuv420_lines_odither(fb_data *dst, + unsigned char const * const src[3], + int width, int stride, + int x_screen, int y_screen) +__attribute__((alias("lcd_write_yuv420_lines"))); diff --git a/firmware/drivers/lcd-color-common.c b/firmware/drivers/lcd-color-common.c index cce0bbbf4e..ae0fe519bc 100644 --- a/firmware/drivers/lcd-color-common.c +++ b/firmware/drivers/lcd-color-common.c @@ -220,6 +220,195 @@ static inline int clamp(int val, int min, int max) return val; } +#ifndef _WIN32 +/* + * weak attribute doesn't work for win32 as of gcc 4.6.2 and binutils 2.21.52 + * When building win32 simulators, we won't be using an optimized version of + * lcd_blit_yuv(), so just don't use the weak attribute. 
+ */ +__attribute__((weak)) +#endif +void lcd_yuv_set_options(unsigned options) +{ + (void)options; +} + +/* Draw a partial YUV colour bitmap */ +#ifndef _WIN32 +__attribute__((weak)) +#endif +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + const unsigned char *ysrc, *usrc, *vsrc; + int linecounter; + fb_data *dst, *row_end; + long z; + + /* width and height must be >= 2 and an even number */ + width &= ~1; + linecounter = height >> 1; + +#if LCD_WIDTH >= LCD_HEIGHT + dst = FBADDR(x, y); + row_end = dst + width; +#else + dst = FBADDR(LCD_WIDTH - y - 1, x); + row_end = dst + LCD_WIDTH * width; +#endif + + z = stride * src_y; + ysrc = src[0] + z + src_x; + usrc = src[1] + (z >> 2) + (src_x >> 1); + vsrc = src[2] + (usrc - src[1]); + + /* stride => amount to jump from end of last row to start of next */ + stride -= width; + + /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ + + do + { + do + { + int y, cb, cr, rv, guv, bu, r, g, b; + + y = YFAC*(*ysrc++ - 16); + cb = *usrc++ - 128; + cr = *vsrc++ - 128; + + rv = RVFAC*cr; + guv = GUFAC*cb + GVFAC*cr; + bu = BUFAC*cb; + + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + + y = YFAC*(*ysrc++ - 16); + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + } + while (dst < row_end); + + ysrc += stride; + usrc -= width >> 1; + vsrc -= width >> 1; + +#if LCD_WIDTH >= LCD_HEIGHT + row_end += LCD_WIDTH; + dst += LCD_WIDTH - width; +#else + row_end -= 1; + dst -= LCD_WIDTH*width + 1; +#endif + + do + { + int y, cb, cr, rv, guv, bu, r, g, b; + + y = YFAC*(*ysrc++ - 16); + cb = *usrc++ - 128; + cr = *vsrc++ - 128; + + rv = RVFAC*cr; + guv = GUFAC*cb + GVFAC*cr; + bu = BUFAC*cb; + + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + + y = YFAC*(*ysrc++ - 16); + r = y + rv; + g = y + guv; + b = y + bu; + + if ((unsigned)(r | g | b) > 64*256-1) + { + r = clamp(r, 0, 64*256-1); + g = clamp(g, 0, 64*256-1); + b = clamp(b, 0, 64*256-1); + } + + *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6); + +#if LCD_WIDTH >= LCD_HEIGHT + dst++; +#else + dst += LCD_WIDTH; +#endif + } + while (dst < row_end); + + ysrc += stride; + usrc += stride >> 1; + vsrc += stride >> 1; + +#if LCD_WIDTH >= LCD_HEIGHT + row_end += LCD_WIDTH; + dst += LCD_WIDTH - width; +#else + row_end -= 1; + dst -= LCD_WIDTH*width + 1; +#endif + } + while (--linecounter > 0); + +#if LCD_WIDTH >= LCD_HEIGHT + lcd_update_rect(x, y, width, height); +#else + lcd_update_rect(LCD_WIDTH - y - height, x, height, width); +#endif +} + /* Fill a rectangle with a gradient. This function draws only the partial * gradient. It assumes the original gradient is src_height high and skips * the first few rows. 
This is useful for drawing only the bottom half of diff --git a/firmware/drivers/lcd-memframe.c b/firmware/drivers/lcd-memframe.c index 357b4af32a..bb1682b074 100644 --- a/firmware/drivers/lcd-memframe.c +++ b/firmware/drivers/lcd-memframe.c @@ -110,3 +110,101 @@ void lcd_update_rect(int x, int y, int width, int height) } } #endif /* LCD_OPTIMIZED_UPDATE_RECT */ + + +/*** YUV functions ***/ +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */ +extern void lcd_write_yuv420_lines(fb_data *dst, + unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(fb_data *dst, + unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); + +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +#ifndef LCD_OPTIMIZED_BLIT_YUV +/* Performance function to blit a YUV bitmap directly to the LCD + * src_x, src_y, width and height should be even and within the LCD's + * boundaries. + * + * For portrait LCDs, show it rotated counterclockwise by 90 degrees + */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + /* Macrofy the bits that change between orientations */ +#if CONFIG_ORIENTATION == SCREEN_PORTRAIT + #define LCD_FRAMEBUF_ADDR_ORIENTED(col, row) \ + LCD_FRAMEBUF_ADDR(row, col) + #define lcd_write_yuv420_lines_odither_oriented(dst, src, w, s, col, row) \ + lcd_write_yuv420_lines_odither(dst, src, w, s, row, col) + #define YUV_NEXTLINE() dst -= 2 + #define YUV_DITHER_NEXTLINE() dst -= 2, y -= 2 +#else + #define LCD_FRAMEBUF_ADDR_ORIENTED(col, row) \ + LCD_FRAMEBUF_ADDR(col, row) + #define lcd_write_yuv420_lines_odither_oriented(dst, src, w, s, col, row) \ + lcd_write_yuv420_lines_odither(dst, src, w, s, col, row) + #define YUV_NEXTLINE() dst += 2*LCD_FBWIDTH + #define YUV_DITHER_NEXTLINE() dst += 2*LCD_FBWIDTH, y += 2 +#endif + + if (!lcd_write_enabled()) + return; + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height >>= 1; + +#if CONFIG_ORIENTATION == SCREEN_PORTRAIT + /* Adjust portrait coordinates to make (0, 0) the upper right corner */ + y = LCD_WIDTH - 1 - y; +#endif + + fb_data *dst = LCD_FRAMEBUF_ADDR_ORIENTED(x, y); + int z = stride*src_y; + + unsigned char const * yuv_src[3]; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + if (lcd_yuv_options & LCD_YUV_DITHER) + { + do + { + lcd_write_yuv420_lines_odither_oriented(dst, yuv_src, width, + stride, x, y); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + YUV_DITHER_NEXTLINE(); + } + while (--height > 0); + } + else + { + do + { + lcd_write_yuv420_lines(dst, yuv_src, width, stride); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + YUV_NEXTLINE(); + } + while (--height > 0); + } +} +#endif /* LCD_OPTIMIZED_BLIT_YUV */ diff --git a/firmware/export/config/mrobe500.h b/firmware/export/config/mrobe500.h index 0ecec80e02..621c0b75ca 100644 --- a/firmware/export/config/mrobe500.h +++ b/firmware/export/config/mrobe500.h @@ -220,7 +220,7 @@ #define HAVE_USB_HID_MOUSE /* Define this if hardware supports alternate blitting */ -#define 
HAVE_LCD_MODES (LCD_MODE_RGB565 | LCD_MODE_PAL256) +#define HAVE_LCD_MODES (LCD_MODE_RGB565 | LCD_MODE_YUV | LCD_MODE_PAL256) #define CONFIG_CPU DM320 diff --git a/firmware/export/lcd.h b/firmware/export/lcd.h index 29c34b698b..67b22190ad 100644 --- a/firmware/export/lcd.h +++ b/firmware/export/lcd.h @@ -145,6 +145,7 @@ struct scrollinfo; #if defined(HAVE_LCD_MODES) void lcd_set_mode(int mode); #define LCD_MODE_RGB565 0x00000001 +#define LCD_MODE_YUV 0x00000002 #define LCD_MODE_PAL256 0x00000004 #if HAVE_LCD_MODES & LCD_MODE_PAL256 @@ -235,7 +236,15 @@ extern bool lcd_putsxy_scroll_func(int x, int y, const unsigned char *string, void *data, int x_offset); /* performance function */ -#if !defined(HAVE_LCD_COLOR) +#if defined(HAVE_LCD_COLOR) +#if MEMORYSIZE > 2 +#define LCD_YUV_DITHER 0x1 + extern void lcd_yuv_set_options(unsigned options); + extern void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height); +#endif /* MEMORYSIZE > 2 */ +#else extern void lcd_blit_mono(const unsigned char *data, int x, int by, int width, int bheight, int stride); extern void lcd_blit_grey_phase(unsigned char *values, unsigned char *phases, diff --git a/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S b/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S new file mode 100644 index 0000000000..feffe6fb96 --- /dev/null +++ b/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S @@ -0,0 +1,550 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007 by Jens Arnold + * Heavily based on lcd-as-memframe.c by Michael Sevakis + * Adapted for Sansa Fuze/e200v2 by Rafaël Carré + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +#define DBOP_BUSY (1<<10) + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r11, lr } @ save non-scratch + + mov r3, #0xC8000000 @ + orr r3, r3, #0x120000 @ r3 = DBOP_BASE + + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ r0 = scratch + ldr r12, [r3, #8] @ + sub r2, r2, #1 @ stride -= 1 + orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14) (32bit mode) +#ifdef SANSA_FUZEV2 + bic r12, r12, #1<<13 @ DBOP_CTRL &= ~(1<<13),still 32bit mode +#endif + str r12, [r3, #8] @ +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add lr, r9, r9, asl #2 @ r9 = Cr*101 + add lr, lr, r9, asl #5 @ + add r9, lr, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b + orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b + orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b +#ifdef SANSA_FUZEV2 + mov r0, r11, lsr #8 @ + bic r11, r11, #0xff00 @ + orr r11, r0, r11, lsl #8 @ swap bytes +#endif + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b + orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b + +#ifdef SANSA_FUZEV2 + mov r7, r0, lsr #8 @ + bic r7, r7, #0xff00 @ + orr r0, r7, r0, lsl #8 @ swap bytes +#endif + + orr r0, r11, r0, lsl#16 @ pack with 2nd pixel + str r0, [r3, #0x10] @ write pixel + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + @ + orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b + orr r11, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b + +#ifdef SANSA_FUZEV2 + mov r0, r11, lsr #8 @ + bic r11, r11, #0xff00 @ + orr r11, r0, r11, lsl #8 @ swap byte +#endif + + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b + orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b + +#ifdef SANSA_FUZEV2 + mov r7, r0, lsr #8 @ + bic r7, r7, #0xff00 @ + orr r0, r7, r0, lsl #8 @ swap bytes +#endif + + orr r0, r11, r0, lsl#16 @ pack with 2nd pixel + str r0, [r3, #0x10] @ write pixel + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ +1: @ busy + @ writing at max 110*32 (LCD_WIDTH/2), the fifo is bigger + @ so polling fifo empty only after each line is save + ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS + tst r7, #DBOP_BUSY @ fifo not empty? + beq 1b @ + + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. 
+ * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|rotated|use order): + * 5 3 4 2 2 6 3 7 row0 row2 > down + * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left + * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/ + * 0 6 1 7 5 1 4 0 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r14, [sp, #36] @ Line up pattern and kernel quadrant + sub r2, r2, #1 @ stride =- 1 + eor r14, r14, r3 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + + mov r3, #0xC8000000 @ + orr r3, r3, #0x120000 @ r3 = DBOP_BASE, need to be redone + @ due to lack of registers + ldr r12, [r3, #8] @ + orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14) +#ifdef SANSA_FUZEV2 + bic r12, r12, #1<<13 @ DBOP_CTRL &= ~(1<<13), still 32bit mode +#endif + str r12, [r3, #8] @ (32bit mode) +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r0, lsr #10 @ (b >> 10) +#ifdef SANSA_FUZEV2 + mov r7, r3, lsr #8 @ + bic r3, r3, #0xff00 @ + orr r3, r7, r3, lsl #8 @ swap pixel +#endif + @ save pixel + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r0, r11, r0, lsr #10 @ (b >> 10) +#ifdef SANSA_FUZEV2 + mov r7, r0, lsr #8 @ + bic r0, r0, #0xff00 @ + orr r0, r7, r0, lsl #8 @ swap pixel +#endif + orr r3, r3, r0, lsl#16 @ pack with 2nd pixel + mov r0, #0xC8000000 @ + orr r0, r0, #0x120000 @ r3 = DBOP_BASE + + str r3, [r0, #0x10] @ write pixel + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r0, lsr #10 @ (b >> 10) +#ifdef SANSA_FUZEV2 + mov r7, r3, lsr #8 @ + bic r3, r3, #0xff00 @ + orr r3, r7, r3, lsl #8 @ swap pixel +#endif + @ save pixel + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r0, r0, r14 @ b = r0 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r0, r11, r0, lsr #10 @ (b >> 10) +#ifdef SANSA_FUZEV2 + mov r7, r0, lsr #8 @ + bic r0, r0, #0xff00 @ + orr r0, r7, r0, lsl #8 @ swap pixel +#endif + orr r3, r3, r0, lsl#16 @ pack with 2nd pixel + mov r0, #0xC8000000 @ + orr r0, r0, #0x120000 @ r3 = DBOP_BASE + + str r3, [r0, #0x10] @ write pixel + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ +1: @ busy @ + @ writing at max 110*32 (LCD_WIDTH/2), the fifo is bigger (128*32) + @ so polling fifo empty only after each line is save + ldr r7, [r0,#0xc] @ r7 = DBOP_STATUS + tst r7, #DBOP_BUSY @ fifo not empty? + beq 1b @ + + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/as3525/lcd-fuze.c b/firmware/target/arm/as3525/lcd-fuze.c index b1f62a1c95..a1ccea348d 100644 --- a/firmware/target/arm/as3525/lcd-fuze.c +++ b/firmware/target/arm/as3525/lcd-fuze.c @@ -197,6 +197,86 @@ static void lcd_window_y(int ymin, int ymax) lcd_write_reg(R_RAM_ADDR_SET, ymin); } +static unsigned lcd_yuv_options = 0; + +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + + +#ifndef BOOTLOADER +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); + +/* Performance function to blit a YUV bitmap directly to the LCD + * src_x, src_y, width and height should be even + * x, y, width and height have to be within LCD bounds + */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned char const * yuv_src[3]; + off_t z; + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height >>= 1; + + z = stride*src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_VIDEO); + + lcd_window_x(x, x + width - 1); + + if (lcd_yuv_options & LCD_YUV_DITHER) + { + do + { + lcd_window_y(y, y + 1); + + lcd_write_cmd(R_WRITE_DATA_2_GRAM); + + lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } + else + { + do + { + lcd_window_y(y, y + 1); + + lcd_write_cmd(R_WRITE_DATA_2_GRAM); + + lcd_write_yuv420_lines(yuv_src, width, stride); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } +} + +#endif + + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c b/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c index 141340c003..f69ad48793 100644 --- a/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c +++ b/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c @@ -336,6 +336,104 @@ bool lcd_active(void) /*** update functions ***/ +static unsigned lcd_yuv_options = 0; + +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + + +#ifndef BOOTLOADER +static void lcd_window_blit(int xmin, int ymin, int xmax, int ymax) +{ + if (!display_flipped) + { + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, + ((LCD_WIDTH-1 - xmin) << 8) | (LCD_WIDTH-1 - xmax)); + lcd_write_reg(R_VERT_RAM_ADDR_POS, (ymax << 8) | ymin); + lcd_write_reg(R_RAM_ADDR_SET, + (ymin << 8) | (LCD_WIDTH-1 - xmin)); + } + else + { + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (xmax << 8) | xmin); + lcd_write_reg(R_VERT_RAM_ADDR_POS, (ymax << 8) | ymin); + lcd_write_reg(R_RAM_ADDR_SET, (ymax << 8) | xmin); + } +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); + +/* Performance function to blit a YUV bitmap directly to the LCD + * src_x, src_y, width and height should be even + * x, y, width and height have to be within LCD bounds + */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned char const * yuv_src[3]; + off_t z; + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height >>= 1; + + z = stride*src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + lcd_write_reg(R_ENTRY_MODE, + display_flipped ? R_ENTRY_MODE_VIDEO_FLIPPED : R_ENTRY_MODE_VIDEO_NORMAL + ); + + if (lcd_yuv_options & LCD_YUV_DITHER) + { + do + { + lcd_window_blit(y, x, y+1, x+width-1); + + lcd_write_cmd(R_WRITE_DATA_2_GRAM); + + lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } + else + { + do + { + lcd_window_blit(y, x, y+1, x+width-1); + + lcd_write_cmd(R_WRITE_DATA_2_GRAM); + + lcd_write_yuv420_lines(yuv_src, width, stride); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } +} + +#endif + + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S new file mode 100644 index 0000000000..f6f9cc5be3 --- /dev/null +++ b/firmware/target/arm/ipod/lcd-as-color-nano.S @@ -0,0 +1,287 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id:$ + * + * Copyright (C) 2010-2011 by Andree Buschmann + * + * Generic asm helper function used by YUV blitting. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * #define FORCE_FIFO_WAIT + * + * This is not needed in YUV blitting when the LCD IF is fast enough. In this + * case YUV-to-RGB conversion per pixel needs longer than the transfer of a + * pixel via the LCD IF. 
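+ * On a slower interface the FIFO can fill faster than it drains, so the
+ * wait has to stay enabled to avoid overrunning it.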
+ ****************************************************************************/ + +#include "config.h" + +/* Set FIFO wait for both iPod Color and iPod nano1G until we know for which + * devices we can switch this off. */ +#define FORCE_FIFO_WAIT + + .section .icode, "ax", %progbits + +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * const unsigned LCD_BASE, + * int width, + * int stride); + * + * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is: + * |R| |1.164 0.000 1.596| |Y' - 16| + * |G| = |1.164 -0.391 -0.813| |Pb - 128| + * |B| |1.164 2.018 0.000| |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop + * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within + * the second loop these chroma offset are reloaded from buffer. Within each + * loop two pixels are calculated and written to LCD. + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + /* r0 = src = yuv_src */ + /* r1 = dst = LCD_BASE */ + /* r2 = width */ + /* r3 = stride */ + stmfd sp!, { r4-r10, lr } /* save non-scratch */ + ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */ + /* r10 = yuv_src[1] = Cb_p */ + /* r12 = yuv_src[2] = Cr_p */ + add r3, r9, r3 /* r3 = &ysrc[stride] */ + add r4, r2, r2, asr #1 /* chroma buffer lenght = width/2 *3 */ + mov r4, r4, asl #2 /* use words for str/ldm possibility */ + add r4, r4, #19 /* plus room for 4 additional words, */ + bic r4, r4, #3 /* rounded up to multiples of 4 byte */ + sub sp, sp, r4 /* and allocate on stack */ + stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */ + + mov r7, r2 /* r7 = loop count */ + add r8, sp, #16 /* chroma buffer */ + add lr, r1, #0x100 /* LCD data port = LCD2_BASE + 0x100 */ + + /* 1st loop start */ +10: /* loop start */ + + ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */ + ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */ + + sub r0, r0, #128 /* r0 = Cb-128 */ + sub r1, r1, #128 /* r1 = Cr-128 */ + + add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */ + add r2, r2, r2, asl #4 + add r2, r2, r0, asl #3 + add r2, r2, r0, asl #4 + + add r4, r1, r1, asl #2 /* r1 = Cr*101 */ + add r4, r4, r1, asl #5 + add r1, r4, r1, asl #6 + + add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */ + mov r1, r1, asr #9 + rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */ + mov r2, r2, asr #8 + add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */ + mov r0, r0, asr #2 + stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ + + /* 1st loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... 
*/ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_1 and save to r4 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ + + /* 1st loop, second pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r0, r6, r5 /* check if clamping is needed... */ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ +#ifdef FORCE_FIFO_WAIT + /* wait for FIFO half full */ +.fifo_wait1: + ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */ + tst r3, #0x1000000 + beq .fifo_wait1 +#endif + + mov r3, r4, lsl #8 /* swap pixel_1 */ + and r3, r3, #0xff00 + add r4, r3, r4, lsr #8 + + orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */ + mov r5, r5, lsr #8 + orr r4, r4, r5, lsl #16 + + str r4, [lr] /* write pixel_1 and pixel_2 */ + + subs r7, r7, #2 /* check for loop end */ + bgt 10b /* back to beginning */ + /* 1st loop end */ + + /* Reload several registers for pointer rewinding for next loop */ + add r8, sp, #16 /* chroma buffer */ + ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */ + /* r7 = loop count */ + /* r9 = &ysrc[stride] */ + + /* 2nd loop start */ +20: /* loop start */ + /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */ + ldmia r8!, {r0-r2} + + /* 2nd loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... */ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + /* calculate pixel_1 and save to r4 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ + + /* 2nd loop, second pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r0, r6, r5 /* check if clamping is needed... 
*/ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ +#ifdef FORCE_FIFO_WAIT + /* wait for FIFO half full */ +.fifo_wait2: + ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */ + tst r3, #0x1000000 + beq .fifo_wait2 +#endif + + mov r3, r4, lsl #8 /* swap pixel_1 */ + and r3, r3, #0xff00 + add r4, r3, r4, lsr #8 + + orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */ + mov r5, r5, lsr #8 + orr r4, r4, r5, lsl #16 + + str r4, [lr] /* write pixel_1 and pixel_2 */ + + subs r7, r7, #2 /* check for loop end */ + bgt 20b /* back to beginning */ + /* 2nd loop end */ + + ldr r3, [sp, #12] + add sp, sp, r3 /* deallocate buffer */ + ldmpc regs=r4-r10 /* restore registers */ + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines diff --git a/firmware/target/arm/ipod/lcd-color_nano.c b/firmware/target/arm/ipod/lcd-color_nano.c index 71ae22cb23..67d26aa862 100644 --- a/firmware/target/arm/ipod/lcd-color_nano.c +++ b/firmware/target/arm/ipod/lcd-color_nano.c @@ -202,6 +202,62 @@ static void lcd_setup_drawing_region(int x, int y, int width, int height) } } +/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + const unsigned int lcd_baseadress, + int width, + int stride); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + int z; + unsigned char const * yuv_src[3]; + + width = (width + 1) & ~1; /* ensure width is even */ + height = (height + 1) & ~1; /* ensure height is even */ + + lcd_setup_drawing_region(x, y, width, height); + + z = stride * src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + while (height > 0) { + int r, h, pixels_to_write; + + pixels_to_write = (width * height) * 2; + h = height; + + /* calculate how much we can do in one go */ + if (pixels_to_write > 0x10000) { + h = ((0x10000/2) / width) & ~1; /* ensure h is even */ + pixels_to_write = (width * h) * 2; + } + + LCD2_BLOCK_CTRL = 0x10000080; + LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1); + LCD2_BLOCK_CTRL = 0x34000000; + + r = h>>1; /* lcd_write_yuv420_lines writes two lines at once */ + do { + lcd_write_yuv420_lines(yuv_src, LCD2_BASE, width, stride); + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; + yuv_src[2] += stride >> 1; + } while (--r > 0); + + /* transfer of pixels_to_write bytes finished */ + while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); + LCD2_BLOCK_CONFIG = 0; + + height -= h; + } +} + /* Helper function writes 'count' consecutive pixels from src to LCD IF */ static void lcd_write_line(int count, unsigned long *src) { diff --git a/firmware/target/arm/ipod/video/lcd-as-video.S b/firmware/target/arm/ipod/video/lcd-as-video.S index 1b982c75ce..47155b8c75 100644 --- a/firmware/target/arm/ipod/video/lcd-as-video.S +++ b/firmware/target/arm/ipod/video/lcd-as-video.S @@ -63,3 +63,240 @@ lcd_write_data: /* r1 = 
pixel count, must be even */ strne r3, [lr] ldmpc regs=r4 + +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * unsigned bcmaddr + * int width, + * int stride); + * + * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is: + * |R| |1.164 0.000 1.596| |Y' - 16| + * |G| = |1.164 -0.391 -0.813| |Pb - 128| + * |B| |1.164 2.018 0.000| |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Converts two lines from YUV to RGB565 and writes to BCM at once. First loop + * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within + * the second loop these chroma offset are reloaded from buffer. + * Within each loop two pixels are calculated and written to BCM. Before each + * loop the desired destination address is transmitted to BCM. + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + /* r0 = src = yuv_src */ + /* r1 = dst = bcmaddr */ + /* r2 = width */ + /* r3 = stride */ + stmfd sp!, { r4-r10, lr } /* save non-scratch */ + ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */ + /* r10 = yuv_src[1] = Cb_p */ + /* r12 = yuv_src[2] = Cr_p */ + add r3, r9, r3 /* r3 = &ysrc[stride] */ + add r4, r2, r2, asr #1 /* chroma buffer lenght = width/2 *3 */ + mov r4, r4, asl #2 /* use words for str/ldm possibility */ + add r4, r4, #19 /* plus room for 4 additional words, */ + bic r4, r4, #3 /* rounded up to multiples of 4 byte */ + sub sp, sp, r4 /* and allocate on stack */ + stmia sp, {r1-r4} /* bcmaddr, width, &ysrc[stride], stack_alloc */ + + mov r7, r2 /* r7 = loop count */ + add r8, sp, #16 /* chroma buffer */ + mov lr, #0x30000000 /* LCD data port */ + + /* The following writes dest address to BCM and waits for write ready */ + orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */ + orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */ + str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */ +.busy_1: + ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */ + tst r1, #0x2 + beq .busy_1 + + /* 1st loop start */ +10: /* loop start */ + + ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */ + ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */ + + sub r0, r0, #128 /* r0 = Cb-128 */ + sub r1, r1, #128 /* r1 = Cr-128 */ + + add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */ + add r2, r2, r2, asl #4 + add r2, r2, r0, asl #3 + add r2, r2, r0, asl #4 + + add r4, r1, r1, asl #2 /* r1 = Cr*101 */ + add r4, r4, r1, asl #5 + add r1, r4, r1, asl #6 + + add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */ + mov r1, r1, asr #9 + rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */ + mov r2, r2, asr #8 + add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */ + mov r0, r0, asr #2 + stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ + + /* 1st loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... 
*/ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_1 and save to r5 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ + + /* 1st loop, second pixel */ + ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ + sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ + add r3, r4, r4, asl #2 + add r4, r3, r4, asl #5 + + add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r0, r6, r4 /* check if clamping is needed... */ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ + orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ + str r4, [lr] /* write packed pixels */ + + subs r7, r7, #2 /* check for loop end */ + bgt 10b /* back to beginning */ + /* 1st loop end */ + + /* Reload several registers for pointer rewinding for next loop */ + add r8, sp, #16 /* chroma buffer */ + ldmia sp, { r1, r7, r9} /* r1 = bcmaddr */ + /* r7 = loop count */ + /* r9 = &ysrc[stride] */ + + /* The following writes dest address to BCM and waits for write ready */ + orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */ + orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */ + add r1, r1, #640 /* dst += (LCD_WIDTH*2) */ + str r1, [r2] /* BCM_WR_ADDR32 = dst */ +.busy_2: + ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */ + tst r1, #0x2 + beq .busy_2 + + + /* 2nd loop start */ +20: /* loop start */ + /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */ + ldmia r8!, {r0-r2} + + /* 2nd loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... */ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + /* calculate pixel_1 and save to r5 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ + + /* 2nd loop, second pixel */ + ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ + sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ + add r3, r4, r4, asl #2 + add r4, r3, r4, asl #5 + + add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r0, r6, r4 /* check if clamping is needed... 
*/ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ + orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ + str r4, [lr] /* write packed pixels */ + + subs r7, r7, #2 /* check for loop end */ + bgt 20b /* back to beginning */ + /* 2nd loop end */ + + ldr r3, [sp, #12] + add sp, sp, r3 /* deallocate buffer */ + ldmpc regs=r4-r10 /* restore registers */ + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines diff --git a/firmware/target/arm/ipod/video/lcd-video.c b/firmware/target/arm/ipod/video/lcd-video.c index 494bec8429..27d889aafa 100644 --- a/firmware/target/arm/ipod/video/lcd-video.c +++ b/firmware/target/arm/ipod/video/lcd-video.c @@ -439,6 +439,53 @@ void lcd_update(void) lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT); } +/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + unsigned bcmaddr, + int width, + int stride); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned bcmaddr; + off_t z; + unsigned char const * yuv_src[3]; + +#ifdef HAVE_LCD_SLEEP + if (!lcd_state.display_on) + return; +#endif + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + + z = stride * src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + /* Prevent the tick from triggering BCM updates while we're writing. */ + lcd_block_tick(); + + bcmaddr = BCMA_CMDPARAM + (LCD_WIDTH*2) * y + (x << 1); + height >>= 1; + + do + { + lcd_write_yuv420_lines(yuv_src, bcmaddr, width, stride); + bcmaddr += (LCD_WIDTH*4); /* Skip up two lines */ + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + } + while (--height > 0); + + lcd_unblock_and_update(); +} + #ifdef HAVE_LCD_SLEEP /* Executes a BCM command immediately and waits for it to complete. Other BCM commands (eg. LCD updates or lcd_tick) must not interfere. diff --git a/firmware/target/arm/iriver/h10/lcd-as-h10.S b/firmware/target/arm/iriver/h10/lcd-as-h10.S new file mode 100644 index 0000000000..8ac8b4289f --- /dev/null +++ b/firmware/target/arm/iriver/h10/lcd-as-h10.S @@ -0,0 +1,538 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007-2008 by Michael Sevakis + * + * H10 20GB LCD assembly routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + mov r14, #LCD2_DATA_MASK @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add r11, r9, r9, asl #2 @ r9 = Cr*101 + add r11, r11, r9, asl #5 @ + add r9, r11, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5) + orr r3, r3, r11, lsl #11 @ r3 |= (r << 11) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. 
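+ * The dither delta taken from the kernel further down is added per pixel
+ * as b + delta, r + delta*2 and g + delta/2 to match this scaling.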
+ * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|use order): + * 5 3 4 2 row0 row2 > down + * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left + * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/ + * 0 6 1 7 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r3, r0 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/iriver/h10/lcd-h10_20gb.c b/firmware/target/arm/iriver/h10/lcd-h10_20gb.c index 403c1c19e0..c7e339295d 100644 --- a/firmware/target/arm/iriver/h10/lcd-h10_20gb.c +++ b/firmware/target/arm/iriver/h10/lcd-h10_20gb.c @@ -36,6 +36,8 @@ static unsigned short disp_control_rev; /* Contrast setting << 8 */ static int lcd_contrast; +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + /* Forward declarations */ #if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP) static void lcd_display_off(void); @@ -396,6 +398,94 @@ bool lcd_active(void) /*** update functions ***/ +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
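+   The _odither variant additionally takes the screen x/y position so the
+   ordered dither pattern stays aligned between calls.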
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + const unsigned char *yuv_src[3]; + const unsigned char *ysrc_max; + int y0; + int options; + + if (!display_on) + return; + + width &= ~1; + height &= ~1; + + /* calculate the drawing region */ + + /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin + * is actually the bottom left and horizontal and vertical are swapped. + * Rockbox expects the origin to be the top left so we need to use + * 127 - y instead of just y */ + + /* max vert << 8 | start vert */ + lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x); + + y0 = LCD_HEIGHT - 1 - y + y_offset; + + /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */ + lcd_write_reg(R_ENTRY_MODE, 0x1020); + + yuv_src[0] = src[0] + src_y * stride + src_x; + yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + ysrc_max = yuv_src[0] + height * stride; + + options = lcd_yuv_options; + + do + { + /* max horiz << 8 | start horiz */ + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1)); + + /* position cursor (set AD0-AD15) */ + /* start vert << 8 | start horiz */ + lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y0); + + /* start drawing */ + lcd_send_cmd(R_WRITE_DATA_2_GRAM); + + if (options & LCD_YUV_DITHER) + { + lcd_write_yuv420_lines_odither(yuv_src, width, stride, + x, y); + y -= 2; + } + else + { + lcd_write_yuv420_lines(yuv_src, width, stride); + } + + y0 -= 2; + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; + yuv_src[2] += stride >> 1; + } + while (yuv_src[0] < ysrc_max); + + /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */ + lcd_write_reg(R_ENTRY_MODE, 0x1028); +} + + /* Update a fraction of the display. 
*/ void lcd_update_rect(int x0, int y0, int width, int height) { diff --git a/firmware/target/arm/iriver/h10/lcd-h10_5gb.c b/firmware/target/arm/iriver/h10/lcd-h10_5gb.c index 4386e1670c..5e1ad9ce23 100644 --- a/firmware/target/arm/iriver/h10/lcd-h10_5gb.c +++ b/firmware/target/arm/iriver/h10/lcd-h10_5gb.c @@ -118,6 +118,168 @@ void lcd_init_device(void) /*** update functions ***/ +#define CSUB_X 2 +#define CSUB_Y 2 + +#define RYFAC (31*257) +#define GYFAC (31*257) +#define BYFAC (31*257) +#define RVFAC 11170 /* 31 * 257 * 1.402 */ +#define GVFAC (-5690) /* 31 * 257 * -0.714136 */ +#define GUFAC (-2742) /* 31 * 257 * -0.344136 */ +#define BUFAC 14118 /* 31 * 257 * 1.772 */ + +#define ROUNDOFFS (127*257) +#define ROUNDOFFSG (63*257) + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + int y0, x0, y1, x1; + int ymax; + + width = (width + 1) & ~1; + + /* calculate the drawing region */ + x0 = x; + x1 = x + width - 1; + y0 = y; + y1 = y + height - 1; + + /* max horiz << 8 | start horiz */ + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (x1 << 8) | x0); + + /* max vert << 8 | start vert */ + lcd_write_reg(R_VERT_RAM_ADDR_POS, (y1 << 8) | y0); + + /* start vert << 8 | start horiz */ + lcd_write_reg(R_RAM_ADDR_SET, (y0 << 8) | x0); + + /* start drawing */ + lcd_send_cmd(R_WRITE_DATA_2_GRAM); + + ymax = y + height - 1 ; + + const int stride_div_csub_x = stride/CSUB_X; + + for (; y <= ymax ; y++) + { + /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ + const unsigned char *ysrc = src[0] + stride * src_y + src_x; + + const int uvoffset = stride_div_csub_x * (src_y/CSUB_Y) + + (src_x/CSUB_X); + + const unsigned char *usrc = src[1] + uvoffset; + const unsigned char *vsrc = src[2] + uvoffset; + const unsigned char *row_end = ysrc + width; + + int y, u, v; + int red1, green1, blue1; + int red2, green2, blue2; + unsigned rbits, gbits, bbits; + + int rc, gc, bc; + + do + { + u = *usrc++ - 128; + v = *vsrc++ - 128; + rc = RVFAC * v + ROUNDOFFS; + gc = GVFAC * v + GUFAC * u + ROUNDOFFSG; + bc = BUFAC * u + ROUNDOFFS; + + /* Pixel 1 */ + y = *ysrc++; + + red1 = RYFAC * y + rc; + green1 = GYFAC * y + gc; + blue1 = BYFAC * y + bc; + + /* Pixel 2 */ + y = *ysrc++; + red2 = RYFAC * y + rc; + green2 = GYFAC * y + gc; + blue2 = BYFAC * y + bc; + + /* Since out of bounds errors are relatively rare, we check two + pixels at once to see if any components are out of bounds, and + then fix whichever is broken. This works due to high values and + negative values both becoming larger than the cutoff when + casted to unsigned. And ORing them together checks all of them + simultaneously. 
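+            For example, (unsigned)-1 becomes 0xFFFFFFFF, far above
+            RYFAC*255+ROUNDOFFS, so a negative component fails the same
+            test as an oversized one.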
*/ + if (((unsigned)(red1 | green1 | blue1 | + red2 | green2 | blue2)) > (RYFAC*255+ROUNDOFFS)) { + if (((unsigned)(red1 | green1 | blue1)) > + (RYFAC*255+ROUNDOFFS)) { + if ((unsigned)red1 > (RYFAC*255+ROUNDOFFS)) + { + if (red1 < 0) + red1 = 0; + else + red1 = (RYFAC*255+ROUNDOFFS); + } + if ((unsigned)green1 > (GYFAC*255+ROUNDOFFSG)) + { + if (green1 < 0) + green1 = 0; + else + green1 = (GYFAC*255+ROUNDOFFSG); + } + if ((unsigned)blue1 > (BYFAC*255+ROUNDOFFS)) + { + if (blue1 < 0) + blue1 = 0; + else + blue1 = (BYFAC*255+ROUNDOFFS); + } + } + + if (((unsigned)(red2 | green2 | blue2)) > + (RYFAC*255+ROUNDOFFS)) { + if ((unsigned)red2 > (RYFAC*255+ROUNDOFFS)) + { + if (red2 < 0) + red2 = 0; + else + red2 = (RYFAC*255+ROUNDOFFS); + } + if ((unsigned)green2 > (GYFAC*255+ROUNDOFFSG)) + { + if (green2 < 0) + green2 = 0; + else + green2 = (GYFAC*255+ROUNDOFFSG); + } + if ((unsigned)blue2 > (BYFAC*255+ROUNDOFFS)) + { + if (blue2 < 0) + blue2 = 0; + else + blue2 = (BYFAC*255+ROUNDOFFS); + } + } + } + + rbits = red1 >> 16 ; + gbits = green1 >> 15 ; + bbits = blue1 >> 16 ; + lcd_send_data((rbits << 11) | (gbits << 5) | bbits); + + rbits = red2 >> 16 ; + gbits = green2 >> 15 ; + bbits = blue2 >> 16 ; + lcd_send_data((rbits << 11) | (gbits << 5) | bbits); + } + while (ysrc < row_end); + + src_y++; + } +} + + /* Update a fraction of the display. */ void lcd_update_rect(int x0, int y0, int width, int height) { diff --git a/firmware/target/arm/lcd-c200_c200v2.c b/firmware/target/arm/lcd-c200_c200v2.c index 665c82f292..38877ccac9 100644 --- a/firmware/target/arm/lcd-c200_c200v2.c +++ b/firmware/target/arm/lcd-c200_c200v2.c @@ -30,6 +30,9 @@ #endif /* Display status */ +#if MEMORYSIZE > 2 +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; +#endif static bool is_lcd_enabled = true; /* LCD command set for Samsung S6B33B2 */ @@ -298,6 +301,80 @@ void lcd_set_flip(bool yesno) /*** update functions ***/ +#if MEMORYSIZE > 2 /* not for C200V2 */ +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned char const * yuv_src[3]; + off_t z; + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height >>= 1; + + y += 0x1a; + + z = stride*src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + lcd_send_command(R_ENTRY_MODE, 0x80); + + lcd_send_command(R_X_ADDR_AREA, x); + lcd_send_command(x + width - 1, 0); + + if (lcd_yuv_options & LCD_YUV_DITHER) + { + do + { + lcd_send_command(R_Y_ADDR_AREA, y); + lcd_send_command(y + 1, 0); + + lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y); + + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } + else + { + do + { + lcd_send_command(R_Y_ADDR_AREA, y); + lcd_send_command(y + 1, 0); + + lcd_write_yuv420_lines(yuv_src, width, stride); + + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } +} +#endif /* MEMORYSIZE > 2 */ + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S b/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S new file mode 100644 index 0000000000..e03011c168 --- /dev/null +++ b/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S @@ -0,0 +1,556 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id:$ + * + * Copyright (C) 2007-2008 by Michael Sevakis + * Adapted for the Packard Bell Vibe 500 by Szymon Dziok + * + * Packard Bell Vibe 500 LCD assembly routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, =LCD1_BASE @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add r11, r9, r9, asl #2 @ r9 = Cr*101 + add r11, r11, r9, asl #5 @ + add r9, r11, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5) + orr r3, r3, r11, lsl #11 @ r3 |= (r << 11) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. 
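+ * The kernel entries are applied as deltas in steps of 0x80; r14 carries
+ * the current quadrant offset and is flipped on every pass of the line loop.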
+ * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|use order): + * 5 3 4 2 row0 row2 > down + * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left + * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/ + * 0 6 1 7 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r3, r0 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + @ + ldr r0, =LCD1_BASE @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + movs r7, r3, lsr #8 @ store pixel +20: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 20b @ + str r7, [r0, #0x10] @ +25: @ + ldr r11, [r0] @ + tst r11, #LCD1_BUSY_MASK @ + bne 25b @ + str r3, [r0, #0x10] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/pbell/vibe500/lcd-vibe500.c b/firmware/target/arm/pbell/vibe500/lcd-vibe500.c index 2daa5def74..047ef2bf53 100644 --- a/firmware/target/arm/pbell/vibe500/lcd-vibe500.c +++ b/firmware/target/arm/pbell/vibe500/lcd-vibe500.c @@ -35,6 +35,8 @@ static unsigned short disp_control_rev; /* Contrast setting << 8 */ static int lcd_contrast; +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + /* Forward declarations */ #if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP) static void lcd_display_off(void); @@ -375,6 +377,79 @@ bool lcd_active(void) /*** update functions ***/ +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ + +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + const unsigned char *yuv_src[3]; + const unsigned char *ysrc_max; + int y0; + int options; + + if (!display_on) + return; + + width &= ~1; + height &= ~1; + + lcd_write_reg(R_VERT_RAM_ADDR_POS, ((LCD_WIDTH - 1 - x) << 8) | + ((LCD_WIDTH-1) - (x + width - 1))); + + y0 = LCD_HEIGHT - 1 - y; + + lcd_write_reg(R_ENTRY_MODE,0x1000); + + yuv_src[0] = src[0] + src_y * stride + src_x; + yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + ysrc_max = yuv_src[0] + height * stride; + + options = lcd_yuv_options; + + do + { + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1)); + lcd_write_reg(R_RAM_ADDR_SET, ((LCD_WIDTH - 1 - x) << 8) | y0); + + /* start drawing */ + lcd_send_cmd(R_WRITE_DATA_2_GRAM); + + if (options & LCD_YUV_DITHER) + { + lcd_write_yuv420_lines_odither(yuv_src, width, stride,x, y); + y -= 2; + } + else + { + lcd_write_yuv420_lines(yuv_src, width, stride); + } + + y0 -= 2; + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; + yuv_src[2] += stride >> 1; + } + while (yuv_src[0] < ysrc_max); + lcd_write_reg(R_ENTRY_MODE,0x1008); +} + /* Update a fraction of the display. */ void lcd_update_rect(int x0, int y0, int width, int height) { diff --git a/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S b/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S new file mode 100644 index 0000000000..3bb3530917 --- /dev/null +++ b/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S @@ -0,0 +1,570 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007-2008 by Michael Sevakis + * + * H10 20GB LCD assembly routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + mov r14, #LCD2_DATA_MASK @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add r11, r9, r9, asl #2 @ r9 = Cr*101 + add r11, r11, r9, asl #5 @ + add r9, r11, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5) + orr r3, r3, r11, lsl #11 @ r3 |= (r << 11) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. 
+ * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|use order): + * 5 3 4 2 row0 row2 > down + * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left + * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/ + * 0 6 1 7 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r3, r0 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r11, [r0] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c b/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c index d9570600bc..c26c0bc963 100644 --- a/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c +++ b/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c @@ -81,6 +81,7 @@ static bool lcd_enabled; /* Display status */ +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; static unsigned mad_ctrl = 0; /* wait for LCD */ @@ -312,6 +313,86 @@ void lcd_set_flip(bool yesno) lcd_send_data(mad_ctrl); } +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, int stride); + +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, int stride, + int x_screen, int y_screen); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned char const * yuv_src[3]; + off_t z; + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height >>= 1; + + z = stride*src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + /* Set vertical address mode */ + lcd_send_cmd(MADCTR); + lcd_send_data(mad_ctrl | (1<<5)); + + lcd_send_cmd(RASET); + lcd_send_data(x); + lcd_send_data(x + width - 1); + + if (lcd_yuv_options & LCD_YUV_DITHER) + { + do + { + lcd_send_cmd(CASET); + lcd_send_data(y); + lcd_send_data(y + 1); + + lcd_send_cmd(RAMWR); + + lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } + else + { + do + { + lcd_send_cmd(CASET); + lcd_send_data(y); + lcd_send_data(y + 1); + + lcd_send_cmd(RAMWR); + + lcd_write_yuv420_lines(yuv_src, width, stride); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } + + /* Restore the address mode */ + lcd_send_cmd(MADCTR); + lcd_send_data(mad_ctrl); +} + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S b/firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S new file mode 100644 index 0000000000..c3a7992a2e --- /dev/null +++ b/firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S @@ -0,0 +1,140 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id:$ + * + * Copyright (C) 2010 by Szymon Dziok + * + * Philips Gogear HDD6330 LCD assembly routine + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + void lcd_yuv_write_inner_loop(unsigned char const * const ysrc, + unsigned char const * const usrc, + unsigned char const * const vsrc, + int width); +*/ + .section .icode, "ax", %progbits + .align 2 + .global lcd_yuv_write_inner_loop + .type lcd_yuv_write_inner_loop, %function +lcd_yuv_write_inner_loop: + @ r0 = ysrc + @ r1 = usrc + @ r2 = vsrc + @ r3 = width + stmfd sp!, { r4-r11, lr } @ save regs + mov r4, #0x70000000 @ r4 = LCD2_BLOCK_CTRL - 0x20 + add r4, r4, #0x8a00 @ + add r5, r4, #0x100 @ r5 = LCD2_BLOCK_DATA +10: @ loop + + ldrb r7, [r1], #1 @ *usrc++ + ldrb r8, [r2], #1 @ *vsrc++ + + sub r7, r7, #128 @ Cb -= 128 + sub r8, r8, #128 @ Cr -= 128 + + add r10, r8, r8, asl #2 @ Cr*101 + add r10, r10, r8, asl #5 + add r10, r10, r8, asl #6 + + add r11, r8, r8, asl #1 @ Cr*51 + Cb*24 + add r11, r11, r11, asl #4 + add r11, r11, r7, asl #3 + add r11, r11, r7, asl #4 + + add r12, r7, #2 @ r12 = bu = (Cb*128 + 256) >> 9 + mov r12, r12, asr #2 + add r10, r10, #256 @ r10 = rv = (Cr*101 + 256) >> 9 + mov r10, r10, asr #9 + rsb r11, r11, #128 @ r11 = guv = (-r11 + 128) >> 8 + mov r11, r11, asr #8 + +@ pixel_1 + ldrb r7, [r0], #1 @ *ysrc++ + sub r7, r7, #16 @ Y = (Y' - 16) * 37 + add r8, r7, r7, asl #2 + add r7, r8, r7, asl #5 + + add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv + add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv + add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu + + cmp r9, #31 @ clamp R + mvnhi r9, r9, asr #31 + andhi r9, r9, #31 + + cmp r8, #63 @ clamp G + mvnhi r8, r8, asr #31 + andhi r8, r8, #63 + + cmp r7, #31 @ clamp B + mvnhi r7, r7, asr #31 + andhi r7, r7, #31 + + orr r6, r7, r8, lsl #5 @ pack pixel + orr r6, r6, r9, lsl #11 + + mov r7, r6, lsl #8 @ swap bytes + and r7, r7, #0xff00 + add r6, r7, r6, lsr #8 + +@ pixel_2 + ldrb r7, [r0], #1 @ *ysrc++ + sub r7, r7, #16 @ Y = (Y' - 16) * 37 + add r8, r7, r7, asl #2 + add r7, r8, r7, asl #5 + + add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv + add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv + add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu + + cmp r9, #31 @ clamp R + mvnhi r9, r9, asr #31 + andhi r9, r9, #31 + + cmp r8, #63 @ clamp G + mvnhi r8, r8, asr #31 + andhi r8, r8, #63 + + cmp r7, #31 @ clamp B + mvnhi r7, r7, asr #31 + andhi r7, r7, #31 + + orr r7, r7, r8, lsl #5 @ pack pixel + orr r7, r7, r9, lsl #11 + + orr r6, r6, r7, lsl #24 @ swap bytes and add pixels simultaneously + mov r7, r7, lsr #8 + orr r6, r6, r7, lsl #16 + +11: @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); + ldr r11, [r4, #0x20] @ + tst r11, #0x1000000 @ + beq 11b @ + + str r6, [r5] @ send two pixels + + subs r3, r3, #2 @ decrease width + bgt 10b @ loop + + ldmpc regs=r4-r11 @ restore regs + .ltorg @ dump constant pool + .size lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop diff --git a/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c b/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c index 9d2fdc8519..cdd3064bba 100644 --- a/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c +++ b/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c @@ -37,6 +37,9 @@ /* whether the lcd is currently enabled or not */ static bool lcd_enabled; +/* Display status */ +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + /* Value used for flipping. Must be remembered when display is turned off. 
*/ static unsigned short flip; @@ -144,6 +147,101 @@ void lcd_set_flip(bool yesno) lcd_send_data(0x08 | flip); } +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +#define CSUB_X 2 +#define CSUB_Y 2 + +/* YUV- > RGB565 conversion + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 +*/ + +extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc, + unsigned char const * const usrc, + unsigned char const * const vsrc, + int width); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + int h; + + width = (width + 1) & ~1; + + lcd_send_reg(LCD_REG_HORIZ_ADDR_START); + lcd_send_data(y); + + lcd_send_reg(LCD_REG_HORIZ_ADDR_END); + lcd_send_data(y + height - 1); + + lcd_send_reg(LCD_REG_VERT_ADDR_START); + lcd_send_data(x + x_offset); + + lcd_send_reg(LCD_REG_VERT_ADDR_END); + lcd_send_data(x + width - 1 + x_offset); + + lcd_send_reg(LCD_REG_WRITE_DATA_2_GRAM); + + const int stride_div_csub_x = stride/CSUB_X; + + h=0; + while (1) + { + /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ + const unsigned char *ysrc = src[0] + stride * src_y + src_x; + + const int uvoffset = stride_div_csub_x * (src_y/CSUB_Y) + + (src_x/CSUB_X); + + const unsigned char *usrc = src[1] + uvoffset; + const unsigned char *vsrc = src[2] + uvoffset; + + int pixels_to_write; + + if (h==0) + { + while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); + LCD2_BLOCK_CONFIG = 0; + + if (height == 0) break; + + pixels_to_write = (width * height) * 2; + h = height; + + /* calculate how much we can do in one go */ + if (pixels_to_write > 0x10000) + { + h = (0x10000/2) / width; + pixels_to_write = (width * h) * 2; + } + + height -= h; + LCD2_BLOCK_CTRL = 0x10000080; + LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1); + LCD2_BLOCK_CTRL = 0x34000000; + } + + lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width); + + src_y++; + h--; + } + + while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); + LCD2_BLOCK_CONFIG = 0; +} + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/philips/sa9200/lcd-as-sa9200.S b/firmware/target/arm/philips/sa9200/lcd-as-sa9200.S new file mode 100644 index 0000000000..d99222b9df --- /dev/null +++ b/firmware/target/arm/philips/sa9200/lcd-as-sa9200.S @@ -0,0 +1,590 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007-2011 by Michael Sevakis + * + * Philips GoGear SA9200 LCD assembly routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + + /* This code should work in general for a Renesas type LCD interface + * connected to the "mono" bridge. TODO: Share it where possible. + * + * Dither is already prepared to be built for upright and rotated + * orientations. */ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r10, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + mov r0, #0x70000000 @ r0 = LCD1_BASE_ADDR = 0x70003000 + orr r0, r0, #0x3000 @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add r14, r9, r9, asl #2 @ r9 = Cr*101 + add r14, r14, r9, asl #5 @ + add r9, r14, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r14 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r14, #31 @ clamp r + mvnhi r14, r14, asr #31 @ + andhi r14, r14, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb| + orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb| + mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg| + @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r14, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r14 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r14, #31 @ clamp r + mvnhi r14, r14, asr #31 @ + andhi r14, r14, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb| + orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb| + mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg| +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r14, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r14 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r14, #31 @ clamp r + mvnhi r14, r14, asr #31 @ + andhi r14, r14, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb| + orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb| + mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg| +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r14, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r14 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r14, #31 @ clamp r + mvnhi r14, r14, asr #31 @ + andhi r14, r14, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb| + orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb| + mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg| +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r14, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r10 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. + * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > right/down + * 2 4 \/ down/left + * + * Kernel pattern for upright display: + * 5 3 4 2 +-> right + * 1 7 0 6 | down + * 4 2 5 3 \/ + * 0 6 1 7 + * + * Kernel pattern for clockwise rotated display: + * 2 6 3 7 +-> down + * 4 0 5 1 | left + * 3 7 2 6 \/ + * 5 1 4 0 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = strideS + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r3, r0 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + @ + mov r0, #0x70000000 @ r0 = LCD1_BASE_ADDR = 0x70003000 + orr r0, r0, #0x3000 @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, 
r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ +#if LCD_WIDTH >= LCD_HEIGHT + add r12, r14, #0x200 @ +#else + add r12, r14, #0x100 @ +#endif + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb| + and r7, r7, #0x7e00 @ + orr r11, r11, r7, lsr #4 @ + orr r11, r11, r3, lsr #10 @ + mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg| + @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r11, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ +#if LCD_WIDTH >= LCD_HEIGHT + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 +#else + add r12, r14, #0x200 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 +#endif + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb| + and r7, r7, #0x7e00 @ + orr r11, r11, r7, lsr #4 @ + orr r11, r11, r3, lsr #10 @ + mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg| + @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r11, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ +#if LCD_WIDTH >= LCD_HEIGHT + add r12, r14, #0x100 @ +#else + add r12, r14, #0x300 @ +#endif + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb| + and r7, r7, #0x7e00 @ + orr r11, r11, r7, lsr #4 @ + orr r11, r11, r3, lsr #10 @ + mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg| + @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r11, [r0, #0x10] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ +#if LCD_WIDTH >= LCD_HEIGHT + add r12, r14, #0x300 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 +#else + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 +#endif + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb| + and r7, r7, #0x7e00 @ + orr r11, r11, r7, lsr #4 @ + orr r11, r11, r3, lsr #10 @ + mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg| + @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r7, [r0, #0x10] @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD1_BUSY_MASK @ + bne 20b @ + strb r11, [r0, #0x10] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/philips/sa9200/lcd-sa9200.c b/firmware/target/arm/philips/sa9200/lcd-sa9200.c index c6c297e6ca..e30a298045 100644 --- a/firmware/target/arm/philips/sa9200/lcd-sa9200.c +++ b/firmware/target/arm/philips/sa9200/lcd-sa9200.c @@ -75,6 +75,9 @@ static void lcd_display_off(void); #define R_GATE_OUT_PERIOD_CTRL 0x71 #define R_SOFTWARE_RESET 0x72 +/* Display status */ +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + /* wait for LCD */ static inline void lcd_wait_write(void) { @@ -404,6 +407,85 @@ void lcd_set_flip(bool yesno) lcd_write_reg(R_DRV_OUTPUT_CONTROL, flip ? 0x090c : 0x0a0c); } +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, + int y_screen); +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + const unsigned char *yuv_src[3]; + const unsigned char *ysrc_max; + int options; + + if (!display_on) + return; + + width &= ~1; + height &= ~1; + + /* calculate the drawing region */ + lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x); + + /* convert YUV coordinates to screen coordinates */ + y = LCD_WIDTH - 1 - y; + + /* 2px strip: cursor moves left, then down in gram */ + /* BGR=1, MDT1-0=00, I/D1-0=10, AM=0 */ + lcd_write_reg(R_ENTRY_MODE, 0x1020); + + yuv_src[0] = src[0] + src_y * stride + src_x; + yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + ysrc_max = yuv_src[0] + height * stride; + + /* cache options setting */ + options = lcd_yuv_options; + + do + { + /* max horiz << 8 | start horiz */ + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y << 8) | (y - 1)); + + /* position cursor (set AD0-AD15) */ + lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y); + + /* start drawing */ + lcd_send_command(R_WRITE_DATA_2_GRAM); + + if (options & LCD_YUV_DITHER) + { + lcd_write_yuv420_lines_odither(yuv_src, width, stride, + y, x); + } + else + { + lcd_write_yuv420_lines(yuv_src, width, stride); + } + + y -= 2; /* move strip by "down" 2 px */ + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; + yuv_src[2] += stride >> 1; + } + while (yuv_src[0] < ysrc_max); + + /* back to normal right, then down cursor in gram */ + /* BGR=1, MDT1-0=00, I/D1-0=11, AM=0 */ + 
lcd_write_reg(R_ENTRY_MODE, 0x1030); +} + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c b/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c index 9596dca98b..d5906b9dd5 100644 --- a/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c +++ b/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c @@ -207,3 +207,18 @@ bool lcd_active() { return display_on; } + +/* Blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} diff --git a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c index cef0186de5..23505d9fa0 100644 --- a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c +++ b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c @@ -268,3 +268,18 @@ bool lcd_active() { return display_on; } + +/* Blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} diff --git a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c index e2436e9b90..311b8057cb 100644 --- a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c +++ b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c @@ -231,3 +231,18 @@ bool lcd_active() { return display_on; } + +/* Blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} diff --git a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c index 8520715650..821b52dcb6 100644 --- a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c +++ b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c @@ -211,3 +211,18 @@ bool lcd_active() { return display_on; } + +/* Blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} diff --git a/firmware/target/arm/rk27xx/lcd-hifiman.c b/firmware/target/arm/rk27xx/lcd-hifiman.c index 95486b02e4..bde1d3546f 100644 --- a/firmware/target/arm/rk27xx/lcd-hifiman.c +++ b/firmware/target/arm/rk27xx/lcd-hifiman.c @@ -350,3 +350,22 @@ bool lcd_active() { return display_on; } + +/* Blit a YUV bitmap directly to the LCD + * provided by generic fallback in lcd-16bit-common.c + */ +#if 0 +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} +#endif diff --git a/firmware/target/arm/rk27xx/ma/lcd-ma.c b/firmware/target/arm/rk27xx/ma/lcd-ma.c index 8dfe874b44..fa3ccc5aa0 100644 --- a/firmware/target/arm/rk27xx/ma/lcd-ma.c +++ b/firmware/target/arm/rk27xx/ma/lcd-ma.c @@ -253,3 +253,18 @@ bool lcd_active() { return display_on; } + +/* Blit a YUV bitmap 
directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} diff --git a/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c b/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c index e5cefd282d..b40f2860d7 100644 --- a/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c +++ b/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c @@ -178,3 +178,22 @@ void lcd_set_gram_area(int x_start, int y_start, lcd_cmd(GRAM_WRITE); LCDC_CTRL &= ~RGB24B; } + +/* Blit a YUV bitmap directly to the LCD + * provided by generic fallback in lcd-16bit-common.c + */ +#if 0 +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; +} +#endif diff --git a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S index 50104a73e8..af338eef16 100644 --- a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S +++ b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S @@ -65,3 +65,231 @@ lcd_write_line: /* r2 = LCD_BASE */ bgt .loop ldmpc regs=r4-r6 + +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * const unsigned LCD_BASE, + * int width, + * int stride); + * + * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is: + * |R| |1.164 0.000 1.596| |Y' - 16| + * |G| = |1.164 -0.391 -0.813| |Pb - 128| + * |B| |1.164 2.018 0.000| |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop + * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within + * the second loop these chroma offset are reloaded from buffer. Within each + * loop two pixels are calculated and written to LCD. 
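For reference, the fixed-point arithmetic described above can be modelled in plain C roughly as follows. This is an illustrative sketch using only the coefficients quoted in the comment (74, 101, -24, -51, 128 with the >>9 / >>8 scaling); the helper names are not Rockbox symbols, and an arithmetic right shift is assumed, as in the assembly.

    #include <stdint.h>

    static inline int clamp(int v, int max)
    {
        return v < 0 ? 0 : (v > max ? max : v);
    }

    /* Convert one 2x2 YUV420 block: four Y' samples share one Cb/Cr pair.
       The chroma offsets rv/guv/bu are computed (and rounded) once, exactly
       as the first loop of the assembly buffers them for reuse. */
    static void yuv420_to_rgb565_2x2(const uint8_t y[4], uint8_t cb, uint8_t cr,
                                     uint16_t out[4])
    {
        int u = cb - 128, v = cr - 128;
        int rv  = (101 * v + 256) >> 9;            /* red chroma offset   */
        int guv = (-24 * u - 51 * v + 128) >> 8;   /* green chroma offset */
        int bu  = (128 * u + 256) >> 9;            /* blue chroma offset  */

        for (int i = 0; i < 4; i++) {
            int luma = 74 * (y[i] - 16);
            int r = clamp((luma >> 9) + rv,  31);
            int g = clamp((luma >> 8) + guv, 63);
            int b = clamp((luma >> 9) + bu,  31);
            out[i] = (uint16_t)((r << 11) | (g << 5) | b);  /* RGB565 */
        }
    }
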
+ */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + /* r0 = src = yuv_src */ + /* r1 = dst = LCD_BASE */ + /* r2 = width */ + /* r3 = stride */ + stmfd sp!, { r4-r10, lr } /* save non-scratch */ + ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */ + /* r10 = yuv_src[1] = Cb_p */ + /* r12 = yuv_src[2] = Cr_p */ + add r3, r9, r3 /* r3 = &ysrc[stride] */ + add r4, r2, r2, asr #1 /* chroma buffer lenght = width/2 *3 */ + mov r4, r4, asl #2 /* use words for str/ldm possibility */ + add r4, r4, #19 /* plus room for 4 additional words, */ + bic r4, r4, #3 /* rounded up to multiples of 4 byte */ + sub sp, sp, r4 /* and allocate on stack */ + stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */ + + mov r7, r2 /* r7 = loop count */ + add r8, sp, #16 /* chroma buffer */ + add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */ + + /* 1st loop start */ +10: /* loop start */ + + ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */ + ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */ + + sub r0, r0, #128 /* r0 = Cb-128 */ + sub r1, r1, #128 /* r1 = Cr-128 */ + + add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */ + add r2, r2, r2, asl #4 + add r2, r2, r0, asl #3 + add r2, r2, r0, asl #4 + + add r4, r1, r1, asl #2 /* r1 = Cr*101 */ + add r4, r4, r1, asl #5 + add r1, r4, r1, asl #6 + + add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */ + mov r1, r1, asr #9 + rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */ + mov r2, r2, asr #8 + add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */ + mov r0, r0, asr #2 + stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ + + /* 1st loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... */ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_1 and save to r4 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ + + /* 1st loop, second pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r0, r6, r5 /* check if clamping is needed... 
*/ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ +#ifdef FORCE_FIFO_WAIT + /* wait for FIFO half full */ +.fifo_wait1: + ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */ + tst r3, #0x8 + bgt .fifo_wait1 +#endif + stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */ + + subs r7, r7, #2 /* check for loop end */ + bgt 10b /* back to beginning */ + /* 1st loop end */ + + /* Reload several registers for pointer rewinding for next loop */ + add r8, sp, #16 /* chroma buffer */ + ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */ + /* r7 = loop count */ + /* r9 = &ysrc[stride] */ + + /* 2nd loop start */ +20: /* loop start */ + /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */ + ldmia r8!, {r0-r2} + + /* 2nd loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... */ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + /* calculate pixel_1 and save to r4 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ + + /* 2nd loop, second pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r0, r6, r5 /* check if clamping is needed... 
*/ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ +#ifdef FORCE_FIFO_WAIT + /* wait for FIFO half full */ +.fifo_wait2: + ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */ + tst r3, #0x8 + bgt .fifo_wait2 +#endif + stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */ + + subs r7, r7, #2 /* check for loop end */ + bgt 20b /* back to beginning */ + /* 2nd loop end */ + + ldr r3, [sp, #12] + add sp, sp, r3 /* deallocate buffer */ + ldmpc regs=r4-r10 /* restore registers */ + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines diff --git a/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c b/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c index 2d630886c9..13e5c5c1d4 100644 --- a/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c +++ b/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c @@ -406,3 +406,36 @@ void lcd_update_rect(int x, int y, int width, int height) } while (--height > 0 ); } } + +/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + const unsigned int lcd_baseadress, + int width, + int stride); + +/* Blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned int z; + unsigned char const * yuv_src[3]; + + width = (width + 1) & ~1; /* ensure width is even */ + + lcd_setup_drawing_region(x, y, width, height); + + z = stride * src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + height >>= 1; + + do { + lcd_write_yuv420_lines(yuv_src, LCD_BASE, width, stride); + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + } while (--height > 0); +} diff --git a/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c b/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c index ced8925999..5ed6c752b7 100644 --- a/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c +++ b/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c @@ -311,3 +311,11 @@ void lcd_update_rect(int x, int y, int width, int height) { lcd_update(); } + +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + /* stub */ +} + diff --git a/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c b/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c index bbbfccc11d..5e722d5a87 100644 --- a/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c +++ b/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c @@ -476,3 +476,19 @@ void lcd_update(void) { lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT); } + +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + (void)src; + (void)src_x; + (void)src_y; + (void)stride; + (void)x; + (void)y; + (void)width; + (void)height; + /* TODO: not implemented yet */ +} + diff --git a/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c b/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c index 
e1406549f4..14647a5697 100644 --- a/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c +++ b/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c @@ -530,3 +530,49 @@ void lcd_update_rect(int x, int y, int width, int height) displaylcd_dma(pixels); } + +/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + uint16_t* outbuf, + int width, + int stride); + +/* Blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) ICODE_ATTR; +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned int z; + unsigned char const * yuv_src[3]; + +#ifdef HAVE_LCD_SLEEP + if (!lcd_active()) return; +#endif + + width = (width + 1) & ~1; /* ensure width is even */ + + int pixels = width * height; + uint16_t* out = lcd_dblbuf[0]; + + z = stride * src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + displaylcd_setup(x, y, width, height); + + height >>= 1; + + do { + lcd_write_yuv420_lines(yuv_src, out, width, stride); + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + out += width << 1; + } while (--height); + + displaylcd_dma(pixels); +} diff --git a/firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S b/firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S new file mode 100644 index 0000000000..1ed7c4e189 --- /dev/null +++ b/firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S @@ -0,0 +1,1013 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id: lcd-as-video.S 26756 2010-06-11 04:41:36Z funman $ + * + * Copyright (C) 2010 by Andree Buschmann + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +/* Version history: + * + * SVN: + * - initial SVN version. + * + * ARMv4: + * - use all available registers to calculate four pixels within each + * loop iteration. + * - avoid LDR interlocks. + * + * ARMv5TE: + * - use ARMv5TE+ 1-cycle multiply-accumulate instructions. + * + * ARMv5TE_WST: + * - use data tables (256 bytes) for RBG565 saturation. + * + * All versions are based on current SVN algorithm (round->scale->add) + * using the same coefficients, so output results are identical. + * + * TODO?: SVN coefficients are a very nice approximation for operations + * with shift+add instructions. When 16x16+32 MLA instructions are used, + * NBR and COEF_N could probably be adjusted to slighly increase accuracy. 
+ */ +#define VERSION_SVN 0 +#define VERSION_ARMV4 1 +#define VERSION_ARMV5TE 2 +#define VERSION_ARMV5TE_WST 3 + +#define YUV2RGB_VERSION VERSION_ARMV5TE_WST + + +#define ASM +#include "config.h" +#include "cpu.h" + +#if (YUV2RGB_VERSION == VERSION_SVN) + .section .icode, "ax", %progbits + + +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * uint16_t* out, + * int width, + * int stride); + * + * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is: + * |R| |1.164 0.000 1.596| |Y' - 16| + * |G| = |1.164 -0.391 -0.813| |Pb - 128| + * |B| |1.164 2.018 0.000| |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop + * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within + * the second loop these chroma offset are reloaded from buffer. Within each + * loop two pixels are calculated and written to LCD. + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + /* r0 = src = yuv_src */ + /* r1 = dst = out */ + /* r2 = width */ + /* r3 = stride */ + stmfd sp!, { r4-r10, lr } /* save non-scratch */ + ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */ + /* r10 = yuv_src[1] = Cb_p */ + /* r12 = yuv_src[2] = Cr_p */ + add r3, r9, r3 /* r3 = &ysrc[stride] */ + add r4, r2, r2, asr #1 /* chroma buffer lenght = width/2 *3 */ + mov r4, r4, asl #2 /* use words for str/ldm possibility */ + add r4, r4, #15 /* plus room for 3 additional words, */ + bic r4, r4, #3 /* rounded up to multiples of 4 byte */ + sub sp, sp, r4 /* and allocate on stack */ + stmia sp, {r2-r4} /* width, &ysrc[stride], stack_alloc */ + + mov r7, r2 /* r7 = loop count */ + add r8, sp, #12 /* chroma buffer */ + mov lr, r1 /* RGB565 data destination buffer */ + + /* 1st loop start */ +10: /* loop start */ + + ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */ + ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */ + + sub r0, r0, #128 /* r0 = Cb-128 */ + sub r1, r1, #128 /* r1 = Cr-128 */ + + add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */ + add r2, r2, r2, asl #4 + add r2, r2, r0, asl #3 + add r2, r2, r0, asl #4 + + add r4, r1, r1, asl #2 /* r1 = Cr*101 */ + add r4, r4, r1, asl #5 + add r1, r4, r1, asl #6 + + add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */ + mov r1, r1, asr #9 + rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */ + mov r2, r2, asr #8 + add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */ + mov r0, r0, asr #2 + stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ + + /* 1st loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... 
*/ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_1 and save to r4 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ + + /* 1st loop, second pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r0, r6, r5 /* check if clamping is needed... */ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ + orr r4, r4, r5, lsl #16 + str r4, [lr], #4 /* write pixel_1 and pixel_2 */ + + subs r7, r7, #2 /* check for loop end */ + bgt 10b /* back to beginning */ + /* 1st loop end */ + + /* Reload several registers for pointer rewinding for next loop */ + add r8, sp, #12 /* chroma buffer */ + ldmia sp, {r7, r9} /* r7 = loop count */ + /* r9 = &ysrc[stride] */ + + /* 2nd loop start */ +20: /* loop start */ + /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */ + ldmia r8!, {r0-r2} + + /* 2nd loop, first pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r5, r6, r4 /* check if clamping is needed... */ + orr r5, r5, r3, asr #1 /* ...at all */ + cmp r5, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + /* calculate pixel_1 and save to r4 for later pixel packing */ + orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ + + /* 2nd loop, second pixel */ + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 + + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r0, r6, r5 /* check if clamping is needed... 
*/ + orr r0, r0, r3, asr #1 /* ...at all */ + cmp r0, #31 + bls 15f /* -> no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r3, #63 /* clamp g */ + mvnhi r3, r3, asr #31 + andhi r3, r3, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ + orr r4, r4, r5, lsl #16 + str r4, [lr], #4 /* write pixel_1 and pixel_2 */ + + subs r7, r7, #2 /* check for loop end */ + bgt 20b /* back to beginning */ + /* 2nd loop end */ + + ldr r3, [sp, #8] + add sp, sp, r3 /* deallocate buffer */ + ldmpc regs=r4-r10 /* restore registers */ + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +#elif (YUV2RGB_VERSION == VERSION_ARMV4) +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * uint16_t* out, + * int width, + * int stride); + * + * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is: + * |R| |1.164 0.000 1.596| |Y' - 16| + * |G| = |1.164 -0.391 -0.813| |Pb - 128| + * |B| |1.164 2.018 0.000| |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Converts two lines from YUV420 to RGB565, within each iteration four + * pixels (2 per line) are calculated and written to destination buffer. + */ + .section .icode, "ax", %progbits + + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function + +lcd_write_yuv420_lines: + /* r0 = src = yuv_src */ + /* r1 = dst = out */ + /* r2 = width */ + /* r3 = stride */ + stmfd sp!, {r4-r11,lr} /* save non-scratch */ + ldmia r0, {r10-r12} /* r10 = yuv_src[0] = Y'_p */ + /* r11 = yuv_src[1] = Cb_p */ + /* r12 = yuv_src[2] = Cr_p */ + mov r9, r2, lsl #1 /* r9 = 2*width (loop count) */ + str r9, [sp, #-4]! /* [--sp] = 2*width (constant) */ + add r8, r10, r3 /* r8 = Y'_p + stride = Y'stride_p */ + mov lr, r1 /* RGB565 data destination buffer */ + +10: /* loop start */ + ldrb r0, [r11], #1 /* r0 = *Cb_p++ */ + ldrb r1, [r12], #1 /* r1 = *Cr_p++ */ + ldrb r3, [r8], #1 /* r3 = Y'3 */ + ldrb r4, [r8], #1 /* r4 = Y'4 */ + + sub r0, r0, #128 /* r0 = Cb-128 */ + sub r1, r1, #128 /* r1 = Cr-128 */ + + add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */ + add r2, r2, r2, asl #4 + add r2, r2, r0, asl #3 + add r2, r2, r0, asl #4 + + add r5, r1, r1, asl #2 /* r1 = Cr*101 */ + add r5, r5, r1, asl #5 + add r1, r5, r1, asl #6 + + add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */ + mov r1, r1, asr #9 + rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */ + mov r2, r2, asr #8 + add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */ + mov r0, r0, asr #2 + + /* pixel_3 */ + sub r3, r3, #16 /* r3 = (Y'-16) * (74/2) */ + add r7, r3, r3, asl #2 + add r3, r7, r3, asl #5 + + add r6, r1, r3, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r7, r2, r3, asr #7 /* r7 = g = (Y >> 8) + guv */ + add r5, r0, r3, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r3, r6, r5 /* check if clamping is needed... 
*/ + orr r3, r3, r7, asr #1 /* ...at all */ + cmp r3, #31 + bls 15f /* no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r7, #63 /* clamp g */ + mvnhi r7, r7, asr #31 + andhi r7, r7, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_3 and save to r5 for later pixel packing */ + orr r5, r5, r7, lsl #5 /* pixel_3 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_3 */ + + /* pixel_4 */ + sub r4, r4, #16 /* r4 = (Y'-16) * (74/2) */ + add r7, r4, r4, asl #2 + add r4, r7, r4, asl #5 + + add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r7, r2, r4, asr #7 /* r7 = g = (Y >> 8) + guv */ + add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r3, r6, r4 /* check if clamping is needed... */ + orr r3, r3, r7, asr #1 /* ...at all */ + cmp r3, #31 + bls 15f /* no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r7, #63 /* clamp g */ + mvnhi r7, r7, asr #31 + andhi r7, r7, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_4 and pack with pixel_3 before writing */ + orr r4, r4, r7, lsl #5 /* pixel_4 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_4 */ + orr r5, r5, r4, lsl #16 /* r5 = pixel_4<<16 | pixel_3 */ + + ldr r7, [sp] /* r7 = 2*width */ + ldrb r3, [r10], #1 /* r3 = Y'1 */ + ldrb r4, [r10], #1 /* r4 = Y'2 */ + + str r5, [lr, r7] /* write pixel_3 and pixel_4 */ + + /* pixel_1 */ + sub r3, r3, #16 /* r3 = (Y'-16) * (74/2) */ + add r7, r3, r3, asl #2 + add r3, r7, r3, asl #5 + + add r6, r1, r3, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r7, r2, r3, asr #7 /* r7 = g = (Y >> 8) + guv */ + add r5, r0, r3, asr #8 /* r5 = b = (Y >> 9) + bu */ + + orr r3, r6, r5 /* check if clamping is needed... */ + orr r3, r3, r7, asr #1 /* ...at all */ + cmp r3, #31 + bls 15f /* no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r7, #63 /* clamp g */ + mvnhi r7, r7, asr #31 + andhi r7, r7, #63 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 +15: /* no clamp */ + + /* calculate pixel_1 and save to r5 for later pixel packing */ + orr r5, r5, r7, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_1 */ + + /* pixel_2 */ + sub r4, r4, #16 /* r4 = (Y'-16) * (74/2) */ + add r7, r4, r4, asl #2 + add r4, r7, r4, asl #5 + + add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r7, r2, r4, asr #7 /* r7 = g = (Y >> 8) + guv */ + add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ + + orr r3, r6, r4 /* check if clamping is needed... 
*/ + orr r3, r3, r7, asr #1 /* ...at all */ + cmp r3, #31 + bls 15f /* no clamp */ + cmp r6, #31 /* clamp r */ + mvnhi r6, r6, asr #31 + andhi r6, r6, #31 + cmp r7, #63 /* clamp g */ + mvnhi r7, r7, asr #31 + andhi r7, r7, #63 + cmp r4, #31 /* clamp b */ + mvnhi r4, r4, asr #31 + andhi r4, r4, #31 +15: /* no clamp */ + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r4, r4, r7, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ + orr r5, r5, r4, lsl #16 /* r5 = pixel_2<<16 | pixel_1 */ + + str r5, [lr], #4 /* write pixel_1 and pixel_2 */ + + subs r9, r9, #4 /* check for loop end */ + bgt 10b /* back to beginning */ + + /* loop end */ + add sp, sp, #4 /* deallocate stack */ + ldmpc regs=r4-r11 /* restore registers */ + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +#elif (YUV2RGB_VERSION == VERSION_ARMV5TE) +/**************************************************************************** + * How do I encode Y'CBCR components from R'G'B' in [0, +1]? (see ColorFAQ) + * |R| |0.00456621 0 0.00625893| |Y' - 16| + * |G| = |0.00456621 -0.00153632 -0.00318811| |Pb - 128| + * |B| |0.00456621 0.00791071 0 | |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + */ +#define NBR 14 /* 14-bit resolution (SVN) */ +#define COEF_C0 74 +#define COEF_C1 101 +#define COEF_C2 -24 +#define COEF_C3 -51 +#define COEF_C4 128 +#define C4_IS_POW2 + +/* constant for rounding a NBR number before down-scaling it to RS bits */ +#define ROUND(RS) (1 << (NBR - RS - 1)) + +/* packed 16-bit coefficients */ +#define COEF_C4_C1 ((COEF_C4 << 16) | (COEF_C1 & 0xffff)) +#define COEF_2C3_2C2 ((COEF_C3 << 17) | ((COEF_C2 << 1) & 0xffff)) +/* 32-bit MLA constants */ +#define CONST_MLA_Y (-16 * COEF_C0) + +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * uint16_t* out, + * int width, + * int stride); + * + * Converts two lines from YUV420 to RGB565, within each iteration four + * pixels (2 per line) are calculated and written to destination buffer. + * + * - use ARMv5TE+ 1-cycle multiply+accumulator instructions. 
+ */ + .section .icode, "ax", %progbits + + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function + +lcd_write_yuv420_lines: + @ r0 = src = yuv_src + @ r1 = out = dst_p + @ r2 = width + @ r3 = stride + stmfd sp!, {r4-r11,lr} @ save non-scratch + ldmia r0, {r10-r12} @ r10 = yuv_src[0] = Y'_p + @ r11 = yuv_src[1] = Cb_p + @ r12 = yuv_src[2] = Cr_p + adr r0, const_data @ load constants + ldmia r0, {r5-r8} @ r5 = COEF_C4_C1 + @ r6 = COEF_2C3_2C2 + @ r7 = COEF_C0 + @ r8 = CONST_MLA_Y + sub r4, r12, r11 @ r4 = Cr_p-Cb_p + mov r9, r2, asl #1 @ r9 = 2*width + stmfd sp!, {r4-r6,r9} @ SP -> Cr_p-Cb_p + @ COEF_C4_C1 + @ COEF_2C3_2C2 + @ 2*width + add r12, r10, r3 @ r12 = Y'_p + stride = Y'stride_p + mov lr, r1 @ RGB565 data destination buffer + orr r9, r7, r2, lsl #15 @ loop_count = width/2; + @ r9 = loop_count<<16 | COEF_C0 + sub r9, r9, #0x10000 @ loop_count-- + +10: @ loop_start + + @ register usage: + @ r8 = CONST_MLA_Y + @ r9 = loop count<<16 | COEF_C0 + @ r10 = Y'_p + @ r11 = Cb_p + @ r12 = Y'stride_p + @ lr = dst_p + @ free: r0-r7 + + ldmia sp, {r2-r4} @ r2 = Cr_p-Cb_p + @ r3 = COEF_C4_C1 + @ r4 = COEF_2C3_2C2 + mov r5, #ROUND(5) @ r5 = round constant + + ldrb r6, [r12], #1 @ r6 = Y'3 + ldrb r7, [r12], #1 @ r7 = Y'4 + + ldrb r1, [r11, r2] @ r1 = Cr = *Cr_p++ + ldrb r0, [r11], #1 @ r0 = Cb = *Cb_p++ + + /* calculate Y3 and Y4 */ + smlabb r6, r6, r9, r8 @ r6 = Y3 = C0*Y'3 - C0*16 + smlabb r7, r7, r9, r8 @ r7 = Y4 = C0*Y'4 - C0*16 + + /* calculate rv, guv, bu */ + sub r1, r1, #128 @ r1 = Cr" = Cr-128 + sub r0, r0, #128 @ r0 = Cb" = Cb-128 + + smlabt r2, r1, r4, r5 @ r2 = guv" = Cr"*(2*C2) + + smlabb r2, r0, r4, r2 @ Cb"*(2*C3) + round + smlabb r1, r1, r3, r5 @ r1 = rv" = Cr"*C1 + round + #ifdef C4_IS_POW2 + add r0, r5, r0, asl #NBR-7 @ r0 = bu" = Cb"*C4 + round + #else + smlabt r0, r0, r3, r5 @ r0 = bu" = Cb"*C4 + round + #endif + + /* scale rv",guv",bu" */ + mov r2, r2, asr #NBR-5 @ r2 = guv = guv" >> scale + mov r1, r1, asr #NBR-5 @ r1 = rv = rv" >> scale + mov r0, r0, asr #NBR-5 @ r0 = bu = bu" >> scale + + @ register usage: + @ r8-r12,lr: pointers, counters + @ r0,r1,r2 = bu,rv,guv (rounded and scaled to RGB565) + @ r6,r7 = Y'3,Y'4 + @ free: r3-r5 + + /* pixel_3 */ + add r5, r1, r6, asr #NBR-5 @ r5 = r = (Y3 >> scale) + rv + add r4, r2, r6, asr #NBR-6 @ r4 = g = (Y3 >> scale) + guv + add r3, r0, r6, asr #NBR-5 @ r3 = b = (Y3 >> scale) + bu + + orr r6, r5, r3 @ check if clamping is needed... + orr r6, r6, r4, asr #1 @ ...at all + cmp r6, #31 + bls 15f @ no clamp + cmp r5, #31 @ clamp r + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 + cmp r4, #63 @ clamp g + mvnhi r4, r4, asr #31 + andhi r4, r4, #63 + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 + andhi r3, r3, #31 +15: @ no clamp + + /* calculate pixel_3 and save to r3 for later pixel packing */ + orr r3, r3, r4, lsl #5 @ r3 = pixel_3 = r<<11 | g<<5 | b + orr r3, r3, r5, lsl #11 + + /* pixel_4 */ + add r5, r1, r7, asr #NBR-5 @ r5 = r = (Y4 >> scale) + rv + add r4, r2, r7, asr #NBR-6 @ r4 = g = (Y4 >> scale) + guv + add r7, r0, r7, asr #NBR-5 @ r7 = b = (Y4 >> scale) + bu + + orr r6, r5, r7 @ check if clamping is needed... 
+ orr r6, r6, r4, asr #1 @ ...at all + cmp r6, #31 + bls 15f @ no clamp + cmp r5, #31 @ clamp r + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 + cmp r4, #63 @ clamp g + mvnhi r4, r4, asr #31 + andhi r4, r4, #63 + cmp r7, #31 @ clamp b + mvnhi r7, r7, asr #31 + andhi r7, r7, #31 +15: @ no clamp + + /* calculate pixel_4 and pack with pixel_3 before writing */ + orr r7, r7, r4, lsl #5 @ r7 = pixel_4 = r<<11 | g<<5 | b + orr r7, r7, r5, lsl #11 + orr r3, r3, r7, lsl #16 @ r3 = pixel_4<<16 | pixel_3 + + /* avoid interlocks when writing pixel_3 and pixel_4 */ + ldr r5, [sp, #12] @ r5 = 2*width + + ldrb r6, [r10], #1 @ r6 = Y'1 + ldrb r7, [r10], #1 @ r7 = Y'2 + + /* write pixel_3 and pixel_4 */ + str r3, [lr, r5] @ [dst_p + 2*width] = r3 + + @ register usage: + @ r8-r12,lr: pointers, counters + @ r0,r1,r2 = bu,rv,guv (rounded and scaled to RGB565) + @ r6,r7 = Y'1,Y'2 + @ free: r3-r5 + + /* calculate Y1 and Y2 */ + smlabb r6, r6, r9, r8 @ r6 = Y1 = C0*Y'1 - C0*16 + smlabb r7, r7, r9, r8 @ r7 = Y2 = C0*Y'2 - C0*16 + + /* pixel_1 */ + add r5, r1, r6, asr #NBR-5 @ r5 = r = (Y1 >> scale) + rv + add r4, r2, r6, asr #NBR-6 @ r4 = g = (Y1 >> scale) + guv + add r3, r0, r6, asr #NBR-5 @ r3 = b = (Y1 >> scale) + bu + + orr r6, r5, r3 @ check if clamping is needed... + orr r6, r6, r4, asr #1 @ ...at all + cmp r6, #31 + bls 15f @ no clamp + cmp r5, #31 @ clamp r + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 + cmp r4, #63 @ clamp g + mvnhi r4, r4, asr #31 + andhi r4, r4, #63 + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 + andhi r3, r3, #31 +15: @ no clamp + + /* calculate pixel_1 and save to r3 for later pixel packing */ + orr r3, r3, r4, lsl #5 @ r3 = pixel_1 = r<<11 | g<<5 | b + orr r3, r3, r5, lsl #11 + + /* pixel_2 */ + add r5, r1, r7, asr #NBR-5 @ r5 = r = (Y2 >> scale) + rv + add r4, r2, r7, asr #NBR-6 @ r4 = g = (Y2 >> scale) + guv + add r7, r0, r7, asr #NBR-5 @ r7 = b = (Y2 >> scale) + bu + + orr r6, r5, r7 @ check if clamping is needed... + orr r6, r6, r4, asr #1 @ ...at all + cmp r6, #31 + bls 15f @ no clamp + cmp r5, #31 @ clamp r + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 + cmp r4, #63 @ clamp g + mvnhi r4, r4, asr #31 + andhi r4, r4, #63 + cmp r7, #31 @ clamp b + mvnhi r7, r7, asr #31 + andhi r7, r7, #31 +15: @ no clamp + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r7, r7, r4, lsl #5 @ r7 = pixel_2 = r<<11 | g<<5 | b + orr r7, r7, r5, lsl #11 + orr r3, r3, r7, lsl #16 @ r3 = pixel_2 << 16 | pixel_1 + + str r3, [lr], #4 @ write pixel_1 and pixel_2 + + /* check for loop end */ + subs r9, r9, #0x10000 @ loop_count-- + bge 10b @ back to beginning + + /* bye */ + add sp, sp, #16 + ldmpc regs=r4-r11 @ restore registers + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + +/* data */ + .align 2 +const_data: + .word COEF_C4_C1 + .word COEF_2C3_2C2 + .word COEF_C0 + .word CONST_MLA_Y + + .size const_data, .-const_data + + +#else /* YUV2RGB_VERSION == VERSION_ARMV5TE_WST */ +/**************************************************************************** + * How do I encode Y'CBCR components from R'G'B' in [0, +1]? 
(see ColorFAQ) + * |R| |0.00456621 0 0.00625893| |Y' - 16| + * |G| = |0.00456621 -0.00153632 -0.00318811| |Pb - 128| + * |B| |0.00456621 0.00791071 0 | |Pr - 128| + * + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + */ +#define NBR 14 /* 14-bit resolution (SVN) */ +#define COEF_C0 74 +#define COEF_C1 101 +#define COEF_C2 -24 +#define COEF_C3 -51 +#define COEF_C4 128 +#define C4_IS_POW2 + +/* packed 16-bit coefficients */ +#define COEF_C4_C1 ((COEF_C4 << 16) | (COEF_C1 & 0xffff)) +#define COEF_C3_C2 ((COEF_C3 << 16) | (COEF_C2 & 0xffff)) + +/* constant for rounding an NBR number before down-scaling it to RS bits */ +#define ROUND(RS) (1 << (NBR - RS - 1)) + +/* 32-bit MLA constants */ +#define CONST_MLA_Y (-16 * COEF_C0) +#define CONST_MLA_RV ((-128 * COEF_C1) + ROUND(5)) +#define CONST_MLA_BU ((-128 * COEF_C4) + ROUND(5)) +/* trick to save the register needed for table_sat6 reference: + add table_sat6-table_sat5 offset (conveniently scaled) to guv MLA */ +#define CONST_MLA_GUV (-128 * (COEF_C2 + COEF_C3) + ROUND(6) + \ + ((table_sat6 - table_sat5) << (NBR - 6))) + +/**************************************************************************** + * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + * uint16_t* out, + * int width, + * int stride); + * + * Converts two lines from YUV420 to RGB565, within each iteration four + * pixels (2 per line) are calculated and written to destination buffer. + * + * - use ARMv5TE+ 1-cycle multiply+accumulator instructions. + * - use data tables (256 bytes) for RBG565 saturation. + */ + .section .icode, "ax", %progbits + + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function + +lcd_write_yuv420_lines: + @ r0 = src = yuv_src + @ r1 = out = dst1_p + @ r2 = width + @ r3 = stride + stmfd sp!, {r4-r11,lr} @ save non-scratch + ldmia r0, {r10-r12} @ r10 = yuv_src[0] = Y'_p + @ r11 = yuv_src[1] = Cb_p + @ r12 = yuv_src[2] = Cr_p + /* prepare data and fill stack */ + adr r0, const_data @ load constants + ldmia r0, {r4-r9,lr} @ r4 = COEF_C0 + @ r5 = CONST_MLA_GUV + @ r6 = COEF_C3_C2 + @ r7 = CONST_MLA_BU + @ r8 = COEF_C4_C1 + @ r9 = CONST_MLA_RV + @ lr = table_sat5 + sub r0, r12, r11 @ r0 = Cr_p-Cb_p + #define STACK_SZ 28 + stmfd sp!, {r0,r5-r9,lr} @ SP -> Cr_p-Cb_p + @ CONST_MLA_GUV + @ COEF_C3_C2 + @ CONST_MLA_BU + @ COEF_C4_C1 + @ CONST_MLA_RV + @ table_sat5 + mov r8, r4, lsl #4 @ + rsb r8, #0 @ r8 = -16*COEF_C0 = CONST_MLA_Y + mov lr, r1 @ RGB565 data destination buffer + add r9, lr, r2, asl #1 @ r9 = out + 2*width = dst2_p + add r12, r3, r10 @ r12 = Y'_p + stride + orr r7, r4, r2, lsl #15 @ loop_count = width/2; + @ r7 = loop_count<<16 | COEF_C0 + sub r7, r7, #0x10000 @ loop_count-- + + /* align loop code to minimize occupied lines, execution + time per loop is optimized ~10% on ARM926EJ-S */ + .align CACHEALIGN_BITS +loop_start: + + @ register usage: + @ r7 = loop count<<16 | COEF_C0 + @ r8 = CONST_MLA_Y + @ r9 = dst2_p + @ r10 = Y'_p + @ r11 = Cb_p + @ r12 = Y'stride_p + @ lr = dst1_p + @ free: r0-r6 + + /* load constants from stack */ + ldmia sp, {r1-r3,r6} @ r1 = Cr_p-Cb_p + @ r2 = CONST_MLA_GUV + @ r3 = COEF_C3_C2 + @ r6 = CONST_MLA_BU + + /* read Cr", Cb" */ + ldrb r1, [r11, r1] @ r1 = Cr = *Cr_p++ + ldrb r0, [r11], #1 @ r0 = Cb = *Cb_p++ + + /* load more constants (avoids r1 interlock) */ + ldrd r4, [sp, #16] @ r4 = COEF_C4_C1 + @ r5 = CONST_MLA_RV + + /* calculate rv", guv", bu" */ 
+ smlabt r2, r1, r3, r2 @ r2 = guv" = Cr*C2 + Cb*C3 + smlabb r2, r0, r3, r2 @ + CONST_MLA_GUV + smlabb r1, r1, r4, r5 @ r1 = rv" = Cr*C1 + CONST_MLA_RV + #ifdef C4_IS_POW2 + add r0, r6, r0, asl #NBR-7 @ r0 = bu" = Cb*C4 + CONST_MLA_BU + #else + smlabt r0, r0, r4, r6 @ r0 = bu" = Cb*C4 + CONST_MLA_BU + #endif + + ldr r4, [sp, #STACK_SZ-4] @ r4 = table_sat5 + + /* read Y'1 and Y'2 */ + ldrb r5, [r10], #1 @ r5 = Y'1 = *Y'_p++ + ldrb r6, [r10], #1 @ r6 = Y'2 = *Y'_p++ + + /* scale rv",guv",bu", adding sat5_p here saves instructions later */ + add r1, r4, r1, asr #NBR-5 @ r1 = rv' = sat5_p + rv">>scale + add r2, r4, r2, asr #NBR-6 @ r2 = guv' = sat5_p + guv">>scale + add r0, r4, r0, asr #NBR-5 @ r0 = bu' = sat5_p + bu">>scale + + @ register usage: + @ r7-r12,lr: pointers, counters, tables + @ r0,r1,r2 = (bu,rv,guv) rounded and RGB565 scaled + @ r5,r6 = Y'1,Y'2 + @ free: r3,r4 + + /* calculate Y1 and Y2 */ + smlabb r5, r5, r7, r8 @ r5 = Y1 = C0*Y'1 - 16*C0 + smlabb r6, r6, r7, r8 @ r6 = Y2 = C0*Y'2 - 16*C0 + + /* pixel_1 */ + ldrb r3, [r0, r5, asr #NBR-5] @ r3 = b = sat5[Y1>>scale + bu'] + ldrb r4, [r2, r5, asr #NBR-6] @ r4 = g = sat6[Y1>>scale + guv'] + ldrb r5, [r1, r5, asr #NBR-5] @ r5 = r = sat5[Y1>>scale + rv'] + + /* calculate pixel_1 */ + orr r3, r3, r4, lsl #5 @ r3 = pixel_1 = g<<5 | b + + /* pixel_2 (avoid r5 interlock) */ + ldrb r4, [r0, r6, asr #NBR-5] @ r4 = b = sat5[Y2>>scale + bu'] + + /* calculate pixel_1 and save to r3 for later pixel packing */ + orr r3, r3, r5, lsl #11 @ r3 = pixel_1 = r<<11 | g<<5 | b + + /* pixel_2 */ + ldrb r5, [r2, r6, asr #NBR-6] @ r5 = g = sat6[Y2>>scale + guv'] + ldrb r6, [r1, r6, asr #NBR-5] @ r6 = r = sat5[Y2>>scale + rv'] + + /* calculate pixel_2 and pack with pixel_1 before writing */ + orr r3, r3, r4, lsl #16 @ r3 = pixel_2<<16 | pixel_1 + orr r3, r3, r5, lsl #21 + orr r3, r3, r6, lsl #27 + + /* read Y'3 and Y'4 */ + ldrb r5, [r12], #1 @ r5 = Y'3 = *Y'stride_p++ + ldrb r6, [r12], #1 @ r6 = Y'4 = *Y'stride_p++ + + /* write pixel_1 and pixel_2 */ + str r3, [lr], #4 @ *dst2_p++ = r3 + + @ register usage: + @ r7-r12,lr: pointers, counters, tables + @ r0,r1,r2 = (bu,rv,guv) rounded and RGB565 scaled + @ r5,r6 = Y'3,Y'4 + @ free: r3,r4 + + /* calculate Y3 and Y4 */ + smlabb r5, r5, r7, r8 @ r5 = Y3 = C0*Y'3 - 16*C0 + smlabb r6, r6, r7, r8 @ r6 = Y4 = C0*Y'4 - 16*C0 + + /* pixel_3 */ + ldrb r3, [r0, r5, asr #NBR-5] @ r3 = b = sat5[Y3>>scale + bu'] + ldrb r4, [r2, r5, asr #NBR-6] @ r4 = g = sat6[Y3>>scale + guv'] + ldrb r5, [r1, r5, asr #NBR-5] @ r5 = r = sat5[Y3>>scale + rv'] + + /* calculate pixel_3 */ + orr r3, r3, r4, lsl #5 @ r3 = pixel_3 = g<<5 | b + + /* pixel_4 (avoid r5 interlock) */ + ldrb r4, [r0, r6, asr #NBR-5] @ r4 = b = sat5[Y4>>scale + bu'] + + /* calculate pixel_3 and save to r3 for later pixel packing */ + orr r3, r3, r5, lsl #11 @ r3 = pixel_3 = r<<11 | g<<5 | b + + /* pixel_4 */ + ldrb r5, [r2, r6, asr #NBR-6] @ r5 = g = sat6[Y4>>scale + guv'] + ldrb r6, [r1, r6, asr #NBR-5] @ r6 = r = sat5[Y4>>scale + rv'] + + /* calculate pixel_4 and pack with pixel_3 before writing */ + orr r3, r3, r4, lsl #16 @ r3 = pixel_4 << 16 | pixel_3 + orr r3, r3, r5, lsl #21 + orr r3, r3, r6, lsl #27 + + /* write pixel_3 and pixel_4 */ + str r3, [r9], #4 @ *dst1_p++ = r3 + + /* check for loop end */ + subs r7, r7, #0x10000 @ loop_count-- + bge loop_start @ back to beginning + + /* bye */ + add sp, sp, #STACK_SZ @ deallocate stack + ldmpc regs=r4-r11 @ restore registers + + .ltorg + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + +/* data */ + .align 2 
+const_data: + .word COEF_C0 + .word CONST_MLA_GUV + .word COEF_C3_C2 + .word CONST_MLA_BU + .word COEF_C4_C1 + .word CONST_MLA_RV + .word table_sat5 + + .size const_data, .-const_data + +/* saturation tables */ + /*.section .data*/ + /* aligned to cache line size to minimize cache usage */ + .align CACHEALIGN_BITS + +saturation_tables: + /* 5-bit saturation table [-36..0..+67], size=104 */ + /* table_sat5[-36..-1] */ + .byte 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + table_sat5: + /* table_sat5[0..67] */ + .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .byte 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + .byte 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 + .byte 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 + .byte 31, 31, 31, 31 + + /* 6-bit saturation table [-44..0..+107], size=152 */ + /* table_sat6[-44..-1] */ + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + table_sat6: + /* table_sat6[0..107] */ + .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .byte 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + .byte 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 + .byte 48, 49, 50, 51, 52, 53 ,54, 55, 56, 57, 58, 59, 60, 61, 62, 63 + .byte 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 + .byte 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 + .byte 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 + + .size saturation_tables, .-saturation_tables +#endif /* YUV2RGB_VERSION */ diff --git a/firmware/target/arm/samsung/yh820/lcd-as-yh820.S b/firmware/target/arm/samsung/yh820/lcd-as-yh820.S new file mode 100644 index 0000000000..542ceeeb36 --- /dev/null +++ b/firmware/target/arm/samsung/yh820/lcd-as-yh820.S @@ -0,0 +1,550 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007 by Jens Arnold + * Heavily based on lcd-as-memframe.c by Michael Sevakis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r10, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ r0 = scratch + sub r2, r2, #1 @ + mov r3, #0x70000000 @ + orr r3, r3, #0x3000 @ r3 = LCD1_BASE +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add lr, r9, r9, asl #2 @ r9 = Cr*101 + add lr, lr, r9, asl #5 @ + add r9, lr, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? 
+ bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r10 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. + * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|rotated|use order): + * 5 3 4 2 2 6 3 7 row0 row2 > down + * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left + * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/ + * 0 6 1 7 5 1 4 0 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + sub r2, r2, #1 @ + ldr r14, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r14, r3 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + mov r3, #0x70000000 @ + orr r3, r3, #0x3000 @ r3 = LCD1_BASE +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r0, r0, r14 @ b = r0 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/samsung/yh820/lcd-yh820.c b/firmware/target/arm/samsung/yh820/lcd-yh820.c index 25692eb8ac..f4b55ab917 100644 --- a/firmware/target/arm/samsung/yh820/lcd-yh820.c +++ b/firmware/target/arm/samsung/yh820/lcd-yh820.c @@ -30,6 +30,8 @@ #endif /* Display status */ +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + #if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP) static bool is_lcd_enabled = true; #endif @@ -289,6 +291,78 @@ void lcd_set_flip(bool yesno) /*** update functions ***/ +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned char const * yuv_src[3]; + off_t z; + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height >>= 1; + + z = stride*src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + + lcd_send_command(R_ENTRY_MODE); + lcd_send_command(0x03); + + lcd_send_command(R_Y_ADDR_AREA); + lcd_send_command(x + 4); + lcd_send_command(x + width - 1 + 4); + + if (lcd_yuv_options & LCD_YUV_DITHER) + { + do + { + lcd_send_command(R_X_ADDR_AREA); + lcd_send_command(y); + lcd_send_command(y + 1); + + lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } + else + { + do + { + lcd_send_command(R_X_ADDR_AREA); + lcd_send_command(y); + lcd_send_command(y + 1); + + lcd_write_yuv420_lines(yuv_src, width, stride); + yuv_src[0] += stride << 1; /* Skip down two luma lines */ + yuv_src[1] += stride >> 1; /* Skip down one chroma line */ + yuv_src[2] += stride >> 1; + y += 2; + } + while (--height > 0); + } +} + /* Update the display. This must be called after all other LCD functions that change the display. */ void lcd_update(void) diff --git a/firmware/target/arm/samsung/yh925/lcd-as-yh925.S b/firmware/target/arm/samsung/yh925/lcd-as-yh925.S new file mode 100644 index 0000000000..8ac8b4289f --- /dev/null +++ b/firmware/target/arm/samsung/yh925/lcd-as-yh925.S @@ -0,0 +1,538 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007-2008 by Michael Sevakis + * + * H10 20GB LCD assembly routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + mov r14, #LCD2_DATA_MASK @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add r11, r9, r9, asl #2 @ r9 = Cr*101 + add r11, r11, r9, asl #5 @ + add r9, r11, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5) + orr r3, r3, r11, lsl #11 @ r3 |= (r << 11) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu + add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r3, r11 @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r3, #31 @ clamp b + mvnhi r3, r3, asr #31 @ + andhi r3, r3, #31 @ + cmp r11, #31 @ clamp r + mvnhi r11, r11, asr #31 @ + andhi r11, r11, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11) + orr r3, r3, r7, lsl #5 @ r3 |= (g << 5) + @ + orr r7, r14, r3, lsr #8 @ store pixel + orr r11, r14, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. 
+ * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|use order): + * 5 3 4 2 row0 row2 > down + * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left + * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/ + * 0 6 1 7 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + ldr r0, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r3, r0 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + @ + mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c + add r0, r0, #0x8a00 @ + @ + sub r2, r2, #1 @ Adjust stride because of increment +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r3, r3, r14 @ b = r3 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r3, r8, r7 @ r3 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256 + add r3, r12, r3, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r3, r3, r12 @ b = r3 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r3, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r3, asr #15 @ clamp b + mvnne r3, r12, lsr #15 @ + andne r3, r3, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + and r11, r11, #0xf800 @ pack pixel + and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) | + orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) | + orr r3, r11, r3, lsr #10 @ (b >> 10) + @ + mov r11, #LCD2_DATA_MASK @ store pixel + orr r7, r11, r3, lsr #8 @ + orr r11, r11, r3 @ +20: @ + ldr r3, [r0] @ + tst r3, #LCD2_BUSY_MASK @ + bne 20b @ + str r7, [r0] @ + str r11, [r0] @ + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/samsung/yh925/lcd-yh925.c b/firmware/target/arm/samsung/yh925/lcd-yh925.c index e2b3ae3694..93bfb3a5f2 100644 --- a/firmware/target/arm/samsung/yh925/lcd-yh925.c +++ b/firmware/target/arm/samsung/yh925/lcd-yh925.c @@ -37,6 +37,8 @@ static unsigned short disp_control_rev; /* Contrast setting << 8 */ static int lcd_contrast; +static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0; + /* Forward declarations */ #if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP) static void lcd_display_off(void); @@ -508,6 +510,98 @@ bool lcd_active(void) /*** update functions ***/ +void lcd_yuv_set_options(unsigned options) +{ + lcd_yuv_options = options; +} + +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 
*/ +extern void lcd_write_yuv420_lines(unsigned char const * const src[3], + int width, + int stride); +extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + int width, + int stride, + int x_screen, /* To align dither pattern */ + int y_screen); + +/* Performance function to blit a YUV bitmap directly to the LCD */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + const unsigned char *yuv_src[3]; + const unsigned char *ysrc_max; + int y0; + int options; + + /* NOT MODIFIED FOR THE YH-925 */ + + if (!display_on) + return; + + width &= ~1; + height &= ~1; + + x += x_offset; + + /* calculate the drawing region */ + + /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin + * is actually the bottom left and horizontal and vertical are swapped. + * Rockbox expects the origin to be the top left so we need to use + * 127 - y instead of just y */ + + /* max vert << 8 | start vert */ + lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x); + + y0 = LCD_HEIGHT - 1 - y + y_offset; + + /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */ + lcd_write_reg(R_ENTRY_MODE, 0x1020); + + yuv_src[0] = src[0] + src_y * stride + src_x; + yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + ysrc_max = yuv_src[0] + height * stride; + + options = lcd_yuv_options; + + do + { + /* max horiz << 8 | start horiz */ + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1)); + + /* position cursor (set AD0-AD15) */ + /* start vert << 8 | start horiz */ + lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y0); + + /* start drawing */ + lcd_send_cmd(R_WRITE_DATA_2_GRAM); + + if (options & LCD_YUV_DITHER) + { + lcd_write_yuv420_lines_odither(yuv_src, width, stride, + x, y); + y -= 2; + } + else + { + lcd_write_yuv420_lines(yuv_src, width, stride); + } + + y0 -= 2; + yuv_src[0] += stride << 1; + yuv_src[1] += stride >> 1; + yuv_src[2] += stride >> 1; + } + while (yuv_src[0] < ysrc_max); + + /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */ + lcd_write_reg(R_ENTRY_MODE, 0x1028); +} + + /* Update a fraction of the display. */ void lcd_update_rect(int x0, int y0, int width, int height) { diff --git a/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S b/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S new file mode 100644 index 0000000000..542ceeeb36 --- /dev/null +++ b/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S @@ -0,0 +1,550 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007 by Jens Arnold + * Heavily based on lcd-as-memframe.c by Michael Sevakis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + +/**************************************************************************** + * void lcd_write_yuv420_lines(unsigned char const * const src[3], + * int width, + * int stride); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 565: + * |R| |74 0 101| |Y' - 16| >> 9 + * |G| = |74 -24 -51| |Cb - 128| >> 8 + * |B| |74 128 0| |Cr - 128| >> 9 + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, %function +lcd_write_yuv420_lines: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + stmfd sp!, { r4-r10, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ r0 = scratch + sub r2, r2, #1 @ + mov r3, #0x70000000 @ + orr r3, r3, #0x3000 @ r3 = LCD1_BASE +10: @ loop line @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right + add r7, r12, r7, asl #5 @ by one less when adding - same for all + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 + add r10, r10, r10, asl #4 @ + add r10, r10, r8, asl #3 @ + add r10, r10, r8, asl #4 @ + @ + add lr, r9, r9, asl #2 @ r9 = Cr*101 + add lr, lr, r9, asl #5 @ + add r9, lr, r9, asl #6 @ + @ + add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8 + mov r8, r8, asr #2 @ + add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 + mov r9, r9, asr #9 @ + rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 + mov r10, r10, asr #8 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... 
+ orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74 + add r12, r7, r7, asl #2 @ + add r7, r12, r7, asl #5 @ + @ compute R, G, and B + add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu + add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv + add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv + @ + orr r12, r0, lr @ check if clamping is needed... + orr r12, r12, r7, asr #1 @ ...at all + cmp r12, #31 @ + bls 15f @ no clamp @ + cmp r0, #31 @ clamp b + mvnhi r0, r0, asr #31 @ + andhi r0, r0, #31 @ + cmp lr, #31 @ clamp r + mvnhi lr, lr, asr #31 @ + andhi lr, lr, #31 @ + cmp r7, #63 @ clamp g + mvnhi r7, r7, asr #31 @ + andhi r7, r7, #63 @ +15: @ no clamp @ + @ + mov lr, lr, lsl #3 @ + orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3) + orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str lr, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? 
+ bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r10 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines + +/**************************************************************************** + * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3], + * int width, + * int stride, + * int x_screen, + * int y_screen); + * + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Red scaled at twice g & b but at same precision to place it in correct + * bit position after multiply and leave instruction count lower. + * |R| |258 0 408| |Y' - 16| + * |G| = |149 -49 -104| |Cb - 128| + * |B| |149 258 0| |Cr - 128| + * + * Write four RGB565 pixels in the following order on each loop: + * 1 3 + > down + * 2 4 \/ left + * + * Kernel pattern (raw|rotated|use order): + * 5 3 4 2 2 6 3 7 row0 row2 > down + * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left + * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/ + * 0 6 1 7 5 1 4 0 + */ + .section .icode, "ax", %progbits + .align 2 + .global lcd_write_yuv420_lines_odither + .type lcd_write_yuv420_lines_odither, %function +lcd_write_yuv420_lines_odither: + @ r0 = yuv_src + @ r1 = width + @ r2 = stride + @ r3 = x_screen + @ [sp] = y_screen + stmfd sp!, { r4-r11, lr } @ save non-scratch + ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p + @ r5 = yuv_src[1] = Cb_p + @ r6 = yuv_src[2] = Cr_p + @ + sub r2, r2, #1 @ + ldr r14, [sp, #36] @ Line up pattern and kernel quadrant + eor r14, r14, r3 @ + and r14, r14, #0x2 @ + mov r14, r14, lsl #6 @ 0x00 or 0x80 + mov r3, #0x70000000 @ + orr r3, r3, #0x3000 @ r3 = LCD1_BASE +10: @ loop line @ + @ + ldrb r7, [r4], #1 @ r7 = *Y'_p++; + ldrb r8, [r5], #1 @ r8 = *Cb_p++; + ldrb r9, [r6], #1 @ r9 = *Cr_p++; + @ + eor r14, r14, #0x80 @ flip pattern quadrant + @ + sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ + sub r8, r8, #128 @ Cb -= 128 + sub r9, r9, #128 @ Cr -= 128 + @ + add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49 + add r10, r10, r8, asl #5 @ + add r10, r10, r9, asl #3 @ + add r10, r10, r9, asl #5 @ + add r10, r10, r9, asl #6 @ + @ + mov r8, r8, asl #1 @ r8 = bu = Cb*258 + add r8, r8, r8, asl #7 @ + @ + add r9, r9, r9, asl #1 @ r9 = rv = Cr*408 + add r9, r9, r9, asl #4 @ + mov r9, r9, asl #3 @ + @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x100 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + @ + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + @ + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x200 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++) + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + @ r8 = bu, r9 = rv, r10 = guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256 + add r7, r12, r7, lsr #8 @ + @ + add r12, r14, #0x300 @ + @ + add r0, r0, r12 @ b = r0 + delta + add r11, r11, r12, lsl #1 @ r = r11 + delta*2 + add r7, r7, r12, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... 
+ orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + @ + ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride) + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r0, [r3, #0x10] @ send LSB + + sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149 + add r12, r7, r7, asl #2 @ + add r12, r12, r12, asl #4 @ + add r7, r12, r7, asl #6 @ + @ compute R, G, and B + add r0, r8, r7 @ r0 = b' = Y + bu + add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv + rsb r7, r10, r7 @ r7 = g' = Y + guv + @ + sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256 + add r0, r12, r0, lsr #8 @ + @ + sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256 + add r11, r12, r11, lsr #8 @ + @ + sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256 + add r7, r12, r7, lsr #8 @ + @ + @ This element is zero - use r14 @ + @ + add r0, r0, r14 @ b = r0 + delta + add r11, r11, r14, lsl #1 @ r = r11 + delta*2 + add r7, r7, r14, lsr #1 @ g = r7 + delta/2 + @ + orr r12, r0, r11, asr #1 @ check if clamping is needed... + orr r12, r12, r7 @ ...at all + movs r12, r12, asr #15 @ + beq 15f @ no clamp @ + movs r12, r0, asr #15 @ clamp b + mvnne r0, r12, lsr #15 @ + andne r0, r0, #0x7c00 @ mask b only if clamped + movs r12, r11, asr #16 @ clamp r + mvnne r11, r12, lsr #16 @ + movs r12, r7, asr #15 @ clamp g + mvnne r7, r12, lsr #15 @ +15: @ no clamp @ + + and r11, r11, #0xf800 @ pack pixel + mov r11, r11, lsr #8 + and r7, r7, #0x7e00 + orr r11, r11, r7, lsr #12 + mov r7, r7, lsr#4 + orr r0, r7, r0, lsr #10 +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? + bne 1b @ + str r11, [r3, #0x10] @ send MSB +1: @ busy @ + ldr r7, [r3] @ r7 = LCD1_BASE + tst r7, #LCD1_BUSY_MASK @ bridge busy? 
+ bne 1b @ + str r0, [r3, #0x10] @ send LSB + + subs r1, r1, #2 @ subtract block from width + bgt 10b @ loop line @ + @ + ldmpc regs=r4-r11 @ restore registers and return + .ltorg @ dump constant pool + .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither diff --git a/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c b/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c index e851c421a6..8620c672e1 100644 --- a/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c +++ b/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c @@ -273,7 +273,15 @@ void lcd_init_device(void) #if defined(HAVE_LCD_MODES) void lcd_set_mode(int mode) { - if(mode==LCD_MODE_RGB565) { + if(mode==LCD_MODE_YUV) { + /* Turn off the RGB buffer and enable the YUV buffer with zoom */ + IO_OSD_OSDWINMD0 |= 0x04; + IO_OSD_VIDWINMD |= 0x01; +#if LCD_NATIVE_WIDTH > 240 + IO_OSD_VIDWINMD |= (0x05<<2); /* This does a 2x zoom */ +#endif + memset16(FRAME2, 0x0080, LCD_NATIVE_HEIGHT*(LCD_NATIVE_WIDTH+LCD_FUDGE)); + } else if(mode==LCD_MODE_RGB565) { /* Turn on the RGB window, set it to 16 bit and turn YUV window off */ IO_OSD_VIDWINMD &= ~(0x01); IO_OSD_OSDWIN0OFST = LCD_NATIVE_WIDTH / 16; @@ -636,6 +644,82 @@ void lcd_pal256_update_pal(fb_data *palette) } #endif +/* Performance function to blit a YUV bitmap directly to the LCD */ +/* Show it rotated so the LCD_WIDTH is now the height */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + unsigned char const * yuv_src[3]; + + if (!lcd_on) + return; + + /* y has to be on a 16 pixel boundary */ + y &= ~0xF; + + if( ((y | x | height | width ) < 0) + || y>LCD_NATIVE_HEIGHT || x>LCD_NATIVE_WIDTH ) + return; + + if(y+height>LCD_NATIVE_WIDTH) + { + height=LCD_NATIVE_WIDTH-y; + } + if(x+width>LCD_NATIVE_HEIGHT) + { + width=LCD_NATIVE_HEIGHT-x; + } + + /* Sorry, but width and height must be >= 2 or else */ + width &= ~1; + height>>=1; + + fb_data * dst = FRAME2 + + ((LCD_NATIVE_WIDTH+LCD_FUDGE)*(LCD_NATIVE_HEIGHT-1)) + - (LCD_NATIVE_WIDTH+LCD_FUDGE)*x + y ; + + /* Scope z */ + { + off_t z; + z = stride*src_y; + yuv_src[0] = src[0] + z + src_x; + yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1); + yuv_src[2] = src[2] + (yuv_src[1] - src[1]); + } + + int cbcr_remain=(stride>>1)-(width>>1); + int y_remain=(stride<<1)-width; + do + { + register int c_width=width; + register unsigned int *c_dst=(unsigned int*)dst; + do + { + register unsigned short Y=*((unsigned short*)yuv_src[0]); + register unsigned short Yst=*((unsigned short*)(yuv_src[0]+stride)); + yuv_src[0]+=2; + + register unsigned char Cb=*yuv_src[1]++; + register unsigned char Cr=*yuv_src[2]++; + + *c_dst = (Yst<<24) | (Cr << 16) | ((Y&0xFF)<<8) | Cb; + *(c_dst - (LCD_NATIVE_WIDTH+LCD_FUDGE)/2) = + ( (Yst&0xFF00)<<16) | (Cr << 16) | (Y&0xFF00) | Cb; + + c_dst -= (LCD_NATIVE_WIDTH+LCD_FUDGE); + + c_width -= 2; + } while (c_width); + + yuv_src[0] += y_remain; /* Skip down two luma lines-width */ + yuv_src[1] += cbcr_remain; /* Skip down one chroma line-width/2 */ + yuv_src[2] += cbcr_remain; + + dst+=2; + } while (--height); +} + void lcd_set_contrast(int val) { (void) val; // TODO: diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S index b319d745ca..e6621e1dea 100644 --- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S +++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S @@ -25,6 +25,248 @@ .section .icode,"ax",@progbits +/* begin lcd_write_yuv420_lines + * + * See 
http://en.wikipedia.org/wiki/YCbCr + * ITU-R BT.601 (formerly CCIR 601): + * |Y'| | 0.299000 0.587000 0.114000| |R| + * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y') + * |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y') + * Scaled, normalized and rounded: + * |Y'| | 65 129 25| |R| + 16 : 16->235 + * |Cb| = |-38 -74 112| |G| + 128 : 16->240 + * |Cr| |112 -94 -18| |B| + 128 : 16->240 + * + * The inverse: + * |R| |1.000000 -0.000001 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB 666: + * |R| |19611723 0 26881894| |Y' - 16| >> 26 + * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26 + * |B| |19611723 33976259 0| |Cr - 128| >> 26 + * + * Needs EMAC set to saturated, signed integer mode. + * + * register usage: + * %a0 - LCD data port + * %a1 - Y pointer + * %a2 - C pointer + * %a3 - C width + * %a4 - Y end address + * %a5 - Y factor + * %a6 - BU factor + * %d0 - scratch + * %d1 - B, previous Y \ alternating + * %d2 - U / B, previous Y / + * %d3 - V / G + * %d4 - R / output pixel + * %d5 - GU factor + * %d6 - GV factor + * %d7 - RGB signed -> unsigned conversion mask + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, @function + +lcd_write_yuv420_lines: + lea.l (-44, %sp), %sp /* free up some registers */ + movem.l %d2-%d7/%a2-%a6, (%sp) + + lea.l 0xf0008002, %a0 /* LCD data port */ + movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */ + lea.l (%a1, %a3*2), %a4 /* Y end address */ + + move.l #19611723, %a5 /* y factor */ + move.l #33976259, %a6 /* bu factor */ + move.l #-6406711, %d5 /* gu factor */ + move.l #-13692816, %d6 /* gv factor */ + move.l #0x01040820, %d7 /* bitmask for signed->unsigned conversion + * of R, G and B within RGGB6666 at once */ + + /* chroma for first 2x2 block */ + clr.l %d3 /* load v component */ + move.b (%a2, %a3), %d3 + clr.l %d2 /* load u component */ + move.b (%a2)+, %d2 + moveq.l #-128, %d0 + add.l %d0, %d2 + add.l %d0, %d3 + + mac.l %a6, %d2, %acc0 /* bu */ + mac.l %d5, %d2, %acc1 /* gu */ + mac.l %d6, %d3, %acc1 /* gv */ + move.l #26881894, %d0 /* rv factor */ + mac.l %d0, %d3, %acc2 /* rv */ + + /* luma for very first pixel (top left) */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + moveq.l #-126, %d0 + add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + bra.b .yuv_line_entry + +.yuv_line_loop: + /* chroma for 2x2 pixel block */ + clr.l %d3 /* load v component */ + move.b (%a2, %a3), %d3 + clr.l %d2 /* load u component */ + move.b (%a2)+, %d2 + moveq.l #-128, %d0 + add.l %d0, %d2 + add.l %d0, %d3 + + mac.l %a6, %d2, %acc0 /* bu */ + mac.l %d5, %d2, %acc1 /* gu */ + mac.l %d6, %d3, %acc1 /* gv */ + move.l #26881894, %d0 /* rv factor */ + mac.l %d0, %d3, %acc2 /* rv */ + + /* luma for first pixel (top left) */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + moveq.l #-126, %d0 + add.l %d1, %d0 /* y' (-0.5 ... 
+0.5) */ + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ +.yuv_line_entry: + moveq.l #26, %d0 + move.l %acc0, %d4 + move.l %acc1, %d3 + move.l %acc2, %d2 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d2 + + lsl.l #6, %d2 + or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d2 + or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + /* luma for second pixel (bottom left) as delta from the first */ + clr.l %d2 + move.b (%a1)+, %d2 + move.l %d2, %d0 + sub.l %d1, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ + moveq.l #26, %d0 + move.l %acc0, %d4 + move.l %acc1, %d3 + move.l %acc2, %d1 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d1 + + lsl.l #6, %d1 + or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d1 + or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + /* luma for third pixel (top right) as delta from the second */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + move.l %d1, %d0 + sub.l %d2, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ + moveq.l #26, %d0 + move.l %acc0, %d4 + move.l %acc1, %d3 + move.l %acc2, %d2 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d2 + + lsl.l #6, %d2 + or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d2 + or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + /* luma for fourth pixel (bottom right) as delta from the thrid */ + clr.l %d2 + move.b (%a1)+, %d2 + move.l %d2, %d0 + sub.l %d1, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB666, pack and output */ + moveq.l #26, %d0 + movclr.l %acc0, %d4 + movclr.l %acc1, %d3 + movclr.l %acc2, %d1 + lsr.l %d0, %d4 + lsr.l %d0, %d3 + lsr.l %d0, %d1 + + lsl.l #6, %d1 + or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */ + lsl.l #7, %d1 + or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */ + lsl.l #6, %d3 + or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */ + eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */ + swap %d4 + move.w %d4, (%a0) + swap %d4 + + cmp.l %a1, %a4 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop + + move.w %d4, (%a0) /* write (very) last 2nd word */ + + movem.l (%sp), %d2-%d7/%a2-%a6 + lea.l (44, %sp), %sp /* restore registers */ + rts +.yuv_end: + .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines + + /* begin lcd_write_data */ .align 2 .global lcd_write_data diff 
--git a/firmware/target/coldfire/iaudio/x5/lcd-x5.c b/firmware/target/coldfire/iaudio/x5/lcd-x5.c index a6a4fc0176..266a381c40 100644 --- a/firmware/target/coldfire/iaudio/x5/lcd-x5.c +++ b/firmware/target/coldfire/iaudio/x5/lcd-x5.c @@ -414,6 +414,69 @@ bool lcd_active(void) #endif /*** update functions ***/ +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. + * y should have two lines of Y back to back, 2nd line first. + * c should contain the Cb and Cr data for the two lines of Y back to back. + * Needs EMAC set to saturated, signed integer mode. + */ +extern void lcd_write_yuv420_lines(const unsigned char *y, + const unsigned char *c, int width); + +/* Performance function to blit a YUV bitmap directly to the LCD + * src_x, src_y, width and height should be even and within the LCD's + * boundaries. + */ +void lcd_blit_yuv(unsigned char * const src[3], + int src_x, int src_y, int stride, + int x, int y, int width, int height) +{ + /* IRAM Y, Cb/bu, guv and Cb/rv buffers. */ + unsigned char y_ibuf[LCD_WIDTH*2]; + unsigned char c_ibuf[LCD_WIDTH]; + const unsigned char *ysrc, *usrc, *vsrc; + const unsigned char *ysrc_max; + + if (!display_on) + return; + + width &= ~1; /* stay on the safe side */ + height &= ~1; + + lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_DIT_HORZ); + /* Set start position and window */ + lcd_write_reg(R_VERT_RAM_ADDR_POS, (LCD_WIDTH-1) << 8); + + ysrc = src[0] + src_y * stride + src_x; + usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1); + vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1); + ysrc_max = ysrc + height * stride; + + unsigned long macsr = coldfire_get_macsr(); + coldfire_set_macsr(EMAC_SATURATE); + + do + { + lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + y_offset + 1) << 8) | (y + y_offset)); + lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | (y + y_offset)); + lcd_begin_write_gram(); + + memcpy(y_ibuf + width, ysrc, width); + memcpy(y_ibuf, ysrc + stride, width); + memcpy(c_ibuf, usrc, width >> 1); + memcpy(c_ibuf + (width >> 1), vsrc, width >> 1); + lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1); + + y += 2; + ysrc += 2 * stride; + usrc += stride >> 1; + vsrc += stride >> 1; + } + while (ysrc < ysrc_max); + + coldfire_set_macsr(macsr); +} /* lcd_yuv_blit */ + + /* Update the display. This must be called after all other LCD functions that change the lcd frame buffer. */ diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S new file mode 100644 index 0000000000..223c183860 --- /dev/null +++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S @@ -0,0 +1,246 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 by Jens Arnold + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + + .section .icode, "ax", @progbits + +/* lcd_write_yuv420_lines() + * + * See http://en.wikipedia.org/wiki/YCbCr + * ITU-R BT.601 (formerly CCIR 601): + * |Y'| | 0.299000 0.587000 0.114000| |R| + * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y') + * |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y') + * Scaled, normalized and rounded: + * |Y'| | 65 129 25| |R| + 16 : 16->235 + * |Cb| = |-38 -74 112| |G| + 128 : 16->240 + * |Cr| |112 -94 -18| |B| + 128 : 16->240 + * + * The inverse: + * |R| |1.000000 0.000000 1.402000| |Y'| + * |G| = |1.000000 -0.334136 -0.714136| |Pb| + * |B| |1.000000 1.772000 0.000000| |Pr| + * Scaled, normalized, rounded and tweaked to yield RGB565: + * |R| |19611723 0 26881894| |Y' - 16| >> 27 + * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26 + * |B| |19611723 33976259 0| |Cr - 128| >> 27 + * + * Needs EMAC set to saturated, signed integer mode. + * + * register usage: + * %a0 - LCD data port + * %a1 - Y pointer + * %a2 - C pointer + * %a3 - C width + * %a4 - Y end address + * %a5 - Y factor + * %a6 - BU factor + * %d0 - scratch + * %d1 - B, previous Y \ alternating + * %d2 - U / B, previous Y / + * %d3 - V / G + * %d4 - R / output pixel + * %d5 - GU factor + * %d6 - GV factor + * %d7 - RGB signed -> unsigned conversion mask + */ + .align 2 + .global lcd_write_yuv420_lines + .type lcd_write_yuv420_lines, @function + +lcd_write_yuv420_lines: + lea.l (-44, %sp), %sp /* free up some registers */ + movem.l %d2-%d7/%a2-%a6, (%sp) + + lea.l 0xf0000002, %a0 /* LCD data port */ + movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */ + lea.l (%a1, %a3*2), %a4 /* Y end address */ + + move.l #19611723, %a5 /* y factor */ + move.l #33976259, %a6 /* bu factor */ + move.l #-6406711, %d5 /* gu factor */ + move.l #-13692816, %d6 /* gv factor */ + move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion + * of R, G and B within RGB565 at once */ + + /* chroma for first 2x2 pixel block */ + clr.l %d3 /* load v component */ + move.b (%a2, %a3), %d3 + clr.l %d2 /* load u component */ + move.b (%a2)+, %d2 + moveq.l #-128, %d0 + add.l %d0, %d2 + add.l %d0, %d3 + + mac.l %a6, %d2, %acc0 /* bu */ + mac.l %d5, %d2, %acc1 /* gu */ + mac.l %d6, %d3, %acc1 /* gv */ + move.l #26881894, %d0 /* rv factor */ + mac.l %d0, %d3, %acc2 /* rv */ + + /* luma for very first pixel (top left) */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + moveq.l #-126, %d0 + add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + bra.b .yuv_line_entry + +.yuv_line_loop: + /* chroma for 2x2 pixel block */ + clr.l %d3 /* load v component */ + move.b (%a2, %a3), %d3 + clr.l %d2 /* load u component */ + move.b (%a2)+, %d2 + moveq.l #-128, %d0 + add.l %d0, %d2 + add.l %d0, %d3 + + mac.l %a6, %d2, %acc0 /* bu */ + mac.l %d5, %d2, %acc1 /* gu */ + mac.l %d6, %d3, %acc1 /* gv */ + move.l #26881894, %d0 /* rv factor */ + mac.l %d0, %d3, %acc2 /* rv */ + + /* luma for first pixel (top left) */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + moveq.l #-126, %d0 + add.l %d1, %d0 /* y' (-0.5 ... 
+0.5) */ + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB565, pack and output */ +.yuv_line_entry: + moveq.l #27, %d0 + move.l %acc0, %d2 + move.l %acc1, %d3 + move.l %acc2, %d4 + lsr.l %d0, %d2 + lsr.l %d0, %d4 + moveq.l #26, %d0 + lsr.l %d0, %d3 + lsl.l #6, %d4 + or.l %d3, %d4 + lsl.l #5, %d4 + or.l %d2, %d4 + eor.l %d7, %d4 + + /* luma for second pixel (bottom left) as delta from the first */ + clr.l %d2 + move.b (%a1)+, %d2 + move.l %d2, %d0 + sub.l %d1, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB565, pack and output */ + moveq.l #27, %d0 + move.l %acc0, %d1 + move.l %acc1, %d3 + move.l %acc2, %d4 + lsr.l %d0, %d1 + lsr.l %d0, %d4 + moveq.l #26, %d0 + lsr.l %d0, %d3 + lsl.l #6, %d4 + or.l %d3, %d4 + lsl.l #5, %d4 + or.l %d1, %d4 + eor.l %d7, %d4 + + /* luma for third pixel (top right) as delta from the second */ + clr.l %d1 + move.b (%a1, %a3*2), %d1 + move.l %d1, %d0 + sub.l %d2, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB565, pack and output */ + moveq.l #27, %d0 + move.l %acc0, %d2 + move.l %acc1, %d3 + move.l %acc2, %d4 + lsr.l %d0, %d2 + lsr.l %d0, %d4 + moveq.l #26, %d0 + lsr.l %d0, %d3 + lsl.l #6, %d4 + or.l %d3, %d4 + lsl.l #5, %d4 + or.l %d2, %d4 + eor.l %d7, %d4 + + /* luma for fourth pixel (bottom right) as delta from the third */ + clr.l %d2 + move.b (%a1)+, %d2 + move.l %d2, %d0 + sub.l %d1, %d0 + mac.l %a5, %d0, %acc0 + mac.l %a5, %d0, %acc1 + mac.l %a5, %d0, %acc2 + + move.w %d4, (%a0) + /* LCD write is delayed one pixel to use it for filling the EMAC latency */ + + /* convert to RGB565, pack and output */ + moveq.l #27, %d0 + movclr.l %acc0, %d1 + movclr.l %acc1, %d3 + movclr.l %acc2, %d4 + lsr.l %d0, %d1 + lsr.l %d0, %d4 + moveq.l #26, %d0 + lsr.l %d0, %d3 + lsl.l #6, %d4 + or.l %d3, %d4 + lsl.l #5, %d4 + or.l %d1, %d4 + eor.l %d7, %d4 + + cmp.l %a1, %a4 /* run %a1 up to end of line */ + bhi.w .yuv_line_loop + + move.w %d4, (%a0) /* write (very) last pixel */ + + movem.l (%sp), %d2-%d7/%a2-%a6 + lea.l (44, %sp), %sp /* restore registers */ + rts +.yuv_end: + .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines diff --git a/firmware/target/coldfire/iriver/h300/lcd-h300.c b/firmware/target/coldfire/iriver/h300/lcd-h300.c index 8d5370cdcf..7e73ea3905 100644 --- a/firmware/target/coldfire/iriver/h300/lcd-h300.c +++ b/firmware/target/coldfire/iriver/h300/lcd-h300.c @@ -325,6 +325,67 @@ bool lcd_active(void) /*** update functions ***/ +/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. + * y should have two lines of Y back to back, 2nd line first. + * c should contain the Cb and Cr data for the two lines of Y back to back. + * Needs EMAC set to saturated, signed integer mode. 
+ */
+extern void lcd_write_yuv420_lines(const unsigned char *y,
+                                   const unsigned char *c, int cwidth);
+
+/* Performance function to blit a YUV bitmap directly to the LCD
+ * src_x, src_y, width and height should be even
+ * x, y, width and height have to be within LCD bounds
+ */
+void lcd_blit_yuv(unsigned char * const src[3],
+                  int src_x, int src_y, int stride,
+                  int x, int y, int width, int height)
+{
+    /* IRAM Y, Cb and Cr buffers. */
+    unsigned char y_ibuf[LCD_WIDTH*2];
+    unsigned char c_ibuf[LCD_WIDTH];
+    const unsigned char *ysrc, *usrc, *vsrc;
+    const unsigned char *ysrc_max;
+
+    if (!display_on)
+        return;
+
+    LCD_MUTEX_LOCK();
+    width &= ~1; /* stay on the safe side */
+    height &= ~1;
+
+    lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_HORZ);
+    /* Set start position and window */
+    lcd_write_reg(R_VERT_RAM_ADDR_POS, ((xoffset + 219) << 8) | xoffset);
+
+    ysrc = src[0] + src_y * stride + src_x;
+    usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+    vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
+    ysrc_max = ysrc + height * stride;
+
+    coldfire_set_macsr(EMAC_SATURATE);
+    do
+    {
+        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + 1) << 8) | y);
+        lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
+        lcd_begin_write_gram();
+
+        memcpy(y_ibuf + width, ysrc, width);
+        memcpy(y_ibuf, ysrc + stride, width);
+        memcpy(c_ibuf, usrc, width >> 1);
+        memcpy(c_ibuf + (width >> 1), vsrc, width >> 1);
+        lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1);
+
+        y += 2;
+        ysrc += 2 * stride;
+        usrc += stride >> 1;
+        vsrc += stride >> 1;
+    }
+    while (ysrc < ysrc_max)
+        ;
+    LCD_MUTEX_UNLOCK();
+}
+
 #ifndef BOOTLOADER
 /* LCD DMA ISR */
 void DMA3(void) __attribute__ ((interrupt_handler, section(".icode")));
diff --git a/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c b/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c
index d2a1d759d0..a2d5b73ea8 100644
--- a/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c
+++ b/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c
@@ -158,3 +158,65 @@ void lcd_update(void)
 
     lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
 }
+
+/* (Mis)use LCD framebuffer as a temporary buffer */
+void lcd_blit_yuv(unsigned char * const src[3],
+                  int src_x, int src_y, int stride,
+                  int x, int y, int width, int height)
+{
+    unsigned char const * yuv_src[3];
+    register off_t z;
+
+    if(!lcd_is_on)
+        return;
+
+    z = stride * src_y;
+    yuv_src[0] = src[0] + z + src_x;
+    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+    yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+    commit_discard_dcache(); // XXX range
+
+    __cpm_start_ipu();
+
+    IPU_STOP_IPU();
+    IPU_RESET_IPU();
+    IPU_CLEAR_END_FLAG();
+
+    IPU_DISABLE_RSIZE();
+    IPU_DISABLE_IRQ();
+
+    IPU_SET_INFMT(INFMT_YUV420);
+    IPU_SET_OUTFMT(OUTFMT_RGB565);
+
+    IPU_SET_IN_FM(width, height);
+    IPU_SET_Y_STRIDE(stride);
+    IPU_SET_UV_STRIDE(stride, stride);
+
+    IPU_SET_Y_ADDR(PHYSADDR((unsigned long)yuv_src[0]));
+    IPU_SET_U_ADDR(PHYSADDR((unsigned long)yuv_src[1]));
+    IPU_SET_V_ADDR(PHYSADDR((unsigned long)yuv_src[2]));
+    IPU_SET_OUT_ADDR(PHYSADDR((unsigned long)FBADDR(y,x)));
+
+    IPU_SET_OUT_FM(height, width);
+    IPU_SET_OUT_STRIDE(height);
+
+    IPU_SET_CSC_C0_COEF(YUV_CSC_C0);
+    IPU_SET_CSC_C1_COEF(YUV_CSC_C1);
+    IPU_SET_CSC_C2_COEF(YUV_CSC_C2);
+    IPU_SET_CSC_C3_COEF(YUV_CSC_C3);
+    IPU_SET_CSC_C4_COEF(YUV_CSC_C4);
+
+    IPU_RUN_IPU();
+
+    while(!(IPU_POLLING_END_FLAG()) && IPU_IS_ENABLED());
+
+    IPU_CLEAR_END_FLAG();
+    IPU_STOP_IPU();
+    IPU_RESET_IPU();
+
+    __cpm_stop_ipu();
+
+    /* YUV speed is limited by LCD speed */
+    lcd_update_rect(y, x, height, width);
+}
-- 
cgit v1.2.3
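
For reference, below is a minimal plain-C sketch of the per-pixel YCbCr -> RGB565 conversion that the comment block of lcd_write_yuv420_lines documents. The coefficients and shift amounts (19611723, 26881894, -6406711, -13692816, 33976259, >>27/>>26) are taken from that comment; the helper names scale_clamp and yuv_to_rgb565 are illustrative only, and the pre-shift bias plus explicit clamping stand in for the EMAC saturation mode and the final eor.l #0x8410 step used by the assembly.

#include <stdint.h>

/* Clamp a biased fixed-point accumulator into an unsigned colour field. */
static unsigned scale_clamp(int64_t acc, int shift, unsigned max)
{
    if (acc < 0)
        return 0;
    acc >>= shift;
    return acc > max ? max : (unsigned)acc;
}

/* Convert one BT.601 (video range) YCbCr pixel to RGB565 with the same
 * fixed-point coefficients as lcd_write_yuv420_lines. */
static uint16_t yuv_to_rgb565(int y, int cb, int cr)
{
    int64_t yt = (int64_t)19611723 * (y - 126);   /* luma term, centered */
    int64_t u  = cb - 128;                        /* Cb, centered */
    int64_t v  = cr - 128;                        /* Cr, centered */

    /* Bias by half the field range before shifting; this replaces the
     * saturating EMAC accumulators and the eor.l #0x8410 trick. */
    unsigned r = scale_clamp(yt + 26881894 * v + ((int64_t)16 << 27), 27, 31);
    unsigned g = scale_clamp(yt - 6406711 * u - 13692816 * v
                                + ((int64_t)32 << 26), 26, 63);
    unsigned b = scale_clamp(yt + 33976259 * u + ((int64_t)16 << 27), 27, 31);

    return (uint16_t)((r << 11) | (g << 5) | b);
}

As a sanity check, a neutral mid-grey input (126, 128, 128) should come out as 0x8410 and full-range white (235, 128, 128) as 0xFFFF, which is what the assembly produces for the same inputs.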