summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2010-12-29 23:17:47 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2010-12-29 23:17:47 +0000
commitb04d676706c6e306754a3d1223d52de0037638bf (patch)
treebf2e34a68ca26c6c2397c51c91d4e06801d009e3
parent1980fc3a61db6b85760ff044900094a231568936 (diff)
downloadrockbox-b04d676706c6e306754a3d1223d52de0037638bf.tar.gz
rockbox-b04d676706c6e306754a3d1223d52de0037638bf.zip
Speed up of iPod nano 1G and iPod color LCD. Use HDD6330 asm part for YUV blitting, introduce special handling for full width screen updates. Speed up is about +30% for YUV on both color/nano1G.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28930 a1c6a512-1295-4272-9138-f99709370657
-rwxr-xr-xfirmware/target/arm/ipod/lcd-as-color-nano.S152
-rw-r--r--firmware/target/arm/ipod/lcd-color_nano.c132
2 files changed, 182 insertions, 102 deletions
diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S
new file mode 100755
index 0000000000..d4df4d496a
--- /dev/null
+++ b/firmware/target/arm/ipod/lcd-as-color-nano.S
@@ -0,0 +1,152 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id:$
9 *
10 * Copyright (C) 2010 by Andree Buschmann
11 *
12 * Generic asm helper function used by YUV blitting.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23
24#include "config.h"
25#include "cpu.h"
26
27 .section .icode, "ax", %progbits
28
29/****************************************************************************
30* void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
31* unsigned char const * const usrc,
32* unsigned char const * const vsrc,
33* int width);
34*
35* YUV -> RGB565 conversion
36* |R| |1.000000 -0.000001 1.402000| |Y'|
37* |G| = |1.000000 -0.334136 -0.714136| |Pb|
38* |B| |1.000000 1.772000 0.000000| |Pr|
39* Scaled, normalized, rounded and tweaked to yield RGB 565:
40* |R| |74 0 101| |Y' - 16| >> 9
41* |G| = |74 -24 -51| |Cb - 128| >> 8
42* |B| |74 128 0| |Cr - 128| >> 9
43*
44*/
45 .align 2
46 .global lcd_yuv_write_inner_loop
47 .type lcd_yuv_write_inner_loop, %function
48
49lcd_yuv_write_inner_loop:
 @ Converts one row of YUV samples to byte-swapped RGB565 and writes the
 @ result to the LCD2 block data register, two pixels (one 32-bit word) per
 @ loop pass. Each chroma sample pair (*usrc, *vsrc) is shared by two
 @ consecutive luma samples. The width test sits at the bottom of the loop,
 @ so at least one pixel pair is always sent and width is consumed in
 @ steps of 2.
50 @ r0 = ysrc (luma pointer, advanced by 2 per pass)
51 @ r1 = usrc (Cb pointer, advanced by 1 per pass)
52 @ r2 = vsrc (Cr pointer, advanced by 1 per pass)
53 @ r3 = width (pixels still to be sent)
54 stmfd sp!, { r4-r11, lr } @ save r4-r11 and lr
55 mov r4, #0x70000000 @ r4 = LCD2_BLOCK_CTRL - 0x20
56 add r4, r4, #0x8a00 @ r4 = 0x70008a00
57 add r5, r4, #0x100 @ r5 = LCD2_BLOCK_DATA (0x70008b00)
5810: @ loop: one Cb/Cr pair and two Y samples per pass
59
60 ldrb r7, [r1], #1 @ r7 = Cb = *usrc++
61 ldrb r8, [r2], #1 @ r8 = Cr = *vsrc++
62
63 sub r7, r7, #128 @ Cb -= 128
64 sub r8, r8, #128 @ Cr -= 128
65
66 add r10, r8, r8, asl #2 @ Cr*101 (built as Cr*5 + Cr*32 + Cr*64)
67 add r10, r10, r8, asl #5
68 add r10, r10, r8, asl #6
69
70 add r11, r8, r8, asl #1 @ Cr*51 + Cb*24 (Cr*3, then *17, then + Cb*8 + Cb*16)
71 add r11, r11, r11, asl #4
72 add r11, r11, r7, asl #3
73 add r11, r11, r7, asl #4
74
75 add r12, r7, #2 @ r12 = bu = (Cb*128 + 256) >> 9, reduced to (Cb + 2) >> 2
76 mov r12, r12, asr #2
77 add r10, r10, #256 @ r10 = rv = (Cr*101 + 256) >> 9
78 mov r10, r10, asr #9
79 rsb r11, r11, #128 @ r11 = guv = (-r11 + 128) >> 8
80 mov r11, r11, asr #8
81
82@ pixel_1
83 ldrb r7, [r0], #1 @ *ysrc++
84 sub r7, r7, #16 @ Y = (Y' - 16) * 37
85 add r8, r7, r7, asl #2 @ r8 = (Y' - 16) * 5
86 add r7, r8, r7, asl #5 @ r7 = (Y' - 16) * 37
87
88 add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
89 add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
90 add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
91
92 cmp r9, #31 @ clamp R to 0..31; unsigned 'hi' catches both <0 and >31
93 mvnhi r9, r9, asr #31 @ negative -> 0, too large -> all ones
94 andhi r9, r9, #31 @ -> 0 or 31
95
96 cmp r8, #63 @ clamp G to 0..63 (same idiom)
97 mvnhi r8, r8, asr #31
98 andhi r8, r8, #63
99
100 cmp r7, #31 @ clamp B to 0..31 (same idiom)
101 mvnhi r7, r7, asr #31
102 andhi r7, r7, #31
103
104 orr r6, r7, r8, lsl #5 @ pack pixel: B | G << 5 | R << 11
105 orr r6, r6, r9, lsl #11
106
107 mov r7, r6, lsl #8 @ swap bytes of the 16-bit pixel
108 and r7, r7, #0xff00 @ r7 = low byte moved to bits 15:8
109 add r6, r7, r6, lsr #8 @ r6 = byte-swapped pixel 1 in bits 15:0
110
111@ pixel_2 (reuses rv/guv/bu from the same chroma pair)
112 ldrb r7, [r0], #1 @ *ysrc++
113 sub r7, r7, #16 @ Y = (Y' - 16) * 37
114 add r8, r7, r7, asl #2 @ r8 = (Y' - 16) * 5
115 add r7, r8, r7, asl #5 @ r7 = (Y' - 16) * 37
116
117 add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
118 add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
119 add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
120
121 cmp r9, #31 @ clamp R to 0..31
122 mvnhi r9, r9, asr #31
123 andhi r9, r9, #31
124
125 cmp r8, #63 @ clamp G to 0..63
126 mvnhi r8, r8, asr #31
127 andhi r8, r8, #63
128
129 cmp r7, #31 @ clamp B to 0..31
130 mvnhi r7, r7, asr #31
131 andhi r7, r7, #31
132
133 orr r7, r7, r8, lsl #5 @ pack pixel: B | G << 5 | R << 11
134 orr r7, r7, r9, lsl #11
135
136 orr r6, r6, r7, lsl #24 @ swap bytes and add pixels simultaneously
137 mov r7, r7, lsr #8 @ r7 = high byte of pixel 2
138 orr r6, r6, r7, lsl #16 @ r6 = swapped pixel 2 in bits 31:16 | swapped pixel 1
139#if 1
14011: @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
141 ldr r11, [r4, #0x20] @ read LCD2_BLOCK_CTRL; r11 (guv) is no longer needed and is recomputed next pass
142 tst r11, #0x1000000 @ test bit 24 (the TXOK flag per the comment above)
143 beq 11b @ spin while the bit is clear
144#endif
145 str r6, [r5] @ send two pixels
146
147 subs r3, r3, #2 @ decrease width
148 bgt 10b @ loop while width > 0
149
150 ldmpc regs=r4-r11 @ restore regs (ldmpc is a macro defined outside this file; presumably it also returns by loading pc - confirm)
151 .ltorg @ dump constant pool
152 .size lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop
diff --git a/firmware/target/arm/ipod/lcd-color_nano.c b/firmware/target/arm/ipod/lcd-color_nano.c
index 7d004cb0f2..e3b9ea8eb6 100644
--- a/firmware/target/arm/ipod/lcd-color_nano.c
+++ b/firmware/target/arm/ipod/lcd-color_nano.c
@@ -121,38 +121,14 @@ void lcd_init_device(void)
121} 121}
122 122
123/*** update functions ***/ 123/*** update functions ***/
124extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
125 unsigned char const * const usrc,
126 unsigned char const * const vsrc,
127 int width);
124 128
125#define CSUB_X 2 129#define CSUB_X 2
126#define CSUB_Y 2 130#define CSUB_Y 2
127 131
128/* YUV -> RGB565 conversion
129 * |R| |1.000000 -0.000001 1.402000| |Y'|
130 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
131 * |B| |1.000000 1.772000 0.000000| |Pr|
132 * Scaled, normalized, rounded and tweaked to yield RGB 565:
133 * |R| |74 0 101| |Y' - 16| >> 9
134 * |G| = |74 -24 -51| |Cb - 128| >> 8
135 * |B| |74 128 0| |Cr - 128| >> 9
136*/
137
138#define RGBYFAC 74 /* 1.0 */
139#define RVFAC 101 /* 1.402 */
140#define GVFAC (-51) /* -0.714136 */
141#define GUFAC (-24) /* -0.334136 */
142#define BUFAC 128 /* 1.772 */
143
144/* ROUNDOFFS contain constant for correct round-offs as well as
145 constant parts of the conversion matrix (e.g. (Y'-16)*RGBYFAC
146 -> constant part = -16*RGBYFAC). Through extraction of these
147 constant parts we save at least 4 subtractions in the conversion
148 loop */
149#define ROUNDOFFSR (256 - 16*RGBYFAC - 128*RVFAC)
150#define ROUNDOFFSG (128 - 16*RGBYFAC - 128*GVFAC - 128*GUFAC)
151#define ROUNDOFFSB (256 - 16*RGBYFAC - 128*BUFAC)
152
153#define MAX_5BIT 0x1f
154#define MAX_6BIT 0x3f
155
156/* Performance function to blit a YUV bitmap directly to the LCD */ 132/* Performance function to blit a YUV bitmap directly to the LCD */
157void lcd_blit_yuv(unsigned char * const src[3], 133void lcd_blit_yuv(unsigned char * const src[3],
158 int src_x, int src_y, int stride, 134 int src_x, int src_y, int stride,
@@ -222,7 +198,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
222 const int stride_div_csub_x = stride/CSUB_X; 198 const int stride_div_csub_x = stride/CSUB_X;
223 199
224 h=0; 200 h=0;
225 while (1) { 201 while (1)
202 {
226 /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ 203 /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
227 const unsigned char *ysrc = src[0] + stride * src_y + src_x; 204 const unsigned char *ysrc = src[0] + stride * src_y + src_x;
228 205
@@ -231,17 +208,11 @@ void lcd_blit_yuv(unsigned char * const src[3],
231 208
232 const unsigned char *usrc = src[1] + uvoffset; 209 const unsigned char *usrc = src[1] + uvoffset;
233 const unsigned char *vsrc = src[2] + uvoffset; 210 const unsigned char *vsrc = src[2] + uvoffset;
234 const unsigned char *row_end = ysrc + width;
235
236 int yp, up, vp;
237 int red1, green1, blue1;
238 int red2, green2, blue2;
239 211
240 int rc, gc, bc;
241 int pixels_to_write; 212 int pixels_to_write;
242 fb_data pixel1,pixel2;
243 213
244 if (h==0) { 214 if (h==0)
215 {
245 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); 216 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
246 LCD2_BLOCK_CONFIG = 0; 217 LCD2_BLOCK_CONFIG = 0;
247 218
@@ -251,7 +222,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
251 h = height; 222 h = height;
252 223
253 /* calculate how much we can do in one go */ 224 /* calculate how much we can do in one go */
254 if (pixels_to_write > 0x10000) { 225 if (pixels_to_write > 0x10000)
226 {
255 h = (0x10000/2) / width; 227 h = (0x10000/2) / width;
256 pixels_to_write = (width * h) * 2; 228 pixels_to_write = (width * h) * 2;
257 } 229 }
@@ -262,61 +234,7 @@ void lcd_blit_yuv(unsigned char * const src[3],
262 LCD2_BLOCK_CTRL = 0x34000000; 234 LCD2_BLOCK_CTRL = 0x34000000;
263 } 235 }
264 236
265 do 237 lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width);
266 {
267 up = *usrc++;
268 vp = *vsrc++;
269 rc = RVFAC * vp + ROUNDOFFSR;
270 gc = GVFAC * vp + GUFAC * up + ROUNDOFFSG;
271 bc = BUFAC * up + ROUNDOFFSB;
272
273 /* Pixel 1 -> RGB565 */
274 yp = *ysrc++ * RGBYFAC;
275 red1 = (yp + rc) >> 9;
276 green1 = (yp + gc) >> 8;
277 blue1 = (yp + bc) >> 9;
278
279 /* Pixel 2 -> RGB565 */
280 yp = *ysrc++ * RGBYFAC;
281 red2 = (yp + rc) >> 9;
282 green2 = (yp + gc) >> 8;
283 blue2 = (yp + bc) >> 9;
284
285 /* Since out of bounds errors are relatively rare, we check two
286 pixels at once to see if any components are out of bounds, and
287 then fix whichever is broken. This works due to high values and
288 negative values both being !=0 when bitmasking them.
289 We first check for red and blue components (5bit range). */
290 if ((red1 | blue1 | red2 | blue2) & ~MAX_5BIT)
291 {
292 if (red1 & ~MAX_5BIT)
293 red1 = (red1 >> 31) ? 0 : MAX_5BIT;
294 if (blue1 & ~MAX_5BIT)
295 blue1 = (blue1 >> 31) ? 0 : MAX_5BIT;
296 if (red2 & ~MAX_5BIT)
297 red2 = (red2 >> 31) ? 0 : MAX_5BIT;
298 if (blue2 & ~MAX_5BIT)
299 blue2 = (blue2 >> 31) ? 0 : MAX_5BIT;
300 }
301 /* We second check for green component (6bit range) */
302 if ((green1 | green2) & ~MAX_6BIT)
303 {
304 if (green1 & ~MAX_6BIT)
305 green1 = (green1 >> 31) ? 0 : MAX_6BIT;
306 if (green2 & ~MAX_6BIT)
307 green2 = (green2 >> 31) ? 0 : MAX_6BIT;
308 }
309
310 pixel1 = swap16((red1 << 11) | (green1 << 5) | blue1);
311
312 pixel2 = swap16((red2 << 11) | (green2 << 5) | blue2);
313
314 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
315
316 /* output 2 pixels */
317 LCD2_BLOCK_DATA = (pixel2 << 16) | pixel1;
318 }
319 while (ysrc < row_end);
320 238
321 src_y++; 239 src_y++;
322 h--; 240 h--;
@@ -415,16 +333,26 @@ void lcd_update_rect(int x, int y, int width, int height)
415 LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1); 333 LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
416 LCD2_BLOCK_CTRL = 0x34000000; 334 LCD2_BLOCK_CTRL = 0x34000000;
417 335
418 /* for each row */ 336 if (LCD_WIDTH == width) {
419 for (r = 0; r < h; r++) { 337 /* for each row and column in a single loop */
420 /* for each column */ 338 for (r = 0; r < h*width; r += 2) {
421 for (c = 0; c < width; c += 2) { 339 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
422 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); 340
423 341 /* output 2 pixels */
424 /* output 2 pixels */ 342 LCD2_BLOCK_DATA = *addr++;
425 LCD2_BLOCK_DATA = *addr++; 343 }
344 } else {
345 /* for each row */
346 for (r = 0; r < h; r++) {
347 /* for each column */
348 for (c = 0; c < width; c += 2) {
349 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
350
351 /* output 2 pixels */
352 LCD2_BLOCK_DATA = *addr++;
353 }
354 addr += (LCD_WIDTH - width)/2;
426 } 355 }
427 addr += (LCD_WIDTH - width)/2;
428 } 356 }
429 357
430 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); 358 while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));