summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2006-11-04 00:42:18 +0000
committerJens Arnold <amiconn@rockbox.org>2006-11-04 00:42:18 +0000
commitf8b1da2f7bddebc9c7026bd5d106dec118ce70a9 (patch)
tree474e99488c568355dcd07c497181a11afa0245f9
parent0d8781e2f99ea11298b6a290a979417647a5ce37 (diff)
downloadrockbox-f8b1da2f7bddebc9c7026bd5d106dec118ce70a9.tar.gz
rockbox-f8b1da2f7bddebc9c7026bd5d106dec118ce70a9.zip
H300, X5: Faster lcd_yuv_blit() using EMAC. Speedup of the function itself at 124MHz: 10.5% on X5, 16.5% on H300. mpegplayer speedup 3..4%
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@11429 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/drivers/lcd-h300.c12
-rw-r--r--firmware/target/coldfire/iaudio/x5/lcd-as-x5.S388
-rwxr-xr-xfirmware/target/coldfire/iaudio/x5/lcd-x5.c28
-rwxr-xr-xfirmware/target/coldfire/iriver/h300/lcd-as-h300.S346
4 files changed, 284 insertions, 490 deletions
diff --git a/firmware/drivers/lcd-h300.c b/firmware/drivers/lcd-h300.c
index b7865fa7c5..3e5642e35d 100644
--- a/firmware/drivers/lcd-h300.c
+++ b/firmware/drivers/lcd-h300.c
@@ -304,10 +304,11 @@ void lcd_blit(const fb_data* data, int x, int by, int width,
304/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 304/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420.
305 * y should have two lines of Y back to back. 305 * y should have two lines of Y back to back.
306 * bu and rv should contain the Cb and Cr data for the two lines of Y. 306 * bu and rv should contain the Cb and Cr data for the two lines of Y.
307 * Stores bu, guv and rv in repective buffers for use in second line. 307 * Needs EMAC set to saturated, signed integer mode.
308 */ 308 */
309extern void lcd_write_yuv420_lines(const unsigned char *y, 309extern void lcd_write_yuv420_lines(const unsigned char *y,
310 unsigned char *bu, unsigned char *guv, unsigned char *rv, int width); 310 const unsigned char *bu,
311 const unsigned char *rv, int width);
311 312
312/* Performance function to blit a YUV bitmap directly to the LCD 313/* Performance function to blit a YUV bitmap directly to the LCD
313 * src_x, src_y, width and height should be even 314 * src_x, src_y, width and height should be even
@@ -317,10 +318,9 @@ void lcd_yuv_blit(unsigned char * const src[3],
317 int src_x, int src_y, int stride, 318 int src_x, int src_y, int stride,
318 int x, int y, int width, int height) 319 int x, int y, int width, int height)
319{ 320{
320 /* IRAM Y, Cb/bu, guv and Cb/rv buffers. */ 321 /* IRAM Y, Cb and Cr buffers. */
321 unsigned char y_ibuf[LCD_WIDTH*2]; 322 unsigned char y_ibuf[LCD_WIDTH*2];
322 unsigned char bu_ibuf[LCD_WIDTH/2]; 323 unsigned char bu_ibuf[LCD_WIDTH/2];
323 unsigned char guv_ibuf[LCD_WIDTH/2];
324 unsigned char rv_ibuf[LCD_WIDTH/2]; 324 unsigned char rv_ibuf[LCD_WIDTH/2];
325 const unsigned char *ysrc, *usrc, *vsrc; 325 const unsigned char *ysrc, *usrc, *vsrc;
326 const unsigned char *ysrc_max; 326 const unsigned char *ysrc_max;
@@ -342,13 +342,14 @@ void lcd_yuv_blit(unsigned char * const src[3],
342 vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1); 342 vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
343 ysrc_max = ysrc + height * stride; 343 ysrc_max = ysrc + height * stride;
344 344
345 coldfire_set_macsr(EMAC_SATURATE);
345 do 346 do
346 { 347 {
347 memcpy(y_ibuf, ysrc, width); 348 memcpy(y_ibuf, ysrc, width);
348 memcpy(y_ibuf + width, ysrc + stride, width); 349 memcpy(y_ibuf + width, ysrc + stride, width);
349 memcpy(bu_ibuf, usrc, width >> 1); 350 memcpy(bu_ibuf, usrc, width >> 1);
350 memcpy(rv_ibuf, vsrc, width >> 1); 351 memcpy(rv_ibuf, vsrc, width >> 1);
351 lcd_write_yuv420_lines(y_ibuf, bu_ibuf, guv_ibuf, rv_ibuf, width); 352 lcd_write_yuv420_lines(y_ibuf, bu_ibuf, rv_ibuf, width);
352 ysrc += 2 * stride; 353 ysrc += 2 * stride;
353 usrc += stride >> 1; 354 usrc += stride >> 1;
354 vsrc += stride >> 1; 355 vsrc += stride >> 1;
@@ -381,6 +382,7 @@ void lcd_update(void)
381 } 382 }
382} 383}
383 384
385
384/* Update a fraction of the display. */ 386/* Update a fraction of the display. */
385void lcd_update_rect(int, int, int, int) ICODE_ATTR; 387void lcd_update_rect(int, int, int, int) ICODE_ATTR;
386void lcd_update_rect(int x, int y, int width, int height) 388void lcd_update_rect(int x, int y, int width, int height)
diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
index 6d5d324ebf..11150203af 100644
--- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
+++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
@@ -40,260 +40,158 @@
40 * |G| = |1.000000 -0.334136 -0.714136| |Pb| 40 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
41 * |B| |1.000000 1.772000 0.000000| |Pr| 41 * |B| |1.000000 1.772000 0.000000| |Pr|
42 * Scaled, normalized, rounded and tweaked to yield RGB 666: 42 * Scaled, normalized, rounded and tweaked to yield RGB 666:
43 * |R| |74 0 101| |Y' - 16| / 256 43 * |R| |19611723 0 26881894| |Y' - 16| >> 26
44 * |G| = |74 -24 -51| |Cb - 128| / 256 44 * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
45 * |B| |74 128 0| |Cr - 128| / 256 45 * |B| |19611723 33976259 0| |Cr - 128| >> 26
46 *
47 * Needs EMAC set to saturated, signed integer mode.
46 */ 48 */
47 .align 2 49 .align 2
48 .global lcd_write_yuv420_lines 50 .global lcd_write_yuv420_lines
49 .type lcd_write_yuv420_lines,@function 51 .type lcd_write_yuv420_lines, @function
52
50lcd_write_yuv420_lines: 53lcd_write_yuv420_lines:
51 lea.l (-36,%sp),%sp /* free up some registers */ 54 lea.l (-44, %sp), %sp /* free up some registers */
52 movem.l %d2-%d6/%a2-%a5,(%sp) 55 movem.l %d2-%d7/%a2-%a6, (%sp)
53 56
54 lea.l 0xf0008002,%a0 /* LCD data port */ 57 lea.l 0xf0008002, %a0 /* LCD data port */
55 movem.l (36+4,%sp),%a1-%a5 /* Y data, Cb data, guv storage, Cr data, width */ 58 movem.l (44+4, %sp), %a1-%a4 /* Y data, Cb data, Cr data, width */
56 lea.l (%a1,%a5),%a5 /* end address */ 59 lea.l (%a1, %a4), %a4 /* end address */
57 60
58.yuv_line_loop1: 61 move.l #19611723, %a5 /* y factor */
59 /** Write first pixel **/ 62 move.l #33976259, %a6 /* bu factor */
60 clr.l %d1 /* get bu component */ 63 move.l #-6406711, %d5 /* gu factor */
61 move.b (%a2),%d1 64 move.l #-13692816, %d6 /* gv factor */
62 clr.l %d3 /* get rv component */ 65 move.l #0x01040820, %d7 /* bitmask for signed->unsigned conversion
63 move.b (%a4),%d3 66 * of R, G and B within RGGB6666 at once */
64 moveq.l #-128,%d0 67
65 add.l %d0,%d1 68 /* chroma for (very) first & second pixel */
66 add.l %d0,%d3 69 clr.l %d2 /* load u component */
67 70 move.b (%a2)+, %d2
68 move.l %d1,%d2 /* %d2 = cb component for guv */ 71 clr.l %d3 /* load v component */
69 asr.l #1,%d1 /* %d1 = 128 * (Cb - 128) / 256 */ 72 move.b (%a3)+, %d3
70 move.b %d1,(%a2)+ /* save bu for next line */ 73 moveq.l #-128, %d0
71 moveq.l #-24,%d0 /* multiply first term of guv */ 74 add.l %d0, %d2
72 muls.w %d0,%d2 75 add.l %d0, %d3
73 moveq.l #-51,%d0 /* multiply second term of guv */ 76
74 muls.w %d3,%d0 77 mac.l %a6, %d2, %acc0 /* bu */
75 add.l %d0,%d2 78 mac.l %d5, %d2, %acc1 /* gu */
76 asr.l #8,%d2 79 mac.l %d6, %d3, %acc1 /* gv */
77 move.b %d2,(%a3)+ /* save guv for next line */ 80 move.l #26881894, %d0 /* rv factor */
78 moveq.l #101,%d0 81 mac.l %d0, %d3, %acc2 /* rv */
79 muls.w %d0,%d3 82
80 asr.l #8,%d3 83 /* luma for (very) first pixel */
81 move.b %d3,(%a4)+ /* save rv for next line */ 84 clr.l %d1
82 85 move.b (%a1)+, %d1
83 clr.l %d4 /* get y component */ 86 moveq.l #-126, %d0
84 move.b (%a1)+,%d4 87 add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
85 moveq.l #74,%d0 88 mac.l %a5, %d0, %acc0
86 muls.w %d0,%d4 89 mac.l %a5, %d0, %acc1
87 asr.l #8,%d4 90 mac.l %a5, %d0, %acc2
88 subq.l #4,%d4 91
89 move.l %d4,%d5 92 bra.b .yuv_line_entry
90 move.l %d4,%d6 93
91 /* : %d4,%d5,%d6 = Y, %d1 = bu, %d2 = guv, %d3 = rv */ 94.yuv_line_loop:
92 95 /* chroma for first & second pixel */
93 add.l %d3,%d4 /* get r */ 96 clr.l %d2 /* load u component */
94 add.l %d2,%d5 /* get g */ 97 move.b (%a2)+, %d2
95 add.l %d1,%d6 /* get b */ 98 clr.l %d3 /* load v component */
96 99 move.b (%a3)+, %d3
97 move.l %d6,%d0 /* is clamping needed? */ 100 moveq.l #-128, %d0
98 or.l %d5,%d0 101 add.l %d0, %d2
99 or.l %d4,%d0 102 add.l %d0, %d3
100 asr.l #6,%d0 103
101 beq.b .yuv_no_clamp1 /* values in range: skip clamping */ 104 mac.l %a6, %d2, %acc0 /* bu */
102 moveq.l #63, %d0 105 mac.l %d5, %d2, %acc1 /* gu */
103 cmp.l %d0, %d4 106 mac.l %d6, %d3, %acc1 /* gv */
104 bls.s .yuv_red_ok1 107 move.l #26881894, %d0 /* rv factor */
105 spl.b %d4 108 mac.l %d0, %d3, %acc2 /* rv */
106 and.l %d0, %d4 109
107.yuv_red_ok1: 110 /* luma for first pixel */
108 cmp.l %d0, %d5 111 clr.l %d1
109 bls.s .yuv_green_ok1 112 move.b (%a1)+, %d1
110 spl.b %d5 113 moveq.l #-126, %d0
111 and.l %d0, %d5 114 add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
112.yuv_green_ok1: 115 mac.l %a5, %d0, %acc0
113 cmp.l %d0, %d6 116 mac.l %a5, %d0, %acc1
114 bls.s .yuv_blue_ok1 117 mac.l %a5, %d0, %acc2
115 spl.b %d6 118
116 and.l %d0, %d6 119 move.w %d4, (%a0)
117.yuv_blue_ok1: 120 /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
118.yuv_no_clamp1: 121
119 /* : %d4 = R, %d5 = G, %d6 = B */ 122 /* convert to RGB666, pack and output */
120 123.yuv_line_entry:
121 move.l %d5,%d0 /* save g for lower 9 bits */ 124 moveq.l #26, %d0
122 lsl.l #3,%d4 /* R << 3 */ 125 move.l %acc0, %d4
123 lsr.l #3,%d0 /* G >> 3 */ 126 move.l %acc1, %d3
124 or.l %d4,%d0 127 move.l %acc2, %d2
125 move.w %d0,(%a0) /* |00000000|000000000|0000000r|rrrrrggg| */ 128 lsr.l %d0, %d4
126 lsl.l #6,%d5 /* B << 6 */ 129 lsr.l %d0, %d3
127 or.l %d5,%d6 /* |00000000|000000000|0000gggg|ggbbbbbb| */ 130 lsr.l %d0, %d2
128 move.w %d6,(%a0) 131
129 132 lsl.l #6, %d2
130 /** Write second pixel **/ 133 or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
131 clr.l %d4 134 lsl.l #7, %d2
132 move.b (%a1)+,%d4 /* get y component */ 135 or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
133 moveq.l #74,%d0 136 lsl.l #6, %d3
134 muls.w %d0,%d4 137 or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
135 asr.l #8,%d4 138 eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
136 subq.l #4,%d4 139 swap %d4
137 /* : %d4 = Y, %d1 = bu, %d2 = guv, %d3 = rv */ 140 move.w %d4, (%a0)
138 141 swap %d4
139 /* Add Y + each chroma component (can clobber %d1-%d3 values now) */ 142
140 add.l %d4,%d3 /* get r */ 143 /* luma for second pixel as delta from the first */
141 add.l %d4,%d2 /* get g */ 144 clr.l %d0
142 add.l %d4,%d1 /* get b */ 145 move.b (%a1)+, %d0
143 146 sub.l %d1, %d0
144 move.l %d1,%d0 /* is clamping needed? */ 147 mac.l %a5, %d0, %acc0
145 or.l %d2,%d0 148 mac.l %a5, %d0, %acc1
146 or.l %d3,%d0 149 mac.l %a5, %d0, %acc2
147 asr.l #6,%d0 150
148 beq.b .yuv_no_clamp2 /* values in range: skip clamping */ 151 move.w %d4, (%a0)
149 moveq.l #63, %d0 152 /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
150 cmp.l %d0, %d3 153
151 bls.s .yuv_red_ok2 154 /* convert to RGB666, pack and output */
152 spl.b %d3 155 moveq.l #26, %d0
153 and.l %d0, %d3 156 movclr.l %acc0, %d4
154.yuv_red_ok2: 157 movclr.l %acc1, %d3
155 cmp.l %d0, %d2 158 movclr.l %acc2, %d2
156 bls.s .yuv_green_ok2 159 lsr.l %d0, %d4
157 spl.b %d2 160 lsr.l %d0, %d3
158 and.l %d0, %d2 161 lsr.l %d0, %d2
159.yuv_green_ok2: 162
160 cmp.l %d0, %d1 163 lsl.l #6, %d2
161 bls.s .yuv_blue_ok2 164 or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
162 spl.b %d1 165 lsl.l #7, %d2
163 and.l %d0, %d1 166 or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
164.yuv_blue_ok2: 167 lsl.l #6, %d3
165.yuv_no_clamp2: 168 or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
166 /* : %d3 = R, %d2 = G, %d1 = B */ 169 eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
167 170 swap %d4
168 move.l %d2,%d0 /* save g for lower 9 bits */ 171 move.w %d4, (%a0)
169 lsl.l #3,%d3 /* R << 3 */ 172 swap %d4
170 lsr.l #3,%d0 /* G >> 3 */ 173
171 or.l %d3,%d0 /* |00000000|000000000|0000000r|rrrrrggg| */ 174 cmp.l %a1, %a4 /* run %a1 up to end of line */
172 move.w %d0,(%a0) 175 bhi.w .yuv_line_loop
173 lsl.l #6,%d2 /* G << 6 */ 176
174 or.l %d2,%d1 /* |00000000|000000000|0000gggg|ggbbbbbb| */ 177 tst.l (44+4, %sp) /* use original Y pointer as a flag to */
175 move.w %d1,(%a0) 178 beq.b .yuv_exit /* distinguish between first and second */
176 179 clr.l (44+4, %sp) /* pixel line */
177 cmp.l %a1,%a5 /* run %a1 up to end of line */
178 bhi.w .yuv_line_loop1
179 180
180 /* Rewind chroma pointers */ 181 /* Rewind chroma pointers */
181 movem.l (36+8, %sp), %a2-%a5 /* bu data, guv data, rv data, width */ 182 movem.l (44+8, %sp), %a2-%a4 /* Cb data, Cr data, width */
182 lea.l (%a1, %a5), %a5 /* next end address */ 183 lea.l (%a1, %a4), %a4 /* end address */
183 184 bra.w .yuv_line_loop
184.yuv_line_loop2: 185
185 move.b (%a2)+,%d1 /* read save chromas and sign extend */ 186.yuv_exit:
186 extb.l %d1 187 move.w %d4, (%a0) /* write (very) last 2nd word */
187 move.b (%a3)+,%d2
188 extb.l %d2
189 move.b (%a4)+,%d3
190 extb.l %d3
191
192 clr.l %d4
193 move.b (%a1)+,%d4 /* get y component */
194 moveq.l #74,%d0
195 muls.w %d0,%d4
196 asr.l #8,%d4
197 subq.l #4,%d4
198 move.l %d4,%d5
199 move.l %d4,%d6
200 /* : %d4,%d5,%d6 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
201
202 add.l %d3,%d4 /* get r */
203 add.l %d2,%d5 /* get g */
204 add.l %d1,%d6 /* get b */
205
206 move.l %d6,%d0 /* is clamping needed? */
207 or.l %d5,%d0
208 or.l %d4,%d0
209 asr.l #6,%d0
210 beq.b .yuv_no_clamp3 /* values in range: skip clamping */
211 moveq.l #63, %d0
212 cmp.l %d0, %d4
213 bls.s .yuv_red_ok3
214 spl.b %d4
215 and.l %d0, %d4
216.yuv_red_ok3:
217 cmp.l %d0, %d5
218 bls.s .yuv_green_ok3
219 spl.b %d5
220 and.l %d0, %d5
221.yuv_green_ok3:
222 cmp.l %d0, %d6
223 bls.s .yuv_blue_ok3
224 spl.b %d6
225 and.l %d0, %d6
226.yuv_blue_ok3:
227.yuv_no_clamp3:
228 /* : %d4 = R, %d5 = G, %d6 = B */
229
230 move.l %d5,%d0 /* save g for lower 9 bits */
231 lsl.l #3,%d4 /* R << 3 */
232 lsr.l #3,%d0 /* G >> 3 */
233 or.l %d4,%d0
234 move.w %d0,(%a0) /* |00000000|000000000|0000000r|rrrrrggg| */
235 lsl.l #6,%d5 /* B << 6 */
236 or.l %d5,%d6 /* |00000000|000000000|0000gggg|ggbbbbbb| */
237 move.w %d6,(%a0)
238
239 /** Write second pixel **/
240 clr.l %d4
241 move.b (%a1)+,%d4 /* get y component */
242 moveq.l #74,%d0
243 muls.w %d0,%d4
244 asr.l #8,%d4
245 subq.l #4,%d4
246 /* : %d4 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
247
248 /* Add Y + each chroma component (can clobber %d1-%d3 values now) */
249 add.l %d4,%d3 /* get r */
250 add.l %d4,%d2 /* get g */
251 add.l %d4,%d1 /* get b */
252
253 move.l %d1,%d0 /* is clamping needed? */
254 or.l %d2,%d0
255 or.l %d3,%d0
256 asr.l #6,%d0
257 beq.b .yuv_no_clamp4 /* values in range: skip clamping */
258 moveq.l #63, %d0
259 cmp.l %d0, %d3
260 bls.s .yuv_red_ok4
261 spl.b %d3
262 and.l %d0, %d3
263.yuv_red_ok4:
264 cmp.l %d0, %d2
265 bls.s .yuv_green_ok4
266 spl.b %d2
267 and.l %d0, %d2
268.yuv_green_ok4:
269 cmp.l %d0, %d1
270 bls.s .yuv_blue_ok4
271 spl.b %d1
272 and.l %d0, %d1
273.yuv_blue_ok4:
274.yuv_no_clamp4:
275 /* : %d3 = R, %d2 = G, %d1 = B */
276
277 move.l %d2,%d0 /* save g for lower 9 bits */
278 lsl.l #3,%d3 /* R << 3 */
279 lsr.l #3,%d0 /* G >> 3 */
280 or.l %d3,%d0 /* |00000000|000000000|0000000r|rrrrrggg| */
281 move.w %d0,(%a0)
282 lsl.l #6,%d2 /* G << 6 */
283 or.l %d2,%d1 /* |00000000|000000000|0000gggg|ggbbbbbb| */
284 move.w %d1,(%a0)
285
286 cmp.l %a1,%a5 /* run %a0 up to end of line */
287 bhi.w .yuv_line_loop2
288
289 movem.l (%sp),%d2-%d6/%a2-%a5
290 lea.l (36,%sp),%sp /* restore registers */
291 188
292 rts 189 movem.l (%sp), %d2-%d7/%a2-%a6
190 lea.l (44, %sp), %sp /* restore registers */
293 191
192 rts
294.yuv_end: 193.yuv_end:
295 .size lcd_write_yuv420_lines,.yuv_end-lcd_write_yuv420_lines 194 .size lcd_write_yuv420_lines, yuv_end - lcd_write_yuv420_lines
296/* end lcd_write_yuv420_lines */
297 195
298 196
299/* begin lcd_write_data */ 197/* begin lcd_write_data */
diff --git a/firmware/target/coldfire/iaudio/x5/lcd-x5.c b/firmware/target/coldfire/iaudio/x5/lcd-x5.c
index 698ae477fa..92b9fde2e2 100755
--- a/firmware/target/coldfire/iaudio/x5/lcd-x5.c
+++ b/firmware/target/coldfire/iaudio/x5/lcd-x5.c
@@ -429,11 +429,11 @@ void lcd_blit(const fb_data* data, int x, int by, int width,
429/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. 429/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420.
430 * y should have two lines of Y back to back. 430 * y should have two lines of Y back to back.
431 * bu and rv should contain the Cb and Cr data for the two lines of Y. 431 * bu and rv should contain the Cb and Cr data for the two lines of Y.
432 * Stores bu, guv and rv in repective buffers for use in second line. 432 * Needs EMAC set to saturated, signed integer mode.
433 */ 433 */
434extern void lcd_write_yuv420_lines(const unsigned char *y, 434extern void lcd_write_yuv420_lines(const unsigned char *y,
435 unsigned char *bu, unsigned char *guv, unsigned char *rv, 435 const unsigned char *bu,
436 int width); 436 const unsigned char *rv, int width);
437 437
438/* Performance function to blit a YUV bitmap directly to the LCD 438/* Performance function to blit a YUV bitmap directly to the LCD
439 * src_x, src_y, width and height should be even and within the LCD's 439 * src_x, src_y, width and height should be even and within the LCD's
@@ -446,7 +446,6 @@ void lcd_yuv_blit(unsigned char * const src[3],
446 /* IRAM Y, Cb/bu, guv and Cb/rv buffers. */ 446 /* IRAM Y, Cb/bu, guv and Cb/rv buffers. */
447 unsigned char y_ibuf[LCD_WIDTH*2]; 447 unsigned char y_ibuf[LCD_WIDTH*2];
448 unsigned char bu_ibuf[LCD_WIDTH/2]; 448 unsigned char bu_ibuf[LCD_WIDTH/2];
449 unsigned char guv_ibuf[LCD_WIDTH/2];
450 unsigned char rv_ibuf[LCD_WIDTH/2]; 449 unsigned char rv_ibuf[LCD_WIDTH/2];
451 const unsigned char *ysrc, *usrc, *vsrc; 450 const unsigned char *ysrc, *usrc, *vsrc;
452 const unsigned char *ysrc_max; 451 const unsigned char *ysrc_max;
@@ -457,28 +456,29 @@ void lcd_yuv_blit(unsigned char * const src[3],
457 if (r_entry_mode == R_ENTRY_MODE_SOLID) 456 if (r_entry_mode == R_ENTRY_MODE_SOLID)
458 hw_dither(true); 457 hw_dither(true);
459 458
460 width = (width + 1) & ~1; 459 width &= ~1; /* stay on the safe side */
461 height = (height + 1) & ~1; 460 height &= ~1;
462 461
463 /* Set start position and window */ 462 /* Set start position and window */
464 lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | (y + y_offset)); 463 lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | (y + y_offset));
465 lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x); 464 lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);
466 465
467 lcd_begin_write_gram(); 466 lcd_begin_write_gram();
468 467
469 ysrc = src[0] + src_y*stride + src_x; 468 ysrc = src[0] + src_y * stride + src_x;
470 usrc = src[1] + (src_y*stride >> 2) + (src_x >> 1); 469 usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
471 vsrc = src[2] + (usrc - src[1]); 470 vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
472 ysrc_max = ysrc + height*stride; 471 ysrc_max = ysrc + height * stride;
473 472
473 coldfire_set_macsr(EMAC_SATURATE);
474 do 474 do
475 { 475 {
476 memcpy(y_ibuf, ysrc, width); 476 memcpy(y_ibuf, ysrc, width);
477 memcpy(&y_ibuf[width], &ysrc[stride], width); 477 memcpy(y_ibuf + width, ysrc + stride, width);
478 memcpy(bu_ibuf, usrc, width >> 1); 478 memcpy(bu_ibuf, usrc, width >> 1);
479 memcpy(rv_ibuf, vsrc, width >> 1); 479 memcpy(rv_ibuf, vsrc, width >> 1);
480 lcd_write_yuv420_lines(y_ibuf, bu_ibuf, guv_ibuf, rv_ibuf, width); 480 lcd_write_yuv420_lines(y_ibuf, bu_ibuf, rv_ibuf, width);
481 ysrc += stride << 1; 481 ysrc += 2 * stride;
482 usrc += stride >> 1; 482 usrc += stride >> 1;
483 vsrc += stride >> 1; 483 vsrc += stride >> 1;
484 } 484 }
diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
index ae55dfb224..1873b905c6 100755
--- a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
+++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
@@ -22,7 +22,7 @@
22 22
23 .section .icode, "ax", @progbits 23 .section .icode, "ax", @progbits
24 24
25/* lcd_write_yuv420_lines(), based on lcd-as-x5.S 25/* lcd_write_yuv420_lines()
26 * 26 *
27 * See http://en.wikipedia.org/wiki/YCbCr 27 * See http://en.wikipedia.org/wiki/YCbCr
28 * ITU-R BT.601 (formerly CCIR 601): 28 * ITU-R BT.601 (formerly CCIR 601):
@@ -38,252 +38,146 @@
38 * |R| |1.000000 0.000000 1.402000| |Y'| 38 * |R| |1.000000 0.000000 1.402000| |Y'|
39 * |G| = |1.000000 -0.334136 -0.714136| |Pb| 39 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
40 * |B| |1.000000 1.772000 0.000000| |Pr| 40 * |B| |1.000000 1.772000 0.000000| |Pr|
41 * Scaled, normalized, rounded and tweaked to yield RGB666, as converting 41 * Scaled, normalized, rounded and tweaked to yield RGB565:
42 * directly to RGB565 gives too much roundoff error: 42 * |R| |19611723 0 26881894| |Y' - 16| >> 27
43 * |R| |74 0 101| |Y' - 16| / 256 43 * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
44 * |G| = |74 -24 -51| |Cb - 128| / 256 44 * |B| |19611723 33976259 0| |Cr - 128| >> 27
45 * |B| |74 128 0| |Cr - 128| / 256 45 *
46 * Needs EMAC set to saturated, signed integer mode.
46 */ 47 */
47
48 .align 2 48 .align 2
49 .global lcd_write_yuv420_lines 49 .global lcd_write_yuv420_lines
50 .type lcd_write_yuv420_lines, @function 50 .type lcd_write_yuv420_lines, @function
51 51
52lcd_write_yuv420_lines: 52lcd_write_yuv420_lines:
53 lea.l (-36, %sp), %sp /* free up some registers */ 53 lea.l (-44, %sp), %sp /* free up some registers */
54 movem.l %d2-%d6/%a2-%a5, (%sp) 54 movem.l %d2-%d7/%a2-%a6, (%sp)
55 55
56 lea.l 0xf0000002, %a0 /* LCD data port */ 56 lea.l 0xf0000002, %a0 /* LCD data port */
57 movem.l (36+4, %sp), %a1-%a5 /* Y data, Cb data, guv storage, Cr data, width */ 57 movem.l (44+4, %sp), %a1-%a4 /* Y data, Cb data, Cr data, width */
58 lea.l (%a1, %a5), %a5 /* end address */ 58 lea.l (%a1, %a4), %a4 /* end address */
59 59
60.yuv_line_loop1: 60 move.l #19611723, %a5 /* y factor */
61 /* chroma for first & second pixel */ 61 move.l #33976259, %a6 /* bu factor */
62 clr.l %d1 /* load bu component */ 62 move.l #-6406711, %d5 /* gu factor */
63 move.b (%a2), %d1 63 move.l #-13692816, %d6 /* gv factor */
64 clr.l %d3 /* load rv component */ 64 move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion
65 move.b (%a4), %d3 65 * of R, G and B within RGB565 at once */
66
67 /* chroma for (very) first & second pixel */
68 clr.l %d2 /* load u component */
69 move.b (%a2)+, %d2
70 clr.l %d3 /* load v component */
71 move.b (%a3)+, %d3
66 moveq.l #-128, %d0 72 moveq.l #-128, %d0
67 add.l %d0, %d1 73 add.l %d0, %d2
68 add.l %d0, %d3 74 add.l %d0, %d3
69 75
70 move.l %d1, %d2 /* %d2 = cb component for guv */ 76 mac.l %a6, %d2, %acc0 /* bu */
71 asr.l #1, %d1 /* %d1 = 128 * (Cb - 128) / 256 */ 77 mac.l %d5, %d2, %acc1 /* gu */
72 move.b %d1, (%a2)+ /* save bu for next line */ 78 mac.l %d6, %d3, %acc1 /* gv */
73 moveq.l #-24, %d0 79 move.l #26881894, %d0 /* rv factor */
74 muls.w %d0, %d2 /* %d2 = -24 * (Cb - 128)*/ 80 mac.l %d0, %d3, %acc2 /* rv */
75 moveq.l #-51, %d0
76 muls.w %d3, %d0
77 add.l %d0, %d2 /* %d2 = -24 * (Cb - 128) - 51 * (Cr - 128) */
78 asr.l #8, %d2
79 move.b %d2, (%a3)+ /* save guv for next line */
80 moveq.l #101, %d0
81 muls.w %d0, %d3 /* %d3 = 101 * (Cr - 128) */
82 asr.l #8, %d3
83 move.b %d3, (%a4)+ /* save rv for next line */
84 81
85 /* luma for first pixel */ 82 /* luma for (very) first pixel */
86 clr.l %d4 /* load y component */ 83 clr.l %d1
87 move.b (%a1)+, %d4 84 move.b (%a1)+, %d1
88 moveq.l #74, %d0 85 moveq.l #-126, %d0
89 muls.w %d0, %d4 /* %d4 = 36 * Y */ 86 add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
90 asr.l #8, %d4 87 mac.l %a5, %d0, %acc0
91 subq.l #4, %d4 /* correction for (Y - 16) and rounding */ 88 mac.l %a5, %d0, %acc1
92 move.l %d4, %d5 89 mac.l %a5, %d0, %acc2
93 move.l %d4, %d6
94
95 /* combine & write first pixel */
96 add.l %d1, %d4 /* %d4 = blue */
97 add.l %d2, %d5 /* %d5 = green */
98 add.l %d3, %d6 /* %d6 = red */
99
100 move.l %d4, %d0 /* clamping */
101 or.l %d5, %d0
102 or.l %d6, %d0
103 asr.l #6, %d0
104 beq.s .yuv_all_ok1
105 moveq.l #63, %d0
106 cmp.l %d0, %d4
107 bls.s .yuv_blue_ok1
108 spl.b %d4
109 and.l %d0, %d4
110.yuv_blue_ok1:
111 cmp.l %d0, %d5
112 bls.s .yuv_green_ok1
113 spl.b %d5
114 and.l %d0, %d5
115.yuv_green_ok1:
116 cmp.l %d0, %d6
117 bls.s .yuv_red_ok1
118 spl.b %d6
119 and.l %d0, %d6
120.yuv_red_ok1:
121.yuv_all_ok1:
122
123 lsr.l #1, %d6 /* pack, convert to RGB565 and output */
124 lsr.l #1, %d4
125 lsl.l #6, %d6
126 or.l %d6, %d5
127 lsl.l #5, %d5
128 or.l %d5, %d4
129 move.w %d4, (%a0)
130
131 /* luma for second pixel */
132 clr.l %d4 /* load y component */
133 move.b (%a1)+, %d4
134 moveq.l #74, %d0
135 muls.w %d0, %d4 /* %d4 = 36 * Y */
136 asr.l #8, %d4
137 subq.l #4, %d4 /* correction for (Y - 16) and rounding */
138
139 /* combine & write second pixel */
140 add.l %d4, %d1 /* %d1 = blue */
141 add.l %d4, %d2 /* %d2 = green */
142 add.l %d4, %d3 /* %d3 = red */
143
144 move.l %d1, %d0 /* clamping */
145 or.l %d2, %d0
146 or.l %d3, %d0
147 asr.l #6, %d0
148 beq.s .yuv_all_ok2
149 moveq.l #63, %d0
150 cmp.l %d0, %d1
151 bls.s .yuv_blue_ok2
152 spl.b %d1
153 and.l %d0, %d1
154.yuv_blue_ok2:
155 cmp.l %d0, %d2
156 bls.s .yuv_green_ok2
157 spl.b %d2
158 and.l %d0, %d2
159.yuv_green_ok2:
160 cmp.l %d0, %d3
161 bls.s .yuv_red_ok2
162 spl.b %d3
163 and.l %d0, %d3
164.yuv_red_ok2:
165.yuv_all_ok2:
166 90
167 lsr.l #1, %d3 /* pack, convert to RGB565 and output */ 91 bra.b .yuv_line_entry
168 lsr.l #1, %d1
169 lsl.l #6, %d3
170 or.l %d3, %d2
171 lsl.l #5, %d2
172 or.l %d2, %d1
173 move.w %d1, (%a0)
174 92
175 cmp.l %a1,%a5 /* run %a1 up to end of line */ 93.yuv_line_loop:
176 bhi.w .yuv_line_loop1 94 /* chroma for first & second pixel */
177 95 clr.l %d2 /* load u component */
178 /* Rewind chroma pointers */ 96 move.b (%a2)+, %d2
179 movem.l (36+8, %sp), %a2-%a5 /* bu data, guv data, rv data, width */ 97 clr.l %d3 /* load v component */
180 lea.l (%a1, %a5), %a5 /* next end address */ 98 move.b (%a3)+, %d3
99 moveq.l #-128, %d0
100 add.l %d0, %d2
101 add.l %d0, %d3
181 102
182.yuv_line_loop2: 103 mac.l %a6, %d2, %acc0 /* bu */
183 /* read saved chromas and sign extend */ 104 mac.l %d5, %d2, %acc1 /* gu */
184 move.b (%a2)+, %d1 105 mac.l %d6, %d3, %acc1 /* gv */
185 extb.l %d1 106 move.l #26881894, %d0 /* rv factor */
186 move.b (%a3)+, %d2 107 mac.l %d0, %d3, %acc2 /* rv */
187 extb.l %d2
188 move.b (%a4)+, %d3
189 extb.l %d3
190 108
191 /* luma for first pixel */ 109 /* luma for first pixel */
192 clr.l %d4 /* load y component */ 110 clr.l %d1
193 move.b (%a1)+, %d4 111 move.b (%a1)+, %d1
194 moveq.l #74, %d0 112 moveq.l #-126, %d0
195 muls.w %d0, %d4 /* %d4 = 36 * Y */ 113 add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
196 asr.l #8, %d4 114 mac.l %a5, %d0, %acc0
197 subq.l #4, %d4 /* correction for (Y - 16) and rounding */ 115 mac.l %a5, %d0, %acc1
198 move.l %d4, %d5 116 mac.l %a5, %d0, %acc2
199 move.l %d4, %d6
200 117
201 /* combine & write first pixel */ 118 move.w %d4, (%a0)
202 add.l %d1, %d4 /* %d4 = blue */ 119 /* LCD write is delayed one pixel to use it for filling the EMAC latency */
203 add.l %d2, %d5 /* %d5 = green */ 120
204 add.l %d3, %d6 /* %d6 = red */ 121 /* convert to RGB565, pack and output */
122.yuv_line_entry:
123 moveq.l #27, %d0
124 move.l %acc0, %d2
125 move.l %acc1, %d3
126 move.l %acc2, %d4
127 lsr.l %d0, %d2
128 lsr.l %d0, %d4
129 moveq.l #26, %d0
130 lsr.l %d0, %d3
131 lsl.l #6, %d4
132 or.l %d3, %d4
133 lsl.l #5, %d4
134 or.l %d2, %d4
135 eor.l %d7, %d4
136
137 /* luma for second pixel as delta from the first */
138 clr.l %d0
139 move.b (%a1)+, %d0
140 sub.l %d1, %d0
141 mac.l %a5, %d0, %acc0
142 mac.l %a5, %d0, %acc1
143 mac.l %a5, %d0, %acc2
144
145 move.w %d4, (%a0)
146 /* LCD write is delayed one pixel to use it for filling the EMAC latency */
147
148 /* convert to RGB565, pack and output */
149 moveq.l #27, %d0
150 movclr.l %acc0, %d2
151 movclr.l %acc1, %d3
152 movclr.l %acc2, %d4
153 lsr.l %d0, %d2
154 lsr.l %d0, %d4
155 moveq.l #26, %d0
156 lsr.l %d0, %d3
157 lsl.l #6, %d4
158 or.l %d3, %d4
159 lsl.l #5, %d4
160 or.l %d2, %d4
161 eor.l %d7, %d4
162
163 cmp.l %a1, %a4 /* run %a1 up to end of line */
164 bhi.w .yuv_line_loop
205 165
206 move.l %d4, %d0 /* clamping */ 166 tst.l (44+4, %sp) /* use original Y pointer as a flag to */
207 or.l %d5, %d0 167 beq.b .yuv_exit /* distinguish between first and second */
208 or.l %d6, %d0 168 clr.l (44+4, %sp) /* pixel line */
209 asr.l #6, %d0
210 beq.s .yuv_all_ok3
211 moveq.l #63, %d0
212 cmp.l %d0, %d4
213 bls.s .yuv_blue_ok3
214 spl.b %d4
215 and.l %d0, %d4
216.yuv_blue_ok3:
217 cmp.l %d0, %d5
218 bls.s .yuv_green_ok3
219 spl.b %d5
220 and.l %d0, %d5
221.yuv_green_ok3:
222 cmp.l %d0, %d6
223 bls.s .yuv_red_ok3
224 spl.b %d6
225 and.l %d0, %d6
226.yuv_red_ok3:
227.yuv_all_ok3:
228 169
229 lsr.l #1, %d6 /* pack, convert to RGB565 and output */ 170 /* Rewind chroma pointers */
230 lsr.l #1, %d4 171 movem.l (44+8, %sp), %a2-%a4 /* Cb data, Cr data, width */
231 lsl.l #6, %d6 172 lea.l (%a1, %a4), %a4 /* end address */
232 or.l %d6, %d5 173 bra.w .yuv_line_loop
233 lsl.l #5, %d5
234 or.l %d5, %d4
235 move.w %d4, (%a0)
236
237 /* luma for second pixel */
238 clr.l %d4 /* load y component */
239 move.b (%a1)+, %d4
240 moveq.l #74, %d0
241 muls.w %d0, %d4 /* %d4 = 36 * Y */
242 asr.l #8, %d4
243 subq.l #4, %d4 /* correction for (Y - 16) and rounding */
244
245 /* combine & write second pixel */
246 add.l %d4, %d1 /* %d1 = blue */
247 add.l %d4, %d2 /* %d2 = green */
248 add.l %d4, %d3 /* %d3 = red */
249 174
250 move.l %d1, %d0 /* clamping */ 175.yuv_exit:
251 or.l %d2, %d0 176 move.w %d4, (%a0) /* write (very) last pixel */
252 or.l %d3, %d0
253 asr.l #6, %d0
254 beq.s .yuv_all_ok4
255 moveq.l #63, %d0
256 cmp.l %d0, %d1
257 bls.s .yuv_blue_ok4
258 spl.b %d1
259 and.l %d0, %d1
260.yuv_blue_ok4:
261 cmp.l %d0, %d2
262 bls.s .yuv_green_ok4
263 spl.b %d2
264 and.l %d0, %d2
265.yuv_green_ok4:
266 cmp.l %d0, %d3
267 bls.s .yuv_red_ok4
268 spl.b %d3
269 and.l %d0, %d3
270.yuv_red_ok4:
271.yuv_all_ok4:
272
273 lsr.l #1, %d3 /* pack, convert to RGB565 and output */
274 lsr.l #1, %d1
275 lsl.l #6, %d3
276 or.l %d3, %d2
277 lsl.l #5, %d2
278 or.l %d2, %d1
279 move.w %d1, (%a0)
280
281 cmp.l %a1, %a5 /* run %a1 up to end of line */
282 bhi.w .yuv_line_loop2
283 177
284 movem.l (%sp), %d2-%d6/%a2-%a5 178 movem.l (%sp), %d2-%d7/%a2-%a6
285 lea.l (36, %sp), %sp /* restore registers */ 179 lea.l (44, %sp), %sp /* restore registers */
286 180
287 rts 181 rts
288.lcd_write_yuv420_lines_end: 182.yuv_end:
289 .size lcd_write_yuv420_lines, .lcd_write_yuv420_lines_end - lcd_write_yuv420_lines 183 .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines