-rw-r--r--  firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S   256
-rw-r--r--  firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c        5
-rw-r--r--  firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S      256
-rw-r--r--  firmware/target/arm/sandisk/sansa-e200/lcd-e200.c           7
4 files changed, 234 insertions, 290 deletions
diff --git a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S
index cd509753ed..4926c7fa79 100644
--- a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S
+++ b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S
@@ -103,8 +103,7 @@ lcd_copy_buffer_rect: @
 
 /****************************************************************************
  * void lcd_write_yuv_420_lines(fb_data *dst,
- *                              unsigned char chroma_buf[LCD_HEIGHT/2*3],
  *                              unsigned char const * const src[3],
  *                              int width,
  *                              int stride);
  *
@@ -115,189 +114,166 @@ lcd_copy_buffer_rect: @
  * |R|   |74   0 101| |Y' - 16| >> 9
  * |G| = |74 -24 -51| |Cb - 128| >> 8
  * |B|   |74 128   0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
  */
     .section .icode, "ax", %progbits
     .align 2
     .global lcd_write_yuv420_lines
     .type lcd_write_yuv420_lines, %function
 lcd_write_yuv420_lines:
     @ r0 = dst
-    @ r1 = chroma_buf
-    @ r2 = yuv_src
-    @ r3 = width
-    @ [sp] = stride
-    stmfd sp!, { r4-r12, lr }  @ save non-scratch
-    stmfd sp!, { r0, r3 }  @ save dst and width
-    mov r14, #74  @ r14 = Y factor
-    ldmia r2, { r4, r5, r6 }  @ r4 = yuv_src[0] = Y'_p
+    @ r1 = yuv_src
+    @ r2 = width
+    @ r3 = stride
+    stmfd sp!, { r4-r12 }  @ save non-scratch
+    ldmia r1, { r4, r5, r6 }  @ r4 = yuv_src[0] = Y'_p
     @ r5 = yuv_src[1] = Cb_p
     @ r6 = yuv_src[2] = Cr_p
-10: @ loop line 1 @
-    ldrb r2, [r4], #1  @ r2 = *Y'_p++;
-    ldrb r8, [r5], #1  @ r8 = *Cb_p++;
-    ldrb r11, [r6], #1  @ r11 = *Cr_p++;
+    @ r1 = scratch
+10: @ loop line @
+    ldrb r7, [r4]  @ r7 = *Y'_p;
+    ldrb r8, [r5], #1  @ r8 = *Cb_p++;
+    ldrb r9, [r6], #1  @ r9 = *Cr_p++;
     @
-    @ compute Y
-    sub r2, r2, #16  @ r7 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @ actually (Y' - 16)*37 and shift right
+    add r7, r12, r7, asl #5  @ by one less when adding - same for all
     @
     sub r8, r8, #128  @ Cb -= 128
-    sub r11, r11, #128  @ Cr -= 128
+    sub r9, r9, #128  @ Cr -= 128
     @
-    mvn r2, #23  @ compute guv
-    mul r10, r2, r8  @ r10 = Cb*-24
-    mvn r2, #50  @
-    mla r10, r2, r11, r10  @ r10 = r10 + Cr*-51
+    add r10, r9, r9, asl #1  @ r10 = Cr*51 + Cb*24
+    add r10, r10, r10, asl #4  @
+    add r10, r10, r8, asl #3  @
+    add r10, r10, r8, asl #4  @
     @
-    mov r2, #101  @ compute rv
-    mul r9, r11, r2  @ r9 = rv = Cr*101
+    add r11, r9, r9, asl #2  @ r9 = Cr*101
+    add r11, r11, r9, asl #5  @
+    add r9, r11, r9, asl #6  @
     @
-    @ store chromas in line buffer
-    add r8, r8, #2  @ bu = (Cb + 2) >> 2
-    mov r8, r8, asr #2  @
-    strb r8, [r1], #1  @
-    add r9, r9, #256  @ rv = (Cr + 256) >> 9
+    add r8, r8, #2  @ r8 = bu = (Cb*128 + 128) >> 8
+    mov r8, r8, asr #2  @
+    add r9, r9, #256  @ r9 = rv = (r9 + 256) >> 9
     mov r9, r9, asr #9  @
-    strb r9, [r1], #1  @
-    mov r10, r10, asr #8  @ guv >>= 8
-    strb r10, [r1], #1  @
+    rsb r10, r10, #128  @ r10 = guv = (-r10 + 128) >> 8
+    mov r10, r10, asr #8  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
     bls 15f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
 15: @ no clamp @
     @
-    orr r12, r2, r7, lsl #5  @ r4 |= (g << 5)
-    ldrb r2, [r4], #1  @ r2 = Y' = *Y'_p++
+    orr r12, r1, r7, lsl #5  @ r4 |= (g << 5)
+    ldrb r7, [r4, r3]  @ r7 = Y' = *(Y'_p + stride)
     orr r12, r12, r11, lsl #11  @ r4 = b | (r << 11)
-    strh r12, [r0], #LCD_WIDTH  @ store pixel
+    strh r12, [r0]  @ store pixel
     @
-    sub r2, r2, #16  @ r7 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @ next Y
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @
+    add r7, r12, r7, asl #5  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
     bls 15f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
 15: @ no clamp @
     @
-    orr r12, r2, r11, lsl #11  @ r4 = b | (r << 11)
-    orr r12, r12, r7, lsl #5  @ r4 |= (g << 5)
-    strh r12, [r0, #LCD_WIDTH]!  @ store pixel
+    orr r12, r1, r11, lsl #11  @ r12 = b | (r << 11)
+    orr r12, r12, r7, lsl #5  @ r12 |= (g << 5)
+    ldrb r7, [r4, #1]!  @ r7 = Y' = *(++Y'_p)
+    strh r12, [r0, #-2]  @ store pixel
     add r0, r0, #2*LCD_WIDTH  @
     @
-    subs r3, r3, #2  @
-    bgt 10b  @ loop line 1 @
-    @ do second line
-    @
-    ldmfd sp!, { r0, r3 }  @ pop dst and width
-    sub r0, r0, #2  @ set dst to start of next line
-    sub r1, r1, r3, asl #1  @ rewind chroma pointer...
-    ldr r2, [sp, #40]  @ r2 = stride
-    add r1, r1, r3, asr #1  @ ... (r1 -= width/2*3)
-    @ move sources to start of next line
-    sub r2, r2, r3  @ r2 = skip = stride - width
-    add r4, r4, r2  @ r4 = Y'_p + skip
-    @
-20: @ loop line 2 @
-    ldrb r2, [r4], #1  @ r7 = Y' = *Y'_p++
-    ldrsb r8, [r1], #1  @ reload saved chromas
-    ldrsb r9, [r1], #1  @
-    ldrsb r10, [r1], #1  @
-    @
-    sub r2, r2, #16  @ r2 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @
+    add r7, r12, r7, asl #5  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
-    bls 25f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
-25: @ no clamp @
-    @
-    orr r12, r2, r11, lsl #11  @ r4 = b | (r << 11)
-    ldrb r2, [r4], #1  @ r2 = Y' = *Y'_p++
-    orr r12, r12, r7, lsl #5  @ r4 |= (g << 5)
-    strh r12, [r0], #LCD_WIDTH  @ store pixel
+    bls 15f  @ no clamp @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
+15: @ no clamp @
     @
-    @ do second pixel
+    orr r12, r1, r7, lsl #5  @ r12 = b | (g << 5)
+    ldrb r7, [r4, r3]  @ r7 = Y' = *(Y'_p + stride)
+    orr r12, r12, r11, lsl #11  @ r12 |= (r << 11)
+    strh r12, [r0]  @ store pixel
     @
-    sub r2, r2, #16  @ r2 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @
+    add r7, r12, r7, asl #5  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
-    bls 25f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
-25: @ no clamp @
+    bls 15f  @ no clamp @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
+15: @ no clamp @
     @
-    orr r12, r2, r11, lsl #11  @ r4 = b | (r << 11)
-    orr r12, r12, r7, lsl #5  @ r4 |= (g << 5)
-    strh r12, [r0, #LCD_WIDTH]!  @ store pixel
+    orr r12, r1, r11, lsl #11  @ r12 = b | (r << 11)
+    orr r12, r12, r7, lsl #5  @ r12 |= (g << 5)
+    strh r12, [r0, #-2]  @ store pixel
     add r0, r0, #2*LCD_WIDTH  @
+    add r4, r4, #1  @
     @
-    subs r3, r3, #2  @
-    bgt 20b  @ loop line 2 @
+    subs r2, r2, #2  @ subtract block from width
+    bgt 10b  @ loop line @
     @
-    ldmfd sp!, { r4-r12, pc }  @ restore registers and return
+    ldmfd sp!, { r4-r12 }  @ restore registers and return
+    bx lr  @
     .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
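
For reference, the per-pixel arithmetic the rewritten loop performs, per the
conversion matrix in the comment above, looks like this in C. This is an
illustrative model only, not code from the tree; the function name is made up.

    /* Model of one RGB565 pixel as described by the conversion matrix:
     *   R = (74*(Y'-16)               + 101*(Cr-128)) >> 9   (5 bits)
     *   G = (74*(Y'-16) - 24*(Cb-128) -  51*(Cr-128)) >> 8   (6 bits)
     *   B = (74*(Y'-16) + 128*(Cb-128)              ) >> 9   (5 bits)
     */
    static unsigned short yuv420_pixel_model(int y, int cb, int cr)
    {
        int yv = 74 * (y - 16);
        cb -= 128;
        cr -= 128;

        int r = (yv + 101 * cr) >> 9;
        int g = (yv - 24 * cb - 51 * cr) >> 8;
        int b = (yv + 128 * cb) >> 9;

        if (r < 0) r = 0; else if (r > 31) r = 31;   /* clamp to 5 bits */
        if (g < 0) g = 0; else if (g > 63) g = 63;   /* clamp to 6 bits */
        if (b < 0) b = 0; else if (b > 31) b = 31;   /* clamp to 5 bits */

        return (unsigned short)((r << 11) | (g << 5) | b);
    }

Each loop iteration of the assembly does this four times, for a 2x2 block of
luma samples that shares one Cb/Cr pair, with the multiplies strength-reduced
to shifts and adds.
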
diff --git a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c
index b680b15d25..ab7c91437c 100644
--- a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c
+++ b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c
@@ -250,7 +250,6 @@ void lcd_bitmap_transparent_part(const fb_data *src, int src_x, int src_y,
 
 /* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
 extern void lcd_write_yuv420_lines(fb_data *dst,
-                                   unsigned char chroma_buf[LCD_HEIGHT/2*3],
                                    unsigned char const * const src[3],
                                    int width,
                                    int stride);
@@ -263,7 +262,6 @@ void lcd_yuv_blit(unsigned char * const src[3],
 {
     /* Caches for chroma data so it only need be recaculated every other
        line */
-    unsigned char chroma_buf[LCD_HEIGHT/2*3]; /* 480 bytes */
     unsigned char const * yuv_src[3];
     off_t z;
 
@@ -283,8 +281,7 @@ void lcd_yuv_blit(unsigned char * const src[3],
 
     do
     {
-        lcd_write_yuv420_lines(dst, chroma_buf, yuv_src, width,
-                               stride);
+        lcd_write_yuv420_lines(dst, yuv_src, width, stride);
         yuv_src[0] += stride << 1; /* Skip down two luma lines */
         yuv_src[1] += stride >> 1; /* Skip down one chroma line */
         yuv_src[2] += stride >> 1;
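
The call site now passes only the three plane pointers plus width and stride.
The pointer setup itself is not touched by this patch; for a planar YUV420
source it is derived roughly like the sketch below (illustrative names, and it
assumes half-resolution chroma planes with a pitch of stride/2):

    static void setup_yuv_src(unsigned char const *yuv_src[3],
                              unsigned char * const src[3],
                              int src_x, int src_y, int stride)
    {
        long z = (long)stride * src_y;                  /* luma offset of first row */
        yuv_src[0] = src[0] + z + src_x;                /* Y, full resolution   */
        yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);  /* Cb, half resolution  */
        yuv_src[2] = src[2] + (z >> 2) + (src_x >> 1);  /* Cr, half resolution  */
    }

This is consistent with the advancing done in the loop above: two luma rows
(stride << 1) and one chroma row (stride >> 1) per call.
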
diff --git a/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S b/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S
index cd509753ed..4926c7fa79 100644
--- a/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S
+++ b/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S
@@ -103,8 +103,7 @@ lcd_copy_buffer_rect: @
 
 /****************************************************************************
  * void lcd_write_yuv_420_lines(fb_data *dst,
- *                              unsigned char chroma_buf[LCD_HEIGHT/2*3],
  *                              unsigned char const * const src[3],
  *                              int width,
  *                              int stride);
  *
@@ -115,189 +114,166 @@ lcd_copy_buffer_rect: @
  * |R|   |74   0 101| |Y' - 16| >> 9
  * |G| = |74 -24 -51| |Cb - 128| >> 8
  * |B|   |74 128   0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
  */
     .section .icode, "ax", %progbits
     .align 2
     .global lcd_write_yuv420_lines
     .type lcd_write_yuv420_lines, %function
 lcd_write_yuv420_lines:
     @ r0 = dst
-    @ r1 = chroma_buf
-    @ r2 = yuv_src
-    @ r3 = width
-    @ [sp] = stride
-    stmfd sp!, { r4-r12, lr }  @ save non-scratch
-    stmfd sp!, { r0, r3 }  @ save dst and width
-    mov r14, #74  @ r14 = Y factor
-    ldmia r2, { r4, r5, r6 }  @ r4 = yuv_src[0] = Y'_p
+    @ r1 = yuv_src
+    @ r2 = width
+    @ r3 = stride
+    stmfd sp!, { r4-r12 }  @ save non-scratch
+    ldmia r1, { r4, r5, r6 }  @ r4 = yuv_src[0] = Y'_p
     @ r5 = yuv_src[1] = Cb_p
     @ r6 = yuv_src[2] = Cr_p
-10: @ loop line 1 @
-    ldrb r2, [r4], #1  @ r2 = *Y'_p++;
-    ldrb r8, [r5], #1  @ r8 = *Cb_p++;
-    ldrb r11, [r6], #1  @ r11 = *Cr_p++;
+    @ r1 = scratch
+10: @ loop line @
+    ldrb r7, [r4]  @ r7 = *Y'_p;
+    ldrb r8, [r5], #1  @ r8 = *Cb_p++;
+    ldrb r9, [r6], #1  @ r9 = *Cr_p++;
     @
-    @ compute Y
-    sub r2, r2, #16  @ r7 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @ actually (Y' - 16)*37 and shift right
+    add r7, r12, r7, asl #5  @ by one less when adding - same for all
     @
     sub r8, r8, #128  @ Cb -= 128
-    sub r11, r11, #128  @ Cr -= 128
+    sub r9, r9, #128  @ Cr -= 128
     @
-    mvn r2, #23  @ compute guv
-    mul r10, r2, r8  @ r10 = Cb*-24
-    mvn r2, #50  @
-    mla r10, r2, r11, r10  @ r10 = r10 + Cr*-51
+    add r10, r9, r9, asl #1  @ r10 = Cr*51 + Cb*24
+    add r10, r10, r10, asl #4  @
+    add r10, r10, r8, asl #3  @
+    add r10, r10, r8, asl #4  @
     @
-    mov r2, #101  @ compute rv
-    mul r9, r11, r2  @ r9 = rv = Cr*101
+    add r11, r9, r9, asl #2  @ r9 = Cr*101
+    add r11, r11, r9, asl #5  @
+    add r9, r11, r9, asl #6  @
     @
-    @ store chromas in line buffer
-    add r8, r8, #2  @ bu = (Cb + 2) >> 2
-    mov r8, r8, asr #2  @
-    strb r8, [r1], #1  @
-    add r9, r9, #256  @ rv = (Cr + 256) >> 9
+    add r8, r8, #2  @ r8 = bu = (Cb*128 + 128) >> 8
+    mov r8, r8, asr #2  @
+    add r9, r9, #256  @ r9 = rv = (r9 + 256) >> 9
     mov r9, r9, asr #9  @
-    strb r9, [r1], #1  @
-    mov r10, r10, asr #8  @ guv >>= 8
-    strb r10, [r1], #1  @
+    rsb r10, r10, #128  @ r10 = guv = (-r10 + 128) >> 8
+    mov r10, r10, asr #8  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
    orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
     bls 15f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
 15: @ no clamp @
     @
-    orr r12, r2, r7, lsl #5  @ r4 |= (g << 5)
-    ldrb r2, [r4], #1  @ r2 = Y' = *Y'_p++
+    orr r12, r1, r7, lsl #5  @ r4 |= (g << 5)
+    ldrb r7, [r4, r3]  @ r7 = Y' = *(Y'_p + stride)
     orr r12, r12, r11, lsl #11  @ r4 = b | (r << 11)
-    strh r12, [r0], #LCD_WIDTH  @ store pixel
+    strh r12, [r0]  @ store pixel
     @
-    sub r2, r2, #16  @ r7 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @ next Y
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @
+    add r7, r12, r7, asl #5  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
     bls 15f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
 15: @ no clamp @
     @
-    orr r12, r2, r11, lsl #11  @ r4 = b | (r << 11)
-    orr r12, r12, r7, lsl #5  @ r4 |= (g << 5)
-    strh r12, [r0, #LCD_WIDTH]!  @ store pixel
+    orr r12, r1, r11, lsl #11  @ r12 = b | (r << 11)
+    orr r12, r12, r7, lsl #5  @ r12 |= (g << 5)
+    ldrb r7, [r4, #1]!  @ r7 = Y' = *(++Y'_p)
+    strh r12, [r0, #-2]  @ store pixel
     add r0, r0, #2*LCD_WIDTH  @
     @
-    subs r3, r3, #2  @
-    bgt 10b  @ loop line 1 @
-    @ do second line
-    @
-    ldmfd sp!, { r0, r3 }  @ pop dst and width
-    sub r0, r0, #2  @ set dst to start of next line
-    sub r1, r1, r3, asl #1  @ rewind chroma pointer...
-    ldr r2, [sp, #40]  @ r2 = stride
-    add r1, r1, r3, asr #1  @ ... (r1 -= width/2*3)
-    @ move sources to start of next line
-    sub r2, r2, r3  @ r2 = skip = stride - width
-    add r4, r4, r2  @ r4 = Y'_p + skip
-    @
-20: @ loop line 2 @
-    ldrb r2, [r4], #1  @ r7 = Y' = *Y'_p++
-    ldrsb r8, [r1], #1  @ reload saved chromas
-    ldrsb r9, [r1], #1  @
-    ldrsb r10, [r1], #1  @
-    @
-    sub r2, r2, #16  @ r2 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @
+    add r7, r12, r7, asl #5  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
-    bls 25f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
-25: @ no clamp @
-    @
-    orr r12, r2, r11, lsl #11  @ r4 = b | (r << 11)
-    ldrb r2, [r4], #1  @ r2 = Y' = *Y'_p++
-    orr r12, r12, r7, lsl #5  @ r4 |= (g << 5)
-    strh r12, [r0], #LCD_WIDTH  @ store pixel
+    bls 15f  @ no clamp @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
+15: @ no clamp @
     @
-    @ do second pixel
+    orr r12, r1, r7, lsl #5  @ r12 = b | (g << 5)
+    ldrb r7, [r4, r3]  @ r7 = Y' = *(Y'_p + stride)
+    orr r12, r12, r11, lsl #11  @ r12 |= (r << 11)
+    strh r12, [r0]  @ store pixel
     @
-    sub r2, r2, #16  @ r2 = Y = (Y' - 16)*74
-    mul r7, r2, r14  @
+    sub r7, r7, #16  @ r7 = Y = (Y' - 16)*74
+    add r12, r7, r7, asl #2  @
+    add r7, r12, r7, asl #5  @
     @ compute R, G, and B
-    add r2, r8, r7, asr #9  @ r2 = b = (Y >> 9) + bu
-    add r11, r9, r7, asr #9  @ r11 = r = (Y >> 9) + rv
-    add r7, r10, r7, asr #8  @ r7 = g = (Y >> 8) + guv
+    add r1, r8, r7, asr #8  @ r1 = b = (Y >> 9) + bu
+    add r11, r9, r7, asr #8  @ r11 = r = (Y >> 9) + rv
+    add r7, r10, r7, asr #7  @ r7 = g = (Y >> 8) + guv
     @
-    orr r12, r2, r11  @ check if clamping is needed...
+    orr r12, r1, r11  @ check if clamping is needed...
     orr r12, r12, r7, asr #1  @ ...at all
     cmp r12, #31  @
-    bls 25f  @ no clamp @
-    mov r12, #31  @
-    cmp r12, r2  @ clamp b
-    andlo r2, r12, r2, asr #31  @
-    eorlo r2, r2, r12  @
-    cmp r12, r11  @ clamp r
-    andlo r11, r12, r11, asr #31  @
-    eorlo r11, r11, r12  @
-    cmp r12, r7, asr #1  @ clamp g
-    andlo r7, r12, r7, asr #31  @
-    eorlo r7, r7, r12  @
-    orrlo r7, r7, r7, asl #1  @
-25: @ no clamp @
+    bls 15f  @ no clamp @
+    cmp r1, #31  @ clamp b
+    mvnhi r1, r1, asr #31  @
+    andhi r1, r1, #31  @
+    cmp r11, #31  @ clamp r
+    mvnhi r11, r11, asr #31  @
+    andhi r11, r11, #31  @
+    cmp r7, #63  @ clamp g
+    mvnhi r7, r7, asr #31  @
+    andhi r7, r7, #63  @
+15: @ no clamp @
     @
-    orr r12, r2, r11, lsl #11  @ r4 = b | (r << 11)
-    orr r12, r12, r7, lsl #5  @ r4 |= (g << 5)
-    strh r12, [r0, #LCD_WIDTH]!  @ store pixel
+    orr r12, r1, r11, lsl #11  @ r12 = b | (r << 11)
+    orr r12, r12, r7, lsl #5  @ r12 |= (g << 5)
+    strh r12, [r0, #-2]  @ store pixel
     add r0, r0, #2*LCD_WIDTH  @
+    add r4, r4, #1  @
     @
-    subs r3, r3, #2  @
-    bgt 20b  @ loop line 2 @
+    subs r2, r2, #2  @ subtract block from width
+    bgt 10b  @ loop line @
     @
-    ldmfd sp!, { r4-r12, pc }  @ restore registers and return
+    ldmfd sp!, { r4-r12 }  @ restore registers and return
+    bx lr  @
     .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
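
The e200 version is the same rewrite. The main trick in both is replacing the
mul/mla instructions with shift-and-add sequences; a quick C check of the
equivalences used (helper names are mine, not from the source):

    #include <assert.h>

    static int times37(int x)  { return (x << 5) + (x << 2) + x; }  /* 32x + 4x + x */
    static int times101(int x) { return times37(x) + (x << 6); }    /* 37x + 64x    */
    static int guv_term(int cb, int cr)                             /* 51*Cr + 24*Cb */
    {
        int t = cr + (cr << 1);          /* 3*Cr   */
        t += t << 4;                     /* 51*Cr  */
        t += (cb << 3) + (cb << 4);      /* + 24*Cb */
        return t;
    }

    int main(void)
    {
        for (int v = -128; v < 128; v++) {
            assert(2 * times37(v) == 74 * v);   /* so 74*v >> 9 == 37*v >> 8 */
            assert(times101(v) == 101 * v);
            assert(guv_term(v, -v) == 51 * -v + 24 * v);
        }
        return 0;
    }

Keeping 37*(Y' - 16) instead of 74*(Y' - 16) is why the new code shifts by 8
and 7 where the comments still say ">> 9" and ">> 8".
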
diff --git a/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c b/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c
index 31df3f9f19..3e1f74d7a1 100644
--- a/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c
+++ b/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c
@@ -627,7 +627,6 @@ void lcd_blit(const fb_data* data, int x, int by, int width,
 
 /* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
 extern void lcd_write_yuv420_lines(fb_data *dst,
-                                   unsigned char chroma_buf[LCD_HEIGHT/2*3],
                                    unsigned char const * const src[3],
                                    int width,
                                    int stride);
@@ -638,9 +637,6 @@ void lcd_yuv_blit(unsigned char * const src[3],
                   int src_x, int src_y, int stride,
                   int x, int y, int width, int height)
 {
-    /* Caches for chroma data so it only need be recaculated every other
-       line */
-    static unsigned char chroma_buf[LCD_HEIGHT/2*3]; /* 330 bytes */
     unsigned char const * yuv_src[3];
     off_t z;
 
@@ -661,8 +657,7 @@ void lcd_yuv_blit(unsigned char * const src[3],
 
     do
     {
-        lcd_write_yuv420_lines(dst, chroma_buf, yuv_src, width,
-                               stride);
+        lcd_write_yuv420_lines(dst, yuv_src, width, stride);
         yuv_src[0] += stride << 1; /* Skip down two luma lines */
         yuv_src[1] += stride >> 1; /* Skip down one chroma line */
         yuv_src[2] += stride >> 1;
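
A note on the new clamp sequence (cmp / mvnhi / andhi): it saturates a value
into 0..31 (or 0..63 for green) without a branch per channel. The same idea
in C, assuming an arithmetic right shift of negative ints as on ARM/GCC:

    static int clamp_component(int x, int max)   /* max is 31 or 63 */
    {
        if ((unsigned)x > (unsigned)max)   /* negative or too large */
            x = ~(x >> 31) & max;          /* -> 0 if negative, max if too large */
        return x;
    }

The old code needed mov/cmp/andlo/eorlo (plus an extra orrlo for green); the
new form is one compare and two conditional instructions per channel, and the
single "bls 15f" up front still skips all of it when no channel overflows.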