diff options
author | Michael Sevakis <jethead71@rockbox.org> | 2007-09-12 09:02:31 +0000 |
---|---|---|
committer | Michael Sevakis <jethead71@rockbox.org> | 2007-09-12 09:02:31 +0000 |
commit | e6511d8eaaa4532ab67bd5e3086d51cf82880e05 (patch) | |
tree | 3719498755c514fe4b31802d3a89394eac035d34 | |
parent | 79244b2a3be6b1cf60574c6ef9bdcb4a08e516d2 (diff) | |
download | rockbox-e6511d8eaaa4532ab67bd5e3086d51cf82880e05.tar.gz rockbox-e6511d8eaaa4532ab67bd5e3086d51cf82880e05.zip |
Faster video rendering for e200 and Gigabeat.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14675 a1c6a512-1295-4272-9138-f99709370657
4 files changed, 234 insertions, 290 deletions
diff --git a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S index cd509753ed..4926c7fa79 100644 --- a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S +++ b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-as-meg-fx.S | |||
@@ -103,8 +103,7 @@ lcd_copy_buffer_rect: @ | |||
103 | 103 | ||
104 | /**************************************************************************** | 104 | /**************************************************************************** |
105 | * void lcd_write_yuv420_lines(fb_data *dst, | 105 | * void lcd_write_yuv420_lines(fb_data *dst, |
106 | * unsigned char chroma_buf[LCD_HEIGHT/2*3], | 106 | * unsigned char const * const src[3], |
107 | unsigned char const * const src[3], | ||
108 | * int width, | 107 | * int width, |
109 | * int stride); | 108 | * int stride); |
110 | * | 109 | * |
@@ -115,189 +114,166 @@ lcd_copy_buffer_rect: @ | |||
115 | * |R| |74 0 101| |Y' - 16| >> 9 | 114 | * |R| |74 0 101| |Y' - 16| >> 9 |
116 | * |G| = |74 -24 -51| |Cb - 128| >> 8 | 115 | * |G| = |74 -24 -51| |Cb - 128| >> 8 |
117 | * |B| |74 128 0| |Cr - 128| >> 9 | 116 | * |B| |74 128 0| |Cr - 128| >> 9 |
117 | * | ||
118 | * Write four RGB565 pixels in the following order on each loop: | ||
119 | * 1 3 + > down | ||
120 | * 2 4 \/ left | ||
118 | */ | 121 | */ |
119 | .section .icode, "ax", %progbits | 122 | .section .icode, "ax", %progbits |
120 | .align 2 | 123 | .align 2 |
121 | .global lcd_write_yuv420_lines | 124 | .global lcd_write_yuv420_lines |
122 | .type lcd_write_yuv420_lines, %function | 125 | .type lcd_write_yuv420_lines, %function |
123 | lcd_write_yuv420_lines: | 126 | lcd_write_yuv420_lines: |
124 | @ r0 = dst | 127 | @ r0 = dst |
125 | @ r1 = chroma_buf | 128 | @ r1 = yuv_src |
126 | @ r2 = yuv_src | 129 | @ r2 = width |
127 | @ r3 = width | 130 | @ r3 = stride |
128 | @ [sp] = stride | 131 | stmfd sp!, { r4-r12 } @ save non-scratch |
129 | stmfd sp!, { r4-r12, lr } @ save non-scratch | 132 | ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p |
130 | stmfd sp!, { r0, r3 } @ save dst and width | ||
131 | mov r14, #74 @ r14 = Y factor | ||
132 | ldmia r2, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p | ||
133 | @ r5 = yuv_src[1] = Cb_p | 133 | @ r5 = yuv_src[1] = Cb_p |
134 | @ r6 = yuv_src[2] = Cr_p | 134 | @ r6 = yuv_src[2] = Cr_p |
135 | 10: @ loop line 1 @ | 135 | @ r1 = scratch |
136 | ldrb r2, [r4], #1 @ r2 = *Y'_p++; | 136 | 10: @ loop line @ |
137 | ldrb r8, [r5], #1 @ r8 = *Cb_p++; | 137 | ldrb r7, [r4] @ r7 = *Y'_p; |
138 | ldrb r11, [r6], #1 @ r11 = *Cr_p++; | 138 | ldrb r8, [r5], #1 @ r8 = *Cb_p++; |
139 | ldrb r9, [r6], #1 @ r9 = *Cr_p++; | ||
139 | @ | 140 | @ |
140 | @ compute Y | 141 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
141 | sub r2, r2, #16 @ r7 = Y = (Y' - 16)*74 | 142 | add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right |
142 | mul r7, r2, r14 @ | 143 | add r7, r12, r7, asl #5 @ by one less when adding - same for all |
143 | @ | 144 | @ |
144 | sub r8, r8, #128 @ Cb -= 128 | 145 | sub r8, r8, #128 @ Cb -= 128 |
145 | sub r11, r11, #128 @ Cr -= 128 | 146 | sub r9, r9, #128 @ Cr -= 128 |
146 | @ | 147 | @ |
147 | mvn r2, #23 @ compute guv | 148 | add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 |
148 | mul r10, r2, r8 @ r10 = Cb*-24 | 149 | add r10, r10, r10, asl #4 @ |
149 | mvn r2, #50 @ | 150 | add r10, r10, r8, asl #3 @ |
150 | mla r10, r2, r11, r10 @ r10 = r10 + Cr*-51 | 151 | add r10, r10, r8, asl #4 @ |
151 | @ | 152 | @ |
152 | mov r2, #101 @ compute rv | 153 | add r11, r9, r9, asl #2 @ r9 = Cr*101 |
153 | mul r9, r11, r2 @ r9 = rv = Cr*101 | 154 | add r11, r11, r9, asl #5 @ |
155 | add r9, r11, r9, asl #6 @ | ||
154 | @ | 156 | @ |
155 | @ store chromas in line buffer | 157 | add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9 |
156 | add r8, r8, #2 @ bu = (Cb + 2) >> 2 | 158 | mov r8, r8, asr #2 @ |
157 | mov r8, r8, asr #2 @ | 159 | add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 |
158 | strb r8, [r1], #1 @ | ||
159 | add r9, r9, #256 @ rv = (Cr + 256) >> 9 | ||
160 | mov r9, r9, asr #9 @ | 160 | mov r9, r9, asr #9 @ |
161 | strb r9, [r1], #1 @ | 161 | rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 |
162 | mov r10, r10, asr #8 @ guv >>= 8 | 162 | mov r10, r10, asr #8 @ |
163 | strb r10, [r1], #1 @ | ||
164 | @ compute R, G, and B | 163 | @ compute R, G, and B |
165 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 164 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
166 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 165 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
167 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 166 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
168 | @ | 167 | @ |
169 | orr r12, r2, r11 @ check if clamping is needed... | 168 | orr r12, r1, r11 @ check if clamping is needed... |
170 | orr r12, r12, r7, asr #1 @ ...at all | 169 | orr r12, r12, r7, asr #1 @ ...at all |
171 | cmp r12, #31 @ | 170 | cmp r12, #31 @ |
172 | bls 15f @ no clamp @ | 171 | bls 15f @ no clamp @ |
173 | mov r12, #31 @ | 172 | cmp r1, #31 @ clamp b |
174 | cmp r12, r2 @ clamp b | 173 | mvnhi r1, r1, asr #31 @ |
175 | andlo r2, r12, r2, asr #31 @ | 174 | andhi r1, r1, #31 @ |
176 | eorlo r2, r2, r12 @ | 175 | cmp r11, #31 @ clamp r |
177 | cmp r12, r11 @ clamp r | 176 | mvnhi r11, r11, asr #31 @ |
178 | andlo r11, r12, r11, asr #31 @ | 177 | andhi r11, r11, #31 @ |
179 | eorlo r11, r11, r12 @ | 178 | cmp r7, #63 @ clamp g |
180 | cmp r12, r7, asr #1 @ clamp g | 179 | mvnhi r7, r7, asr #31 @ |
181 | andlo r7, r12, r7, asr #31 @ | 180 | andhi r7, r7, #63 @ |
182 | eorlo r7, r7, r12 @ | ||
183 | orrlo r7, r7, r7, asl #1 @ | ||
184 | 15: @ no clamp @ | 181 | 15: @ no clamp @ |
185 | @ | 182 | @ |
186 | orr r12, r2, r7, lsl #5 @ r4 |= (g << 5) | 183 | orr r12, r1, r7, lsl #5 @ r12 = b | (g << 5) |
187 | ldrb r2, [r4], #1 @ r2 = Y' = *Y'_p++ | 184 | ldrb r7, [r4, r3] @ r7 = Y' = *(Y'_p + stride) |
188 | orr r12, r12, r11, lsl #11 @ r4 = b | (r << 11) | 185 | orr r12, r12, r11, lsl #11 @ r12 |= (r << 11) |
189 | strh r12, [r0], #LCD_WIDTH @ store pixel | 186 | strh r12, [r0] @ store pixel |
190 | @ | 187 | @ |
191 | sub r2, r2, #16 @ r7 = Y = (Y' - 16)*74 | 188 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
192 | mul r7, r2, r14 @ next Y | 189 | add r12, r7, r7, asl #2 @ |
190 | add r7, r12, r7, asl #5 @ | ||
193 | @ compute R, G, and B | 191 | @ compute R, G, and B |
194 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 192 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
195 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 193 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
196 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 194 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
197 | @ | 195 | @ |
198 | orr r12, r2, r11 @ check if clamping is needed... | 196 | orr r12, r1, r11 @ check if clamping is needed... |
199 | orr r12, r12, r7, asr #1 @ ...at all | 197 | orr r12, r12, r7, asr #1 @ ...at all |
200 | cmp r12, #31 @ | 198 | cmp r12, #31 @ |
201 | bls 15f @ no clamp @ | 199 | bls 15f @ no clamp @ |
202 | mov r12, #31 @ | 200 | cmp r1, #31 @ clamp b |
203 | cmp r12, r2 @ clamp b | 201 | mvnhi r1, r1, asr #31 @ |
204 | andlo r2, r12, r2, asr #31 @ | 202 | andhi r1, r1, #31 @ |
205 | eorlo r2, r2, r12 @ | 203 | cmp r11, #31 @ clamp r |
206 | cmp r12, r11 @ clamp r | 204 | mvnhi r11, r11, asr #31 @ |
207 | andlo r11, r12, r11, asr #31 @ | 205 | andhi r11, r11, #31 @ |
208 | eorlo r11, r11, r12 @ | 206 | cmp r7, #63 @ clamp g |
209 | cmp r12, r7, asr #1 @ clamp g | 207 | mvnhi r7, r7, asr #31 @ |
210 | andlo r7, r12, r7, asr #31 @ | 208 | andhi r7, r7, #63 @ |
211 | eorlo r7, r7, r12 @ | ||
212 | orrlo r7, r7, r7, asl #1 @ | ||
213 | 15: @ no clamp @ | 209 | 15: @ no clamp @ |
214 | @ | 210 | @ |
215 | orr r12, r2, r11, lsl #11 @ r4 = b | (r << 11) | 211 | orr r12, r1, r11, lsl #11 @ r12 = b | (r << 11) |
216 | orr r12, r12, r7, lsl #5 @ r4 |= (g << 5) | 212 | orr r12, r12, r7, lsl #5 @ r12 |= (g << 5) |
217 | strh r12, [r0, #LCD_WIDTH]! @ store pixel | 213 | ldrb r7, [r4, #1]! @ r7 = Y' = *(++Y'_p) |
214 | strh r12, [r0, #-2] @ store pixel | ||
218 | add r0, r0, #2*LCD_WIDTH @ | 215 | add r0, r0, #2*LCD_WIDTH @ |
219 | @ | 216 | @ |
220 | subs r3, r3, #2 @ | 217 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
221 | bgt 10b @ loop line 1 @ | 218 | add r12, r7, r7, asl #2 @ |
222 | @ do second line | 219 | add r7, r12, r7, asl #5 @ |
223 | @ | ||
224 | ldmfd sp!, { r0, r3 } @ pop dst and width | ||
225 | sub r0, r0, #2 @ set dst to start of next line | ||
226 | sub r1, r1, r3, asl #1 @ rewind chroma pointer... | ||
227 | ldr r2, [sp, #40] @ r2 = stride | ||
228 | add r1, r1, r3, asr #1 @ ... (r1 -= width/2*3) | ||
229 | @ move sources to start of next line | ||
230 | sub r2, r2, r3 @ r2 = skip = stride - width | ||
231 | add r4, r4, r2 @ r4 = Y'_p + skip | ||
232 | @ | ||
233 | 20: @ loop line 2 @ | ||
234 | ldrb r2, [r4], #1 @ r7 = Y' = *Y'_p++ | ||
235 | ldrsb r8, [r1], #1 @ reload saved chromas | ||
236 | ldrsb r9, [r1], #1 @ | ||
237 | ldrsb r10, [r1], #1 @ | ||
238 | @ | ||
239 | sub r2, r2, #16 @ r2 = Y = (Y' - 16)*74 | ||
240 | mul r7, r2, r14 @ | ||
241 | @ compute R, G, and B | 220 | @ compute R, G, and B |
242 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 221 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
243 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 222 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
244 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 223 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
245 | @ | 224 | @ |
246 | orr r12, r2, r11 @ check if clamping is needed... | 225 | orr r12, r1, r11 @ check if clamping is needed... |
247 | orr r12, r12, r7, asr #1 @ ...at all | 226 | orr r12, r12, r7, asr #1 @ ...at all |
248 | cmp r12, #31 @ | 227 | cmp r12, #31 @ |
249 | bls 25f @ no clamp @ | 228 | bls 15f @ no clamp @ |
250 | mov r12, #31 @ | 229 | cmp r1, #31 @ clamp b |
251 | cmp r12, r2 @ clamp b | 230 | mvnhi r1, r1, asr #31 @ |
252 | andlo r2, r12, r2, asr #31 @ | 231 | andhi r1, r1, #31 @ |
253 | eorlo r2, r2, r12 @ | 232 | cmp r11, #31 @ clamp r |
254 | cmp r12, r11 @ clamp r | 233 | mvnhi r11, r11, asr #31 @ |
255 | andlo r11, r12, r11, asr #31 @ | 234 | andhi r11, r11, #31 @ |
256 | eorlo r11, r11, r12 @ | 235 | cmp r7, #63 @ clamp g |
257 | cmp r12, r7, asr #1 @ clamp g | 236 | mvnhi r7, r7, asr #31 @ |
258 | andlo r7, r12, r7, asr #31 @ | 237 | andhi r7, r7, #63 @ |
259 | eorlo r7, r7, r12 @ | 238 | 15: @ no clamp @ |
260 | orrlo r7, r7, r7, asl #1 @ | ||
261 | 25: @ no clamp @ | ||
262 | @ | ||
263 | orr r12, r2, r11, lsl #11 @ r4 = b | (r << 11) | ||
264 | ldrb r2, [r4], #1 @ r2 = Y' = *Y'_p++ | ||
265 | orr r12, r12, r7, lsl #5 @ r4 |= (g << 5) | ||
266 | strh r12, [r0], #LCD_WIDTH @ store pixel | ||
267 | @ | 239 | @ |
268 | @ do second pixel | 240 | orr r12, r1, r7, lsl #5 @ r12 = b | (g << 5) |
241 | ldrb r7, [r4, r3] @ r7 = Y' = *(Y'_p + stride) | ||
242 | orr r12, r12, r11, lsl #11 @ r12 |= (r << 11) | ||
243 | strh r12, [r0] @ store pixel | ||
269 | @ | 244 | @ |
270 | sub r2, r2, #16 @ r2 = Y = (Y' - 16)*74 | 245 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
271 | mul r7, r2, r14 @ | 246 | add r12, r7, r7, asl #2 @ |
247 | add r7, r12, r7, asl #5 @ | ||
272 | @ compute R, G, and B | 248 | @ compute R, G, and B |
273 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 249 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
274 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 250 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
275 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 251 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
276 | @ | 252 | @ |
277 | orr r12, r2, r11 @ check if clamping is needed... | 253 | orr r12, r1, r11 @ check if clamping is needed... |
278 | orr r12, r12, r7, asr #1 @ ...at all | 254 | orr r12, r12, r7, asr #1 @ ...at all |
279 | cmp r12, #31 @ | 255 | cmp r12, #31 @ |
280 | bls 25f @ no clamp @ | 256 | bls 15f @ no clamp @ |
281 | mov r12, #31 @ | 257 | cmp r1, #31 @ clamp b |
282 | cmp r12, r2 @ clamp b | 258 | mvnhi r1, r1, asr #31 @ |
283 | andlo r2, r12, r2, asr #31 @ | 259 | andhi r1, r1, #31 @ |
284 | eorlo r2, r2, r12 @ | 260 | cmp r11, #31 @ clamp r |
285 | cmp r12, r11 @ clamp r | 261 | mvnhi r11, r11, asr #31 @ |
286 | andlo r11, r12, r11, asr #31 @ | 262 | andhi r11, r11, #31 @ |
287 | eorlo r11, r11, r12 @ | 263 | cmp r7, #63 @ clamp g |
288 | cmp r12, r7, asr #1 @ clamp g | 264 | mvnhi r7, r7, asr #31 @ |
289 | andlo r7, r12, r7, asr #31 @ | 265 | andhi r7, r7, #63 @ |
290 | eorlo r7, r7, r12 @ | 266 | 15: @ no clamp @ |
291 | orrlo r7, r7, r7, asl #1 @ | ||
292 | 25: @ no clamp @ | ||
293 | @ | 267 | @ |
294 | orr r12, r2, r11, lsl #11 @ r4 = b | (r << 11) | 268 | orr r12, r1, r11, lsl #11 @ r12 = b | (r << 11) |
295 | orr r12, r12, r7, lsl #5 @ r4 |= (g << 5) | 269 | orr r12, r12, r7, lsl #5 @ r12 |= (g << 5) |
296 | strh r12, [r0, #LCD_WIDTH]! @ store pixel | 270 | strh r12, [r0, #-2] @ store pixel |
297 | add r0, r0, #2*LCD_WIDTH @ | 271 | add r0, r0, #2*LCD_WIDTH @ |
272 | add r4, r4, #1 @ | ||
298 | @ | 273 | @ |
299 | subs r3, r3, #2 @ | 274 | subs r2, r2, #2 @ subtract block from width |
300 | bgt 20b @ loop line 2 @ | 275 | bgt 10b @ loop line @ |
301 | @ | 276 | @ |
302 | ldmfd sp!, { r4-r12, pc } @ restore registers and return | 277 | ldmfd sp!, { r4-r12 } @ restore registers and return |
278 | bx lr @ | ||
303 | .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines | 279 | .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines |
diff --git a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c index b680b15d25..ab7c91437c 100644 --- a/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c +++ b/firmware/target/arm/s3c2440/gigabeat-fx/lcd-meg-fx.c | |||
@@ -250,7 +250,6 @@ void lcd_bitmap_transparent_part(const fb_data *src, int src_x, int src_y, | |||
250 | 250 | ||
251 | /* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */ | 251 | /* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */ |
252 | extern void lcd_write_yuv420_lines(fb_data *dst, | 252 | extern void lcd_write_yuv420_lines(fb_data *dst, |
253 | unsigned char chroma_buf[LCD_HEIGHT/2*3], | ||
254 | unsigned char const * const src[3], | 253 | unsigned char const * const src[3], |
255 | int width, | 254 | int width, |
256 | int stride); | 255 | int stride); |
@@ -263,7 +262,6 @@ void lcd_yuv_blit(unsigned char * const src[3], | |||
263 | { | 262 | { |
264 | /* Caches for chroma data so it only need be recalculated every other | 263 | /* Caches for chroma data so it only need be recalculated every other |
265 | line */ | 264 | line */ |
266 | unsigned char chroma_buf[LCD_HEIGHT/2*3]; /* 480 bytes */ | ||
267 | unsigned char const * yuv_src[3]; | 265 | unsigned char const * yuv_src[3]; |
268 | off_t z; | 266 | off_t z; |
269 | 267 | ||
@@ -283,8 +281,7 @@ void lcd_yuv_blit(unsigned char * const src[3], | |||
283 | 281 | ||
284 | do | 282 | do |
285 | { | 283 | { |
286 | lcd_write_yuv420_lines(dst, chroma_buf, yuv_src, width, | 284 | lcd_write_yuv420_lines(dst, yuv_src, width, stride); |
287 | stride); | ||
288 | yuv_src[0] += stride << 1; /* Skip down two luma lines */ | 285 | yuv_src[0] += stride << 1; /* Skip down two luma lines */ |
289 | yuv_src[1] += stride >> 1; /* Skip down one chroma line */ | 286 | yuv_src[1] += stride >> 1; /* Skip down one chroma line */ |
290 | yuv_src[2] += stride >> 1; | 287 | yuv_src[2] += stride >> 1; |
diff --git a/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S b/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S index cd509753ed..4926c7fa79 100644 --- a/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S +++ b/firmware/target/arm/sandisk/sansa-e200/lcd-as-e200.S | |||
@@ -103,8 +103,7 @@ lcd_copy_buffer_rect: @ | |||
103 | 103 | ||
104 | /**************************************************************************** | 104 | /**************************************************************************** |
105 | * void lcd_write_yuv420_lines(fb_data *dst, | 105 | * void lcd_write_yuv420_lines(fb_data *dst, |
106 | * unsigned char chroma_buf[LCD_HEIGHT/2*3], | 106 | * unsigned char const * const src[3], |
107 | unsigned char const * const src[3], | ||
108 | * int width, | 107 | * int width, |
109 | * int stride); | 108 | * int stride); |
110 | * | 109 | * |
@@ -115,189 +114,166 @@ lcd_copy_buffer_rect: @ | |||
115 | * |R| |74 0 101| |Y' - 16| >> 9 | 114 | * |R| |74 0 101| |Y' - 16| >> 9 |
116 | * |G| = |74 -24 -51| |Cb - 128| >> 8 | 115 | * |G| = |74 -24 -51| |Cb - 128| >> 8 |
117 | * |B| |74 128 0| |Cr - 128| >> 9 | 116 | * |B| |74 128 0| |Cr - 128| >> 9 |
117 | * | ||
118 | * Write four RGB565 pixels in the following order on each loop: | ||
119 | * 1 3 + > down | ||
120 | * 2 4 \/ left | ||
118 | */ | 121 | */ |
119 | .section .icode, "ax", %progbits | 122 | .section .icode, "ax", %progbits |
120 | .align 2 | 123 | .align 2 |
121 | .global lcd_write_yuv420_lines | 124 | .global lcd_write_yuv420_lines |
122 | .type lcd_write_yuv420_lines, %function | 125 | .type lcd_write_yuv420_lines, %function |
123 | lcd_write_yuv420_lines: | 126 | lcd_write_yuv420_lines: |
124 | @ r0 = dst | 127 | @ r0 = dst |
125 | @ r1 = chroma_buf | 128 | @ r1 = yuv_src |
126 | @ r2 = yuv_src | 129 | @ r2 = width |
127 | @ r3 = width | 130 | @ r3 = stride |
128 | @ [sp] = stride | 131 | stmfd sp!, { r4-r12 } @ save non-scratch |
129 | stmfd sp!, { r4-r12, lr } @ save non-scratch | 132 | ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p |
130 | stmfd sp!, { r0, r3 } @ save dst and width | ||
131 | mov r14, #74 @ r14 = Y factor | ||
132 | ldmia r2, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p | ||
133 | @ r5 = yuv_src[1] = Cb_p | 133 | @ r5 = yuv_src[1] = Cb_p |
134 | @ r6 = yuv_src[2] = Cr_p | 134 | @ r6 = yuv_src[2] = Cr_p |
135 | 10: @ loop line 1 @ | 135 | @ r1 = scratch |
136 | ldrb r2, [r4], #1 @ r2 = *Y'_p++; | 136 | 10: @ loop line @ |
137 | ldrb r8, [r5], #1 @ r8 = *Cb_p++; | 137 | ldrb r7, [r4] @ r7 = *Y'_p; |
138 | ldrb r11, [r6], #1 @ r11 = *Cr_p++; | 138 | ldrb r8, [r5], #1 @ r8 = *Cb_p++; |
139 | ldrb r9, [r6], #1 @ r9 = *Cr_p++; | ||
139 | @ | 140 | @ |
140 | @ compute Y | 141 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
141 | sub r2, r2, #16 @ r7 = Y = (Y' - 16)*74 | 142 | add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right |
142 | mul r7, r2, r14 @ | 143 | add r7, r12, r7, asl #5 @ by one less when adding - same for all |
143 | @ | 144 | @ |
144 | sub r8, r8, #128 @ Cb -= 128 | 145 | sub r8, r8, #128 @ Cb -= 128 |
145 | sub r11, r11, #128 @ Cr -= 128 | 146 | sub r9, r9, #128 @ Cr -= 128 |
146 | @ | 147 | @ |
147 | mvn r2, #23 @ compute guv | 148 | add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24 |
148 | mul r10, r2, r8 @ r10 = Cb*-24 | 149 | add r10, r10, r10, asl #4 @ |
149 | mvn r2, #50 @ | 150 | add r10, r10, r8, asl #3 @ |
150 | mla r10, r2, r11, r10 @ r10 = r10 + Cr*-51 | 151 | add r10, r10, r8, asl #4 @ |
151 | @ | 152 | @ |
152 | mov r2, #101 @ compute rv | 153 | add r11, r9, r9, asl #2 @ r9 = Cr*101 |
153 | mul r9, r11, r2 @ r9 = rv = Cr*101 | 154 | add r11, r11, r9, asl #5 @ |
155 | add r9, r11, r9, asl #6 @ | ||
154 | @ | 156 | @ |
155 | @ store chromas in line buffer | 157 | add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9 |
156 | add r8, r8, #2 @ bu = (Cb + 2) >> 2 | 158 | mov r8, r8, asr #2 @ |
157 | mov r8, r8, asr #2 @ | 159 | add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9 |
158 | strb r8, [r1], #1 @ | ||
159 | add r9, r9, #256 @ rv = (Cr + 256) >> 9 | ||
160 | mov r9, r9, asr #9 @ | 160 | mov r9, r9, asr #9 @ |
161 | strb r9, [r1], #1 @ | 161 | rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8 |
162 | mov r10, r10, asr #8 @ guv >>= 8 | 162 | mov r10, r10, asr #8 @ |
163 | strb r10, [r1], #1 @ | ||
164 | @ compute R, G, and B | 163 | @ compute R, G, and B |
165 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 164 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
166 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 165 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
167 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 166 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
168 | @ | 167 | @ |
169 | orr r12, r2, r11 @ check if clamping is needed... | 168 | orr r12, r1, r11 @ check if clamping is needed... |
170 | orr r12, r12, r7, asr #1 @ ...at all | 169 | orr r12, r12, r7, asr #1 @ ...at all |
171 | cmp r12, #31 @ | 170 | cmp r12, #31 @ |
172 | bls 15f @ no clamp @ | 171 | bls 15f @ no clamp @ |
173 | mov r12, #31 @ | 172 | cmp r1, #31 @ clamp b |
174 | cmp r12, r2 @ clamp b | 173 | mvnhi r1, r1, asr #31 @ |
175 | andlo r2, r12, r2, asr #31 @ | 174 | andhi r1, r1, #31 @ |
176 | eorlo r2, r2, r12 @ | 175 | cmp r11, #31 @ clamp r |
177 | cmp r12, r11 @ clamp r | 176 | mvnhi r11, r11, asr #31 @ |
178 | andlo r11, r12, r11, asr #31 @ | 177 | andhi r11, r11, #31 @ |
179 | eorlo r11, r11, r12 @ | 178 | cmp r7, #63 @ clamp g |
180 | cmp r12, r7, asr #1 @ clamp g | 179 | mvnhi r7, r7, asr #31 @ |
181 | andlo r7, r12, r7, asr #31 @ | 180 | andhi r7, r7, #63 @ |
182 | eorlo r7, r7, r12 @ | ||
183 | orrlo r7, r7, r7, asl #1 @ | ||
184 | 15: @ no clamp @ | 181 | 15: @ no clamp @ |
185 | @ | 182 | @ |
186 | orr r12, r2, r7, lsl #5 @ r4 |= (g << 5) | 183 | orr r12, r1, r7, lsl #5 @ r12 = b | (g << 5) |
187 | ldrb r2, [r4], #1 @ r2 = Y' = *Y'_p++ | 184 | ldrb r7, [r4, r3] @ r7 = Y' = *(Y'_p + stride) |
188 | orr r12, r12, r11, lsl #11 @ r4 = b | (r << 11) | 185 | orr r12, r12, r11, lsl #11 @ r12 |= (r << 11) |
189 | strh r12, [r0], #LCD_WIDTH @ store pixel | 186 | strh r12, [r0] @ store pixel |
190 | @ | 187 | @ |
191 | sub r2, r2, #16 @ r7 = Y = (Y' - 16)*74 | 188 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
192 | mul r7, r2, r14 @ next Y | 189 | add r12, r7, r7, asl #2 @ |
190 | add r7, r12, r7, asl #5 @ | ||
193 | @ compute R, G, and B | 191 | @ compute R, G, and B |
194 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 192 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
195 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 193 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
196 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 194 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
197 | @ | 195 | @ |
198 | orr r12, r2, r11 @ check if clamping is needed... | 196 | orr r12, r1, r11 @ check if clamping is needed... |
199 | orr r12, r12, r7, asr #1 @ ...at all | 197 | orr r12, r12, r7, asr #1 @ ...at all |
200 | cmp r12, #31 @ | 198 | cmp r12, #31 @ |
201 | bls 15f @ no clamp @ | 199 | bls 15f @ no clamp @ |
202 | mov r12, #31 @ | 200 | cmp r1, #31 @ clamp b |
203 | cmp r12, r2 @ clamp b | 201 | mvnhi r1, r1, asr #31 @ |
204 | andlo r2, r12, r2, asr #31 @ | 202 | andhi r1, r1, #31 @ |
205 | eorlo r2, r2, r12 @ | 203 | cmp r11, #31 @ clamp r |
206 | cmp r12, r11 @ clamp r | 204 | mvnhi r11, r11, asr #31 @ |
207 | andlo r11, r12, r11, asr #31 @ | 205 | andhi r11, r11, #31 @ |
208 | eorlo r11, r11, r12 @ | 206 | cmp r7, #63 @ clamp g |
209 | cmp r12, r7, asr #1 @ clamp g | 207 | mvnhi r7, r7, asr #31 @ |
210 | andlo r7, r12, r7, asr #31 @ | 208 | andhi r7, r7, #63 @ |
211 | eorlo r7, r7, r12 @ | ||
212 | orrlo r7, r7, r7, asl #1 @ | ||
213 | 15: @ no clamp @ | 209 | 15: @ no clamp @ |
214 | @ | 210 | @ |
215 | orr r12, r2, r11, lsl #11 @ r4 = b | (r << 11) | 211 | orr r12, r1, r11, lsl #11 @ r12 = b | (r << 11) |
216 | orr r12, r12, r7, lsl #5 @ r4 |= (g << 5) | 212 | orr r12, r12, r7, lsl #5 @ r12 |= (g << 5) |
217 | strh r12, [r0, #LCD_WIDTH]! @ store pixel | 213 | ldrb r7, [r4, #1]! @ r7 = Y' = *(++Y'_p) |
214 | strh r12, [r0, #-2] @ store pixel | ||
218 | add r0, r0, #2*LCD_WIDTH @ | 215 | add r0, r0, #2*LCD_WIDTH @ |
219 | @ | 216 | @ |
220 | subs r3, r3, #2 @ | 217 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
221 | bgt 10b @ loop line 1 @ | 218 | add r12, r7, r7, asl #2 @ |
222 | @ do second line | 219 | add r7, r12, r7, asl #5 @ |
223 | @ | ||
224 | ldmfd sp!, { r0, r3 } @ pop dst and width | ||
225 | sub r0, r0, #2 @ set dst to start of next line | ||
226 | sub r1, r1, r3, asl #1 @ rewind chroma pointer... | ||
227 | ldr r2, [sp, #40] @ r2 = stride | ||
228 | add r1, r1, r3, asr #1 @ ... (r1 -= width/2*3) | ||
229 | @ move sources to start of next line | ||
230 | sub r2, r2, r3 @ r2 = skip = stride - width | ||
231 | add r4, r4, r2 @ r4 = Y'_p + skip | ||
232 | @ | ||
233 | 20: @ loop line 2 @ | ||
234 | ldrb r2, [r4], #1 @ r7 = Y' = *Y'_p++ | ||
235 | ldrsb r8, [r1], #1 @ reload saved chromas | ||
236 | ldrsb r9, [r1], #1 @ | ||
237 | ldrsb r10, [r1], #1 @ | ||
238 | @ | ||
239 | sub r2, r2, #16 @ r2 = Y = (Y' - 16)*74 | ||
240 | mul r7, r2, r14 @ | ||
241 | @ compute R, G, and B | 220 | @ compute R, G, and B |
242 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 221 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
243 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 222 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
244 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 223 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
245 | @ | 224 | @ |
246 | orr r12, r2, r11 @ check if clamping is needed... | 225 | orr r12, r1, r11 @ check if clamping is needed... |
247 | orr r12, r12, r7, asr #1 @ ...at all | 226 | orr r12, r12, r7, asr #1 @ ...at all |
248 | cmp r12, #31 @ | 227 | cmp r12, #31 @ |
249 | bls 25f @ no clamp @ | 228 | bls 15f @ no clamp @ |
250 | mov r12, #31 @ | 229 | cmp r1, #31 @ clamp b |
251 | cmp r12, r2 @ clamp b | 230 | mvnhi r1, r1, asr #31 @ |
252 | andlo r2, r12, r2, asr #31 @ | 231 | andhi r1, r1, #31 @ |
253 | eorlo r2, r2, r12 @ | 232 | cmp r11, #31 @ clamp r |
254 | cmp r12, r11 @ clamp r | 233 | mvnhi r11, r11, asr #31 @ |
255 | andlo r11, r12, r11, asr #31 @ | 234 | andhi r11, r11, #31 @ |
256 | eorlo r11, r11, r12 @ | 235 | cmp r7, #63 @ clamp g |
257 | cmp r12, r7, asr #1 @ clamp g | 236 | mvnhi r7, r7, asr #31 @ |
258 | andlo r7, r12, r7, asr #31 @ | 237 | andhi r7, r7, #63 @ |
259 | eorlo r7, r7, r12 @ | 238 | 15: @ no clamp @ |
260 | orrlo r7, r7, r7, asl #1 @ | ||
261 | 25: @ no clamp @ | ||
262 | @ | ||
263 | orr r12, r2, r11, lsl #11 @ r4 = b | (r << 11) | ||
264 | ldrb r2, [r4], #1 @ r2 = Y' = *Y'_p++ | ||
265 | orr r12, r12, r7, lsl #5 @ r4 |= (g << 5) | ||
266 | strh r12, [r0], #LCD_WIDTH @ store pixel | ||
267 | @ | 239 | @ |
268 | @ do second pixel | 240 | orr r12, r1, r7, lsl #5 @ r12 = b | (g << 5) |
241 | ldrb r7, [r4, r3] @ r7 = Y' = *(Y'_p + stride) | ||
242 | orr r12, r12, r11, lsl #11 @ r12 |= (r << 11) | ||
243 | strh r12, [r0] @ store pixel | ||
269 | @ | 244 | @ |
270 | sub r2, r2, #16 @ r2 = Y = (Y' - 16)*74 | 245 | sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74 |
271 | mul r7, r2, r14 @ | 246 | add r12, r7, r7, asl #2 @ |
247 | add r7, r12, r7, asl #5 @ | ||
272 | @ compute R, G, and B | 248 | @ compute R, G, and B |
273 | add r2, r8, r7, asr #9 @ r2 = b = (Y >> 9) + bu | 249 | add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu |
274 | add r11, r9, r7, asr #9 @ r11 = r = (Y >> 9) + rv | 250 | add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv |
275 | add r7, r10, r7, asr #8 @ r7 = g = (Y >> 8) + guv | 251 | add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv |
276 | @ | 252 | @ |
277 | orr r12, r2, r11 @ check if clamping is needed... | 253 | orr r12, r1, r11 @ check if clamping is needed... |
278 | orr r12, r12, r7, asr #1 @ ...at all | 254 | orr r12, r12, r7, asr #1 @ ...at all |
279 | cmp r12, #31 @ | 255 | cmp r12, #31 @ |
280 | bls 25f @ no clamp @ | 256 | bls 15f @ no clamp @ |
281 | mov r12, #31 @ | 257 | cmp r1, #31 @ clamp b |
282 | cmp r12, r2 @ clamp b | 258 | mvnhi r1, r1, asr #31 @ |
283 | andlo r2, r12, r2, asr #31 @ | 259 | andhi r1, r1, #31 @ |
284 | eorlo r2, r2, r12 @ | 260 | cmp r11, #31 @ clamp r |
285 | cmp r12, r11 @ clamp r | 261 | mvnhi r11, r11, asr #31 @ |
286 | andlo r11, r12, r11, asr #31 @ | 262 | andhi r11, r11, #31 @ |
287 | eorlo r11, r11, r12 @ | 263 | cmp r7, #63 @ clamp g |
288 | cmp r12, r7, asr #1 @ clamp g | 264 | mvnhi r7, r7, asr #31 @ |
289 | andlo r7, r12, r7, asr #31 @ | 265 | andhi r7, r7, #63 @ |
290 | eorlo r7, r7, r12 @ | 266 | 15: @ no clamp @ |
291 | orrlo r7, r7, r7, asl #1 @ | ||
292 | 25: @ no clamp @ | ||
293 | @ | 267 | @ |
294 | orr r12, r2, r11, lsl #11 @ r4 = b | (r << 11) | 268 | orr r12, r1, r11, lsl #11 @ r12 = b | (r << 11) |
295 | orr r12, r12, r7, lsl #5 @ r4 |= (g << 5) | 269 | orr r12, r12, r7, lsl #5 @ r12 |= (g << 5) |
296 | strh r12, [r0, #LCD_WIDTH]! @ store pixel | 270 | strh r12, [r0, #-2] @ store pixel |
297 | add r0, r0, #2*LCD_WIDTH @ | 271 | add r0, r0, #2*LCD_WIDTH @ |
272 | add r4, r4, #1 @ | ||
298 | @ | 273 | @ |
299 | subs r3, r3, #2 @ | 274 | subs r2, r2, #2 @ subtract block from width |
300 | bgt 20b @ loop line 2 @ | 275 | bgt 10b @ loop line @ |
301 | @ | 276 | @ |
302 | ldmfd sp!, { r4-r12, pc } @ restore registers and return | 277 | ldmfd sp!, { r4-r12 } @ restore registers and return |
278 | bx lr @ | ||
303 | .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines | 279 | .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines |
diff --git a/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c b/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c index 31df3f9f19..3e1f74d7a1 100644 --- a/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c +++ b/firmware/target/arm/sandisk/sansa-e200/lcd-e200.c | |||
@@ -627,7 +627,6 @@ void lcd_blit(const fb_data* data, int x, int by, int width, | |||
627 | 627 | ||
628 | /* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */ | 628 | /* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */ |
629 | extern void lcd_write_yuv420_lines(fb_data *dst, | 629 | extern void lcd_write_yuv420_lines(fb_data *dst, |
630 | unsigned char chroma_buf[LCD_HEIGHT/2*3], | ||
631 | unsigned char const * const src[3], | 630 | unsigned char const * const src[3], |
632 | int width, | 631 | int width, |
633 | int stride); | 632 | int stride); |
@@ -638,9 +637,6 @@ void lcd_yuv_blit(unsigned char * const src[3], | |||
638 | int src_x, int src_y, int stride, | 637 | int src_x, int src_y, int stride, |
639 | int x, int y, int width, int height) | 638 | int x, int y, int width, int height) |
640 | { | 639 | { |
641 | /* Caches for chroma data so it only need be recalculated every other | ||
642 | line */ | ||
643 | static unsigned char chroma_buf[LCD_HEIGHT/2*3]; /* 330 bytes */ | ||
644 | unsigned char const * yuv_src[3]; | 640 | unsigned char const * yuv_src[3]; |
645 | off_t z; | 641 | off_t z; |
646 | 642 | ||
@@ -661,8 +657,7 @@ void lcd_yuv_blit(unsigned char * const src[3], | |||
661 | 657 | ||
662 | do | 658 | do |
663 | { | 659 | { |
664 | lcd_write_yuv420_lines(dst, chroma_buf, yuv_src, width, | 660 | lcd_write_yuv420_lines(dst, yuv_src, width, stride); |
665 | stride); | ||
666 | yuv_src[0] += stride << 1; /* Skip down two luma lines */ | 661 | yuv_src[0] += stride << 1; /* Skip down two luma lines */ |
667 | yuv_src[1] += stride >> 1; /* Skip down one chroma line */ | 662 | yuv_src[1] += stride >> 1; /* Skip down one chroma line */ |
668 | yuv_src[2] += stride >> 1; | 663 | yuv_src[2] += stride >> 1; |