summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andree Buschmann <AndreeBuschmann@t-online.de> 2010-12-13 20:56:53 +0000
committer Andree Buschmann <AndreeBuschmann@t-online.de> 2010-12-13 20:56:53 +0000
commit abf28a95864f86f952b77bc25740f0dcc7a560bf (patch)
tree c667eab4bc5f58004c2c427fcd15c8b16c79d0c4
parent 298bbe8d3c1ab612d43a56b659a117d0ef12cd4c (diff)
download rockbox-abf28a95864f86f952b77bc25740f0dcc7a560bf.tar.gz
download rockbox-abf28a95864f86f952b77bc25740f0dcc7a560bf.zip
Speedup of iPod nano 2G YUV blitting by 3%.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28825 a1c6a512-1295-4272-9138-f99709370657
-rwxr-xr-x firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S 72
1 files changed, 35 insertions, 37 deletions
diff --git a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S
index 3902d34133..7fd703972d 100755
--- a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S
+++ b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S
@@ -100,7 +100,7 @@ lcd_write_yuv420_lines:
100 100
101 mov r7, r2 /* r7 = loop count */ 101 mov r7, r2 /* r7 = loop count */
102 add r8, sp, #16 /* chroma buffer */ 102 add r8, sp, #16 /* chroma buffer */
103 mov lr, r1 /* LCD data port = LCD_BASE */ 103 add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */
104 104
105 /* 1st loop start */ 105 /* 1st loop start */
106 10: /* loop start */ 106 10: /* loop start */
@@ -153,21 +153,21 @@ lcd_write_yuv420_lines:
153 andhi r4, r4, #31 153 andhi r4, r4, #31
154 15: /* no clamp */ 154 15: /* no clamp */
155 155
156 /* calculate pixel_1 and save to r5 for later pixel packing */ 156 /* calculate pixel_1 and save to r4 for later pixel packing */
157 orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ 157 orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
158 orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ 158 orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
159 159
160 /* 1st loop, second pixel */ 160 /* 1st loop, second pixel */
161 ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ 161 ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
162 sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ 162 sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
163 add r3, r4, r4, asl #2 163 add r3, r5, r5, asl #2
164 add r4, r3, r4, asl #5 164 add r5, r3, r5, asl #5
165 165
166 add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ 166 add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
167 add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */ 167 add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
168 add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ 168 add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
169 169
170 orr r0, r6, r4 /* check if clamping is needed... */ 170 orr r0, r6, r5 /* check if clamping is needed... */
171 orr r0, r0, r3, asr #1 /* ...at all */ 171 orr r0, r0, r3, asr #1 /* ...at all */
172 cmp r0, #31 172 cmp r0, #31
173 bls 15f /* -> no clamp */ 173 bls 15f /* -> no clamp */
@@ -177,23 +177,22 @@ lcd_write_yuv420_lines:
177 cmp r3, #63 /* clamp g */ 177 cmp r3, #63 /* clamp g */
178 mvnhi r3, r3, asr #31 178 mvnhi r3, r3, asr #31
179 andhi r3, r3, #63 179 andhi r3, r3, #63
180 cmp r4, #31 /* clamp b */ 180 cmp r5, #31 /* clamp b */
181 mvnhi r4, r4, asr #31 181 mvnhi r5, r5, asr #31
182 andhi r4, r4, #31 182 andhi r5, r5, #31
183 15: /* no clamp */ 183 15: /* no clamp */
184 184
185 /* calculate pixel_2 and pack with pixel_1 before writing */ 185 /* calculate pixel_2 and pack with pixel_1 before writing */
186 orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ 186 orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
187 orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ 187 orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
188 188
189 /* wait for FIFO half full */ 189 /* wait for FIFO half full */
190 .fifo_wait1: 190 .fifo_wait1:
191 ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */ 191 ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
192 tst r3, #0x8 192 tst r3, #0x8
193 bgt .fifo_wait1 193 bgt .fifo_wait1
194 194
195 str r5, [lr, #0x40] /* write pixel_1 */ 195 stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
196 str r4, [lr, #0x40] /* write pixel_2 */
197 196
198 subs r7, r7, #2 /* check for loop end */ 197 subs r7, r7, #2 /* check for loop end */
199 bgt 10b /* back to beginning */ 198 bgt 10b /* back to beginning */
@@ -234,21 +233,21 @@ lcd_write_yuv420_lines:
234 mvnhi r4, r4, asr #31 233 mvnhi r4, r4, asr #31
235 andhi r4, r4, #31 234 andhi r4, r4, #31
236 15: /* no clamp */ 235 15: /* no clamp */
237 /* calculate pixel_1 and save to r5 for later pixel packing */ 236 /* calculate pixel_1 and save to r4 for later pixel packing */
238 orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ 237 orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
239 orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ 238 orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
240 239
241 /* 2nd loop, second pixel */ 240 /* 2nd loop, second pixel */
242 ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ 241 ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
243 sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ 242 sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
244 add r3, r4, r4, asl #2 243 add r3, r5, r5, asl #2
245 add r4, r3, r4, asl #5 244 add r5, r3, r5, asl #5
246 245
247 add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ 246 add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
248 add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */ 247 add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
249 add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ 248 add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
250 249
251 orr r0, r6, r4 /* check if clamping is needed... */ 250 orr r0, r6, r5 /* check if clamping is needed... */
252 orr r0, r0, r3, asr #1 /* ...at all */ 251 orr r0, r0, r3, asr #1 /* ...at all */
253 cmp r0, #31 252 cmp r0, #31
254 bls 15f /* -> no clamp */ 253 bls 15f /* -> no clamp */
@@ -258,23 +257,22 @@ lcd_write_yuv420_lines:
258 cmp r3, #63 /* clamp g */ 257 cmp r3, #63 /* clamp g */
259 mvnhi r3, r3, asr #31 258 mvnhi r3, r3, asr #31
260 andhi r3, r3, #63 259 andhi r3, r3, #63
261 cmp r4, #31 /* clamp b */ 260 cmp r5, #31 /* clamp b */
262 mvnhi r4, r4, asr #31 261 mvnhi r5, r5, asr #31
263 andhi r4, r4, #31 262 andhi r5, r5, #31
264 15: /* no clamp */ 263 15: /* no clamp */
265 264
266 /* calculate pixel_2 and pack with pixel_1 before writing */ 265 /* calculate pixel_2 and pack with pixel_1 before writing */
267 orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ 266 orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
268 orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ 267 orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
269 268
270 /* wait for FIFO half full */ 269 /* wait for FIFO half full */
271 .fifo_wait2: 270 .fifo_wait2:
272 ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */ 271 ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
273 tst r3, #0x8 272 tst r3, #0x8
274 bgt .fifo_wait2 273 bgt .fifo_wait2
275 274
276 str r5, [lr, #0x40] /* write pixel_1 */ 275 stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
277 str r4, [lr, #0x40] /* write pixel_2 */
278 276
279 subs r7, r7, #2 /* check for loop end */ 277 subs r7, r7, #2 /* check for loop end */
280 bgt 20b /* back to beginning */ 278 bgt 20b /* back to beginning */