summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andree Buschmann <AndreeBuschmann@t-online.de> 2008-03-27 07:46:04 +0000
committer Andree Buschmann <AndreeBuschmann@t-online.de> 2008-03-27 07:46:04 +0000
commit 2364bc213c9dd58c9e4c3faee28b766482f4de8b (patch)
tree de887deac730b41c66055e35fcf1448f1c77465e
parent a3674c6bc09ee115674efe07c4371b538558efb5 (diff)
download rockbox-2364bc213c9dd58c9e4c3faee28b766482f4de8b.tar.gz
download rockbox-2364bc213c9dd58c9e4c3faee28b766482f4de8b.zip
Save a few registers in assembler routines for iPod 5Gs LCD driver and YUV conversion.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16840 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- firmware/target/arm/ipod/video/lcd-as-video.S 68
1 files changed, 33 insertions, 35 deletions
diff --git a/firmware/target/arm/ipod/video/lcd-as-video.S b/firmware/target/arm/ipod/video/lcd-as-video.S
index 052629c576..48e83213d9 100644
--- a/firmware/target/arm/ipod/video/lcd-as-video.S
+++ b/firmware/target/arm/ipod/video/lcd-as-video.S
@@ -30,31 +30,30 @@
30 .type lcd_write_data, %function 30 .type lcd_write_data, %function
31 /* r0 = addr, must be aligned */ 31 /* r0 = addr, must be aligned */
32 lcd_write_data: /* r1 = pixel count, must be even */ 32 lcd_write_data: /* r1 = pixel count, must be even */
33 stmfd sp!, {r4-r6} 33 stmfd sp!, {r4-r5, lr}
34 mov r2, #0x30000000 /* LCD data port */ 34 mov lr, #0x30000000 /* LCD data port */
35 35
36 subs r1, r1, #16 36 subs r1, r1, #16
37.loop16: 37.loop16:
38 ldmgeia r0!, {r3-r6} 38 ldmgeia r0!, {r2-r5}
39 stmgeia r2, {r3-r6} 39 stmgeia lr, {r2-r5}
40 ldmgeia r0!, {r3-r6} 40 ldmgeia r0!, {r2-r5}
41 stmgeia r2, {r3-r6} 41 stmgeia lr, {r2-r5}
42 subges r1, r1, #16 42 subges r1, r1, #16
43 bge .loop16 43 bge .loop16
44 44
45 /* no need to correct the count, we're just checking bits from now */ 45 /* no need to correct the count, we're just checking bits from now */
46 tst r1, #8 46 tst r1, #8
47 ldmneia r0!, {r3-r6} 47 ldmneia r0!, {r2-r5}
48 stmneia r2, {r3-r6} 48 stmneia lr, {r2-r5}
49 tst r1, #4 49 tst r1, #4
50 ldmneia r0!, {r3-r4} 50 ldmneia r0!, {r2-r3}
51 stmneia r2, {r3-r4} 51 stmneia lr, {r2-r3}
52 tst r1, #2 52 tst r1, #2
53 ldrne r3, [r0], #4 53 ldrne r3, [r0], #4
54 strne r3, [r2] 54 strne r3, [lr]
55 55
56 ldmfd sp!, {r4-r6} 56 ldmfd sp!, {r4-r5, pc}
57 bx lr
58 57
59/**************************************************************************** 58/****************************************************************************
60 * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], 59 * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
@@ -86,11 +85,11 @@ lcd_write_yuv420_lines:
86 /* r1 = dst = bcmaddr */ 85 /* r1 = dst = bcmaddr */
87 /* r2 = width */ 86 /* r2 = width */
88 /* r3 = stride */ 87 /* r3 = stride */
89 stmfd sp!, { r4-r12 } /* save non-scratch */ 88 stmfd sp!, { r4-r11, lr } /* save non-scratch */
90 ldmia r0, { r10-r12 } /* r10 = yuv_src[0] = Y'_p */ 89 ldmia r0, { r9-r11 } /* r9 = yuv_src[0] = Y'_p */
91 /* r11 = yuv_src[1] = Cb_p */ 90 /* r10 = yuv_src[1] = Cb_p */
92 /* r12 = yuv_src[2] = Cr_p */ 91 /* r11 = yuv_src[2] = Cr_p */
93 add r3, r10, r3 /* r3 = &ysrc[stride] */ 92 add r3, r9, r3 /* r3 = &ysrc[stride] */
94 add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */ 93 add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */
95 mov r4, r4, asl #2 /* use words for str/ldm possibility */ 94 mov r4, r4, asl #2 /* use words for str/ldm possibility */
96 add r4, r4, #19 /* plus room for 4 additional words, */ 95 add r4, r4, #19 /* plus room for 4 additional words, */
@@ -100,11 +99,11 @@ lcd_write_yuv420_lines:
100 99
101 mov r7, r2 /* r7 = loop count */ 100 mov r7, r2 /* r7 = loop count */
102 add r8, sp, #16 /* chroma buffer */ 101 add r8, sp, #16 /* chroma buffer */
103 mov r9, #0x30000000 /* LCD data port */ 102 mov lr, #0x30000000 /* LCD data port */
104 103
105 /* The following writes dest address to BCM and waits for write ready */ 104 /* The following writes dest address to BCM and waits for write ready */
106 orr r2, r9, #0x00010000 /* r2 = BCM_WR_ADDR32 */ 105 orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */
107 orr r6, r9, #0x00030000 /* r6 = BCM_CONTROL */ 106 orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */
108 str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */ 107 str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */
109.busy_1: 108.busy_1:
110 ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */ 109 ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */
@@ -114,8 +113,8 @@ lcd_write_yuv420_lines:
114 /* 1st loop start */ 113 /* 1st loop start */
115 10: /* loop start */ 114 10: /* loop start */
116 115
117 ldrb r0, [r11], #1 /* r0 = *usrc++ = *Cb_p++ */ 116 ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
118 ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */ 117 ldrb r1, [r11], #1 /* r1 = *vsrc++ = *Cr_p++ */
119 118
120 sub r0, r0, #128 /* r0 = Cb-128 */ 119 sub r0, r0, #128 /* r0 = Cb-128 */
121 sub r1, r1, #128 /* r1 = Cr-128 */ 120 sub r1, r1, #128 /* r1 = Cr-128 */
@@ -138,7 +137,7 @@ lcd_write_yuv420_lines:
138 stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ 137 stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
139 138
140 /* 1st loop, first pixel */ 139 /* 1st loop, first pixel */
141 ldrb r5, [r10], #1 /* r5 = *ysrc++ = *Y'_p++ */ 140 ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
142 sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ 141 sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
143 add r3, r5, r5, asl #2 142 add r3, r5, r5, asl #2
144 add r5, r3, r5, asl #5 143 add r5, r3, r5, asl #5
@@ -167,7 +166,7 @@ lcd_write_yuv420_lines:
167 orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ 166 orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
168 167
169 /* 1st loop, second pixel */ 168 /* 1st loop, second pixel */
170 ldrb r4, [r10], #1 /* r4 = *ysrc++ = *Y'_p++ */ 169 ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
171 sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ 170 sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
172 add r3, r4, r4, asl #2 171 add r3, r4, r4, asl #2
173 add r4, r3, r4, asl #5 172 add r4, r3, r4, asl #5
@@ -195,7 +194,7 @@ lcd_write_yuv420_lines:
195 orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ 194 orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
196 orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ 195 orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
197 orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ 196 orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */
198 str r4, [r9] /* write packed pixels */ 197 str r4, [lr] /* write packed pixels */
199 198
200 subs r7, r7, #2 /* check for loop end */ 199 subs r7, r7, #2 /* check for loop end */
201 bgt 10b /* back to beginning */ 200 bgt 10b /* back to beginning */
@@ -203,13 +202,13 @@ lcd_write_yuv420_lines:
203 202
204 /* Reload several registers for pointer rewinding for next loop */ 203 /* Reload several registers for pointer rewinding for next loop */
205 add r8, sp, #16 /* chroma buffer */ 204 add r8, sp, #16 /* chroma buffer */
206 ldmia sp, { r1, r7, r10} /* r1 = bcmaddr */ 205 ldmia sp, { r1, r7, r9} /* r1 = bcmaddr */
207 /* r7 = loop count */ 206 /* r7 = loop count */
208 /* r10 = &ysrc[stride] */ 207 /* r9 = &ysrc[stride] */
209 208
210 /* The following writes dest address to BCM and waits for write ready */ 209 /* The following writes dest address to BCM and waits for write ready */
211 orr r2, r9, #0x00010000 /* r2 = BCM_WR_ADDR32 */ 210 orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */
212 orr r6, r9, #0x00030000 /* r6 = BCM_CONTROL */ 211 orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */
213 add r1, r1, #640 /* dst += (LCD_WIDTH*2) */ 212 add r1, r1, #640 /* dst += (LCD_WIDTH*2) */
214 str r1, [r2] /* BCM_WR_ADDR32 = dst */ 213 str r1, [r2] /* BCM_WR_ADDR32 = dst */
215.busy_2: 214.busy_2:
@@ -224,7 +223,7 @@ lcd_write_yuv420_lines:
224 ldmia r8!, {r0-r2} 223 ldmia r8!, {r0-r2}
225 224
226 /* 2nd loop, first pixel */ 225 /* 2nd loop, first pixel */
227 ldrb r5, [r10], #1 /* r5 = *ysrc++ = *Y'_p++ */ 226 ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
228 sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ 227 sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
229 add r3, r5, r5, asl #2 228 add r3, r5, r5, asl #2
230 add r5, r3, r5, asl #5 229 add r5, r3, r5, asl #5
@@ -252,7 +251,7 @@ lcd_write_yuv420_lines:
252 orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ 251 orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
253 252
254 /* 2nd loop, second pixel */ 253 /* 2nd loop, second pixel */
255 ldrb r4, [r10], #1 /* r4 = *ysrc++ = *Y'_p++ */ 254 ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
256 sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ 255 sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
257 add r3, r4, r4, asl #2 256 add r3, r4, r4, asl #2
258 add r4, r3, r4, asl #5 257 add r4, r3, r4, asl #5
@@ -280,7 +279,7 @@ lcd_write_yuv420_lines:
280 orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ 279 orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
281 orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ 280 orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
282 orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ 281 orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */
283 str r4, [r9] /* write packed pixels */ 282 str r4, [lr] /* write packed pixels */
284 283
285 subs r7, r7, #2 /* check for loop end */ 284 subs r7, r7, #2 /* check for loop end */
286 bgt 20b /* back to beginning */ 285 bgt 20b /* back to beginning */
@@ -288,8 +287,7 @@ lcd_write_yuv420_lines:
288 287
289 ldr r3, [sp, #12] 288 ldr r3, [sp, #12]
290 add sp, sp, r3 /* deallocate buffer */ 289 add sp, sp, r3 /* deallocate buffer */
291 ldmfd sp!, { r4-r12 } /* restore registers */ 290 ldmfd sp!, { r4-r11, pc } /* restore registers */
292 bx lr
293 291
294 .ltorg 292 .ltorg
295 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines 293 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines