diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2008-03-27 07:46:04 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2008-03-27 07:46:04 +0000 |
commit | 2364bc213c9dd58c9e4c3faee28b766482f4de8b (patch) | |
tree | de887deac730b41c66055e35fcf1448f1c77465e /firmware | |
parent | a3674c6bc09ee115674efe07c4371b538558efb5 (diff) | |
download | rockbox-2364bc213c9dd58c9e4c3faee28b766482f4de8b.tar.gz rockbox-2364bc213c9dd58c9e4c3faee28b766482f4de8b.zip |
Save a few registers in the assembler routines for the iPod 5G's LCD driver and YUV conversion.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16840 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/target/arm/ipod/video/lcd-as-video.S | 68 |
1 files changed, 33 insertions, 35 deletions
diff --git a/firmware/target/arm/ipod/video/lcd-as-video.S b/firmware/target/arm/ipod/video/lcd-as-video.S index 052629c576..48e83213d9 100644 --- a/firmware/target/arm/ipod/video/lcd-as-video.S +++ b/firmware/target/arm/ipod/video/lcd-as-video.S | |||
@@ -30,31 +30,30 @@ | |||
30 | .type lcd_write_data, %function | 30 | .type lcd_write_data, %function |
31 | /* r0 = addr, must be aligned */ | 31 | /* r0 = addr, must be aligned */ |
32 | lcd_write_data: /* r1 = pixel count, must be even */ | 32 | lcd_write_data: /* r1 = pixel count, must be even */ |
33 | stmfd sp!, {r4-r6} | 33 | stmfd sp!, {r4-r5, lr} |
34 | mov r2, #0x30000000 /* LCD data port */ | 34 | mov lr, #0x30000000 /* LCD data port */ |
35 | 35 | ||
36 | subs r1, r1, #16 | 36 | subs r1, r1, #16 |
37 | .loop16: | 37 | .loop16: |
38 | ldmgeia r0!, {r3-r6} | 38 | ldmgeia r0!, {r2-r5} |
39 | stmgeia r2, {r3-r6} | 39 | stmgeia lr, {r2-r5} |
40 | ldmgeia r0!, {r3-r6} | 40 | ldmgeia r0!, {r2-r5} |
41 | stmgeia r2, {r3-r6} | 41 | stmgeia lr, {r2-r5} |
42 | subges r1, r1, #16 | 42 | subges r1, r1, #16 |
43 | bge .loop16 | 43 | bge .loop16 |
44 | 44 | ||
45 | /* no need to correct the count, we're just checking bits from now */ | 45 | /* no need to correct the count, we're just checking bits from now */ |
46 | tst r1, #8 | 46 | tst r1, #8 |
47 | ldmneia r0!, {r3-r6} | 47 | ldmneia r0!, {r2-r5} |
48 | stmneia r2, {r3-r6} | 48 | stmneia lr, {r2-r5} |
49 | tst r1, #4 | 49 | tst r1, #4 |
50 | ldmneia r0!, {r3-r4} | 50 | ldmneia r0!, {r2-r3} |
51 | stmneia r2, {r3-r4} | 51 | stmneia lr, {r2-r3} |
52 | tst r1, #2 | 52 | tst r1, #2 |
53 | ldrne r3, [r0], #4 | 53 | ldrne r3, [r0], #4 |
54 | strne r3, [r2] | 54 | strne r3, [lr] |
55 | 55 | ||
56 | ldmfd sp!, {r4-r6} | 56 | ldmfd sp!, {r4-r5, pc} |
57 | bx lr | ||
58 | 57 | ||
59 | /**************************************************************************** | 58 | /**************************************************************************** |
60 | * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], | 59 | * extern void lcd_write_yuv420_lines(unsigned char const * const src[3], |
@@ -86,11 +85,11 @@ lcd_write_yuv420_lines: | |||
86 | /* r1 = dst = bcmaddr */ | 85 | /* r1 = dst = bcmaddr */ |
87 | /* r2 = width */ | 86 | /* r2 = width */ |
88 | /* r3 = stride */ | 87 | /* r3 = stride */ |
89 | stmfd sp!, { r4-r12 } /* save non-scratch */ | 88 | stmfd sp!, { r4-r11, lr } /* save non-scratch */ |
90 | ldmia r0, { r10-r12 } /* r10 = yuv_src[0] = Y'_p */ | 89 | ldmia r0, { r9-r11 } /* r9 = yuv_src[0] = Y'_p */ |
91 | /* r11 = yuv_src[1] = Cb_p */ | 90 | /* r10 = yuv_src[1] = Cb_p */ |
92 | /* r12 = yuv_src[2] = Cr_p */ | 91 | /* r11 = yuv_src[2] = Cr_p */ |
93 | add r3, r10, r3 /* r3 = &ysrc[stride] */ | 92 | add r3, r9, r3 /* r3 = &ysrc[stride] */ |
94 | add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */ | 93 | add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */ |
95 | mov r4, r4, asl #2 /* use words for str/ldm possibility */ | 94 | mov r4, r4, asl #2 /* use words for str/ldm possibility */ |
96 | add r4, r4, #19 /* plus room for 4 additional words, */ | 95 | add r4, r4, #19 /* plus room for 4 additional words, */ |
@@ -100,11 +99,11 @@ lcd_write_yuv420_lines: | |||
100 | 99 | ||
101 | mov r7, r2 /* r7 = loop count */ | 100 | mov r7, r2 /* r7 = loop count */ |
102 | add r8, sp, #16 /* chroma buffer */ | 101 | add r8, sp, #16 /* chroma buffer */ |
103 | mov r9, #0x30000000 /* LCD data port */ | 102 | mov lr, #0x30000000 /* LCD data port */ |
104 | 103 | ||
105 | /* The following writes dest address to BCM and waits for write ready */ | 104 | /* The following writes dest address to BCM and waits for write ready */ |
106 | orr r2, r9, #0x00010000 /* r2 = BCM_WR_ADDR32 */ | 105 | orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */ |
107 | orr r6, r9, #0x00030000 /* r6 = BCM_CONTROL */ | 106 | orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */ |
108 | str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */ | 107 | str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */ |
109 | .busy_1: | 108 | .busy_1: |
110 | ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */ | 109 | ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */ |
@@ -114,8 +113,8 @@ lcd_write_yuv420_lines: | |||
114 | /* 1st loop start */ | 113 | /* 1st loop start */ |
115 | 10: /* loop start */ | 114 | 10: /* loop start */ |
116 | 115 | ||
117 | ldrb r0, [r11], #1 /* r0 = *usrc++ = *Cb_p++ */ | 116 | ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */ |
118 | ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */ | 117 | ldrb r1, [r11], #1 /* r1 = *vsrc++ = *Cr_p++ */ |
119 | 118 | ||
120 | sub r0, r0, #128 /* r0 = Cb-128 */ | 119 | sub r0, r0, #128 /* r0 = Cb-128 */ |
121 | sub r1, r1, #128 /* r1 = Cr-128 */ | 120 | sub r1, r1, #128 /* r1 = Cr-128 */ |
@@ -138,7 +137,7 @@ lcd_write_yuv420_lines: | |||
138 | stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ | 137 | stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */ |
139 | 138 | ||
140 | /* 1st loop, first pixel */ | 139 | /* 1st loop, first pixel */ |
141 | ldrb r5, [r10], #1 /* r5 = *ysrc++ = *Y'_p++ */ | 140 | ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ |
142 | sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ | 141 | sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ |
143 | add r3, r5, r5, asl #2 | 142 | add r3, r5, r5, asl #2 |
144 | add r5, r3, r5, asl #5 | 143 | add r5, r3, r5, asl #5 |
@@ -167,7 +166,7 @@ lcd_write_yuv420_lines: | |||
167 | orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ | 166 | orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ |
168 | 167 | ||
169 | /* 1st loop, second pixel */ | 168 | /* 1st loop, second pixel */ |
170 | ldrb r4, [r10], #1 /* r4 = *ysrc++ = *Y'_p++ */ | 169 | ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ |
171 | sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ | 170 | sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ |
172 | add r3, r4, r4, asl #2 | 171 | add r3, r4, r4, asl #2 |
173 | add r4, r3, r4, asl #5 | 172 | add r4, r3, r4, asl #5 |
@@ -195,7 +194,7 @@ lcd_write_yuv420_lines: | |||
195 | orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ | 194 | orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ |
196 | orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ | 195 | orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ |
197 | orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ | 196 | orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ |
198 | str r4, [r9] /* write packed pixels */ | 197 | str r4, [lr] /* write packed pixels */ |
199 | 198 | ||
200 | subs r7, r7, #2 /* check for loop end */ | 199 | subs r7, r7, #2 /* check for loop end */ |
201 | bgt 10b /* back to beginning */ | 200 | bgt 10b /* back to beginning */ |
@@ -203,13 +202,13 @@ lcd_write_yuv420_lines: | |||
203 | 202 | ||
204 | /* Reload several registers for pointer rewinding for next loop */ | 203 | /* Reload several registers for pointer rewinding for next loop */ |
205 | add r8, sp, #16 /* chroma buffer */ | 204 | add r8, sp, #16 /* chroma buffer */ |
206 | ldmia sp, { r1, r7, r10} /* r1 = bcmaddr */ | 205 | ldmia sp, { r1, r7, r9} /* r1 = bcmaddr */ |
207 | /* r7 = loop count */ | 206 | /* r7 = loop count */ |
208 | /* r10 = &ysrc[stride] */ | 207 | /* r9 = &ysrc[stride] */ |
209 | 208 | ||
210 | /* The following writes dest address to BCM and waits for write ready */ | 209 | /* The following writes dest address to BCM and waits for write ready */ |
211 | orr r2, r9, #0x00010000 /* r2 = BCM_WR_ADDR32 */ | 210 | orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */ |
212 | orr r6, r9, #0x00030000 /* r6 = BCM_CONTROL */ | 211 | orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */ |
213 | add r1, r1, #640 /* dst += (LCD_WIDTH*2) */ | 212 | add r1, r1, #640 /* dst += (LCD_WIDTH*2) */ |
214 | str r1, [r2] /* BCM_WR_ADDR32 = dst */ | 213 | str r1, [r2] /* BCM_WR_ADDR32 = dst */ |
215 | .busy_2: | 214 | .busy_2: |
@@ -224,7 +223,7 @@ lcd_write_yuv420_lines: | |||
224 | ldmia r8!, {r0-r2} | 223 | ldmia r8!, {r0-r2} |
225 | 224 | ||
226 | /* 2nd loop, first pixel */ | 225 | /* 2nd loop, first pixel */ |
227 | ldrb r5, [r10], #1 /* r5 = *ysrc++ = *Y'_p++ */ | 226 | ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ |
228 | sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ | 227 | sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ |
229 | add r3, r5, r5, asl #2 | 228 | add r3, r5, r5, asl #2 |
230 | add r5, r3, r5, asl #5 | 229 | add r5, r3, r5, asl #5 |
@@ -252,7 +251,7 @@ lcd_write_yuv420_lines: | |||
252 | orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ | 251 | orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ |
253 | 252 | ||
254 | /* 2nd loop, second pixel */ | 253 | /* 2nd loop, second pixel */ |
255 | ldrb r4, [r10], #1 /* r4 = *ysrc++ = *Y'_p++ */ | 254 | ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ |
256 | sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ | 255 | sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ |
257 | add r3, r4, r4, asl #2 | 256 | add r3, r4, r4, asl #2 |
258 | add r4, r3, r4, asl #5 | 257 | add r4, r3, r4, asl #5 |
@@ -280,7 +279,7 @@ lcd_write_yuv420_lines: | |||
280 | orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ | 279 | orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ |
281 | orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ | 280 | orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ |
282 | orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ | 281 | orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */ |
283 | str r4, [r9] /* write packed pixels */ | 282 | str r4, [lr] /* write packed pixels */ |
284 | 283 | ||
285 | subs r7, r7, #2 /* check for loop end */ | 284 | subs r7, r7, #2 /* check for loop end */ |
286 | bgt 20b /* back to beginning */ | 285 | bgt 20b /* back to beginning */ |
@@ -288,8 +287,7 @@ lcd_write_yuv420_lines: | |||
288 | 287 | ||
289 | ldr r3, [sp, #12] | 288 | ldr r3, [sp, #12] |
290 | add sp, sp, r3 /* deallocate buffer */ | 289 | add sp, sp, r3 /* deallocate buffer */ |
291 | ldmfd sp!, { r4-r12 } /* restore registers */ | 290 | ldmfd sp!, { r4-r11, pc } /* restore registers */ |
292 | bx lr | ||
293 | 291 | ||
294 | .ltorg | 292 | .ltorg |
295 | .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines | 293 | .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines |