diff options
author | Jens Arnold <amiconn@rockbox.org> | 2007-10-14 23:05:56 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2007-10-14 23:05:56 +0000 |
commit | 99f955088149d5938ce4c9ca5624377f464b1380 (patch) | |
tree | 403178f0198f8ef0b57c49be3b25f085be52aa8a /firmware/target/coldfire/iriver/h300/lcd-as-h300.S | |
parent | 57418b2192f4eb4decab716d50e09232ba22f7f4 (diff) | |
download | rockbox-99f955088149d5938ce4c9ca5624377f464b1380.tar.gz rockbox-99f955088149d5938ce4c9ca5624377f464b1380.zip |
H300, X5: Optimised lcd_yuv_blit(), using line-pair zig-zag writing to the LCD controller. ~7% speedup on H300, ~5% speedup on X5.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15111 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/target/coldfire/iriver/h300/lcd-as-h300.S')
-rw-r--r-- | firmware/target/coldfire/iriver/h300/lcd-as-h300.S | 129 |
1 files changed, 95 insertions, 34 deletions
diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S index 1873b905c6..9106e22c1c 100644 --- a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S +++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S | |||
@@ -44,6 +44,23 @@ | |||
44 | * |B| |19611723 33976259 0| |Cr - 128| >> 27 | 44 | * |B| |19611723 33976259 0| |Cr - 128| >> 27 |
45 | * | 45 | * |
46 | * Needs EMAC set to saturated, signed integer mode. | 46 | * Needs EMAC set to saturated, signed integer mode. |
47 | * | ||
48 | * register usage: | ||
49 | * %a0 - LCD data port | ||
50 | * %a1 - Y pointer | ||
51 | * %a2 - C pointer | ||
52 | * %a3 - C width | ||
53 | * %a4 - Y end address | ||
54 | * %a5 - Y factor | ||
55 | * %a6 - BU factor | ||
56 | * %d0 - scratch | ||
57 | * %d1 - B, previous Y \ alternating | ||
58 | * %d2 - U / B, previous Y / | ||
59 | * %d3 - V / G | ||
60 | * %d4 - R / output pixel | ||
61 | * %d5 - GU factor | ||
62 | * %d6 - GV factor | ||
63 | * %d7 - RGB signed -> unsigned conversion mask | ||
47 | */ | 64 | */ |
48 | .align 2 | 65 | .align 2 |
49 | .global lcd_write_yuv420_lines | 66 | .global lcd_write_yuv420_lines |
@@ -52,10 +69,10 @@ | |||
52 | lcd_write_yuv420_lines: | 69 | lcd_write_yuv420_lines: |
53 | lea.l (-44, %sp), %sp /* free up some registers */ | 70 | lea.l (-44, %sp), %sp /* free up some registers */ |
54 | movem.l %d2-%d7/%a2-%a6, (%sp) | 71 | movem.l %d2-%d7/%a2-%a6, (%sp) |
55 | 72 | ||
56 | lea.l 0xf0000002, %a0 /* LCD data port */ | 73 | lea.l 0xf0000002, %a0 /* LCD data port */ |
57 | movem.l (44+4, %sp), %a1-%a4 /* Y data, Cb data, Cr data, width */ | 74 | movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */ |
58 | lea.l (%a1, %a4), %a4 /* end address */ | 75 | lea.l (%a1, %a3*2), %a4 /* Y end address */ |
59 | 76 | ||
60 | move.l #19611723, %a5 /* y factor */ | 77 | move.l #19611723, %a5 /* y factor */ |
61 | move.l #33976259, %a6 /* bu factor */ | 78 | move.l #33976259, %a6 /* bu factor */ |
@@ -64,11 +81,11 @@ lcd_write_yuv420_lines: | |||
64 | move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion | 81 | move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion |
65 | * of R, G and B within RGB565 at once */ | 82 | * of R, G and B within RGB565 at once */ |
66 | 83 | ||
67 | /* chroma for (very) first & second pixel */ | 84 | /* chroma for first 2x2 pixel block */ |
85 | clr.l %d3 /* load v component */ | ||
86 | move.b (%a2, %a3), %d3 | ||
68 | clr.l %d2 /* load u component */ | 87 | clr.l %d2 /* load u component */ |
69 | move.b (%a2)+, %d2 | 88 | move.b (%a2)+, %d2 |
70 | clr.l %d3 /* load v component */ | ||
71 | move.b (%a3)+, %d3 | ||
72 | moveq.l #-128, %d0 | 89 | moveq.l #-128, %d0 |
73 | add.l %d0, %d2 | 90 | add.l %d0, %d2 |
74 | add.l %d0, %d3 | 91 | add.l %d0, %d3 |
@@ -79,9 +96,9 @@ lcd_write_yuv420_lines: | |||
79 | move.l #26881894, %d0 /* rv factor */ | 96 | move.l #26881894, %d0 /* rv factor */ |
80 | mac.l %d0, %d3, %acc2 /* rv */ | 97 | mac.l %d0, %d3, %acc2 /* rv */ |
81 | 98 | ||
82 | /* luma for (very) first pixel */ | 99 | /* luma for very first pixel (top left) */ |
83 | clr.l %d1 | 100 | clr.l %d1 |
84 | move.b (%a1)+, %d1 | 101 | move.b (%a1, %a3*2), %d1 |
85 | moveq.l #-126, %d0 | 102 | moveq.l #-126, %d0 |
86 | add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ | 103 | add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ |
87 | mac.l %a5, %d0, %acc0 | 104 | mac.l %a5, %d0, %acc0 |
@@ -91,11 +108,11 @@ lcd_write_yuv420_lines: | |||
91 | bra.b .yuv_line_entry | 108 | bra.b .yuv_line_entry |
92 | 109 | ||
93 | .yuv_line_loop: | 110 | .yuv_line_loop: |
94 | /* chroma for first & second pixel */ | 111 | /* chroma for 2x2 pixel block */ |
112 | clr.l %d3 /* load v component */ | ||
113 | move.b (%a2, %a3), %d3 | ||
95 | clr.l %d2 /* load u component */ | 114 | clr.l %d2 /* load u component */ |
96 | move.b (%a2)+, %d2 | 115 | move.b (%a2)+, %d2 |
97 | clr.l %d3 /* load v component */ | ||
98 | move.b (%a3)+, %d3 | ||
99 | moveq.l #-128, %d0 | 116 | moveq.l #-128, %d0 |
100 | add.l %d0, %d2 | 117 | add.l %d0, %d2 |
101 | add.l %d0, %d3 | 118 | add.l %d0, %d3 |
@@ -106,16 +123,16 @@ lcd_write_yuv420_lines: | |||
106 | move.l #26881894, %d0 /* rv factor */ | 123 | move.l #26881894, %d0 /* rv factor */ |
107 | mac.l %d0, %d3, %acc2 /* rv */ | 124 | mac.l %d0, %d3, %acc2 /* rv */ |
108 | 125 | ||
109 | /* luma for first pixel */ | 126 | /* luma for first pixel (top left) */ |
110 | clr.l %d1 | 127 | clr.l %d1 |
111 | move.b (%a1)+, %d1 | 128 | move.b (%a1, %a3*2), %d1 |
112 | moveq.l #-126, %d0 | 129 | moveq.l #-126, %d0 |
113 | add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ | 130 | add.l %d1, %d0 /* y' (-0.5 ... +0.5) */ |
114 | mac.l %a5, %d0, %acc0 | 131 | mac.l %a5, %d0, %acc0 |
115 | mac.l %a5, %d0, %acc1 | 132 | mac.l %a5, %d0, %acc1 |
116 | mac.l %a5, %d0, %acc2 | 133 | mac.l %a5, %d0, %acc2 |
117 | 134 | ||
118 | move.w %d4, (%a0) | 135 | move.w %d4, (%a0) |
119 | /* LCD write is delayed one pixel to use it for filling the EMAC latency */ | 136 | /* LCD write is delayed one pixel to use it for filling the EMAC latency */ |
120 | 137 | ||
121 | /* convert to RGB565, pack and output */ | 138 | /* convert to RGB565, pack and output */ |
@@ -134,22 +151,50 @@ lcd_write_yuv420_lines: | |||
134 | or.l %d2, %d4 | 151 | or.l %d2, %d4 |
135 | eor.l %d7, %d4 | 152 | eor.l %d7, %d4 |
136 | 153 | ||
137 | /* luma for second pixel as delta from the first */ | 154 | /* luma for second pixel (bottom left) as delta from the first */ |
138 | clr.l %d0 | 155 | clr.l %d2 |
139 | move.b (%a1)+, %d0 | 156 | move.b (%a1)+, %d2 |
157 | move.l %d2, %d0 | ||
140 | sub.l %d1, %d0 | 158 | sub.l %d1, %d0 |
141 | mac.l %a5, %d0, %acc0 | 159 | mac.l %a5, %d0, %acc0 |
142 | mac.l %a5, %d0, %acc1 | 160 | mac.l %a5, %d0, %acc1 |
143 | mac.l %a5, %d0, %acc2 | 161 | mac.l %a5, %d0, %acc2 |
144 | 162 | ||
145 | move.w %d4, (%a0) | 163 | move.w %d4, (%a0) |
146 | /* LCD write is delayed one pixel to use it for filling the EMAC latency */ | 164 | /* LCD write is delayed one pixel to use it for filling the EMAC latency */ |
147 | 165 | ||
148 | /* convert to RGB565, pack and output */ | 166 | /* convert to RGB565, pack and output */ |
149 | moveq.l #27, %d0 | 167 | moveq.l #27, %d0 |
150 | movclr.l %acc0, %d2 | 168 | move.l %acc0, %d1 |
151 | movclr.l %acc1, %d3 | 169 | move.l %acc1, %d3 |
152 | movclr.l %acc2, %d4 | 170 | move.l %acc2, %d4 |
171 | lsr.l %d0, %d1 | ||
172 | lsr.l %d0, %d4 | ||
173 | moveq.l #26, %d0 | ||
174 | lsr.l %d0, %d3 | ||
175 | lsl.l #6, %d4 | ||
176 | or.l %d3, %d4 | ||
177 | lsl.l #5, %d4 | ||
178 | or.l %d1, %d4 | ||
179 | eor.l %d7, %d4 | ||
180 | |||
181 | /* luma for third pixel (top right) as delta from the second */ | ||
182 | clr.l %d1 | ||
183 | move.b (%a1, %a3*2), %d1 | ||
184 | move.l %d1, %d0 | ||
185 | sub.l %d2, %d0 | ||
186 | mac.l %a5, %d0, %acc0 | ||
187 | mac.l %a5, %d0, %acc1 | ||
188 | mac.l %a5, %d0, %acc2 | ||
189 | |||
190 | move.w %d4, (%a0) | ||
191 | /* LCD write is delayed one pixel to use it for filling the EMAC latency */ | ||
192 | |||
193 | /* convert to RGB565, pack and output */ | ||
194 | moveq.l #27, %d0 | ||
195 | move.l %acc0, %d2 | ||
196 | move.l %acc1, %d3 | ||
197 | move.l %acc2, %d4 | ||
153 | lsr.l %d0, %d2 | 198 | lsr.l %d0, %d2 |
154 | lsr.l %d0, %d4 | 199 | lsr.l %d0, %d4 |
155 | moveq.l #26, %d0 | 200 | moveq.l #26, %d0 |
@@ -160,24 +205,40 @@ lcd_write_yuv420_lines: | |||
160 | or.l %d2, %d4 | 205 | or.l %d2, %d4 |
161 | eor.l %d7, %d4 | 206 | eor.l %d7, %d4 |
162 | 207 | ||
208 | /* luma for fourth pixel (bottom right) as delta from the third */ | ||
209 | clr.l %d2 | ||
210 | move.b (%a1)+, %d2 | ||
211 | move.l %d2, %d0 | ||
212 | sub.l %d1, %d0 | ||
213 | mac.l %a5, %d0, %acc0 | ||
214 | mac.l %a5, %d0, %acc1 | ||
215 | mac.l %a5, %d0, %acc2 | ||
216 | |||
217 | move.w %d4, (%a0) | ||
218 | /* LCD write is delayed one pixel to use it for filling the EMAC latency */ | ||
219 | |||
220 | /* convert to RGB565, pack and output */ | ||
221 | moveq.l #27, %d0 | ||
222 | movclr.l %acc0, %d1 | ||
223 | movclr.l %acc1, %d3 | ||
224 | movclr.l %acc2, %d4 | ||
225 | lsr.l %d0, %d1 | ||
226 | lsr.l %d0, %d4 | ||
227 | moveq.l #26, %d0 | ||
228 | lsr.l %d0, %d3 | ||
229 | lsl.l #6, %d4 | ||
230 | or.l %d3, %d4 | ||
231 | lsl.l #5, %d4 | ||
232 | or.l %d1, %d4 | ||
233 | eor.l %d7, %d4 | ||
234 | |||
163 | cmp.l %a1, %a4 /* run %a1 up to end of line */ | 235 | cmp.l %a1, %a4 /* run %a1 up to end of line */ |
164 | bhi.w .yuv_line_loop | 236 | bhi.w .yuv_line_loop |
165 | 237 | ||
166 | tst.l (44+4, %sp) /* use original Y pointer as a flag to */ | ||
167 | beq.b .yuv_exit /* distinguish between first and second */ | ||
168 | clr.l (44+4, %sp) /* pixel line */ | ||
169 | |||
170 | /* Rewind chroma pointers */ | ||
171 | movem.l (44+8, %sp), %a2-%a4 /* Cb data, Cr data, width */ | ||
172 | lea.l (%a1, %a4), %a4 /* end address */ | ||
173 | bra.w .yuv_line_loop | ||
174 | |||
175 | .yuv_exit: | ||
176 | move.w %d4, (%a0) /* write (very) last pixel */ | 238 | move.w %d4, (%a0) /* write (very) last pixel */ |
177 | 239 | ||
178 | movem.l (%sp), %d2-%d7/%a2-%a6 | 240 | movem.l (%sp), %d2-%d7/%a2-%a6 |
179 | lea.l (44, %sp), %sp /* restore registers */ | 241 | lea.l (44, %sp), %sp /* restore registers */ |
180 | |||
181 | rts | 242 | rts |
182 | .yuv_end: | 243 | .yuv_end: |
183 | .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines | 244 | .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines |