summaryrefslogtreecommitdiff
path: root/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/target/coldfire/iaudio/x5/lcd-as-x5.S')
-rw-r--r--firmware/target/coldfire/iaudio/x5/lcd-as-x5.S388
1 files changed, 143 insertions, 245 deletions
diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
index 6d5d324ebf..11150203af 100644
--- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
+++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
@@ -40,260 +40,158 @@
40 * |G| = |1.000000 -0.334136 -0.714136| |Pb| 40 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
41 * |B| |1.000000 1.772000 0.000000| |Pr| 41 * |B| |1.000000 1.772000 0.000000| |Pr|
42 * Scaled, normalized, rounded and tweaked to yield RGB 666: 42 * Scaled, normalized, rounded and tweaked to yield RGB 666:
43 * |R| |74 0 101| |Y' - 16| / 256 43 * |R| |19611723 0 26881894| |Y' - 16| >> 26
44 * |G| = |74 -24 -51| |Cb - 128| / 256 44 * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
45 * |B| |74 128 0| |Cr - 128| / 256 45 * |B| |19611723 33976259 0| |Cr - 128| >> 26
46 *
47 * Needs EMAC set to saturated, signed integer mode.
46 */ 48 */
47 .align 2 49 .align 2
48 .global lcd_write_yuv420_lines 50 .global lcd_write_yuv420_lines
49 .type lcd_write_yuv420_lines,@function 51 .type lcd_write_yuv420_lines, @function
52
50lcd_write_yuv420_lines: 53lcd_write_yuv420_lines:
51 lea.l (-36,%sp),%sp /* free up some registers */ 54 lea.l (-44, %sp), %sp /* free up some registers */
52 movem.l %d2-%d6/%a2-%a5,(%sp) 55 movem.l %d2-%d7/%a2-%a6, (%sp)
53 56
54 lea.l 0xf0008002,%a0 /* LCD data port */ 57 lea.l 0xf0008002, %a0 /* LCD data port */
55 movem.l (36+4,%sp),%a1-%a5 /* Y data, Cb data, guv storage, Cr data, width */ 58 movem.l (44+4, %sp), %a1-%a4 /* Y data, Cb data, Cr data, width */
56 lea.l (%a1,%a5),%a5 /* end address */ 59 lea.l (%a1, %a4), %a4 /* end address */
57 60
58.yuv_line_loop1: 61 move.l #19611723, %a5 /* y factor */
59 /** Write first pixel **/ 62 move.l #33976259, %a6 /* bu factor */
60 clr.l %d1 /* get bu component */ 63 move.l #-6406711, %d5 /* gu factor */
61 move.b (%a2),%d1 64 move.l #-13692816, %d6 /* gv factor */
62 clr.l %d3 /* get rv component */ 65 move.l #0x01040820, %d7 /* bitmask for signed->unsigned conversion
63 move.b (%a4),%d3 66 * of R, G and B within RGGB6666 at once */
64 moveq.l #-128,%d0 67
65 add.l %d0,%d1 68 /* chroma for (very) first & second pixel */
66 add.l %d0,%d3 69 clr.l %d2 /* load u component */
67 70 move.b (%a2)+, %d2
68 move.l %d1,%d2 /* %d2 = cb component for guv */ 71 clr.l %d3 /* load v component */
69 asr.l #1,%d1 /* %d1 = 128 * (Cb - 128) / 256 */ 72 move.b (%a3)+, %d3
70 move.b %d1,(%a2)+ /* save bu for next line */ 73 moveq.l #-128, %d0
71 moveq.l #-24,%d0 /* multiply first term of guv */ 74 add.l %d0, %d2
72 muls.w %d0,%d2 75 add.l %d0, %d3
73 moveq.l #-51,%d0 /* multiply second term of guv */ 76
74 muls.w %d3,%d0 77 mac.l %a6, %d2, %acc0 /* bu */
75 add.l %d0,%d2 78 mac.l %d5, %d2, %acc1 /* gu */
76 asr.l #8,%d2 79 mac.l %d6, %d3, %acc1 /* gv */
77 move.b %d2,(%a3)+ /* save guv for next line */ 80 move.l #26881894, %d0 /* rv factor */
78 moveq.l #101,%d0 81 mac.l %d0, %d3, %acc2 /* rv */
79 muls.w %d0,%d3 82
80 asr.l #8,%d3 83 /* luma for (very) first pixel */
81 move.b %d3,(%a4)+ /* save rv for next line */ 84 clr.l %d1
82 85 move.b (%a1)+, %d1
83 clr.l %d4 /* get y component */ 86 moveq.l #-126, %d0
84 move.b (%a1)+,%d4 87 add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
85 moveq.l #74,%d0 88 mac.l %a5, %d0, %acc0
86 muls.w %d0,%d4 89 mac.l %a5, %d0, %acc1
87 asr.l #8,%d4 90 mac.l %a5, %d0, %acc2
88 subq.l #4,%d4 91
89 move.l %d4,%d5 92 bra.b .yuv_line_entry
90 move.l %d4,%d6 93
91 /* : %d4,%d5,%d6 = Y, %d1 = bu, %d2 = guv, %d3 = rv */ 94.yuv_line_loop:
92 95 /* chroma for first & second pixel */
93 add.l %d3,%d4 /* get r */ 96 clr.l %d2 /* load u component */
94 add.l %d2,%d5 /* get g */ 97 move.b (%a2)+, %d2
95 add.l %d1,%d6 /* get b */ 98 clr.l %d3 /* load v component */
96 99 move.b (%a3)+, %d3
97 move.l %d6,%d0 /* is clamping needed? */ 100 moveq.l #-128, %d0
98 or.l %d5,%d0 101 add.l %d0, %d2
99 or.l %d4,%d0 102 add.l %d0, %d3
100 asr.l #6,%d0 103
101 beq.b .yuv_no_clamp1 /* values in range: skip clamping */ 104 mac.l %a6, %d2, %acc0 /* bu */
102 moveq.l #63, %d0 105 mac.l %d5, %d2, %acc1 /* gu */
103 cmp.l %d0, %d4 106 mac.l %d6, %d3, %acc1 /* gv */
104 bls.s .yuv_red_ok1 107 move.l #26881894, %d0 /* rv factor */
105 spl.b %d4 108 mac.l %d0, %d3, %acc2 /* rv */
106 and.l %d0, %d4 109
107.yuv_red_ok1: 110 /* luma for first pixel */
108 cmp.l %d0, %d5 111 clr.l %d1
109 bls.s .yuv_green_ok1 112 move.b (%a1)+, %d1
110 spl.b %d5 113 moveq.l #-126, %d0
111 and.l %d0, %d5 114 add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
112.yuv_green_ok1: 115 mac.l %a5, %d0, %acc0
113 cmp.l %d0, %d6 116 mac.l %a5, %d0, %acc1
114 bls.s .yuv_blue_ok1 117 mac.l %a5, %d0, %acc2
115 spl.b %d6 118
116 and.l %d0, %d6 119 move.w %d4, (%a0)
117.yuv_blue_ok1: 120 /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
118.yuv_no_clamp1: 121
119 /* : %d4 = R, %d5 = G, %d6 = B */ 122 /* convert to RGB666, pack and output */
120 123.yuv_line_entry:
121 move.l %d5,%d0 /* save g for lower 9 bits */ 124 moveq.l #26, %d0
122 lsl.l #3,%d4 /* R << 3 */ 125 move.l %acc0, %d4
123 lsr.l #3,%d0 /* G >> 3 */ 126 move.l %acc1, %d3
124 or.l %d4,%d0 127 move.l %acc2, %d2
125 move.w %d0,(%a0) /* |00000000|00000000|0000000r|rrrrrggg| */ 128 lsr.l %d0, %d4
126 lsl.l #6,%d5 /* G << 6 */ 129 lsr.l %d0, %d3
127 or.l %d5,%d6 /* |00000000|00000000|0000gggg|ggbbbbbb| */ 130 lsr.l %d0, %d2
128 move.w %d6,(%a0) 131
129 132 lsl.l #6, %d2
130 /** Write second pixel **/ 133 or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
131 clr.l %d4 134 lsl.l #7, %d2
132 move.b (%a1)+,%d4 /* get y component */ 135 or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
133 moveq.l #74,%d0 136 lsl.l #6, %d3
134 muls.w %d0,%d4 137 or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
135 asr.l #8,%d4 138 eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
136 subq.l #4,%d4 139 swap %d4
137 /* : %d4 = Y, %d1 = bu, %d2 = guv, %d3 = rv */ 140 move.w %d4, (%a0)
138 141 swap %d4
139 /* Add Y + each chroma component (can clobber %d1-%d3 values now) */ 142
140 add.l %d4,%d3 /* get r */ 143 /* luma for second pixel as delta from the first */
141 add.l %d4,%d2 /* get g */ 144 clr.l %d0
142 add.l %d4,%d1 /* get b */ 145 move.b (%a1)+, %d0
143 146 sub.l %d1, %d0
144 move.l %d1,%d0 /* is clamping needed? */ 147 mac.l %a5, %d0, %acc0
145 or.l %d2,%d0 148 mac.l %a5, %d0, %acc1
146 or.l %d3,%d0 149 mac.l %a5, %d0, %acc2
147 asr.l #6,%d0 150
148 beq.b .yuv_no_clamp2 /* values in range: skip clamping */ 151 move.w %d4, (%a0)
149 moveq.l #63, %d0 152 /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
150 cmp.l %d0, %d3 153
151 bls.s .yuv_red_ok2 154 /* convert to RGB666, pack and output */
152 spl.b %d3 155 moveq.l #26, %d0
153 and.l %d0, %d3 156 movclr.l %acc0, %d4
154.yuv_red_ok2: 157 movclr.l %acc1, %d3
155 cmp.l %d0, %d2 158 movclr.l %acc2, %d2
156 bls.s .yuv_green_ok2 159 lsr.l %d0, %d4
157 spl.b %d2 160 lsr.l %d0, %d3
158 and.l %d0, %d2 161 lsr.l %d0, %d2
159.yuv_green_ok2: 162
160 cmp.l %d0, %d1 163 lsl.l #6, %d2
161 bls.s .yuv_blue_ok2 164 or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
162 spl.b %d1 165 lsl.l #7, %d2
163 and.l %d0, %d1 166 or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
164.yuv_blue_ok2: 167 lsl.l #6, %d3
165.yuv_no_clamp2: 168 or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
166 /* : %d3 = R, %d2 = G, %d1 = B */ 169 eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
167 170 swap %d4
168 move.l %d2,%d0 /* save g for lower 9 bits */ 171 move.w %d4, (%a0)
169 lsl.l #3,%d3 /* R << 3 */ 172 swap %d4
170 lsr.l #3,%d0 /* G >> 3 */ 173
171 or.l %d3,%d0 /* |00000000|00000000|0000000r|rrrrrggg| */ 174 cmp.l %a1, %a4 /* run %a1 up to end of line */
172 move.w %d0,(%a0) 175 bhi.w .yuv_line_loop
173 lsl.l #6,%d2 /* G << 6 */ 176
174 or.l %d2,%d1 /* |00000000|00000000|0000gggg|ggbbbbbb| */ 177 tst.l (44+4, %sp) /* use original Y pointer as a flag to */
175 move.w %d1,(%a0) 178 beq.b .yuv_exit /* distinguish between first and second */
176 179 clr.l (44+4, %sp) /* pixel line */
177 cmp.l %a1,%a5 /* run %a1 up to end of line */
178 bhi.w .yuv_line_loop1
179 180
180 /* Rewind chroma pointers */ 181 /* Rewind chroma pointers */
181 movem.l (36+8, %sp), %a2-%a5 /* bu data, guv data, rv data, width */ 182 movem.l (44+8, %sp), %a2-%a4 /* Cb data, Cr data, width */
182 lea.l (%a1, %a5), %a5 /* next end address */ 183 lea.l (%a1, %a4), %a4 /* end address */
183 184 bra.w .yuv_line_loop
184.yuv_line_loop2: 185
185 move.b (%a2)+,%d1 /* read saved chromas and sign-extend */ 186.yuv_exit:
186 extb.l %d1 187 move.w %d4, (%a0) /* write (very) last 2nd word */
187 move.b (%a3)+,%d2
188 extb.l %d2
189 move.b (%a4)+,%d3
190 extb.l %d3
191
192 clr.l %d4
193 move.b (%a1)+,%d4 /* get y component */
194 moveq.l #74,%d0
195 muls.w %d0,%d4
196 asr.l #8,%d4
197 subq.l #4,%d4
198 move.l %d4,%d5
199 move.l %d4,%d6
200 /* : %d4,%d5,%d6 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
201
202 add.l %d3,%d4 /* get r */
203 add.l %d2,%d5 /* get g */
204 add.l %d1,%d6 /* get b */
205
206 move.l %d6,%d0 /* is clamping needed? */
207 or.l %d5,%d0
208 or.l %d4,%d0
209 asr.l #6,%d0
210 beq.b .yuv_no_clamp3 /* values in range: skip clamping */
211 moveq.l #63, %d0
212 cmp.l %d0, %d4
213 bls.s .yuv_red_ok3
214 spl.b %d4
215 and.l %d0, %d4
216.yuv_red_ok3:
217 cmp.l %d0, %d5
218 bls.s .yuv_green_ok3
219 spl.b %d5
220 and.l %d0, %d5
221.yuv_green_ok3:
222 cmp.l %d0, %d6
223 bls.s .yuv_blue_ok3
224 spl.b %d6
225 and.l %d0, %d6
226.yuv_blue_ok3:
227.yuv_no_clamp3:
228 /* : %d4 = R, %d5 = G, %d6 = B */
229
230 move.l %d5,%d0 /* save g for lower 9 bits */
231 lsl.l #3,%d4 /* R << 3 */
232 lsr.l #3,%d0 /* G >> 3 */
233 or.l %d4,%d0
234 move.w %d0,(%a0) /* |00000000|00000000|0000000r|rrrrrggg| */
235 lsl.l #6,%d5 /* G << 6 */
236 or.l %d5,%d6 /* |00000000|00000000|0000gggg|ggbbbbbb| */
237 move.w %d6,(%a0)
238
239 /** Write second pixel **/
240 clr.l %d4
241 move.b (%a1)+,%d4 /* get y component */
242 moveq.l #74,%d0
243 muls.w %d0,%d4
244 asr.l #8,%d4
245 subq.l #4,%d4
246 /* : %d4 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
247
248 /* Add Y + each chroma component (can clobber %d1-%d3 values now) */
249 add.l %d4,%d3 /* get r */
250 add.l %d4,%d2 /* get g */
251 add.l %d4,%d1 /* get b */
252
253 move.l %d1,%d0 /* is clamping needed? */
254 or.l %d2,%d0
255 or.l %d3,%d0
256 asr.l #6,%d0
257 beq.b .yuv_no_clamp4 /* values in range: skip clamping */
258 moveq.l #63, %d0
259 cmp.l %d0, %d3
260 bls.s .yuv_red_ok4
261 spl.b %d3
262 and.l %d0, %d3
263.yuv_red_ok4:
264 cmp.l %d0, %d2
265 bls.s .yuv_green_ok4
266 spl.b %d2
267 and.l %d0, %d2
268.yuv_green_ok4:
269 cmp.l %d0, %d1
270 bls.s .yuv_blue_ok4
271 spl.b %d1
272 and.l %d0, %d1
273.yuv_blue_ok4:
274.yuv_no_clamp4:
275 /* : %d3 = R, %d2 = G, %d1 = B */
276
277 move.l %d2,%d0 /* save g for lower 9 bits */
278 lsl.l #3,%d3 /* R << 3 */
279 lsr.l #3,%d0 /* G >> 3 */
280 or.l %d3,%d0 /* |00000000|00000000|0000000r|rrrrrggg| */
281 move.w %d0,(%a0)
282 lsl.l #6,%d2 /* G << 6 */
283 or.l %d2,%d1 /* |00000000|00000000|0000gggg|ggbbbbbb| */
284 move.w %d1,(%a0)
285
286 cmp.l %a1,%a5 /* run %a1 up to end of line */
287 bhi.w .yuv_line_loop2
288
289 movem.l (%sp),%d2-%d6/%a2-%a5
290 lea.l (36,%sp),%sp /* restore registers */
291 188
292 rts 189 movem.l (%sp), %d2-%d7/%a2-%a6
190 lea.l (44, %sp), %sp /* restore registers */
293 191
192 rts
294.yuv_end: 193.yuv_end:
295 .size lcd_write_yuv420_lines,.yuv_end-lcd_write_yuv420_lines 194 .size lcd_write_yuv420_lines, yuv_end - lcd_write_yuv420_lines
296/* end lcd_write_yuv420_lines */
297 195
298 196
299/* begin lcd_write_data */ 197/* begin lcd_write_data */