author    Jens Arnold <amiconn@rockbox.org>  2010-05-31 19:56:21 +0000
committer Jens Arnold <amiconn@rockbox.org>  2010-05-31 19:56:21 +0000
commit    85fd2d8be90ab3eb9f134180357725a60f988243 (patch)
tree      0149ffdf7110b70d5077d566c0366906d861b03d
parent    c5e14b5835114faae78997f7e0b14c19966b187d (diff)
download  rockbox-85fd2d8be90ab3eb9f134180357725a60f988243.tar.gz
          rockbox-85fd2d8be90ab3eb9f134180357725a60f988243.zip
Smaller & faster greylib blitting on iriver H1x0 and iAudio M5, based on the ARM version but using mulu.l for the bit shuffling. ISR speedup is ~10%.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26434 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--  firmware/target/coldfire/iaudio/m5/lcd-as-m5.S     | 233
-rw-r--r--  firmware/target/coldfire/iriver/h100/lcd-as-h100.S | 229
2 files changed, 212 insertions, 250 deletions
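
The heart of the change is the bit shuffling in lcd_grey_data: the four phase MSBs are isolated with a mask and gathered into one output byte by a single mulu.l with the factor 0xc30c3, replacing the old per-bit bclr/seq/lsl chain. Below is a minimal C sketch of that step, not part of the commit; the mask, the factor and the shift count are taken from the new assembly, while the function and variable names are invented for illustration.

    /* Illustrative C model of the new bit-shuffle step. */
    #include <stdint.h>
    #include <stdio.h>

    /* 'phases' packs four 8-bit phase accumulators into one long word;
     * the MSB of each byte decides whether that pixel is drawn. */
    static uint8_t shuffle_msbs(uint32_t phases)
    {
        uint32_t b = phases & 0x80808080u; /* isolate the four MSBs                  */
        b >>= 1;                           /* .0.......1.......2.......3......       */
        b *= 0xC30C3u;                     /* gather them: bits 31..24 = 00112233    */
        b = ~b;                            /* negate, like the not.l in the assembly */
        return (uint8_t)(b >> 24);         /* 00112233 in the low byte               */
    }

    int main(void)
    {
        /* Pixels 0 and 2 have their MSB set, pixels 1 and 3 do not; each
         * pixel occupies two identical output bits, so the negated result
         * is 00 11 00 11 = 0x33. */
        printf("%#x\n", (unsigned)shuffle_msbs(0x80008000u));
        return 0;
    }

Shifting right by one before the multiply keeps every useful partial product inside the 32-bit result; the final shift by 24 discards everything else.
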
diff --git a/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S b/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S
index d42ee1c888..12d0c670e9 100644
--- a/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S
+++ b/firmware/target/coldfire/iaudio/m5/lcd-as-m5.S
@@ -94,151 +94,132 @@ lcd_write_data:
lcd_grey_data, as rewritten by this commit (new file lines 94-225):

lcd_grey_data:
    lea.l   (-11*4, %sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)      /* free some registers */
    movem.l (11*4+4, %sp), %a0-%a2      /* values, phases, length */
    lea.l   (%a1, %a2.l*4), %a2         /* end address */
    lea     0xf0008002, %a3             /* LCD data port */
    moveq.l #24, %d6                    /* shift count */
    move.l  #0xc30c3, %d7               /* bit shuffle factor */

    moveq.l #12, %d2
    add.l   %a1, %d2
    and.l   #0xfffffff0, %d2            /* first line bound */
    cmp.l   %d2, %a2                    /* end address lower than first line bound? */
    bhs.s   1f
    move.l  %a2, %d2                    /* -> adjust end address of head loop */
1:
    cmp.l   %a1, %d2
    bls.s   .g_hend

.g_hloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1            /* separate MSBs of the 4 phases */
    eor.l   %d1, %d0                    /* clear them in %d0 */
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1                     /* %d1 = .0.......1.......2.......3...... */
    mulu.l  %d7, %d1                    /* %d1 = 00112233112233..2233....33...... */
    not.l   %d1                         /* negate bits */
    lsr.l   %d6, %d1                    /* %d1 = ........................00112233 */
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %d2                    /* go up to first line bound */
    bhi.s   .g_hloop

.g_hend:
    cmp.l   %a1, %a2
    bls.w   .g_tend
    lea.l   (-12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_lend

.g_lloop:
    movem.l (%a1), %d0-%d3              /* fetch 4 blocks of 4 pixel phases each */

    move.l  %d0, %d4                    /* calculate first pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d0
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d4, (%a3)                  /* write first pixel block to LCD */

    move.l  %d1, %d5                    /* calculate second pixel block */
    and.l   #0x80808080, %d5
    eor.l   %d5, %d1
    lsr.l   #1, %d5
    mulu.l  %d7, %d5
    not.l   %d5
    lsr.l   %d6, %d5

    move.l  %d2, %d4                    /* calculate third pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d2
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d5, (%a3)                  /* write second pixel block to LCD */

    movem.l (%a0), %d5/%a4-%a6          /* fetch 4 blocks of 4 pixel values each */
    lea.l   (16, %a0), %a0

    move.w  %d4, (%a3)                  /* write third pixel block to LCD */

    move.l  %d3, %d4                    /* calculate fourth pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d3
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    add.l   %d5, %d0                    /* calculate 4*4 new pixel phases */
    add.l   %a4, %d1                    /* (packed addition) */
    add.l   %a5, %d2
    add.l   %a6, %d3

    movem.l %d0-%d3, (%a1)              /* store 4*4 new pixel phases */
    lea.l   (16, %a1), %a1

    move.w  %d4, (%a3)                  /* write fourth pixel block to LCD */

    cmp.l   %a1, %a2                    /* go up to last line bound */
    bhi.s   .g_lloop

.g_lend:
    lea.l   (12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_tend

.g_tloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1
    eor.l   %d1, %d0
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1
    mulu.l  %d7, %d1
    not.l   %d1
    lsr.l   %d6, %d1
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %a2                    /* go up to end address */
    bhi.s   .g_tloop

.g_tend:
    movem.l (%sp), %d2-%d7/%a2-%a6      /* restore registers */
    lea.l   (11*4, %sp), %sp
    rts

.gd_end:
diff --git a/firmware/target/coldfire/iriver/h100/lcd-as-h100.S b/firmware/target/coldfire/iriver/h100/lcd-as-h100.S
index 9ebb5752aa..b13d5146b9 100644
--- a/firmware/target/coldfire/iriver/h100/lcd-as-h100.S
+++ b/firmware/target/coldfire/iriver/h100/lcd-as-h100.S
@@ -106,153 +106,134 @@ lcd_write_data:
lcd_grey_data, as rewritten by this commit (new file lines 106-239):

lcd_grey_data:
    lea.l   (-11*4, %sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)      /* free some registers */
    movem.l (11*4+4, %sp), %a0-%a2      /* values, phases, length */
    lea.l   (%a1, %a2.l*4), %a2         /* end address */
    moveq   #8, %d1
    or.l    %d1, (MBAR2+0xb4)           /* A0 = 1 (data) */
    lea     0xf0000000, %a3             /* LCD data port */
    moveq.l #24, %d6                    /* shift count */
    move.l  #0xc30c3, %d7               /* bit shuffle factor */

    moveq.l #12, %d2
    add.l   %a1, %d2
    and.l   #0xfffffff0, %d2            /* first line bound */
    cmp.l   %d2, %a2                    /* end address lower than first line bound? */
    bhs.s   1f
    move.l  %a2, %d2                    /* -> adjust end address of head loop */
1:
    cmp.l   %a1, %d2
    bls.s   .g_hend

.g_hloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1            /* separate MSBs of the 4 phases */
    eor.l   %d1, %d0                    /* clear them in %d0 */
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1                     /* %d1 = .0.......1.......2.......3...... */
    mulu.l  %d7, %d1                    /* %d1 = 00112233112233..2233....33...... */
    not.l   %d1                         /* negate bits */
    lsr.l   %d6, %d1                    /* %d1 = ........................00112233 */
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %d2                    /* go up to first line bound */
    bhi.s   .g_hloop

.g_hend:
    cmp.l   %a1, %a2
    bls.w   .g_tend
    lea.l   (-12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_lend

.g_lloop:
    movem.l (%a1), %d0-%d3              /* fetch 4 blocks of 4 pixel phases each */

    move.l  %d0, %d4                    /* calculate first pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d0
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d4, (%a3)                  /* write first pixel block to LCD */

    move.l  %d1, %d5                    /* calculate second pixel block */
    and.l   #0x80808080, %d5
    eor.l   %d5, %d1
    lsr.l   #1, %d5
    mulu.l  %d7, %d5
    not.l   %d5
    lsr.l   %d6, %d5

    move.l  %d2, %d4                    /* calculate third pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d2
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    move.w  %d5, (%a3)                  /* write second pixel block to LCD */

    movem.l (%a0), %d5/%a4-%a6          /* fetch 4 blocks of 4 pixel values each */
    lea.l   (16, %a0), %a0

    move.w  %d4, (%a3)                  /* write third pixel block to LCD */

    move.l  %d3, %d4                    /* calculate fourth pixel block */
    and.l   #0x80808080, %d4
    eor.l   %d4, %d3
    lsr.l   #1, %d4
    mulu.l  %d7, %d4
    not.l   %d4
    lsr.l   %d6, %d4

    add.l   %d5, %d0                    /* calculate 4*4 new pixel phases */
    add.l   %a4, %d1                    /* (packed addition) */
    add.l   %a5, %d2
    add.l   %a6, %d3

    movem.l %d0-%d3, (%a1)              /* store 4*4 new pixel phases */
    lea.l   (16, %a1), %a1

    move.w  %d4, (%a3)                  /* write fourth pixel block to LCD */

    cmp.l   %a1, %a2                    /* go up to last line bound */
    bhi.s   .g_lloop

.g_lend:
    lea.l   (12, %a2), %a2
    cmp.l   %a1, %a2
    bls.s   .g_tend

.g_tloop:
    move.l  (%a1), %d0                  /* fetch 4 pixel phases */

    move.l  %d0, %d1
    and.l   #0x80808080, %d1
    eor.l   %d1, %d0
    add.l   (%a0)+, %d0                 /* add 4 pixel values to the phases */
    move.l  %d0, (%a1)+                 /* store new phases, advance pointer */

    lsr.l   #1, %d1
    mulu.l  %d7, %d1
    not.l   %d1
    lsr.l   %d6, %d1
    move.w  %d1, (%a3)                  /* write pixel block */

    cmp.l   %a1, %a2                    /* go up to end address */
    bhi.s   .g_tloop

.g_tend:
    movem.l (%sp), %d2-%d7/%a2-%a6      /* restore registers */
    lea.l   (11*4, %sp), %sp
    rts

.gd_end: