summary | refs | log | tree | commit | diff
path: root/firmware/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/drivers')
-rw-r--r-- firmware/drivers/ata.c 59
1 file changed, 39 insertions, 20 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c
index e114718bb4..284c599ef8 100644
--- a/firmware/drivers/ata.c
+++ b/firmware/drivers/ata.c
@@ -181,16 +181,23 @@ static void copy_read_sectors(unsigned char* buf, int wordcount)
181 *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */ 181 *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */
182 } while (buf < bufend); /* tail loop is faster */ 182 } while (buf < bufend); /* tail loop is faster */
183 #else 183 #else
184 asm ( /* I can bring it down to 7 instructions/loop */ 184 /* I can bring it down to 7 instructions/loop, and exploit pipeline */
185 "mov #1, r0 \n" 185 asm (
186 "mov #1, r0 \n" /* r0 = 1; */
187 /* correct for the "early increment" below */
188 "add #-2,%2 \n" /* buf -= 2; */
189 "add #-2,%3 \n" /* bufend -= 2; */
186 "loop_b: \n" 190 "loop_b: \n"
187 "mov.w @%1,%0 \n" 191 "mov.w @%1,%0 \n" /* tmp = ATA_DATA; */
188 "mov.b %0,@%2 \n" 192 /* Now we're reading from the bus, I do something independent we
189 "shlr8 %0 \n" 193 need later, to avoid pipeline stall */
190 "mov.b %0,@(r0,%2) \n" 194 "add #0x02,%2 \n" /* buf += 2; */
191 "add #0x02,%2 \n" 195 "cmp/hs %3,%2 \n" /* if (buf < bufend) */
192 "cmp/hs %3,%2 \n" 196 /* now use the read result */
193 "bf loop_b \n" 197 "mov.b %0,@%2 \n" /* buf[0] = lowbyte(tmp); */
198 "shlr8 %0 \n" /* tmp >>= 8; */
199 "mov.b %0,@(r0,%2) \n" /* buf[r0] = lowbyte(tmp); */
200 "bf loop_b \n" /* goto loop_b; */
194 : /* outputs */ 201 : /* outputs */
195 : /* inputs */ 202 : /* inputs */
196 /* %0 */ "r"(tmp), 203 /* %0 */ "r"(tmp),
@@ -212,18 +219,30 @@ static void copy_read_sectors(unsigned char* buf, int wordcount)
212 *wbuf = SWAB16(ATA_DATA); 219 *wbuf = SWAB16(ATA_DATA);
213 } while (++wbuf < wbufend); /* tail loop is faster */ 220 } while (++wbuf < wbufend); /* tail loop is faster */
214 #else 221 #else
215 asm ( /* I can bring it down to 9 instructions for 2 loops */ 222 /* I can bring it down to 9 instructions for 2 loops, and pipeline */
216 "mov #2, r0 \n" 223 asm (
224 "mov #2, r0 \n" /* r0 = 2 */
225 /* correct for the "early increment" below */
226 "add #-4,%2 \n" /* wbuf -= 4; */
227 "bra enter_loop \n" /* goto enter_loop, after next instr. */
228 "add #-4,%3 \n" /* wbufend -= 4; */
217 "loop_w: \n" 229 "loop_w: \n"
218 "mov.w @%1,%0 \n" 230 /* use read result and store, from last round */
219 "swap.b %0,%0 \n" 231 "swap.b %0,%0 \n" /* endian_swap(tmp); */
220 "mov.w %0,@%2 \n" 232 "mov.w %0,@(r0,%2) \n" /* wbuf[r0] = tmp; */
221 "mov.w @%1,%0 \n" /* unrolled, do one more */ 233 "enter_loop: \n"
222 "swap.b %0,%0 \n" 234 "mov.w @%1,%0 \n" /* tmp = ATA_DATA; */
223 "mov.w %0,@(r0,%2) \n" 235 /* keep the pipeline busy with 2 independent instructions */
224 "add #0x04,%2 \n" 236 "add #0x04,%2 \n" /* wbuf += 4; */
225 "cmp/hs %3,%2 \n" 237 "cmp/hs %3,%2 \n" /* if (wbuf < wbufend) */
226 "bf loop_w \n" 238 "swap.b %0,%0 \n" /* endian_swap(tmp); */
239 "mov.w %0,@%2 \n" /* wbuf[0] = tmp; */
240 /* unrolled, do one more */
241 "mov.w @%1,%0 \n" /* tmp = ATA_DATA; */
242 /* use and store later, to keep pipeline busy */
243 "bf loop_w \n" /* goto loop_w; */
244 "swap.b %0,%0 \n" /* endian_swap(tmp); */
245 "mov.w %0,@(r0,%2) \n" /* wbuf[r0] = tmp; */
227 : /* outputs */ 246 : /* outputs */
228 : /* inputs */ 247 : /* inputs */
229 /* %0 */ "r"(tmp), 248 /* %0 */ "r"(tmp),