diff options
author | Jörg Hohensohn <hohensoh@rockbox.org> | 2004-02-09 08:24:25 +0000 |
---|---|---|
committer | Jörg Hohensohn <hohensoh@rockbox.org> | 2004-02-09 08:24:25 +0000 |
commit | b646d4f278017b4b3797ab0976239bd64df8b43e (patch) | |
tree | 40458f5703e6a3dada9e7ac3ced4ba827fab94ab /firmware/drivers/ata.c | |
parent | 06500c84ce41725586fce25316bb7e633e654c59 (diff) | |
download | rockbox-b646d4f278017b4b3797ab0976239bd64df8b43e.tar.gz rockbox-b646d4f278017b4b3797ab0976239bd64df8b43e.zip |
Assembly code for copy_read_sectors() reworked: I've spread the ATA reads apart by filling in independent instructions in between them. This is also slightly faster because it avoids a pipeline stall. Hopefully this fixes the problem Kargatron had with it.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4302 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/drivers/ata.c')
-rw-r--r-- | firmware/drivers/ata.c | 59 |
1 files changed, 39 insertions, 20 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c index e114718bb4..284c599ef8 100644 --- a/firmware/drivers/ata.c +++ b/firmware/drivers/ata.c | |||
@@ -181,16 +181,23 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
181 | *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */ | 181 | *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */ |
182 | } while (buf < bufend); /* tail loop is faster */ | 182 | } while (buf < bufend); /* tail loop is faster */ |
183 | #else | 183 | #else |
184 | asm ( /* I can bring it down to 7 instructions/loop */ | 184 | /* I can bring it down to 7 instructions/loop, and exploit pipeline */ |
185 | "mov #1, r0 \n" | 185 | asm ( |
186 | "mov #1, r0 \n" /* r0 = 1; */ | ||
187 | /* correct for the "early increment" below */ | ||
188 | "add #-2,%2 \n" /* buf -= 2; */ | ||
189 | "add #-2,%3 \n" /* bufend -= 2; */ | ||
186 | "loop_b: \n" | 190 | "loop_b: \n" |
187 | "mov.w @%1,%0 \n" | 191 | "mov.w @%1,%0 \n" /* tmp = ATA_DATA; */ |
188 | "mov.b %0,@%2 \n" | 192 | /* Now we're reading from the bus, I do something independent we |
189 | "shlr8 %0 \n" | 193 | need later, to avoid pipeline stall */ |
190 | "mov.b %0,@(r0,%2) \n" | 194 | "add #0x02,%2 \n" /* buf += 2; */ |
191 | "add #0x02,%2 \n" | 195 | "cmp/hs %3,%2 \n" /* if (buf < bufend) */ |
192 | "cmp/hs %3,%2 \n" | 196 | /* now use the read result */ |
193 | "bf loop_b \n" | 197 | "mov.b %0,@%2 \n" /* buf[0] = lowbyte(tmp); */ |
198 | "shlr8 %0 \n" /* tmp >>= 8; */ | ||
199 | "mov.b %0,@(r0,%2) \n" /* buf[r0] = lowbyte(tmp); */ | ||
200 | "bf loop_b \n" /* goto loop_b; */ | ||
194 | : /* outputs */ | 201 | : /* outputs */ |
195 | : /* inputs */ | 202 | : /* inputs */ |
196 | /* %0 */ "r"(tmp), | 203 | /* %0 */ "r"(tmp), |
@@ -212,18 +219,30 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
212 | *wbuf = SWAB16(ATA_DATA); | 219 | *wbuf = SWAB16(ATA_DATA); |
213 | } while (++wbuf < wbufend); /* tail loop is faster */ | 220 | } while (++wbuf < wbufend); /* tail loop is faster */ |
214 | #else | 221 | #else |
215 | asm ( /* I can bring it down to 9 instructions for 2 loops */ | 222 | /* I can bring it down to 9 instructions for 2 loops, and pipeline */ |
216 | "mov #2, r0 \n" | 223 | asm ( |
224 | "mov #2, r0 \n" /* r0 = 2 */ | ||
225 | /* correct for the "early increment" below */ | ||
226 | "add #-4,%2 \n" /* wbuf -= 4; */ | ||
227 | "bra enter_loop \n" /* goto enter_loop, after next instr. */ | ||
228 | "add #-4,%3 \n" /* wbufend -= 4; */ | ||
217 | "loop_w: \n" | 229 | "loop_w: \n" |
218 | "mov.w @%1,%0 \n" | 230 | /* use read result and store, from last round */ |
219 | "swap.b %0,%0 \n" | 231 | "swap.b %0,%0 \n" /* endian_swap(tmp); */ |
220 | "mov.w %0,@%2 \n" | 232 | "mov.w %0,@(r0,%2) \n" /* wbuf[r0] = tmp; */ |
221 | "mov.w @%1,%0 \n" /* unrolled, do one more */ | 233 | "enter_loop: \n" |
222 | "swap.b %0,%0 \n" | 234 | "mov.w @%1,%0 \n" /* tmp = ATA_DATA; */ |
223 | "mov.w %0,@(r0,%2) \n" | 235 | /* keep the pipeline busy with 2 independent instructions */ |
224 | "add #0x04,%2 \n" | 236 | "add #0x04,%2 \n" /* wbuf += 4; */ |
225 | "cmp/hs %3,%2 \n" | 237 | "cmp/hs %3,%2 \n" /* if (wbuf < wbufend) */ |
226 | "bf loop_w \n" | 238 | "swap.b %0,%0 \n" /* endian_swap(tmp); */ |
239 | "mov.w %0,@%2 \n" /* wbuf[0] = tmp; */ | ||
240 | /* unrolled, do one more */ | ||
241 | "mov.w @%1,%0 \n" /* tmp = ATA_DATA; */ | ||
242 | /* use and store later, to keep pipeline busy */ | ||
243 | "bf loop_w \n" /* goto loop_w; */ | ||
244 | "swap.b %0,%0 \n" /* endian_swap(tmp); */ | ||
245 | "mov.w %0,@(r0,%2) \n" /* wbuf[r0] = tmp; */ | ||
227 | : /* outputs */ | 246 | : /* outputs */ |
228 | : /* inputs */ | 247 | : /* inputs */ |
229 | /* %0 */ "r"(tmp), | 248 | /* %0 */ "r"(tmp), |