diff options
author | Jörg Hohensohn <hohensoh@rockbox.org> | 2004-01-16 09:02:21 +0000 |
---|---|---|
committer | Jörg Hohensohn <hohensoh@rockbox.org> | 2004-01-16 09:02:21 +0000 |
commit | 88faf38ef7bdca708e3134f127d5c3e2a1b44d1e (patch) | |
tree | 601a830398720e0d43ed1fbe1852c3af0051212e /firmware | |
parent | 89d2039367279aece429b1e86be263578a763a3d (diff) | |
download | rockbox-88faf38ef7bdca708e3134f127d5c3e2a1b44d1e.tar.gz rockbox-88faf38ef7bdca708e3134f127d5c3e2a1b44d1e.zip |
Optimized the sector read loop as much as C allows. I measured an overall speed improvement for file reading of 12.5% for 16-bit aligned and 35% for misaligned buffers. I took the rest of ata_read_sectors() out of IRAM; it's sufficient if only the copy loop stays there.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4247 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/drivers/ata.c | 49 |
1 files changed, 34 insertions, 15 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c index 725d33e4a0..45eb1a5963 100644 --- a/firmware/drivers/ata.c +++ b/firmware/drivers/ata.c | |||
@@ -161,9 +161,39 @@ static int wait_for_end_of_transfer(void) | |||
161 | return (ATA_ALT_STATUS & (STATUS_RDY|STATUS_DRQ)) == STATUS_RDY; | 161 | return (ATA_ALT_STATUS & (STATUS_RDY|STATUS_DRQ)) == STATUS_RDY; |
162 | } | 162 | } |
163 | 163 | ||
164 | int ata_read_sectors(unsigned long start, | 164 | |
165 | int count, | 165 | /* the tight loop of ata_read_sectors(), to avoid the whole in IRAM */ |
166 | void* buf) __attribute__ ((section (".icode"))); | 166 | static void copy_read_sectors(unsigned char* buf, |
167 | int wordcount) | ||
168 | __attribute__ ((section (".icode"))); | ||
169 | static void copy_read_sectors(unsigned char* buf, int wordcount) | ||
170 | { | ||
171 | int j; | ||
172 | |||
173 | if (wordcount <= 0) | ||
174 | return; /* should never happen, but to protect my tail loop */ | ||
175 | |||
176 | if ( (unsigned int)buf & 1 ) | ||
177 | { | ||
178 | unsigned char* bufend = buf + wordcount*2; | ||
179 | do | ||
180 | { /* loop compiles to 8 assembler instructions */ | ||
181 | unsigned short tmp = ATA_DATA; | ||
182 | *buf++ = tmp & 0xff; /* I assume big endian */ | ||
183 | *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */ | ||
184 | } while (buf < bufend); /* tail loop is faster */ | ||
185 | } | ||
186 | else | ||
187 | { | ||
188 | unsigned short* wbuf = (unsigned short*)buf; | ||
189 | unsigned short* wbufend = wbuf + wordcount; | ||
190 | do | ||
191 | { /* loop compiles to 7 assembler instructions */ | ||
192 | *wbuf = SWAB16(ATA_DATA); | ||
193 | } while (++wbuf < wbufend); /* tail loop is faster */ | ||
194 | } | ||
195 | } | ||
196 | |||
167 | int ata_read_sectors(unsigned long start, | 197 | int ata_read_sectors(unsigned long start, |
168 | int incount, | 198 | int incount, |
169 | void* inbuf) | 199 | void* inbuf) |
@@ -235,7 +265,6 @@ int ata_read_sectors(unsigned long start, | |||
235 | asm volatile ("nop"); | 265 | asm volatile ("nop"); |
236 | 266 | ||
237 | while (count) { | 267 | while (count) { |
238 | int j; | ||
239 | int sectors; | 268 | int sectors; |
240 | int wordcount; | 269 | int wordcount; |
241 | int status; | 270 | int status; |
@@ -265,17 +294,7 @@ int ata_read_sectors(unsigned long start, | |||
265 | 294 | ||
266 | wordcount = sectors * SECTOR_SIZE / 2; | 295 | wordcount = sectors * SECTOR_SIZE / 2; |
267 | 296 | ||
268 | if ( (unsigned int)buf & 1 ) { | 297 | copy_read_sectors(buf, wordcount); |
269 | for (j=0; j < wordcount; j++) { | ||
270 | unsigned short tmp = SWAB16(ATA_DATA); | ||
271 | ((unsigned char*)buf)[j*2] = tmp >> 8; | ||
272 | ((unsigned char*)buf)[j*2+1] = tmp & 0xff; | ||
273 | } | ||
274 | } | ||
275 | else { | ||
276 | for (j=0; j < wordcount; j++) | ||
277 | ((unsigned short*)buf)[j] = SWAB16(ATA_DATA); | ||
278 | } | ||
279 | 298 | ||
280 | /* | 299 | /* |
281 | "Device errors encountered during READ MULTIPLE commands are | 300 | "Device errors encountered during READ MULTIPLE commands are |