diff options
Diffstat (limited to 'firmware/drivers/ata.c')
-rw-r--r-- | firmware/drivers/ata.c | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c index b991387074..ffc004572d 100644 --- a/firmware/drivers/ata.c +++ b/firmware/drivers/ata.c | |||
@@ -31,9 +31,9 @@ | |||
31 | #include "hwcompat.h" | 31 | #include "hwcompat.h" |
32 | 32 | ||
33 | /* use plain C code in copy_read_sectors(), instead of tweaked assembler */ | 33 | /* use plain C code in copy_read_sectors(), instead of tweaked assembler */ |
34 | #define PREFER_C /* mystery: assembler caused problems with some disks */ | 34 | #define PREFER_C |
35 | /* use plain C code in copy_write_sectors(), instead of tweaked assembler */ | 35 | /* use plain C code in copy_write_sectors(), instead of tweaked assembler */ |
36 | #define PREFER_C_WRITING /* we don't know yet about this one */ | 36 | #define PREFER_C_WRITING |
37 | 37 | ||
38 | #define SECTOR_SIZE 512 | 38 | #define SECTOR_SIZE 512 |
39 | #define ATA_DATA (*((volatile unsigned short*)0x06104100)) | 39 | #define ATA_DATA (*((volatile unsigned short*)0x06104100)) |
@@ -184,7 +184,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
184 | unsigned char* bufend = buf + wordcount*2; | 184 | unsigned char* bufend = buf + wordcount*2; |
185 | do | 185 | do |
186 | { /* loop compiles to 9 assembler instructions */ | 186 | { /* loop compiles to 9 assembler instructions */ |
187 | /* takes 13 clock cycles because of 2 pipeline stalls */ | 187 | /* takes 14 clock cycles (2 pipeline stalls, 1 wait) */ |
188 | tmp = ATA_DATA; | 188 | tmp = ATA_DATA; |
189 | *buf++ = tmp & 0xff; /* I assume big endian */ | 189 | *buf++ = tmp & 0xff; /* I assume big endian */ |
190 | *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */ | 190 | *buf++ = tmp >> 8; /* and don't use the SWAB16 macro */ |
@@ -196,7 +196,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
196 | unsigned short* wbufend = wbuf + wordcount; | 196 | unsigned short* wbufend = wbuf + wordcount; |
197 | do | 197 | do |
198 | { /* loop compiles to 7 assembler instructions */ | 198 | { /* loop compiles to 7 assembler instructions */ |
199 | /* takes 11 clock cycles because of 2 pipeline stalls */ | 199 | /* takes 12 clock cycles (2 pipeline stalls, 1 wait) */ |
200 | *wbuf = SWAB16(ATA_DATA); | 200 | *wbuf = SWAB16(ATA_DATA); |
201 | } while (++wbuf < wbufend); /* tail loop is faster */ | 201 | } while (++wbuf < wbufend); /* tail loop is faster */ |
202 | } | 202 | } |
@@ -247,8 +247,8 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
247 | "or r2,r0 \n" /* combine with high byte of third word */ | 247 | "or r2,r0 \n" /* combine with high byte of third word */ |
248 | "mov.w r0,@(2,%0) \n" /* store at buf[2] */ | 248 | "mov.w r0,@(2,%0) \n" /* store at buf[2] */ |
249 | "bt .loop4_b \n" | 249 | "bt .loop4_b \n" |
250 | /* 24 instructions for 4 copies, takes 26 clock cycles */ | 250 | /* 24 instructions for 4 copies, takes 30 clock cycles (4 wait) */ |
251 | /* avg. 6.5 cycles per word - 100% faster */ | 251 | /* avg. 7.5 cycles per word - 86% faster */ |
252 | 252 | ||
253 | "swap.b r1,r0 \n" /* get high byte of last word */ | 253 | "swap.b r1,r0 \n" /* get high byte of last word */ |
254 | "bra .exit \n" | 254 | "bra .exit \n" |
@@ -280,8 +280,8 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
280 | "swap.b r2,r0 \n" /* swap third word */ | 280 | "swap.b r2,r0 \n" /* swap third word */ |
281 | "mov.w r0,@(2,%0) \n" /* store third word */ | 281 | "mov.w r0,@(2,%0) \n" /* store third word */ |
282 | "bt .loop4_w \n" | 282 | "bt .loop4_w \n" |
283 | /* 16 instructions for 4 copies, takes 18 clock cycles */ | 283 | /* 16 instructions for 4 copies, takes 22 clock cycles (4 wait) */ |
284 | /* avg. 4.5 cycles per word - 144% faster */ | 284 | /* avg. 5.5 cycles per word - 118% faster */ |
285 | 285 | ||
286 | "swap.b r1,r0 \n" /* swap fourth word (last round) */ | 286 | "swap.b r1,r0 \n" /* swap fourth word (last round) */ |
287 | "mov.w r0,@(4,%0) \n" /* and store it */ | 287 | "mov.w r0,@(4,%0) \n" /* and store it */ |
@@ -460,9 +460,9 @@ static void copy_write_sectors(unsigned char* buf, int wordcount) | |||
460 | unsigned short tmp = 0; | 460 | unsigned short tmp = 0; |
461 | unsigned char* bufend = buf + wordcount*2; | 461 | unsigned char* bufend = buf + wordcount*2; |
462 | do | 462 | do |
463 | { /* loop compiles to 8 assembler instructions */ | 463 | { /* loop compiles to 9 assembler instructions */ |
464 | /* takes 12 clock cycles because of 2 pipeline stalls */ | 464 | /* takes 13 clock cycles (2 pipeline stalls) */ |
465 | tmp = (unsigned short) *buf++; | 465 | tmp = (unsigned short) *buf++; |
466 | tmp |= (unsigned short) *buf++ << 8; /* I assume big endian */ | 466 | tmp |= (unsigned short) *buf++ << 8; /* I assume big endian */ |
467 | ATA_DATA = tmp; /* and don't use the SWAB16 macro */ | 467 | ATA_DATA = tmp; /* and don't use the SWAB16 macro */ |
468 | } while (buf < bufend); /* tail loop is faster */ | 468 | } while (buf < bufend); /* tail loop is faster */ |
@@ -472,8 +472,8 @@ static void copy_write_sectors(unsigned char* buf, int wordcount) | |||
472 | unsigned short* wbuf = (unsigned short*)buf; | 472 | unsigned short* wbuf = (unsigned short*)buf; |
473 | unsigned short* wbufend = wbuf + wordcount; | 473 | unsigned short* wbufend = wbuf + wordcount; |
474 | do | 474 | do |
475 | { /* loop compiles to 5 assembler instructions */ | 475 | { /* loop compiles to 6 assembler instructions */ |
476 | /* takes 9 clock cycles because of 2 pipeline stalls */ | 476 | /* takes 10 clock cycles (2 pipeline stalls) */ |
477 | ATA_DATA = SWAB16(*wbuf); | 477 | ATA_DATA = SWAB16(*wbuf); |
478 | } while (++wbuf < wbufend); /* tail loop is faster */ | 478 | } while (++wbuf < wbufend); /* tail loop is faster */ |
479 | } | 479 | } |
@@ -520,7 +520,7 @@ static void copy_write_sectors(unsigned char* buf, int wordcount) | |||
520 | "mov.w r3,@%2 \n" /* write that */ | 520 | "mov.w r3,@%2 \n" /* write that */ |
521 | "bt .w_loop2_b \n" | 521 | "bt .w_loop2_b \n" |
522 | /* 12 instructions for 2 copies, takes 14 clock cycles */ | 522 | /* 12 instructions for 2 copies, takes 14 clock cycles */ |
523 | /* avg. 7 cycles per word - 71% faster */ | 523 | /* avg. 7 cycles per word - 85% faster */ |
524 | 524 | ||
525 | /* the loop "overreads" 1 byte past the buffer end, however, the last */ | 525 | /* the loop "overreads" 1 byte past the buffer end, however, the last */ |
526 | /* byte is not written to disk */ | 526 | /* byte is not written to disk */ |
@@ -547,7 +547,7 @@ static void copy_write_sectors(unsigned char* buf, int wordcount) | |||
547 | "mov.w r0,@%2 \n" /* write first word */ | 547 | "mov.w r0,@%2 \n" /* write first word */ |
548 | "bt .w_loop2_w \n" | 548 | "bt .w_loop2_w \n" |
549 | /* 8 instructions for 2 copies, takes 10 clock cycles */ | 549 | /* 8 instructions for 2 copies, takes 10 clock cycles */ |
550 | /* avg. 5 cycles per word - 80% faster */ | 550 | /* avg. 5 cycles per word - 100% faster */ |
551 | 551 | ||
552 | "swap.b r1,r0 \n" /* swap second word (last round) */ | 552 | "swap.b r1,r0 \n" /* swap second word (last round) */ |
553 | "mov.w r0,@%2 \n" /* and write it */ | 553 | "mov.w r0,@%2 \n" /* and write it */ |
@@ -1040,7 +1040,7 @@ static int set_features(void) | |||
1040 | unsigned char subcommand; | 1040 | unsigned char subcommand; |
1041 | unsigned char parameter; | 1041 | unsigned char parameter; |
1042 | } features[] = { | 1042 | } features[] = { |
1043 | { 83, 3, 0x05, 1 }, /* power management: lowest power */ | 1043 | { 83, 3, 0x05, 0x80 }, /* power management: lowest power without standby */ |
1044 | { 83, 9, 0x42, 0x80 }, /* acoustic management: lowest noise */ | 1044 | { 83, 9, 0x42, 0x80 }, /* acoustic management: lowest noise */ |
1045 | { 82, 6, 0xaa, 0 }, /* enable read look-ahead */ | 1045 | { 82, 6, 0xaa, 0 }, /* enable read look-ahead */ |
1046 | { 83, 14, 0x03, 0 }, /* force PIO mode */ | 1046 | { 83, 14, 0x03, 0 }, /* force PIO mode */ |