diff options
author | Jens Arnold <amiconn@rockbox.org> | 2006-03-18 10:12:50 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2006-03-18 10:12:50 +0000 |
commit | 5a121270aac16faf6638fecafd2229200092dd44 (patch) | |
tree | 140c92718b9c2b99f80dfb81317dd0bfa5cb6e3e /firmware/drivers/ata.c | |
parent | b7c7df788e6fe35c82c7bf8f296c9c86e40ff0a4 (diff) | |
download | rockbox-5a121270aac16faf6638fecafd2229200092dd44.tar.gz rockbox-5a121270aac16faf6638fecafd2229200092dd44.zip |
Coldfire: Assembler optimised disk reading, speed increase ~40% on average.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9092 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/drivers/ata.c')
-rw-r--r-- | firmware/drivers/ata.c | 194 |
1 files changed, 192 insertions, 2 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c index a603c51648..72ebee2bd3 100644 --- a/firmware/drivers/ata.c +++ b/firmware/drivers/ata.c | |||
@@ -38,8 +38,7 @@ | |||
38 | 38 | ||
39 | #if (CONFIG_CPU == MCF5249) || (CONFIG_CPU == MCF5250) | 39 | #if (CONFIG_CPU == MCF5249) || (CONFIG_CPU == MCF5250) |
40 | 40 | ||
41 | /* don't use sh7034 assembler routines */ | 41 | /* asm reading, C writing */ |
42 | #define PREFER_C_READING | ||
43 | #define PREFER_C_WRITING | 42 | #define PREFER_C_WRITING |
44 | 43 | ||
45 | #define ATA_IOBASE 0x20000000 | 44 | #define ATA_IOBASE 0x20000000 |
@@ -484,6 +483,197 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
484 | buf += SECTOR_SIZE; | 483 | buf += SECTOR_SIZE; |
485 | sectorcount--; | 484 | sectorcount--; |
486 | } while (sectorcount > 0); | 485 | } while (sectorcount > 0); |
486 | #elif defined(CPU_COLDFIRE) | ||
487 | /* coldfire asm reading, utilising line bursts */ | ||
488 | /* this assumes there is at least one full line to copy */ | ||
489 | asm ( | ||
490 | "add.l %[wcnt],%[wcnt] \n" /* wordcount -> bytecount */ | ||
491 | "add.l %[buf],%[wcnt] \n" /* bytecount -> bufend */ | ||
492 | "move.l %[buf],%%d0 \n" | ||
493 | "btst.l #0,%%d0 \n" /* 16-bit aligned? */ | ||
494 | "jeq .aligned \n" /* yes, do word copy */ | ||
495 | |||
496 | /* not 16-bit aligned */ | ||
497 | "subq.l #1,%[wcnt] \n" /* last byte is done unconditionally */ | ||
498 | "moveq.l #24,%%d1 \n" /* preload shift count */ | ||
499 | |||
500 | "move.w (%[ata]),%%d2 \n" /* load initial word */ | ||
501 | "move.l %%d2,%%d3 \n" | ||
502 | "lsr.l #8,%%d3 \n" | ||
503 | "move.b %%d3,(%[buf])+ \n" /* write high byte of it, aligns dest addr */ | ||
504 | |||
505 | "btst.l #1,%%d0 \n" /* longword aligned? */ | ||
506 | "beq.b .end_u_w1 \n" /* yes, skip leading word handling */ | ||
507 | |||
508 | "swap %%d2 \n" /* move initila word up */ | ||
509 | "move.w (%[ata]),%%d2 \n" /* combine with second word */ | ||
510 | "move.l %%d2,%%d3 \n" | ||
511 | "lsr.l #8,%%d3 \n" | ||
512 | "move.w %%d3,(%[buf])+ \n" /* write bytes 2 and 3 as word */ | ||
513 | |||
514 | ".end_u_w1: \n" | ||
515 | "moveq.l #12,%%d0 \n" | ||
516 | "add.l %[buf],%%d0 \n" | ||
517 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
518 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
519 | "bls.b .end_u_l1 \n" /* no: skip loop */ | ||
520 | |||
521 | ".loop_u_l1: \n" | ||
522 | "move.w (%[ata]),%%d3 \n" /* load first word */ | ||
523 | "swap %%d3 \n" /* move to upper 16 bit */ | ||
524 | "move.w (%[ata]),%%d3 \n" /* load second word */ | ||
525 | "move.l %%d3,%%d4 \n" | ||
526 | "lsl.l %%d1,%%d2 \n" | ||
527 | "lsr.l #8,%%d3 \n" | ||
528 | "or.l %%d3,%%d2 \n" /* combine old low byte with new top 3 bytes */ | ||
529 | "move.l %%d2,(%[buf])+ \n" /* store as long */ | ||
530 | "move.l %%d4,%%d2 \n" | ||
531 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
532 | "bhi.b .loop_u_l1 \n" | ||
533 | |||
534 | ".end_u_l1: \n" | ||
535 | "lea.l (-14,%[wcnt]),%[wcnt] \n" /* adjust end addr. to 16 bytes/pass */ | ||
536 | |||
537 | ".loop_u_line: \n" | ||
538 | "move.w (%[ata]),%%d3 \n" /* load 1st word */ | ||
539 | "swap %%d3 \n" /* move to upper 16 bit */ | ||
540 | "move.w (%[ata]),%%d3 \n" /* load 2nd word */ | ||
541 | "move.l %%d3,%%d0 \n" | ||
542 | "lsl.l %%d1,%%d2 \n" | ||
543 | "lsr.l #8,%%d0 \n" | ||
544 | "or.l %%d0,%%d2 \n" /* combine old low byte with new top 3 bytes */ | ||
545 | "move.w (%[ata]),%%d4 \n" /* load 3rd word */ | ||
546 | "swap %%d4 \n" /* move to upper 16 bit */ | ||
547 | "move.w (%[ata]),%%d4 \n" /* load 4th word */ | ||
548 | "move.l %%d4,%%d0 \n" | ||
549 | "lsl.l %%d1,%%d3 \n" | ||
550 | "lsr.l #8,%%d0 \n" | ||
551 | "or.l %%d0,%%d3 \n" /* combine old low byte with new top 3 bytes */ | ||
552 | "move.w (%[ata]),%%d5 \n" /* load 5th word */ | ||
553 | "swap %%d5 \n" /* move to upper 16 bit */ | ||
554 | "move.w (%[ata]),%%d5 \n" /* load 6th word */ | ||
555 | "move.l %%d5,%%d0 \n" | ||
556 | "lsl.l %%d1,%%d4 \n" | ||
557 | "lsr.l #8,%%d0 \n" | ||
558 | "or.l %%d0,%%d4 \n" /* combine old low byte with new top 3 bytes */ | ||
559 | "move.w (%[ata]),%%d6 \n" /* load 7th word */ | ||
560 | "swap %%d6 \n" /* move to upper 16 bit */ | ||
561 | "move.w (%[ata]),%%d6 \n" /* load 8th word */ | ||
562 | "move.l %%d6,%%d0 \n" | ||
563 | "lsl.l %%d1,%%d5 \n" | ||
564 | "lsr.l #8,%%d0 \n" | ||
565 | "or.l %%d0,%%d5 \n" /* combine old low byte with new top 3 bytes */ | ||
566 | "movem.l %%d2-%%d5,(%[buf]) \n" /* store line */ | ||
567 | "lea.l (16,%[buf]),%[buf] \n" | ||
568 | "move.l %%d6,%%d2 \n" | ||
569 | "cmp.l %[buf],%[wcnt] \n" /* run up to last line bound */ | ||
570 | "bhi.b .loop_a_line \n" | ||
571 | |||
572 | "lea.l (12,%[wcnt]),%[wcnt]\n" /* readjust for longword loop */ | ||
573 | "cmp.l %[buf],%[wcnt] \n" /* any trailing longwords? */ | ||
574 | "bls.b .end_u_l2 \n" /* no: skip loop */ | ||
575 | |||
576 | ".loop_u_l2: \n" | ||
577 | "move.w (%[ata]),%%d3 \n" /* load first word */ | ||
578 | "swap %%d3 \n" /* move to upper 16 bit */ | ||
579 | "move.w (%[ata]),%%d3 \n" /* load second word */ | ||
580 | "move.l %%d3,%%d4 \n" | ||
581 | "lsl.l %%d1,%%d2 \n" | ||
582 | "lsr.l #8,%%d3 \n" | ||
583 | "or.l %%d3,%%d2 \n" /* combine old low byte with new top 3 bytes */ | ||
584 | "move.l %%d2,(%[buf])+ \n" /* store as long */ | ||
585 | "move.l %%d4,%%d2 \n" | ||
586 | "cmp.l %[buf],%[wcnt] \n" /* run up to last long bound */ | ||
587 | "bhi.b .loop_u_l2 \n" | ||
588 | |||
589 | ".end_u_l2: \n" | ||
590 | "addq.l #2,%[wcnt] \n" /* back to final end address */ | ||
591 | "cmp.l %[buf],%[wcnt] \n" /* one word left? */ | ||
592 | "bls.b .end_u_w2 \n" | ||
593 | |||
594 | "swap %%d2 \n" /* move old word to upper 16 bits */ | ||
595 | "move.w (%[ata]),%%d2 \n" /* load final word */ | ||
596 | "move.l %%d2,%%d3 \n" | ||
597 | "lsr.l #8,%%d3 \n" | ||
598 | "move.w %%d3,(%[buf])+ \n" /* write bytes 2 and 3 as word */ | ||
599 | |||
600 | ".end_u_w2: \n" | ||
601 | "move.b %%d2,(%[buf])+ \n" /* store final byte */ | ||
602 | "bra.b .exit \n" | ||
603 | |||
604 | /* 16-bit aligned */ | ||
605 | ".aligned: \n" | ||
606 | "btst.l #1,%%d0 \n" /* longword aligned? */ | ||
607 | "beq.b .end_a_w1 \n" /* yes, skip leading word handling */ | ||
608 | |||
609 | "move.w (%[ata]),(%[buf])+ \n" /* copy initial word */ | ||
610 | |||
611 | ".end_a_w1: \n" | ||
612 | "moveq.l #12,%%d0 \n" | ||
613 | "add.l %[buf],%%d0 \n" | ||
614 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
615 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
616 | "bls.b .end_a_l1 \n" /* no: skip loop */ | ||
617 | |||
618 | ".loop_a_l1: \n" | ||
619 | "move.w (%[ata]),%%d1 \n" /* load first word */ | ||
620 | "swap %%d1 \n" /* move it to upper 16 bits */ | ||
621 | "move.w (%[ata]),%%d1 \n" /* load second word */ | ||
622 | "move.l %%d1,(%[buf])+ \n" /* store as long */ | ||
623 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
624 | "bhi.b .loop_a_l1 \n" | ||
625 | |||
626 | ".end_a_l1: \n" | ||
627 | "lea.l (-14,%[wcnt]),%[wcnt] \n" /* adjust end addr. to 16 bytes/pass */ | ||
628 | |||
629 | ".loop_a_line: \n" | ||
630 | "move.w (%[ata]),%%d0 \n" /* load 1st word */ | ||
631 | "swap %%d0 \n" /* move it to upper 16 bits */ | ||
632 | "move.w (%[ata]),%%d0 \n" /* load 2nd word */ | ||
633 | "move.w (%[ata]),%%d1 \n" /* load 3rd word */ | ||
634 | "swap %%d1 \n" /* move it to upper 16 bits */ | ||
635 | "move.w (%[ata]),%%d1 \n" /* load 4th word */ | ||
636 | "move.w (%[ata]),%%d2 \n" /* load 5th word */ | ||
637 | "swap %%d2 \n" /* move it to upper 16 bits */ | ||
638 | "move.w (%[ata]),%%d2 \n" /* load 6th word */ | ||
639 | "move.w (%[ata]),%%d3 \n" /* load 7th word */ | ||
640 | "swap %%d3 \n" /* move it to upper 16 bits */ | ||
641 | "move.w (%[ata]),%%d3 \n" /* load 8th word */ | ||
642 | "movem.l %%d0-%%d3,(%[buf]) \n" /* store line */ | ||
643 | "lea.l (16,%[buf]),%[buf] \n" | ||
644 | "cmp.l %[buf],%[wcnt] \n" /* run up to last line bound */ | ||
645 | "bhi.b .loop_a_line \n" | ||
646 | |||
647 | "lea.l (12,%[wcnt]),%[wcnt]\n" /* readjust for longword loop */ | ||
648 | "cmp.l %[buf],%[wcnt] \n" /* any trailing longwords? */ | ||
649 | "bls.b .end_a_l2 \n" /* no: skip loop */ | ||
650 | |||
651 | ".loop_a_l2: \n" | ||
652 | "move.w (%[ata]),%%d1 \n" /* read first word */ | ||
653 | "swap %%d1 \n" /* move it to upper 16 bits */ | ||
654 | "move.w (%[ata]),%%d1 \n" /* read second word */ | ||
655 | "move.l %%d1,(%[buf])+ \n" /* store as long */ | ||
656 | "cmp.l %[buf],%[wcnt] \n" /* run up to last long bound */ | ||
657 | "bhi.b .loop_a_l2 \n" | ||
658 | |||
659 | ".end_a_l2: \n" | ||
660 | "addq.l #2,%[wcnt] \n" /* back to final end address */ | ||
661 | "cmp.l %[buf],%[wcnt] \n" /* one word left? */ | ||
662 | "bls.b .end_a_w2 \n" | ||
663 | |||
664 | "move.w (%[ata]),(%[buf])+ \n" /* copy final word */ | ||
665 | |||
666 | ".end_a_w2: \n" | ||
667 | |||
668 | ".exit: \n" | ||
669 | : /* outputs */ | ||
670 | : /* inputs */ | ||
671 | [buf] "a"(buf), | ||
672 | [wcnt]"a"(wordcount), | ||
673 | [ata] "a"(&ATA_DATA) | ||
674 | : /*trashed */ | ||
675 | "d0", "d1", "d2", "d3", "d4", "d5", "d6" | ||
676 | ); | ||
487 | #else | 677 | #else |
488 | /* turbo-charged assembler version */ | 678 | /* turbo-charged assembler version */ |
489 | /* this assumes wordcount to be a multiple of 4 */ | 679 | /* this assumes wordcount to be a multiple of 4 */ |