diff options
author | Jens Arnold <amiconn@rockbox.org> | 2006-03-19 12:37:23 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2006-03-19 12:37:23 +0000 |
commit | bf3dba862824d05fdc12ae78c5c5bea61545f6ea (patch) | |
tree | baecbdcf8fab4eaea8a28ed35143ffad95c1609f /firmware | |
parent | 958d6ac278dc589ffc02ac994d0699de05cf20c8 (diff) | |
download | rockbox-bf3dba862824d05fdc12ae78c5c5bea61545f6ea.tar.gz rockbox-bf3dba862824d05fdc12ae78c5c5bea61545f6ea.zip |
Coldfire: Assembler optimised disk writing, speed increase ~36% on average.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9117 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/drivers/ata.c | 214 |
1 file changed, 208 insertions, 6 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c index ce00ba4774..87f41b4ff1 100644 --- a/firmware/drivers/ata.c +++ b/firmware/drivers/ata.c | |||
@@ -38,8 +38,7 @@ | |||
38 | 38 | ||
39 | #if (CONFIG_CPU == MCF5249) || (CONFIG_CPU == MCF5250) | 39 | #if (CONFIG_CPU == MCF5249) || (CONFIG_CPU == MCF5250) |
40 | 40 | ||
41 | /* asm reading, C writing */ | 41 | /* asm reading + writing */ |
42 | #define PREFER_C_WRITING | ||
43 | 42 | ||
44 | #define ATA_IOBASE 0x20000000 | 43 | #define ATA_IOBASE 0x20000000 |
45 | #define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE + 0x20))) | 44 | #define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE + 0x20))) |
@@ -505,7 +504,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
505 | "btst.l #1,%%d0 \n" /* longword aligned? */ | 504 | "btst.l #1,%%d0 \n" /* longword aligned? */ |
506 | "beq.b .end_u_w1 \n" /* yes, skip leading word handling */ | 505 | "beq.b .end_u_w1 \n" /* yes, skip leading word handling */ |
507 | 506 | ||
508 | "swap %%d2 \n" /* move initila word up */ | 507 | "swap %%d2 \n" /* move initial word up */ |
509 | "move.w (%[ata]),%%d2 \n" /* combine with second word */ | 508 | "move.w (%[ata]),%%d2 \n" /* combine with second word */ |
510 | "move.l %%d2,%%d3 \n" | 509 | "move.l %%d2,%%d3 \n" |
511 | "lsr.l #8,%%d3 \n" | 510 | "lsr.l #8,%%d3 \n" |
@@ -533,7 +532,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
533 | 532 | ||
534 | ".end_u_l1: \n" | 533 | ".end_u_l1: \n" |
535 | "lea.l (-14,%[wcnt]),%[wcnt] \n" /* adjust end addr. to 16 bytes/pass */ | 534 | "lea.l (-14,%[wcnt]),%[wcnt] \n" /* adjust end addr. to 16 bytes/pass */ |
536 | 535 | ||
537 | ".loop_u_line: \n" | 536 | ".loop_u_line: \n" |
538 | "move.w (%[ata]),%%d3 \n" /* load 1st word */ | 537 | "move.w (%[ata]),%%d3 \n" /* load 1st word */ |
539 | "swap %%d3 \n" /* move to upper 16 bit */ | 538 | "swap %%d3 \n" /* move to upper 16 bit */ |
@@ -675,7 +674,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
675 | "d0", "d1", "d2", "d3", "d4", "d5", "d6" | 674 | "d0", "d1", "d2", "d3", "d4", "d5", "d6" |
676 | ); | 675 | ); |
677 | #else | 676 | #else |
678 | /* turbo-charged assembler version */ | 677 | /* SH1 turbo-charged assembler reading */ |
679 | /* this assumes wordcount to be a multiple of 4 */ | 678 | /* this assumes wordcount to be a multiple of 4 */ |
680 | asm ( | 679 | asm ( |
681 | "add %1,%1 \n" /* wordcount -> bytecount */ | 680 | "add %1,%1 \n" /* wordcount -> bytecount */ |
@@ -981,7 +980,209 @@ static void copy_write_sectors(const unsigned char* buf, int wordcount) | |||
981 | } while (++wbuf < wbufend); /* tail loop is faster */ | 980 | } while (++wbuf < wbufend); /* tail loop is faster */ |
982 | } | 981 | } |
983 | #else | 982 | #else |
984 | /* optimized assembler version */ | 983 | #ifdef CPU_COLDFIRE |
984 | /* coldfire asm writing, utilising line bursts */ | ||
985 | asm ( | ||
986 | "add.l %[wcnt],%[wcnt] \n" /* wordcount -> bytecount */ | ||
987 | "add.l %[buf],%[wcnt] \n" /* bytecount -> bufend */ | ||
988 | "move.l %[buf],%%d0 \n" | ||
989 | "btst.l #0,%%d0 \n" /* 16-bit aligned? */ | ||
990 | "jeq .w_aligned \n" /* yes, do word copy */ | ||
991 | |||
992 | /* not 16-bit aligned */ | ||
993 | "subq.l #1,%[wcnt] \n" /* last byte is done unconditionally */ | ||
994 | "moveq.l #24,%%d1 \n" /* preload shift count */ | ||
995 | |||
996 | "move.b (%[buf])+,%%d2 \n" | ||
997 | |||
998 | "btst.l #1,%%d0 \n" /* longword aligned? */ | ||
999 | "beq.b .w_end_u_w1 \n" /* yes, skip leading word handling */ | ||
1000 | |||
1001 | "swap %%d2 \n" | ||
1002 | "move.w (%[buf])+,%%d2 \n" | ||
1003 | "move.l %%d2,%%d3 \n" | ||
1004 | "lsr.l #8,%%d3 \n" | ||
1005 | "move.w %%d3,(%[ata]) \n" | ||
1006 | |||
1007 | ".w_end_u_w1: \n" | ||
1008 | "moveq.l #12,%%d0 \n" | ||
1009 | "add.l %[buf],%%d0 \n" | ||
1010 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
1011 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
1012 | "bls.b .w_end_u_l1 \n" /* no: skip loop */ | ||
1013 | |||
1014 | ".w_loop_u_l1: \n" | ||
1015 | "move.l (%[buf])+,%%d3 \n" | ||
1016 | "move.l %%d3,%%d4 \n" | ||
1017 | "lsl.l %%d1,%%d2 \n" | ||
1018 | "lsr.l #8,%%d3 \n" | ||
1019 | "or.l %%d3,%%d2 \n" | ||
1020 | "swap %%d2 \n" | ||
1021 | "move.w %%d2,(%[ata]) \n" | ||
1022 | "swap %%d2 \n" | ||
1023 | "move.w %%d2,(%[ata]) \n" | ||
1024 | "move.l %%d4,%%d2 \n" | ||
1025 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
1026 | "bhi.b .w_loop_u_l1 \n" | ||
1027 | |||
1028 | ".w_end_u_l1: \n" | ||
1029 | "lea.l (-14,%[wcnt]),%[wcnt] \n" /* adjust end addr. to 16 bytes/pass */ | ||
1030 | |||
1031 | ".w_loop_u_line: \n" | ||
1032 | "movem.l (%[buf]),%%d3-%%d6 \n" | ||
1033 | "lea.l (16,%[buf]),%[buf] \n" | ||
1034 | "move.l %%d3,%%d0 \n" | ||
1035 | "lsl.l %%d1,%%d2 \n" | ||
1036 | "lsr.l #8,%%d0 \n" | ||
1037 | "or.l %%d0,%%d2 \n" | ||
1038 | "swap %%d2 \n" | ||
1039 | "move.w %%d2,(%[ata]) \n" | ||
1040 | "swap %%d2 \n" | ||
1041 | "move.w %%d2,(%[ata]) \n" | ||
1042 | "move.l %%d4,%%d0 \n" | ||
1043 | "lsl.l %%d1,%%d3 \n" | ||
1044 | "lsr.l #8,%%d0 \n" | ||
1045 | "or.l %%d0,%%d3 \n" | ||
1046 | "swap %%d3 \n" | ||
1047 | "move.w %%d3,(%[ata]) \n" | ||
1048 | "swap %%d3 \n" | ||
1049 | "move.w %%d3,(%[ata]) \n" | ||
1050 | "move.l %%d5,%%d0 \n" | ||
1051 | "lsl.l %%d1,%%d4 \n" | ||
1052 | "lsr.l #8,%%d0 \n" | ||
1053 | "or.l %%d0,%%d4 \n" | ||
1054 | "swap %%d4 \n" | ||
1055 | "move.w %%d4,(%[ata]) \n" | ||
1056 | "swap %%d4 \n" | ||
1057 | "move.w %%d4,(%[ata]) \n" | ||
1058 | "move.l %%d6,%%d0 \n" | ||
1059 | "lsl.l %%d1,%%d5 \n" | ||
1060 | "lsr.l #8,%%d0 \n" | ||
1061 | "or.l %%d0,%%d5 \n" | ||
1062 | "swap %%d5 \n" | ||
1063 | "move.w %%d5,(%[ata]) \n" | ||
1064 | "swap %%d5 \n" | ||
1065 | "move.w %%d5,(%[ata]) \n" | ||
1066 | "move.l %%d6,%%d2 \n" | ||
1067 | "cmp.l %[buf],%[wcnt] \n" /* run up to last line bound */ | ||
1068 | "bhi.b .w_loop_u_line \n" | ||
1069 | |||
1070 | "lea.l (12,%[wcnt]),%[wcnt]\n" /* readjust for longword loop */ | ||
1071 | "cmp.l %[buf],%[wcnt] \n" /* any trailing longwords? */ | ||
1072 | "bls.b .w_end_u_l2 \n" /* no: skip loop */ | ||
1073 | |||
1074 | ".w_loop_u_l2: \n" | ||
1075 | "move.l (%[buf])+,%%d3 \n" | ||
1076 | "move.l %%d3,%%d4 \n" | ||
1077 | "lsl.l %%d1,%%d2 \n" | ||
1078 | "lsr.l #8,%%d3 \n" | ||
1079 | "or.l %%d3,%%d2 \n" | ||
1080 | "swap %%d2 \n" | ||
1081 | "move.w %%d2,(%[ata]) \n" | ||
1082 | "swap %%d2 \n" | ||
1083 | "move.w %%d2,(%[ata]) \n" | ||
1084 | "move.l %%d4,%%d2 \n" | ||
1085 | "cmp.l %[buf],%[wcnt] \n" /* run up to last longword bound */ | ||
1086 | "bhi.b .w_loop_u_l2 \n" | ||
1087 | |||
1088 | ".w_end_u_l2: \n" | ||
1089 | "addq.l #2,%[wcnt] \n" /* back to final end address */ | ||
1090 | "cmp.l %[buf],%[wcnt] \n" /* one word left? */ | ||
1091 | "bls.b .w_end_u_w2 \n" | ||
1092 | |||
1093 | "swap %%d2 \n" | ||
1094 | "move.w (%[buf])+,%%d2 \n" | ||
1095 | "move.l %%d2,%%d3 \n" | ||
1096 | "lsr.l #8,%%d3 \n" | ||
1097 | "move.w %%d3,(%[ata]) \n" | ||
1098 | |||
1099 | ".w_end_u_w2: \n" | ||
1100 | "lsl.l #8,%%d2 \n" | ||
1101 | "move.b (%[buf])+,%%d2 \n" | ||
1102 | "move.w %%d2,(%[ata]) \n" | ||
1103 | "bra.b .w_exit \n" | ||
1104 | |||
1105 | /* 16-bit aligned */ | ||
1106 | ".w_aligned: \n" | ||
1107 | "btst.l #1,%%d0 \n" | ||
1108 | "beq.b .w_end_a_w1 \n" | ||
1109 | |||
1110 | "move.w (%[buf])+,(%[ata]) \n" /* copy initial word */ | ||
1111 | |||
1112 | ".w_end_a_w1: \n" | ||
1113 | "moveq.l #12,%%d0 \n" | ||
1114 | "add.l %[buf],%%d0 \n" | ||
1115 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
1116 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
1117 | "bls.b .w_end_a_l1 \n" /* no: skip loop */ | ||
1118 | |||
1119 | ".w_loop_a_l1: \n" | ||
1120 | "move.l (%[buf])+,%%d1 \n" | ||
1121 | "swap %%d1 \n" | ||
1122 | "move.w %%d1,(%[ata]) \n" | ||
1123 | "swap %%d1 \n" | ||
1124 | "move.w %%d1,(%[ata]) \n" | ||
1125 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
1126 | "bhi.b .w_loop_a_l1 \n" | ||
1127 | |||
1128 | ".w_end_a_l1: \n" | ||
1129 | "lea.l (-14,%[wcnt]),%[wcnt] \n" /* adjust end addr. to 16 bytes/pass */ | ||
1130 | |||
1131 | ".w_loop_a_line: \n" | ||
1132 | "movem.l (%[buf]),%%d0-%%d3 \n" | ||
1133 | "lea.l (16,%[buf]),%[buf] \n" | ||
1134 | "swap %%d0 \n" | ||
1135 | "move.w %%d0,(%[ata]) \n" | ||
1136 | "swap %%d0 \n" | ||
1137 | "move.w %%d0,(%[ata]) \n" | ||
1138 | "swap %%d1 \n" | ||
1139 | "move.w %%d1,(%[ata]) \n" | ||
1140 | "swap %%d1 \n" | ||
1141 | "move.w %%d1,(%[ata]) \n" | ||
1142 | "swap %%d2 \n" | ||
1143 | "move.w %%d2,(%[ata]) \n" | ||
1144 | "swap %%d2 \n" | ||
1145 | "move.w %%d2,(%[ata]) \n" | ||
1146 | "swap %%d3 \n" | ||
1147 | "move.w %%d3,(%[ata]) \n" | ||
1148 | "swap %%d3 \n" | ||
1149 | "move.w %%d3,(%[ata]) \n" | ||
1150 | "cmp.l %[buf],%[wcnt] \n" /* run up to last line bound */ | ||
1151 | "bhi.b .w_loop_a_line \n" | ||
1152 | |||
1153 | "lea.l (12,%[wcnt]),%[wcnt]\n" /* readjust for longword loop */ | ||
1154 | "cmp.l %[buf],%[wcnt] \n" /* any trailing longwords? */ | ||
1155 | "bls.b .w_end_a_l2 \n" /* no: skip loop */ | ||
1156 | |||
1157 | ".w_loop_a_l2: \n" | ||
1158 | "move.l (%[buf])+,%%d1 \n" | ||
1159 | "swap %%d1 \n" | ||
1160 | "move.w %%d1,(%[ata]) \n" | ||
1161 | "swap %%d1 \n" | ||
1162 | "move.w %%d1,(%[ata]) \n" | ||
1163 | "cmp.l %[buf],%[wcnt] \n" /* run up to last longword bound */ | ||
1164 | "bhi.b .w_loop_a_l2 \n" | ||
1165 | |||
1166 | ".w_end_a_l2: \n" | ||
1167 | "addq.l #2,%[wcnt] \n" /* back to final end address */ | ||
1168 | "cmp.l %[buf],%[wcnt] \n" /* one word left? */ | ||
1169 | "bls.b .w_end_a_w2 \n" | ||
1170 | |||
1171 | "move.w (%[buf])+,(%[ata]) \n" /* copy final word */ | ||
1172 | |||
1173 | ".w_end_a_w2: \n" | ||
1174 | |||
1175 | ".w_exit: \n" | ||
1176 | : /* outputs */ | ||
1177 | : /* inputs */ | ||
1178 | [buf] "a"(buf), | ||
1179 | [wcnt]"a"(wordcount), | ||
1180 | [ata] "a"(&ATA_DATA) | ||
1181 | : /*trashed */ | ||
1182 | "d0", "d1", "d2", "d3", "d4", "d5", "d6" | ||
1183 | ); | ||
1184 | #else | ||
1185 | /* SH1 optimized assembler version */ | ||
985 | /* this assumes wordcount to be a multiple of 2 */ | 1186 | /* this assumes wordcount to be a multiple of 2 */ |
986 | 1187 | ||
987 | /* writing is not unrolled as much as reading, for several reasons: | 1188 | /* writing is not unrolled as much as reading, for several reasons: |
@@ -1065,6 +1266,7 @@ static void copy_write_sectors(const unsigned char* buf, int wordcount) | |||
1065 | "r0","r1","r2","r3","r6" | 1266 | "r0","r1","r2","r3","r6" |
1066 | ); | 1267 | ); |
1067 | #endif | 1268 | #endif |
1269 | #endif | ||
1068 | } | 1270 | } |
1069 | 1271 | ||
1070 | int ata_write_sectors(IF_MV2(int drive,) | 1272 | int ata_write_sectors(IF_MV2(int drive,) |