diff options
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/SOURCES | 9 | ||||
-rw-r--r-- | firmware/drivers/ata.c | 770 | ||||
-rw-r--r-- | firmware/target/arm/ata-target.h | 2 | ||||
-rw-r--r-- | firmware/target/arm/gigabeat/meg-fx/ata-target.h | 2 | ||||
-rwxr-xr-x | firmware/target/coldfire/ata-as-coldfire.S | 459 | ||||
-rwxr-xr-x | firmware/target/coldfire/ata-target.h | 6 | ||||
-rwxr-xr-x | firmware/target/sh/archos/ata-archos.c | 76 | ||||
-rwxr-xr-x | firmware/target/sh/archos/ata-as-archos.S | 231 | ||||
-rwxr-xr-x | firmware/target/sh/archos/ata-target.h | 80 |
9 files changed, 890 insertions, 745 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES index beb51bef42..77c257879a 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES | |||
@@ -284,6 +284,8 @@ drivers/i2c.c | |||
284 | 284 | ||
285 | #ifdef ARCHOS_PLAYER | 285 | #ifdef ARCHOS_PLAYER |
286 | #ifndef SIMULATOR | 286 | #ifndef SIMULATOR |
287 | target/sh/archos/ata-archos.c | ||
288 | target/sh/archos/ata-as-archos.S | ||
287 | target/sh/archos/player/button-player.c | 289 | target/sh/archos/player/button-player.c |
288 | target/sh/archos/player/lcd-as-player.S | 290 | target/sh/archos/player/lcd-as-player.S |
289 | #endif /* SIMULATOR */ | 291 | #endif /* SIMULATOR */ |
@@ -291,6 +293,8 @@ target/sh/archos/player/lcd-as-player.S | |||
291 | 293 | ||
292 | #ifdef ARCHOS_RECORDER | 294 | #ifdef ARCHOS_RECORDER |
293 | #ifndef SIMULATOR | 295 | #ifndef SIMULATOR |
296 | target/sh/archos/ata-archos.c | ||
297 | target/sh/archos/ata-as-archos.S | ||
294 | target/sh/archos/lcd-archos-bitmap.c | 298 | target/sh/archos/lcd-archos-bitmap.c |
295 | target/sh/archos/lcd-as-archos-bitmap.S | 299 | target/sh/archos/lcd-as-archos-bitmap.S |
296 | target/sh/archos/recorder/button-recorder.c | 300 | target/sh/archos/recorder/button-recorder.c |
@@ -299,6 +303,8 @@ target/sh/archos/recorder/button-recorder.c | |||
299 | 303 | ||
300 | #if defined(ARCHOS_FMRECORDER) || defined(ARCHOS_RECORDERV2) | 304 | #if defined(ARCHOS_FMRECORDER) || defined(ARCHOS_RECORDERV2) |
301 | #ifndef SIMULATOR | 305 | #ifndef SIMULATOR |
306 | target/sh/archos/ata-archos.c | ||
307 | target/sh/archos/ata-as-archos.S | ||
302 | target/sh/archos/lcd-archos-bitmap.c | 308 | target/sh/archos/lcd-archos-bitmap.c |
303 | target/sh/archos/lcd-as-archos-bitmap.S | 309 | target/sh/archos/lcd-as-archos-bitmap.S |
304 | target/sh/archos/fm_v2/button-fm_v2.c | 310 | target/sh/archos/fm_v2/button-fm_v2.c |
@@ -328,6 +334,7 @@ target/arm/sandisk/sansa-e200/power-e200.c | |||
328 | #ifdef IAUDIO_X5 | 334 | #ifdef IAUDIO_X5 |
329 | target/coldfire/iaudio/x5/power-x5.c | 335 | target/coldfire/iaudio/x5/power-x5.c |
330 | #ifndef SIMULATOR | 336 | #ifndef SIMULATOR |
337 | target/coldfire/ata-as-coldfire.S | ||
331 | target/coldfire/pcf50606-coldfire.c | 338 | target/coldfire/pcf50606-coldfire.c |
332 | target/coldfire/iaudio/x5/adc-x5.c | 339 | target/coldfire/iaudio/x5/adc-x5.c |
333 | target/coldfire/iaudio/x5/ata-x5.c | 340 | target/coldfire/iaudio/x5/ata-x5.c |
@@ -356,6 +363,7 @@ common/sscanf.c | |||
356 | #ifdef IRIVER_H300_SERIES | 363 | #ifdef IRIVER_H300_SERIES |
357 | target/coldfire/iriver/h300/power-h300.c | 364 | target/coldfire/iriver/h300/power-h300.c |
358 | #ifndef SIMULATOR | 365 | #ifndef SIMULATOR |
366 | target/coldfire/ata-as-coldfire.S | ||
359 | target/coldfire/pcf50606-coldfire.c | 367 | target/coldfire/pcf50606-coldfire.c |
360 | target/coldfire/iriver/ata-iriver.c | 368 | target/coldfire/iriver/ata-iriver.c |
361 | target/coldfire/iriver/system-iriver.c | 369 | target/coldfire/iriver/system-iriver.c |
@@ -375,6 +383,7 @@ target/coldfire/iriver/audio-iriver.c | |||
375 | #ifdef IRIVER_H100_SERIES | 383 | #ifdef IRIVER_H100_SERIES |
376 | target/coldfire/iriver/h100/power-h100.c | 384 | target/coldfire/iriver/h100/power-h100.c |
377 | #ifndef SIMULATOR | 385 | #ifndef SIMULATOR |
386 | target/coldfire/ata-as-coldfire.S | ||
378 | target/coldfire/iriver/ata-iriver.c | 387 | target/coldfire/iriver/ata-iriver.c |
379 | target/coldfire/iriver/system-iriver.c | 388 | target/coldfire/iriver/system-iriver.c |
380 | target/coldfire/iriver/h100/adc-h100.c | 389 | target/coldfire/iriver/h100/adc-h100.c |
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c index d0cca2e148..72194db8da 100644 --- a/firmware/drivers/ata.c +++ b/firmware/drivers/ata.c | |||
@@ -30,66 +30,10 @@ | |||
30 | #include "string.h" | 30 | #include "string.h" |
31 | #include "hwcompat.h" | 31 | #include "hwcompat.h" |
32 | #include "ata_idle_notify.h" | 32 | #include "ata_idle_notify.h" |
33 | #ifdef TARGET_TREE | ||
34 | #include "ata-target.h" | 33 | #include "ata-target.h" |
35 | #endif | ||
36 | 34 | ||
37 | #define SECTOR_SIZE (512) | 35 | #define SECTOR_SIZE (512) |
38 | 36 | ||
39 | #if CONFIG_CPU == SH7034 | ||
40 | |||
41 | /* asm optimised read & write loops */ | ||
42 | |||
43 | #define NOINLINE_ATTR __attribute__((noinline)) /* don't inline the loops */ | ||
44 | |||
45 | #define SWAP_WORDS | ||
46 | |||
47 | #define ATA_IOBASE 0x06100100 | ||
48 | #define ATA_DATA (*((volatile unsigned short*)0x06104100)) | ||
49 | #define ATA_CONTROL1 ((volatile unsigned char*)0x06200206) | ||
50 | #define ATA_CONTROL2 ((volatile unsigned char*)0x06200306) | ||
51 | #define ATA_CONTROL (*ata_control) | ||
52 | |||
53 | #define ATA_ERROR (*((volatile unsigned char*)ATA_IOBASE + 1)) | ||
54 | #define ATA_NSECTOR (*((volatile unsigned char*)ATA_IOBASE + 2)) | ||
55 | #define ATA_SECTOR (*((volatile unsigned char*)ATA_IOBASE + 3)) | ||
56 | #define ATA_LCYL (*((volatile unsigned char*)ATA_IOBASE + 4)) | ||
57 | #define ATA_HCYL (*((volatile unsigned char*)ATA_IOBASE + 5)) | ||
58 | #define ATA_SELECT (*((volatile unsigned char*)ATA_IOBASE + 6)) | ||
59 | #define ATA_COMMAND (*((volatile unsigned char*)ATA_IOBASE + 7)) | ||
60 | |||
61 | #define STATUS_BSY 0x80 | ||
62 | #define STATUS_RDY 0x40 | ||
63 | #define STATUS_DF 0x20 | ||
64 | #define STATUS_DRQ 0x08 | ||
65 | #define STATUS_ERR 0x01 | ||
66 | |||
67 | #define ERROR_ABRT 0x04 | ||
68 | |||
69 | #define WRITE_PATTERN1 0xa5 | ||
70 | #define WRITE_PATTERN2 0x5a | ||
71 | #define WRITE_PATTERN3 0xaa | ||
72 | #define WRITE_PATTERN4 0x55 | ||
73 | |||
74 | #define READ_PATTERN1 0xa5 | ||
75 | #define READ_PATTERN2 0x5a | ||
76 | #define READ_PATTERN3 0xaa | ||
77 | #define READ_PATTERN4 0x55 | ||
78 | |||
79 | #define READ_PATTERN1_MASK 0xff | ||
80 | #define READ_PATTERN2_MASK 0xff | ||
81 | #define READ_PATTERN3_MASK 0xff | ||
82 | #define READ_PATTERN4_MASK 0xff | ||
83 | |||
84 | #define SET_REG(reg,val) reg = (val) | ||
85 | #define SET_16BITREG(reg,val) reg = (val) | ||
86 | |||
87 | #endif | ||
88 | |||
89 | #ifndef NOINLINE_ATTR | ||
90 | #define NOINLINE_ATTR | ||
91 | #endif | ||
92 | |||
93 | #define ATA_FEATURE ATA_ERROR | 37 | #define ATA_FEATURE ATA_ERROR |
94 | 38 | ||
95 | #define ATA_STATUS ATA_COMMAND | 39 | #define ATA_STATUS ATA_COMMAND |
@@ -118,11 +62,7 @@ | |||
118 | #define READ_TIMEOUT 5*HZ | 62 | #define READ_TIMEOUT 5*HZ |
119 | 63 | ||
120 | static struct mutex ata_mtx; | 64 | static struct mutex ata_mtx; |
121 | char ata_device; /* device 0 (master) or 1 (slave) */ | 65 | int ata_device; /* device 0 (master) or 1 (slave) */ |
122 | int ata_io_address; /* 0x300 or 0x200, only valid on recorder */ | ||
123 | #if CONFIG_CPU == SH7034 | ||
124 | static volatile unsigned char* ata_control; | ||
125 | #endif | ||
126 | 66 | ||
127 | int ata_spinup_time = 0; | 67 | int ata_spinup_time = 0; |
128 | #if CONFIG_LED == LED_REAL | 68 | #if CONFIG_LED == LED_REAL |
@@ -131,8 +71,8 @@ static bool ata_led_on = false; | |||
131 | #endif | 71 | #endif |
132 | static bool spinup = false; | 72 | static bool spinup = false; |
133 | static bool sleeping = true; | 73 | static bool sleeping = true; |
134 | static long sleep_timeout = 5*HZ; | ||
135 | static bool poweroff = false; | 74 | static bool poweroff = false; |
75 | static long sleep_timeout = 5*HZ; | ||
136 | #ifdef HAVE_ATA_POWER_OFF | 76 | #ifdef HAVE_ATA_POWER_OFF |
137 | static int poweroff_timeout = 2*HZ; | 77 | static int poweroff_timeout = 2*HZ; |
138 | #endif | 78 | #endif |
@@ -206,25 +146,30 @@ static int wait_for_end_of_transfer(void) | |||
206 | return (ATA_ALT_STATUS & (STATUS_RDY|STATUS_DRQ)) == STATUS_RDY; | 146 | return (ATA_ALT_STATUS & (STATUS_RDY|STATUS_DRQ)) == STATUS_RDY; |
207 | } | 147 | } |
208 | 148 | ||
209 | /* Optimization: don't do 256 calls to ddma_transfer; fuse with it | 149 | #if CONFIG_LED == LED_REAL |
210 | * as in the Archos firmware. | 150 | /* Conditionally block LED access for the ATA driver, so the LED can be |
211 | * It actually possible to do a single dma transfer to copy a whole sector between ATA | 151 | * (mis)used for other purposes */ |
212 | * controller & cpu internal memory. | 152 | static void ata_led(bool on) |
213 | */ | 153 | { |
214 | /* the tight loop of ata_read_sectors(), to avoid the whole in IRAM */ | 154 | ata_led_on = on; |
215 | static void copy_read_sectors(unsigned char* buf, int wordcount) | 155 | if (ata_led_enabled) |
216 | ICODE_ATTR NOINLINE_ATTR; | 156 | led(ata_led_on); |
157 | } | ||
158 | #else | ||
159 | #define ata_led(on) led(on) | ||
160 | #endif | ||
161 | |||
162 | #ifndef ATA_OPTIMIZED_READING | ||
163 | static void copy_read_sectors(unsigned char* buf, int wordcount) ICODE_ATTR; | ||
217 | static void copy_read_sectors(unsigned char* buf, int wordcount) | 164 | static void copy_read_sectors(unsigned char* buf, int wordcount) |
218 | { | 165 | { |
219 | #ifdef PREFER_C_READING | ||
220 | unsigned short tmp = 0; | 166 | unsigned short tmp = 0; |
221 | 167 | ||
222 | if ( (unsigned long)buf & 1) | 168 | if ( (unsigned long)buf & 1) |
223 | { /* not 16-bit aligned, copy byte by byte */ | 169 | { /* not 16-bit aligned, copy byte by byte */ |
224 | unsigned char* bufend = buf + wordcount*2; | 170 | unsigned char* bufend = buf + wordcount*2; |
225 | do | 171 | do |
226 | { /* loop compiles to 9 assembler instructions */ | 172 | { |
227 | /* takes 14 clock cycles (2 pipeline stalls, 1 wait) */ | ||
228 | tmp = ATA_DATA; | 173 | tmp = ATA_DATA; |
229 | #if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN) | 174 | #if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN) |
230 | *buf++ = tmp & 0xff; /* I assume big endian */ | 175 | *buf++ = tmp & 0xff; /* I assume big endian */ |
@@ -240,8 +185,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
240 | unsigned short* wbuf = (unsigned short*)buf; | 185 | unsigned short* wbuf = (unsigned short*)buf; |
241 | unsigned short* wbufend = wbuf + wordcount; | 186 | unsigned short* wbufend = wbuf + wordcount; |
242 | do | 187 | do |
243 | { /* loop compiles to 7 assembler instructions */ | 188 | { |
244 | /* takes 12 clock cycles (2 pipeline stalls, 1 wait) */ | ||
245 | #ifdef SWAP_WORDS | 189 | #ifdef SWAP_WORDS |
246 | *wbuf = swap16(ATA_DATA); | 190 | *wbuf = swap16(ATA_DATA); |
247 | #else | 191 | #else |
@@ -249,304 +193,8 @@ static void copy_read_sectors(unsigned char* buf, int wordcount) | |||
249 | #endif | 193 | #endif |
250 | } while (++wbuf < wbufend); /* tail loop is faster */ | 194 | } while (++wbuf < wbufend); /* tail loop is faster */ |
251 | } | 195 | } |
252 | #else /* !PREFER_C_READING */ | ||
253 | #if defined(CPU_COLDFIRE) | ||
254 | unsigned char* bufend = buf + 2 * wordcount; | ||
255 | /* coldfire asm reading, utilising line bursts */ | ||
256 | /* this assumes there is at least one full line to copy */ | ||
257 | asm volatile ( | ||
258 | "move.l %[buf],%%d0 \n" | ||
259 | "btst.l #0,%%d0 \n" /* 16-bit aligned? */ | ||
260 | "jeq .aligned \n" /* yes, do word copy */ | ||
261 | |||
262 | /* not 16-bit aligned */ | ||
263 | "subq.l #1,%[end] \n" /* last byte is done unconditionally */ | ||
264 | "moveq.l #24,%%d1 \n" /* preload shift count */ | ||
265 | |||
266 | "move.w (%[ata]),%%d2 \n" /* load initial word */ | ||
267 | "move.l %%d2,%%d3 \n" | ||
268 | "lsr.l #8,%%d3 \n" | ||
269 | "move.b %%d3,(%[buf])+ \n" /* write high byte of it, aligns dest addr */ | ||
270 | |||
271 | "btst.l #1,%%d0 \n" /* longword aligned? */ | ||
272 | "beq.b .end_u_w1 \n" /* yes, skip leading word handling */ | ||
273 | |||
274 | "swap %%d2 \n" /* move initial word up */ | ||
275 | "move.w (%[ata]),%%d2 \n" /* combine with second word */ | ||
276 | "move.l %%d2,%%d3 \n" | ||
277 | "lsr.l #8,%%d3 \n" | ||
278 | "move.w %%d3,(%[buf])+ \n" /* write bytes 2 and 3 as word */ | ||
279 | |||
280 | ".end_u_w1: \n" | ||
281 | "moveq.l #12,%%d0 \n" | ||
282 | "add.l %[buf],%%d0 \n" | ||
283 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
284 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
285 | "bls.b .end_u_l1 \n" /* no: skip loop */ | ||
286 | |||
287 | ".loop_u_l1: \n" | ||
288 | "move.w (%[ata]),%%d3 \n" /* load first word */ | ||
289 | "swap %%d3 \n" /* move to upper 16 bit */ | ||
290 | "move.w (%[ata]),%%d3 \n" /* load second word */ | ||
291 | "move.l %%d3,%%d4 \n" | ||
292 | "lsl.l %%d1,%%d2 \n" | ||
293 | "lsr.l #8,%%d3 \n" | ||
294 | "or.l %%d3,%%d2 \n" /* combine old low byte with new top 3 bytes */ | ||
295 | "move.l %%d2,(%[buf])+ \n" /* store as long */ | ||
296 | "move.l %%d4,%%d2 \n" | ||
297 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
298 | "bhi.b .loop_u_l1 \n" | ||
299 | |||
300 | ".end_u_l1: \n" | ||
301 | "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */ | ||
302 | |||
303 | ".loop_u_line: \n" | ||
304 | "move.w (%[ata]),%%d3 \n" /* load 1st word */ | ||
305 | "swap %%d3 \n" /* move to upper 16 bit */ | ||
306 | "move.w (%[ata]),%%d3 \n" /* load 2nd word */ | ||
307 | "move.l %%d3,%%d0 \n" | ||
308 | "lsl.l %%d1,%%d2 \n" | ||
309 | "lsr.l #8,%%d0 \n" | ||
310 | "or.l %%d0,%%d2 \n" /* combine old low byte with new top 3 bytes */ | ||
311 | "move.w (%[ata]),%%d4 \n" /* load 3rd word */ | ||
312 | "swap %%d4 \n" /* move to upper 16 bit */ | ||
313 | "move.w (%[ata]),%%d4 \n" /* load 4th word */ | ||
314 | "move.l %%d4,%%d0 \n" | ||
315 | "lsl.l %%d1,%%d3 \n" | ||
316 | "lsr.l #8,%%d0 \n" | ||
317 | "or.l %%d0,%%d3 \n" /* combine old low byte with new top 3 bytes */ | ||
318 | "move.w (%[ata]),%%d5 \n" /* load 5th word */ | ||
319 | "swap %%d5 \n" /* move to upper 16 bit */ | ||
320 | "move.w (%[ata]),%%d5 \n" /* load 6th word */ | ||
321 | "move.l %%d5,%%d0 \n" | ||
322 | "lsl.l %%d1,%%d4 \n" | ||
323 | "lsr.l #8,%%d0 \n" | ||
324 | "or.l %%d0,%%d4 \n" /* combine old low byte with new top 3 bytes */ | ||
325 | "move.w (%[ata]),%%d6 \n" /* load 7th word */ | ||
326 | "swap %%d6 \n" /* move to upper 16 bit */ | ||
327 | "move.w (%[ata]),%%d6 \n" /* load 8th word */ | ||
328 | "move.l %%d6,%%d0 \n" | ||
329 | "lsl.l %%d1,%%d5 \n" | ||
330 | "lsr.l #8,%%d0 \n" | ||
331 | "or.l %%d0,%%d5 \n" /* combine old low byte with new top 3 bytes */ | ||
332 | "movem.l %%d2-%%d5,(%[buf]) \n" /* store line */ | ||
333 | "lea.l (16,%[buf]),%[buf] \n" | ||
334 | "move.l %%d6,%%d2 \n" | ||
335 | "cmp.l %[buf],%[end] \n" /* run up to last line bound */ | ||
336 | "bhi.b .loop_u_line \n" | ||
337 | |||
338 | "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */ | ||
339 | "cmp.l %[buf],%[end] \n" /* any trailing longwords? */ | ||
340 | "bls.b .end_u_l2 \n" /* no: skip loop */ | ||
341 | |||
342 | ".loop_u_l2: \n" | ||
343 | "move.w (%[ata]),%%d3 \n" /* load first word */ | ||
344 | "swap %%d3 \n" /* move to upper 16 bit */ | ||
345 | "move.w (%[ata]),%%d3 \n" /* load second word */ | ||
346 | "move.l %%d3,%%d4 \n" | ||
347 | "lsl.l %%d1,%%d2 \n" | ||
348 | "lsr.l #8,%%d3 \n" | ||
349 | "or.l %%d3,%%d2 \n" /* combine old low byte with new top 3 bytes */ | ||
350 | "move.l %%d2,(%[buf])+ \n" /* store as long */ | ||
351 | "move.l %%d4,%%d2 \n" | ||
352 | "cmp.l %[buf],%[end] \n" /* run up to last long bound */ | ||
353 | "bhi.b .loop_u_l2 \n" | ||
354 | |||
355 | ".end_u_l2: \n" | ||
356 | "addq.l #2,%[end] \n" /* back to final end address */ | ||
357 | "cmp.l %[buf],%[end] \n" /* one word left? */ | ||
358 | "bls.b .end_u_w2 \n" | ||
359 | |||
360 | "swap %%d2 \n" /* move old word to upper 16 bits */ | ||
361 | "move.w (%[ata]),%%d2 \n" /* load final word */ | ||
362 | "move.l %%d2,%%d3 \n" | ||
363 | "lsr.l #8,%%d3 \n" | ||
364 | "move.w %%d3,(%[buf])+ \n" /* write bytes 2 and 3 as word */ | ||
365 | |||
366 | ".end_u_w2: \n" | ||
367 | "move.b %%d2,(%[buf])+ \n" /* store final byte */ | ||
368 | "bra.b .exit \n" | ||
369 | |||
370 | /* 16-bit aligned */ | ||
371 | ".aligned: \n" | ||
372 | "btst.l #1,%%d0 \n" /* longword aligned? */ | ||
373 | "beq.b .end_a_w1 \n" /* yes, skip leading word handling */ | ||
374 | |||
375 | "move.w (%[ata]),(%[buf])+ \n" /* copy initial word */ | ||
376 | |||
377 | ".end_a_w1: \n" | ||
378 | "moveq.l #12,%%d0 \n" | ||
379 | "add.l %[buf],%%d0 \n" | ||
380 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
381 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
382 | "bls.b .end_a_l1 \n" /* no: skip loop */ | ||
383 | |||
384 | ".loop_a_l1: \n" | ||
385 | "move.w (%[ata]),%%d1 \n" /* load first word */ | ||
386 | "swap %%d1 \n" /* move it to upper 16 bits */ | ||
387 | "move.w (%[ata]),%%d1 \n" /* load second word */ | ||
388 | "move.l %%d1,(%[buf])+ \n" /* store as long */ | ||
389 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
390 | "bhi.b .loop_a_l1 \n" | ||
391 | |||
392 | ".end_a_l1: \n" | ||
393 | "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */ | ||
394 | |||
395 | ".loop_a_line: \n" | ||
396 | "move.w (%[ata]),%%d0 \n" /* load 1st word */ | ||
397 | "swap %%d0 \n" /* move it to upper 16 bits */ | ||
398 | "move.w (%[ata]),%%d0 \n" /* load 2nd word */ | ||
399 | "move.w (%[ata]),%%d1 \n" /* load 3rd word */ | ||
400 | "swap %%d1 \n" /* move it to upper 16 bits */ | ||
401 | "move.w (%[ata]),%%d1 \n" /* load 4th word */ | ||
402 | "move.w (%[ata]),%%d2 \n" /* load 5th word */ | ||
403 | "swap %%d2 \n" /* move it to upper 16 bits */ | ||
404 | "move.w (%[ata]),%%d2 \n" /* load 6th word */ | ||
405 | "move.w (%[ata]),%%d3 \n" /* load 7th word */ | ||
406 | "swap %%d3 \n" /* move it to upper 16 bits */ | ||
407 | "move.w (%[ata]),%%d3 \n" /* load 8th word */ | ||
408 | "movem.l %%d0-%%d3,(%[buf]) \n" /* store line */ | ||
409 | "lea.l (16,%[buf]),%[buf] \n" | ||
410 | "cmp.l %[buf],%[end] \n" /* run up to last line bound */ | ||
411 | "bhi.b .loop_a_line \n" | ||
412 | |||
413 | "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */ | ||
414 | "cmp.l %[buf],%[end] \n" /* any trailing longwords? */ | ||
415 | "bls.b .end_a_l2 \n" /* no: skip loop */ | ||
416 | |||
417 | ".loop_a_l2: \n" | ||
418 | "move.w (%[ata]),%%d1 \n" /* read first word */ | ||
419 | "swap %%d1 \n" /* move it to upper 16 bits */ | ||
420 | "move.w (%[ata]),%%d1 \n" /* read second word */ | ||
421 | "move.l %%d1,(%[buf])+ \n" /* store as long */ | ||
422 | "cmp.l %[buf],%[end] \n" /* run up to last long bound */ | ||
423 | "bhi.b .loop_a_l2 \n" | ||
424 | |||
425 | ".end_a_l2: \n" | ||
426 | "addq.l #2,%[end] \n" /* back to final end address */ | ||
427 | "cmp.l %[buf],%[end] \n" /* one word left? */ | ||
428 | "bls.b .end_a_w2 \n" | ||
429 | |||
430 | "move.w (%[ata]),(%[buf])+ \n" /* copy final word */ | ||
431 | |||
432 | ".end_a_w2: \n" | ||
433 | |||
434 | ".exit: \n" | ||
435 | : /* outputs */ | ||
436 | [buf]"+a"(buf), | ||
437 | [end]"+a"(bufend) | ||
438 | : /* inputs */ | ||
439 | [ata]"a"(&ATA_DATA) | ||
440 | : /*trashed */ | ||
441 | "d0", "d1", "d2", "d3", "d4", "d5", "d6" | ||
442 | ); | ||
443 | #else | ||
444 | /* SH1 turbo-charged assembler reading */ | ||
445 | /* this assumes wordcount to be a multiple of 4 */ | ||
446 | asm volatile ( | ||
447 | "mov %[buf],r0 \n" | ||
448 | "tst #1,r0 \n" /* 16-bit aligned ? */ | ||
449 | "bt .aligned \n" /* yes, do word copy */ | ||
450 | |||
451 | /* not 16-bit aligned */ | ||
452 | "mov #-1,r3 \n" /* prepare a bit mask for high byte */ | ||
453 | "shll8 r3 \n" /* r3 = 0xFFFFFF00 */ | ||
454 | |||
455 | "mov.w @%[ata],r2 \n" /* read first word (1st round) */ | ||
456 | "mov.b r2,@%[buf] \n" /* store low byte of first word */ | ||
457 | "bra .start4_b \n" /* jump into loop after next instr. */ | ||
458 | "add #-5,%[buf] \n" /* adjust for dest. offsets; now even */ | ||
459 | |||
460 | ".align 2 \n" | ||
461 | ".loop4_b: \n" /* main loop: copy 4 words in a row */ | ||
462 | "mov.w @%[ata],r2 \n" /* read first word (2+ round) */ | ||
463 | "and r3,r1 \n" /* get high byte of fourth word (2+ round) */ | ||
464 | "extu.b r2,r0 \n" /* get low byte of first word (2+ round) */ | ||
465 | "or r1,r0 \n" /* combine with high byte of fourth word */ | ||
466 | "mov.w r0,@(4,%[buf]) \n" /* store at buf[4] */ | ||
467 | "nop \n" /* maintain alignment */ | ||
468 | ".start4_b: \n" | ||
469 | "mov.w @%[ata],r1 \n" /* read second word */ | ||
470 | "and r3,r2 \n" /* get high byte of first word */ | ||
471 | "extu.b r1,r0 \n" /* get low byte of second word */ | ||
472 | "or r2,r0 \n" /* combine with high byte of first word */ | ||
473 | "mov.w r0,@(6,%[buf]) \n" /* store at buf[6] */ | ||
474 | "add #8,%[buf] \n" /* buf += 8 */ | ||
475 | "mov.w @%[ata],r2 \n" /* read third word */ | ||
476 | "and r3,r1 \n" /* get high byte of second word */ | ||
477 | "extu.b r2,r0 \n" /* get low byte of third word */ | ||
478 | "or r1,r0 \n" /* combine with high byte of second word */ | ||
479 | "mov.w r0,@%[buf] \n" /* store at buf[0] */ | ||
480 | "cmp/hi %[buf],%[end] \n" /* check for end */ | ||
481 | "mov.w @%[ata],r1 \n" /* read fourth word */ | ||
482 | "and r3,r2 \n" /* get high byte of third word */ | ||
483 | "extu.b r1,r0 \n" /* get low byte of fourth word */ | ||
484 | "or r2,r0 \n" /* combine with high byte of third word */ | ||
485 | "mov.w r0,@(2,%[buf]) \n" /* store at buf[2] */ | ||
486 | "bt .loop4_b \n" | ||
487 | /* 24 instructions for 4 copies, takes 30 clock cycles (4 wait) */ | ||
488 | /* avg. 7.5 cycles per word - 86% faster */ | ||
489 | |||
490 | "swap.b r1,r0 \n" /* get high byte of last word */ | ||
491 | "bra .exit \n" | ||
492 | "mov.b r0,@(4,%[buf]) \n" /* and store it */ | ||
493 | |||
494 | /* 16-bit aligned, loop(read and store word) */ | ||
495 | ".aligned: \n" | ||
496 | "mov.w @%[ata],r2 \n" /* read first word (1st round) */ | ||
497 | "bra .start4_w \n" /* jump into loop after next instr. */ | ||
498 | "add #-6,%[buf] \n" /* adjust for destination offsets */ | ||
499 | |||
500 | ".align 2 \n" | ||
501 | ".loop4_w: \n" /* main loop: copy 4 words in a row */ | ||
502 | "mov.w @%[ata],r2 \n" /* read first word (2+ round) */ | ||
503 | "swap.b r1,r0 \n" /* swap fourth word (2+ round) */ | ||
504 | "mov.w r0,@(4,%[buf]) \n" /* store fourth word (2+ round) */ | ||
505 | "nop \n" /* maintain alignment */ | ||
506 | ".start4_w: \n" | ||
507 | "mov.w @%[ata],r1 \n" /* read second word */ | ||
508 | "swap.b r2,r0 \n" /* swap first word */ | ||
509 | "mov.w r0,@(6,%[buf]) \n" /* store first word in buf[6] */ | ||
510 | "add #8,%[buf] \n" /* buf += 8 */ | ||
511 | "mov.w @%[ata],r2 \n" /* read third word */ | ||
512 | "swap.b r1,r0 \n" /* swap second word */ | ||
513 | "mov.w r0,@%[buf] \n" /* store second word in buf[0] */ | ||
514 | "cmp/hi %[buf],%[end] \n" /* check for end */ | ||
515 | "mov.w @%[ata],r1 \n" /* read fourth word */ | ||
516 | "swap.b r2,r0 \n" /* swap third word */ | ||
517 | "mov.w r0,@(2,%[buf]) \n" /* store third word */ | ||
518 | "bt .loop4_w \n" | ||
519 | /* 16 instructions for 4 copies, takes 22 clock cycles (4 wait) */ | ||
520 | /* avg. 5.5 cycles per word - 118% faster */ | ||
521 | |||
522 | "swap.b r1,r0 \n" /* swap fourth word (last round) */ | ||
523 | "mov.w r0,@(4,%[buf]) \n" /* and store it */ | ||
524 | |||
525 | ".exit: \n" | ||
526 | : /* outputs */ | ||
527 | [buf]"+r"(buf) | ||
528 | : /* inputs */ | ||
529 | [end]"r"(buf + 2 * wordcount - 12), /* adjusted for offsets */ | ||
530 | [ata]"r"(&ATA_DATA) | ||
531 | : /*trashed */ | ||
532 | "r0","r1","r2","r3" | ||
533 | ); | ||
534 | #endif /* CPU */ | ||
535 | #endif /* !PREFER_C_READING */ | ||
536 | } | ||
537 | |||
538 | #if CONFIG_LED == LED_REAL | ||
539 | /* Conditionally block LED access for the ATA driver, so the LED can be | ||
540 | * (mis)used for other purposes */ | ||
541 | static void ata_led(bool on) { | ||
542 | ata_led_on = on; | ||
543 | if (ata_led_enabled) { | ||
544 | led(ata_led_on); | ||
545 | } | ||
546 | } | 196 | } |
547 | #else | 197 | #endif /* !ATA_OPTIMIZED_READING */ |
548 | #define ata_led(on) led(on) | ||
549 | #endif | ||
550 | 198 | ||
551 | int ata_read_sectors(IF_MV2(int drive,) | 199 | int ata_read_sectors(IF_MV2(int drive,) |
552 | unsigned long start, | 200 | unsigned long start, |
@@ -696,13 +344,10 @@ int ata_read_sectors(IF_MV2(int drive,) | |||
696 | return ret; | 344 | return ret; |
697 | } | 345 | } |
698 | 346 | ||
699 | /* the tight loop of ata_write_sectors(), to avoid the whole in IRAM */ | 347 | #ifndef ATA_OPTIMIZED_WRITING |
700 | static void copy_write_sectors(const unsigned char* buf, int wordcount) | 348 | static void copy_write_sectors(const unsigned char* buf, int wordcount) ICODE_ATTR; |
701 | ICODE_ATTR NOINLINE_ATTR; | ||
702 | static void copy_write_sectors(const unsigned char* buf, int wordcount) | 349 | static void copy_write_sectors(const unsigned char* buf, int wordcount) |
703 | { | 350 | { |
704 | #ifdef PREFER_C_WRITING | ||
705 | |||
706 | if ( (unsigned long)buf & 1) | 351 | if ( (unsigned long)buf & 1) |
707 | { /* not 16-bit aligned, copy byte by byte */ | 352 | { /* not 16-bit aligned, copy byte by byte */ |
708 | unsigned short tmp = 0; | 353 | unsigned short tmp = 0; |
@@ -710,11 +355,9 @@ static void copy_write_sectors(const unsigned char* buf, int wordcount) | |||
710 | do | 355 | do |
711 | { | 356 | { |
712 | #if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN) | 357 | #if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN) |
713 | /* SH1: loop compiles to 9 assembler instructions */ | ||
714 | /* takes 13 clock cycles (2 pipeline stalls) */ | ||
715 | tmp = (unsigned short) *buf++; | 358 | tmp = (unsigned short) *buf++; |
716 | tmp |= (unsigned short) *buf++ << 8; /* I assume big endian */ | 359 | tmp |= (unsigned short) *buf++ << 8; |
717 | SET_16BITREG(ATA_DATA, tmp); /* and don't use the SWAB16 macro */ | 360 | SET_16BITREG(ATA_DATA, tmp); |
718 | #else | 361 | #else |
719 | tmp = (unsigned short) *buf++ << 8; | 362 | tmp = (unsigned short) *buf++ << 8; |
720 | tmp |= (unsigned short) *buf++; | 363 | tmp |= (unsigned short) *buf++; |
@@ -729,298 +372,14 @@ static void copy_write_sectors(const unsigned char* buf, int wordcount) | |||
729 | do | 372 | do |
730 | { | 373 | { |
731 | #ifdef SWAP_WORDS | 374 | #ifdef SWAP_WORDS |
732 | /* loop compiles to 6 assembler instructions */ | ||
733 | /* takes 10 clock cycles (2 pipeline stalls) */ | ||
734 | SET_16BITREG(ATA_DATA, swap16(*wbuf)); | 375 | SET_16BITREG(ATA_DATA, swap16(*wbuf)); |
735 | #else | 376 | #else |
736 | SET_16BITREG(ATA_DATA, *wbuf); | 377 | SET_16BITREG(ATA_DATA, *wbuf); |
737 | #endif | 378 | #endif |
738 | } while (++wbuf < wbufend); /* tail loop is faster */ | 379 | } while (++wbuf < wbufend); /* tail loop is faster */ |
739 | } | 380 | } |
740 | #else /* !PREFER_C_WRITING */ | ||
741 | #ifdef CPU_COLDFIRE | ||
742 | const unsigned char* bufend = buf + 2 * wordcount; | ||
743 | /* coldfire asm writing, utilising line bursts */ | ||
744 | asm volatile ( | ||
745 | "move.l %[buf],%%d0 \n" | ||
746 | "btst.l #0,%%d0 \n" /* 16-bit aligned? */ | ||
747 | "jeq .w_aligned \n" /* yes, do word copy */ | ||
748 | |||
749 | /* not 16-bit aligned */ | ||
750 | "subq.l #1,%[end] \n" /* last byte is done unconditionally */ | ||
751 | "moveq.l #24,%%d1 \n" /* preload shift count */ | ||
752 | |||
753 | "move.b (%[buf])+,%%d2 \n" | ||
754 | |||
755 | "btst.l #1,%%d0 \n" /* longword aligned? */ | ||
756 | "beq.b .w_end_u_w1 \n" /* yes, skip leading word handling */ | ||
757 | |||
758 | "swap %%d2 \n" | ||
759 | "move.w (%[buf])+,%%d2 \n" | ||
760 | "move.l %%d2,%%d3 \n" | ||
761 | "lsr.l #8,%%d3 \n" | ||
762 | "move.w %%d3,(%[ata]) \n" | ||
763 | |||
764 | ".w_end_u_w1: \n" | ||
765 | "moveq.l #12,%%d0 \n" | ||
766 | "add.l %[buf],%%d0 \n" | ||
767 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
768 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
769 | "bls.b .w_end_u_l1 \n" /* no: skip loop */ | ||
770 | |||
771 | ".w_loop_u_l1: \n" | ||
772 | "move.l (%[buf])+,%%d3 \n" | ||
773 | "move.l %%d3,%%d4 \n" | ||
774 | "lsl.l %%d1,%%d2 \n" | ||
775 | "lsr.l #8,%%d3 \n" | ||
776 | "or.l %%d3,%%d2 \n" | ||
777 | "swap %%d2 \n" | ||
778 | "move.w %%d2,(%[ata]) \n" | ||
779 | "swap %%d2 \n" | ||
780 | "move.w %%d2,(%[ata]) \n" | ||
781 | "move.l %%d4,%%d2 \n" | ||
782 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
783 | "bhi.b .w_loop_u_l1 \n" | ||
784 | |||
785 | ".w_end_u_l1: \n" | ||
786 | "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */ | ||
787 | |||
788 | ".w_loop_u_line: \n" | ||
789 | "movem.l (%[buf]),%%d3-%%d6 \n" | ||
790 | "lea.l (16,%[buf]),%[buf] \n" | ||
791 | "move.l %%d3,%%d0 \n" | ||
792 | "lsl.l %%d1,%%d2 \n" | ||
793 | "lsr.l #8,%%d0 \n" | ||
794 | "or.l %%d0,%%d2 \n" | ||
795 | "swap %%d2 \n" | ||
796 | "move.w %%d2,(%[ata]) \n" | ||
797 | "swap %%d2 \n" | ||
798 | "move.w %%d2,(%[ata]) \n" | ||
799 | "move.l %%d4,%%d0 \n" | ||
800 | "lsl.l %%d1,%%d3 \n" | ||
801 | "lsr.l #8,%%d0 \n" | ||
802 | "or.l %%d0,%%d3 \n" | ||
803 | "swap %%d3 \n" | ||
804 | "move.w %%d3,(%[ata]) \n" | ||
805 | "swap %%d3 \n" | ||
806 | "move.w %%d3,(%[ata]) \n" | ||
807 | "move.l %%d5,%%d0 \n" | ||
808 | "lsl.l %%d1,%%d4 \n" | ||
809 | "lsr.l #8,%%d0 \n" | ||
810 | "or.l %%d0,%%d4 \n" | ||
811 | "swap %%d4 \n" | ||
812 | "move.w %%d4,(%[ata]) \n" | ||
813 | "swap %%d4 \n" | ||
814 | "move.w %%d4,(%[ata]) \n" | ||
815 | "move.l %%d6,%%d0 \n" | ||
816 | "lsl.l %%d1,%%d5 \n" | ||
817 | "lsr.l #8,%%d0 \n" | ||
818 | "or.l %%d0,%%d5 \n" | ||
819 | "swap %%d5 \n" | ||
820 | "move.w %%d5,(%[ata]) \n" | ||
821 | "swap %%d5 \n" | ||
822 | "move.w %%d5,(%[ata]) \n" | ||
823 | "move.l %%d6,%%d2 \n" | ||
824 | "cmp.l %[buf],%[end] \n" /* run up to last line bound */ | ||
825 | "bhi.b .w_loop_u_line \n" | ||
826 | |||
827 | "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */ | ||
828 | "cmp.l %[buf],%[end] \n" /* any trailing longwords? */ | ||
829 | "bls.b .w_end_u_l2 \n" /* no: skip loop */ | ||
830 | |||
831 | ".w_loop_u_l2: \n" | ||
832 | "move.l (%[buf])+,%%d3 \n" | ||
833 | "move.l %%d3,%%d4 \n" | ||
834 | "lsl.l %%d1,%%d2 \n" | ||
835 | "lsr.l #8,%%d3 \n" | ||
836 | "or.l %%d3,%%d2 \n" | ||
837 | "swap %%d2 \n" | ||
838 | "move.w %%d2,(%[ata]) \n" | ||
839 | "swap %%d2 \n" | ||
840 | "move.w %%d2,(%[ata]) \n" | ||
841 | "move.l %%d4,%%d2 \n" | ||
842 | "cmp.l %[buf],%[end] \n" /* run up to first line bound */ | ||
843 | "bhi.b .w_loop_u_l2 \n" | ||
844 | |||
845 | ".w_end_u_l2: \n" | ||
846 | "addq.l #2,%[end] \n" /* back to final end address */ | ||
847 | "cmp.l %[buf],%[end] \n" /* one word left? */ | ||
848 | "bls.b .w_end_u_w2 \n" | ||
849 | |||
850 | "swap %%d2 \n" | ||
851 | "move.w (%[buf])+,%%d2 \n" | ||
852 | "move.l %%d2,%%d3 \n" | ||
853 | "lsr.l #8,%%d3 \n" | ||
854 | "move.w %%d3,(%[ata]) \n" | ||
855 | |||
856 | ".w_end_u_w2: \n" | ||
857 | "lsl.l #8,%%d2 \n" | ||
858 | "move.b (%[buf])+,%%d2 \n" | ||
859 | "move.w %%d2,(%[ata]) \n" | ||
860 | "bra.b .w_exit \n" | ||
861 | |||
862 | /* 16-bit aligned */ | ||
863 | ".w_aligned: \n" | ||
864 | "btst.l #1,%%d0 \n" | ||
865 | "beq.b .w_end_a_w1 \n" | ||
866 | |||
867 | "move.w (%[buf])+,(%[ata]) \n" /* copy initial word */ | ||
868 | |||
869 | ".w_end_a_w1: \n" | ||
870 | "moveq.l #12,%%d0 \n" | ||
871 | "add.l %[buf],%%d0 \n" | ||
872 | "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */ | ||
873 | "cmp.l %[buf],%%d0 \n" /* any leading longwords? */ | ||
874 | "bls.b .w_end_a_l1 \n" /* no: skip loop */ | ||
875 | |||
876 | ".w_loop_a_l1: \n" | ||
877 | "move.l (%[buf])+,%%d1 \n" | ||
878 | "swap %%d1 \n" | ||
879 | "move.w %%d1,(%[ata]) \n" | ||
880 | "swap %%d1 \n" | ||
881 | "move.w %%d1,(%[ata]) \n" | ||
882 | "cmp.l %[buf],%%d0 \n" /* run up to first line bound */ | ||
883 | "bhi.b .w_loop_a_l1 \n" | ||
884 | |||
885 | ".w_end_a_l1: \n" | ||
886 | "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */ | ||
887 | |||
888 | ".w_loop_a_line: \n" | ||
889 | "movem.l (%[buf]),%%d0-%%d3 \n" | ||
890 | "lea.l (16,%[buf]),%[buf] \n" | ||
891 | "swap %%d0 \n" | ||
892 | "move.w %%d0,(%[ata]) \n" | ||
893 | "swap %%d0 \n" | ||
894 | "move.w %%d0,(%[ata]) \n" | ||
895 | "swap %%d1 \n" | ||
896 | "move.w %%d1,(%[ata]) \n" | ||
897 | "swap %%d1 \n" | ||
898 | "move.w %%d1,(%[ata]) \n" | ||
899 | "swap %%d2 \n" | ||
900 | "move.w %%d2,(%[ata]) \n" | ||
901 | "swap %%d2 \n" | ||
902 | "move.w %%d2,(%[ata]) \n" | ||
903 | "swap %%d3 \n" | ||
904 | "move.w %%d3,(%[ata]) \n" | ||
905 | "swap %%d3 \n" | ||
906 | "move.w %%d3,(%[ata]) \n" | ||
907 | "cmp.l %[buf],%[end] \n" /* run up to last line bound */ | ||
908 | "bhi.b .w_loop_a_line \n" | ||
909 | |||
910 | "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */ | ||
911 | "cmp.l %[buf],%[end] \n" /* any trailing longwords? */ | ||
912 | "bls.b .w_end_a_l2 \n" /* no: skip loop */ | ||
913 | |||
914 | ".w_loop_a_l2: \n" | ||
915 | "move.l (%[buf])+,%%d1 \n" | ||
916 | "swap %%d1 \n" | ||
917 | "move.w %%d1,(%[ata]) \n" | ||
918 | "swap %%d1 \n" | ||
919 | "move.w %%d1,(%[ata]) \n" | ||
920 | "cmp.l %[buf],%[end] \n" /* run up to first line bound */ | ||
921 | "bhi.b .w_loop_a_l2 \n" | ||
922 | |||
923 | ".w_end_a_l2: \n" | ||
924 | "addq.l #2,%[end] \n" /* back to final end address */ | ||
925 | "cmp.l %[buf],%[end] \n" /* one word left? */ | ||
926 | "bls.b .w_end_a_w2 \n" | ||
927 | |||
928 | "move.w (%[buf])+,(%[ata]) \n" /* copy final word */ | ||
929 | |||
930 | ".w_end_a_w2: \n" | ||
931 | |||
932 | ".w_exit: \n" | ||
933 | : /* outputs */ | ||
934 | [buf]"+a"(buf), | ||
935 | [end]"+a"(bufend) | ||
936 | : /* inputs */ | ||
937 | [ata]"a"(&ATA_DATA) | ||
938 | : /*trashed */ | ||
939 | "d0", "d1", "d2", "d3", "d4", "d5", "d6" | ||
940 | ); | ||
941 | #else | ||
942 | /* SH1 optimized assembler version */ | ||
943 | /* this assumes wordcount to be a multiple of 2 */ | ||
944 | |||
945 | /* writing is not unrolled as much as reading, for several reasons: | ||
946 | * - a similar instruction sequence is faster for writing than for reading | ||
947 | * because the auto-incrementing load instructions can be used | ||
948 | * - writing profits from warp mode | ||
949 | * Both of these add up to have writing faster than the more unrolled reading. | ||
950 | */ | ||
951 | asm volatile ( | ||
952 | "mov %[buf],r0 \n" | ||
953 | "tst #1,r0 \n" /* 16-bit aligned ? */ | ||
954 | "bt .w_aligned \n" /* yes, do word copy */ | ||
955 | |||
956 | /* not 16-bit aligned */ | ||
957 | "mov #-1,r6 \n" /* prepare a bit mask for high byte */ | ||
958 | "shll8 r6 \n" /* r6 = 0xFFFFFF00 */ | ||
959 | |||
960 | "mov.b @%[buf]+,r2 \n" /* load (initial old second) first byte */ | ||
961 | "mov.w @%[buf]+,r3 \n" /* load (initial) first word */ | ||
962 | "bra .w_start2_b \n" | ||
963 | "extu.b r2,r0 \n" /* extend unsigned */ | ||
964 | |||
965 | ".align 2 \n" | ||
966 | ".w_loop2_b: \n" /* main loop: copy 2 words in a row */ | ||
967 | "mov.w @%[buf]+,r3 \n" /* load first word (2+ round) */ | ||
968 | "extu.b r2,r0 \n" /* put away low byte of second word (2+ round) */ | ||
969 | "and r6,r2 \n" /* get high byte of second word (2+ round) */ | ||
970 | "or r1,r2 \n" /* combine with low byte of old first word */ | ||
971 | "mov.w r2,@%[ata] \n" /* write that */ | ||
972 | ".w_start2_b: \n" | ||
973 | "cmp/hi %[buf],%[end] \n" /* check for end */ | ||
974 | "mov.w @%[buf]+,r2 \n" /* load second word */ | ||
975 | "extu.b r3,r1 \n" /* put away low byte of first word */ | ||
976 | "and r6,r3 \n" /* get high byte of first word */ | ||
977 | "or r0,r3 \n" /* combine with high byte of old second word */ | ||
978 | "mov.w r3,@%[ata] \n" /* write that */ | ||
979 | "bt .w_loop2_b \n" | ||
980 | /* 12 instructions for 2 copies, takes 14 clock cycles */ | ||
981 | /* avg. 7 cycles per word - 85% faster */ | ||
982 | |||
983 | /* the loop "overreads" 1 byte past the buffer end, however, the last */ | ||
984 | /* byte is not written to disk */ | ||
985 | "and r6,r2 \n" /* get high byte of last word */ | ||
986 | "or r1,r2 \n" /* combine with low byte of old first word */ | ||
987 | "bra .w_exit \n" | ||
988 | "mov.w r2,@%[ata] \n" /* write last word */ | ||
989 | |||
990 | /* 16-bit aligned, loop(load and write word) */ | ||
991 | ".w_aligned: \n" | ||
992 | "bra .w_start2_w \n" /* jump into loop after next instr. */ | ||
993 | "mov.w @%[buf]+,r2 \n" /* load first word (1st round) */ | ||
994 | |||
995 | ".align 2 \n" | ||
996 | ".w_loop2_w: \n" /* main loop: copy 2 words in a row */ | ||
997 | "mov.w @%[buf]+,r2 \n" /* load first word (2+ round) */ | ||
998 | "swap.b r1,r0 \n" /* swap second word (2+ round) */ | ||
999 | "mov.w r0,@%[ata] \n" /* write second word (2+ round) */ | ||
1000 | ".w_start2_w: \n" | ||
1001 | "cmp/hi %[buf],%[end] \n" /* check for end */ | ||
1002 | "mov.w @%[buf]+,r1 \n" /* load second word */ | ||
1003 | "swap.b r2,r0 \n" /* swap first word */ | ||
1004 | "mov.w r0,@%[ata] \n" /* write first word */ | ||
1005 | "bt .w_loop2_w \n" | ||
1006 | /* 8 instructions for 2 copies, takes 10 clock cycles */ | ||
1007 | /* avg. 5 cycles per word - 100% faster */ | ||
1008 | |||
1009 | "swap.b r1,r0 \n" /* swap second word (last round) */ | ||
1010 | "mov.w r0,@%[ata] \n" /* and write it */ | ||
1011 | |||
1012 | ".w_exit: \n" | ||
1013 | : /* outputs */ | ||
1014 | [buf]"+r"(buf) | ||
1015 | : /* inputs */ | ||
1016 | [end]"r"(buf + 2 * wordcount - 4), /* adjusted for earl check */ | ||
1017 | [ata]"r"(&ATA_DATA) | ||
1018 | : /*trashed */ | ||
1019 | "r0","r1","r2","r3","r6" | ||
1020 | ); | ||
1021 | #endif /* CPU */ | ||
1022 | #endif /* !PREFER_C_WRITING */ | ||
1023 | } | 381 | } |
382 | #endif /* !ATA_OPTIMIZED_WRITING */ | ||
1024 | 383 | ||
1025 | int ata_write_sectors(IF_MV2(int drive,) | 384 | int ata_write_sectors(IF_MV2(int drive,) |
1026 | unsigned long start, | 385 | unsigned long start, |
@@ -1306,17 +665,7 @@ int ata_hard_reset(void) | |||
1306 | { | 665 | { |
1307 | int ret; | 666 | int ret; |
1308 | 667 | ||
1309 | #ifdef TARGET_TREE | ||
1310 | ata_reset(); | 668 | ata_reset(); |
1311 | #elif CONFIG_CPU == SH7034 | ||
1312 | /* state HRR0 */ | ||
1313 | and_b(~0x02, &PADRH); /* assert _RESET */ | ||
1314 | sleep(1); /* > 25us */ | ||
1315 | |||
1316 | /* state HRR1 */ | ||
1317 | or_b(0x02, &PADRH); /* negate _RESET */ | ||
1318 | sleep(1); /* > 2ms */ | ||
1319 | #endif | ||
1320 | 669 | ||
1321 | /* state HRR2 */ | 670 | /* state HRR2 */ |
1322 | SET_REG(ATA_SELECT, ata_device); /* select the right device */ | 671 | SET_REG(ATA_SELECT, ata_device); /* select the right device */ |
@@ -1416,36 +765,6 @@ static int master_slave_detect(void) | |||
1416 | return 0; | 765 | return 0; |
1417 | } | 766 | } |
1418 | 767 | ||
1419 | #if CONFIG_CPU == SH7034 /* special archos quirk */ | ||
1420 | static void io_address_detect(void) | ||
1421 | { /* now, use the HW mask instead of probing */ | ||
1422 | if (read_hw_mask() & ATA_ADDRESS_200) | ||
1423 | { | ||
1424 | ata_io_address = 0x200; /* For debug purposes only */ | ||
1425 | ata_control = ATA_CONTROL1; | ||
1426 | } | ||
1427 | else | ||
1428 | { | ||
1429 | ata_io_address = 0x300; /* For debug purposes only */ | ||
1430 | ata_control = ATA_CONTROL2; | ||
1431 | } | ||
1432 | } | ||
1433 | #endif | ||
1434 | |||
1435 | #ifndef TARGET_TREE | ||
1436 | void ata_enable(bool on) | ||
1437 | { | ||
1438 | #if CONFIG_CPU == SH7034 | ||
1439 | if(on) | ||
1440 | and_b(~0x80, &PADRL); /* enable ATA */ | ||
1441 | else | ||
1442 | or_b(0x80, &PADRL); /* disable ATA */ | ||
1443 | |||
1444 | or_b(0x80, &PAIORL); | ||
1445 | #endif | ||
1446 | } | ||
1447 | #endif | ||
1448 | |||
1449 | static int identify(void) | 768 | static int identify(void) |
1450 | { | 769 | { |
1451 | int i; | 770 | int i; |
@@ -1589,39 +908,12 @@ static int init_and_check(bool hard_reset) | |||
1589 | int ata_init(void) | 908 | int ata_init(void) |
1590 | { | 909 | { |
1591 | int rc; | 910 | int rc; |
1592 | #ifdef TARGET_TREE | ||
1593 | bool coldstart = ata_is_coldstart(); | 911 | bool coldstart = ata_is_coldstart(); |
1594 | #else | ||
1595 | bool coldstart = (PACR2 & 0x4000) != 0; | ||
1596 | #endif | ||
1597 | 912 | ||
1598 | mutex_init(&ata_mtx); | 913 | mutex_init(&ata_mtx); |
1599 | 914 | ||
1600 | ata_led(false); | 915 | ata_led(false); |
1601 | |||
1602 | #ifdef TARGET_TREE | ||
1603 | ata_device_init(); | 916 | ata_device_init(); |
1604 | #elif CONFIG_CPU == SH7034 | ||
1605 | /* Port A setup */ | ||
1606 | or_b(0x02, &PAIORH); /* output for ATA reset */ | ||
1607 | or_b(0x02, &PADRH); /* release ATA reset */ | ||
1608 | PACR2 &= 0xBFFF; /* GPIO function for PA7 (IDE enable) */ | ||
1609 | #elif CONFIG_CPU == MCF5249 | ||
1610 | #ifdef HAVE_ATA_LED_CTRL | ||
1611 | /* Enable disk LED & ISD chip power control */ | ||
1612 | and_l(~0x0000240, &GPIO_OUT); | ||
1613 | or_l(0x00000240, &GPIO_ENABLE); | ||
1614 | or_l(0x00000200, &GPIO_FUNCTION); | ||
1615 | #endif | ||
1616 | |||
1617 | /* ATA reset */ | ||
1618 | or_l(0x00080000, &GPIO_OUT); | ||
1619 | or_l(0x00080000, &GPIO_ENABLE); | ||
1620 | or_l(0x00080000, &GPIO_FUNCTION); | ||
1621 | |||
1622 | /* FYI: The IDECONFIGx registers are set by set_cpu_frequency() */ | ||
1623 | #endif | ||
1624 | |||
1625 | sleeping = false; | 917 | sleeping = false; |
1626 | ata_enable(true); | 918 | ata_enable(true); |
1627 | 919 | ||
@@ -1632,8 +924,8 @@ int ata_init(void) | |||
1632 | sleep(HZ); /* allow voltage to build up */ | 924 | sleep(HZ); /* allow voltage to build up */ |
1633 | } | 925 | } |
1634 | 926 | ||
1635 | #if CONFIG_CPU == SH7034 | 927 | #ifdef ATA_ADDRESS_DETECT |
1636 | io_address_detect(); | 928 | ata_address_detect(); |
1637 | #endif | 929 | #endif |
1638 | /* first try, hard reset at cold start only */ | 930 | /* first try, hard reset at cold start only */ |
1639 | rc = init_and_check(coldstart); | 931 | rc = init_and_check(coldstart); |
@@ -1680,12 +972,12 @@ int ata_init(void) | |||
1680 | } | 972 | } |
1681 | 973 | ||
1682 | #if CONFIG_LED == LED_REAL | 974 | #if CONFIG_LED == LED_REAL |
1683 | void ata_set_led_enabled(bool enabled) { | 975 | void ata_set_led_enabled(bool enabled) |
976 | { | ||
1684 | ata_led_enabled = enabled; | 977 | ata_led_enabled = enabled; |
1685 | if (ata_led_enabled) { | 978 | if (ata_led_enabled) |
1686 | led(ata_led_on); | 979 | led(ata_led_on); |
1687 | } else { | 980 | else |
1688 | led(false); | 981 | led(false); |
1689 | } | ||
1690 | } | 982 | } |
1691 | #endif | 983 | #endif |
diff --git a/firmware/target/arm/ata-target.h b/firmware/target/arm/ata-target.h index 0e4c187e27..0f25f89d6b 100644 --- a/firmware/target/arm/ata-target.h +++ b/firmware/target/arm/ata-target.h | |||
@@ -20,8 +20,6 @@ | |||
20 | #if (CONFIG_CPU == PP5002) || (CONFIG_CPU == PP5020) | 20 | #if (CONFIG_CPU == PP5002) || (CONFIG_CPU == PP5020) |
21 | 21 | ||
22 | /* Plain C read & write loops */ | 22 | /* Plain C read & write loops */ |
23 | #define PREFER_C_READING | ||
24 | #define PREFER_C_WRITING | ||
25 | 23 | ||
26 | #if (CONFIG_CPU == PP5002) | 24 | #if (CONFIG_CPU == PP5002) |
27 | #define ATA_IOBASE 0xc00031e0 | 25 | #define ATA_IOBASE 0xc00031e0 |
diff --git a/firmware/target/arm/gigabeat/meg-fx/ata-target.h b/firmware/target/arm/gigabeat/meg-fx/ata-target.h index 95b66ab1bd..1d49a1b874 100644 --- a/firmware/target/arm/gigabeat/meg-fx/ata-target.h +++ b/firmware/target/arm/gigabeat/meg-fx/ata-target.h | |||
@@ -20,8 +20,6 @@ | |||
20 | #define ATA_TARGET_H | 20 | #define ATA_TARGET_H |
21 | 21 | ||
22 | /* Plain C read & write loops */ | 22 | /* Plain C read & write loops */ |
23 | #define PREFER_C_READING | ||
24 | #define PREFER_C_WRITING | ||
25 | 23 | ||
26 | #define ATA_IOBASE 0x18000000 | 24 | #define ATA_IOBASE 0x18000000 |
27 | #define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE))) | 25 | #define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE))) |
diff --git a/firmware/target/coldfire/ata-as-coldfire.S b/firmware/target/coldfire/ata-as-coldfire.S new file mode 100755 index 0000000000..3b0d67f8e4 --- /dev/null +++ b/firmware/target/coldfire/ata-as-coldfire.S | |||
@@ -0,0 +1,459 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by Jens Arnold | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | .section .icode,"ax",@progbits | ||
21 | |||
22 | .equ .ata_port, 0x20000020 | ||
23 | |||
24 | .align 2 | ||
25 | .global copy_read_sectors | ||
26 | .type copy_read_sectors,@function | ||
27 | |||
28 | /* Read a number of words from the ATA data port into a memory buffer | ||
29 | * | ||
30 | * Utilises line bursts, assumes there is at least one full line (16 bytes) to copy. | ||
31 | * | ||
32 | * Arguments: | ||
33 | * (4,%sp) - buffer address (any byte alignment; odd addresses take the shifting path) | ||
34 | * (8,%sp) - word count (number of 16-bit words) | ||
35 | * | ||
36 | * Register usage: | ||
37 | * %a0 - current address | ||
38 | * %a1 - end address | ||
39 | * %a2 - ata port | ||
40 | * %d0 - scratch (also holds the first line bound during the leading longword loop) | ||
41 | * %d1 - shift count (unaligned path only) | ||
42 | * %d2-%d6 - read buffers (the 16-bit aligned path uses %d0-%d3 instead) | ||
43 | */ | ||
44 | |||
45 | copy_read_sectors: | ||
46 | lea.l (-24, %sp), %sp /* make room for 6 saved registers */ | ||
47 | movem.l %d2-%d6/%a2, (%sp) /* save the registers restored at .r_exit */ | ||
48 | movem.l (28, %sp), %a0-%a1 /* a0 = buffer address, a1 = word count */ | ||
49 | add.l %a1, %a1 /* word count -> byte count */ | ||
50 | add.l %a0, %a1 /* a1 = end address */ | ||
51 | lea.l .ata_port, %a2 | ||
52 | |||
53 | move.l %a0, %d0 | ||
54 | btst.l #0, %d0 /* 16-bit aligned? */ | ||
55 | jeq .r_aligned /* yes, do word copy */ | ||
56 | |||
57 | /* not 16-bit aligned */ | ||
58 | subq.l #1, %a1 /* last byte is done unconditionally */ | ||
59 | moveq.l #24, %d1 /* preload shift count */ | ||
60 | |||
61 | move.w (%a2), %d2 /* load initial word */ | ||
62 | move.l %d2, %d3 | ||
63 | lsr.l #8, %d3 | ||
64 | move.b %d3, (%a0)+ /* write high byte of it, aligns dest addr */ | ||
65 | |||
66 | btst.l #1, %d0 /* longword aligned? d0 is the old (odd) address: bit 1 set means a0 is now a multiple of 4 */ | ||
67 | bne.b .r_end_u_w1 /* yes, skip leading word handling */ | ||
68 | |||
69 | swap %d2 /* move initial word up */ | ||
70 | move.w (%a2), %d2 /* combine with second word */ | ||
71 | move.l %d2, %d3 | ||
72 | lsr.l #8, %d3 | ||
73 | move.w %d3, (%a0)+ /* write bytes 2 and 3 as word */ | ||
74 | |||
75 | .r_end_u_w1: | ||
76 | moveq.l #12, %d0 | ||
77 | add.l %a0, %d0 | ||
78 | and.l #0xFFFFFFF0,%d0 /* d0 == first line bound */ | ||
79 | cmp.l %a0, %d0 /* any leading longwords? */ | ||
80 | bls.b .r_end_u_l1 /* no: skip loop */ | ||
81 | |||
82 | .r_loop_u_l1: | ||
83 | move.w (%a2), %d3 /* load first word */ | ||
84 | swap %d3 /* move to upper 16 bit */ | ||
85 | move.w (%a2), %d3 /* load second word */ | ||
86 | move.l %d3, %d4 /* keep a copy, its low byte is stored in the next pass */ | ||
87 | lsl.l %d1, %d2 /* move pending low byte to the top */ | ||
88 | lsr.l #8, %d3 | ||
89 | or.l %d3, %d2 /* combine old low byte with new top 3 bytes */ | ||
90 | move.l %d2, (%a0)+ /* store as long */ | ||
91 | move.l %d4, %d2 | ||
92 | cmp.l %a0, %d0 /* run up to first line bound */ | ||
93 | bhi.b .r_loop_u_l1 | ||
94 | |||
95 | .r_end_u_l1: | ||
96 | lea.l (-14, %a1), %a1 /* adjust end addr. to 16 bytes/pass */ | ||
97 | |||
98 | .r_loop_u_line: | ||
99 | move.w (%a2), %d3 /* load 1st word */ | ||
100 | swap %d3 /* move to upper 16 bit */ | ||
101 | move.w (%a2), %d3 /* load 2nd word */ | ||
102 | move.l %d3, %d0 | ||
103 | lsl.l %d1, %d2 | ||
104 | lsr.l #8, %d0 | ||
105 | or.l %d0, %d2 /* combine old low byte with new top 3 bytes */ | ||
106 | move.w (%a2), %d4 /* load 3rd word */ | ||
107 | swap %d4 /* move to upper 16 bit */ | ||
108 | move.w (%a2), %d4 /* load 4th word */ | ||
109 | move.l %d4, %d0 | ||
110 | lsl.l %d1, %d3 | ||
111 | lsr.l #8, %d0 | ||
112 | or.l %d0, %d3 /* combine old low byte with new top 3 bytes */ | ||
113 | move.w (%a2), %d5 /* load 5th word */ | ||
114 | swap %d5 /* move to upper 16 bit */ | ||
115 | move.w (%a2), %d5 /* load 6th word */ | ||
116 | move.l %d5, %d0 | ||
117 | lsl.l %d1, %d4 | ||
118 | lsr.l #8, %d0 | ||
119 | or.l %d0, %d4 /* combine old low byte with new top 3 bytes */ | ||
120 | move.w (%a2), %d6 /* load 7th word */ | ||
121 | swap %d6 /* move to upper 16 bit */ | ||
122 | move.w (%a2), %d6 /* load 8th word */ | ||
123 | move.l %d6, %d0 | ||
124 | lsl.l %d1, %d5 | ||
125 | lsr.l #8, %d0 | ||
126 | or.l %d0, %d5 /* combine old low byte with new top 3 bytes */ | ||
127 | movem.l %d2-%d5, (%a0) /* store line */ | ||
128 | lea.l (16, %a0), %a0 | ||
129 | move.l %d6, %d2 /* low byte of the 8th word is still pending */ | ||
130 | cmp.l %a0, %a1 /* run up to last line bound */ | ||
131 | bhi.b .r_loop_u_line | ||
132 | |||
133 | lea.l (12, %a1), %a1 /* readjust for longword loop */ | ||
134 | cmp.l %a0, %a1 /* any trailing longwords? */ | ||
135 | bls.b .r_end_u_l2 /* no: skip loop */ | ||
136 | |||
137 | .r_loop_u_l2: | ||
138 | move.w (%a2), %d3 /* load first word */ | ||
139 | swap %d3 /* move to upper 16 bit */ | ||
140 | move.w (%a2), %d3 /* load second word */ | ||
141 | move.l %d3, %d4 | ||
142 | lsl.l %d1, %d2 | ||
143 | lsr.l #8, %d3 | ||
144 | or.l %d3, %d2 /* combine old low byte with new top 3 bytes */ | ||
145 | move.l %d2, (%a0)+ /* store as long */ | ||
146 | move.l %d4, %d2 | ||
147 | cmp.l %a0, %a1 /* run up to last long bound */ | ||
148 | bhi.b .r_loop_u_l2 | ||
149 | |||
150 | .r_end_u_l2: | ||
151 | addq.l #2, %a1 /* back to final end address */ | ||
152 | cmp.l %a0, %a1 /* one word left? */ | ||
153 | bls.b .r_end_u_w2 | ||
154 | |||
155 | swap %d2 /* move old word to upper 16 bits */ | ||
156 | move.w (%a2), %d2 /* load final word */ | ||
157 | move.l %d2, %d3 | ||
158 | lsr.l #8, %d3 | ||
159 | move.w %d3, (%a0)+ /* write bytes 2 and 3 as word */ | ||
160 | |||
161 | .r_end_u_w2: | ||
162 | move.b %d2, (%a0)+ /* store final byte */ | ||
163 | bra.b .r_exit | ||
164 | |||
165 | /* 16-bit aligned */ | ||
166 | .r_aligned: | ||
167 | btst.l #1, %d0 /* longword aligned? */ | ||
168 | beq.b .r_end_a_w1 /* yes, skip leading word handling */ | ||
169 | |||
170 | move.w (%a2), (%a0)+ /* copy initial word */ | ||
171 | |||
172 | .r_end_a_w1: | ||
173 | moveq.l #12, %d0 | ||
174 | add.l %a0, %d0 | ||
175 | and.l #0xFFFFFFF0,%d0 /* d0 == first line bound */ | ||
176 | cmp.l %a0, %d0 /* any leading longwords? */ | ||
177 | bls.b .r_end_a_l1 /* no: skip loop */ | ||
178 | |||
179 | .r_loop_a_l1: | ||
180 | move.w (%a2), %d1 /* load first word */ | ||
181 | swap %d1 /* move it to upper 16 bits */ | ||
182 | move.w (%a2), %d1 /* load second word */ | ||
183 | move.l %d1, (%a0)+ /* store as long */ | ||
184 | cmp.l %a0, %d0 /* run up to first line bound */ | ||
185 | bhi.b .r_loop_a_l1 | ||
186 | |||
187 | .r_end_a_l1: | ||
188 | lea.l (-14, %a1), %a1 /* adjust end addr. to 16 bytes/pass */ | ||
189 | |||
190 | .r_loop_a_line: | ||
191 | move.w (%a2), %d0 /* load 1st word */ | ||
192 | swap %d0 /* move it to upper 16 bits */ | ||
193 | move.w (%a2), %d0 /* load 2nd word */ | ||
194 | move.w (%a2), %d1 /* load 3rd word */ | ||
195 | swap %d1 /* move it to upper 16 bits */ | ||
196 | move.w (%a2), %d1 /* load 4th word */ | ||
197 | move.w (%a2), %d2 /* load 5th word */ | ||
198 | swap %d2 /* move it to upper 16 bits */ | ||
199 | move.w (%a2), %d2 /* load 6th word */ | ||
200 | move.w (%a2), %d3 /* load 7th word */ | ||
201 | swap %d3 /* move it to upper 16 bits */ | ||
202 | move.w (%a2), %d3 /* load 8th word */ | ||
203 | movem.l %d0-%d3, (%a0) /* store line */ | ||
204 | lea.l (16, %a0), %a0 | ||
205 | cmp.l %a0, %a1 /* run up to last line bound */ | ||
206 | bhi.b .r_loop_a_line | ||
207 | |||
208 | lea.l (12, %a1), %a1 /* readjust for longword loop */ | ||
209 | cmp.l %a0, %a1 /* any trailing longwords? */ | ||
210 | bls.b .r_end_a_l2 /* no: skip loop */ | ||
211 | |||
212 | .r_loop_a_l2: | ||
213 | move.w (%a2), %d1 /* read first word */ | ||
214 | swap %d1 /* move it to upper 16 bits */ | ||
215 | move.w (%a2), %d1 /* read second word */ | ||
216 | move.l %d1, (%a0)+ /* store as long */ | ||
217 | cmp.l %a0, %a1 /* run up to last long bound */ | ||
218 | bhi.b .r_loop_a_l2 | ||
219 | |||
220 | .r_end_a_l2: | ||
221 | addq.l #2, %a1 /* back to final end address */ | ||
222 | cmp.l %a0, %a1 /* one word left? */ | ||
223 | bls.b .r_end_a_w2 | ||
224 | |||
225 | move.w (%a2), (%a0)+ /* copy final word */ | ||
226 | |||
227 | .r_end_a_w2: | ||
228 | |||
229 | .r_exit: | ||
230 | movem.l (%sp), %d2-%d6/%a2 /* restore saved registers */ | ||
231 | lea.l (24, %sp), %sp /* release the save area */ | ||
232 | rts | ||
233 | |||
234 | .r_end: | ||
235 | .size copy_read_sectors,.r_end-copy_read_sectors | ||
236 | |||
237 | .align 2 | ||
238 | .global copy_write_sectors | ||
239 | .type copy_write_sectors,@function | ||
240 | |||
241 | /* Write a number of words from a memory buffer to the ATA data port | ||
242 | * | ||
243 | * Utilises line bursts, assumes there is at least one full line (16 bytes) to copy. | ||
244 | * | ||
245 | * Arguments: | ||
246 | * (4,%sp) - buffer address (any byte alignment; odd addresses take the shifting path) | ||
247 | * (8,%sp) - word count (number of 16-bit words) | ||
248 | * | ||
249 | * Register usage: | ||
250 | * %a0 - current address | ||
251 | * %a1 - end address | ||
252 | * %a2 - ata port | ||
253 | * %d0 - scratch (also holds the first line bound during the leading longword loop) | ||
254 | * %d1 - shift count (unaligned path only) | ||
255 | * %d2-%d6 - data buffers holding bytes loaded from memory (the 16-bit aligned path uses %d0-%d3 instead) | ||
256 | */ | ||
257 | |||
258 | copy_write_sectors: | ||
259 | lea.l (-24, %sp), %sp /* make room for 6 saved registers */ | ||
260 | movem.l %d2-%d6/%a2, (%sp) /* save the registers restored at .w_exit */ | ||
261 | movem.l (28, %sp), %a0-%a1 /* a0 = buffer address, a1 = word count */ | ||
262 | add.l %a1, %a1 /* word count -> byte count */ | ||
263 | add.l %a0, %a1 /* a1 = end address */ | ||
264 | lea.l .ata_port, %a2 | ||
265 | |||
266 | move.l %a0, %d0 | ||
267 | btst.l #0, %d0 /* 16-bit aligned? */ | ||
268 | jeq .w_aligned /* yes, do word copy */ | ||
269 | |||
270 | /* not 16-bit aligned */ | ||
271 | subq.l #1, %a1 /* last byte is done unconditionally */ | ||
272 | moveq.l #24, %d1 /* preload shift count */ | ||
273 | |||
274 | move.b (%a0)+, %d2 /* load initial byte, aligns source addr */ | ||
275 | |||
276 | btst.l #1, %d0 /* longword aligned? d0 is the old (odd) address: bit 1 set means a0 is now a multiple of 4 */ | ||
277 | bne.b .w_end_u_w1 /* yes, skip leading word handling */ | ||
278 | |||
279 | swap %d2 /* move initial byte up */ | ||
280 | move.w (%a0)+, %d2 /* combine with first word */ | ||
281 | move.l %d2, %d3 | ||
282 | lsr.l #8, %d3 | ||
283 | move.w %d3, (%a2) /* write initial byte and high byte of the word */ | ||
284 | |||
285 | .w_end_u_w1: | ||
286 | moveq.l #12, %d0 | ||
287 | add.l %a0, %d0 | ||
288 | and.l #0xFFFFFFF0,%d0 /* d0 == first line bound */ | ||
289 | cmp.l %a0, %d0 /* any leading longwords? */ | ||
290 | bls.b .w_end_u_l1 /* no: skip loop */ | ||
291 | |||
292 | .w_loop_u_l1: | ||
293 | move.l (%a0)+, %d3 /* load longword */ | ||
294 | move.l %d3, %d4 /* keep a copy, its low byte is written in the next pass */ | ||
295 | lsl.l %d1, %d2 /* move pending low byte to the top */ | ||
296 | lsr.l #8, %d3 | ||
297 | or.l %d3, %d2 /* combine old low byte with new top 3 bytes */ | ||
298 | swap %d2 | ||
299 | move.w %d2, (%a2) /* write upper 16 bits first */ | ||
300 | swap %d2 | ||
301 | move.w %d2, (%a2) /* then lower 16 bits */ | ||
302 | move.l %d4, %d2 | ||
303 | cmp.l %a0, %d0 /* run up to first line bound */ | ||
304 | bhi.b .w_loop_u_l1 | ||
305 | |||
306 | .w_end_u_l1: | ||
307 | lea.l (-14, %a1), %a1 /* adjust end addr. to 16 bytes/pass */ | ||
308 | |||
309 | .w_loop_u_line: | ||
310 | movem.l (%a0), %d3-%d6 /* load line */ | ||
311 | lea.l (16, %a0), %a0 | ||
312 | move.l %d3, %d0 | ||
313 | lsl.l %d1, %d2 | ||
314 | lsr.l #8, %d0 | ||
315 | or.l %d0, %d2 /* combine old low byte with top 3 bytes of 1st long */ | ||
316 | swap %d2 | ||
317 | move.w %d2, (%a2) /* write 1st word */ | ||
318 | swap %d2 | ||
319 | move.w %d2, (%a2) /* write 2nd word */ | ||
320 | move.l %d4, %d0 | ||
321 | lsl.l %d1, %d3 | ||
322 | lsr.l #8, %d0 | ||
323 | or.l %d0, %d3 /* combine low byte of 1st long with top 3 bytes of 2nd */ | ||
324 | swap %d3 | ||
325 | move.w %d3, (%a2) /* write 3rd word */ | ||
326 | swap %d3 | ||
327 | move.w %d3, (%a2) /* write 4th word */ | ||
328 | move.l %d5, %d0 | ||
329 | lsl.l %d1, %d4 | ||
330 | lsr.l #8, %d0 | ||
331 | or.l %d0, %d4 /* combine low byte of 2nd long with top 3 bytes of 3rd */ | ||
332 | swap %d4 | ||
333 | move.w %d4, (%a2) /* write 5th word */ | ||
334 | swap %d4 | ||
335 | move.w %d4, (%a2) /* write 6th word */ | ||
336 | move.l %d6, %d0 | ||
337 | lsl.l %d1, %d5 | ||
338 | lsr.l #8, %d0 | ||
339 | or.l %d0, %d5 /* combine low byte of 3rd long with top 3 bytes of 4th */ | ||
340 | swap %d5 | ||
341 | move.w %d5, (%a2) /* write 7th word */ | ||
342 | swap %d5 | ||
343 | move.w %d5, (%a2) /* write 8th word */ | ||
344 | move.l %d6, %d2 /* low byte of the 4th long is still pending */ | ||
345 | cmp.l %a0, %a1 /* run up to last line bound */ | ||
346 | bhi.b .w_loop_u_line | ||
347 | |||
348 | lea.l (12, %a1), %a1 /* readjust for longword loop */ | ||
349 | cmp.l %a0, %a1 /* any trailing longwords? */ | ||
350 | bls.b .w_end_u_l2 /* no: skip loop */ | ||
351 | |||
352 | .w_loop_u_l2: | ||
353 | move.l (%a0)+, %d3 /* load longword */ | ||
354 | move.l %d3, %d4 | ||
355 | lsl.l %d1, %d2 | ||
356 | lsr.l #8, %d3 | ||
357 | or.l %d3, %d2 /* combine old low byte with new top 3 bytes */ | ||
358 | swap %d2 | ||
359 | move.w %d2, (%a2) /* write upper 16 bits first */ | ||
360 | swap %d2 | ||
361 | move.w %d2, (%a2) /* then lower 16 bits */ | ||
362 | move.l %d4, %d2 | ||
363 | cmp.l %a0, %a1 /* run up to last long bound */ | ||
364 | bhi.b .w_loop_u_l2 | ||
365 | |||
366 | .w_end_u_l2: | ||
367 | addq.l #2, %a1 /* back to final end address */ | ||
368 | cmp.l %a0, %a1 /* one word left? */ | ||
369 | bls.b .w_end_u_w2 | ||
370 | |||
371 | swap %d2 /* move pending byte up */ | ||
372 | move.w (%a0)+, %d2 /* load final word */ | ||
373 | move.l %d2, %d3 | ||
374 | lsr.l #8, %d3 | ||
375 | move.w %d3, (%a2) /* write pending byte and high byte of the word */ | ||
376 | |||
377 | .w_end_u_w2: | ||
378 | lsl.l #8, %d2 /* pending byte becomes the high byte */ | ||
379 | move.b (%a0)+, %d2 /* load final byte */ | ||
380 | move.w %d2, (%a2) /* write final word */ | ||
381 | bra.b .w_exit | ||
382 | |||
383 | /* 16-bit aligned */ | ||
384 | .w_aligned: | ||
385 | btst.l #1, %d0 /* longword aligned? */ | ||
386 | beq.b .w_end_a_w1 /* yes, skip leading word handling */ | ||
387 | |||
388 | move.w (%a0)+, (%a2) /* copy initial word */ | ||
389 | |||
390 | .w_end_a_w1: | ||
391 | moveq.l #12, %d0 | ||
392 | add.l %a0, %d0 | ||
393 | and.l #0xFFFFFFF0,%d0 /* d0 == first line bound */ | ||
394 | cmp.l %a0, %d0 /* any leading longwords? */ | ||
395 | bls.b .w_end_a_l1 /* no: skip loop */ | ||
396 | |||
397 | .w_loop_a_l1: | ||
398 | move.l (%a0)+, %d1 /* load longword */ | ||
399 | swap %d1 | ||
400 | move.w %d1, (%a2) /* write upper 16 bits first */ | ||
401 | swap %d1 | ||
402 | move.w %d1, (%a2) /* then lower 16 bits */ | ||
403 | cmp.l %a0, %d0 /* run up to first line bound */ | ||
404 | bhi.b .w_loop_a_l1 | ||
405 | |||
406 | .w_end_a_l1: | ||
407 | lea.l (-14, %a1), %a1 /* adjust end addr. to 16 bytes/pass */ | ||
408 | |||
409 | .w_loop_a_line: | ||
410 | movem.l (%a0), %d0-%d3 /* load line */ | ||
411 | lea.l (16, %a0), %a0 | ||
412 | swap %d0 | ||
413 | move.w %d0, (%a2) /* write 1st word */ | ||
414 | swap %d0 | ||
415 | move.w %d0, (%a2) /* write 2nd word */ | ||
416 | swap %d1 | ||
417 | move.w %d1, (%a2) /* write 3rd word */ | ||
418 | swap %d1 | ||
419 | move.w %d1, (%a2) /* write 4th word */ | ||
420 | swap %d2 | ||
421 | move.w %d2, (%a2) /* write 5th word */ | ||
422 | swap %d2 | ||
423 | move.w %d2, (%a2) /* write 6th word */ | ||
424 | swap %d3 | ||
425 | move.w %d3, (%a2) /* write 7th word */ | ||
426 | swap %d3 | ||
427 | move.w %d3, (%a2) /* write 8th word */ | ||
428 | cmp.l %a0, %a1 /* run up to last line bound */ | ||
429 | bhi.b .w_loop_a_line | ||
430 | |||
431 | lea.l (12, %a1), %a1 /* readjust for longword loop */ | ||
432 | cmp.l %a0, %a1 /* any trailing longwords? */ | ||
433 | bls.b .w_end_a_l2 /* no: skip loop */ | ||
434 | |||
435 | .w_loop_a_l2: | ||
436 | move.l (%a0)+, %d1 /* load longword */ | ||
437 | swap %d1 | ||
438 | move.w %d1, (%a2) /* write upper 16 bits first */ | ||
439 | swap %d1 | ||
440 | move.w %d1, (%a2) /* then lower 16 bits */ | ||
441 | cmp.l %a0, %a1 /* run up to last long bound */ | ||
442 | bhi.b .w_loop_a_l2 | ||
443 | |||
444 | .w_end_a_l2: | ||
445 | addq.l #2, %a1 /* back to final end address */ | ||
446 | cmp.l %a0, %a1 /* one word left? */ | ||
447 | bls.b .w_end_a_w2 | ||
448 | |||
449 | move.w (%a0)+, (%a2) /* copy final word */ | ||
450 | |||
451 | .w_end_a_w2: | ||
452 | |||
453 | .w_exit: | ||
454 | movem.l (%sp), %d2-%d6/%a2 /* restore saved registers */ | ||
455 | lea.l (24, %sp), %sp /* release the save area */ | ||
456 | rts | ||
457 | |||
458 | .w_end: | ||
459 | .size copy_write_sectors,.w_end-copy_write_sectors | ||
diff --git a/firmware/target/coldfire/ata-target.h b/firmware/target/coldfire/ata-target.h index 4a28c3ae3f..e246dc7af1 100755 --- a/firmware/target/coldfire/ata-target.h +++ b/firmware/target/coldfire/ata-target.h | |||
@@ -20,8 +20,8 @@ | |||
20 | #define ATA_TARGET_H | 20 | #define ATA_TARGET_H |
21 | 21 | ||
22 | /* asm optimised read & write loops */ | 22 | /* asm optimised read & write loops */ |
23 | 23 | #define ATA_OPTIMIZED_READING | |
24 | #define NOINLINE_ATTR __attribute__((noinline)) /* don't inline the loops */ | 24 | #define ATA_OPTIMIZED_WRITING |
25 | 25 | ||
26 | #define ATA_IOBASE 0x20000000 | 26 | #define ATA_IOBASE 0x20000000 |
27 | #define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE + 0x20))) | 27 | #define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE + 0x20))) |
@@ -65,4 +65,6 @@ void ata_reset(void); | |||
65 | void ata_device_init(void); | 65 | void ata_device_init(void); |
66 | bool ata_is_coldstart(void); | 66 | bool ata_is_coldstart(void); |
67 | 67 | ||
68 | void copy_read_sectors(unsigned char* buf, int wordcount); | ||
69 | void copy_write_sectors(const unsigned char* buf, int wordcount); | ||
68 | #endif | 70 | #endif |
diff --git a/firmware/target/sh/archos/ata-archos.c b/firmware/target/sh/archos/ata-archos.c new file mode 100755 index 0000000000..73e56b8d84 --- /dev/null +++ b/firmware/target/sh/archos/ata-archos.c | |||
@@ -0,0 +1,76 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by Jens Arnold | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | #include "config.h" | ||
21 | #include "cpu.h" | ||
22 | #include <stdbool.h> | ||
23 | #include "kernel.h" | ||
24 | #include "system.h" | ||
25 | #include "ata-target.h" | ||
26 | #include "hwcompat.h" | ||
27 | |||
28 | volatile unsigned char* ata_control; /* control register in use; set by ata_address_detect() */ | ||
29 | int ata_io_address; /* 0x300 or 0x200; set by ata_address_detect(), for debug purposes only */ | ||
30 | |||
31 | void ata_reset(void) /* pulse _RESET (bit 0x02 of PADRH): assert, wait, negate, wait */ | ||
32 | { | ||
33 | /* state HRR0 */ | ||
34 | and_b(~0x02, &PADRH); /* assert _RESET (clearing the bit asserts the line) */ | ||
35 | sleep(1); /* must hold for > 25us; NOTE(review): sleep(1) presumably waits one kernel tick - confirm */ | ||
36 | |||
37 | /* state HRR1 */ | ||
38 | or_b(0x02, &PADRH); /* negate _RESET (setting the bit releases the line) */ | ||
39 | sleep(1); /* wait > 2ms after releasing the line */ | ||
40 | } | ||
41 | |||
42 | void ata_address_detect(void) /* choose the control register address from the hardware mask */ | ||
43 | { | ||
44 | if (read_hw_mask() & ATA_ADDRESS_200) /* ATA_ADDRESS_200 is defined outside this file */ | ||
45 | { | ||
46 | ata_io_address = 0x200; /* For debug purposes only */ | ||
47 | ata_control = ATA_CONTROL1; /* 0x06200206, see ata-target.h */ | ||
48 | } | ||
49 | else | ||
50 | { | ||
51 | ata_io_address = 0x300; /* For debug purposes only */ | ||
52 | ata_control = ATA_CONTROL2; /* 0x06200306, see ata-target.h */ | ||
53 | } | ||
54 | } | ||
55 | |||
56 | void ata_enable(bool on) /* switch the ATA interface on or off via bit 0x80 of PADRL */ | ||
57 | { | ||
58 | if(on) | ||
59 | and_b(~0x80, &PADRL); /* enable ATA (bit cleared) */ | ||
60 | else | ||
61 | or_b(0x80, &PADRL); /* disable ATA (bit set) */ | ||
62 | |||
63 | or_b(0x80, &PAIORL); /* NOTE(review): presumably makes the pin an output, as ata_device_init() does for the reset pin via PAIORH - confirm */ | ||
64 | } | ||
65 | |||
66 | void ata_device_init(void) /* set up the port A pins used for ATA reset and IDE enable */ | ||
67 | { | ||
68 | or_b(0x02, &PAIORH); /* output for ATA reset */ | ||
69 | or_b(0x02, &PADRH); /* release ATA reset (same bit ata_reset() sets to negate _RESET) */ | ||
70 | PACR2 &= 0xBFFF; /* GPIO function for PA7 (IDE enable); clears bit 0x4000, which ata_is_coldstart() tests */ | ||
71 | } | ||
72 | |||
73 | bool ata_is_coldstart(void) /* true while bit 0x4000 of PACR2 is set */ | ||
74 | { | ||
75 | return (PACR2 & 0x4000) != 0; /* ata_device_init() clears this bit; NOTE(review): presumably it reads 1 after a hardware reset - confirm */ | ||
76 | } | ||
diff --git a/firmware/target/sh/archos/ata-as-archos.S b/firmware/target/sh/archos/ata-as-archos.S new file mode 100755 index 0000000000..4a4e7e4b94 --- /dev/null +++ b/firmware/target/sh/archos/ata-as-archos.S | |||
@@ -0,0 +1,231 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2004-2006 by Jens Arnold | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | .section .icode,"ax",@progbits | ||
21 | |||
22 | .align 2 | ||
23 | .global _copy_read_sectors | ||
24 | .type _copy_read_sectors,@function | ||
25 | |||
26 | /* Read a number of words from the ATA data port into a buffer | ||
27 | * | ||
28 | * Assumes wordcount to be a nonzero multiple of 4 (each pass copies 4 words and the first pass always runs) | ||
29 | * | ||
30 | * Arguments: | ||
31 | * r4 - buffer address (may be odd; a separate byte-merging loop handles that) | ||
32 | * r5 - word count | ||
33 | * | ||
34 | * Register usage: | ||
35 | * r0 - scratch | ||
36 | * r1/r2 - read buffers | ||
37 | * r3 - mask 0xFFFFFF00 (if unaligned) | ||
38 | * r4 - current address | ||
39 | * r5 - end address - 12 (loop bound) | ||
40 | * r6 - ata port | ||
41 | */ | ||
42 | |||
43 | _copy_read_sectors: | ||
44 | add r5, r5 /* words -> bytes */ | ||
45 | add r4, r5 /* bytes -> end address */ | ||
46 | add #-12, r5 /* adjust for offsets */ | ||
47 | mov.l .ata_data, r6 /* r6 = ATA data port address */ | ||
48 | |||
49 | mov r4, r0 /* tst #imm needs its operand in r0 */ | ||
50 | tst #1, r0 /* 16-bit aligned ? */ | ||
51 | bt .r_aligned /* yes, do word copy */ | ||
52 | |||
53 | /* not 16-bit aligned */ | ||
54 | mov #-1, r3 /* prepare a bit mask for high byte */ | ||
55 | shll8 r3 /* r3 = 0xFFFFFF00 */ | ||
56 | |||
57 | mov.w @r6, r2 /* read first word (1st round) */ | ||
58 | mov.b r2, @r4 /* store low byte of first word */ | ||
59 | bra .r_start_b /* jump into loop after next instr. */ | ||
60 | add #-5, r4 /* adjust for dest. offsets; now even */ | ||
61 | |||
62 | .align 2 | ||
63 | .r_loop_b: /* main loop: copy 4 words in a row */ | ||
64 | mov.w @r6, r2 /* read first word (2+ round) */ | ||
65 | and r3, r1 /* get high byte of fourth word (2+ round) */ | ||
66 | extu.b r2, r0 /* get low byte of first word (2+ round) */ | ||
67 | or r1, r0 /* combine with high byte of fourth word */ | ||
68 | mov.w r0, @(4, r4) /* store at buf[4] */ | ||
69 | nop /* maintain alignment */ | ||
70 | .r_start_b: | ||
71 | mov.w @r6, r1 /* read second word */ | ||
72 | and r3, r2 /* get high byte of first word */ | ||
73 | extu.b r1, r0 /* get low byte of second word */ | ||
74 | or r2, r0 /* combine with high byte of first word */ | ||
75 | mov.w r0, @(6, r4) /* store at buf[6] */ | ||
76 | add #8, r4 /* buf += 8 */ | ||
77 | mov.w @r6, r2 /* read third word */ | ||
78 | and r3, r1 /* get high byte of second word */ | ||
79 | extu.b r2, r0 /* get low byte of third word */ | ||
80 | or r1, r0 /* combine with high byte of second word */ | ||
81 | mov.w r0, @r4 /* store at buf[0] */ | ||
82 | cmp/hi r4, r5 /* check for end */ | ||
83 | mov.w @r6, r1 /* read fourth word */ | ||
84 | and r3, r2 /* get high byte of third word */ | ||
85 | extu.b r1, r0 /* get low byte of fourth word */ | ||
86 | or r2, r0 /* combine with high byte of third word */ | ||
87 | mov.w r0, @(2, r4) /* store at buf[2] */ | ||
88 | bt .r_loop_b /* loop while r5 > r4 (T from cmp/hi above) */ | ||
89 | /* 24 instructions for 4 copies, takes 30 clock cycles (4 wait) */ | ||
90 | /* avg. 7.5 cycles per word */ | ||
91 | |||
92 | swap.b r1, r0 /* get high byte of last word */ | ||
93 | rts /* return; the next instr. runs in the delay slot */ | ||
94 | mov.b r0, @(4, r4) /* and store it */ | ||
95 | |||
96 | /* 16-bit aligned, loop(read and store word) */ | ||
97 | .r_aligned: | ||
98 | mov.w @r6, r2 /* read first word (1st round) */ | ||
99 | bra .r_start_w /* jump into loop after next instr. */ | ||
100 | add #-6, r4 /* adjust for destination offsets */ | ||
101 | |||
102 | .align 2 | ||
103 | .r_loop_w: /* main loop: copy 4 words in a row */ | ||
104 | mov.w @r6, r2 /* read first word (2+ round) */ | ||
105 | swap.b r1, r0 /* swap fourth word (2+ round) */ | ||
106 | mov.w r0, @(4, r4) /* store fourth word (2+ round) */ | ||
107 | nop /* maintain alignment */ | ||
108 | .r_start_w: | ||
109 | mov.w @r6, r1 /* read second word */ | ||
110 | swap.b r2, r0 /* swap first word */ | ||
111 | mov.w r0, @(6, r4) /* store first word in buf[6] */ | ||
112 | add #8, r4 /* buf += 8 */ | ||
113 | mov.w @r6, r2 /* read third word */ | ||
114 | swap.b r1, r0 /* swap second word */ | ||
115 | mov.w r0, @r4 /* store second word in buf[0] */ | ||
116 | cmp/hi r4, r5 /* check for end */ | ||
117 | mov.w @r6, r1 /* read fourth word */ | ||
118 | swap.b r2, r0 /* swap third word */ | ||
119 | mov.w r0, @(2, r4) /* store third word */ | ||
120 | bt .r_loop_w /* loop while r5 > r4 (T from cmp/hi above) */ | ||
121 | /* 16 instructions for 4 copies, takes 22 clock cycles (4 wait) */ | ||
122 | /* avg. 5.5 cycles per word */ | ||
123 | |||
124 | swap.b r1, r0 /* swap fourth word (last round) */ | ||
125 | rts /* return; the next instr. runs in the delay slot */ | ||
126 | mov.w r0, @(4, r4) /* and store it */ | ||
127 | |||
128 | .r_end: | ||
129 | .size _copy_read_sectors,.r_end-_copy_read_sectors | ||
130 | |||
131 | .align 2 | ||
132 | .global _copy_write_sectors | ||
133 | .type _copy_write_sectors,@function | ||
134 | |||
135 | /* Write a number of words to the ATA data port | ||
136 | * | ||
137 | * Assumes wordcount to be a nonzero multiple of 2 (at least 2 words are always written). | ||
138 | * Writing is not unrolled as much as reading, for several reasons: | ||
139 | * | ||
140 | * - a similar instruction sequence is faster for writing than for reading | ||
141 | * because the auto-incrementing load instructions can be used | ||
142 | * - writing profits from warp mode | ||
143 | * | ||
144 | * Both of these add up to have writing faster than the more unrolled reading. | ||
145 | * | ||
146 | * Arguments: | ||
147 | * r4 - buffer address (may be odd; a separate byte-merging loop handles that) | ||
148 | * r5 - word count | ||
149 | * | ||
150 | * Register usage: | ||
151 | * r0/r1 - scratch | ||
152 | * r2/r3 - write buffers | ||
153 | * r4 - current address | ||
154 | * r5 - end address - 4 (loop bound) | ||
155 | * r6 - mask 0xFFFFFF00 (if unaligned) | ||
156 | * r7 - ata port | ||
157 | */ | ||
158 | |||
159 | _copy_write_sectors: | ||
160 | add r5, r5 /* words -> bytes */ | ||
161 | add r4, r5 /* bytes -> end address */ | ||
162 | add #-4, r5 /* adjust for offsets */ | ||
163 | mov.l .ata_data, r7 /* r7 = ATA data port address */ | ||
164 | |||
165 | mov r4, r0 /* tst #imm needs its operand in r0 */ | ||
166 | tst #1, r0 /* 16-bit aligned ? */ | ||
167 | bt .w_aligned /* yes, do word copy */ | ||
168 | |||
169 | /* not 16-bit aligned */ | ||
170 | mov #-1, r6 /* prepare a bit mask for high byte */ | ||
171 | shll8 r6 /* r6 = 0xFFFFFF00 */ | ||
172 | |||
173 | mov.b @r4+, r2 /* load (initial old second) first byte */ | ||
174 | mov.w @r4+, r3 /* load (initial) first word */ | ||
175 | bra .w_start_b /* jump into loop after next instr. */ | ||
176 | extu.b r2, r0 /* extend unsigned */ | ||
177 | |||
178 | .align 2 | ||
179 | .w_loop_b: /* main loop: copy 2 words in a row */ | ||
180 | mov.w @r4+, r3 /* load first word (2+ round) */ | ||
181 | extu.b r2, r0 /* put away low byte of second word (2+ round) */ | ||
182 | and r6, r2 /* get high byte of second word (2+ round) */ | ||
183 | or r1, r2 /* combine with low byte of old first word */ | ||
184 | mov.w r2, @r7 /* write that */ | ||
185 | .w_start_b: | ||
186 | cmp/hi r4, r5 /* check for end */ | ||
187 | mov.w @r4+, r2 /* load second word */ | ||
188 | extu.b r3, r1 /* put away low byte of first word */ | ||
189 | and r6, r3 /* get high byte of first word */ | ||
190 | or r0, r3 /* combine with low byte of old second word */ | ||
191 | mov.w r3, @r7 /* write that */ | ||
192 | bt .w_loop_b /* loop while r5 > r4 (T from cmp/hi above) */ | ||
193 | /* 12 instructions for 2 copies, takes 14 clock cycles */ | ||
194 | /* avg. 7 cycles per word */ | ||
195 | |||
196 | /* the loop "overreads" 1 byte past the buffer end, however, the last */ | ||
197 | /* byte is not written to disk */ | ||
198 | and r6, r2 /* get high byte of last word */ | ||
199 | or r1, r2 /* combine with low byte of old first word */ | ||
200 | rts /* return; the next instr. runs in the delay slot */ | ||
201 | mov.w r2, @r7 /* write last word */ | ||
202 | |||
203 | /* 16-bit aligned, loop(load and write word) */ | ||
204 | .w_aligned: | ||
205 | bra .w_start_w /* jump into loop after next instr. */ | ||
206 | mov.w @r4+, r2 /* load first word (1st round) */ | ||
207 | |||
208 | .align 2 | ||
209 | .w_loop_w: /* main loop: copy 2 words in a row */ | ||
210 | mov.w @r4+, r2 /* load first word (2+ round) */ | ||
211 | swap.b r1, r0 /* swap second word (2+ round) */ | ||
212 | mov.w r0, @r7 /* write second word (2+ round) */ | ||
213 | .w_start_w: | ||
214 | cmp/hi r4, r5 /* check for end */ | ||
215 | mov.w @r4+, r1 /* load second word */ | ||
216 | swap.b r2, r0 /* swap first word */ | ||
217 | mov.w r0, @r7 /* write first word */ | ||
218 | bt .w_loop_w /* loop while r5 > r4 (T from cmp/hi above) */ | ||
219 | /* 8 instructions for 2 copies, takes 10 clock cycles */ | ||
220 | /* avg. 5 cycles per word */ | ||
221 | |||
222 | swap.b r1, r0 /* swap second word (last round) */ | ||
223 | rts /* return; the next instr. runs in the delay slot */ | ||
224 | mov.w r0, @r7 /* and write it */ | ||
225 | |||
226 | .w_end: | ||
227 | .size _copy_write_sectors,.w_end-_copy_write_sectors | ||
228 | |||
229 | .align 2 | ||
230 | .ata_data: | ||
231 | .long 0x06104100 /* ATA data port (same address as ATA_DATA in ata-target.h); loaded by both copy routines */ | ||
diff --git a/firmware/target/sh/archos/ata-target.h b/firmware/target/sh/archos/ata-target.h new file mode 100755 index 0000000000..ddffb34f5a --- /dev/null +++ b/firmware/target/sh/archos/ata-target.h | |||
@@ -0,0 +1,80 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by Jens Arnold | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | #ifndef ATA_TARGET_H | ||
20 | #define ATA_TARGET_H | ||
21 | |||
22 | /* asm optimised read & write loops (copy_read_sectors / copy_write_sectors, declared below) */ | ||
23 | #define ATA_OPTIMIZED_READING | ||
24 | #define ATA_OPTIMIZED_WRITING | ||
25 | #define ATA_ADDRESS_DETECT /* need address detection */ | ||
26 | |||
27 | #define SWAP_WORDS /* NOTE(review): not used in this header; presumably tells ata.c that data words are byte-swapped - confirm */ | ||
28 | |||
29 | #define ATA_IOBASE 0x06100100 /* the byte registers below sit at ATA_IOBASE + 1..7 */ | ||
30 | #define ATA_DATA (*((volatile unsigned short*)0x06104100)) /* 16-bit data port */ | ||
31 | #define ATA_CONTROL1 ((volatile unsigned char*)0x06200206) /* used when ata_address_detect() sees ATA_ADDRESS_200 */ | ||
32 | #define ATA_CONTROL2 ((volatile unsigned char*)0x06200306) /* used otherwise */ | ||
33 | #define ATA_CONTROL (*ata_control) /* whichever of the two ata_address_detect() selected */ | ||
34 | |||
35 | #define ATA_ERROR (*((volatile unsigned char*)ATA_IOBASE + 1)) /* the cast binds before '+': byte at ATA_IOBASE + 1 */ | ||
36 | #define ATA_NSECTOR (*((volatile unsigned char*)ATA_IOBASE + 2)) | ||
37 | #define ATA_SECTOR (*((volatile unsigned char*)ATA_IOBASE + 3)) | ||
38 | #define ATA_LCYL (*((volatile unsigned char*)ATA_IOBASE + 4)) | ||
39 | #define ATA_HCYL (*((volatile unsigned char*)ATA_IOBASE + 5)) | ||
40 | #define ATA_SELECT (*((volatile unsigned char*)ATA_IOBASE + 6)) | ||
41 | #define ATA_COMMAND (*((volatile unsigned char*)ATA_IOBASE + 7)) | ||
42 | |||
43 | #define STATUS_BSY 0x80 /* STATUS_*: single-bit masks; consumers are outside this header */ | ||
44 | #define STATUS_RDY 0x40 | ||
45 | #define STATUS_DF 0x20 | ||
46 | #define STATUS_DRQ 0x08 | ||
47 | #define STATUS_ERR 0x01 | ||
48 | |||
49 | #define ERROR_ABRT 0x04 | ||
50 | |||
51 | #define WRITE_PATTERN1 0xa5 /* byte patterns; each READ_PATTERNn below equals WRITE_PATTERNn */ | ||
52 | #define WRITE_PATTERN2 0x5a | ||
53 | #define WRITE_PATTERN3 0xaa | ||
54 | #define WRITE_PATTERN4 0x55 | ||
55 | |||
56 | #define READ_PATTERN1 0xa5 | ||
57 | #define READ_PATTERN2 0x5a | ||
58 | #define READ_PATTERN3 0xaa | ||
59 | #define READ_PATTERN4 0x55 | ||
60 | |||
61 | #define READ_PATTERN1_MASK 0xff /* all eight bits of every pattern are compared */ | ||
62 | #define READ_PATTERN2_MASK 0xff | ||
63 | #define READ_PATTERN3_MASK 0xff | ||
64 | #define READ_PATTERN4_MASK 0xff | ||
65 | |||
66 | #define SET_REG(reg,val) reg = (val) /* register writes are plain assignments on this target */ | ||
67 | #define SET_16BITREG(reg,val) reg = (val) | ||
68 | |||
69 | extern volatile unsigned char* ata_control; /* defined in ata-archos.c */ | ||
70 | extern int ata_io_address; /* defined in ata-archos.c; 0x300 or 0x200 */ | ||
71 | |||
72 | void ata_reset(void); /* target hooks below are implemented in ata-archos.c */ | ||
73 | void ata_address_detect(void); | ||
74 | void ata_enable(bool on); /* NOTE(review): bool needs <stdbool.h> from the includer; this header does not include it */ | ||
75 | void ata_device_init(void); | ||
76 | bool ata_is_coldstart(void); | ||
77 | |||
78 | void copy_read_sectors(unsigned char* buf, int wordcount); /* asm (ata-as-archos.S); wordcount in 16-bit words, multiple of 4 */ | ||
79 | void copy_write_sectors(const unsigned char* buf, int wordcount); /* asm (ata-as-archos.S); wordcount in 16-bit words, multiple of 2 */ | ||
80 | #endif | ||