summary refs log tree commit diff
path: root/firmware/drivers/ata.c
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/drivers/ata.c')
-rw-r--r-- firmware/drivers/ata.c 770
1 files changed, 31 insertions, 739 deletions
diff --git a/firmware/drivers/ata.c b/firmware/drivers/ata.c
index d0cca2e148..72194db8da 100644
--- a/firmware/drivers/ata.c
+++ b/firmware/drivers/ata.c
@@ -30,66 +30,10 @@
30#include "string.h" 30#include "string.h"
31#include "hwcompat.h" 31#include "hwcompat.h"
32#include "ata_idle_notify.h" 32#include "ata_idle_notify.h"
33#ifdef TARGET_TREE
34#include "ata-target.h" 33#include "ata-target.h"
35#endif
36 34
37#define SECTOR_SIZE (512) 35#define SECTOR_SIZE (512)
38 36
39#if CONFIG_CPU == SH7034
40
41/* asm optimised read & write loops */
42
43#define NOINLINE_ATTR __attribute__((noinline)) /* don't inline the loops */
44
45#define SWAP_WORDS
46
47#define ATA_IOBASE 0x06100100
48#define ATA_DATA (*((volatile unsigned short*)0x06104100))
49#define ATA_CONTROL1 ((volatile unsigned char*)0x06200206)
50#define ATA_CONTROL2 ((volatile unsigned char*)0x06200306)
51#define ATA_CONTROL (*ata_control)
52
53#define ATA_ERROR (*((volatile unsigned char*)ATA_IOBASE + 1))
54#define ATA_NSECTOR (*((volatile unsigned char*)ATA_IOBASE + 2))
55#define ATA_SECTOR (*((volatile unsigned char*)ATA_IOBASE + 3))
56#define ATA_LCYL (*((volatile unsigned char*)ATA_IOBASE + 4))
57#define ATA_HCYL (*((volatile unsigned char*)ATA_IOBASE + 5))
58#define ATA_SELECT (*((volatile unsigned char*)ATA_IOBASE + 6))
59#define ATA_COMMAND (*((volatile unsigned char*)ATA_IOBASE + 7))
60
61#define STATUS_BSY 0x80
62#define STATUS_RDY 0x40
63#define STATUS_DF 0x20
64#define STATUS_DRQ 0x08
65#define STATUS_ERR 0x01
66
67#define ERROR_ABRT 0x04
68
69#define WRITE_PATTERN1 0xa5
70#define WRITE_PATTERN2 0x5a
71#define WRITE_PATTERN3 0xaa
72#define WRITE_PATTERN4 0x55
73
74#define READ_PATTERN1 0xa5
75#define READ_PATTERN2 0x5a
76#define READ_PATTERN3 0xaa
77#define READ_PATTERN4 0x55
78
79#define READ_PATTERN1_MASK 0xff
80#define READ_PATTERN2_MASK 0xff
81#define READ_PATTERN3_MASK 0xff
82#define READ_PATTERN4_MASK 0xff
83
84#define SET_REG(reg,val) reg = (val)
85#define SET_16BITREG(reg,val) reg = (val)
86
87#endif
88
89#ifndef NOINLINE_ATTR
90#define NOINLINE_ATTR
91#endif
92
93#define ATA_FEATURE ATA_ERROR 37#define ATA_FEATURE ATA_ERROR
94 38
95#define ATA_STATUS ATA_COMMAND 39#define ATA_STATUS ATA_COMMAND
@@ -118,11 +62,7 @@
118#define READ_TIMEOUT 5*HZ 62#define READ_TIMEOUT 5*HZ
119 63
120static struct mutex ata_mtx; 64static struct mutex ata_mtx;
121char ata_device; /* device 0 (master) or 1 (slave) */ 65int ata_device; /* device 0 (master) or 1 (slave) */
122int ata_io_address; /* 0x300 or 0x200, only valid on recorder */
123#if CONFIG_CPU == SH7034
124static volatile unsigned char* ata_control;
125#endif
126 66
127int ata_spinup_time = 0; 67int ata_spinup_time = 0;
128#if CONFIG_LED == LED_REAL 68#if CONFIG_LED == LED_REAL
@@ -131,8 +71,8 @@ static bool ata_led_on = false;
131#endif 71#endif
132static bool spinup = false; 72static bool spinup = false;
133static bool sleeping = true; 73static bool sleeping = true;
134static long sleep_timeout = 5*HZ;
135static bool poweroff = false; 74static bool poweroff = false;
75static long sleep_timeout = 5*HZ;
136#ifdef HAVE_ATA_POWER_OFF 76#ifdef HAVE_ATA_POWER_OFF
137static int poweroff_timeout = 2*HZ; 77static int poweroff_timeout = 2*HZ;
138#endif 78#endif
@@ -206,25 +146,30 @@ static int wait_for_end_of_transfer(void)
206 return (ATA_ALT_STATUS & (STATUS_RDY|STATUS_DRQ)) == STATUS_RDY; 146 return (ATA_ALT_STATUS & (STATUS_RDY|STATUS_DRQ)) == STATUS_RDY;
207} 147}
208 148
209/* Optimization: don't do 256 calls to ddma_transfer; fuse with it 149#if CONFIG_LED == LED_REAL
210 * as in the Archos firmware. 150/* Conditionally block LED access for the ATA driver, so the LED can be
211 * It is actually possible to do a single dma transfer to copy a whole sector between ATA 151 * (mis)used for other purposes */
212 * controller & cpu internal memory. 152static void ata_led(bool on)
213 */ 153{
214/* the tight loop of ata_read_sectors(), to avoid the whole in IRAM */ 154 ata_led_on = on;
215static void copy_read_sectors(unsigned char* buf, int wordcount) 155 if (ata_led_enabled)
216 ICODE_ATTR NOINLINE_ATTR; 156 led(ata_led_on);
157}
158#else
159#define ata_led(on) led(on)
160#endif
161
162#ifndef ATA_OPTIMIZED_READING
163static void copy_read_sectors(unsigned char* buf, int wordcount) ICODE_ATTR;
217static void copy_read_sectors(unsigned char* buf, int wordcount) 164static void copy_read_sectors(unsigned char* buf, int wordcount)
218{ 165{
219#ifdef PREFER_C_READING
220 unsigned short tmp = 0; 166 unsigned short tmp = 0;
221 167
222 if ( (unsigned long)buf & 1) 168 if ( (unsigned long)buf & 1)
223 { /* not 16-bit aligned, copy byte by byte */ 169 { /* not 16-bit aligned, copy byte by byte */
224 unsigned char* bufend = buf + wordcount*2; 170 unsigned char* bufend = buf + wordcount*2;
225 do 171 do
226 { /* loop compiles to 9 assembler instructions */ 172 {
227 /* takes 14 clock cycles (2 pipeline stalls, 1 wait) */
228 tmp = ATA_DATA; 173 tmp = ATA_DATA;
229#if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN) 174#if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN)
230 *buf++ = tmp & 0xff; /* I assume big endian */ 175 *buf++ = tmp & 0xff; /* I assume big endian */
@@ -240,8 +185,7 @@ static void copy_read_sectors(unsigned char* buf, int wordcount)
240 unsigned short* wbuf = (unsigned short*)buf; 185 unsigned short* wbuf = (unsigned short*)buf;
241 unsigned short* wbufend = wbuf + wordcount; 186 unsigned short* wbufend = wbuf + wordcount;
242 do 187 do
243 { /* loop compiles to 7 assembler instructions */ 188 {
244 /* takes 12 clock cycles (2 pipeline stalls, 1 wait) */
245#ifdef SWAP_WORDS 189#ifdef SWAP_WORDS
246 *wbuf = swap16(ATA_DATA); 190 *wbuf = swap16(ATA_DATA);
247#else 191#else
@@ -249,304 +193,8 @@ static void copy_read_sectors(unsigned char* buf, int wordcount)
249#endif 193#endif
250 } while (++wbuf < wbufend); /* tail loop is faster */ 194 } while (++wbuf < wbufend); /* tail loop is faster */
251 } 195 }
252#else /* !PREFER_C_READING */
253#if defined(CPU_COLDFIRE)
254 unsigned char* bufend = buf + 2 * wordcount;
255 /* coldfire asm reading, utilising line bursts */
256 /* this assumes there is at least one full line to copy */
257 asm volatile (
258 "move.l %[buf],%%d0 \n"
259 "btst.l #0,%%d0 \n" /* 16-bit aligned? */
260 "jeq .aligned \n" /* yes, do word copy */
261
262 /* not 16-bit aligned */
263 "subq.l #1,%[end] \n" /* last byte is done unconditionally */
264 "moveq.l #24,%%d1 \n" /* preload shift count */
265
266 "move.w (%[ata]),%%d2 \n" /* load initial word */
267 "move.l %%d2,%%d3 \n"
268 "lsr.l #8,%%d3 \n"
269 "move.b %%d3,(%[buf])+ \n" /* write high byte of it, aligns dest addr */
270
271 "btst.l #1,%%d0 \n" /* longword aligned? */
272 "beq.b .end_u_w1 \n" /* yes, skip leading word handling */
273
274 "swap %%d2 \n" /* move initial word up */
275 "move.w (%[ata]),%%d2 \n" /* combine with second word */
276 "move.l %%d2,%%d3 \n"
277 "lsr.l #8,%%d3 \n"
278 "move.w %%d3,(%[buf])+ \n" /* write bytes 2 and 3 as word */
279
280 ".end_u_w1: \n"
281 "moveq.l #12,%%d0 \n"
282 "add.l %[buf],%%d0 \n"
283 "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */
284 "cmp.l %[buf],%%d0 \n" /* any leading longwords? */
285 "bls.b .end_u_l1 \n" /* no: skip loop */
286
287 ".loop_u_l1: \n"
288 "move.w (%[ata]),%%d3 \n" /* load first word */
289 "swap %%d3 \n" /* move to upper 16 bit */
290 "move.w (%[ata]),%%d3 \n" /* load second word */
291 "move.l %%d3,%%d4 \n"
292 "lsl.l %%d1,%%d2 \n"
293 "lsr.l #8,%%d3 \n"
294 "or.l %%d3,%%d2 \n" /* combine old low byte with new top 3 bytes */
295 "move.l %%d2,(%[buf])+ \n" /* store as long */
296 "move.l %%d4,%%d2 \n"
297 "cmp.l %[buf],%%d0 \n" /* run up to first line bound */
298 "bhi.b .loop_u_l1 \n"
299
300 ".end_u_l1: \n"
301 "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */
302
303 ".loop_u_line: \n"
304 "move.w (%[ata]),%%d3 \n" /* load 1st word */
305 "swap %%d3 \n" /* move to upper 16 bit */
306 "move.w (%[ata]),%%d3 \n" /* load 2nd word */
307 "move.l %%d3,%%d0 \n"
308 "lsl.l %%d1,%%d2 \n"
309 "lsr.l #8,%%d0 \n"
310 "or.l %%d0,%%d2 \n" /* combine old low byte with new top 3 bytes */
311 "move.w (%[ata]),%%d4 \n" /* load 3rd word */
312 "swap %%d4 \n" /* move to upper 16 bit */
313 "move.w (%[ata]),%%d4 \n" /* load 4th word */
314 "move.l %%d4,%%d0 \n"
315 "lsl.l %%d1,%%d3 \n"
316 "lsr.l #8,%%d0 \n"
317 "or.l %%d0,%%d3 \n" /* combine old low byte with new top 3 bytes */
318 "move.w (%[ata]),%%d5 \n" /* load 5th word */
319 "swap %%d5 \n" /* move to upper 16 bit */
320 "move.w (%[ata]),%%d5 \n" /* load 6th word */
321 "move.l %%d5,%%d0 \n"
322 "lsl.l %%d1,%%d4 \n"
323 "lsr.l #8,%%d0 \n"
324 "or.l %%d0,%%d4 \n" /* combine old low byte with new top 3 bytes */
325 "move.w (%[ata]),%%d6 \n" /* load 7th word */
326 "swap %%d6 \n" /* move to upper 16 bit */
327 "move.w (%[ata]),%%d6 \n" /* load 8th word */
328 "move.l %%d6,%%d0 \n"
329 "lsl.l %%d1,%%d5 \n"
330 "lsr.l #8,%%d0 \n"
331 "or.l %%d0,%%d5 \n" /* combine old low byte with new top 3 bytes */
332 "movem.l %%d2-%%d5,(%[buf]) \n" /* store line */
333 "lea.l (16,%[buf]),%[buf] \n"
334 "move.l %%d6,%%d2 \n"
335 "cmp.l %[buf],%[end] \n" /* run up to last line bound */
336 "bhi.b .loop_u_line \n"
337
338 "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */
339 "cmp.l %[buf],%[end] \n" /* any trailing longwords? */
340 "bls.b .end_u_l2 \n" /* no: skip loop */
341
342 ".loop_u_l2: \n"
343 "move.w (%[ata]),%%d3 \n" /* load first word */
344 "swap %%d3 \n" /* move to upper 16 bit */
345 "move.w (%[ata]),%%d3 \n" /* load second word */
346 "move.l %%d3,%%d4 \n"
347 "lsl.l %%d1,%%d2 \n"
348 "lsr.l #8,%%d3 \n"
349 "or.l %%d3,%%d2 \n" /* combine old low byte with new top 3 bytes */
350 "move.l %%d2,(%[buf])+ \n" /* store as long */
351 "move.l %%d4,%%d2 \n"
352 "cmp.l %[buf],%[end] \n" /* run up to last long bound */
353 "bhi.b .loop_u_l2 \n"
354
355 ".end_u_l2: \n"
356 "addq.l #2,%[end] \n" /* back to final end address */
357 "cmp.l %[buf],%[end] \n" /* one word left? */
358 "bls.b .end_u_w2 \n"
359
360 "swap %%d2 \n" /* move old word to upper 16 bits */
361 "move.w (%[ata]),%%d2 \n" /* load final word */
362 "move.l %%d2,%%d3 \n"
363 "lsr.l #8,%%d3 \n"
364 "move.w %%d3,(%[buf])+ \n" /* write bytes 2 and 3 as word */
365
366 ".end_u_w2: \n"
367 "move.b %%d2,(%[buf])+ \n" /* store final byte */
368 "bra.b .exit \n"
369
370 /* 16-bit aligned */
371 ".aligned: \n"
372 "btst.l #1,%%d0 \n" /* longword aligned? */
373 "beq.b .end_a_w1 \n" /* yes, skip leading word handling */
374
375 "move.w (%[ata]),(%[buf])+ \n" /* copy initial word */
376
377 ".end_a_w1: \n"
378 "moveq.l #12,%%d0 \n"
379 "add.l %[buf],%%d0 \n"
380 "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */
381 "cmp.l %[buf],%%d0 \n" /* any leading longwords? */
382 "bls.b .end_a_l1 \n" /* no: skip loop */
383
384 ".loop_a_l1: \n"
385 "move.w (%[ata]),%%d1 \n" /* load first word */
386 "swap %%d1 \n" /* move it to upper 16 bits */
387 "move.w (%[ata]),%%d1 \n" /* load second word */
388 "move.l %%d1,(%[buf])+ \n" /* store as long */
389 "cmp.l %[buf],%%d0 \n" /* run up to first line bound */
390 "bhi.b .loop_a_l1 \n"
391
392 ".end_a_l1: \n"
393 "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */
394
395 ".loop_a_line: \n"
396 "move.w (%[ata]),%%d0 \n" /* load 1st word */
397 "swap %%d0 \n" /* move it to upper 16 bits */
398 "move.w (%[ata]),%%d0 \n" /* load 2nd word */
399 "move.w (%[ata]),%%d1 \n" /* load 3rd word */
400 "swap %%d1 \n" /* move it to upper 16 bits */
401 "move.w (%[ata]),%%d1 \n" /* load 4th word */
402 "move.w (%[ata]),%%d2 \n" /* load 5th word */
403 "swap %%d2 \n" /* move it to upper 16 bits */
404 "move.w (%[ata]),%%d2 \n" /* load 6th word */
405 "move.w (%[ata]),%%d3 \n" /* load 7th word */
406 "swap %%d3 \n" /* move it to upper 16 bits */
407 "move.w (%[ata]),%%d3 \n" /* load 8th word */
408 "movem.l %%d0-%%d3,(%[buf]) \n" /* store line */
409 "lea.l (16,%[buf]),%[buf] \n"
410 "cmp.l %[buf],%[end] \n" /* run up to last line bound */
411 "bhi.b .loop_a_line \n"
412
413 "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */
414 "cmp.l %[buf],%[end] \n" /* any trailing longwords? */
415 "bls.b .end_a_l2 \n" /* no: skip loop */
416
417 ".loop_a_l2: \n"
418 "move.w (%[ata]),%%d1 \n" /* read first word */
419 "swap %%d1 \n" /* move it to upper 16 bits */
420 "move.w (%[ata]),%%d1 \n" /* read second word */
421 "move.l %%d1,(%[buf])+ \n" /* store as long */
422 "cmp.l %[buf],%[end] \n" /* run up to last long bound */
423 "bhi.b .loop_a_l2 \n"
424
425 ".end_a_l2: \n"
426 "addq.l #2,%[end] \n" /* back to final end address */
427 "cmp.l %[buf],%[end] \n" /* one word left? */
428 "bls.b .end_a_w2 \n"
429
430 "move.w (%[ata]),(%[buf])+ \n" /* copy final word */
431
432 ".end_a_w2: \n"
433
434 ".exit: \n"
435 : /* outputs */
436 [buf]"+a"(buf),
437 [end]"+a"(bufend)
438 : /* inputs */
439 [ata]"a"(&ATA_DATA)
440 : /*trashed */
441 "d0", "d1", "d2", "d3", "d4", "d5", "d6"
442 );
443#else
444 /* SH1 turbo-charged assembler reading */
445 /* this assumes wordcount to be a multiple of 4 */
446 asm volatile (
447 "mov %[buf],r0 \n"
448 "tst #1,r0 \n" /* 16-bit aligned ? */
449 "bt .aligned \n" /* yes, do word copy */
450
451 /* not 16-bit aligned */
452 "mov #-1,r3 \n" /* prepare a bit mask for high byte */
453 "shll8 r3 \n" /* r3 = 0xFFFFFF00 */
454
455 "mov.w @%[ata],r2 \n" /* read first word (1st round) */
456 "mov.b r2,@%[buf] \n" /* store low byte of first word */
457 "bra .start4_b \n" /* jump into loop after next instr. */
458 "add #-5,%[buf] \n" /* adjust for dest. offsets; now even */
459
460 ".align 2 \n"
461 ".loop4_b: \n" /* main loop: copy 4 words in a row */
462 "mov.w @%[ata],r2 \n" /* read first word (2+ round) */
463 "and r3,r1 \n" /* get high byte of fourth word (2+ round) */
464 "extu.b r2,r0 \n" /* get low byte of first word (2+ round) */
465 "or r1,r0 \n" /* combine with high byte of fourth word */
466 "mov.w r0,@(4,%[buf]) \n" /* store at buf[4] */
467 "nop \n" /* maintain alignment */
468 ".start4_b: \n"
469 "mov.w @%[ata],r1 \n" /* read second word */
470 "and r3,r2 \n" /* get high byte of first word */
471 "extu.b r1,r0 \n" /* get low byte of second word */
472 "or r2,r0 \n" /* combine with high byte of first word */
473 "mov.w r0,@(6,%[buf]) \n" /* store at buf[6] */
474 "add #8,%[buf] \n" /* buf += 8 */
475 "mov.w @%[ata],r2 \n" /* read third word */
476 "and r3,r1 \n" /* get high byte of second word */
477 "extu.b r2,r0 \n" /* get low byte of third word */
478 "or r1,r0 \n" /* combine with high byte of second word */
479 "mov.w r0,@%[buf] \n" /* store at buf[0] */
480 "cmp/hi %[buf],%[end] \n" /* check for end */
481 "mov.w @%[ata],r1 \n" /* read fourth word */
482 "and r3,r2 \n" /* get high byte of third word */
483 "extu.b r1,r0 \n" /* get low byte of fourth word */
484 "or r2,r0 \n" /* combine with high byte of third word */
485 "mov.w r0,@(2,%[buf]) \n" /* store at buf[2] */
486 "bt .loop4_b \n"
487 /* 24 instructions for 4 copies, takes 30 clock cycles (4 wait) */
488 /* avg. 7.5 cycles per word - 86% faster */
489
490 "swap.b r1,r0 \n" /* get high byte of last word */
491 "bra .exit \n"
492 "mov.b r0,@(4,%[buf]) \n" /* and store it */
493
494 /* 16-bit aligned, loop(read and store word) */
495 ".aligned: \n"
496 "mov.w @%[ata],r2 \n" /* read first word (1st round) */
497 "bra .start4_w \n" /* jump into loop after next instr. */
498 "add #-6,%[buf] \n" /* adjust for destination offsets */
499
500 ".align 2 \n"
501 ".loop4_w: \n" /* main loop: copy 4 words in a row */
502 "mov.w @%[ata],r2 \n" /* read first word (2+ round) */
503 "swap.b r1,r0 \n" /* swap fourth word (2+ round) */
504 "mov.w r0,@(4,%[buf]) \n" /* store fourth word (2+ round) */
505 "nop \n" /* maintain alignment */
506 ".start4_w: \n"
507 "mov.w @%[ata],r1 \n" /* read second word */
508 "swap.b r2,r0 \n" /* swap first word */
509 "mov.w r0,@(6,%[buf]) \n" /* store first word in buf[6] */
510 "add #8,%[buf] \n" /* buf += 8 */
511 "mov.w @%[ata],r2 \n" /* read third word */
512 "swap.b r1,r0 \n" /* swap second word */
513 "mov.w r0,@%[buf] \n" /* store second word in buf[0] */
514 "cmp/hi %[buf],%[end] \n" /* check for end */
515 "mov.w @%[ata],r1 \n" /* read fourth word */
516 "swap.b r2,r0 \n" /* swap third word */
517 "mov.w r0,@(2,%[buf]) \n" /* store third word */
518 "bt .loop4_w \n"
519 /* 16 instructions for 4 copies, takes 22 clock cycles (4 wait) */
520 /* avg. 5.5 cycles per word - 118% faster */
521
522 "swap.b r1,r0 \n" /* swap fourth word (last round) */
523 "mov.w r0,@(4,%[buf]) \n" /* and store it */
524
525 ".exit: \n"
526 : /* outputs */
527 [buf]"+r"(buf)
528 : /* inputs */
529 [end]"r"(buf + 2 * wordcount - 12), /* adjusted for offsets */
530 [ata]"r"(&ATA_DATA)
531 : /*trashed */
532 "r0","r1","r2","r3"
533 );
534#endif /* CPU */
535#endif /* !PREFER_C_READING */
536}
537
538#if CONFIG_LED == LED_REAL
539/* Conditionally block LED access for the ATA driver, so the LED can be
540 * (mis)used for other purposes */
541static void ata_led(bool on) {
542 ata_led_on = on;
543 if (ata_led_enabled) {
544 led(ata_led_on);
545 }
546} 196}
547#else 197#endif /* !ATA_OPTIMIZED_READING */
548#define ata_led(on) led(on)
549#endif
550 198
551int ata_read_sectors(IF_MV2(int drive,) 199int ata_read_sectors(IF_MV2(int drive,)
552 unsigned long start, 200 unsigned long start,
@@ -696,13 +344,10 @@ int ata_read_sectors(IF_MV2(int drive,)
696 return ret; 344 return ret;
697} 345}
698 346
699/* the tight loop of ata_write_sectors(), to avoid the whole in IRAM */ 347#ifndef ATA_OPTIMIZED_WRITING
700static void copy_write_sectors(const unsigned char* buf, int wordcount) 348static void copy_write_sectors(const unsigned char* buf, int wordcount) ICODE_ATTR;
701 ICODE_ATTR NOINLINE_ATTR;
702static void copy_write_sectors(const unsigned char* buf, int wordcount) 349static void copy_write_sectors(const unsigned char* buf, int wordcount)
703{ 350{
704#ifdef PREFER_C_WRITING
705
706 if ( (unsigned long)buf & 1) 351 if ( (unsigned long)buf & 1)
707 { /* not 16-bit aligned, copy byte by byte */ 352 { /* not 16-bit aligned, copy byte by byte */
708 unsigned short tmp = 0; 353 unsigned short tmp = 0;
@@ -710,11 +355,9 @@ static void copy_write_sectors(const unsigned char* buf, int wordcount)
710 do 355 do
711 { 356 {
712#if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN) 357#if defined(SWAP_WORDS) || defined(ROCKBOX_LITTLE_ENDIAN)
713 /* SH1: loop compiles to 9 assembler instructions */
714 /* takes 13 clock cycles (2 pipeline stalls) */
715 tmp = (unsigned short) *buf++; 358 tmp = (unsigned short) *buf++;
716 tmp |= (unsigned short) *buf++ << 8; /* I assume big endian */ 359 tmp |= (unsigned short) *buf++ << 8;
717 SET_16BITREG(ATA_DATA, tmp); /* and don't use the SWAB16 macro */ 360 SET_16BITREG(ATA_DATA, tmp);
718#else 361#else
719 tmp = (unsigned short) *buf++ << 8; 362 tmp = (unsigned short) *buf++ << 8;
720 tmp |= (unsigned short) *buf++; 363 tmp |= (unsigned short) *buf++;
@@ -729,298 +372,14 @@ static void copy_write_sectors(const unsigned char* buf, int wordcount)
729 do 372 do
730 { 373 {
731#ifdef SWAP_WORDS 374#ifdef SWAP_WORDS
732 /* loop compiles to 6 assembler instructions */
733 /* takes 10 clock cycles (2 pipeline stalls) */
734 SET_16BITREG(ATA_DATA, swap16(*wbuf)); 375 SET_16BITREG(ATA_DATA, swap16(*wbuf));
735#else 376#else
736 SET_16BITREG(ATA_DATA, *wbuf); 377 SET_16BITREG(ATA_DATA, *wbuf);
737#endif 378#endif
738 } while (++wbuf < wbufend); /* tail loop is faster */ 379 } while (++wbuf < wbufend); /* tail loop is faster */
739 } 380 }
740#else /* !PREFER_C_WRITING */
741#ifdef CPU_COLDFIRE
742 const unsigned char* bufend = buf + 2 * wordcount;
743 /* coldfire asm writing, utilising line bursts */
744 asm volatile (
745 "move.l %[buf],%%d0 \n"
746 "btst.l #0,%%d0 \n" /* 16-bit aligned? */
747 "jeq .w_aligned \n" /* yes, do word copy */
748
749 /* not 16-bit aligned */
750 "subq.l #1,%[end] \n" /* last byte is done unconditionally */
751 "moveq.l #24,%%d1 \n" /* preload shift count */
752
753 "move.b (%[buf])+,%%d2 \n"
754
755 "btst.l #1,%%d0 \n" /* longword aligned? */
756 "beq.b .w_end_u_w1 \n" /* yes, skip leading word handling */
757
758 "swap %%d2 \n"
759 "move.w (%[buf])+,%%d2 \n"
760 "move.l %%d2,%%d3 \n"
761 "lsr.l #8,%%d3 \n"
762 "move.w %%d3,(%[ata]) \n"
763
764 ".w_end_u_w1: \n"
765 "moveq.l #12,%%d0 \n"
766 "add.l %[buf],%%d0 \n"
767 "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */
768 "cmp.l %[buf],%%d0 \n" /* any leading longwords? */
769 "bls.b .w_end_u_l1 \n" /* no: skip loop */
770
771 ".w_loop_u_l1: \n"
772 "move.l (%[buf])+,%%d3 \n"
773 "move.l %%d3,%%d4 \n"
774 "lsl.l %%d1,%%d2 \n"
775 "lsr.l #8,%%d3 \n"
776 "or.l %%d3,%%d2 \n"
777 "swap %%d2 \n"
778 "move.w %%d2,(%[ata]) \n"
779 "swap %%d2 \n"
780 "move.w %%d2,(%[ata]) \n"
781 "move.l %%d4,%%d2 \n"
782 "cmp.l %[buf],%%d0 \n" /* run up to first line bound */
783 "bhi.b .w_loop_u_l1 \n"
784
785 ".w_end_u_l1: \n"
786 "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */
787
788 ".w_loop_u_line: \n"
789 "movem.l (%[buf]),%%d3-%%d6 \n"
790 "lea.l (16,%[buf]),%[buf] \n"
791 "move.l %%d3,%%d0 \n"
792 "lsl.l %%d1,%%d2 \n"
793 "lsr.l #8,%%d0 \n"
794 "or.l %%d0,%%d2 \n"
795 "swap %%d2 \n"
796 "move.w %%d2,(%[ata]) \n"
797 "swap %%d2 \n"
798 "move.w %%d2,(%[ata]) \n"
799 "move.l %%d4,%%d0 \n"
800 "lsl.l %%d1,%%d3 \n"
801 "lsr.l #8,%%d0 \n"
802 "or.l %%d0,%%d3 \n"
803 "swap %%d3 \n"
804 "move.w %%d3,(%[ata]) \n"
805 "swap %%d3 \n"
806 "move.w %%d3,(%[ata]) \n"
807 "move.l %%d5,%%d0 \n"
808 "lsl.l %%d1,%%d4 \n"
809 "lsr.l #8,%%d0 \n"
810 "or.l %%d0,%%d4 \n"
811 "swap %%d4 \n"
812 "move.w %%d4,(%[ata]) \n"
813 "swap %%d4 \n"
814 "move.w %%d4,(%[ata]) \n"
815 "move.l %%d6,%%d0 \n"
816 "lsl.l %%d1,%%d5 \n"
817 "lsr.l #8,%%d0 \n"
818 "or.l %%d0,%%d5 \n"
819 "swap %%d5 \n"
820 "move.w %%d5,(%[ata]) \n"
821 "swap %%d5 \n"
822 "move.w %%d5,(%[ata]) \n"
823 "move.l %%d6,%%d2 \n"
824 "cmp.l %[buf],%[end] \n" /* run up to last line bound */
825 "bhi.b .w_loop_u_line \n"
826
827 "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */
828 "cmp.l %[buf],%[end] \n" /* any trailing longwords? */
829 "bls.b .w_end_u_l2 \n" /* no: skip loop */
830
831 ".w_loop_u_l2: \n"
832 "move.l (%[buf])+,%%d3 \n"
833 "move.l %%d3,%%d4 \n"
834 "lsl.l %%d1,%%d2 \n"
835 "lsr.l #8,%%d3 \n"
836 "or.l %%d3,%%d2 \n"
837 "swap %%d2 \n"
838 "move.w %%d2,(%[ata]) \n"
839 "swap %%d2 \n"
840 "move.w %%d2,(%[ata]) \n"
841 "move.l %%d4,%%d2 \n"
842 "cmp.l %[buf],%[end] \n" /* run up to first line bound */
843 "bhi.b .w_loop_u_l2 \n"
844
845 ".w_end_u_l2: \n"
846 "addq.l #2,%[end] \n" /* back to final end address */
847 "cmp.l %[buf],%[end] \n" /* one word left? */
848 "bls.b .w_end_u_w2 \n"
849
850 "swap %%d2 \n"
851 "move.w (%[buf])+,%%d2 \n"
852 "move.l %%d2,%%d3 \n"
853 "lsr.l #8,%%d3 \n"
854 "move.w %%d3,(%[ata]) \n"
855
856 ".w_end_u_w2: \n"
857 "lsl.l #8,%%d2 \n"
858 "move.b (%[buf])+,%%d2 \n"
859 "move.w %%d2,(%[ata]) \n"
860 "bra.b .w_exit \n"
861
862 /* 16-bit aligned */
863 ".w_aligned: \n"
864 "btst.l #1,%%d0 \n"
865 "beq.b .w_end_a_w1 \n"
866
867 "move.w (%[buf])+,(%[ata]) \n" /* copy initial word */
868
869 ".w_end_a_w1: \n"
870 "moveq.l #12,%%d0 \n"
871 "add.l %[buf],%%d0 \n"
872 "and.l #0xFFFFFFF0,%%d0\n" /* d0 == first line bound */
873 "cmp.l %[buf],%%d0 \n" /* any leading longwords? */
874 "bls.b .w_end_a_l1 \n" /* no: skip loop */
875
876 ".w_loop_a_l1: \n"
877 "move.l (%[buf])+,%%d1 \n"
878 "swap %%d1 \n"
879 "move.w %%d1,(%[ata]) \n"
880 "swap %%d1 \n"
881 "move.w %%d1,(%[ata]) \n"
882 "cmp.l %[buf],%%d0 \n" /* run up to first line bound */
883 "bhi.b .w_loop_a_l1 \n"
884
885 ".w_end_a_l1: \n"
886 "lea.l (-14,%[end]),%[end] \n" /* adjust end addr. to 16 bytes/pass */
887
888 ".w_loop_a_line: \n"
889 "movem.l (%[buf]),%%d0-%%d3 \n"
890 "lea.l (16,%[buf]),%[buf] \n"
891 "swap %%d0 \n"
892 "move.w %%d0,(%[ata]) \n"
893 "swap %%d0 \n"
894 "move.w %%d0,(%[ata]) \n"
895 "swap %%d1 \n"
896 "move.w %%d1,(%[ata]) \n"
897 "swap %%d1 \n"
898 "move.w %%d1,(%[ata]) \n"
899 "swap %%d2 \n"
900 "move.w %%d2,(%[ata]) \n"
901 "swap %%d2 \n"
902 "move.w %%d2,(%[ata]) \n"
903 "swap %%d3 \n"
904 "move.w %%d3,(%[ata]) \n"
905 "swap %%d3 \n"
906 "move.w %%d3,(%[ata]) \n"
907 "cmp.l %[buf],%[end] \n" /* run up to last line bound */
908 "bhi.b .w_loop_a_line \n"
909
910 "lea.l (12,%[end]),%[end] \n" /* readjust for longword loop */
911 "cmp.l %[buf],%[end] \n" /* any trailing longwords? */
912 "bls.b .w_end_a_l2 \n" /* no: skip loop */
913
914 ".w_loop_a_l2: \n"
915 "move.l (%[buf])+,%%d1 \n"
916 "swap %%d1 \n"
917 "move.w %%d1,(%[ata]) \n"
918 "swap %%d1 \n"
919 "move.w %%d1,(%[ata]) \n"
920 "cmp.l %[buf],%[end] \n" /* run up to first line bound */
921 "bhi.b .w_loop_a_l2 \n"
922
923 ".w_end_a_l2: \n"
924 "addq.l #2,%[end] \n" /* back to final end address */
925 "cmp.l %[buf],%[end] \n" /* one word left? */
926 "bls.b .w_end_a_w2 \n"
927
928 "move.w (%[buf])+,(%[ata]) \n" /* copy final word */
929
930 ".w_end_a_w2: \n"
931
932 ".w_exit: \n"
933 : /* outputs */
934 [buf]"+a"(buf),
935 [end]"+a"(bufend)
936 : /* inputs */
937 [ata]"a"(&ATA_DATA)
938 : /*trashed */
939 "d0", "d1", "d2", "d3", "d4", "d5", "d6"
940 );
941#else
942 /* SH1 optimized assembler version */
943 /* this assumes wordcount to be a multiple of 2 */
944
945/* writing is not unrolled as much as reading, for several reasons:
946 * - a similar instruction sequence is faster for writing than for reading
947 * because the auto-incrementing load instructions can be used
948 * - writing profits from warp mode
949 * Both of these add up to have writing faster than the more unrolled reading.
950 */
951 asm volatile (
952 "mov %[buf],r0 \n"
953 "tst #1,r0 \n" /* 16-bit aligned ? */
954 "bt .w_aligned \n" /* yes, do word copy */
955
956 /* not 16-bit aligned */
957 "mov #-1,r6 \n" /* prepare a bit mask for high byte */
958 "shll8 r6 \n" /* r6 = 0xFFFFFF00 */
959
960 "mov.b @%[buf]+,r2 \n" /* load (initial old second) first byte */
961 "mov.w @%[buf]+,r3 \n" /* load (initial) first word */
962 "bra .w_start2_b \n"
963 "extu.b r2,r0 \n" /* extend unsigned */
964
965 ".align 2 \n"
966 ".w_loop2_b: \n" /* main loop: copy 2 words in a row */
967 "mov.w @%[buf]+,r3 \n" /* load first word (2+ round) */
968 "extu.b r2,r0 \n" /* put away low byte of second word (2+ round) */
969 "and r6,r2 \n" /* get high byte of second word (2+ round) */
970 "or r1,r2 \n" /* combine with low byte of old first word */
971 "mov.w r2,@%[ata] \n" /* write that */
972 ".w_start2_b: \n"
973 "cmp/hi %[buf],%[end] \n" /* check for end */
974 "mov.w @%[buf]+,r2 \n" /* load second word */
975 "extu.b r3,r1 \n" /* put away low byte of first word */
976 "and r6,r3 \n" /* get high byte of first word */
977 "or r0,r3 \n" /* combine with high byte of old second word */
978 "mov.w r3,@%[ata] \n" /* write that */
979 "bt .w_loop2_b \n"
980 /* 12 instructions for 2 copies, takes 14 clock cycles */
981 /* avg. 7 cycles per word - 85% faster */
982
983 /* the loop "overreads" 1 byte past the buffer end, however, the last */
984 /* byte is not written to disk */
985 "and r6,r2 \n" /* get high byte of last word */
986 "or r1,r2 \n" /* combine with low byte of old first word */
987 "bra .w_exit \n"
988 "mov.w r2,@%[ata] \n" /* write last word */
989
990 /* 16-bit aligned, loop(load and write word) */
991 ".w_aligned: \n"
992 "bra .w_start2_w \n" /* jump into loop after next instr. */
993 "mov.w @%[buf]+,r2 \n" /* load first word (1st round) */
994
995 ".align 2 \n"
996 ".w_loop2_w: \n" /* main loop: copy 2 words in a row */
997 "mov.w @%[buf]+,r2 \n" /* load first word (2+ round) */
998 "swap.b r1,r0 \n" /* swap second word (2+ round) */
999 "mov.w r0,@%[ata] \n" /* write second word (2+ round) */
1000 ".w_start2_w: \n"
1001 "cmp/hi %[buf],%[end] \n" /* check for end */
1002 "mov.w @%[buf]+,r1 \n" /* load second word */
1003 "swap.b r2,r0 \n" /* swap first word */
1004 "mov.w r0,@%[ata] \n" /* write first word */
1005 "bt .w_loop2_w \n"
1006 /* 8 instructions for 2 copies, takes 10 clock cycles */
1007 /* avg. 5 cycles per word - 100% faster */
1008
1009 "swap.b r1,r0 \n" /* swap second word (last round) */
1010 "mov.w r0,@%[ata] \n" /* and write it */
1011
1012 ".w_exit: \n"
1013 : /* outputs */
1014 [buf]"+r"(buf)
1015 : /* inputs */
1016 [end]"r"(buf + 2 * wordcount - 4), /* adjusted for early check */
1017 [ata]"r"(&ATA_DATA)
1018 : /*trashed */
1019 "r0","r1","r2","r3","r6"
1020 );
1021#endif /* CPU */
1022#endif /* !PREFER_C_WRITING */
1023} 381}
382#endif /* !ATA_OPTIMIZED_WRITING */
1024 383
1025int ata_write_sectors(IF_MV2(int drive,) 384int ata_write_sectors(IF_MV2(int drive,)
1026 unsigned long start, 385 unsigned long start,
@@ -1306,17 +665,7 @@ int ata_hard_reset(void)
1306{ 665{
1307 int ret; 666 int ret;
1308 667
1309#ifdef TARGET_TREE
1310 ata_reset(); 668 ata_reset();
1311#elif CONFIG_CPU == SH7034
1312 /* state HRR0 */
1313 and_b(~0x02, &PADRH); /* assert _RESET */
1314 sleep(1); /* > 25us */
1315
1316 /* state HRR1 */
1317 or_b(0x02, &PADRH); /* negate _RESET */
1318 sleep(1); /* > 2ms */
1319#endif
1320 669
1321 /* state HRR2 */ 670 /* state HRR2 */
1322 SET_REG(ATA_SELECT, ata_device); /* select the right device */ 671 SET_REG(ATA_SELECT, ata_device); /* select the right device */
@@ -1416,36 +765,6 @@ static int master_slave_detect(void)
1416 return 0; 765 return 0;
1417} 766}
1418 767
1419#if CONFIG_CPU == SH7034 /* special archos quirk */
1420static void io_address_detect(void)
1421{ /* now, use the HW mask instead of probing */
1422 if (read_hw_mask() & ATA_ADDRESS_200)
1423 {
1424 ata_io_address = 0x200; /* For debug purposes only */
1425 ata_control = ATA_CONTROL1;
1426 }
1427 else
1428 {
1429 ata_io_address = 0x300; /* For debug purposes only */
1430 ata_control = ATA_CONTROL2;
1431 }
1432}
1433#endif
1434
1435#ifndef TARGET_TREE
1436void ata_enable(bool on)
1437{
1438#if CONFIG_CPU == SH7034
1439 if(on)
1440 and_b(~0x80, &PADRL); /* enable ATA */
1441 else
1442 or_b(0x80, &PADRL); /* disable ATA */
1443
1444 or_b(0x80, &PAIORL);
1445#endif
1446}
1447#endif
1448
1449static int identify(void) 768static int identify(void)
1450{ 769{
1451 int i; 770 int i;
@@ -1589,39 +908,12 @@ static int init_and_check(bool hard_reset)
1589int ata_init(void) 908int ata_init(void)
1590{ 909{
1591 int rc; 910 int rc;
1592#ifdef TARGET_TREE
1593 bool coldstart = ata_is_coldstart(); 911 bool coldstart = ata_is_coldstart();
1594#else
1595 bool coldstart = (PACR2 & 0x4000) != 0;
1596#endif
1597 912
1598 mutex_init(&ata_mtx); 913 mutex_init(&ata_mtx);
1599 914
1600 ata_led(false); 915 ata_led(false);
1601
1602#ifdef TARGET_TREE
1603 ata_device_init(); 916 ata_device_init();
1604#elif CONFIG_CPU == SH7034
1605 /* Port A setup */
1606 or_b(0x02, &PAIORH); /* output for ATA reset */
1607 or_b(0x02, &PADRH); /* release ATA reset */
1608 PACR2 &= 0xBFFF; /* GPIO function for PA7 (IDE enable) */
1609#elif CONFIG_CPU == MCF5249
1610#ifdef HAVE_ATA_LED_CTRL
1611 /* Enable disk LED & ISD chip power control */
1612 and_l(~0x0000240, &GPIO_OUT);
1613 or_l(0x00000240, &GPIO_ENABLE);
1614 or_l(0x00000200, &GPIO_FUNCTION);
1615#endif
1616
1617 /* ATA reset */
1618 or_l(0x00080000, &GPIO_OUT);
1619 or_l(0x00080000, &GPIO_ENABLE);
1620 or_l(0x00080000, &GPIO_FUNCTION);
1621
1622 /* FYI: The IDECONFIGx registers are set by set_cpu_frequency() */
1623#endif
1624
1625 sleeping = false; 917 sleeping = false;
1626 ata_enable(true); 918 ata_enable(true);
1627 919
@@ -1632,8 +924,8 @@ int ata_init(void)
1632 sleep(HZ); /* allow voltage to build up */ 924 sleep(HZ); /* allow voltage to build up */
1633 } 925 }
1634 926
1635#if CONFIG_CPU == SH7034 927#ifdef ATA_ADDRESS_DETECT
1636 io_address_detect(); 928 ata_address_detect();
1637#endif 929#endif
1638 /* first try, hard reset at cold start only */ 930 /* first try, hard reset at cold start only */
1639 rc = init_and_check(coldstart); 931 rc = init_and_check(coldstart);
@@ -1680,12 +972,12 @@ int ata_init(void)
1680} 972}
1681 973
1682#if CONFIG_LED == LED_REAL 974#if CONFIG_LED == LED_REAL
1683void ata_set_led_enabled(bool enabled) { 975void ata_set_led_enabled(bool enabled)
976{
1684 ata_led_enabled = enabled; 977 ata_led_enabled = enabled;
1685 if (ata_led_enabled) { 978 if (ata_led_enabled)
1686 led(ata_led_on); 979 led(ata_led_on);
1687 } else { 980 else
1688 led(false); 981 led(false);
1689 }
1690} 982}
1691#endif 983#endif