summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2007-01-24 00:12:08 +0000
committerJens Arnold <amiconn@rockbox.org>2007-01-24 00:12:08 +0000
commit5d51784faa5e48c821e3fefab685cdb141a6ff81 (patch)
treebc99954e296486d98c0af35bb25303815bad3226
parent970572bec874a9c359ab7a5acdf0c66ba088940c (diff)
downloadrockbox-5d51784faa5e48c821e3fefab685cdb141a6ff81.tar.gz
rockbox-5d51784faa5e48c821e3fefab685cdb141a6ff81.zip
Assembler optimised ATA transfer loops for ARM targets. Only used for PP5020/5021/5022 targets atm. PP5002 seems to have problems with it not directly related to the asm code itself, and the gigabeat doesn't profit from it, probably because the CPU isn't the bottleneck even without optimisation.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12099 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/SOURCES9
-rw-r--r--firmware/target/arm/ata-as-arm.S344
-rw-r--r--firmware/target/arm/ata-target.h16
3 files changed, 366 insertions, 3 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 3fdbdbb820..227eab1b6d 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -410,6 +410,7 @@ target/coldfire/iriver/h100/usb-h100.c
410 410
411#ifdef IRIVER_H10 411#ifdef IRIVER_H10
412#ifndef SIMULATOR 412#ifndef SIMULATOR
413target/arm/ata-as-arm.S
413target/arm/ata-pp5020.c 414target/arm/ata-pp5020.c
414target/arm/wmcodec-pp.c 415target/arm/wmcodec-pp.c
415target/arm/i2s-pp.c 416target/arm/i2s-pp.c
@@ -424,6 +425,7 @@ target/arm/usb-pp.c
424 425
425#ifdef IRIVER_H10_5GB 426#ifdef IRIVER_H10_5GB
426#ifndef SIMULATOR 427#ifndef SIMULATOR
428target/arm/ata-as-arm.S
427target/arm/ata-pp5020.c 429target/arm/ata-pp5020.c
428target/arm/wmcodec-pp.c 430target/arm/wmcodec-pp.c
429target/arm/i2s-pp.c 431target/arm/i2s-pp.c
@@ -460,6 +462,7 @@ target/arm/gigabeat/meg-fx/pcm-meg-fx.c
460 462
461#ifdef ELIO_TPJ1022 463#ifdef ELIO_TPJ1022
462#ifndef SIMULATOR 464#ifndef SIMULATOR
465target/arm/ata-as-arm.S
463target/arm/ata-pp5020.c 466target/arm/ata-pp5020.c
464target/arm/wmcodec-pp.c 467target/arm/wmcodec-pp.c
465target/arm/i2s-pp.c 468target/arm/i2s-pp.c
@@ -478,6 +481,7 @@ drivers/lcd-ipod.c
478drivers/pcf50605.c 481drivers/pcf50605.c
479target/arm/wmcodec-pp.c 482target/arm/wmcodec-pp.c
480target/arm/i2s-pp.c 483target/arm/i2s-pp.c
484target/arm/ata-as-arm.S
481target/arm/ata-pp5020.c 485target/arm/ata-pp5020.c
482target/arm/ipod/adc-ipod.c 486target/arm/ipod/adc-ipod.c
483target/arm/ipod/backlight-4g_color.c 487target/arm/ipod/backlight-4g_color.c
@@ -491,6 +495,7 @@ target/arm/usb-pp.c
491#ifndef SIMULATOR 495#ifndef SIMULATOR
492drivers/lcd-ipod.c 496drivers/lcd-ipod.c
493drivers/pcf50605.c 497drivers/pcf50605.c
498target/arm/ata-as-arm.S
494target/arm/ata-pp5020.c 499target/arm/ata-pp5020.c
495target/arm/wmcodec-pp.c 500target/arm/wmcodec-pp.c
496target/arm/i2s-pp.c 501target/arm/i2s-pp.c
@@ -506,6 +511,7 @@ target/arm/usb-pp.c
506#ifndef SIMULATOR 511#ifndef SIMULATOR
507drivers/lcd-ipod.c 512drivers/lcd-ipod.c
508drivers/pcf50605.c 513drivers/pcf50605.c
514target/arm/ata-as-arm.S
509target/arm/ata-pp5020.c 515target/arm/ata-pp5020.c
510target/arm/wmcodec-pp.c 516target/arm/wmcodec-pp.c
511target/arm/i2s-pp.c 517target/arm/i2s-pp.c
@@ -521,6 +527,7 @@ target/arm/usb-pp.c
521#ifndef SIMULATOR 527#ifndef SIMULATOR
522drivers/lcd-ipodvideo.c 528drivers/lcd-ipodvideo.c
523drivers/pcf50605.c 529drivers/pcf50605.c
530target/arm/ata-as-arm.S
524target/arm/ata-pp5020.c 531target/arm/ata-pp5020.c
525target/arm/wmcodec-pp.c 532target/arm/wmcodec-pp.c
526target/arm/i2s-pp.c 533target/arm/i2s-pp.c
@@ -551,6 +558,7 @@ target/arm/usb-pp.c
551#ifndef SIMULATOR 558#ifndef SIMULATOR
552drivers/lcd-ipod.c 559drivers/lcd-ipod.c
553drivers/pcf50605.c 560drivers/pcf50605.c
561target/arm/ata-as-arm.S
554target/arm/ata-pp5020.c 562target/arm/ata-pp5020.c
555target/arm/wmcodec-pp.c 563target/arm/wmcodec-pp.c
556target/arm/i2s-pp.c 564target/arm/i2s-pp.c
@@ -566,6 +574,7 @@ target/arm/usb-pp.c
566#ifndef SIMULATOR 574#ifndef SIMULATOR
567drivers/lcd-ipod.c 575drivers/lcd-ipod.c
568drivers/pcf50605.c 576drivers/pcf50605.c
577target/arm/ata-as-arm.S
569target/arm/ata-pp5020.c 578target/arm/ata-pp5020.c
570target/arm/wmcodec-pp.c 579target/arm/wmcodec-pp.c
571target/arm/i2s-pp.c 580target/arm/i2s-pp.c
diff --git a/firmware/target/arm/ata-as-arm.S b/firmware/target/arm/ata-as-arm.S
new file mode 100644
index 0000000000..32fc28c4e0
--- /dev/null
+++ b/firmware/target/arm/ata-as-arm.S
@@ -0,0 +1,344 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#include "config.h"
21
22
23#if CONFIG_CPU == PP5002
24 /* Using these routines causes ATA retries on iPod G3, probably related to
25 * improper controller setup. Needs investigation. */
26 .section .icode,"ax",%progbits
27 .equ .ata_port, 0xc00031e0 /* address loaded into r2 by both copy routines */
28#elif CONFIG_CPU == PP5020
29 /* Verified working on (PP5020, PP5021, PP5022) targets */
30 .section .icode,"ax",%progbits
31 .equ .ata_port, 0xc30001e0 /* address loaded into r2 by both copy routines */
32#elif CONFIG_CPU == S3C2440
33 /* Untested */
34 .text
35 .equ .ata_port, 0x18000000 /* address loaded into r2 by both copy routines */
36#endif /* CONFIG_CPU; NOTE(review): .ata_port is not defined for any other CPU */
37
38 .align 2
39 .global copy_read_sectors
40 .type copy_read_sectors,%function
41
42/* Read a number of 16-bit words from the ATA data port into a buffer
43 * of any byte alignment.
44 * Optimised for speed; assumes wordcount >= 10 (up to 2 halfwords are consumed
45 * for alignment and the 8-halfword loop body always runs at least once).
46 * Arguments:
47 * r0 - buffer address
48 * r1 - word count (16-bit words, called halfwords in the comments below)
49 *
50 * Register usage:
51 * r0 - current buffer address
52 * r1 - remaining halfword count (biased by -8 once the main loop is entered)
53 * r2 - ata data port address
54 * r3..r6, lr - data read from the port, packed into words for storing
55 */
56
57copy_read_sectors:
58 stmfd sp!, {r4, r5, r6, lr} /* save work registers and return address */
59 ldr r2, =.ata_port /* r2 = ATA data port address */
60 tst r0, #1 /* 16 bit aligned? */
61 beq .r_aligned
62
63 /* not 16-bit aligned */
64 sub r1, r1, #1 /* one halfword is handled unconditionally */
65 ldrh r3, [r2] /* read first halfword */
66 strb r3, [r0], #1 /* store low byte */
67 mov r3, r3, lsr #8 /* keep the high byte pending in r3 */
68
69 tst r0, #2 /* 32 bit aligned? */
70 beq .r_noword_u
71 ldrh r4, [r2] /* read second halfword */
72 orr r3, r3, r4, lsl #8 /* combine with old byte */
73 strh r3, [r0], #2 /* store */
74 mov r3, r4, lsr #8 /* high byte becomes the new pending byte */
75 sub r1, r1, #1 /* another halfword taken */
76.r_noword_u:
77
78 sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */
79.r_loop_u:
80 ldrh r4, [r2] /* Read 8 halfwords and combine them into */
81 orr r3, r3, r4, lsl #8 /* 4 words so that they're properly aligned */
82 ldrh r4, [r2] /* in memory. Bottom byte of first word is */
83 orr r3, r3, r4, lsl #24 /* the top byte from the last round. Write */
84 mov r4, r4, lsr #8 /* all 4 words at once. */
85 ldrh r5, [r2]
86 orr r4, r4, r5, lsl #8
87 ldrh r5, [r2]
88 orr r4, r4, r5, lsl #24
89 mov r5, r5, lsr #8
90 ldrh r6, [r2]
91 orr r5, r5, r6, lsl #8
92 ldrh r6, [r2]
93 orr r5, r5, r6, lsl #24
94 mov r6, r6, lsr #8
95 ldrh lr, [r2]
96 orr r6, r6, lr, lsl #8
97 ldrh lr, [r2]
98 orr r6, r6, lr, lsl #24
99 stmia r0!, {r3, r4, r5, r6}
100 mov r3, lr, lsr #8 /* top byte of last halfword is pending for the next store */
101 subs r1, r1, #8 /* 8 or more halfwords left? */
102 bge .r_loop_u
103
104 /* No need to adjust the count, only checking bits from now on: r1 is in -8..-1 here and its low 3 bits equal the number of halfwords left. */
105 tst r1, #4 /* 4 or more halfwords left? */
106 beq .r_end4_u
107 ldrh r4, [r2]
108 orr r3, r3, r4, lsl #8
109 ldrh r4, [r2]
110 orr r3, r3, r4, lsl #24
111 mov r4, r4, lsr #8
112 ldrh r5, [r2]
113 orr r4, r4, r5, lsl #8
114 ldrh r5, [r2]
115 orr r4, r4, r5, lsl #24
116 stmia r0!, {r3, r4}
117 mov r3, r5, lsr #8 /* pending byte for the next store */
118.r_end4_u:
119
120 tst r1, #2 /* 2 or more halfwords left? */
121 beq .r_end2_u
122 ldrh r4, [r2]
123 orr r3, r3, r4, lsl #8
124 ldrh r4, [r2]
125 orr r3, r3, r4, lsl #24
126 str r3, [r0], #4
127 mov r3, r4, lsr #8 /* pending byte for the next store */
128.r_end2_u:
129
130 tst r1, #1 /* one halfword left? */
131 ldrneh r4, [r2]
132 orrne r3, r3, r4, lsl #8
133 strneh r3, [r0], #2
134 movne r3, r4, lsr #8
135
136 strb r3, [r0], #1 /* store final byte */
137
138 ldmfd sp!, {r4, r5, r6, pc} /* restore registers and return */
139
140 /* 16-bit aligned */
141.r_aligned:
142 tst r0, #2 /* 32 bit aligned? */
143 ldrneh r3, [r2] /* no: read first halfword */
144 strneh r3, [r0], #2 /* store */
145 subne r1, r1, #1 /* one halfword taken */
146
147 sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */
148.r_loop_a:
149 ldrh r3, [r2] /* Read 8 halfwords and combine each pair */
150 ldrh r4, [r2] /* into a word, then store all at once. */
151 orr r3, r3, r4, lsl #16
152 ldrh r4, [r2]
153 ldrh r5, [r2]
154 orr r4, r4, r5, lsl #16
155 ldrh r5, [r2]
156 ldrh r6, [r2]
157 orr r5, r5, r6, lsl #16
158 ldrh r6, [r2]
159 ldrh lr, [r2]
160 orr r6, r6, lr, lsl #16
161 stmia r0!, {r3, r4, r5, r6}
162 subs r1, r1, #8 /* 8 or more halfwords left? */
163 bge .r_loop_a
164
165 /* No need to adjust the count, only checking bits from now on: r1 is in -8..-1 here and its low 3 bits equal the number of halfwords left. */
166 tst r1, #4 /* 4 or more halfwords left? */
167 beq .r_end4_a
168 ldrh r3, [r2]
169 ldrh r4, [r2]
170 orr r3, r3, r4, lsl #16
171 ldrh r4, [r2]
172 ldrh r5, [r2]
173 orr r4, r4, r5, lsl #16
174 stmia r0!, {r3, r4}
175.r_end4_a:
176
177 tst r1, #2 /* 2 or more halfwords left? */
178 ldrneh r3, [r2]
179 ldrneh r4, [r2]
180 orrne r3, r3, r4, lsl #16
181 strne r3, [r0], #4
182
183 tst r1, #1 /* one halfword left? */
184 ldrneh r3, [r2]
185 strneh r3, [r0], #2
186
187 ldmfd sp!, {r4, r5, r6, pc} /* restore registers and return */
188
189.r_end:
190 .size copy_read_sectors,.r_end-copy_read_sectors
191
192 .align 2
193 .global copy_write_sectors
194 .type copy_write_sectors,%function
195
196/* Write a number of 16-bit words from a buffer of any byte alignment
197 * to the ATA data port.
198 * Optimised for speed; assumes wordcount >= 10 (up to 2 halfwords are consumed
199 * for alignment and the 8-halfword loop body always runs at least once).
200 * Arguments:
201 * r0 - buffer address
202 * r1 - word count (16-bit words, called halfwords in the comments below)
203 *
204 * Register usage:
205 * r0 - current buffer address
206 * r1 - remaining halfword count (biased by -8 once the main loop is entered)
207 * r2 - ata data port address
208 * r3..r6, lr - data loaded from the buffer, split into halfwords for writing
209 */
210
211copy_write_sectors:
212 stmfd sp!, {r4, r5, r6, lr} /* save work registers and return address */
213 ldr r2, =.ata_port /* r2 = ATA data port address */
214 tst r0, #1 /* 16 bit aligned? */
215 beq .w_aligned
216
217 /* not 16-bit aligned */
218 sub r1, r1, #1 /* one halfword is done unconditionally */
219 ldrb r3, [r0], #1 /* load 1st byte, now halfword aligned. */
220
221 tst r0, #2 /* 32 bit aligned? */
222 beq .w_noword_u
223 ldrh r4, [r0], #2 /* load a halfword */
224 orr r3, r3, r4, lsl #8 /* combine with old byte */
225 strh r3, [r2] /* write halfword */
226 mov r3, r4, lsr #8 /* high byte becomes the new pending byte */
227 sub r1, r1, #1 /* another halfword taken */
228.w_noword_u:
229
230 sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */
231.w_loop_u:
232 ldmia r0!, {r4, r5, r6, lr}
233 orr r3, r3, r4, lsl #8 /* Load 4 words at once and decompose them */
234 strh r3, [r2] /* into 8 halfwords in a way that the words */
235 mov r3, r3, lsr #16 /* are shifted by 8 bits, putting the high */
236 strh r3, [r2] /* byte of one word into the low byte of */
237 mov r4, r4, lsr #24 /* the next. High byte of last word becomes */
238 orr r4, r4, r5, lsl #8 /* low byte of next round. */
239 strh r4, [r2]
240 mov r4, r4, lsr #16
241 strh r4, [r2]
242 mov r5, r5, lsr #24
243 orr r5, r5, r6, lsl #8
244 strh r5, [r2]
245 mov r5, r5, lsr #16
246 strh r5, [r2]
247 mov r6, r6, lsr #24
248 orr r6, r6, lr, lsl #8
249 strh r6, [r2]
250 mov r6, r6, lsr #16
251 strh r6, [r2]
252 mov r3, lr, lsr #24 /* top byte of last word is pending for the next write */
253 subs r1, r1, #8 /* 8 or more halfwords left? */
254 bge .w_loop_u
255
256 /* No need to adjust the count, only checking bits from now on: r1 is in -8..-1 here and its low 3 bits equal the number of halfwords left. */
257 tst r1, #4 /* 4 or more halfwords left? */
258 beq .w_end4_u
259 ldmia r0!, {r4, r5}
260 orr r3, r3, r4, lsl #8
261 strh r3, [r2]
262 mov r3, r3, lsr #16
263 strh r3, [r2]
264 mov r4, r4, lsr #24
265 orr r4, r4, r5, lsl #8
266 strh r4, [r2]
267 mov r4, r4, lsr #16
268 strh r4, [r2]
269 mov r3, r5, lsr #24 /* pending byte for the next write */
270.w_end4_u:
271
272 tst r1, #2 /* 2 or more halfwords left? */
273 beq .w_end2_u
274 ldr r4, [r0], #4
275 orr r3, r3, r4, lsl #8
276 strh r3, [r2]
277 mov r3, r3, lsr #16
278 strh r3, [r2]
279 mov r3, r4, lsr #24 /* pending byte for the next write */
280.w_end2_u:
281
282 tst r1, #1 /* one halfword left? */
283 ldrneh r4, [r0], #2
284 orrne r3, r3, r4, lsl #8
285 strneh r3, [r2]
286 movne r3, r3, lsr #16 /* high byte of that halfword is now pending */
287
288 ldrb r4, [r0], #1 /* load final byte */
289 orr r3, r3, r4, lsl #8
290 strh r3, [r2] /* write final halfword */
291
292 ldmfd sp!, {r4, r5, r6, pc} /* restore registers and return */
293
294 /* 16-bit aligned */
295.w_aligned:
296 tst r0, #2 /* 32 bit aligned? */
297 ldrneh r3, [r0], #2 /* no: load first halfword */
298 strneh r3, [r2] /* write */
299 subne r1, r1, #1 /* one halfword taken */
300
301 sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */
302.w_loop_a:
303 ldmia r0!, {r3, r4, r5, r6}
304 strh r3, [r2] /* Load 4 words and decompose them into */
305 mov r3, r3, lsr #16 /* 2 halfwords each, and write those. */
306 strh r3, [r2]
307 strh r4, [r2]
308 mov r4, r4, lsr #16
309 strh r4, [r2]
310 strh r5, [r2]
311 mov r5, r5, lsr #16
312 strh r5, [r2]
313 strh r6, [r2]
314 mov r6, r6, lsr #16
315 strh r6, [r2]
316 subs r1, r1, #8 /* 8 or more halfwords left? */
317 bge .w_loop_a
318
319 /* No need to adjust the count, only checking bits from now on: r1 is in -8..-1 here and its low 3 bits equal the number of halfwords left. */
320 tst r1, #4 /* 4 or more halfwords left? */
321 beq .w_end4_a
322 ldmia r0!, {r3, r4}
323 strh r3, [r2]
324 mov r3, r3, lsr #16
325 strh r3, [r2]
326 strh r4, [r2]
327 mov r4, r4, lsr #16
328 strh r4, [r2]
329.w_end4_a:
330
331 tst r1, #2 /* 2 or more halfwords left? */
332 ldrne r3, [r0], #4
333 strneh r3, [r2]
334 movne r3, r3, lsr #16
335 strneh r3, [r2]
336
337 tst r1, #1 /* one halfword left? */
338 ldrneh r3, [r0], #2
339 strneh r3, [r2]
340
341 ldmfd sp!, {r4, r5, r6, pc} /* restore registers and return */
342
343.w_end:
344 .size copy_write_sectors,.w_end-copy_write_sectors
diff --git a/firmware/target/arm/ata-target.h b/firmware/target/arm/ata-target.h
index 0f25f89d6b..d6c5f512e6 100644
--- a/firmware/target/arm/ata-target.h
+++ b/firmware/target/arm/ata-target.h
@@ -19,15 +19,25 @@
19 19
20#if (CONFIG_CPU == PP5002) || (CONFIG_CPU == PP5020) 20#if (CONFIG_CPU == PP5002) || (CONFIG_CPU == PP5020)
21 21
22/* Plain C read & write loops */
23
24#if (CONFIG_CPU == PP5002) 22#if (CONFIG_CPU == PP5002)
23
24/* Plain C reading and writing. See comment in ata-as-arm.S */
25
25#define ATA_IOBASE 0xc00031e0 26#define ATA_IOBASE 0xc00031e0
26#define ATA_CONTROL (*((volatile unsigned char*)(0xc00033f8))) 27#define ATA_CONTROL (*((volatile unsigned char*)(0xc00033f8)))
28
27#elif (CONFIG_CPU == PP5020) 29#elif (CONFIG_CPU == PP5020)
30
31/* asm optimized reading and writing */
32#define ATA_OPTIMIZED_READING
33#define ATA_OPTIMIZED_WRITING
34void copy_read_sectors(unsigned char* buf, int wordcount);
35void copy_write_sectors(const unsigned char* buf, int wordcount);
36
28#define ATA_IOBASE 0xc30001e0 37#define ATA_IOBASE 0xc30001e0
29#define ATA_CONTROL (*((volatile unsigned char*)(0xc30003f8))) 38#define ATA_CONTROL (*((volatile unsigned char*)(0xc30003f8)))
30#endif 39
40#endif /* CONFIG_CPU */
31 41
32#define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE))) 42#define ATA_DATA (*((volatile unsigned short*)(ATA_IOBASE)))
33#define ATA_ERROR (*((volatile unsigned char*)(ATA_IOBASE + 0x04))) 43#define ATA_ERROR (*((volatile unsigned char*)(ATA_IOBASE + 0x04)))