summaryrefslogtreecommitdiff
path: root/firmware/common/memcpy_a.S
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2005-01-21 22:43:02 +0000
committerJens Arnold <amiconn@rockbox.org>2005-01-21 22:43:02 +0000
commit0310f16005a1e98c441221bc0f0f7586d0b19763 (patch)
treec695e2baa4da1bc7fb9381568863e903020335e3 /firmware/common/memcpy_a.S
parent948f0b849785b3e727be6584470d1bb32238ec6a (diff)
downloadrockbox-0310f16005a1e98c441221bc0f0f7586d0b19763.tar.gz
rockbox-0310f16005a1e98c441221bc0f0f7586d0b19763.zip
Revived C implementations of memcpy() and memset() for platforms without asm optimized versions (gmini), replacing the intermediate strings.c. Moved the asm optimized versions 'out of the way' for the implicit 'make' rules by renaming them to *_a.S (for assembler/alternative).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@5628 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/common/memcpy_a.S')
-rw-r--r--firmware/common/memcpy_a.S196
1 file changed, 196 insertions, 0 deletions
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S
new file mode 100644
index 0000000000..e129b99442
--- /dev/null
+++ b/firmware/common/memcpy_a.S
@@ -0,0 +1,196 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19#include "config.h"
20
21 .section .icode,"ax",@progbits
22
#if CONFIG_CPU == SH7034
        .align  2
        .global _memcpy
        .type   _memcpy,@function

/* Copies <length> bytes of data in memory from <source> to <dest>
 * This version is optimized for speed
 *
 * C equivalent: void *memcpy(void *dest, const void *source, size_t length)
 *
 * arguments:
 *  r4 - destination address
 *  r5 - source address
 *  r6 - length
 *
 * return value:
 *  r0 - destination address (like ANSI version)
 *
 * register usage:
 *  r0 - data / temporary
 *  r1 - bit mask for rounding to long bounds / 2nd data
 *  r2 - first long bound (only if >= 12 bytes)
 *  r3 - last long bound (-4) (only if >= 12 bytes)
 *  r4 - current dest address
 *  r5 - current source address
 *  r6 - source end address
 *  r7 - stored dest start address
 *
 * clobbers: r0-r3 (r4-r7 are consumed/restored per the roles above); no
 * stack usage. Source reads are always long-aligned after the lead-in, so
 * the three main loops differ only in how the destination is written.
 *
 * The instruction order below is devised in a way to utilize the pipelining
 * of the SH1 to the max. The routine also tries to utilize fast page mode.
 * NOTE: SH-1 `bra` and `rts` have a delay slot - the instruction textually
 * after them executes BEFORE the branch takes effect. Several loop
 * adjustments below deliberately live in those slots.
 */

_memcpy:
        add     r5,r6           /* r6 = source_end */
        mov     r4,r7           /* store dest for returning */
        add     #-8,r4          /* adjust for early increments (max. 2 longs) */

        mov     r6,r0
        add     #-12,r0         /* r0 = r6 - 12; don't go below 12 here! */
        cmp/hs  r5,r0           /* >= 12 bytes to copy? (unsigned compare) */
        bf      .start_b2       /* no, jump into byte loop */

        mov     #-4,r1          /* r1 = 0xFFFFFFFC (long-bound mask) */

        mov     r5,r2
        add     #3,r2
        and     r1,r2           /* r2 = first source long bound */
        mov     r6,r3
        add     #-4,r3          /* end offset for copying 2 longs per pass */
        bra     .start_b1       /* jump into leading byte loop */
        and     r1,r3           /* r3 = last source long bound - 4 (delay slot) */

    /* leading byte loop: copies 0..3 bytes */
        .align  2
.loop_b1:
        mov.b   @r5+,r0         /* load byte & increment source addr */
        add     #1,r4           /* increment dest addr */
        mov.b   r0,@(7,r4)      /* store byte (+7 compensates the -8 bias on r4) */
.start_b1:
        cmp/hi  r5,r2           /* runs r5 up to first long bound */
        bt      .loop_b1
    /* now r5 is always at a long boundary */
    /* -> memory reading is done in longs for all dest alignments */

    /* selector for main copy loop */
        mov     r4,r0
        tst     #3,r0           /* dest now also at long bound? */
        bt      .loop2_l        /* yes, do long copy */
        tst     #1,r0           /* dest now at least at word bound? */
        bt      .start4_w       /* yes, do word copy */

    /* main loop for byte aligned destination (fast) */
    /* copies 1 long per pass */
        add     #4,r3           /* reset end offset (single long per pass) */
        add     #-1,r4          /* adjust to word alignment for word write+ */

.loop4_b:
        mov.l   @r5+,r0         /* load a long & increment source addr */
        add     #4,r4           /* increment dest addr */
        mov.b   r0,@(8,r4)      /* store low byte */
        shlr8   r0              /* get middle 2 bytes */
        mov.w   r0,@(6,r4)      /* store as word+ */
        shlr16  r0              /* get upper byte */
        mov.b   r0,@(5,r4)      /* and store */
        cmp/hi  r5,r3           /* runs r5 up to last long bound */
        bt      .loop4_b

        bra     .start_b2       /* jump to trailing byte loop */
        add     #1,r4           /* readjust (delay slot) */

    /* main loop for word aligned destination (faster) */
    /* copies 2 longs per pass, utilizing fast page mode */
.start4_w:
        add     #-2,r4          /* adjust to long alignment for long write+ */

.loop4_w:
        mov.l   @r5+,r1         /* load first long & increment source addr */
        add     #8,r4           /* increment dest addr */
        mov.l   @r5+,r0         /* load second long & increment source addr */
        cmp/hi  r5,r3           /* runs r5 up to last or second last long bound */
        mov.w   r0,@(8,r4)      /* store low word of second long */
        xtrct   r1,r0           /* extract low word of first long & high word of second long */
        mov.l   r0,@(4,r4)      /* and store as long+ */
        swap.w  r1,r0           /* get high word of first long */
        mov.w   r0,@(2,r4)      /* and store it */
        bt      .loop4_w        /* flags still valid: stores don't touch T bit */

        add     #2,r4           /* readjust destination */
        add     #4,r3           /* reset end offset */
        cmp/hi  r5,r3           /* one long left? */
        bf      .start_b2       /* no, jump to trailing byte loop */

        mov.l   @r5+,r0         /* load last long & increment source addr */
        add     #4,r4           /* increment dest addr */
        mov.w   r0,@(6,r4)      /* store low word */
        shlr16  r0              /* get high word */
        bra     .start_b2       /* jump to trailing byte loop */
        mov.w   r0,@(4,r4)      /* and store it (delay slot) */

    /* main loop for long aligned destination (fastest) */
    /* copies 2 longs per pass, utilizing fast page mode */
.loop2_l:
        mov.l   @r5+,r1         /* load first long & increment source addr */
        add     #8,r4           /* increment dest addr */
        mov.l   @r5+,r0         /* load second long & increment source addr */
        cmp/hi  r5,r3           /* runs r5 up to last or second last long bound */
        mov.l   r1,@r4          /* store first long */
        mov.l   r0,@(4,r4)      /* store second long; NOT ALIGNED - no speed loss here! */
        bt      .loop2_l

        add     #4,r3           /* reset end offset */
        cmp/hi  r5,r3           /* one long left? */
        bf      .start_b2       /* no, jump to trailing byte loop */

        mov.l   @r5+,r0         /* load last long & increment source addr */
        add     #4,r4           /* increment dest addr */
        bra     .start_b2       /* jump to trailing byte loop */
        mov.l   r0,@(4,r4)      /* store last long (delay slot) */

    /* trailing byte loop: copies 0..3 bytes (or all for < 12 in total) */
.loop_b2:
        mov.b   @r5+,r0         /* load byte & increment source addr */
        add     #1,r4           /* increment dest addr */
        mov.b   r0,@(7,r4)      /* store byte */
.start_b2:
        cmp/hi  r5,r6           /* runs r5 up to end address */
        bt      .loop_b2

        rts
        mov     r7,r0           /* return dest start address (delay slot) */
.end:
        .size   _memcpy,.end-_memcpy
#elif CONFIG_CPU == MCF5249
        .align  2
        .global memcpy
        .type   memcpy,@function

/* Copies <length> bytes of data in memory from <source> to <dest>
 * This version is not optimized at all (simple byte loop)
 *
 * C equivalent: void *memcpy(void *dest, const void *source, size_t length)
 *
 * arguments (on the stack, m68k C calling convention):
 *  (4,%sp)  - destination address
 *  (8,%sp)  - source address
 *  (12,%sp) - length
 *
 * return value:
 *  %d0 - destination address (like ANSI version, matching the SH7034
 *        implementation above; the original code left %d0 undefined)
 *
 * clobbers: %d0, %d1, %a0, %a1 (all caller-saved scratch registers)
 */
memcpy:
        move.l  (4,%sp),%a1     /* Destination */
        move.l  (8,%sp),%a0     /* Source */
        move.l  (12,%sp),%d1    /* Length */

        cmp.l   #0,%d1          /* prime Z flag for the bne below (length == 0?) */
        bra.b   .byteloopend    /* enter the loop at its exit test */

.byteloop:
        move.b  (%a0)+,(%a1)+   /* copy one byte, post-increment both pointers */
        subq.l  #1,%d1          /* decrement count; sets Z when it hits 0 */
.byteloopend:
        bne.b   .byteloop       /* loop while count != 0 */

        move.l  (4,%sp),%d0     /* return dest start address (ANSI memcpy);
                                 * placed after the loop so it cannot disturb
                                 * the flags consumed by bne */
        rts
#endif