diff options
author | Jens Arnold <amiconn@rockbox.org> | 2006-02-06 16:00:58 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2006-02-06 16:00:58 +0000 |
commit | d036e97d3816ac2bc0eefc57bc033bd5fbbbf0f9 (patch) | |
tree | 604e32fcc0cf4ac745774987c5e052544bb25d36 /firmware | |
parent | 93c15381c8fa25cd30d52d3660c6f909837683fe (diff) | |
download | rockbox-d036e97d3816ac2bc0eefc57bc033bd5fbbbf0f9.tar.gz rockbox-d036e97d3816ac2bc0eefc57bc033bd5fbbbf0f9.zip |
Added memmove() to the rockbox core. C implementation taken from newlib. Fully optimised ASM implementations for SH1 and coldfire, reusing the ASM memcpy code path for forward copying.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8601 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/SOURCES | 2 | ||||
-rw-r--r-- | firmware/common/memcpy_a.S | 25 | ||||
-rw-r--r-- | firmware/common/memmove.c | 148 | ||||
-rwxr-xr-x | firmware/common/memmove_a.S | 869 |
4 files changed, 1034 insertions, 10 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES index 4e32266654..880d03aadd 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES | |||
@@ -38,9 +38,11 @@ common/strtok.c | |||
38 | common/timefuncs.c | 38 | common/timefuncs.c |
39 | #if (CONFIG_CPU == SH7034) || defined(CPU_COLDFIRE) | 39 | #if (CONFIG_CPU == SH7034) || defined(CPU_COLDFIRE) |
40 | common/memcpy_a.S | 40 | common/memcpy_a.S |
41 | common/memmove_a.S | ||
41 | common/memset_a.S | 42 | common/memset_a.S |
42 | #else | 43 | #else |
43 | common/memcpy.c | 44 | common/memcpy.c |
45 | common/memmove.c | ||
44 | common/memset.c | 46 | common/memset.c |
45 | #endif | 47 | #endif |
46 | #ifdef HAVE_LCD_CHARCELLS | 48 | #ifdef HAVE_LCD_CHARCELLS |
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S index 7264c964a4..9f6c813be3 100644 --- a/firmware/common/memcpy_a.S +++ b/firmware/common/memcpy_a.S | |||
@@ -23,6 +23,7 @@ | |||
23 | #if CONFIG_CPU == SH7034 | 23 | #if CONFIG_CPU == SH7034 |
24 | .align 2 | 24 | .align 2 |
25 | .global _memcpy | 25 | .global _memcpy |
26 | .global ___memcpy_fwd_entry | ||
26 | .type _memcpy,@function | 27 | .type _memcpy,@function |
27 | 28 | ||
28 | /* Copies <length> bytes of data in memory from <source> to <dest> | 29 | /* Copies <length> bytes of data in memory from <source> to <dest> |
@@ -46,12 +47,13 @@ | |||
46 | * r6 - source end address | 47 | * r6 - source end address |
47 | * r7 - stored dest start address | 48 | * r7 - stored dest start address |
48 | * | 49 | * |
49 | * The instruction order below is devised in a way to utilize the pipelining | 50 | * The instruction order is devised in a way to utilize the pipelining |
50 | * of the SH1 to the max. The routine also tries to utilize fast page mode. | 51 | * of the SH1 to the max. The routine also tries to utilize fast page mode. |
51 | */ | 52 | */ |
52 | 53 | ||
53 | _memcpy: | 54 | _memcpy: |
54 | mov r4,r7 /* store dest for returning */ | 55 | mov r4,r7 /* store dest for returning */ |
56 | ___memcpy_fwd_entry: | ||
55 | add #-8,r4 /* offset for early increment (max. 2 longs) */ | 57 | add #-8,r4 /* offset for early increment (max. 2 longs) */ |
56 | mov #11,r0 | 58 | mov #11,r0 |
57 | cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ | 59 | cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ |
@@ -99,7 +101,7 @@ _memcpy: | |||
99 | mov.l r0,@-r4 /* store second long */ | 101 | mov.l r0,@-r4 /* store second long */ |
100 | mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ | 102 | mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ |
101 | bt .loop_do0 | 103 | bt .loop_do0 |
102 | 104 | ||
103 | add #4,r3 /* readjust end address */ | 105 | add #4,r3 /* readjust end address */ |
104 | cmp/hi r5,r3 /* one long left? */ | 106 | cmp/hi r5,r3 /* one long left? */ |
105 | bf .start_b2 /* no, jump to trailing byte loop */ | 107 | bf .start_b2 /* no, jump to trailing byte loop */ |
@@ -148,20 +150,20 @@ _memcpy: | |||
148 | mov.l @r5+,r1 /* load first long & increment source addr */ | 150 | mov.l @r5+,r1 /* load first long & increment source addr */ |
149 | add #16,r4 /* increment dest addr */ | 151 | add #16,r4 /* increment dest addr */ |
150 | mov.l @r5+,r0 /* load second long & increment source addr */ | 152 | mov.l @r5+,r0 /* load second long & increment source addr */ |
151 | mov r1,r2 /* copy first long */ | 153 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ |
152 | mov.b r0,@-r4 /* store low byte of second long */ | 154 | mov.b r0,@-r4 /* store low byte of second long */ |
153 | shlr8 r0 /* get upper 3 bytes */ | 155 | shlr8 r0 /* get upper 3 bytes */ |
156 | mov r1,r2 /* copy first long */ | ||
154 | shll16 r2 /* move low byte of first long all the way up, .. */ | 157 | shll16 r2 /* move low byte of first long all the way up, .. */ |
155 | shll8 r2 | 158 | shll8 r2 |
156 | or r2,r0 /* ..combine with the 3 bytes of second long.. */ | 159 | or r2,r0 /* ..combine with the 3 bytes of second long.. */ |
157 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ | ||
158 | mov.l r0,@-r4 /* ..and store as long */ | 160 | mov.l r0,@-r4 /* ..and store as long */ |
159 | shlr8 r1 /* get middle 2 bytes */ | 161 | shlr8 r1 /* get middle 2 bytes */ |
160 | mov.w r1,@-r4 /* store as word */ | 162 | mov.w r1,@-r4 /* store as word */ |
161 | shlr16 r1 /* get upper byte */ | 163 | shlr16 r1 /* get upper byte */ |
162 | mov.b r1,@-r4 /* and store */ | 164 | mov.b r1,@-r4 /* and store */ |
163 | bt .loop_do1 | 165 | bt .loop_do1 |
164 | 166 | ||
165 | add #4,r3 /* readjust end address */ | 167 | add #4,r3 /* readjust end address */ |
166 | .last_do13: | 168 | .last_do13: |
167 | cmp/hi r5,r3 /* one long left? */ | 169 | cmp/hi r5,r3 /* one long left? */ |
@@ -218,6 +220,7 @@ _memcpy: | |||
218 | #define FULLSPEED /* use burst writing for word aligned destinations */ | 220 | #define FULLSPEED /* use burst writing for word aligned destinations */ |
219 | .align 2 | 221 | .align 2 |
220 | .global memcpy | 222 | .global memcpy |
223 | .global __memcpy_fwd_entry | ||
221 | .type memcpy,@function | 224 | .type memcpy,@function |
222 | 225 | ||
223 | /* Copies <length> bytes of data in memory from <source> to <dest> | 226 | /* Copies <length> bytes of data in memory from <source> to <dest> |
@@ -249,7 +252,9 @@ memcpy: | |||
249 | move.l (4,%sp),%a1 /* Destination */ | 252 | move.l (4,%sp),%a1 /* Destination */ |
250 | move.l (8,%sp),%a0 /* Source */ | 253 | move.l (8,%sp),%a0 /* Source */ |
251 | move.l (12,%sp),%d1 /* Length */ | 254 | move.l (12,%sp),%d1 /* Length */ |
252 | add.l %a0,%d1 /* %d1 = end address */ | 255 | |
256 | __memcpy_fwd_entry: | ||
257 | add.l %a0,%d1 /* %d1 = source end */ | ||
253 | 258 | ||
254 | move.l %a0,%d0 | 259 | move.l %a0,%d0 |
255 | addq.l #7,%d0 | 260 | addq.l #7,%d0 |
@@ -278,7 +283,7 @@ memcpy: | |||
278 | movem.l %d2-%d7/%a2,(%sp) | 283 | movem.l %d2-%d7/%a2,(%sp) |
279 | 284 | ||
280 | moveq.l #16,%d2 | 285 | moveq.l #16,%d2 |
281 | sub.l %d2,%d0 /* %d0 = first source long bound */ | 286 | sub.l %d2,%d0 /* %d0 = first source line bound */ |
282 | move.l %d1,%a2 /* %a2 = end address */ | 287 | move.l %d1,%a2 /* %a2 = end address */ |
283 | lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */ | 288 | lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */ |
284 | move.l %a1,%d1 | 289 | move.l %a1,%d1 |
@@ -507,7 +512,7 @@ memcpy: | |||
507 | lea.l (12,%a2),%a2 /* readjust end address for doing longwords */ | 512 | lea.l (12,%a2),%a2 /* readjust end address for doing longwords */ |
508 | cmp.l %a0,%a2 /* any trailing longwords? */ | 513 | cmp.l %a0,%a2 /* any trailing longwords? */ |
509 | jls .lines_end /* no: get outta here */ | 514 | jls .lines_end /* no: get outta here */ |
510 | 515 | ||
511 | .lines_do0_tail_loop: | 516 | .lines_do0_tail_loop: |
512 | move.l (%a0)+,(%a1)+ /* copy longword */ | 517 | move.l (%a0)+,(%a1)+ /* copy longword */ |
513 | cmp.l %a0,%a2 /* runs %a0 up to last long bound */ | 518 | cmp.l %a0,%a2 /* runs %a0 up to last long bound */ |
@@ -610,7 +615,7 @@ memcpy: | |||
610 | /* word aligned destination (line + 14): use line bursts in the loop */ | 615 | /* word aligned destination (line + 14): use line bursts in the loop */ |
611 | .lines_lo14_start: | 616 | .lines_lo14_start: |
612 | movem.l (%a0),%d4-%d7 /* load first line */ | 617 | movem.l (%a0),%d4-%d7 /* load first line */ |
613 | lea.l (16,%a0),%a0 | 618 | add.l %d0,%a0 |
614 | swap %d4 /* swap words of 1st long */ | 619 | swap %d4 /* swap words of 1st long */ |
615 | move.w %d4,(%a1)+ /* store word */ | 620 | move.w %d4,(%a1)+ /* store word */ |
616 | jra .lines_lo14_entry /* jump into main loop */ | 621 | jra .lines_lo14_entry /* jump into main loop */ |
@@ -784,7 +789,7 @@ memcpy: | |||
784 | move.l (%a0)+,%d7 /* load first longword */ | 789 | move.l (%a0)+,%d7 /* load first longword */ |
785 | swap %d7 /* swap words */ | 790 | swap %d7 /* swap words */ |
786 | move.w %d7,(%a1)+ /* store high word */ | 791 | move.w %d7,(%a1)+ /* store high word */ |
787 | cmp.l %a0,%d0 /* any full lnogword? */ | 792 | cmp.l %a0,%d0 /* any full longword? */ |
788 | jls .lines_do2_loop /* no: skip head loop */ | 793 | jls .lines_do2_loop /* no: skip head loop */ |
789 | 794 | ||
790 | .lines_do2_head_loop: | 795 | .lines_do2_head_loop: |
diff --git a/firmware/common/memmove.c b/firmware/common/memmove.c new file mode 100644 index 0000000000..761e9eb104 --- /dev/null +++ b/firmware/common/memmove.c | |||
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | FUNCTION | ||
3 | <<memmove>>---move possibly overlapping memory | ||
4 | |||
5 | INDEX | ||
6 | memmove | ||
7 | |||
8 | ANSI_SYNOPSIS | ||
9 | #include <string.h> | ||
10 | void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>); | ||
11 | |||
12 | TRAD_SYNOPSIS | ||
13 | #include <string.h> | ||
14 | void *memmove(<[dst]>, <[src]>, <[length]>) | ||
15 | void *<[dst]>; | ||
16 | void *<[src]>; | ||
17 | size_t <[length]>; | ||
18 | |||
19 | DESCRIPTION | ||
20 | This function moves <[length]> characters from the block of | ||
21 | memory starting at <<*<[src]>>> to the memory starting at | ||
22 | <<*<[dst]>>>. <<memmove>> reproduces the characters correctly | ||
23 | at <<*<[dst]>>> even if the two areas overlap. | ||
24 | |||
25 | |||
26 | RETURNS | ||
27 | The function returns <[dst]> as passed. | ||
28 | |||
29 | PORTABILITY | ||
30 | <<memmove>> is ANSI C. | ||
31 | |||
32 | <<memmove>> requires no supporting OS subroutines. | ||
33 | |||
34 | QUICKREF | ||
35 | memmove ansi pure | ||
36 | */ | ||
37 | |||
38 | #include "config.h" | ||
39 | #include <_ansi.h> | ||
40 | #include <stddef.h> | ||
41 | #include <limits.h> | ||
42 | |||
43 | /* Nonzero if either X or Y is not aligned on a "long" boundary. */ | ||
44 | #define UNALIGNED(X, Y) \ | ||
45 | (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1))) | ||
46 | |||
47 | /* How many bytes are copied each iteration of the 4X unrolled loop. */ | ||
48 | #define BIGBLOCKSIZE (sizeof (long) << 2) | ||
49 | |||
50 | /* How many bytes are copied each iteration of the word copy loop. */ | ||
51 | #define LITTLEBLOCKSIZE (sizeof (long)) | ||
52 | |||
53 | /* Threshold for punting to the byte copier. */ | ||
54 | #define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE) | ||
55 | |||
56 | _PTR | ||
57 | _DEFUN (memmove, (dst_void, src_void, length), | ||
58 | _PTR dst_void _AND | ||
59 | _CONST _PTR src_void _AND | ||
60 | size_t length) ICODE_ATTR; | ||
61 | |||
62 | _PTR | ||
63 | _DEFUN (memmove, (dst_void, src_void, length), | ||
64 | _PTR dst_void _AND | ||
65 | _CONST _PTR src_void _AND | ||
66 | size_t length) | ||
67 | { | ||
68 | #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) | ||
69 | char *dst = dst_void; | ||
70 | _CONST char *src = src_void; | ||
71 | |||
72 | if (src < dst && dst < src + length) | ||
73 | { | ||
74 | /* dst starts inside the source block: copy backwards so no source byte is overwritten before it is read */ | ||
75 | src += length; | ||
76 | dst += length; | ||
77 | while (length--) | ||
78 | { | ||
79 | *--dst = *--src; | ||
80 | } | ||
81 | } | ||
82 | else | ||
83 | { | ||
84 | while (length--) | ||
85 | { | ||
86 | *dst++ = *src++; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | return dst_void; | ||
91 | #else | ||
92 | char *dst = dst_void; | ||
93 | _CONST char *src = src_void; | ||
94 | long *aligned_dst; | ||
95 | _CONST long *aligned_src; | ||
96 | size_t len = length; /* keep the full size_t width; unsigned int would truncate the count where size_t is wider */ | ||
97 | |||
98 | if (src < dst && dst < src + len) | ||
99 | { | ||
100 | /* Destructive overlap (dst starts inside the source block)...have to copy backwards */ | ||
101 | src += len; | ||
102 | dst += len; | ||
103 | while (len--) | ||
104 | { | ||
105 | *--dst = *--src; | ||
106 | } | ||
107 | } | ||
108 | else | ||
109 | { | ||
110 | /* Use optimizing algorithm for a non-destructive copy to closely | ||
111 | match memcpy. If the size is small or either SRC or DST is unaligned, | ||
112 | then punt into the byte copy loop. This should be rare. */ | ||
113 | if (!TOO_SMALL(len) && !UNALIGNED (src, dst)) | ||
114 | { | ||
115 | aligned_dst = (long*)dst; | ||
116 | aligned_src = (long*)src; | ||
117 | |||
118 | /* Copy 4X long words at a time if possible. */ | ||
119 | while (len >= BIGBLOCKSIZE) | ||
120 | { | ||
121 | *aligned_dst++ = *aligned_src++; | ||
122 | *aligned_dst++ = *aligned_src++; | ||
123 | *aligned_dst++ = *aligned_src++; | ||
124 | *aligned_dst++ = *aligned_src++; | ||
125 | len -= BIGBLOCKSIZE; | ||
126 | } | ||
127 | |||
128 | /* Copy one long word at a time if possible. */ | ||
129 | while (len >= LITTLEBLOCKSIZE) | ||
130 | { | ||
131 | *aligned_dst++ = *aligned_src++; | ||
132 | len -= LITTLEBLOCKSIZE; | ||
133 | } | ||
134 | |||
135 | /* Pick up any residual with a byte copier. */ | ||
136 | dst = (char*)aligned_dst; | ||
137 | src = (char*)aligned_src; | ||
138 | } | ||
139 | |||
140 | while (len--) | ||
141 | { | ||
142 | *dst++ = *src++; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | return dst_void; | ||
147 | #endif /* not PREFER_SIZE_OVER_SPEED */ | ||
148 | } | ||
diff --git a/firmware/common/memmove_a.S b/firmware/common/memmove_a.S new file mode 100755 index 0000000000..d7421333df --- /dev/null +++ b/firmware/common/memmove_a.S | |||
@@ -0,0 +1,869 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by Jens Arnold | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | #include "config.h" | ||
20 | |||
21 | .section .icode,"ax",@progbits | ||
22 | |||
23 | #if CONFIG_CPU == SH7034 | ||
24 | .align 2 | ||
25 | .global _memmove | ||
26 | .type _memmove,@function | ||
27 | |||
28 | /* Moves <length> bytes of data in memory from <source> to <dest> | ||
29 | * Regions may overlap. | ||
30 | * This version is optimized for speed, and needs the corresponding memcpy | ||
31 | * implementation for the forward copy branch. | ||
32 | * | ||
33 | * arguments: | ||
34 | * r4 - destination address | ||
35 | * r5 - source address | ||
36 | * r6 - length | ||
37 | * | ||
38 | * return value: | ||
39 | * r0 - destination address (like ANSI version) | ||
40 | * | ||
41 | * register usage: | ||
42 | * r0 - data / scratch | ||
43 | * r1 - 2nd data / scratch | ||
44 | * r2 - scratch | ||
45 | * r3 - last long bound / adjusted start address (only if >= 11 bytes) | ||
46 | * r4 - current dest address | ||
47 | * r5 - source start address | ||
48 | * r6 - current source address | ||
49 | * | ||
50 | * The instruction order is devised in a way to utilize the pipelining | ||
51 | * of the SH1 to the max. The routine also tries to utilize fast page mode. | ||
52 | */ | ||
53 | |||
54 | _memmove: | ||
55 | cmp/hi r4,r5 /* source > destination? */ | ||
56 | bf .backward /* no: backward copy */ | ||
57 | mov.l .memcpy_fwd,r0 /* yes: fetch address of memcpy's forward-copy entry point */ | ||
58 | jmp @r0 /* jump (not call) into the memcpy code path */ | ||
59 | mov r4,r7 /* (delay slot) store dest for returning */ | ||
60 | |||
61 | .align 2 | ||
62 | .memcpy_fwd: | ||
63 | .long ___memcpy_fwd_entry | ||
64 | |||
65 | .backward: | ||
66 | add r6,r4 /* r4 = destination end */ | ||
67 | mov #11,r0 | ||
68 | cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ | ||
69 | add #-8,r5 /* adjust for late decrement (max. 2 longs) */ | ||
70 | add r5,r6 /* r6 = source end - 8 */ | ||
71 | bf .start_b2r /* no: jump directly to byte loop */ | ||
72 | |||
73 | mov #-4,r3 /* r3 = 0xfffffffc */ | ||
74 | and r6,r3 /* r3 = last source long bound */ | ||
75 | cmp/hi r3,r6 /* already aligned? */ | ||
76 | bf .end_b1r /* yes: skip leading byte loop */ | ||
77 | |||
78 | .loop_b1r: | ||
79 | mov.b @(7,r6),r0 /* load byte */ | ||
80 | add #-1,r6 /* decrement source addr */ | ||
81 | mov.b r0,@-r4 /* store byte */ | ||
82 | cmp/hi r3,r6 /* runs r6 down to last long bound */ | ||
83 | bt .loop_b1r | ||
84 | |||
85 | .end_b1r: | ||
86 | mov #3,r1 | ||
87 | and r4,r1 /* r1 = dest alignment offset */ | ||
88 | mova .jmptab_r,r0 | ||
89 | mov.b @(r0,r1),r1 /* select appropriate main loop.. */ | ||
90 | add r0,r1 | ||
91 | mov r5,r3 /* copy adjusted start address (source start - 8) to r3 */ | ||
92 | jmp @r1 /* ..and jump to it */ | ||
93 | add #7,r3 /* (delay slot) adjust start bound for main loops doing 2 longs/pass */ | ||
94 | |||
95 | /** main loops, copying 2 longs per pass to profit from fast page mode **/ | ||
96 | |||
97 | /* long aligned destination (fastest) */ | ||
98 | .align 2 | ||
99 | .loop_do0r: | ||
100 | mov.l @r6,r1 /* load first long */ | ||
101 | add #-8,r6 /* decrement source addr */ | ||
102 | mov.l @(12,r6),r0 /* load second long */ | ||
103 | cmp/hi r3,r6 /* runs r6 down to first or second long bound */ | ||
104 | mov.l r0,@-r4 /* store second long */ | ||
105 | mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ | ||
106 | bt .loop_do0r | ||
107 | |||
108 | add #-4,r3 /* readjust end address */ | ||
109 | cmp/hi r3,r6 /* first long left? */ | ||
110 | bf .start_b2r /* no, jump to trailing byte loop */ | ||
111 | |||
112 | mov.l @(4,r6),r0 /* load first long */ | ||
113 | add #-4,r6 /* decrement source addr */ | ||
114 | bra .start_b2r /* jump to trailing byte loop */ | ||
115 | mov.l r0,@-r4 /* store first long */ | ||
116 | |||
117 | /* word aligned destination (long + 2) */ | ||
118 | .align 2 | ||
119 | .loop_do2r: | ||
120 | mov.l @r6,r1 /* load first long */ | ||
121 | add #-8,r6 /* decrement source addr */ | ||
122 | mov.l @(12,r6),r0 /* load second long */ | ||
123 | cmp/hi r3,r6 /* runs r6 down to first or second long bound */ | ||
124 | mov.w r0,@-r4 /* store low word of second long */ | ||
125 | xtrct r1,r0 /* extract low word of first long & high word of second long */ | ||
126 | mov.l r0,@-r4 /* and store as long */ | ||
127 | shlr16 r1 /* get high word of first long */ | ||
128 | mov.w r1,@-r4 /* and store it */ | ||
129 | bt .loop_do2r | ||
130 | |||
131 | add #-4,r3 /* readjust start bound for doing a single long */ | ||
132 | cmp/hi r3,r6 /* first long left? */ | ||
133 | bf .start_b2r /* no, jump to trailing byte loop */ | ||
134 | |||
135 | mov.l @(4,r6),r0 /* load first long */ | ||
136 | add #-4,r6 /* decrement source addr */ | ||
137 | mov.w r0,@-r4 /* store low word */ | ||
138 | shlr16 r0 /* get high word */ | ||
139 | bra .start_b2r /* jump to trailing byte loop */ | ||
140 | mov.w r0,@-r4 /* (delay slot) and store it */ | ||
141 | |||
142 | /* jumptable for loop selector */ | ||
143 | .align 2 | ||
144 | .jmptab_r: | ||
145 | .byte .loop_do0r - .jmptab_r /* placed in the middle because the SH1 */ | ||
146 | .byte .loop_do1r - .jmptab_r /* loads bytes sign-extended. Otherwise */ | ||
147 | .byte .loop_do2r - .jmptab_r /* the last loop would be out of reach */ | ||
148 | .byte .loop_do3r - .jmptab_r /* of the offset range. */ | ||
149 | |||
150 | /* byte aligned destination (long + 1) */ | ||
151 | .align 2 | ||
152 | .loop_do1r: | ||
153 | mov.l @r6,r1 /* load first long */ | ||
154 | add #-8,r6 /* decrement source addr */ | ||
155 | mov.l @(12,r6),r0 /* load second long */ | ||
156 | cmp/hi r3,r6 /* runs r6 down to first or second long bound */ | ||
157 | mov.b r0,@-r4 /* store low byte of second long */ | ||
158 | shlr8 r0 /* get upper 3 bytes */ | ||
159 | mov r1,r2 /* copy first long */ | ||
160 | shll16 r2 /* move low byte of first long all the way up, .. */ | ||
161 | shll8 r2 | ||
162 | or r2,r0 /* ..combine with the 3 bytes of second long.. */ | ||
163 | mov.l r0,@-r4 /* ..and store as long */ | ||
164 | shlr8 r1 /* get middle 2 bytes */ | ||
165 | mov.w r1,@-r4 /* store as word */ | ||
166 | shlr16 r1 /* get upper byte */ | ||
167 | mov.b r1,@-r4 /* and store */ | ||
168 | bt .loop_do1r | ||
169 | |||
170 | add #-4,r3 /* readjust end address */ | ||
171 | .last_do13r: | ||
172 | cmp/hi r3,r6 /* first long left? */ | ||
173 | bf .start_b2r /* no, jump to trailing byte loop */ | ||
174 | |||
175 | nop /* alignment */ | ||
176 | mov.l @(4,r6),r0 /* load first long */ | ||
177 | add #-4,r6 /* decrement source addr */ | ||
178 | mov.b r0,@-r4 /* store low byte */ | ||
179 | shlr8 r0 /* get middle 2 bytes */ | ||
180 | mov.w r0,@-r4 /* store as word */ | ||
181 | shlr16 r0 /* get upper byte */ | ||
182 | bra .start_b2r /* jump to trailing byte loop */ | ||
183 | mov.b r0,@-r4 /* and store */ | ||
184 | |||
185 | /* byte aligned destination (long + 3) */ | ||
186 | .align 2 | ||
187 | .loop_do3r: | ||
188 | mov.l @r6,r1 /* load first long */ | ||
189 | add #-8,r6 /* decrement source addr */ | ||
190 | mov.l @(12,r6),r0 /* load second long */ | ||
191 | mov r1,r2 /* copy first long */ | ||
192 | mov.b r0,@-r4 /* store low byte of second long */ | ||
193 | shlr8 r0 /* get middle 2 bytes */ | ||
194 | mov.w r0,@-r4 /* store as word */ | ||
195 | shlr16 r0 /* get upper byte */ | ||
196 | shll8 r2 /* move lower 3 bytes of first long one up.. */ | ||
197 | or r2,r0 /* ..combine with the 1 byte of second long.. */ | ||
198 | mov.l r0,@-r4 /* ..and store as long */ | ||
199 | shlr16 r1 /* get upper byte of first long */ | ||
200 | shlr8 r1 | ||
201 | cmp/hi r3,r6 /* runs r6 down to first or second long bound */ | ||
202 | mov.b r1,@-r4 /* ..and store */ | ||
203 | bt .loop_do3r | ||
204 | |||
205 | bra .last_do13r /* handle first longword: reuse routine for (long + 1) */ | ||
206 | add #-4,r3 /* readjust end address */ | ||
207 | |||
208 | /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */ | ||
209 | .align 2 | ||
210 | .loop_b2r: | ||
211 | mov.b @(7,r6),r0 /* load byte */ | ||
212 | add #-1,r6 /* decrement source addr */ | ||
213 | mov.b r0,@-r4 /* store byte */ | ||
214 | .start_b2r: | ||
215 | cmp/hi r5,r6 /* runs r6 down to start address */ | ||
216 | bt .loop_b2r | ||
217 | |||
218 | rts | ||
219 | mov r4,r0 /* return dest start address */ | ||
220 | .end: | ||
221 | .size _memmove,.end-_memmove | ||
222 | #elif defined(CPU_COLDFIRE) | ||
223 | #define FULLSPEED /* use burst writing for word aligned destinations */ | ||
224 | .align 2 | ||
225 | .global memmove | ||
226 | .type memmove,@function | ||
227 | |||
228 | /* Moves <length> bytes of data in memory from <source> to <dest> | ||
229 | * Regions may overlap. | ||
230 | * This version is optimized for speed, and needs the corresponding memcpy | ||
231 | * implementation for the forward copy branch. | ||
232 | * | ||
233 | * arguments: | ||
234 | * (4,%sp) - destination address | ||
235 | * (8,%sp) - source address | ||
236 | * (12,%sp) - length | ||
237 | * | ||
238 | * return value: | ||
239 | * %d0 - destination address (like ANSI version) | ||
240 | * | ||
241 | * register usage: | ||
242 | * %a0 - current source address | ||
243 | * %a1 - current dest address | ||
244 | * %a2 - source start address (in line-copy loops) | ||
245 | * %d0 - source start address (byte and longword copy) / data / scratch | ||
246 | * %d1 - data / scratch | ||
247 | * %d2 - data / scratch | ||
248 | * %d3..%d7 - data | ||
249 | * | ||
250 | * For maximum speed this routine reads and writes whole lines using burst | ||
251 | * move (movem.l) where possible. For byte aligned destinations (long-1 and | ||
252 | * long-3) it writes longwords only. Same goes for word aligned destinations | ||
253 | * if FULLSPEED is undefined. | ||
254 | */ | ||
255 | memmove: | ||
256 | move.l (4,%sp),%a1 /* Destination */ | ||
257 | move.l (8,%sp),%a0 /* Source */ | ||
258 | move.l (12,%sp),%d1 /* Length */ | ||
259 | |||
260 | cmp.l %a0,%a1 | ||
261 | bhi.b .backward /* dest > src -> backward copy */ | ||
262 | jmp __memcpy_fwd_entry | ||
263 | |||
264 | .backward: | ||
265 | move.l %a0,%d0 /* %d0 = source start */ | ||
266 | add.l %d1,%a0 /* %a0 = source end */ | ||
267 | add.l %d1,%a1 /* %a1 = destination end */ | ||
268 | |||
269 | move.l %a0,%d1 | ||
270 | and.l #0xFFFFFFFC,%d1 /* %d1 = last source long bound */ | ||
271 | subq.l #4,%d1 | ||
272 | cmp.l %d0,%d1 /* at least one aligned longword to copy? */ | ||
273 | blo.w .bytes2r_start | ||
274 | |||
275 | addq.l #4,%d1 /* %d1 = last source long bound */ | ||
276 | cmp.l %d1,%a0 /* any bytes to copy */ | ||
277 | jls .bytes1r_end /* no: skip byte loop */ | ||
278 | |||
279 | /* leading byte loop: copies 0..3 bytes */ | ||
280 | .bytes1r_loop: | ||
281 | move.b -(%a0),-(%a1) /* copy byte */ | ||
282 | cmp.l %d1,%a0 /* runs %a0 down to last long bound */ | ||
283 | jhi .bytes1r_loop | ||
284 | |||
285 | .bytes1r_end: | ||
286 | moveq.l #-16,%d1 | ||
287 | add.l %a0,%d1 | ||
288 | and.l #0xFFFFFFF0,%d1 /* %d1 = last source line bound - 16 */ | ||
289 | cmp.l %d0,%d1 /* at least one aligned line to copy? */ | ||
290 | blo.w .longr_start /* no: jump to longword copy loop */ | ||
291 | |||
292 | lea.l (-28,%sp),%sp /* free up some registers */ | ||
293 | movem.l %d2-%d7/%a2,(%sp) | ||
294 | |||
295 | moveq.l #16,%d2 | ||
296 | add.l %d2,%d1 /* %d1 = last source line bound */ | ||
297 | move.l %d0,%a2 /* %a2 = start address */ | ||
298 | lea.l (15,%a2),%a2 /* adjust start address for loops doing 16 bytes/pass */ | ||
299 | move.l %a1,%d0 | ||
300 | moveq.l #3,%d2 /* mask */ | ||
301 | and.l %d2,%d0 | ||
302 | jmp.l (2,%pc,%d0.l*4) /* switch (dest_addr & 3) */ | ||
303 | bra.w .lines_do0r_start | ||
304 | bra.w .lines_do1r_start | ||
305 | bra.w .lines_do2r_start | ||
306 | /* bra.w .lines_do3r_start implicit */ | ||
307 | |||
308 | /* byte aligned destination (long - 1): use line burst reads in main loop */ | ||
309 | .lines_do3r_start: | ||
310 | moveq.l #24,%d0 /* shift count for shifting by 3 bytes */ | ||
311 | cmp.l %d1,%a0 /* any leading longwords? */ | ||
312 | jhi .lines_do3r_head_start /* yes: leading longword copy */ | ||
313 | |||
314 | lea.l (-16,%a0),%a0 | ||
315 | movem.l (%a0),%d3-%d6 /* load initial line */ | ||
316 | move.l %d6,%d2 /* last longword, bytes 3210 */ | ||
317 | move.b %d2,-(%a1) /* store byte */ | ||
318 | lsr.l #8,%d2 /* last longword, bytes .321 */ | ||
319 | move.w %d2,-(%a1) /* store word */ | ||
320 | jra .lines_do3r_entry | ||
321 | |||
322 | .lines_do3r_head_start: | ||
323 | move.l -(%a0),%d3 /* load initial longword */ | ||
324 | move.l %d3,%d2 /* bytes 3210 */ | ||
325 | move.b %d2,-(%a1) /* store byte */ | ||
326 | lsr.l #8,%d2 /* bytes .321 */ | ||
327 | move.w %d2,-(%a1) /* store word */ | ||
328 | jra .lines_do3r_head_entry | ||
329 | |||
330 | .lines_do3r_head_loop: | ||
331 | move.l %d3,%d4 /* move old longword away */ | ||
332 | move.l -(%a0),%d3 /* load new longword */ | ||
333 | move.l %d3,%d2 | ||
334 | lsl.l #8,%d2 /* get bytes 210. */ | ||
335 | or.l %d2,%d4 /* combine with old high byte */ | ||
336 | move.l %d4,-(%a1) /* store longword */ | ||
337 | .lines_do3r_head_entry: | ||
338 | lsr.l %d0,%d3 /* shift down high byte */ | ||
339 | cmp.l %d1,%a0 /* run %a0 down to last line bound */ | ||
340 | jhi .lines_do3r_head_loop | ||
341 | |||
342 | .lines_do3r_loop: | ||
343 | move.l %d3,%d7 /* move first longword (shifted-down high byte) of old line away */ | ||
344 | lea.l (-16,%a0),%a0 /* step source back by one line (16 bytes) */ | ||
345 | movem.l (%a0),%d3-%d6 /* load new line */ | ||
346 | move.l %d6,%d2 | ||
347 | lsl.l #8,%d2 /* get bytes 210. of 4th longword */ | ||
348 | or.l %d2,%d7 /* combine with high byte of old longword */ | ||
349 | move.l %d7,-(%a1) /* store longword */ | ||
350 | .lines_do3r_entry: | ||
351 | lsr.l %d0,%d6 /* shift down high byte (%d0 = 24) */ | ||
352 | move.l %d5,%d2 | ||
353 | lsl.l #8,%d2 /* get bytes 210. of 3rd longword */ | ||
354 | or.l %d2,%d6 /* combine with high byte of 4th longword */ | ||
355 | move.l %d6,-(%a1) /* store longword */ | ||
356 | lsr.l %d0,%d5 /* shift down high byte */ | ||
357 | move.l %d4,%d2 | ||
358 | lsl.l #8,%d2 /* get bytes 210. of 2nd longword */ | ||
359 | or.l %d2,%d5 /* combine with high byte of 3rd longword */ | ||
360 | move.l %d5,-(%a1) /* store longword */ | ||
361 | lsr.l %d0,%d4 /* shift down high byte */ | ||
362 | move.l %d3,%d2 | ||
363 | lsl.l #8,%d2 /* get bytes 210. of 1st longword */ | ||
364 | or.l %d2,%d4 /* combine with high byte of 2nd longword */ | ||
365 | move.l %d4,-(%a1) /* store longword */ | ||
366 | lsr.l %d0,%d3 /* shift down high byte; kept in %d3 for the next pass or the tail */ | ||
367 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
368 | jhi .lines_do3r_loop | ||
369 | |||
370 | lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */ | ||
371 | cmp.l %a2,%a0 /* any trailing longwords? */ | ||
372 | jls .lines_do3r_tail_end /* no: just store last high byte */ | ||
373 | |||
374 | .lines_do3r_tail_loop: | ||
375 | move.l %d3,%d4 /* move old longword away */ | ||
376 | move.l -(%a0),%d3 /* load new longword */ | ||
377 | move.l %d3,%d2 | ||
378 | lsl.l #8,%d2 /* get bytes 210. */ | ||
379 | or.l %d2,%d4 /* combine with old high byte */ | ||
380 | move.l %d4,-(%a1) /* store longword */ | ||
381 | lsr.l %d0,%d3 /* shift down high byte */ | ||
382 | cmp.l %a2,%a0 /* run %a0 down to first long bound */ | ||
383 | jhi .lines_do3r_tail_loop | ||
384 | |||
385 | .lines_do3r_tail_end: | ||
386 | move.b %d3,-(%a1) /* store shifted-down high byte */ | ||
387 | jra .linesr_end | ||
388 | |||
389 | /* byte aligned destination (long - 3): use line burst reads in main loop */ | ||
390 | .lines_do1r_start: | ||
391 | moveq.l #24,%d0 /* shift count for shifting by 3 bytes */ | ||
392 | cmp.l %d1,%a0 /* any leading longwords? */ | ||
393 | jhi .lines_do1r_head_start /* yes: leading longword copy */ | ||
394 | |||
395 | lea.l (-16,%a0),%a0 | ||
396 | movem.l (%a0),%d3-%d6 /* load initial line */ | ||
397 | move.b %d6,-(%a1) /* store low byte of last longword */ | ||
398 | jra .lines_do1r_entry | ||
399 | |||
400 | .lines_do1r_head_start: | ||
401 | move.l -(%a0),%d3 /* load initial longword */ | ||
402 | move.b %d3,-(%a1) /* store low byte */ | ||
403 | jra .lines_do1r_head_entry | ||
404 | |||
405 | .lines_do1r_head_loop: | ||
406 | move.l %d3,%d4 /* move old longword away */ | ||
407 | move.l -(%a0),%d3 /* load new longword */ | ||
408 | move.l %d3,%d2 | ||
409 | lsl.l %d0,%d2 /* get low byte */ | ||
410 | or.l %d2,%d4 /* combine with old bytes .321 */ | ||
411 | move.l %d4,-(%a1) /* store longword */ | ||
412 | .lines_do1r_head_entry: | ||
413 | lsr.l #8,%d3 /* get bytes .321 */ | ||
414 | cmp.l %d1,%a0 /* run %a0 down to last line bound */ | ||
415 | jhi .lines_do1r_head_loop | ||
416 | |||
417 | .lines_do1r_loop: | ||
418 | move.l %d3,%d7 /* move first longword (bytes .321) of old line away */ | ||
419 | lea.l (-16,%a0),%a0 /* step source back by one line (16 bytes) */ | ||
420 | movem.l (%a0),%d3-%d6 /* load new line */ | ||
421 | move.l %d6,%d2 | ||
422 | lsl.l %d0,%d2 /* get low byte of 4th longword (%d0 = 24) */ | ||
423 | or.l %d2,%d7 /* combine with bytes .321 of old longword */ | ||
424 | move.l %d7,-(%a1) /* store longword */ | ||
425 | .lines_do1r_entry: | ||
426 | lsr.l #8,%d6 /* get bytes .321 */ | ||
427 | move.l %d5,%d2 | ||
428 | lsl.l %d0,%d2 /* get low byte of 3rd longword */ | ||
429 | or.l %d2,%d6 /* combine with bytes .321 of 4th longword */ | ||
430 | move.l %d6,-(%a1) /* store longword */ | ||
431 | lsr.l #8,%d5 /* get bytes .321 */ | ||
432 | move.l %d4,%d2 | ||
433 | lsl.l %d0,%d2 /* get low byte of 2nd longword */ | ||
434 | or.l %d2,%d5 /* combine with bytes .321 of 3rd longword */ | ||
435 | move.l %d5,-(%a1) /* store longword */ | ||
436 | lsr.l #8,%d4 /* get bytes .321 */ | ||
437 | move.l %d3,%d2 | ||
438 | lsl.l %d0,%d2 /* get low byte of 1st longword */ | ||
439 | or.l %d2,%d4 /* combine with bytes .321 of 2nd longword */ | ||
440 | move.l %d4,-(%a1) /* store longword */ | ||
441 | lsr.l #8,%d3 /* get bytes .321; kept in %d3 for the next pass or the tail */ | ||
442 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
443 | jhi .lines_do1r_loop | ||
444 | |||
445 | lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */ | ||
446 | cmp.l %a2,%a0 /* any trailing longwords? */ | ||
447 | jls .lines_do1r_tail_end /* no: just store the remaining bytes 321 */ | ||
448 | |||
449 | .lines_do1r_tail_loop: | ||
450 | move.l %d3,%d4 /* move old longword away */ | ||
451 | move.l -(%a0),%d3 /* load new longword */ | ||
452 | move.l %d3,%d2 | ||
453 | lsl.l %d0,%d2 /* get low byte */ | ||
454 | or.l %d2,%d4 /* combine with old bytes .321 */ | ||
455 | move.l %d4,-(%a1) /* store longword */ | ||
456 | lsr.l #8,%d3 /* get bytes .321 */ | ||
457 | cmp.l %a2,%a0 /* run %a0 down to first long bound */ | ||
458 | jhi .lines_do1r_tail_loop | ||
459 | |||
460 | .lines_do1r_tail_end: | ||
461 | move.w %d3,-(%a1) /* store word 21 */ | ||
462 | swap %d3 | ||
463 | move.b %d3,-(%a1) /* store byte 3 */ | ||
464 | jra .linesr_end | ||
465 | |||
466 | /* long aligned destination (line - 0/4/8/12): head */ | ||
467 | .lines_do0r_head_loop: | ||
468 | move.l -(%a0),-(%a1) /* copy longword */ | ||
469 | .lines_do0r_start: | ||
470 | cmp.l %d1,%a0 /* run %a0 down to last line bound */ | ||
471 | jhi .lines_do0r_head_loop | ||
472 | |||
473 | .lines_do0r_head_end: | ||
474 | move.l %a1,%d1 /* current dest address */ | ||
475 | lsr.l #2,%d1 /* drop the two low address bits */ | ||
476 | moveq.l #3,%d0 /* mask */ | ||
477 | and.l %d0,%d1 /* %d1 = (dest_addr >> 2) & 3 */ | ||
478 | moveq.l #16,%d0 /* address decrement for one main loop pass */ | ||
479 | jmp.l (2,%pc,%d1.l*2) /* switch ((dest_addr >> 2) & 3): index scaled by 2 selects one of the bra.b entries below */ | ||
480 | bra.b .lines_lo0r_start | ||
481 | bra.b .lines_lo4r_start | ||
482 | bra.b .lines_lo8r_start | ||
483 | /* bra.b .lines_lo12r_start implicit: case 3 lands on the code right after the table */ | ||
484 | |||
485 | /* long aligned destination (line - 4): use line bursts in the loop */ | ||
486 | .lines_lo12r_start: | ||
487 | sub.l %d0,%a0 /* step source pointer down by the decrement in %d0 */ | ||
488 | movem.l (%a0),%d1-%d4 /* load initial line */ | ||
489 | move.l %d4,-(%a1) /* store 4th longword */ | ||
490 | move.l %d3,-(%a1) /* store 3rd longword */ | ||
491 | move.l %d2,-(%a1) /* store 2nd longword */ | ||
492 | cmp.l %a2,%a0 /* any full lines? */ | ||
493 | jls .lines_lo12r_end /* no: skip main loop */ | ||
494 | |||
495 | .lines_lo12r_loop: | ||
496 | move.l %d1,%d5 /* move first longword of old line away */ | ||
497 | sub.l %d0,%a0 /* step source pointer down one line */ | ||
498 | movem.l (%a0),%d1-%d4 /* load new line */ | ||
499 | sub.l %d0,%a1 /* step dest pointer down one line */ | ||
500 | movem.l %d2-%d5,(%a1) /* store line (3 new longwords + 1 old, in ascending address order) */ | ||
501 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
502 | jhi .lines_lo12r_loop | ||
503 | |||
504 | jra .lines_lo12r_end /* handle trailing longwords */ | ||
505 | |||
506 | /* line aligned destination: use line bursts in the loop */ | ||
507 | .lines_lo0r_start: | ||
508 | .lines_lo0r_loop: | ||
509 | sub.l %d0,%a0 /* step source pointer down by the decrement in %d0 */ | ||
510 | movem.l (%a0),%d1-%d4 /* load line */ | ||
511 | sub.l %d0,%a1 /* step dest pointer down by the same amount */ | ||
512 | movem.l %d1-%d4,(%a1) /* store line unchanged */ | ||
513 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
514 | jhi .lines_lo0r_loop | ||
515 | |||
516 | jra .lines_lo0r_end /* handle trailing longwords */ | ||
517 | |||
518 | /* long aligned destination (line - 8): use line bursts in the loop */ | ||
519 | .lines_lo8r_start: | ||
520 | sub.l %d0,%a0 /* step source pointer down by the decrement in %d0 */ | ||
521 | movem.l (%a0),%d1-%d4 /* load initial line */ | ||
522 | move.l %d4,-(%a1) /* store 4th longword */ | ||
523 | move.l %d3,-(%a1) /* store 3rd longword */ | ||
524 | cmp.l %a2,%a0 /* any full lines? */ | ||
525 | jls .lines_lo8r_end /* no: skip main loop */ | ||
526 | |||
527 | .lines_lo8r_loop: | ||
528 | move.l %d2,%d6 /* move first 2 longwords of old line away */ | ||
529 | move.l %d1,%d5 | ||
530 | sub.l %d0,%a0 /* step source pointer down one line */ | ||
531 | movem.l (%a0),%d1-%d4 /* load new line */ | ||
532 | sub.l %d0,%a1 /* step dest pointer down one line */ | ||
533 | movem.l %d3-%d6,(%a1) /* store line (2 new longwords + 2 old, in ascending address order) */ | ||
534 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
535 | jhi .lines_lo8r_loop | ||
536 | |||
537 | jra .lines_lo8r_end /* handle trailing longwords */ | ||
538 | |||
539 | /* long aligned destination (line - 12): use line bursts in the loop */ | ||
540 | .lines_lo4r_start: | ||
541 | sub.l %d0,%a0 /* step source pointer down by the decrement in %d0 */ | ||
542 | movem.l (%a0),%d1-%d4 /* load initial line */ | ||
543 | move.l %d4,-(%a1) /* store 4th longword */ | ||
544 | cmp.l %a2,%a0 /* any full lines? */ | ||
545 | jls .lines_lo4r_end /* no: skip main loop */ | ||
546 | |||
547 | .lines_lo4r_loop: | ||
548 | move.l %d3,%d7 /* move first 3 longwords of old line away */ | ||
549 | move.l %d2,%d6 | ||
550 | move.l %d1,%d5 | ||
551 | sub.l %d0,%a0 /* step source pointer down one line */ | ||
552 | movem.l (%a0),%d1-%d4 /* load new line */ | ||
553 | sub.l %d0,%a1 /* step dest pointer down one line */ | ||
554 | movem.l %d4-%d7,(%a1) /* store line (1 new longword + 3 old, in ascending address order) */ | ||
555 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
556 | jhi .lines_lo4r_loop | ||
557 | |||
558 | /* long aligned destination (line - 0/4/8/12): tail. The entry labels cascade so each variant stores exactly the longwords it still holds in %d3/%d2/%d1. */ | ||
559 | .lines_lo4r_end: | ||
560 | move.l %d3,-(%a1) /* store 3rd last longword */ | ||
561 | .lines_lo8r_end: | ||
562 | move.l %d2,-(%a1) /* store 2nd last longword */ | ||
563 | .lines_lo12r_end: | ||
564 | move.l %d1,-(%a1) /* store last longword */ | ||
565 | .lines_lo0r_end: | ||
566 | lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */ | ||
567 | cmp.l %a2,%a0 /* any trailing longwords? */ | ||
568 | jls .linesr_end /* no: get outta here */ | ||
569 | |||
570 | .lines_do0r_tail_loop: | ||
571 | move.l -(%a0),-(%a1) /* copy longword */ | ||
572 | cmp.l %a2,%a0 /* run %a0 down to first long bound */ | ||
573 | jhi .lines_do0r_tail_loop | ||
574 | |||
575 | jra .linesr_end | ||
576 | |||
577 | #ifdef FULLSPEED | ||
578 | /* word aligned destination (line - 2/6/10/14): head */ | ||
579 | .lines_do2r_start: | ||
580 | cmp.l %d1,%a0 /* any leading longwords? */ | ||
581 | jls .lines_do2r_selector /* no: jump to mainloop selector */ | ||
582 | |||
583 | move.l -(%a0),%d3 /* load initial longword */ | ||
584 | move.w %d3,-(%a1) /* store low word */ | ||
585 | cmp.l %d1,%a0 /* any more longwords? */ | ||
586 | jls .lines_do2r_head_end /* no: skip head loop */ | ||
587 | |||
588 | .lines_do2r_head_loop: | ||
589 | move.l %d3,%d4 /* move old longword away */ | ||
590 | move.l -(%a0),%d3 /* load new longword */ | ||
591 | move.w %d3,%d4 /* combine low word with old high word */ | ||
592 | swap %d4 /* swap words */ | ||
593 | move.l %d4,-(%a1) /* store longword */ | ||
594 | cmp.l %d1,%a0 /* run %a0 down to last line bound */ | ||
595 | jhi .lines_do2r_head_loop | ||
596 | |||
597 | .lines_do2r_head_end: | ||
598 | swap %d3 /* get high word */ | ||
599 | move.w %d3,-(%a1) /* and store it */ | ||
600 | |||
601 | .lines_do2r_selector: | ||
602 | move.l %a1,%d1 /* current dest address */ | ||
603 | lsr.l #2,%d1 /* drop the two low address bits */ | ||
604 | moveq.l #3,%d0 /* mask */ | ||
605 | and.l %d0,%d1 /* %d1 = (dest_addr >> 2) & 3 */ | ||
606 | moveq.l #16,%d7 /* address decrement for one main loop pass (the line loops below load data into %d0-%d3) */ | ||
607 | jmp.l (2,%pc,%d1.l*4) /* switch ((dest_addr >> 2) & 3): index scaled by 4 selects one of the bra.w entries below */ | ||
608 | bra.w .lines_lo2r_start | ||
609 | bra.w .lines_lo6r_start | ||
610 | bra.w .lines_lo10r_start | ||
611 | /* bra.w .lines_lo14r_start implicit: case 3 lands on the code right after the table */ | ||
612 | |||
613 | /* word aligned destination (line - 2): use line bursts in the loop */ | ||
614 | .lines_lo14r_start: | ||
615 | sub.l %d7,%a0 /* step source pointer down by the decrement in %d7 */ | ||
616 | movem.l (%a0),%d0-%d3 /* load initial line */ | ||
617 | move.w %d3,-(%a1) /* store last low word */ | ||
618 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
619 | swap %d3 /* swap words of 3rd long */ | ||
620 | move.w %d1,%d2 /* combine 2nd low word with 3rd high word */ | ||
621 | swap %d2 /* swap words of 2nd long */ | ||
622 | move.w %d0,%d1 /* combine 1st low word with 2nd high word */ | ||
623 | swap %d1 /* swap words of 1st long */ | ||
624 | move.l %d3,-(%a1) /* store 3rd longword */ | ||
625 | move.l %d2,-(%a1) /* store 2nd longword */ | ||
626 | move.l %d1,-(%a1) /* store 1st longword */ | ||
627 | cmp.l %a2,%a0 /* any full lines? */ | ||
628 | jls .lines_lo14r_end /* no: skip main loop */ | ||
629 | |||
630 | .lines_lo14r_loop: | ||
631 | move.l %d0,%d4 /* move first longword of old line away (its high word is not stored yet) */ | ||
632 | sub.l %d7,%a0 /* step source pointer down one line */ | ||
633 | movem.l (%a0),%d0-%d3 /* load line */ | ||
634 | move.w %d3,%d4 /* combine 4th low word with old high word */ | ||
635 | swap %d4 /* swap words of 4th long */ | ||
636 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
637 | swap %d3 /* swap words of 3rd long */ | ||
638 | move.w %d1,%d2 /* combine 2nd low word with 3rd high word */ | ||
639 | swap %d2 /* swap words of 2nd long */ | ||
640 | move.w %d0,%d1 /* combine 1st low word with 2nd high word */ | ||
641 | swap %d1 /* swap words of 1st long */ | ||
642 | sub.l %d7,%a1 /* step dest pointer down one line */ | ||
643 | movem.l %d1-%d4,(%a1) /* store line */ | ||
644 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
645 | jhi .lines_lo14r_loop | ||
646 | |||
647 | jra .lines_lo14r_end /* handle trailing longwords */ | ||
648 | |||
649 | /* word aligned destination (line - 6): use line bursts in the loop */ | ||
650 | .lines_lo10r_start: | ||
651 | sub.l %d7,%a0 /* step source pointer down by the decrement in %d7 */ | ||
652 | movem.l (%a0),%d0-%d3 /* load initial line */ | ||
653 | move.w %d3,-(%a1) /* store last low word */ | ||
654 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
655 | swap %d3 /* swap words of 3rd long */ | ||
656 | move.w %d1,%d2 /* combine 2nd low word with 3rd high word */ | ||
657 | swap %d2 /* swap words of 2nd long */ | ||
658 | move.l %d3,-(%a1) /* store 3rd longword */ | ||
659 | move.l %d2,-(%a1) /* store 2nd longword */ | ||
660 | jra .lines_lo10r_entry /* jump into main loop */ | ||
661 | |||
662 | .lines_lo10r_loop: | ||
663 | move.l %d0,%d4 /* move first 2 longwords of old line away (%d1 was already combined at the loop entry) */ | ||
664 | move.l %d1,%d5 | ||
665 | sub.l %d7,%a0 /* step source pointer down one line */ | ||
666 | movem.l (%a0),%d0-%d3 /* load line */ | ||
667 | move.w %d3,%d4 /* combine 4th low word with old high word */ | ||
668 | swap %d4 /* swap words of 4th long */ | ||
669 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
670 | swap %d3 /* swap words of 3rd long */ | ||
671 | move.w %d1,%d2 /* combine 2nd low word with 3rd high word */ | ||
672 | swap %d2 /* swap words of 2nd long */ | ||
673 | sub.l %d7,%a1 /* step dest pointer down one line */ | ||
674 | movem.l %d2-%d5,(%a1) /* store line */ | ||
675 | .lines_lo10r_entry: | ||
676 | move.w %d0,%d1 /* combine 1st low word with 2nd high word */ | ||
677 | swap %d1 /* swap words of 1st long */ | ||
678 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
679 | jhi .lines_lo10r_loop | ||
680 | |||
681 | jra .lines_lo10r_end /* handle trailing longwords */ | ||
682 | |||
683 | /* word aligned destination (line - 10): use line bursts in the loop */ | ||
684 | .lines_lo6r_start: | ||
685 | sub.l %d7,%a0 /* step source pointer down by the decrement in %d7 */ | ||
686 | movem.l (%a0),%d0-%d3 /* load initial line */ | ||
687 | move.w %d3,-(%a1) /* store last low word */ | ||
688 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
689 | swap %d3 /* swap words of 3rd long */ | ||
690 | move.l %d3,-(%a1) /* store 3rd longword */ | ||
691 | jra .lines_lo6r_entry /* jump into main loop */ | ||
692 | |||
693 | .lines_lo6r_loop: | ||
694 | move.l %d0,%d4 /* move first 3 longwords of old line away (%d1 and %d2 were already combined at the loop entry) */ | ||
695 | move.l %d1,%d5 | ||
696 | move.l %d2,%d6 | ||
697 | sub.l %d7,%a0 /* step source pointer down one line */ | ||
698 | movem.l (%a0),%d0-%d3 /* load line */ | ||
699 | move.w %d3,%d4 /* combine 4th low word with old high word */ | ||
700 | swap %d4 /* swap words of 4th long */ | ||
701 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
702 | swap %d3 /* swap words of 3rd long */ | ||
703 | sub.l %d7,%a1 /* step dest pointer down one line */ | ||
704 | movem.l %d3-%d6,(%a1) /* store line */ | ||
705 | .lines_lo6r_entry: | ||
706 | move.w %d1,%d2 /* combine 2nd low word with 3rd high word */ | ||
707 | swap %d2 /* swap words of 2nd long */ | ||
708 | move.w %d0,%d1 /* combine 1st low word with 2nd high word */ | ||
709 | swap %d1 /* swap words of 1st long */ | ||
710 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
711 | jhi .lines_lo6r_loop | ||
712 | |||
713 | jra .lines_lo6r_end /* handle trailing longwords */ | ||
714 | |||
715 | /* word aligned destination (line - 14): use line bursts in the loop */ | ||
716 | .lines_lo2r_start: | ||
717 | sub.l %d7,%a0 /* step source pointer down by the decrement in %d7 */ | ||
718 | movem.l (%a0),%d0-%d3 /* load initial line */ | ||
719 | move.w %d3,-(%a1) /* store last low word */ | ||
720 | jra .lines_lo2r_entry /* jump into main loop */ | ||
721 | |||
722 | .lines_lo2r_loop: | ||
723 | move.l %d0,%d4 /* move old line away (%d1-%d3 were already combined at the loop entry) */ | ||
724 | move.l %d1,%d5 | ||
725 | move.l %d2,%d6 | ||
726 | move.l %d3,%d7 /* overwrites the decrement value that was in %d7 */ | ||
727 | lea.l (-16,%a0),%a0 /* constant step down one line: %d7 now holds line data */ | ||
728 | movem.l (%a0),%d0-%d3 /* load line */ | ||
729 | move.w %d3,%d4 /* combine 4th low word with old high word */ | ||
730 | swap %d4 /* swap words of 4th long */ | ||
731 | lea.l (-16,%a1),%a1 /* step dest pointer down one line */ | ||
732 | movem.l %d4-%d7,(%a1) /* store line */ | ||
733 | .lines_lo2r_entry: | ||
734 | move.w %d2,%d3 /* combine 3rd low word with 4th high word */ | ||
735 | swap %d3 /* swap words of 3rd long */ | ||
736 | move.w %d1,%d2 /* combine 2nd low word with 3rd high word */ | ||
737 | swap %d2 /* swap words of 2nd long */ | ||
738 | move.w %d0,%d1 /* combine 1st low word with 2nd high word */ | ||
739 | swap %d1 /* swap words of 1st long */ | ||
740 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
741 | jhi .lines_lo2r_loop | ||
742 | |||
743 | /* word aligned destination (line - 2/6/10/14): tail. The entry labels cascade so each variant stores exactly the combined longwords it still holds in %d3/%d2/%d1. */ | ||
744 | .lines_lo2r_end: | ||
745 | move.l %d3,-(%a1) /* store third last longword */ | ||
746 | .lines_lo6r_end: | ||
747 | move.l %d2,-(%a1) /* store second last longword */ | ||
748 | .lines_lo10r_end: | ||
749 | move.l %d1,-(%a1) /* store last longword */ | ||
750 | .lines_lo14r_end: | ||
751 | lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */ | ||
752 | cmp.l %a2,%a0 /* any trailing longwords? */ | ||
753 | jls .lines_do2r_tail_end /* no: skip tail loop */ | ||
754 | |||
755 | .lines_do2r_tail_loop: | ||
756 | move.l %d0,%d1 /* move old longword away (its high word is not stored yet) */ | ||
757 | move.l -(%a0),%d0 /* load new longword */ | ||
758 | move.w %d0,%d1 /* combine low word with old high word */ | ||
759 | swap %d1 /* swap words */ | ||
760 | move.l %d1,-(%a1) /* store longword */ | ||
761 | cmp.l %a2,%a0 /* run %a0 down to first long bound */ | ||
762 | jhi .lines_do2r_tail_loop | ||
763 | |||
764 | .lines_do2r_tail_end: | ||
765 | swap %d0 /* get final high word */ | ||
766 | move.w %d0,-(%a1) /* store it */ | ||
767 | /* jra .linesr_end implicit: execution falls through to .linesr_end */ | ||
768 | |||
769 | #else /* !FULLSPEED */ | ||
770 | |||
771 | /* word aligned destination (long - 2): use line burst reads in the loop. Each stored longword combines the high word of one source longword with the low word of the next lower one. */ | ||
772 | .lines_do2r_start: | ||
773 | cmp.l %d1,%a0 /* any leading longwords? */ | ||
774 | jhi .lines_do2r_head_start /* yes: leading longword copy */ | ||
775 | |||
776 | lea.l (-16,%a0),%a0 /* step source pointer down by one 16-byte line */ | ||
777 | movem.l (%a0),%d3-%d6 /* load initial line */ | ||
778 | move.w %d6,-(%a1) /* store last low word */ | ||
779 | jra .lines_do2r_entry /* jump into main loop */ | ||
780 | |||
781 | .lines_do2r_head_start: | ||
782 | move.l -(%a0),%d3 /* load initial longword */ | ||
783 | move.w %d3,-(%a1) /* store low word */ | ||
784 | cmp.l %d1,%a0 /* any more longwords? */ | ||
785 | jls .lines_do2r_loop /* no: skip head loop */ | ||
786 | |||
787 | .lines_do2r_head_loop: | ||
788 | move.l %d3,%d4 /* move old longword away */ | ||
789 | move.l -(%a0),%d3 /* load new longword */ | ||
790 | move.w %d3,%d4 /* combine low word with old high word */ | ||
791 | swap %d4 /* swap words */ | ||
792 | move.l %d4,-(%a1) /* store longword */ | ||
793 | cmp.l %d1,%a0 /* run %a0 down to last line bound */ | ||
794 | jhi .lines_do2r_head_loop | ||
795 | |||
796 | .lines_do2r_loop: | ||
797 | move.l %d3,%d7 /* move first longword of old line away */ | ||
798 | lea.l (-16,%a0),%a0 /* step source pointer down by one 16-byte line */ | ||
799 | movem.l (%a0),%d3-%d6 /* load line */ | ||
800 | move.w %d6,%d7 /* combine 4th low word with old high word */ | ||
801 | swap %d7 /* swap words of 4th long */ | ||
802 | move.l %d7,-(%a1) /* store 4th longword */ | ||
803 | .lines_do2r_entry: | ||
804 | move.w %d5,%d6 /* combine 3rd low word with 4th high word */ | ||
805 | swap %d6 /* swap words of 3rd long */ | ||
806 | move.l %d6,-(%a1) /* store 3rd longword */ | ||
807 | move.w %d4,%d5 /* combine 2nd low word with 3rd high word */ | ||
808 | swap %d5 /* swap words of 2nd long */ | ||
809 | move.l %d5,-(%a1) /* store 2nd longword */ | ||
810 | move.w %d3,%d4 /* combine 1st low word with 2nd high word */ | ||
811 | swap %d4 /* swap words of 1st long */ | ||
812 | move.l %d4,-(%a1) /* store 1st longword */ | ||
813 | cmp.l %a2,%a0 /* run %a0 down to first line bound */ | ||
814 | jhi .lines_do2r_loop | ||
815 | |||
816 | .lines_do2r_end: | ||
817 | lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */ | ||
818 | cmp.l %a2,%a0 /* any trailing longwords? */ | ||
819 | jls .lines_do2r_tail_end /* no: skip tail loop */ | ||
820 | |||
821 | .lines_do2r_tail_loop: | ||
822 | move.l %d3,%d4 /* move old longword away */ | ||
823 | move.l -(%a0),%d3 /* load new longword */ | ||
824 | move.w %d3,%d4 /* combine low word with old high word */ | ||
825 | swap %d4 /* swap words */ | ||
826 | move.l %d4,-(%a1) /* store longword */ | ||
827 | cmp.l %a2,%a0 /* run %a0 down to first long bound */ | ||
828 | jhi .lines_do2r_tail_loop | ||
829 | |||
830 | .lines_do2r_tail_end: | ||
831 | swap %d3 /* get final high word */ | ||
832 | move.w %d3,-(%a1) /* store it */ | ||
833 | /* jra .linesr_end implicit: execution falls through to .linesr_end */ | ||
834 | |||
835 | #endif /* !FULLSPEED */ | ||
836 | |||
837 | .linesr_end: | ||
838 | subq.l #3,%a2 /* readjust start address */ | ||
839 | move.l %a2,%d0 /* start address in %d0 again */ | ||
840 | movem.l (%sp),%d2-%d7/%a2 /* restore registers */ | ||
841 | lea.l (28,%sp),%sp /* release the 28 bytes (7 longwords) just reloaded */ | ||
842 | jra .bytes2r_start /* jump to trailing byte loop */ | ||
843 | |||
844 | .longr_start: | ||
845 | addq.l #3,%d0 /* adjust start address for doing 4 bytes/pass */ | ||
846 | |||
847 | /* longword copy loop - no lines */ | ||
848 | .longr_loop: | ||
849 | move.l -(%a0),-(%a1) /* copy longword (write can be unaligned) */ | ||
850 | cmp.l %d0,%a0 /* runs %a0 down to first long bound */ | ||
851 | jhi .longr_loop | ||
852 | |||
853 | subq.l #3,%d0 /* readjust start address */ | ||
854 | cmp.l %d0,%a0 /* any bytes left? */ | ||
855 | jls .bytes2r_end /* no: skip trailing byte loop */ | ||
856 | |||
857 | /* trailing byte loop */ | ||
858 | .bytes2r_loop: | ||
859 | move.b -(%a0),-(%a1) /* copy byte */ | ||
860 | .bytes2r_start: | ||
861 | cmp.l %d0,%a0 /* runs %a0 down to start address */ | ||
862 | jhi .bytes2r_loop | ||
863 | |||
864 | .bytes2r_end: | ||
865 | rts /* returns start address. NOTE(review): %d0 here is the bound the read pointer %a0 was run down to, while %a1 is the write pointer; confirm this is the value callers expect memmove to return */ | ||
866 | |||
867 | .end: | ||
868 | .size memmove,.end-memmove | ||
869 | #endif | ||