summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--firmware/SOURCES2
-rw-r--r--firmware/common/memcpy_a.S25
-rw-r--r--firmware/common/memmove.c148
-rwxr-xr-xfirmware/common/memmove_a.S869
4 files changed, 1034 insertions, 10 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 4e32266654..880d03aadd 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -38,9 +38,11 @@ common/strtok.c
38common/timefuncs.c 38common/timefuncs.c
39#if (CONFIG_CPU == SH7034) || defined(CPU_COLDFIRE) 39#if (CONFIG_CPU == SH7034) || defined(CPU_COLDFIRE)
40common/memcpy_a.S 40common/memcpy_a.S
41common/memmove_a.S
41common/memset_a.S 42common/memset_a.S
42#else 43#else
43common/memcpy.c 44common/memcpy.c
45common/memmove.c
44common/memset.c 46common/memset.c
45#endif 47#endif
46#ifdef HAVE_LCD_CHARCELLS 48#ifdef HAVE_LCD_CHARCELLS
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S
index 7264c964a4..9f6c813be3 100644
--- a/firmware/common/memcpy_a.S
+++ b/firmware/common/memcpy_a.S
@@ -23,6 +23,7 @@
23#if CONFIG_CPU == SH7034 23#if CONFIG_CPU == SH7034
24 .align 2 24 .align 2
25 .global _memcpy 25 .global _memcpy
26 .global ___memcpy_fwd_entry
26 .type _memcpy,@function 27 .type _memcpy,@function
27 28
28/* Copies <length> bytes of data in memory from <source> to <dest> 29/* Copies <length> bytes of data in memory from <source> to <dest>
@@ -46,12 +47,13 @@
46 * r6 - source end address 47 * r6 - source end address
47 * r7 - stored dest start address 48 * r7 - stored dest start address
48 * 49 *
49 * The instruction order below is devised in a way to utilize the pipelining 50 * The instruction order is devised in a way to utilize the pipelining
50 * of the SH1 to the max. The routine also tries to utilize fast page mode. 51 * of the SH1 to the max. The routine also tries to utilize fast page mode.
51 */ 52 */
52 53
53_memcpy: 54_memcpy:
54 mov r4,r7 /* store dest for returning */ 55 mov r4,r7 /* store dest for returning */
56___memcpy_fwd_entry:
55 add #-8,r4 /* offset for early increment (max. 2 longs) */ 57 add #-8,r4 /* offset for early increment (max. 2 longs) */
56 mov #11,r0 58 mov #11,r0
57 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ 59 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */
@@ -99,7 +101,7 @@ _memcpy:
99 mov.l r0,@-r4 /* store second long */ 101 mov.l r0,@-r4 /* store second long */
100 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ 102 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
101 bt .loop_do0 103 bt .loop_do0
102 104
103 add #4,r3 /* readjust end address */ 105 add #4,r3 /* readjust end address */
104 cmp/hi r5,r3 /* one long left? */ 106 cmp/hi r5,r3 /* one long left? */
105 bf .start_b2 /* no, jump to trailing byte loop */ 107 bf .start_b2 /* no, jump to trailing byte loop */
@@ -148,20 +150,20 @@ _memcpy:
148 mov.l @r5+,r1 /* load first long & increment source addr */ 150 mov.l @r5+,r1 /* load first long & increment source addr */
149 add #16,r4 /* increment dest addr */ 151 add #16,r4 /* increment dest addr */
150 mov.l @r5+,r0 /* load second long & increment source addr */ 152 mov.l @r5+,r0 /* load second long & increment source addr */
151 mov r1,r2 /* copy first long */ 153 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
152 mov.b r0,@-r4 /* store low byte of second long */ 154 mov.b r0,@-r4 /* store low byte of second long */
153 shlr8 r0 /* get upper 3 bytes */ 155 shlr8 r0 /* get upper 3 bytes */
156 mov r1,r2 /* copy first long */
154 shll16 r2 /* move low byte of first long all the way up, .. */ 157 shll16 r2 /* move low byte of first long all the way up, .. */
155 shll8 r2 158 shll8 r2
156 or r2,r0 /* ..combine with the 3 bytes of second long.. */ 159 or r2,r0 /* ..combine with the 3 bytes of second long.. */
157 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
158 mov.l r0,@-r4 /* ..and store as long */ 160 mov.l r0,@-r4 /* ..and store as long */
159 shlr8 r1 /* get middle 2 bytes */ 161 shlr8 r1 /* get middle 2 bytes */
160 mov.w r1,@-r4 /* store as word */ 162 mov.w r1,@-r4 /* store as word */
161 shlr16 r1 /* get upper byte */ 163 shlr16 r1 /* get upper byte */
162 mov.b r1,@-r4 /* and store */ 164 mov.b r1,@-r4 /* and store */
163 bt .loop_do1 165 bt .loop_do1
164 166
165 add #4,r3 /* readjust end address */ 167 add #4,r3 /* readjust end address */
166.last_do13: 168.last_do13:
167 cmp/hi r5,r3 /* one long left? */ 169 cmp/hi r5,r3 /* one long left? */
@@ -218,6 +220,7 @@ _memcpy:
218#define FULLSPEED /* use burst writing for word aligned destinations */ 220#define FULLSPEED /* use burst writing for word aligned destinations */
219 .align 2 221 .align 2
220 .global memcpy 222 .global memcpy
223 .global __memcpy_fwd_entry
221 .type memcpy,@function 224 .type memcpy,@function
222 225
223/* Copies <length> bytes of data in memory from <source> to <dest> 226/* Copies <length> bytes of data in memory from <source> to <dest>
@@ -249,7 +252,9 @@ memcpy:
249 move.l (4,%sp),%a1 /* Destination */ 252 move.l (4,%sp),%a1 /* Destination */
250 move.l (8,%sp),%a0 /* Source */ 253 move.l (8,%sp),%a0 /* Source */
251 move.l (12,%sp),%d1 /* Length */ 254 move.l (12,%sp),%d1 /* Length */
252 add.l %a0,%d1 /* %d1 = end address */ 255
256__memcpy_fwd_entry:
257 add.l %a0,%d1 /* %d1 = source end */
253 258
254 move.l %a0,%d0 259 move.l %a0,%d0
255 addq.l #7,%d0 260 addq.l #7,%d0
@@ -278,7 +283,7 @@ memcpy:
278 movem.l %d2-%d7/%a2,(%sp) 283 movem.l %d2-%d7/%a2,(%sp)
279 284
280 moveq.l #16,%d2 285 moveq.l #16,%d2
281 sub.l %d2,%d0 /* %d0 = first source long bound */ 286 sub.l %d2,%d0 /* %d0 = first source line bound */
282 move.l %d1,%a2 /* %a2 = end address */ 287 move.l %d1,%a2 /* %a2 = end address */
283 lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */ 288 lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */
284 move.l %a1,%d1 289 move.l %a1,%d1
@@ -507,7 +512,7 @@ memcpy:
507 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */ 512 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
508 cmp.l %a0,%a2 /* any trailing longwords? */ 513 cmp.l %a0,%a2 /* any trailing longwords? */
509 jls .lines_end /* no: get outta here */ 514 jls .lines_end /* no: get outta here */
510 515
511.lines_do0_tail_loop: 516.lines_do0_tail_loop:
512 move.l (%a0)+,(%a1)+ /* copy longword */ 517 move.l (%a0)+,(%a1)+ /* copy longword */
513 cmp.l %a0,%a2 /* runs %a0 up to last long bound */ 518 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
@@ -610,7 +615,7 @@ memcpy:
610 /* word aligned destination (line + 14): use line bursts in the loop */ 615 /* word aligned destination (line + 14): use line bursts in the loop */
611.lines_lo14_start: 616.lines_lo14_start:
612 movem.l (%a0),%d4-%d7 /* load first line */ 617 movem.l (%a0),%d4-%d7 /* load first line */
613 lea.l (16,%a0),%a0 618 add.l %d0,%a0
614 swap %d4 /* swap words of 1st long */ 619 swap %d4 /* swap words of 1st long */
615 move.w %d4,(%a1)+ /* store word */ 620 move.w %d4,(%a1)+ /* store word */
616 jra .lines_lo14_entry /* jump into main loop */ 621 jra .lines_lo14_entry /* jump into main loop */
@@ -784,7 +789,7 @@ memcpy:
784 move.l (%a0)+,%d7 /* load first longword */ 789 move.l (%a0)+,%d7 /* load first longword */
785 swap %d7 /* swap words */ 790 swap %d7 /* swap words */
786 move.w %d7,(%a1)+ /* store high word */ 791 move.w %d7,(%a1)+ /* store high word */
787 cmp.l %a0,%d0 /* any full lnogword? */ 792 cmp.l %a0,%d0 /* any full longword? */
788 jls .lines_do2_loop /* no: skip head loop */ 793 jls .lines_do2_loop /* no: skip head loop */
789 794
790.lines_do2_head_loop: 795.lines_do2_head_loop:
diff --git a/firmware/common/memmove.c b/firmware/common/memmove.c
new file mode 100644
index 0000000000..761e9eb104
--- /dev/null
+++ b/firmware/common/memmove.c
@@ -0,0 +1,148 @@
1/*
2FUNCTION
3 <<memmove>>---move possibly overlapping memory
4
5INDEX
6 memmove
7
8ANSI_SYNOPSIS
9 #include <string.h>
10 void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
11
12TRAD_SYNOPSIS
13 #include <string.h>
14 void *memmove(<[dst]>, <[src]>, <[length]>)
15 void *<[dst]>;
16 void *<[src]>;
17 size_t <[length]>;
18
19DESCRIPTION
20 This function moves <[length]> characters from the block of
21 memory starting at <<*<[src]>>> to the memory starting at
22 <<*<[dst]>>>. <<memmove>> reproduces the characters correctly
23 at <<*<[dst]>>> even if the two areas overlap.
24
25
26RETURNS
27 The function returns <[dst]> as passed.
28
29PORTABILITY
30<<memmove>> is ANSI C.
31
32<<memmove>> requires no supporting OS subroutines.
33
34QUICKREF
35 memmove ansi pure
36*/
37
38#include "config.h"
39#include <_ansi.h>
40#include <stddef.h>
41#include <limits.h>
42
43/* Nonzero if either X or Y is not aligned on a "long" boundary
   (tests the low bits of both addresses against sizeof (long) - 1). */
44#define UNALIGNED(X, Y) \
45 (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
46
47/* How many bytes are copied each iteration of the 4X unrolled loop
   (four longs). */
48#define BIGBLOCKSIZE (sizeof (long) << 2)
49
50/* How many bytes are copied each iteration of the single long word copy
   loop. */
51#define LITTLEBLOCKSIZE (sizeof (long))
52
53/* Threshold for punting to the byte copier: anything shorter than one
   pass of the unrolled loop. */
54#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
55
/* Separate prototype so that ICODE_ATTR can be attached to memmove; the
   definition below carries no attribute of its own.
   NOTE(review): ICODE_ATTR is defined outside this file - presumably a
   section placement attribute; confirm in config.h. */
56_PTR
57_DEFUN (memmove, (dst_void, src_void, length),
58 _PTR dst_void _AND
59 _CONST _PTR src_void _AND
60 size_t length) ICODE_ATTR;
61
/* Copy <length> bytes from src_void to dst_void, handling overlapping
   regions, and return dst_void unchanged. Two variants are selected at
   compile time: plain byte loops when optimizing for size, otherwise a
   long-word forward copy with byte-wise fallbacks. */
62_PTR
63_DEFUN (memmove, (dst_void, src_void, length),
64 _PTR dst_void _AND
65 _CONST _PTR src_void _AND
66 size_t length)
67{
68#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
69 char *dst = dst_void;
70 _CONST char *src = src_void;
71
72 if (src < dst && dst < src + length)
73 {
74 /* dst starts inside [src, src + length): a forward copy would overwrite
      source bytes before they are read, so copy backwards, last byte first */
75 src += length;
76 dst += length;
77 while (length--)
78 {
79 *--dst = *--src;
80 }
81 }
82 else
83 {
      /* No destructive overlap: plain forward byte copy */
84 while (length--)
85 {
86 *dst++ = *src++;
87 }
88 }
89
90 return dst_void;
91#else
92 char *dst = dst_void;
93 _CONST char *src = src_void;
94 long *aligned_dst;
95 _CONST long *aligned_src;
96 unsigned int len = length; /* remaining byte count */
97
98 if (src < dst && dst < src + len)
99 {
100 /* Destructive overlap (dst starts inside the source region): have to
       copy backwards. This path is always byte-wise. */
101 src += len;
102 dst += len;
103 while (len--)
104 {
105 *--dst = *--src;
106 }
107 }
108 else
109 {
110 /* Use optimizing algorithm for a non-destructive copy to closely
111 match memcpy. If the size is small or either SRC or DST is unaligned,
112 then punt into the byte copy loop. This should be rare. */
113 if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
114 {
115 aligned_dst = (long*)dst;
116 aligned_src = (long*)src;
117
118 /* Copy 4X long words at a time if possible. */
119 while (len >= BIGBLOCKSIZE)
120 {
121 *aligned_dst++ = *aligned_src++;
122 *aligned_dst++ = *aligned_src++;
123 *aligned_dst++ = *aligned_src++;
124 *aligned_dst++ = *aligned_src++;
125 len -= BIGBLOCKSIZE;
126 }
127
128 /* Copy one long word at a time if possible. */
129 while (len >= LITTLEBLOCKSIZE)
130 {
131 *aligned_dst++ = *aligned_src++;
132 len -= LITTLEBLOCKSIZE;
133 }
134
135 /* Pick up any residual with a byte copier. */
136 dst = (char*)aligned_dst;
137 src = (char*)aligned_src;
138 }
139
      /* Remaining bytes; the whole copy if the long-word path was skipped */
140 while (len--)
141 {
142 *dst++ = *src++;
143 }
144 }
145
146 return dst_void;
147#endif /* not PREFER_SIZE_OVER_SPEED */
148}
diff --git a/firmware/common/memmove_a.S b/firmware/common/memmove_a.S
new file mode 100755
index 0000000000..d7421333df
--- /dev/null
+++ b/firmware/common/memmove_a.S
@@ -0,0 +1,869 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19#include "config.h"
20
21 .section .icode,"ax",@progbits
22
23#if CONFIG_CPU == SH7034
24 .align 2
25 .global _memmove
26 .type _memmove,@function
27
28/* Moves <length> bytes of data in memory from <source> to <dest>
29 * Regions may overlap.
30 * This version is optimized for speed, and needs the corresponding memcpy
31 * implementation for the forward copy branch.
32 *
33 * arguments:
34 * r4 - destination address
35 * r5 - source address
36 * r6 - length
37 *
38 * return value:
39 * r0 - destination address (like ANSI version)
40 *
41 * register usage:
42 * r0 - data / scratch
43 * r1 - 2nd data / scratch
44 * r2 - scratch
45 * r3 - last long bound / adjusted start address (only if >= 11 bytes)
46 * r4 - current dest address
47 * r5 - source start address
48 * r6 - current source address
49 *
50 * The instruction order is devised in a way to utilize the pipelining
51 * of the SH1 to the max. The routine also tries to utilize fast page mode.
52 */
53
54_memmove:
55 cmp/hi r4,r5 /* source > destination? */
56 bf .backward /* no: backward copy */
57 mov.l .memcpy_fwd,r0 /* yes: r0 = forward copy entry point in memcpy */
58 jmp @r0 /* tail-jump into memcpy's forward copy */
59 mov r4,r7 /* (delay slot) store dest for returning */
60
61 .align 2
62.memcpy_fwd:
63 .long ___memcpy_fwd_entry
64
65.backward:
66 add r6,r4 /* r4 = destination end */
67 mov #11,r0
68 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */
69 add #-8,r5 /* adjust for late decrement (max. 2 longs): r5 = source start - 8 */
70 add r5,r6 /* r6 = source end - 8 */
71 bf .start_b2r /* no: jump directly to byte loop (T still from cmp/hs above) */
72
73 mov #-4,r3 /* r3 = 0xfffffffc */
74 and r6,r3 /* r3 = last source long bound (same -8 offset as r6) */
75 cmp/hi r3,r6 /* already aligned? */
76 bf .end_b1r /* yes: skip leading byte loop */
77
78.loop_b1r:
79 mov.b @(7,r6),r0 /* load byte just below the current position (7 - 8 = -1) */
80 add #-1,r6 /* decrement source addr */
81 mov.b r0,@-r4 /* store byte */
82 cmp/hi r3,r6 /* runs r6 down to last long bound */
83 bt .loop_b1r
84
85.end_b1r:
86 mov #3,r1 /* mask for the low 2 address bits */
87 and r4,r1 /* r1 = dest alignment offset */
88 mova .jmptab_r,r0 /* r0 = address of the jump table */
89 mov.b @(r0,r1),r1 /* select appropriate main loop.. */
90 add r0,r1 /* table entries are byte offsets relative to .jmptab_r */
91 mov r5,r3 /* copy (offset) start address to r3 */
92 jmp @r1 /* ..and jump to it */
93 add #7,r3 /* (delay slot) r3 = source start - 1: bound for main loops doing 2 longs/pass */
94
95 /** main loops, copying 2 longs per pass to profit from fast page mode **/
96
97 /* long aligned destination (fastest) */
98 .align 2
99.loop_do0r:
100 mov.l @r6,r1 /* load first long */
101 add #-8,r6 /* decrement source addr */
102 mov.l @(12,r6),r0 /* load second long */
103 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
104 mov.l r0,@-r4 /* store second long */
105 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
106 bt .loop_do0r
107
108 add #-4,r3 /* readjust bound for checking a single long */
109 cmp/hi r3,r6 /* first long left? */
110 bf .start_b2r /* no, jump to trailing byte loop */
111
112 mov.l @(4,r6),r0 /* load first long */
113 add #-4,r6 /* decrement source addr */
114 bra .start_b2r /* jump to trailing byte loop */
115 mov.l r0,@-r4 /* (delay slot) store first long */
116
117 /* word aligned destination (long + 2) */
118 .align 2
119.loop_do2r:
120 mov.l @r6,r1 /* load first long */
121 add #-8,r6 /* decrement source addr */
122 mov.l @(12,r6),r0 /* load second long */
123 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
124 mov.w r0,@-r4 /* store low word of second long */
125 xtrct r1,r0 /* extract low word of first long & high word of second long */
126 mov.l r0,@-r4 /* and store as long */
127 shlr16 r1 /* get high word of first long */
128 mov.w r1,@-r4 /* and store it */
129 bt .loop_do2r
130
131 add #-4,r3 /* readjust bound for checking a single long */
132 cmp/hi r3,r6 /* first long left? */
133 bf .start_b2r /* no, jump to trailing byte loop */
134
135 mov.l @(4,r6),r0 /* load first long */
136 add #-4,r6 /* decrement source addr */
137 mov.w r0,@-r4 /* store low word */
138 shlr16 r0 /* get high word */
139 bra .start_b2r /* jump to trailing byte loop */
140 mov.w r0,@-r4 /* (delay slot) and store it */
141
142 /* jumptable for loop selector */
143 .align 2
144.jmptab_r:
145 .byte .loop_do0r - .jmptab_r /* placed in the middle because the SH1 */
146 .byte .loop_do1r - .jmptab_r /* loads bytes sign-extended. Otherwise */
147 .byte .loop_do2r - .jmptab_r /* the last loop would be out of reach */
148 .byte .loop_do3r - .jmptab_r /* of the offset range. */
149
150 /* byte aligned destination (long + 1) */
151 .align 2
152.loop_do1r:
153 mov.l @r6,r1 /* load first long */
154 add #-8,r6 /* decrement source addr */
155 mov.l @(12,r6),r0 /* load second long */
156 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
157 mov.b r0,@-r4 /* store low byte of second long */
158 shlr8 r0 /* get upper 3 bytes */
159 mov r1,r2 /* copy first long */
160 shll16 r2 /* move low byte of first long all the way up, .. */
161 shll8 r2
162 or r2,r0 /* ..combine with the 3 bytes of second long.. */
163 mov.l r0,@-r4 /* ..and store as long */
164 shlr8 r1 /* get middle 2 bytes */
165 mov.w r1,@-r4 /* store as word */
166 shlr16 r1 /* get upper byte */
167 mov.b r1,@-r4 /* and store */
168 bt .loop_do1r
169
170 add #-4,r3 /* readjust bound for checking a single long */
171.last_do13r:
172 cmp/hi r3,r6 /* first long left? */
173 bf .start_b2r /* no, jump to trailing byte loop */
174
175 nop /* alignment */
176 mov.l @(4,r6),r0 /* load first long */
177 add #-4,r6 /* decrement source addr */
178 mov.b r0,@-r4 /* store low byte */
179 shlr8 r0 /* get middle 2 bytes */
180 mov.w r0,@-r4 /* store as word */
181 shlr16 r0 /* get upper byte */
182 bra .start_b2r /* jump to trailing byte loop */
183 mov.b r0,@-r4 /* (delay slot) and store */
184
185 /* byte aligned destination (long + 3) */
186 .align 2
187.loop_do3r:
188 mov.l @r6,r1 /* load first long */
189 add #-8,r6 /* decrement source addr */
190 mov.l @(12,r6),r0 /* load second long */
191 mov r1,r2 /* copy first long */
192 mov.b r0,@-r4 /* store low byte of second long */
193 shlr8 r0 /* get middle 2 bytes */
194 mov.w r0,@-r4 /* store as word */
195 shlr16 r0 /* get upper byte */
196 shll8 r2 /* move lower 3 bytes of first long one up.. */
197 or r2,r0 /* ..combine with the 1 byte of second long.. */
198 mov.l r0,@-r4 /* ..and store as long */
199 shlr16 r1 /* get upper byte of first long */
200 shlr8 r1
201 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
202 mov.b r1,@-r4 /* ..and store */
203 bt .loop_do3r
204
205 bra .last_do13r /* handle first longword: reuse routine for (long + 1) */
206 add #-4,r3 /* (delay slot) readjust bound for checking a single long */
207
208 /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */
209 .align 2
210.loop_b2r:
211 mov.b @(7,r6),r0 /* load byte just below the current position (7 - 8 = -1) */
212 add #-1,r6 /* decrement source addr */
213 mov.b r0,@-r4 /* store byte */
214.start_b2r:
215 cmp/hi r5,r6 /* runs r6 down to start address (both offset by -8) */
216 bt .loop_b2r
217
218 rts
219 mov r4,r0 /* (delay slot) return dest start address */
220.end:
221 .size _memmove,.end-_memmove
222#elif defined(CPU_COLDFIRE)
223#define FULLSPEED /* use burst writing for word aligned destinations */
224 .align 2
225 .global memmove
226 .type memmove,@function
227
228/* Moves <length> bytes of data in memory from <source> to <dest>
229 * Regions may overlap.
230 * This version is optimized for speed, and needs the corresponding memcpy
231 * implementation for the forward copy branch.
232 *
233 * arguments:
234 * (4,%sp) - destination address
235 * (8,%sp) - source address
236 * (12,%sp) - length
237 *
238 * return value:
239 * %d0 - destination address (like ANSI version)
240 *
241 * register usage:
242 * %a0 - current source address
243 * %a1 - current dest address
244 * %a2 - source start address (in line-copy loops)
245 * %d0 - source start address (byte and longword copy) / data / scratch
246 * %d1 - data / scratch
247 * %d2 - data / scratch
248 * %d3..%d7 - data
249 *
250 * For maximum speed this routine reads and writes whole lines using burst
251 * move (movem.l) where possible. For byte aligned destinations (long-1 and
252 * long-3) it writes longwords only. Same goes for word aligned destinations
253 * if FULLSPEED is undefined.
254 */
255memmove:
256 move.l (4,%sp),%a1 /* Destination */
257 move.l (8,%sp),%a0 /* Source */
258 move.l (12,%sp),%d1 /* Length */
259
260 cmp.l %a0,%a1
261 bhi.b .backward /* dest > src -> backward copy */
262 jmp __memcpy_fwd_entry
263
264.backward:
265 move.l %a0,%d0 /* %d0 = source start */
266 add.l %d1,%a0 /* %a0 = source end */
267 add.l %d1,%a1 /* %a1 = destination end */
268
269 move.l %a0,%d1
270 and.l #0xFFFFFFFC,%d1 /* %d1 = last source long bound */
271 subq.l #4,%d1
272 cmp.l %d0,%d1 /* at least one aligned longword to copy? */
273 blo.w .bytes2r_start
274
275 addq.l #4,%d1 /* %d1 = last source long bound */
276 cmp.l %d1,%a0 /* any bytes to copy */
277 jls .bytes1r_end /* no: skip byte loop */
278
279 /* leading byte loop: copies 0..3 bytes */
280.bytes1r_loop:
281 move.b -(%a0),-(%a1) /* copy byte */
282 cmp.l %d1,%a0 /* runs %a0 down to last long bound */
283 jhi .bytes1r_loop
284
285.bytes1r_end:
286 moveq.l #-16,%d1
287 add.l %a0,%d1
288 and.l #0xFFFFFFF0,%d1 /* %d1 = last source line bound - 16 */
289 cmp.l %d0,%d1 /* at least one aligned line to copy? */
290 blo.w .longr_start /* no: jump to longword copy loop */
291
292 lea.l (-28,%sp),%sp /* free up some registers */
293 movem.l %d2-%d7/%a2,(%sp)
294
295 moveq.l #16,%d2
296 add.l %d2,%d1 /* %d1 = last source line bound */
297 move.l %d0,%a2 /* %a2 = start address */
298 lea.l (15,%a2),%a2 /* adjust start address for loops doing 16 bytes/pass */
299 move.l %a1,%d0
300 moveq.l #3,%d2 /* mask */
301 and.l %d2,%d0
302 jmp.l (2,%pc,%d0.l*4) /* switch (dest_addr & 3) */
303 bra.w .lines_do0r_start
304 bra.w .lines_do1r_start
305 bra.w .lines_do2r_start
306 /* bra.w .lines_do3r_start implicit */
307
308 /* byte aligned destination (long - 1): use line burst reads in main loop */
309.lines_do3r_start:
310 moveq.l #24,%d0 /* shift count for shifting by 3 bytes */
311 cmp.l %d1,%a0 /* any leading longwords? */
312 jhi .lines_do3r_head_start /* yes: leading longword copy */
313
314 lea.l (-16,%a0),%a0
315 movem.l (%a0),%d3-%d6 /* load initial line */
316 move.l %d6,%d2 /* last longword, bytes 3210 */
317 move.b %d2,-(%a1) /* store byte */
318 lsr.l #8,%d2 /* last longword, bytes .321 */
319 move.w %d2,-(%a1) /* store word */
320 jra .lines_do3r_entry
321
322.lines_do3r_head_start:
323 move.l -(%a0),%d3 /* load initial longword */
324 move.l %d3,%d2 /* bytes 3210 */
325 move.b %d2,-(%a1) /* store byte */
326 lsr.l #8,%d2 /* bytes .321 */
327 move.w %d2,-(%a1) /* store word */
328 jra .lines_do3r_head_entry
329
330.lines_do3r_head_loop:
331 move.l %d3,%d4 /* move old longword away */
332 move.l -(%a0),%d3 /* load new longword */
333 move.l %d3,%d2
334 lsl.l #8,%d2 /* get bytes 210. */
335 or.l %d2,%d4 /* combine with old high byte */
336 move.l %d4,-(%a1) /* store longword */
337.lines_do3r_head_entry:
338 lsr.l %d0,%d3 /* shift down high byte */
339 cmp.l %d1,%a0 /* run %a0 down to last line bound */
340 jhi .lines_do3r_head_loop
341
342.lines_do3r_loop:
343 move.l %d3,%d7 /* move first longword of last line away */
344 lea.l (-16,%a0),%a0
345 movem.l (%a0),%d3-%d6 /* load new line */
346 move.l %d6,%d2
347 lsl.l #8,%d2 /* get bytes 210. of 4th longword */
348 or.l %d2,%d7 /* combine with high byte of old longword */
349 move.l %d7,-(%a1) /* store longword */
350.lines_do3r_entry:
351 lsr.l %d0,%d6 /* shift down high byte */
352 move.l %d5,%d2
353 lsl.l #8,%d2 /* get bytes 210. of 3rd longword */
354 or.l %d2,%d6 /* combine with high byte of 4th longword */
355 move.l %d6,-(%a1) /* store longword */
356 lsr.l %d0,%d5 /* shift down high byte */
357 move.l %d4,%d2
358 lsl.l #8,%d2 /* get bytes 210. of 2nd longword */
359 or.l %d2,%d5 /* combine with high byte of 3rd longword */
360 move.l %d5,-(%a1) /* store longword */
361 lsr.l %d0,%d4 /* shift down high byte */
362 move.l %d3,%d2
363 lsl.l #8,%d2 /* get bytes 210. of 1st longword */
364 or.l %d2,%d4 /* combine with high byte of 2nd longword */
365 move.l %d4,-(%a1) /* store longword */
366 lsr.l %d0,%d3 /* shift down high byte */
367 cmp.l %a2,%a0 /* run %a0 down to first line bound */
368 jhi .lines_do3r_loop
369
370 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
371 cmp.l %a2,%a0 /* any trailing longwords? */
372 jls .lines_do3r_tail_end /* no: just store last high byte */
373
374.lines_do3r_tail_loop:
375 move.l %d3,%d4 /* move old longword away */
376 move.l -(%a0),%d3 /* load new longword */
377 move.l %d3,%d2
378 lsl.l #8,%d2 /* get bytes 210. */
379 or.l %d2,%d4 /* combine with old high byte */
380 move.l %d4,-(%a1) /* store longword */
381 lsr.l %d0,%d3 /* shift down high byte */
382 cmp.l %a2,%a0 /* run %a0 down to first long bound */
383 jhi .lines_do3r_tail_loop
384
385.lines_do3r_tail_end:
386 move.b %d3,-(%a1) /* store shifted-down high byte */
387 jra .linesr_end
388
389 /* byte aligned destination (long - 3): use line burst reads in main loop */
390.lines_do1r_start:
391 moveq.l #24,%d0 /* shift count for shifting by 3 bytes */
392 cmp.l %d1,%a0 /* any leading longwords? */
393 jhi .lines_do1r_head_start /* yes: leading longword copy */
394
395 lea.l (-16,%a0),%a0
396 movem.l (%a0),%d3-%d6 /* load initial line */
397 move.b %d6,-(%a1) /* store low byte of last longword */
398 jra .lines_do1r_entry
399
400.lines_do1r_head_start:
401 move.l -(%a0),%d3 /* load initial longword */
402 move.b %d3,-(%a1) /* store low byte */
403 jra .lines_do1r_head_entry
404
405.lines_do1r_head_loop:
406 move.l %d3,%d4 /* move old longword away */
407 move.l -(%a0),%d3 /* load new longword */
408 move.l %d3,%d2
409 lsl.l %d0,%d2 /* get low byte */
410 or.l %d2,%d4 /* combine with old bytes .321 */
411 move.l %d4,-(%a1) /* store longword */
412.lines_do1r_head_entry:
413 lsr.l #8,%d3 /* get bytes .321 */
414 cmp.l %d1,%a0 /* run %a0 down to last line bound */
415 jhi .lines_do1r_head_loop
416
417.lines_do1r_loop:
418 move.l %d3,%d7 /* move first longword of old line away */
419 lea.l (-16,%a0),%a0
420 movem.l (%a0),%d3-%d6 /* load new line */
421 move.l %d6,%d2
422 lsl.l %d0,%d2 /* get low byte of 4th longword */
423 or.l %d2,%d7 /* combine with bytes .321 of old longword */
424 move.l %d7,-(%a1) /* store longword */
425.lines_do1r_entry:
426 lsr.l #8,%d6 /* get bytes .321 */
427 move.l %d5,%d2
428 lsl.l %d0,%d2 /* get low byte of 3rd longword */
429 or.l %d2,%d6 /* combine with bytes .321 of 4th longword */
430 move.l %d6,-(%a1) /* store longword */
431 lsr.l #8,%d5 /* get bytes .321 */
432 move.l %d4,%d2
433 lsl.l %d0,%d2 /* get low byte of 2nd longword */
434 or.l %d2,%d5 /* combine with bytes .321 of 3rd longword */
435 move.l %d5,-(%a1) /* store longword */
436 lsr.l #8,%d4 /* get bytes .321 */
437 move.l %d3,%d2
438 lsl.l %d0,%d2 /* get low byte of 1st longword */
439 or.l %d2,%d4 /* combine with bytes .321 of 2nd longword */
440 move.l %d4,-(%a1) /* store longword */
441 lsr.l #8,%d3 /* get bytes .321 */
442 cmp.l %a2,%a0 /* run %a0 down to first line bound */
443 jhi .lines_do1r_loop
444
445 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
446 cmp.l %a2,%a0 /* any trailing longwords? */
447 jls .lines_do1r_tail_end /* no: just store last 3 bytes */
448
449.lines_do1r_tail_loop:
450 move.l %d3,%d4 /* move old longword away */
451 move.l -(%a0),%d3 /* load new longword */
452 move.l %d3,%d2
453 lsl.l %d0,%d2 /* get low byte */
454 or.l %d2,%d4 /* combine with old bytes .321 */
455 move.l %d4,-(%a1) /* store longword */
456 lsr.l #8,%d3 /* get bytes .321 */
457 cmp.l %a2,%a0 /* run %a0 down to first long bound */
458 jhi .lines_do1r_tail_loop
459
460.lines_do1r_tail_end:
461 move.w %d3,-(%a1) /* store word 21 */
462 swap %d3
463 move.b %d3,-(%a1) /* store byte 3 */
464 jra .linesr_end
465
466 /* long aligned destination (line - 0/4/8/12): head */
467.lines_do0r_head_loop:
468 move.l -(%a0),-(%a1) /* copy longword */
469.lines_do0r_start:
470 cmp.l %d1,%a0 /* run %a0 down to last line bound */
471 jhi .lines_do0r_head_loop
472
473.lines_do0r_head_end:
474 move.l %a1,%d1
475 lsr.l #2,%d1
476 moveq.l #3,%d0 /* mask */
477 and.l %d0,%d1
478 moveq.l #16,%d0 /* address decrement for one main loop pass */
479 jmp.l (2,%pc,%d1.l*2) /* switch ((dest_addr >> 2) & 3) */
480 bra.b .lines_lo0r_start
481 bra.b .lines_lo4r_start
482 bra.b .lines_lo8r_start
483 /* bra.b .lines_lo12r_start implicit */
484
485 /* long aligned destination (line - 4): use line bursts in the loop */
486.lines_lo12r_start:
487 sub.l %d0,%a0
488 movem.l (%a0),%d1-%d4 /* load initial line */
489 move.l %d4,-(%a1) /* store 4th longword */
490 move.l %d3,-(%a1) /* store 3rd longword */
491 move.l %d2,-(%a1) /* store 2nd longword */
492 cmp.l %a2,%a0 /* any full lines? */
493 jls .lines_lo12r_end /* no: skip main loop */
494
495.lines_lo12r_loop:
496 move.l %d1,%d5 /* move first longword of old line away */
497 sub.l %d0,%a0
498 movem.l (%a0),%d1-%d4 /* load new line */
499 sub.l %d0,%a1
500 movem.l %d2-%d5,(%a1) /* store line (1 old + 3 new longwords) */
501 cmp.l %a2,%a0 /* run %a0 down to first line bound */
502 jhi .lines_lo12r_loop
503
504 jra .lines_lo12r_end /* handle trailing longwords */
505
506 /* line aligned destination: use line bursts in the loop */
507.lines_lo0r_start:
508.lines_lo0r_loop:
509 sub.l %d0,%a0
510 movem.l (%a0),%d1-%d4 /* load line */
511 sub.l %d0,%a1
512 movem.l %d1-%d4,(%a1) /* store line */
513 cmp.l %a2,%a0 /* run %a0 down to first line bound */
514 jhi .lines_lo0r_loop
515
516 jra .lines_lo0r_end /* handle trailing longwords */
517
518 /* long aligned destination (line - 8): use line bursts in the loop */
519.lines_lo8r_start:
520 sub.l %d0,%a0
521 movem.l (%a0),%d1-%d4 /* load initial line */
522 move.l %d4,-(%a1) /* store 4th longword */
523 move.l %d3,-(%a1) /* store 3rd longword */
524 cmp.l %a2,%a0 /* any full lines? */
525 jls .lines_lo8r_end /* no: skip main loop */
526
527.lines_lo8r_loop:
528 move.l %d2,%d6 /* move first 2 longwords of old line away */
529 move.l %d1,%d5
530 sub.l %d0,%a0
531 movem.l (%a0),%d1-%d4 /* load new line */
532 sub.l %d0,%a1
533 movem.l %d3-%d6,(%a1) /* store line (2 old + 2 new longwords) */
534 cmp.l %a2,%a0 /* run %a0 down to first line bound */
535 jhi .lines_lo8r_loop
536
537 jra .lines_lo8r_end /* handle trailing longwords */
538
539 /* long aligned destination (line - 12): use line bursts in the loop */
540.lines_lo4r_start:
541 sub.l %d0,%a0
542 movem.l (%a0),%d1-%d4 /* load initial line */
543 move.l %d4,-(%a1) /* store 4th longword */
544 cmp.l %a2,%a0 /* any full lines? */
545 jls .lines_lo4r_end /* no: skip main loop */
546
547.lines_lo4r_loop:
548 move.l %d3,%d7 /* move first 3 longwords of old line away */
549 move.l %d2,%d6
550 move.l %d1,%d5
551 sub.l %d0,%a0
552 movem.l (%a0),%d1-%d4 /* load new line */
553 sub.l %d0,%a1
554 movem.l %d4-%d7,(%a1) /* store line (3 old + 1 new longwords) */
555 cmp.l %a2,%a0 /* run %a0 down to first line bound */
556 jhi .lines_lo4r_loop
557
558 /* long aligned destination (line - 0/4/8/12): tail */
559.lines_lo4r_end:
560 move.l %d3,-(%a1) /* store 3rd last longword */
561.lines_lo8r_end:
562 move.l %d2,-(%a1) /* store 2nd last longword */
563.lines_lo12r_end:
564 move.l %d1,-(%a1) /* store last longword */
565.lines_lo0r_end:
566 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
567 cmp.l %a2,%a0 /* any trailing longwords? */
568 jls .linesr_end /* no: get outta here */
569
570.lines_do0r_tail_loop:
571 move.l -(%a0),-(%a1) /* copy longword */
572 cmp.l %a2,%a0 /* run %a0 down to first long bound */
573 jhi .lines_do0r_tail_loop
574
575 jra .linesr_end
576
577#ifdef FULLSPEED
578 /* word aligned destination (line - 2/6/10/14): head.
 * Source longwords are read whole and written shifted by one word:
 * the low word of each longword is paired with the high word of the
 * longword read before it (the one at the next higher address). */
579.lines_do2r_start:
580 cmp.l %d1,%a0 /* any leading longwords? */
581 jls .lines_do2r_selector /* no: jump to mainloop selector */
582
583 move.l -(%a0),%d3 /* load initial longword */
584 move.w %d3,-(%a1) /* store low word */
585 cmp.l %d1,%a0 /* any more longwords? */
586 jls .lines_do2r_head_end /* no: skip head loop */
587
588.lines_do2r_head_loop:
589 move.l %d3,%d4 /* move old longword away */
590 move.l -(%a0),%d3 /* load new longword */
591 move.w %d3,%d4 /* combine low word with old high word */
592 swap %d4 /* swap words: new low word now in the upper half */
593 move.l %d4,-(%a1) /* store longword */
594 cmp.l %d1,%a0 /* run %a0 down to last line bound */
595 jhi .lines_do2r_head_loop
596
597.lines_do2r_head_end:
598 swap %d3 /* get high word (still pending from the last load) */
599 move.w %d3,-(%a1) /* and store it */
600
601.lines_do2r_selector: /* pick the main loop variant from bits 2..3 of the write pointer */
602 move.l %a1,%d1
603 lsr.l #2,%d1 /* %d1 = dest_addr >> 2 */
604 moveq.l #3,%d0 /* mask */
605 and.l %d0,%d1 /* %d1 = index 0..3 */
606 moveq.l #16,%d7 /* address decrement for one main loop pass */
607 jmp.l (2,%pc,%d1.l*4) /* switch ((dest_addr >> 2) & 3): index scaled by 4 into the bra.w table */
608 bra.w .lines_lo2r_start /* case 0 */
609 bra.w .lines_lo6r_start /* case 1 */
610 bra.w .lines_lo10r_start /* case 2 */
611 /* bra.w .lines_lo14r_start implicit: case 3 lands on the code directly after the table */
612
613 /* word aligned destination (line - 2, i.e. line + 14): use line bursts in the loop.
 * Up front: 1 word + 3 longwords are stored. Each loop pass then stores
 * a full line built from 4 word-shifted longwords. */
614.lines_lo14r_start:
615 sub.l %d7,%a0 /* step read pointer down one line (%d7 = 16 from the selector) */
616 movem.l (%a0),%d0-%d3 /* load initial line */
617 move.w %d3,-(%a1) /* store last low word */
618 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
619 swap %d3 /* swap words of 3rd long */
620 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
621 swap %d2 /* swap words of 2nd long */
622 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
623 swap %d1 /* swap words of 1st long */
624 move.l %d3,-(%a1) /* store 3rd longword */
625 move.l %d2,-(%a1) /* store 2nd longword */
626 move.l %d1,-(%a1) /* store 1st longword */
627 cmp.l %a2,%a0 /* any full lines? */
628 jls .lines_lo14r_end /* no: skip main loop */
629
630.lines_lo14r_loop:
631 move.l %d0,%d4 /* move first longword of old line away */
632 sub.l %d7,%a0 /* step read pointer down one line */
633 movem.l (%a0),%d0-%d3 /* load line */
634 move.w %d3,%d4 /* combine 4th low word with old high word */
635 swap %d4 /* swap words of 4th long */
636 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
637 swap %d3 /* swap words of 3rd long */
638 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
639 swap %d2 /* swap words of 2nd long */
640 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
641 swap %d1 /* swap words of 1st long */
642 sub.l %d7,%a1 /* step write pointer down one line */
643 movem.l %d1-%d4,(%a1) /* store line */
644 cmp.l %a2,%a0 /* run %a0 down to first line bound */
645 jhi .lines_lo14r_loop
646
647 jra .lines_lo14r_end /* handle trailing longwords (high word of %d0 is still pending) */
648
649 /* word aligned destination (line - 6, i.e. line + 10): use line bursts in the loop.
 * Up front: 1 word + 2 longwords are stored, then the loop is entered
 * at .lines_lo10r_entry so the combine of the 1st longword is shared. */
650.lines_lo10r_start:
651 sub.l %d7,%a0 /* step read pointer down one line (%d7 = 16 from the selector) */
652 movem.l (%a0),%d0-%d3 /* load initial line */
653 move.w %d3,-(%a1) /* store last low word */
654 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
655 swap %d3 /* swap words of 3rd long */
656 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
657 swap %d2 /* swap words of 2nd long */
658 move.l %d3,-(%a1) /* store 3rd longword */
659 move.l %d2,-(%a1) /* store 2nd longword */
660 jra .lines_lo10r_entry /* jump into main loop */
661
662.lines_lo10r_loop:
663 move.l %d0,%d4 /* move first 2 longwords of old line away */
664 move.l %d1,%d5 /* (%d1 was already combined and swapped at the entry below) */
665 sub.l %d7,%a0 /* step read pointer down one line */
666 movem.l (%a0),%d0-%d3 /* load line */
667 move.w %d3,%d4 /* combine 4th low word with old high word */
668 swap %d4 /* swap words of 4th long */
669 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
670 swap %d3 /* swap words of 3rd long */
671 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
672 swap %d2 /* swap words of 2nd long */
673 sub.l %d7,%a1 /* step write pointer down one line */
674 movem.l %d2-%d5,(%a1) /* store line (%d2-%d4 new + %d5 held back from the previous pass) */
675.lines_lo10r_entry:
676 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
677 swap %d1 /* swap words of 1st long */
678 cmp.l %a2,%a0 /* run %a0 down to first line bound */
679 jhi .lines_lo10r_loop
680
681 jra .lines_lo10r_end /* store the held-back longword in %d1, then handle trailing longwords */
682
683 /* word aligned destination (line - 10, i.e. line + 6): use line bursts in the loop.
 * Up front: 1 word + 1 longword are stored, then the loop is entered
 * at .lines_lo6r_entry so the combines of the 1st/2nd longword are shared. */
684.lines_lo6r_start:
685 sub.l %d7,%a0 /* step read pointer down one line (%d7 = 16 from the selector) */
686 movem.l (%a0),%d0-%d3 /* load initial line */
687 move.w %d3,-(%a1) /* store last low word */
688 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
689 swap %d3 /* swap words of 3rd long */
690 move.l %d3,-(%a1) /* store 3rd longword */
691 jra .lines_lo6r_entry /* jump into main loop */
692
693.lines_lo6r_loop:
694 move.l %d0,%d4 /* move first 3 longwords of old line away */
695 move.l %d1,%d5 /* (%d1/%d2 were already combined and swapped at the entry below) */
696 move.l %d2,%d6
697 sub.l %d7,%a0 /* step read pointer down one line */
698 movem.l (%a0),%d0-%d3 /* load line */
699 move.w %d3,%d4 /* combine 4th low word with old high word */
700 swap %d4 /* swap words of 4th long */
701 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
702 swap %d3 /* swap words of 3rd long */
703 sub.l %d7,%a1 /* step write pointer down one line */
704 movem.l %d3-%d6,(%a1) /* store line (%d3/%d4 new + %d5/%d6 held back from the previous pass) */
705.lines_lo6r_entry:
706 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
707 swap %d2 /* swap words of 2nd long */
708 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
709 swap %d1 /* swap words of 1st long */
710 cmp.l %a2,%a0 /* run %a0 down to first line bound */
711 jhi .lines_lo6r_loop
712
713 jra .lines_lo6r_end /* store the held-back longwords in %d2/%d1, then handle trailing longwords */
714
715 /* word aligned destination (line - 14, i.e. line + 2): use line bursts in the loop.
 * Up front: only 1 word is stored, then the loop is entered at
 * .lines_lo2r_entry so the combines of the 1st-3rd longword are shared. */
716.lines_lo2r_start:
717 sub.l %d7,%a0 /* step read pointer down one line (%d7 = 16 from the selector) */
718 movem.l (%a0),%d0-%d3 /* load initial line */
719 move.w %d3,-(%a1) /* store last low word */
720 jra .lines_lo2r_entry /* jump into main loop */
721
722.lines_lo2r_loop:
723 move.l %d0,%d4 /* move old line away */
724 move.l %d1,%d5
725 move.l %d2,%d6
726 move.l %d3,%d7 /* %d7 no longer holds the decrement from here on */
727 lea.l (-16,%a0),%a0 /* step read pointer down one line (lea, since %d7 is in use) */
728 movem.l (%a0),%d0-%d3 /* load line */
729 move.w %d3,%d4 /* combine 4th low word with old high word */
730 swap %d4 /* swap words of 4th long */
731 lea.l (-16,%a1),%a1 /* step write pointer down one line */
732 movem.l %d4-%d7,(%a1) /* store line (%d4 new + %d5-%d7 held back from the previous pass) */
733.lines_lo2r_entry:
734 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
735 swap %d3 /* swap words of 3rd long */
736 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
737 swap %d2 /* swap words of 2nd long */
738 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
739 swap %d1 /* swap words of 1st long */
740 cmp.l %a2,%a0 /* run %a0 down to first line bound */
741 jhi .lines_lo2r_loop /* done: fall through into .lines_lo2r_end */
742
743 /* word aligned destination (line - 2/6/10/14): tail.
 * The *_end labels form a fall-through ladder that stores the already
 * combined longwords still held in %d3/%d2/%d1. The high word of %d0
 * stays pending until the tail loop or .lines_do2r_tail_end. */
744.lines_lo2r_end:
745 move.l %d3,-(%a1) /* store third last longword */
746.lines_lo6r_end:
747 move.l %d2,-(%a1) /* store second last longword */
748.lines_lo10r_end:
749 move.l %d1,-(%a1) /* store last longword */
750.lines_lo14r_end:
751 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
752 cmp.l %a2,%a0 /* any trailing longwords? */
753 jls .lines_do2r_tail_end /* no: skip tail loop */
754
755.lines_do2r_tail_loop:
756 move.l %d0,%d1 /* move old longword away */
757 move.l -(%a0),%d0 /* load new longword */
758 move.w %d0,%d1 /* combine low word with old high word */
759 swap %d1 /* swap words */
760 move.l %d1,-(%a1) /* store longword */
761 cmp.l %a2,%a0 /* run %a0 down to first long bound */
762 jhi .lines_do2r_tail_loop
763
764.lines_do2r_tail_end:
765 swap %d0 /* get final high word */
766 move.w %d0,-(%a1) /* store it */
767 /* jra .linesr_end implicit: .linesr_end follows after the conditional block */
768
769#else /* !FULLSPEED */
770
771 /* word aligned destination (long - 2): use line burst reads in the loop.
 * Only the reads are done a line at a time; all writes are single
 * word or longword stores. */
772.lines_do2r_start:
773 cmp.l %d1,%a0 /* any leading longwords? */
774 jhi .lines_do2r_head_start /* yes: leading longword copy */
775
776 lea.l (-16,%a0),%a0 /* step read pointer down one line */
777 movem.l (%a0),%d3-%d6 /* load initial line */
778 move.w %d6,-(%a1) /* store last low word */
779 jra .lines_do2r_entry /* jump into main loop */
780
781.lines_do2r_head_start:
782 move.l -(%a0),%d3 /* load initial longword */
783 move.w %d3,-(%a1) /* store low word */
784 cmp.l %d1,%a0 /* any full longword? */
785 jls .lines_do2r_loop /* no: skip head loop */
786
787.lines_do2r_head_loop:
788 move.l %d3,%d4 /* move old longword away */
789 move.l -(%a0),%d3 /* load new longword */
790 move.w %d3,%d4 /* combine low word with old high word */
791 swap %d4 /* swap words */
792 move.l %d4,-(%a1) /* store longword */
793 cmp.l %d1,%a0 /* run %a0 down to last line bound */
794 jhi .lines_do2r_head_loop /* done: fall into main loop, high word of %d3 still pending */
795
796.lines_do2r_loop: /* main loop: one line read, four word-shifted longword writes per pass */
797 move.l %d3,%d7 /* move first longword of old line away */
798 lea.l (-16,%a0),%a0 /* step read pointer down one line */
799 movem.l (%a0),%d3-%d6 /* load line */
800 move.w %d6,%d7 /* combine 4th low word with old high word */
801 swap %d7 /* swap words of 4th long */
802 move.l %d7,-(%a1) /* store 4th longword */
803.lines_do2r_entry: /* entry used when the last low word was already stored separately */
804 move.w %d5,%d6 /* combine 3rd low word with 4th high word */
805 swap %d6 /* swap words of 3rd long */
806 move.l %d6,-(%a1) /* store 3rd longword */
807 move.w %d4,%d5 /* combine 2nd low word with 3rd high word */
808 swap %d5 /* swap words of 2nd long */
809 move.l %d5,-(%a1) /* store 2nd longword */
810 move.w %d3,%d4 /* combine 1st low word with 2nd high word */
811 swap %d4 /* swap words of 1st long */
812 move.l %d4,-(%a1) /* store 1st longword */
813 cmp.l %a2,%a0 /* run %a0 down to first line bound */
814 jhi .lines_do2r_loop /* %d3 is left intact: its high word is still pending */
815
816.lines_do2r_end: /* tail: remaining whole longwords, then the pending high word of %d3 */
817 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
818 cmp.l %a2,%a0 /* any trailing longwords? */
819 jls .lines_do2r_tail_end /* no: skip tail loop */
820
821.lines_do2r_tail_loop:
822 move.l %d3,%d4 /* move old longword away */
823 move.l -(%a0),%d3 /* load new longword */
824 move.w %d3,%d4 /* combine low word with old high word */
825 swap %d4 /* swap words */
826 move.l %d4,-(%a1) /* store longword */
827 cmp.l %a2,%a0 /* run %a0 down to first long bound */
828 jhi .lines_do2r_tail_loop
829
830.lines_do2r_tail_end:
831 swap %d3 /* get final high word */
832 move.w %d3,-(%a1) /* store it */
833 /* jra .linesr_end implicit: .linesr_end follows after the conditional block */
834
835#endif /* !FULLSPEED */
836
837.linesr_end: /* common exit of all line-copy variants */
838 subq.l #3,%a2 /* readjust start address (3 more, after the 12 taken off in the tails) */
839 move.l %a2,%d0 /* start address in %d0 again (copied before %a2 is restored) */
840 movem.l (%sp),%d2-%d7/%a2 /* restore registers */
841 lea.l (28,%sp),%sp /* pop the 7 saved longwords (7 * 4 = 28 bytes) */
842 jra .bytes2r_start /* jump to trailing byte loop */
843
844.longr_start: /* short copy: plain longword loop, then byte loop */
845 addq.l #3,%d0 /* adjust start address for doing 4 bytes/pass */
846
847 /* longword copy loop - no lines */
848.longr_loop:
849 move.l -(%a0),-(%a1) /* copy longword (write can be unaligned) */
850 cmp.l %d0,%a0 /* runs %a0 down to first long bound */
851 jhi .longr_loop
852
853 subq.l #3,%d0 /* readjust start address */
854 cmp.l %d0,%a0 /* any bytes left? */
855 jls .bytes2r_end /* no: skip trailing byte loop */
856
857 /* trailing byte loop */
858.bytes2r_loop:
859 move.b -(%a0),-(%a1) /* copy byte */
860.bytes2r_start: /* also entered from .linesr_end with %d0 = start address */
861 cmp.l %d0,%a0 /* runs %a0 down to start address */
862 jhi .bytes2r_loop
863
864.bytes2r_end:
865 rts /* returns start address; NOTE(review): %d0 is the bound the read pointer %a0 ran down to - confirm this is the value callers expect */
866
867.end:
868 .size memmove,.end-memmove
869#endif