summaryrefslogtreecommitdiff
path: root/firmware/common/memmove_a.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/common/memmove_a.S')
-rwxr-xr-xfirmware/common/memmove_a.S869
1 files changed, 869 insertions, 0 deletions
diff --git a/firmware/common/memmove_a.S b/firmware/common/memmove_a.S
new file mode 100755
index 0000000000..d7421333df
--- /dev/null
+++ b/firmware/common/memmove_a.S
@@ -0,0 +1,869 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19#include "config.h"
20
21 .section .icode,"ax",@progbits
22
23#if CONFIG_CPU == SH7034
24 .align 2
25 .global _memmove
26 .type _memmove,@function
27
28/* Moves <length> bytes of data in memory from <source> to <dest>
29 * Regions may overlap.
30 * This version is optimized for speed, and needs the corresponding memcpy
31 * implementation for the forward copy branch.
32 *
33 * arguments:
34 * r4 - destination address
35 * r5 - source address
36 * r6 - length
37 *
38 * return value:
39 * r0 - destination address (like ANSI version)
40 *
41 * register usage:
42 * r0 - data / scratch
43 * r1 - 2nd data / scratch
44 * r2 - scratch
45 * r3 - last long bound / adjusted start address (only if >= 11 bytes)
46 * r4 - current dest address
47 * r5 - source start address (biased by -8 once the backward copy is set up)
48 * r6 - length on entry; afterwards current source address (same -8 bias)
49 *
50 * The instruction order is devised in a way to utilize the pipelining
51 * of the SH1 to the max. The routine also tries to utilize fast page mode.
52 */
53
/* Entry point: pick the copy direction. If the source lies above the
 * destination the forward copy is used, otherwise the backward copy below. */
54_memmove:
55 cmp/hi r4,r5 /* T = (source > destination), unsigned */
56 bf .backward /* no (source <= destination): backward copy */
57 mov.l .memcpy_fwd,r0 /* yes: fetch forward-copy entry address from the literal below */
58 jmp @r0 /* tail-jump; this routine is not re-entered afterwards */
59 mov r4,r7 /* (delay slot) store dest for returning - presumably read back by the forward-copy code, which is not visible here */
60
61 .align 2
62.memcpy_fwd:
63 .long ___memcpy_fwd_entry /* address literal for the jump above; symbol is not defined in this file */
64
/* Backward copy setup. r5 and r6 are kept 8 below the real source addresses
 * so the main loops can decrement the pointer between their two loads.
 * Fewer than 11 bytes: everything is done by the byte loop at .start_b2r.
 * Otherwise: copy bytes until the source end is long aligned, then dispatch
 * on the destination alignment (r4 & 3) through .jmptab_r. */
65.backward:
66 add r6,r4 /* r4 = destination end */
67 mov #11,r0
68 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */
69 add #-8,r5 /* adjust for late decrement (max. 2 longs): r5 = source start - 8 */
70 add r5,r6 /* r6 = source end - 8 */
71 bf .start_b2r /* no: jump directly to byte loop (T still from the cmp/hs above) */
72
73 mov #-4,r3 /* r3 = 0xfffffffc */
74 and r6,r3 /* r3 = last source long bound (carrying the same -8 bias as r6) */
75 cmp/hi r3,r6 /* already aligned? */
76 bf .end_b1r /* yes: skip leading byte loop */
77
/* leading byte loop: copies 1..3 bytes until the source is long aligned */
78.loop_b1r:
79 mov.b @(7,r6),r0 /* load byte (offset 7 = -1 plus the bias of 8) */
80 add #-1,r6 /* decrement source addr */
81 mov.b r0,@-r4 /* store byte */
82 cmp/hi r3,r6 /* runs r6 down to last long bound */
83 bt .loop_b1r
84
85.end_b1r:
86 mov #3,r1
87 and r4,r1 /* r1 = dest alignment offset */
88 mova .jmptab_r,r0 /* r0 = address of the offset table */
89 mov.b @(r0,r1),r1 /* select appropriate main loop.. (signed byte offset) */
90 add r0,r1 /* r1 = table address + offset = loop address */
91 mov r5,r3 /* copy start address to r3 */
92 jmp @r1 /* ..and jump to it */
93 add #7,r3 /* (delay slot) adjust end addr for main loops doing 2 longs/pass */
94
95 /** main loops, copying 2 longs per pass to profit from fast page mode **/
96
97 /* long aligned destination (fastest): both longs are stored unchanged */
98 .align 2
99.loop_do0r:
100 mov.l @r6,r1 /* load first long */
101 add #-8,r6 /* decrement source addr */
102 mov.l @(12,r6),r0 /* load second long (offset 12 = old r6 + 4) */
103 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
104 mov.l r0,@-r4 /* store second long */
105 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
106 bt .loop_do0r /* T still from the cmp/hi above: another 2 longs available */
107
108 add #-4,r3 /* readjust end address (bound for a single long) */
109 cmp/hi r3,r6 /* first long left? */
110 bf .start_b2r /* no, jump to trailing byte loop */
111
112 mov.l @(4,r6),r0 /* load first long */
113 add #-4,r6 /* decrement source addr */
114 bra .start_b2r /* jump to trailing byte loop */
115 mov.l r0,@-r4 /* (delay slot) store first long */
116
117 /* word aligned destination (long + 2): each pass stores word, long, word
 * so that the long store in the middle hits a long aligned address */
118 .align 2
119.loop_do2r:
120 mov.l @r6,r1 /* load first long */
121 add #-8,r6 /* decrement source addr */
122 mov.l @(12,r6),r0 /* load second long */
123 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
124 mov.w r0,@-r4 /* store low word of second long */
125 xtrct r1,r0 /* extract low word of first long & high word of second long */
126 mov.l r0,@-r4 /* and store as long */
127 shlr16 r1 /* get high word of first long */
128 mov.w r1,@-r4 /* and store it */
129 bt .loop_do2r /* T still from the cmp/hi above */
130
131 add #-4,r3 /* readjust end address (bound for a single long) */
132 cmp/hi r3,r6 /* first long left? */
133 bf .start_b2r /* no, jump to trailing byte loop */
134
135 mov.l @(4,r6),r0 /* load first long (the address is decremented by the next instruction) */
136 add #-4,r6 /* decrement source addr */
137 mov.w r0,@-r4 /* store low word */
138 shlr16 r0 /* get high word */
139 bra .start_b2r /* jump to trailing byte loop */
140 mov.w r0,@-r4 /* (delay slot) and store it */
141
142 /* jumptable for loop selector: one signed byte offset per destination
 * alignment (dest & 3), relative to .jmptab_r itself */
143 .align 2
144.jmptab_r:
145 .byte .loop_do0r - .jmptab_r /* placed in the middle because the SH1 */
146 .byte .loop_do1r - .jmptab_r /* loads bytes sign-extended. Otherwise */
147 .byte .loop_do2r - .jmptab_r /* the last loop would be out of reach */
148 .byte .loop_do3r - .jmptab_r /* of the offset range. */
149
150 /* byte aligned destination (long + 1): each pass stores byte, long, word,
 * byte; the long is assembled from parts of both source longs */
151 .align 2
152.loop_do1r:
153 mov.l @r6,r1 /* load first long */
154 add #-8,r6 /* decrement source addr */
155 mov.l @(12,r6),r0 /* load second long */
156 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
157 mov.b r0,@-r4 /* store low byte of second long */
158 shlr8 r0 /* get upper 3 bytes */
159 mov r1,r2 /* copy first long */
160 shll16 r2 /* move low byte of first long all the way up, .. */
161 shll8 r2
162 or r2,r0 /* ..combine with the 3 bytes of second long.. */
163 mov.l r0,@-r4 /* ..and store as long */
164 shlr8 r1 /* get middle 2 bytes */
165 mov.w r1,@-r4 /* store as word */
166 shlr16 r1 /* get upper byte */
167 mov.b r1,@-r4 /* and store */
168 bt .loop_do1r /* branches on T from the cmp/hi at the top of the pass */
169
170 add #-4,r3 /* readjust end address (bound for a single long) */
/* single remaining long; also entered from .loop_do3r, which sets r3 itself */
171.last_do13r:
172 cmp/hi r3,r6 /* first long left? */
173 bf .start_b2r /* no, jump to trailing byte loop */
174
175 nop /* alignment */
176 mov.l @(4,r6),r0 /* load first long */
177 add #-4,r6 /* decrement source addr */
178 mov.b r0,@-r4 /* store low byte */
179 shlr8 r0 /* get middle 2 bytes */
180 mov.w r0,@-r4 /* store as word */
181 shlr16 r0 /* get upper byte */
182 bra .start_b2r /* jump to trailing byte loop */
183 mov.b r0,@-r4 /* (delay slot) and store */
184
185 /* byte aligned destination (long + 3): each pass stores byte, word, long,
 * byte; the long is assembled from parts of both source longs */
186 .align 2
187.loop_do3r:
188 mov.l @r6,r1 /* load first long */
189 add #-8,r6 /* decrement source addr */
190 mov.l @(12,r6),r0 /* load second long */
191 mov r1,r2 /* copy first long */
192 mov.b r0,@-r4 /* store low byte of second long */
193 shlr8 r0 /* get middle 2 bytes */
194 mov.w r0,@-r4 /* store as word */
195 shlr16 r0 /* get upper byte */
196 shll8 r2 /* move lower 3 bytes of first long one up.. */
197 or r2,r0 /* ..combine with the 1 byte of second long.. */
198 mov.l r0,@-r4 /* ..and store as long */
199 shlr16 r1 /* get upper byte of first long */
200 shlr8 r1
201 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
202 mov.b r1,@-r4 /* ..and store */
203 bt .loop_do3r
204
205 bra .last_do13r /* handle first longword: reuse routine for (long + 1) */
206 add #-4,r3 /* (delay slot) readjust end address (bound for a single long) */
207
208 /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */
209 .align 2
210.loop_b2r:
211 mov.b @(7,r6),r0 /* load byte (offset 7 = -1 plus the bias of 8) */
212 add #-1,r6 /* decrement source addr */
213 mov.b r0,@-r4 /* store byte */
214.start_b2r:
215 cmp/hi r5,r6 /* runs r6 down to start address (r5 and r6 carry the same bias) */
216 bt .loop_b2r
217
218 rts
219 mov r4,r0 /* (delay slot) return dest start address: r4 has been decremented once per byte stored */
220.end:
221 .size _memmove,.end-_memmove
222#elif defined(CPU_COLDFIRE)
223#define FULLSPEED /* use burst writing for word aligned destinations */
224 .align 2
225 .global memmove
226 .type memmove,@function
227
228/* Moves <length> bytes of data in memory from <source> to <dest>
229 * Regions may overlap.
230 * This version is optimized for speed, and needs the corresponding memcpy
231 * implementation for the forward copy branch.
232 *
233 * arguments:
234 * (4,%sp) - destination address
235 * (8,%sp) - source address
236 * (12,%sp) - length
237 *
238 * return value:
239 * %d0 - destination address (like ANSI version)
240 *
241 * register usage:
242 * %a0 - current source address
243 * %a1 - current dest address
244 * %a2 - source start address (in line-copy loops)
245 * %d0 - source start address (byte and longword copy) / data / scratch
246 * %d1 - data / scratch
247 * %d2 - data / scratch
248 * %d3..%d7 - data
249 *
250 * For maximum speed this routine reads and writes whole lines using burst
251 * move (movem.l) where possible. For byte aligned destinations (long-1 and
252 * long-3) it writes longwords only. Same goes for word aligned destinations
253 * if FULLSPEED is undefined.
254 */
/* Entry point: load the three stack arguments and pick the copy direction. */
255memmove:
256 move.l (4,%sp),%a1 /* Destination */
257 move.l (8,%sp),%a0 /* Source */
258 move.l (12,%sp),%d1 /* Length */
259
260 cmp.l %a0,%a1 /* compare destination against source (unsigned) */
261 bhi.b .backward /* dest > src -> backward copy */
262 jmp __memcpy_fwd_entry /* dest <= src: tail-jump to the forward copy (defined outside this file; presumably expects %a0/%a1/%d1 as loaded above) */
263
/* Backward copy setup: both pointers are moved to the end of their regions
 * and run downwards. Depending on how much aligned source data exists the
 * copy uses bytes only, bytes + longwords, or bytes + longwords + 16-byte
 * lines (with %d2-%d7/%a2 saved on the stack for the line loops). */
264.backward:
265 move.l %a0,%d0 /* %d0 = source start */
266 add.l %d1,%a0 /* %a0 = source end */
267 add.l %d1,%a1 /* %a1 = destination end */
268
269 move.l %a0,%d1
270 and.l #0xFFFFFFFC,%d1 /* %d1 = last source long bound */
271 subq.l #4,%d1 /* %d1 = start of the last aligned source longword */
272 cmp.l %d0,%d1 /* at least one aligned longword to copy? */
273 blo.w .bytes2r_start /* no: copy everything bytewise */
274
275 addq.l #4,%d1 /* %d1 = last source long bound */
276 cmp.l %d1,%a0 /* any bytes to copy */
277 jls .bytes1r_end /* no: skip byte loop */
278
279 /* leading byte loop: copies 0..3 bytes */
280.bytes1r_loop:
281 move.b -(%a0),-(%a1) /* copy byte */
282 cmp.l %d1,%a0 /* runs %a0 down to last long bound */
283 jhi .bytes1r_loop
284
285.bytes1r_end:
286 moveq.l #-16,%d1
287 add.l %a0,%d1
288 and.l #0xFFFFFFF0,%d1 /* %d1 = last source line bound - 16 */
289 cmp.l %d0,%d1 /* at least one aligned line to copy? */
290 blo.w .longr_start /* no: jump to longword copy loop */
291
292 lea.l (-28,%sp),%sp /* free up some registers (7 registers * 4 bytes) */
293 movem.l %d2-%d7/%a2,(%sp)
294
295 moveq.l #16,%d2
296 add.l %d2,%d1 /* %d1 = last source line bound */
297 move.l %d0,%a2 /* %a2 = start address */
298 lea.l (15,%a2),%a2 /* adjust start address for loops doing 16 bytes/pass */
299 move.l %a1,%d0
300 moveq.l #3,%d2 /* mask */
301 and.l %d2,%d0 /* %d0 = dest_addr & 3 */
302 jmp.l (2,%pc,%d0.l*4) /* switch (dest_addr & 3); each bra.w below occupies 4 bytes */
303 bra.w .lines_do0r_start
304 bra.w .lines_do1r_start
305 bra.w .lines_do2r_start
306 /* bra.w .lines_do3r_start implicit */
307
308 /* byte aligned destination (long - 1): use line burst reads in main loop.
 * The low 3 bytes of the first source longword are stored as byte + word,
 * which long-aligns %a1. From then on the high byte of each longword is
 * carried over and stored together with the low 3 bytes of the next one. */
309.lines_do3r_start:
310 moveq.l #24,%d0 /* shift count for shifting by 3 bytes */
311 cmp.l %d1,%a0 /* any leading longwords? */
312 jhi .lines_do3r_head_start /* yes: leading longword copy */
313
314 lea.l (-16,%a0),%a0
315 movem.l (%a0),%d3-%d6 /* load initial line */
316 move.l %d6,%d2 /* last longword, bytes 3210 */
317 move.b %d2,-(%a1) /* store byte */
318 lsr.l #8,%d2 /* last longword, bytes .321 */
319 move.w %d2,-(%a1) /* store word */
320 jra .lines_do3r_entry
321
322.lines_do3r_head_start:
323 move.l -(%a0),%d3 /* load initial longword */
324 move.l %d3,%d2 /* bytes 3210 */
325 move.b %d2,-(%a1) /* store byte */
326 lsr.l #8,%d2 /* bytes .321 */
327 move.w %d2,-(%a1) /* store word */
328 jra .lines_do3r_head_entry
329
/* head: single longwords until the source reaches a line bound */
330.lines_do3r_head_loop:
331 move.l %d3,%d4 /* move old longword away */
332 move.l -(%a0),%d3 /* load new longword */
333 move.l %d3,%d2
334 lsl.l #8,%d2 /* get bytes 210. */
335 or.l %d2,%d4 /* combine with old high byte */
336 move.l %d4,-(%a1) /* store longword */
337.lines_do3r_head_entry:
338 lsr.l %d0,%d3 /* shift down high byte */
339 cmp.l %d1,%a0 /* run %a0 down to last line bound */
340 jhi .lines_do3r_head_loop
341
/* main loop: one 16-byte source line per pass */
342.lines_do3r_loop:
343 move.l %d3,%d7 /* move first longword of last line away */
344 lea.l (-16,%a0),%a0
345 movem.l (%a0),%d3-%d6 /* load new line */
346 move.l %d6,%d2
347 lsl.l #8,%d2 /* get bytes 210. of 4th longword */
348 or.l %d2,%d7 /* combine with high byte of old longword */
349 move.l %d7,-(%a1) /* store longword */
350.lines_do3r_entry:
351 lsr.l %d0,%d6 /* shift down high byte */
352 move.l %d5,%d2
353 lsl.l #8,%d2 /* get bytes 210. of 3rd longword */
354 or.l %d2,%d6 /* combine with high byte of 4th longword */
355 move.l %d6,-(%a1) /* store longword */
356 lsr.l %d0,%d5 /* shift down high byte */
357 move.l %d4,%d2
358 lsl.l #8,%d2 /* get bytes 210. of 2nd longword */
359 or.l %d2,%d5 /* combine with high byte of 3rd longword */
360 move.l %d5,-(%a1) /* store longword */
361 lsr.l %d0,%d4 /* shift down high byte */
362 move.l %d3,%d2
363 lsl.l #8,%d2 /* get bytes 210. of 1st longword */
364 or.l %d2,%d4 /* combine with high byte of 2nd longword */
365 move.l %d4,-(%a1) /* store longword */
366 lsr.l %d0,%d3 /* shift down high byte */
367 cmp.l %a2,%a0 /* run %a0 down to first line bound */
368 jhi .lines_do3r_loop
369
370 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
371 cmp.l %a2,%a0 /* any trailing longwords? */
372 jls .lines_do3r_tail_end /* no: just store last high byte */
373
/* tail: remaining single longwords below the first line bound */
374.lines_do3r_tail_loop:
375 move.l %d3,%d4 /* move old longword away */
376 move.l -(%a0),%d3 /* load new longword */
377 move.l %d3,%d2
378 lsl.l #8,%d2 /* get bytes 210. */
379 or.l %d2,%d4 /* combine with old high byte */
380 move.l %d4,-(%a1) /* store longword */
381 lsr.l %d0,%d3 /* shift down high byte */
382 cmp.l %a2,%a0 /* run %a0 down to first long bound */
383 jhi .lines_do3r_tail_loop
384
385.lines_do3r_tail_end:
386 move.b %d3,-(%a1) /* store shifted-down high byte */
387 jra .linesr_end
388
389 /* byte aligned destination (long - 3): use line burst reads in main loop.
 * The low byte of the first source longword is stored on its own, which
 * long-aligns %a1. From then on the upper 3 bytes of each longword are
 * carried over and stored together with the low byte of the next one. */
390.lines_do1r_start:
391 moveq.l #24,%d0 /* shift count for shifting by 3 bytes */
392 cmp.l %d1,%a0 /* any leading longwords? */
393 jhi .lines_do1r_head_start /* yes: leading longword copy */
394
395 lea.l (-16,%a0),%a0
396 movem.l (%a0),%d3-%d6 /* load initial line */
397 move.b %d6,-(%a1) /* store low byte of last longword */
398 jra .lines_do1r_entry
399
400.lines_do1r_head_start:
401 move.l -(%a0),%d3 /* load initial longword */
402 move.b %d3,-(%a1) /* store low byte */
403 jra .lines_do1r_head_entry
404
/* head: single longwords until the source reaches a line bound */
405.lines_do1r_head_loop:
406 move.l %d3,%d4 /* move old longword away */
407 move.l -(%a0),%d3 /* load new longword */
408 move.l %d3,%d2
409 lsl.l %d0,%d2 /* get low byte */
410 or.l %d2,%d4 /* combine with old bytes .321 */
411 move.l %d4,-(%a1) /* store longword */
412.lines_do1r_head_entry:
413 lsr.l #8,%d3 /* get bytes .321 */
414 cmp.l %d1,%a0 /* run %a0 down to last line bound */
415 jhi .lines_do1r_head_loop
416
/* main loop: one 16-byte source line per pass */
417.lines_do1r_loop:
418 move.l %d3,%d7 /* move first longword of old line away */
419 lea.l (-16,%a0),%a0
420 movem.l (%a0),%d3-%d6 /* load new line */
421 move.l %d6,%d2
422 lsl.l %d0,%d2 /* get low byte of 4th longword */
423 or.l %d2,%d7 /* combine with bytes .321 of old longword */
424 move.l %d7,-(%a1) /* store longword */
425.lines_do1r_entry:
426 lsr.l #8,%d6 /* get bytes .321 */
427 move.l %d5,%d2
428 lsl.l %d0,%d2 /* get low byte of 3rd longword */
429 or.l %d2,%d6 /* combine with bytes .321 of 4th longword */
430 move.l %d6,-(%a1) /* store longword */
431 lsr.l #8,%d5 /* get bytes .321 */
432 move.l %d4,%d2
433 lsl.l %d0,%d2 /* get low byte of 2nd longword */
434 or.l %d2,%d5 /* combine with bytes .321 of 3rd longword */
435 move.l %d5,-(%a1) /* store longword */
436 lsr.l #8,%d4 /* get bytes .321 */
437 move.l %d3,%d2
438 lsl.l %d0,%d2 /* get low byte of 1st longword */
439 or.l %d2,%d4 /* combine with bytes .321 of 2nd longword */
440 move.l %d4,-(%a1) /* store longword */
441 lsr.l #8,%d3 /* get bytes .321 */
442 cmp.l %a2,%a0 /* run %a0 down to first line bound */
443 jhi .lines_do1r_loop
444
445 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
446 cmp.l %a2,%a0 /* any trailing longwords? */
447 jls .lines_do1r_tail_end /* no: just store the 3 bytes still held in %d3 */
448
/* tail: remaining single longwords below the first line bound */
449.lines_do1r_tail_loop:
450 move.l %d3,%d4 /* move old longword away */
451 move.l -(%a0),%d3 /* load new longword */
452 move.l %d3,%d2
453 lsl.l %d0,%d2 /* get low byte */
454 or.l %d2,%d4 /* combine with old bytes .321 */
455 move.l %d4,-(%a1) /* store longword */
456 lsr.l #8,%d3 /* get bytes .321 */
457 cmp.l %a2,%a0 /* run %a0 down to first long bound */
458 jhi .lines_do1r_tail_loop
459
460.lines_do1r_tail_end:
461 move.w %d3,-(%a1) /* store word 21 */
462 swap %d3
463 move.b %d3,-(%a1) /* store byte 3 */
464 jra .linesr_end
465
466 /* long aligned destination (line - 0/4/8/12): head.
 * Copies single longwords until the source reaches a line bound, then
 * selects the main loop matching the destination's offset within a line. */
467.lines_do0r_head_loop:
468 move.l -(%a0),-(%a1) /* copy longword */
469.lines_do0r_start:
470 cmp.l %d1,%a0 /* run %a0 down to last line bound */
471 jhi .lines_do0r_head_loop
472
473.lines_do0r_head_end:
474 move.l %a1,%d1
475 lsr.l #2,%d1
476 moveq.l #3,%d0 /* mask */
477 and.l %d0,%d1 /* %d1 = (dest_addr >> 2) & 3 */
478 moveq.l #16,%d0 /* address decrement for one main loop pass */
479 jmp.l (2,%pc,%d1.l*2) /* switch ((dest_addr >> 2) & 3); each bra.b below occupies 2 bytes */
480 bra.b .lines_lo0r_start
481 bra.b .lines_lo4r_start
482 bra.b .lines_lo8r_start
483 /* bra.b .lines_lo12r_start implicit */
484
485 /* long aligned destination (line - 4): use line bursts in the loop.
 * Three longwords of the first line are stored singly to line-align %a1;
 * the remaining one (%d1) is carried into the next stored line. */
486.lines_lo12r_start:
487 sub.l %d0,%a0
488 movem.l (%a0),%d1-%d4 /* load initial line */
489 move.l %d4,-(%a1) /* store 4th longword */
490 move.l %d3,-(%a1) /* store 3rd longword */
491 move.l %d2,-(%a1) /* store 2nd longword */
492 cmp.l %a2,%a0 /* any full lines? */
493 jls .lines_lo12r_end /* no: skip main loop */
494
495.lines_lo12r_loop:
496 move.l %d1,%d5 /* move first longword of old line away */
497 sub.l %d0,%a0
498 movem.l (%a0),%d1-%d4 /* load new line */
499 sub.l %d0,%a1
500 movem.l %d2-%d5,(%a1) /* store line (1 old + 3 new longwords) */
501 cmp.l %a2,%a0 /* run %a0 down to first line bound */
502 jhi .lines_lo12r_loop
503
504 jra .lines_lo12r_end /* handle trailing longwords */
505
506 /* line aligned destination: use line bursts in the loop.
 * Source and destination are both line aligned here, so whole lines are
 * moved with one burst load and one burst store per pass. */
507.lines_lo0r_start:
508.lines_lo0r_loop:
509 sub.l %d0,%a0
510 movem.l (%a0),%d1-%d4 /* load line */
511 sub.l %d0,%a1
512 movem.l %d1-%d4,(%a1) /* store line */
513 cmp.l %a2,%a0 /* run %a0 down to first line bound */
514 jhi .lines_lo0r_loop
515
516 jra .lines_lo0r_end /* handle trailing longwords */
517
518 /* long aligned destination (line - 8): use line bursts in the loop.
 * Two longwords of the first line are stored singly to line-align %a1;
 * the remaining two (%d1, %d2) are carried into the next stored line. */
519.lines_lo8r_start:
520 sub.l %d0,%a0
521 movem.l (%a0),%d1-%d4 /* load initial line */
522 move.l %d4,-(%a1) /* store 4th longword */
523 move.l %d3,-(%a1) /* store 3rd longword */
524 cmp.l %a2,%a0 /* any full lines? */
525 jls .lines_lo8r_end /* no: skip main loop */
526
527.lines_lo8r_loop:
528 move.l %d2,%d6 /* move first 2 longwords of old line away */
529 move.l %d1,%d5
530 sub.l %d0,%a0
531 movem.l (%a0),%d1-%d4 /* load new line */
532 sub.l %d0,%a1
533 movem.l %d3-%d6,(%a1) /* store line (2 old + 2 new longwords) */
534 cmp.l %a2,%a0 /* run %a0 down to first line bound */
535 jhi .lines_lo8r_loop
536
537 jra .lines_lo8r_end /* handle trailing longwords */
538
539 /* long aligned destination (line - 12): use line bursts in the loop.
 * One longword of the first line is stored singly to line-align %a1;
 * the remaining three (%d1-%d3) are carried into the next stored line. */
540.lines_lo4r_start:
541 sub.l %d0,%a0
542 movem.l (%a0),%d1-%d4 /* load initial line */
543 move.l %d4,-(%a1) /* store 4th longword */
544 cmp.l %a2,%a0 /* any full lines? */
545 jls .lines_lo4r_end /* no: skip main loop */
546
547.lines_lo4r_loop:
548 move.l %d3,%d7 /* move first 3 longwords of old line away */
549 move.l %d2,%d6
550 move.l %d1,%d5
551 sub.l %d0,%a0
552 movem.l (%a0),%d1-%d4 /* load new line */
553 sub.l %d0,%a1
554 movem.l %d4-%d7,(%a1) /* store line (3 old + 1 new longwords) */
555 cmp.l %a2,%a0 /* run %a0 down to first line bound */
556 jhi .lines_lo4r_loop
557
558 /* long aligned destination (line - 0/4/8/12): tail.
 * The entry labels fall through each other so that exactly the longwords
 * still held in registers by the respective main loop get stored. */
559.lines_lo4r_end:
560 move.l %d3,-(%a1) /* store 3rd last longword */
561.lines_lo8r_end:
562 move.l %d2,-(%a1) /* store 2nd last longword */
563.lines_lo12r_end:
564 move.l %d1,-(%a1) /* store last longword */
565.lines_lo0r_end:
566 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
567 cmp.l %a2,%a0 /* any trailing longwords? */
568 jls .linesr_end /* no: get outta here */
569
570.lines_do0r_tail_loop:
571 move.l -(%a0),-(%a1) /* copy longword */
572 cmp.l %a2,%a0 /* run %a0 down to first long bound */
573 jhi .lines_do0r_tail_loop
574
575 jra .linesr_end
576
577#ifdef FULLSPEED
578 /* word aligned destination (line - 2/6/10/14): head.
 * Copies single longwords (split into word + carried word) until the
 * source reaches a line bound, then selects the main loop matching the
 * destination's offset within a line. */
579.lines_do2r_start:
580 cmp.l %d1,%a0 /* any leading longwords? */
581 jls .lines_do2r_selector /* no: jump to mainloop selector */
582
583 move.l -(%a0),%d3 /* load initial longword */
584 move.w %d3,-(%a1) /* store low word */
585 cmp.l %d1,%a0 /* any more longwords? */
586 jls .lines_do2r_head_end /* no: skip head loop */
587
588.lines_do2r_head_loop:
589 move.l %d3,%d4 /* move old longword away */
590 move.l -(%a0),%d3 /* load new longword */
591 move.w %d3,%d4 /* combine low word with old high word */
592 swap %d4 /* swap words */
593 move.l %d4,-(%a1) /* store longword */
594 cmp.l %d1,%a0 /* run %a0 down to last line bound */
595 jhi .lines_do2r_head_loop
596
597.lines_do2r_head_end:
598 swap %d3 /* get high word */
599 move.w %d3,-(%a1) /* and store it */
600
601.lines_do2r_selector:
602 move.l %a1,%d1
603 lsr.l #2,%d1
604 moveq.l #3,%d0 /* mask */
605 and.l %d0,%d1 /* %d1 = (dest_addr >> 2) & 3 */
606 moveq.l #16,%d7 /* address decrement for one main loop pass */
607 jmp.l (2,%pc,%d1.l*4) /* switch ((dest_addr >> 2) & 3); each bra.w below occupies 4 bytes */
608 bra.w .lines_lo2r_start
609 bra.w .lines_lo6r_start
610 bra.w .lines_lo10r_start
611 /* bra.w .lines_lo14r_start implicit */
612
613 /* word aligned destination (line - 2): use line bursts in the loop.
 * The initial line is stored as word + 3 longwords, which line-aligns %a1;
 * the high word of its 1st longword stays in %d0 for the next pass. */
614.lines_lo14r_start:
615 sub.l %d7,%a0
616 movem.l (%a0),%d0-%d3 /* load initial line */
617 move.w %d3,-(%a1) /* store last low word */
618 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
619 swap %d3 /* swap words of 3rd long */
620 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
621 swap %d2 /* swap words of 2nd long */
622 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
623 swap %d1 /* swap words of 1st long */
624 move.l %d3,-(%a1) /* store 3rd longword */
625 move.l %d2,-(%a1) /* store 2nd longword */
626 move.l %d1,-(%a1) /* store 1st longword */
627 cmp.l %a2,%a0 /* any full lines? */
628 jls .lines_lo14r_end /* no: skip main loop */
629
630.lines_lo14r_loop:
631 move.l %d0,%d4 /* move first longword of old line away */
632 sub.l %d7,%a0
633 movem.l (%a0),%d0-%d3 /* load line */
634 move.w %d3,%d4 /* combine 4th low word with old high word */
635 swap %d4 /* swap words of 4th long */
636 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
637 swap %d3 /* swap words of 3rd long */
638 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
639 swap %d2 /* swap words of 2nd long */
640 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
641 swap %d1 /* swap words of 1st long */
642 sub.l %d7,%a1
643 movem.l %d1-%d4,(%a1) /* store line */
644 cmp.l %a2,%a0 /* run %a0 down to first line bound */
645 jhi .lines_lo14r_loop
646
647 jra .lines_lo14r_end /* handle trailing longwords */
648
649 /* word aligned destination (line - 6): use line bursts in the loop.
 * The initial line contributes word + 2 longwords to line-align %a1; one
 * combined longword (%d1) and a high word (%d0) are carried to the next pass. */
650.lines_lo10r_start:
651 sub.l %d7,%a0
652 movem.l (%a0),%d0-%d3 /* load initial line */
653 move.w %d3,-(%a1) /* store last low word */
654 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
655 swap %d3 /* swap words of 3rd long */
656 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
657 swap %d2 /* swap words of 2nd long */
658 move.l %d3,-(%a1) /* store 3rd longword */
659 move.l %d2,-(%a1) /* store 2nd longword */
660 jra .lines_lo10r_entry /* jump into main loop */
661
662.lines_lo10r_loop:
663 move.l %d0,%d4 /* move first 2 longwords of old line away */
664 move.l %d1,%d5
665 sub.l %d7,%a0
666 movem.l (%a0),%d0-%d3 /* load line */
667 move.w %d3,%d4 /* combine 4th low word with old high word */
668 swap %d4 /* swap words of 4th long */
669 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
670 swap %d3 /* swap words of 3rd long */
671 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
672 swap %d2 /* swap words of 2nd long */
673 sub.l %d7,%a1
674 movem.l %d2-%d5,(%a1) /* store line */
675.lines_lo10r_entry:
676 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
677 swap %d1 /* swap words of 1st long */
678 cmp.l %a2,%a0 /* run %a0 down to first line bound */
679 jhi .lines_lo10r_loop
680
681 jra .lines_lo10r_end /* handle trailing longwords */
682
683 /* word aligned destination (line - 10): use line bursts in the loop.
 * The initial line contributes word + 1 longword to line-align %a1; two
 * combined longwords (%d1, %d2) and a high word (%d0) are carried over. */
684.lines_lo6r_start:
685 sub.l %d7,%a0
686 movem.l (%a0),%d0-%d3 /* load initial line */
687 move.w %d3,-(%a1) /* store last low word */
688 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
689 swap %d3 /* swap words of 3rd long */
690 move.l %d3,-(%a1) /* store 3rd longword */
691 jra .lines_lo6r_entry /* jump into main loop */
692
693.lines_lo6r_loop:
694 move.l %d0,%d4 /* move first 3 longwords of old line away */
695 move.l %d1,%d5
696 move.l %d2,%d6
697 sub.l %d7,%a0
698 movem.l (%a0),%d0-%d3 /* load line */
699 move.w %d3,%d4 /* combine 4th low word with old high word */
700 swap %d4 /* swap words of 4th long */
701 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
702 swap %d3 /* swap words of 3rd long */
703 sub.l %d7,%a1
704 movem.l %d3-%d6,(%a1) /* store line */
705.lines_lo6r_entry:
706 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
707 swap %d2 /* swap words of 2nd long */
708 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
709 swap %d1 /* swap words of 1st long */
710 cmp.l %a2,%a0 /* run %a0 down to first line bound */
711 jhi .lines_lo6r_loop
712
713 jra .lines_lo6r_end /* handle trailing longwords */
714
715 /* word aligned destination (line - 14): use line bursts in the loop.
 * Only the last low word of the initial line is stored up front; three
 * combined longwords (%d1-%d3) and a high word (%d0) are carried over.
 * %d7 is reused for data inside the loop, hence the lea.l (-16,..) there. */
716.lines_lo2r_start:
717 sub.l %d7,%a0
718 movem.l (%a0),%d0-%d3 /* load initial line */
719 move.w %d3,-(%a1) /* store last low word */
720 jra .lines_lo2r_entry /* jump into main loop */
721
722.lines_lo2r_loop:
723 move.l %d0,%d4 /* move old line away */
724 move.l %d1,%d5
725 move.l %d2,%d6
726 move.l %d3,%d7
727 lea.l (-16,%a0),%a0
728 movem.l (%a0),%d0-%d3 /* load line */
729 move.w %d3,%d4 /* combine 4th low word with old high word */
730 swap %d4 /* swap words of 4th long */
731 lea.l (-16,%a1),%a1
732 movem.l %d4-%d7,(%a1) /* store line */
733.lines_lo2r_entry:
734 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
735 swap %d3 /* swap words of 3rd long */
736 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
737 swap %d2 /* swap words of 2nd long */
738 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
739 swap %d1 /* swap words of 1st long */
740 cmp.l %a2,%a0 /* run %a0 down to first line bound */
741 jhi .lines_lo2r_loop
742
743 /* word aligned destination (line - 2/6/10/14): tail.
 * The entry labels fall through each other so that exactly the combined
 * longwords still held by the respective main loop get stored; the high
 * word left in %d0 is written at .lines_do2r_tail_end. */
744.lines_lo2r_end:
745 move.l %d3,-(%a1) /* store third last longword */
746.lines_lo6r_end:
747 move.l %d2,-(%a1) /* store second last longword */
748.lines_lo10r_end:
749 move.l %d1,-(%a1) /* store last longword */
750.lines_lo14r_end:
751 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
752 cmp.l %a2,%a0 /* any trailing longwords? */
753 jls .lines_do2r_tail_end /* no: skip tail loop */
754
755.lines_do2r_tail_loop:
756 move.l %d0,%d1 /* move old longword away */
757 move.l -(%a0),%d0 /* load new longword */
758 move.w %d0,%d1 /* combine low word with old high word */
759 swap %d1 /* swap words */
760 move.l %d1,-(%a1) /* store longword */
761 cmp.l %a2,%a0 /* run %a0 down to first long bound */
762 jhi .lines_do2r_tail_loop
763
764.lines_do2r_tail_end:
765 swap %d0 /* get final high word */
766 move.w %d0,-(%a1) /* store it */
767 /* jra .linesr_end implicit */
768
769#else /* !FULLSPEED */
770
771 /* word aligned destination (long - 2): use line burst reads in the loop.
 * Variant assembled only when FULLSPEED is not defined: lines are read
 * with a burst but written as four single longwords, each built from the
 * low word of one source longword and the high word of the next. */
772.lines_do2r_start:
773 cmp.l %d1,%a0 /* any leading longwords? */
774 jhi .lines_do2r_head_start /* yes: leading longword copy */
775
776 lea.l (-16,%a0),%a0
777 movem.l (%a0),%d3-%d6 /* load initial line */
778 move.w %d6,-(%a1) /* store last low word */
779 jra .lines_do2r_entry /* jump into main loop */
780
781.lines_do2r_head_start:
782 move.l -(%a0),%d3 /* load initial longword */
783 move.w %d3,-(%a1) /* store low word */
784 cmp.l %d1,%a0 /* any full longword? */
785 jls .lines_do2r_loop /* no: skip head loop */
786
787.lines_do2r_head_loop:
788 move.l %d3,%d4 /* move old longword away */
789 move.l -(%a0),%d3 /* load new longword */
790 move.w %d3,%d4 /* combine low word with old high word */
791 swap %d4 /* swap words */
792 move.l %d4,-(%a1) /* store longword */
793 cmp.l %d1,%a0 /* run %a0 down to last line bound */
794 jhi .lines_do2r_head_loop
795
796.lines_do2r_loop:
797 move.l %d3,%d7 /* move first longword of old line away */
798 lea.l (-16,%a0),%a0
799 movem.l (%a0),%d3-%d6 /* load line */
800 move.w %d6,%d7 /* combine 4th low word with old high word */
801 swap %d7 /* swap words of 4th long */
802 move.l %d7,-(%a1) /* store 4th longword */
803.lines_do2r_entry:
804 move.w %d5,%d6 /* combine 3rd low word with 4th high word */
805 swap %d6 /* swap words of 3rd long */
806 move.l %d6,-(%a1) /* store 3rd longword */
807 move.w %d4,%d5 /* combine 2nd low word with 3rd high word */
808 swap %d5 /* swap words of 2nd long */
809 move.l %d5,-(%a1) /* store 2nd longword */
810 move.w %d3,%d4 /* combine 1st low word with 2nd high word */
811 swap %d4 /* swap words of 1st long */
812 move.l %d4,-(%a1) /* store 1st longword */
813 cmp.l %a2,%a0 /* run %a0 down to first line bound */
814 jhi .lines_do2r_loop
815
816.lines_do2r_end:
817 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
818 cmp.l %a2,%a0 /* any trailing longwords? */
819 jls .lines_do2r_tail_end /* no: skip tail loop */
820
821.lines_do2r_tail_loop:
822 move.l %d3,%d4 /* move old longword away */
823 move.l -(%a0),%d3 /* load new longword */
824 move.w %d3,%d4 /* combine low word with old high word */
825 swap %d4 /* swap words */
826 move.l %d4,-(%a1) /* store longword */
827 cmp.l %a2,%a0 /* run %a0 down to first long bound */
828 jhi .lines_do2r_tail_loop
829
830.lines_do2r_tail_end:
831 swap %d3 /* get final high word */
832 move.w %d3,-(%a1) /* store it */
833 /* jra .linesr_end implicit */
834
835#endif /* !FULLSPEED */
836
/* Common exit of all line-copy variants: rebuild the plain source start
 * address in %d0, release the saved registers and finish bytewise. */
837.linesr_end:
838 subq.l #3,%a2 /* readjust start address (undoes the remaining +3 bias) */
839 move.l %a2,%d0 /* start address in %d0 again */
840 movem.l (%sp),%d2-%d7/%a2 /* restore registers */
841 lea.l (28,%sp),%sp /* release the 28-byte save area */
842 jra .bytes2r_start /* jump to trailing byte loop */
843
/* Longword copy (used when the source holds no complete aligned line) and
 * the trailing byte loop, which is the common exit of the backward copy.
 *
 * In:  %a0 / %a1 = current source / destination address, both moving down
 *      %d0       = source start address
 * Out: %d0       = destination start address, as documented in the header
 *
 * The gutter numbers are kept contiguous: the added instruction takes slot
 * 865 and rts moves into the formerly empty slot 866. */
844.longr_start:
845 addq.l #3,%d0 /* adjust start address for doing 4 bytes/pass */
846
847 /* longword copy loop - no lines */
848.longr_loop:
849 move.l -(%a0),-(%a1) /* copy longword (write can be unaligned) */
850 cmp.l %d0,%a0 /* runs %a0 down to first long bound */
851 jhi .longr_loop
852
853 subq.l #3,%d0 /* readjust start address */
854 cmp.l %d0,%a0 /* any bytes left? */
855 jls .bytes2r_end /* no: skip trailing byte loop */
856
857 /* trailing byte loop */
858.bytes2r_loop:
859 move.b -(%a0),-(%a1) /* copy byte */
860.bytes2r_start:
861 cmp.l %d0,%a0 /* runs %a0 down to start address */
862 jhi .bytes2r_loop
863
864.bytes2r_end:
865 move.l %a1,%d0 /* everything is copied, so %a1 = destination start: return it (%d0 held the SOURCE start until here) */
866 rts /* returns destination start address */
867.end:
868 .size memmove,.end-memmove
869#endif