summary | refs | log | tree | commit | diff
path: root/firmware/common/memcpy_a.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/common/memcpy_a.S')
-rw-r--r-- firmware/common/memcpy_a.S 25
1 files changed, 15 insertions, 10 deletions
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S
index 7264c964a4..9f6c813be3 100644
--- a/firmware/common/memcpy_a.S
+++ b/firmware/common/memcpy_a.S
@@ -23,6 +23,7 @@
23#if CONFIG_CPU == SH7034 23#if CONFIG_CPU == SH7034
24 .align 2 24 .align 2
25 .global _memcpy 25 .global _memcpy
26 .global ___memcpy_fwd_entry
26 .type _memcpy,@function 27 .type _memcpy,@function
27 28
28/* Copies <length> bytes of data in memory from <source> to <dest> 29/* Copies <length> bytes of data in memory from <source> to <dest>
@@ -46,12 +47,13 @@
46 * r6 - source end address 47 * r6 - source end address
47 * r7 - stored dest start address 48 * r7 - stored dest start address
48 * 49 *
49 * The instruction order below is devised in a way to utilize the pipelining 50 * The instruction order is devised in a way to utilize the pipelining
50 * of the SH1 to the max. The routine also tries to utilize fast page mode. 51 * of the SH1 to the max. The routine also tries to utilize fast page mode.
51 */ 52 */
52 53
53_memcpy: 54_memcpy:
54 mov r4,r7 /* store dest for returning */ 55 mov r4,r7 /* store dest for returning */
56___memcpy_fwd_entry:
55 add #-8,r4 /* offset for early increment (max. 2 longs) */ 57 add #-8,r4 /* offset for early increment (max. 2 longs) */
56 mov #11,r0 58 mov #11,r0
57 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ 59 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */
@@ -99,7 +101,7 @@ _memcpy:
99 mov.l r0,@-r4 /* store second long */ 101 mov.l r0,@-r4 /* store second long */
100 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ 102 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
101 bt .loop_do0 103 bt .loop_do0
102 104
103 add #4,r3 /* readjust end address */ 105 add #4,r3 /* readjust end address */
104 cmp/hi r5,r3 /* one long left? */ 106 cmp/hi r5,r3 /* one long left? */
105 bf .start_b2 /* no, jump to trailing byte loop */ 107 bf .start_b2 /* no, jump to trailing byte loop */
@@ -148,20 +150,20 @@ _memcpy:
148 mov.l @r5+,r1 /* load first long & increment source addr */ 150 mov.l @r5+,r1 /* load first long & increment source addr */
149 add #16,r4 /* increment dest addr */ 151 add #16,r4 /* increment dest addr */
150 mov.l @r5+,r0 /* load second long & increment source addr */ 152 mov.l @r5+,r0 /* load second long & increment source addr */
151 mov r1,r2 /* copy first long */ 153 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
152 mov.b r0,@-r4 /* store low byte of second long */ 154 mov.b r0,@-r4 /* store low byte of second long */
153 shlr8 r0 /* get upper 3 bytes */ 155 shlr8 r0 /* get upper 3 bytes */
156 mov r1,r2 /* copy first long */
154 shll16 r2 /* move low byte of first long all the way up, .. */ 157 shll16 r2 /* move low byte of first long all the way up, .. */
155 shll8 r2 158 shll8 r2
156 or r2,r0 /* ..combine with the 3 bytes of second long.. */ 159 or r2,r0 /* ..combine with the 3 bytes of second long.. */
157 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
158 mov.l r0,@-r4 /* ..and store as long */ 160 mov.l r0,@-r4 /* ..and store as long */
159 shlr8 r1 /* get middle 2 bytes */ 161 shlr8 r1 /* get middle 2 bytes */
160 mov.w r1,@-r4 /* store as word */ 162 mov.w r1,@-r4 /* store as word */
161 shlr16 r1 /* get upper byte */ 163 shlr16 r1 /* get upper byte */
162 mov.b r1,@-r4 /* and store */ 164 mov.b r1,@-r4 /* and store */
163 bt .loop_do1 165 bt .loop_do1
164 166
165 add #4,r3 /* readjust end address */ 167 add #4,r3 /* readjust end address */
166.last_do13: 168.last_do13:
167 cmp/hi r5,r3 /* one long left? */ 169 cmp/hi r5,r3 /* one long left? */
@@ -218,6 +220,7 @@ _memcpy:
218#define FULLSPEED /* use burst writing for word aligned destinations */ 220#define FULLSPEED /* use burst writing for word aligned destinations */
219 .align 2 221 .align 2
220 .global memcpy 222 .global memcpy
223 .global __memcpy_fwd_entry
221 .type memcpy,@function 224 .type memcpy,@function
222 225
223/* Copies <length> bytes of data in memory from <source> to <dest> 226/* Copies <length> bytes of data in memory from <source> to <dest>
@@ -249,7 +252,9 @@ memcpy:
249 move.l (4,%sp),%a1 /* Destination */ 252 move.l (4,%sp),%a1 /* Destination */
250 move.l (8,%sp),%a0 /* Source */ 253 move.l (8,%sp),%a0 /* Source */
251 move.l (12,%sp),%d1 /* Length */ 254 move.l (12,%sp),%d1 /* Length */
252 add.l %a0,%d1 /* %d1 = end address */ 255
256__memcpy_fwd_entry:
257 add.l %a0,%d1 /* %d1 = source end */
253 258
254 move.l %a0,%d0 259 move.l %a0,%d0
255 addq.l #7,%d0 260 addq.l #7,%d0
@@ -278,7 +283,7 @@ memcpy:
278 movem.l %d2-%d7/%a2,(%sp) 283 movem.l %d2-%d7/%a2,(%sp)
279 284
280 moveq.l #16,%d2 285 moveq.l #16,%d2
281 sub.l %d2,%d0 /* %d0 = first source long bound */ 286 sub.l %d2,%d0 /* %d0 = first source line bound */
282 move.l %d1,%a2 /* %a2 = end address */ 287 move.l %d1,%a2 /* %a2 = end address */
283 lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */ 288 lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */
284 move.l %a1,%d1 289 move.l %a1,%d1
@@ -507,7 +512,7 @@ memcpy:
507 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */ 512 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
508 cmp.l %a0,%a2 /* any trailing longwords? */ 513 cmp.l %a0,%a2 /* any trailing longwords? */
509 jls .lines_end /* no: get outta here */ 514 jls .lines_end /* no: get outta here */
510 515
511.lines_do0_tail_loop: 516.lines_do0_tail_loop:
512 move.l (%a0)+,(%a1)+ /* copy longword */ 517 move.l (%a0)+,(%a1)+ /* copy longword */
513 cmp.l %a0,%a2 /* runs %a0 up to last long bound */ 518 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
@@ -610,7 +615,7 @@ memcpy:
610 /* word aligned destination (line + 14): use line bursts in the loop */ 615 /* word aligned destination (line + 14): use line bursts in the loop */
611.lines_lo14_start: 616.lines_lo14_start:
612 movem.l (%a0),%d4-%d7 /* load first line */ 617 movem.l (%a0),%d4-%d7 /* load first line */
613 lea.l (16,%a0),%a0 618 lea.l (16,%a0),%a0
614 swap %d4 /* swap words of 1st long */ 619 swap %d4 /* swap words of 1st long */
615 move.w %d4,(%a1)+ /* store word */ 620 move.w %d4,(%a1)+ /* store word */
616 jra .lines_lo14_entry /* jump into main loop */ 621 jra .lines_lo14_entry /* jump into main loop */
@@ -784,7 +789,7 @@ memcpy:
784 move.l (%a0)+,%d7 /* load first longword */ 789 move.l (%a0)+,%d7 /* load first longword */
785 swap %d7 /* swap words */ 790 swap %d7 /* swap words */
786 move.w %d7,(%a1)+ /* store high word */ 791 move.w %d7,(%a1)+ /* store high word */
787 cmp.l %a0,%d0 /* any full lnogword? */ 792 cmp.l %a0,%d0 /* any full longword? */
788 jls .lines_do2_loop /* no: skip head loop */ 793 jls .lines_do2_loop /* no: skip head loop */
789 794
790.lines_do2_head_loop: 795.lines_do2_head_loop: