summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- firmware/common/memcpy_a.S 69
1 file changed, 32 insertions, 37 deletions
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S
index 81cced187f..125c46a505 100644
--- a/firmware/common/memcpy_a.S
+++ b/firmware/common/memcpy_a.S
@@ -78,13 +78,12 @@ _memcpy:
78 78
79 /* selector for main copy loop */ 79 /* selector for main copy loop */
80 .end_b1: 80 .end_b1:
81 mov r6,r3 /* move end address to r3 */
82 mov #3,r1 81 mov #3,r1
83 and r4,r1 /* r1 = dest alignment offset */ 82 and r4,r1 /* r1 = dest alignment offset */
84 sub r1,r4 /* r4 now long aligned */
85 mova .jmptab,r0 83 mova .jmptab,r0
86 mov.b @(r0,r1),r1 /* select appropriate main loop */ 84 mov.b @(r0,r1),r1 /* select appropriate main loop */
87 add r0,r1 85 add r0,r1
86 mov r6,r3 /* move end address to r3 */
88 jmp @r1 /* and jump to it */ 87 jmp @r1 /* and jump to it */
89 add #-7,r3 /* adjust end addr for main loops doing 2 longs/pass */ 88 add #-7,r3 /* adjust end addr for main loops doing 2 longs/pass */
90 89
@@ -94,11 +93,11 @@ _memcpy:
94 .align 2 93 .align 2
95 .loop_do0: 94 .loop_do0:
96 mov.l @r5+,r1 /* load first long & increment source addr */ 95 mov.l @r5+,r1 /* load first long & increment source addr */
97 add #8,r4 /* increment dest addr */ 96 add #16,r4 /* increment dest addr & account for decrementing stores */
98 mov.l @r5+,r0 /* load second long & increment source addr */ 97 mov.l @r5+,r0 /* load second long & increment source addr */
99 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ 98 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
100 mov.l r1,@r4 /* store first long */ 99 mov.l r0,@-r4 /* store second long */
101 mov.l r0,@(4,r4) /* store second long; NOT ALIGNED - no speed loss here! */ 100 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
102 bt .loop_do0 101 bt .loop_do0
103 102
104 add #4,r3 /* readjust end address */ 103 add #4,r3 /* readjust end address */
@@ -109,22 +108,21 @@ _memcpy:
109 add #4,r4 /* increment dest addr */ 108 add #4,r4 /* increment dest addr */
110 bra .start_b2 /* jump to trailing byte loop */ 109 bra .start_b2 /* jump to trailing byte loop */
111 mov.l r0,@(4,r4) /* store last long */ 110 mov.l r0,@(4,r4) /* store last long */
112 111
113 /* word aligned destination (long + 2) */ 112 /* word aligned destination (long + 2) */
114 .align 2 113 .align 2
115 .loop_do2: 114 .loop_do2:
116 mov.l @r5+,r1 /* load first long & increment source addr */ 115 mov.l @r5+,r1 /* load first long & increment source addr */
117 add #8,r4 /* increment dest addr */ 116 add #16,r4 /* increment dest addr */
118 mov.l @r5+,r0 /* load second long & increment source addr */ 117 mov.l @r5+,r0 /* load second long & increment source addr */
119 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ 118 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
120 mov.w r0,@(8,r4) /* store low word of second long */ 119 mov.w r0,@-r4 /* store low word of second long */
121 xtrct r1,r0 /* extract low word of first long & high word of second long */ 120 xtrct r1,r0 /* extract low word of first long & high word of second long */
122 mov.l r0,@(4,r4) /* and store as long */ 121 mov.l r0,@-r4 /* and store as long */
123 swap.w r1,r0 /* get high word of first long */ 122 swap.w r1,r0 /* get high word of first long */
124 mov.w r0,@(2,r4) /* and store it */ 123 mov.w r0,@-r4 /* and store it */
125 bt .loop_do2 124 bt .loop_do2
126 125
127 add #2,r4 /* readjust destination */
128 add #4,r3 /* readjust end address */ 126 add #4,r3 /* readjust end address */
129 cmp/hi r5,r3 /* one long left? */ 127 cmp/hi r5,r3 /* one long left? */
130 bf .start_b2 /* no, jump to trailing byte loop */ 128 bf .start_b2 /* no, jump to trailing byte loop */
@@ -148,62 +146,59 @@ _memcpy:
148 .align 2 146 .align 2
149 .loop_do1: 147 .loop_do1:
150 mov.l @r5+,r1 /* load first long & increment source addr */ 148 mov.l @r5+,r1 /* load first long & increment source addr */
151 add #8,r4 /* increment dest addr */ 149 add #16,r4 /* increment dest addr */
152 mov.l @r5+,r0 /* load second long & increment source addr */ 150 mov.l @r5+,r0 /* load second long & increment source addr */
153 mov r1,r2 /* copy first long */ 151 mov r1,r2 /* copy first long */
154 mov.b r0,@(8,r4) /* store low byte of second long */ 152 mov.b r0,@-r4 /* store low byte of second long */
155 shlr8 r0 /* get upper 3 bytes */ 153 shlr8 r0 /* get upper 3 bytes */
156 shll16 r2 /* move low byte of first long all the way up, .. */ 154 shll16 r2 /* move low byte of first long all the way up, .. */
157 shll8 r2 155 shll8 r2
158 or r0,r2 /* ..combine with the 3 bytes of second long.. */ 156 or r2,r0 /* ..combine with the 3 bytes of second long.. */
159 mov r1,r0 /* copy first long to r0 */
160 mov.l r2,@(4,r4) /* ..and store as long */
161 shlr8 r0 /* get middle 2 bytes */
162 mov.w r0,@(2,r4) /* store as word */
163 shlr16 r0 /* get upper byte */
164 mov.b r0,@(1,r4) /* and store */
165 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ 157 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
158 mov.l r0,@-r4 /* ..and store as long */
159 shlr8 r1 /* get middle 2 bytes */
160 mov.w r1,@-r4 /* store as word */
161 shlr16 r1 /* get upper byte */
162 mov.b r1,@-r4 /* and store */
166 bt .loop_do1 163 bt .loop_do1
167 164
168 .last_do13:
169 add #4,r3 /* readjust end address */ 165 add #4,r3 /* readjust end address */
166 .last_do13:
170 cmp/hi r5,r3 /* one long left? */ 167 cmp/hi r5,r3 /* one long left? */
171 bf .end_do13 /* no, get out of here */ 168 bf .start_b2 /* no, jump to trailing byte loop */
172 169
173 mov.l @r5+,r0 /* load last long & increment source addr */ 170 mov.l @r5+,r0 /* load last long & increment source addr */
174 add #4,r4 /* increment dest addr */ 171 add #12,r4 /* increment dest addr */
175 mov.b r0,@(8,r4) /* store low byte */ 172 mov.b r0,@-r4 /* store low byte */
176 shlr8 r0 /* get middle 2 bytes */ 173 shlr8 r0 /* get middle 2 bytes */
177 mov.w r0,@(6,r4) /* store as word */ 174 mov.w r0,@-r4 /* store as word */
178 shlr16 r0 /* get upper byte */ 175 shlr16 r0 /* get upper byte */
179 mov.b r0,@(5,r4) /* and store */ 176 mov.b r0,@-r4 /* and store */
180
181 .end_do13:
182 bra .start_b2 /* jump to trailing byte loop */ 177 bra .start_b2 /* jump to trailing byte loop */
183 add #1,r4 /* readjust destination */ 178 add #-4,r4 /* readjust destination */
184 179
185 /* byte aligned destination (long + 3) */ 180 /* byte aligned destination (long + 3) */
186 .align 2 181 .align 2
187 .loop_do3: 182 .loop_do3:
188 mov.l @r5+,r1 /* load first long & increment source addr */ 183 mov.l @r5+,r1 /* load first long & increment source addr */
189 add #8,r4 /* increment dest addr */ 184 add #16,r4 /* increment dest addr */
190 mov.l @r5+,r0 /* load second long & increment source addr */ 185 mov.l @r5+,r0 /* load second long & increment source addr */
191 mov r1,r2 /* copy first long */ 186 mov r1,r2 /* copy first long */
192 mov.b r0,@(10,r4) /* store low byte of second long */ 187 mov.b r0,@-r4 /* store low byte of second long */
193 shlr8 r0 /* get middle 2 bytes */ 188 shlr8 r0 /* get middle 2 bytes */
194 mov.w r0,@(8,r4) /* store as word */ 189 mov.w r0,@-r4 /* store as word */
195 shlr16 r0 /* get upper byte */ 190 shlr16 r0 /* get upper byte */
196 shll8 r2 /* move lower 3 bytes of first long one up.. */ 191 shll8 r2 /* move lower 3 bytes of first long one up.. */
197 or r2,r0 /* ..combine with the 1 byte of second long.. */ 192 or r2,r0 /* ..combine with the 1 byte of second long.. */
198 mov.l r0,@(4,r4) /* ..and store as long */ 193 mov.l r0,@-r4 /* ..and store as long */
199 swap.w r1,r0 /* swap-copy first long */ 194 shlr16 r1 /* get upper byte of first long.. */
200 shlr8 r0 /* get original upper byte.. */ 195 shlr8 r1
201 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ 196 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
202 mov.b r0,@(3,r4) /* ..and store */ 197 mov.b r1,@-r4 /* ..and store */
203 bt .loop_do3 198 bt .loop_do3
204 199
205 bra .last_do13 /* handle last longword: reuse routine for (long + 1) */ 200 bra .last_do13 /* handle last longword: reuse routine for (long + 1) */
206 add #2,r4 /* correct the offset difference to do1 */ 201 add #4,r3 /* readjust end address */
207 202
208 /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */ 203 /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */
209 .align 2 204 .align 2