diff options
author | Jens Arnold <amiconn@rockbox.org> | 2005-10-31 01:10:27 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2005-10-31 01:10:27 +0000 |
commit | d8ad74de74f89fbbf5b1824b9f01878f7b5679ce (patch) | |
tree | 1fe61dfca968dd8c655bf691ef5d635eea3c83d8 | |
parent | 162ab7baeb4ac7a98d99ba6b78b300d742311301 (diff) | |
download | rockbox-d8ad74de74f89fbbf5b1824b9f01878f7b5679ce.tar.gz rockbox-d8ad74de74f89fbbf5b1824b9f01878f7b5679ce.zip |
SH1 memcpy(): Slightly changed the loop concept (the main loops now use pre-decrement stores), which saves 4 bytes and makes the long+1 case as fast as the long+3 case.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7696 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | firmware/common/memcpy_a.S | 69 |
1 files changed, 32 insertions, 37 deletions
diff --git a/firmware/common/memcpy_a.S b/firmware/common/memcpy_a.S index 81cced187f..125c46a505 100644 --- a/firmware/common/memcpy_a.S +++ b/firmware/common/memcpy_a.S | |||
@@ -78,13 +78,12 @@ _memcpy: | |||
78 | 78 | ||
79 | /* selector for main copy loop */ | 79 | /* selector for main copy loop */ |
80 | .end_b1: | 80 | .end_b1: |
81 | mov r6,r3 /* move end address to r3 */ | ||
82 | mov #3,r1 | 81 | mov #3,r1 |
83 | and r4,r1 /* r1 = dest alignment offset */ | 82 | and r4,r1 /* r1 = dest alignment offset */ |
84 | sub r1,r4 /* r4 now long aligned */ | ||
85 | mova .jmptab,r0 | 83 | mova .jmptab,r0 |
86 | mov.b @(r0,r1),r1 /* select appropriate main loop */ | 84 | mov.b @(r0,r1),r1 /* select appropriate main loop */ |
87 | add r0,r1 | 85 | add r0,r1 |
86 | mov r6,r3 /* move end address to r3 */ | ||
88 | jmp @r1 /* and jump to it */ | 87 | jmp @r1 /* and jump to it */ |
89 | add #-7,r3 /* adjust end addr for main loops doing 2 longs/pass */ | 88 | add #-7,r3 /* adjust end addr for main loops doing 2 longs/pass */ |
90 | 89 | ||
@@ -94,11 +93,11 @@ _memcpy: | |||
94 | .align 2 | 93 | .align 2 |
95 | .loop_do0: | 94 | .loop_do0: |
96 | mov.l @r5+,r1 /* load first long & increment source addr */ | 95 | mov.l @r5+,r1 /* load first long & increment source addr */ |
97 | add #8,r4 /* increment dest addr */ | 96 | add #16,r4 /* increment dest addr & account for decrementing stores */ |
98 | mov.l @r5+,r0 /* load second long & increment source addr */ | 97 | mov.l @r5+,r0 /* load second long & increment source addr */ |
99 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ | 98 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ |
100 | mov.l r1,@r4 /* store first long */ | 99 | mov.l r0,@-r4 /* store second long */ |
101 | mov.l r0,@(4,r4) /* store second long; NOT ALIGNED - no speed loss here! */ | 100 | mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ |
102 | bt .loop_do0 | 101 | bt .loop_do0 |
103 | 102 | ||
104 | add #4,r3 /* readjust end address */ | 103 | add #4,r3 /* readjust end address */ |
@@ -109,22 +108,21 @@ _memcpy: | |||
109 | add #4,r4 /* increment dest addr */ | 108 | add #4,r4 /* increment dest addr */ |
110 | bra .start_b2 /* jump to trailing byte loop */ | 109 | bra .start_b2 /* jump to trailing byte loop */ |
111 | mov.l r0,@(4,r4) /* store last long */ | 110 | mov.l r0,@(4,r4) /* store last long */ |
112 | 111 | ||
113 | /* word aligned destination (long + 2) */ | 112 | /* word aligned destination (long + 2) */ |
114 | .align 2 | 113 | .align 2 |
115 | .loop_do2: | 114 | .loop_do2: |
116 | mov.l @r5+,r1 /* load first long & increment source addr */ | 115 | mov.l @r5+,r1 /* load first long & increment source addr */ |
117 | add #8,r4 /* increment dest addr */ | 116 | add #16,r4 /* increment dest addr */ |
118 | mov.l @r5+,r0 /* load second long & increment source addr */ | 117 | mov.l @r5+,r0 /* load second long & increment source addr */ |
119 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ | 118 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ |
120 | mov.w r0,@(8,r4) /* store low word of second long */ | 119 | mov.w r0,@-r4 /* store low word of second long */ |
121 | xtrct r1,r0 /* extract low word of first long & high word of second long */ | 120 | xtrct r1,r0 /* extract low word of first long & high word of second long */ |
122 | mov.l r0,@(4,r4) /* and store as long */ | 121 | mov.l r0,@-r4 /* and store as long */ |
123 | swap.w r1,r0 /* get high word of first long */ | 122 | swap.w r1,r0 /* get high word of first long */ |
124 | mov.w r0,@(2,r4) /* and store it */ | 123 | mov.w r0,@-r4 /* and store it */ |
125 | bt .loop_do2 | 124 | bt .loop_do2 |
126 | 125 | ||
127 | add #2,r4 /* readjust destination */ | ||
128 | add #4,r3 /* readjust end address */ | 126 | add #4,r3 /* readjust end address */ |
129 | cmp/hi r5,r3 /* one long left? */ | 127 | cmp/hi r5,r3 /* one long left? */ |
130 | bf .start_b2 /* no, jump to trailing byte loop */ | 128 | bf .start_b2 /* no, jump to trailing byte loop */ |
@@ -148,62 +146,59 @@ _memcpy: | |||
148 | .align 2 | 146 | .align 2 |
149 | .loop_do1: | 147 | .loop_do1: |
150 | mov.l @r5+,r1 /* load first long & increment source addr */ | 148 | mov.l @r5+,r1 /* load first long & increment source addr */ |
151 | add #8,r4 /* increment dest addr */ | 149 | add #16,r4 /* increment dest addr */ |
152 | mov.l @r5+,r0 /* load second long & increment source addr */ | 150 | mov.l @r5+,r0 /* load second long & increment source addr */ |
153 | mov r1,r2 /* copy first long */ | 151 | mov r1,r2 /* copy first long */ |
154 | mov.b r0,@(8,r4) /* store low byte of second long */ | 152 | mov.b r0,@-r4 /* store low byte of second long */ |
155 | shlr8 r0 /* get upper 3 bytes */ | 153 | shlr8 r0 /* get upper 3 bytes */ |
156 | shll16 r2 /* move low byte of first long all the way up, .. */ | 154 | shll16 r2 /* move low byte of first long all the way up, .. */ |
157 | shll8 r2 | 155 | shll8 r2 |
158 | or r0,r2 /* ..combine with the 3 bytes of second long.. */ | 156 | or r2,r0 /* ..combine with the 3 bytes of second long.. */ |
159 | mov r1,r0 /* copy first long to r0 */ | ||
160 | mov.l r2,@(4,r4) /* ..and store as long */ | ||
161 | shlr8 r0 /* get middle 2 bytes */ | ||
162 | mov.w r0,@(2,r4) /* store as word */ | ||
163 | shlr16 r0 /* get upper byte */ | ||
164 | mov.b r0,@(1,r4) /* and store */ | ||
165 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ | 157 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ |
158 | mov.l r0,@-r4 /* ..and store as long */ | ||
159 | shlr8 r1 /* get middle 2 bytes */ | ||
160 | mov.w r1,@-r4 /* store as word */ | ||
161 | shlr16 r1 /* get upper byte */ | ||
162 | mov.b r1,@-r4 /* and store */ | ||
166 | bt .loop_do1 | 163 | bt .loop_do1 |
167 | 164 | ||
168 | .last_do13: | ||
169 | add #4,r3 /* readjust end address */ | 165 | add #4,r3 /* readjust end address */ |
166 | .last_do13: | ||
170 | cmp/hi r5,r3 /* one long left? */ | 167 | cmp/hi r5,r3 /* one long left? */ |
171 | bf .end_do13 /* no, get out of here */ | 168 | bf .start_b2 /* no, jump to trailing byte loop */ |
172 | 169 | ||
173 | mov.l @r5+,r0 /* load last long & increment source addr */ | 170 | mov.l @r5+,r0 /* load last long & increment source addr */ |
174 | add #4,r4 /* increment dest addr */ | 171 | add #12,r4 /* increment dest addr */ |
175 | mov.b r0,@(8,r4) /* store low byte */ | 172 | mov.b r0,@-r4 /* store low byte */ |
176 | shlr8 r0 /* get middle 2 bytes */ | 173 | shlr8 r0 /* get middle 2 bytes */ |
177 | mov.w r0,@(6,r4) /* store as word */ | 174 | mov.w r0,@-r4 /* store as word */ |
178 | shlr16 r0 /* get upper byte */ | 175 | shlr16 r0 /* get upper byte */ |
179 | mov.b r0,@(5,r4) /* and store */ | 176 | mov.b r0,@-r4 /* and store */ |
180 | |||
181 | .end_do13: | ||
182 | bra .start_b2 /* jump to trailing byte loop */ | 177 | bra .start_b2 /* jump to trailing byte loop */ |
183 | add #1,r4 /* readjust destination */ | 178 | add #-4,r4 /* readjust destination */ |
184 | 179 | ||
185 | /* byte aligned destination (long + 3) */ | 180 | /* byte aligned destination (long + 3) */ |
186 | .align 2 | 181 | .align 2 |
187 | .loop_do3: | 182 | .loop_do3: |
188 | mov.l @r5+,r1 /* load first long & increment source addr */ | 183 | mov.l @r5+,r1 /* load first long & increment source addr */ |
189 | add #8,r4 /* increment dest addr */ | 184 | add #16,r4 /* increment dest addr */ |
190 | mov.l @r5+,r0 /* load second long & increment source addr */ | 185 | mov.l @r5+,r0 /* load second long & increment source addr */ |
191 | mov r1,r2 /* copy first long */ | 186 | mov r1,r2 /* copy first long */ |
192 | mov.b r0,@(10,r4) /* store low byte of second long */ | 187 | mov.b r0,@-r4 /* store low byte of second long */ |
193 | shlr8 r0 /* get middle 2 bytes */ | 188 | shlr8 r0 /* get middle 2 bytes */ |
194 | mov.w r0,@(8,r4) /* store as word */ | 189 | mov.w r0,@-r4 /* store as word */ |
195 | shlr16 r0 /* get upper byte */ | 190 | shlr16 r0 /* get upper byte */ |
196 | shll8 r2 /* move lower 3 bytes of first long one up.. */ | 191 | shll8 r2 /* move lower 3 bytes of first long one up.. */ |
197 | or r2,r0 /* ..combine with the 1 byte of second long.. */ | 192 | or r2,r0 /* ..combine with the 1 byte of second long.. */ |
198 | mov.l r0,@(4,r4) /* ..and store as long */ | 193 | mov.l r0,@-r4 /* ..and store as long */ |
199 | swap.w r1,r0 /* swap-copy first long */ | 194 | shlr16 r1 /* get upper byte of first long.. */ |
200 | shlr8 r0 /* get original upper byte.. */ | 195 | shlr8 r1 |
201 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ | 196 | cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ |
202 | mov.b r0,@(3,r4) /* ..and store */ | 197 | mov.b r1,@-r4 /* ..and store */ |
203 | bt .loop_do3 | 198 | bt .loop_do3 |
204 | 199 | ||
205 | bra .last_do13 /* handle last longword: reuse routine for (long + 1) */ | 200 | bra .last_do13 /* handle last longword: reuse routine for (long + 1) */ |
206 | add #2,r4 /* correct the offset difference to do1 */ | 201 | add #4,r3 /* readjust end address */ |
207 | 202 | ||
208 | /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */ | 203 | /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */ |
209 | .align 2 | 204 | .align 2 |