summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Ankers <dan@weirdo.org.uk>2007-01-13 23:57:14 +0000
committerDaniel Ankers <dan@weirdo.org.uk>2007-01-13 23:57:14 +0000
commit1010b76c72a560e50243f6805e70fe091fd2fb1c (patch)
tree82a80e1222d4121d3aab1b4d51cd91dce17901e8
parentec81431466ec0be59e658f4763ff6078d3702122 (diff)
downloadrockbox-1010b76c72a560e50243f6805e70fe091fd2fb1c.tar.gz
rockbox-1010b76c72a560e50243f6805e70fe091fd2fb1c.zip
ARM optimised memcpy/memmove from glibc. This should give increased performance on all ARM targets, especially iPod 5G
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12000 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/SOURCES4
-rw-r--r--firmware/target/arm/memcpy-arm.S174
-rw-r--r--firmware/target/arm/memmove-arm.S188
3 files changed, 364 insertions, 2 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 4f8e531a2e..3fdbdbb820 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -248,8 +248,8 @@ target/coldfire/i2c-coldfire.c
248 248
249#elif defined(CPU_PP) || defined(CPU_ARM) 249#elif defined(CPU_PP) || defined(CPU_ARM)
250/* CPU_PP => CPU_ARM, CPU_ARM !=> CPU_PP */ 250/* CPU_PP => CPU_ARM, CPU_ARM !=> CPU_PP */
251common/memcpy.c 251target/arm/memcpy-arm.S
252common/memmove.c 252target/arm/memmove-arm.S
253common/strlen.c 253common/strlen.c
254#ifndef SIMULATOR 254#ifndef SIMULATOR
255target/arm/memset-arm.S 255target/arm/memset-arm.S
diff --git a/firmware/target/arm/memcpy-arm.S b/firmware/target/arm/memcpy-arm.S
new file mode 100644
index 0000000000..b8cbff02d7
--- /dev/null
+++ b/firmware/target/arm/memcpy-arm.S
@@ -0,0 +1,174 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 Free Software Foundation, Inc.
11 * This file was originally part of the GNU C Library
12 * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
13 * Adapted for Rockbox by Daniel Ankers
14 *
15 * All files in this archive are subject to the GNU General Public License.
16 * See the file COPYING in the source tree root for full license agreement.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ****************************************************************************/
22
23#include "config.h"
24
25/*
26 * Endian independent macros for shifting bytes within registers.
27 */
28#ifndef __ARMEB__
29#define pull lsr
30#define push lsl
31#else
32#define pull lsl
33#define push lsr
34#endif
35
36/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
37
38 .section .icode,"ax",%progbits
39
40 .align 2
41 .global memcpy
42 .type memcpy,%function
43
44memcpy: /* memcpy(dest=r0, src=r1, n=r2): ascending copy; the saved dest is reloaded into r0 on exit */
45 stmfd sp!, {r0, r4, lr} /* save dest (popped back into r0 at exit), r4 and the return address */
46
47 subs r2, r2, #4 /* r2 = n - 4 */
48 blt 8f /* fewer than 4 bytes: byte tail only */
49 ands ip, r0, #3 /* ip = dest & 3 */
50 bne 9f /* dest not word aligned: copy 1-3 leading bytes first */
51 ands ip, r1, #3 /* ip = src & 3 */
52 bne 10f /* dest aligned but src is not: shifted word copy */
53
541: subs r2, r2, #(28) /* both pointers word aligned here; r2 = remaining - 32 */
55 stmfd sp!, {r5 - r8} /* more scratch registers for the block copy (stmfd leaves the flags alone) */
56 blt 5f /* fewer than 32 bytes remain */
57
582:
593:
604: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load 32 bytes */
61 subs r2, r2, #32
62 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store 32 bytes */
63 bge 3b /* loop while at least 32 more bytes remain */
64
655: ands ip, r2, #28 /* ip = bytes in the remaining whole words (0..28, multiple of 4) */
66 rsb ip, ip, #32 /* ip = 32 - ip: offset for the two computed jumps below (pc reads as jump instruction + 8); flags of the ands are kept */
67 addne pc, pc, ip @ C is always clear here
68 b 7f /* no whole words left */
696: nop /* base address of the computed jump above */
70 ldr r3, [r1], #4 /* ldr ladder: entered part-way so that one load runs per remaining word */
71 ldr r4, [r1], #4
72 ldr r5, [r1], #4
73 ldr r6, [r1], #4
74 ldr r7, [r1], #4
75 ldr r8, [r1], #4
76 ldr lr, [r1], #4
77
78 add pc, pc, ip /* same offset again: enter the str ladder at the matching register */
79 nop
80 nop /* base address of the computed jump above */
81 str r3, [r0], #4
82 str r4, [r0], #4
83 str r5, [r0], #4
84 str r6, [r0], #4
85 str r7, [r0], #4
86 str r8, [r0], #4
87 str lr, [r0], #4
88
897: ldmfd sp!, {r5 - r8} /* restore the registers saved at label 1 */
90
918: movs r2, r2, lsl #31 /* byte tail: Z clear if bit 0 of r2 is set, C = bit 1 of r2 (the 0-3 bytes left) */
92 ldrneb r3, [r1], #1 /* bit 0 set: one byte */
93 ldrcsb r4, [r1], #1 /* bit 1 set: two more bytes */
94 ldrcsb ip, [r1]
95 strneb r3, [r0], #1
96 strcsb r4, [r0], #1
97 strcsb ip, [r0]
98
99 ldmfd sp!, {r0, r4, pc} /* restore r4, reload the saved dest into r0, return via the saved lr */
100
1019: rsb ip, ip, #4 /* ip = bytes needed to word-align dest (1..3) */
102 cmp ip, #2 /* gt: 3 bytes, ge: 2 or 3 bytes; one byte is always copied */
103 ldrgtb r3, [r1], #1
104 ldrgeb r4, [r1], #1
105 ldrb lr, [r1], #1
106 strgtb r3, [r0], #1
107 strgeb r4, [r0], #1
108 subs r2, r2, ip /* account for the bytes just copied; these flags feed the blt below */
109 strb lr, [r0], #1
110 blt 8b /* fewer than 4 bytes left: finish in the byte tail */
111 ands ip, r1, #3 /* ip = src & 3 now that dest is aligned */
112 beq 1b /* src aligned as well: plain word copy */
113
11410: bic r1, r1, #3 /* round src down to a word boundary; ip still holds src & 3 (1..3) */
115 cmp ip, #2
116 ldr lr, [r1], #4 /* first aligned source word, carried in lr */
117 beq 17f /* src & 3 == 2 */
118 bgt 18f /* src & 3 == 3; otherwise fall through for src & 3 == 1 */
119
120
121 .macro forward_copy_shift pull push /* word copy for misaligned src: every stored word is merged from two adjacent source words */
122
123 subs r2, r2, #28 /* r2 = remaining - 32 */
124 blt 14f /* fewer than 32 bytes remain */
125
12611: stmfd sp!, {r5 - r9} /* more scratch registers for the 32-byte loop */
127
12812:
12913: ldmia r1!, {r4, r5, r6, r7}
130 mov r3, lr, pull #\pull /* unused bytes of the previously loaded word */
131 subs r2, r2, #32 /* flags are consumed by the bge at the end of the loop */
132 ldmia r1!, {r8, r9, ip, lr} /* lr carries the last loaded word into the next round */
133 orr r3, r3, r4, push #\push /* completed with the leading bytes of the next word */
134 mov r4, r4, pull #\pull
135 orr r4, r4, r5, push #\push
136 mov r5, r5, pull #\pull
137 orr r5, r5, r6, push #\push
138 mov r6, r6, pull #\pull
139 orr r6, r6, r7, push #\push
140 mov r7, r7, pull #\pull
141 orr r7, r7, r8, push #\push
142 mov r8, r8, pull #\pull
143 orr r8, r8, r9, push #\push
144 mov r9, r9, pull #\pull
145 orr r9, r9, ip, push #\push
146 mov ip, ip, pull #\pull
147 orr ip, ip, lr, push #\push
148 stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} /* store 32 bytes */
149 bge 12b /* loop while at least 32 more bytes remain */
150
151 ldmfd sp!, {r5 - r9}
152
15314: ands ip, r2, #28 /* ip = bytes in the remaining whole words */
154 beq 16f /* none left */
155
15615: mov r3, lr, pull #\pull /* same merge, one word per iteration */
157 ldr lr, [r1], #4
158 subs ip, ip, #4
159 orr r3, r3, lr, push #\push
160 str r3, [r0], #4
161 bgt 15b /* until the whole words are done */
162
16316: sub r1, r1, #(\push / 8) /* step src back over the bytes of lr that were loaded but not yet stored */
164 b 8b /* finish with the byte tail */
165
166 .endm
167
168
169 forward_copy_shift pull=8 push=24 /* fall-through case: src & 3 == 1 */
170
17117: forward_copy_shift pull=16 push=16 /* src & 3 == 2 */
172
17318: forward_copy_shift pull=24 push=8 /* src & 3 == 3 */
174
diff --git a/firmware/target/arm/memmove-arm.S b/firmware/target/arm/memmove-arm.S
new file mode 100644
index 0000000000..94103c0c35
--- /dev/null
+++ b/firmware/target/arm/memmove-arm.S
@@ -0,0 +1,188 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 Free Software Foundation, Inc.
11 * This file was originally part of the GNU C Library
12 * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
13 * Adapted for Rockbox by Daniel Ankers
14 *
15 * All files in this archive are subject to the GNU General Public License.
16 * See the file COPYING in the source tree root for full license agreement.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ****************************************************************************/
22
23#include "config.h"
24
25/*
26 * Endian independent macros for shifting bytes within registers.
27 */
28#ifndef __ARMEB__
29#define pull lsr
30#define push lsl
31#else
32#define pull lsl
33#define push lsr
34#endif
35
36 .text
37
38/*
39 * Prototype: void *memmove(void *dest, const void *src, size_t n);
40 *
41 * Note:
42 *
43 * If dest is at or below src, or the memory regions don't overlap, we simply
44 * branch to memcpy which is normally a bit faster. Otherwise the copy is done going downwards.
45 */
46
47 .section .icode,"ax",%progbits
48
49 .align 2
50 .global memmove
51 .type memmove,%function
52
53memmove: /* memmove(dest=r0, src=r1, n=r2): branches to memcpy when dest <= src or n <= dest - src, else copies downwards from the end */
54
55 subs ip, r0, r1 /* ip = dest - src */
56 cmphi r2, ip /* only when dest > src (unsigned): compare n with that distance */
57 bls memcpy /* dest <= src, or n <= dest - src: branch to memcpy (no return here) */
58
59 stmfd sp!, {r0, r4, lr} /* save dest (popped back into r0 at exit), r4 and the return address */
60 add r1, r1, r2 /* r1 = src + n */
61 add r0, r0, r2 /* r0 = dest + n; everything below works downwards from these end pointers */
62 subs r2, r2, #4 /* r2 = n - 4 */
63 blt 8f /* fewer than 4 bytes: byte tail only */
64 ands ip, r0, #3 /* ip = (dest + n) & 3 */
65 bne 9f /* dest end not word aligned: copy 1-3 trailing bytes first */
66 ands ip, r1, #3 /* ip = (src + n) & 3 */
67 bne 10f /* dest end aligned but src end is not: shifted word copy */
68
691: subs r2, r2, #(28) /* both end pointers word aligned here; r2 = remaining - 32 */
70 stmfd sp!, {r5 - r8} /* more scratch registers for the block copy (stmfd leaves the flags alone) */
71 blt 5f /* fewer than 32 bytes remain */
72
732:
743:
754: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load the 32 bytes below r1 */
76 subs r2, r2, #32
77 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store them below r0 */
78 bge 3b /* loop while at least 32 more bytes remain */
79
805: ands ip, r2, #28 /* ip = bytes in the remaining whole words (0..28, multiple of 4) */
81 rsb ip, ip, #32 /* ip = 32 - ip: offset for the two computed jumps below (pc reads as jump instruction + 8); flags of the ands are kept */
82 addne pc, pc, ip @ C is always clear here
83 b 7f /* no whole words left */
846: nop /* base address of the computed jump above */
85 ldr r3, [r1, #-4]! /* ldr ladder: entered part-way so that one load runs per remaining word */
86 ldr r4, [r1, #-4]!
87 ldr r5, [r1, #-4]!
88 ldr r6, [r1, #-4]!
89 ldr r7, [r1, #-4]!
90 ldr r8, [r1, #-4]!
91 ldr lr, [r1, #-4]!
92
93 add pc, pc, ip /* same offset again: enter the str ladder at the matching register */
94 nop
95 nop /* base address of the computed jump above */
96 str r3, [r0, #-4]!
97 str r4, [r0, #-4]!
98 str r5, [r0, #-4]!
99 str r6, [r0, #-4]!
100 str r7, [r0, #-4]!
101 str r8, [r0, #-4]!
102 str lr, [r0, #-4]!
103
1047: ldmfd sp!, {r5 - r8} /* restore the registers saved at label 1 */
105
1068: movs r2, r2, lsl #31 /* byte tail: Z clear if bit 0 of r2 is set, C = bit 1 of r2 (the 0-3 bytes left) */
107 ldrneb r3, [r1, #-1]! /* bit 0 set: one byte */
108 ldrcsb r4, [r1, #-1]! /* bit 1 set: two more bytes */
109 ldrcsb ip, [r1, #-1]
110 strneb r3, [r0, #-1]!
111 strcsb r4, [r0, #-1]!
112 strcsb ip, [r0, #-1]
113 ldmfd sp!, {r0, r4, pc} /* restore r4, reload the saved dest into r0, return via the saved lr */
114
1159: cmp ip, #2 /* ip = dest end & 3 = bytes to copy before dest is word aligned (1..3); gt: 3, ge: 2 or 3 */
116 ldrgtb r3, [r1, #-1]!
117 ldrgeb r4, [r1, #-1]!
118 ldrb lr, [r1, #-1]!
119 strgtb r3, [r0, #-1]!
120 strgeb r4, [r0, #-1]!
121 subs r2, r2, ip /* account for the bytes just copied; these flags feed the blt below */
122 strb lr, [r0, #-1]!
123 blt 8b /* fewer than 4 bytes left: finish in the byte tail */
124 ands ip, r1, #3 /* ip = src end & 3 now that dest is aligned */
125 beq 1b /* src end aligned as well: plain word copy */
126
12710: bic r1, r1, #3 /* round the src end down to a word boundary; ip still holds its low two bits (1..3) */
128 cmp ip, #2
129 ldr r3, [r1, #0] /* aligned word holding the last source bytes, carried in r3 */
130 beq 17f /* src end & 3 == 2 */
131 blt 18f /* src end & 3 == 1; otherwise fall through for src end & 3 == 3 */
132
133
134 .macro backward_copy_shift push pull /* downward word copy for misaligned src: every stored word is merged from two adjacent source words */
135
136 subs r2, r2, #28 /* r2 = remaining - 32 */
137 blt 14f /* fewer than 32 bytes remain */
138
13911: stmfd sp!, {r5 - r9} /* more scratch registers for the 32-byte loop */
140
14112:
14213: ldmdb r1!, {r7, r8, r9, ip}
143 mov lr, r3, push #\push /* unused bytes of the previously loaded word */
144 subs r2, r2, #32 /* flags are consumed by the bge at the end of the loop */
145 ldmdb r1!, {r3, r4, r5, r6} /* r3 carries the lowest loaded word into the next round */
146 orr lr, lr, ip, pull #\pull /* completed with the trailing bytes of the word below */
147 mov ip, ip, push #\push
148 orr ip, ip, r9, pull #\pull
149 mov r9, r9, push #\push
150 orr r9, r9, r8, pull #\pull
151 mov r8, r8, push #\push
152 orr r8, r8, r7, pull #\pull
153 mov r7, r7, push #\push
154 orr r7, r7, r6, pull #\pull
155 mov r6, r6, push #\push
156 orr r6, r6, r5, pull #\pull
157 mov r5, r5, push #\push
158 orr r5, r5, r4, pull #\pull
159 mov r4, r4, push #\push
160 orr r4, r4, r3, pull #\pull
161 stmdb r0!, {r4 - r9, ip, lr} /* store 32 bytes below r0 */
162 bge 12b /* loop while at least 32 more bytes remain */
163
164 ldmfd sp!, {r5 - r9}
165
16614: ands ip, r2, #28 /* ip = bytes in the remaining whole words */
167 beq 16f /* none left */
168
16915: mov lr, r3, push #\push /* same merge, one word per iteration */
170 ldr r3, [r1, #-4]!
171 subs ip, ip, #4
172 orr lr, lr, r3, pull #\pull
173 str lr, [r0, #-4]!
174 bgt 15b /* until the whole words are done */
175
17616: add r1, r1, #(\pull / 8) /* step src forward over the bytes of r3 that were loaded but not yet stored */
177 b 8b /* finish with the byte tail */
178
179 .endm
180
181
182 backward_copy_shift push=8 pull=24 /* fall-through case: src end & 3 == 3 */
183
18417: backward_copy_shift push=16 pull=16 /* src end & 3 == 2 */
185
18618: backward_copy_shift push=24 pull=8 /* src end & 3 == 1 */
187
188