summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2006-09-07 00:16:04 +0000
committerJens Arnold <amiconn@rockbox.org>2006-09-07 00:16:04 +0000
commit9d2f7b5c6dd01e113abf5ff75fa24d389da1f244 (patch)
tree6468332a4cc60b3a1e38adfdbd55d97f1ac32b01
parent825fb8a2649b187c27d93ec28a4bb87fa93797f2 (diff)
downloadrockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.gz
rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.zip
Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/SOURCES2
-rwxr-xr-xfirmware/common/memset16.c6
-rwxr-xr-xfirmware/include/memory.h2
-rwxr-xr-xfirmware/target/arm/memset16-arm.S80
4 files changed, 83 insertions, 7 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 9001535150..545227fbdf 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -53,7 +53,7 @@ target/sh/memset-sh.S
53common/memcpy.c 53common/memcpy.c
54common/memmove.c 54common/memmove.c
55target/arm/memset-arm.S 55target/arm/memset-arm.S
56common/memset16.c 56target/arm/memset16-arm.S
57#else 57#else
58common/memcpy.c 58common/memcpy.c
59common/memmove.c 59common/memmove.c
diff --git a/firmware/common/memset16.c b/firmware/common/memset16.c
index bc187a5d25..5f0fc3fe08 100755
--- a/firmware/common/memset16.c
+++ b/firmware/common/memset16.c
@@ -22,15 +22,13 @@
22#define UNALIGNED(X) ((long)X & (sizeof(long) - 1)) 22#define UNALIGNED(X) ((long)X & (sizeof(long) - 1))
23#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) 23#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
24 24
25void *memset16(void *dst, int val, size_t len) 25void memset16(void *dst, int val, size_t len)
26{ 26{
27#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) 27#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
28 unsigned short *p = (unsigned short *)dst; 28 unsigned short *p = (unsigned short *)dst;
29 29
30 while (len--) 30 while (len--)
31 *p++ = val; 31 *p++ = val;
32
33 return dst;
34#else 32#else
35 unsigned short *p = (unsigned short *)dst; 33 unsigned short *p = (unsigned short *)dst;
36 unsigned int i; 34 unsigned int i;
@@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len)
73 71
74 while (len--) 72 while (len--)
75 *p++ = val; 73 *p++ = val;
76
77 return dst;
78#endif /* not PREFER_SIZE_OVER_SPEED */ 74#endif /* not PREFER_SIZE_OVER_SPEED */
79} 75}
diff --git a/firmware/include/memory.h b/firmware/include/memory.h
index 2b2a60c7b4..559c6ed96a 100755
--- a/firmware/include/memory.h
+++ b/firmware/include/memory.h
@@ -22,6 +22,6 @@
22 22
23#include <sys/types.h> 23#include <sys/types.h>
24 24
25void *memset16(void *dst, int val, size_t len); 25void memset16(void *dst, int val, size_t len);
26 26
27#endif /* _MEMORY_H_ */ 27#endif /* _MEMORY_H_ */
diff --git a/firmware/target/arm/memset16-arm.S b/firmware/target/arm/memset16-arm.S
new file mode 100755
index 0000000000..13213c57f0
--- /dev/null
+++ b/firmware/target/arm/memset16-arm.S
@@ -0,0 +1,80 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Thom Johansen
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19#include "config.h"
20
21 .section .icode,"ax",%progbits
22
23 .align 2
24
25/* The following code is based on code from the Linux kernel version 2.6.15.3,
26 * linux/arch/arm/lib/memset.S
27 *
28 * Copyright (C) 1995-2000 Russell King
29 */
30
/*
 * void memset16(void *dst, int val, size_t len)
 *
 * Stores the low 16 bits of val into len consecutive halfwords starting
 * at dst.  Bits 16-31 of val are ignored, matching the truncating
 * "*p++ = val" of the C implementation.
 *
 * In:      r0 = dst  (assumed halfword aligned; only bit 1 of the address
 *                     is examined to reach word alignment)
 *          r1 = val
 *          r2 = len, counted in halfwords (not bytes)
 * Out:     no return value
 * Clobber: r0-r3, ip, flags.  lr is used as a fourth data register on the
 *          bulk path and is saved/restored on the stack (4 bytes).
 */
    .global memset16
    .type   memset16,%function
memset16:
    tst     r0, #2                  @ dst on an odd halfword boundary?
    cmpne   r2, #0                  @ ...and anything to store at all?
    strneh  r1, [r0], #2            @ store one halfword to align
    subne   r2, r2, #1

/*
 * The pointer in r0 is now aligned to a word boundary (or len was 0).
 * Replicate the low halfword of val into both halves of r1.  Shifting left
 * first discards bits 16-31 of val, so a sign-extended or otherwise wide
 * val cannot leak into the upper halfword of the word stores below.
 */
    mov     r1, r1, lsl #16         @ r1 = val16 << 16
    orr     r1, r1, r1, lsr #16     @ r1 = (val16 << 16) | val16
    mov     r3, r1
    cmp     r2, #8                  @ fewer than 8 halfwords (16 bytes)?
    blt     4f                      @ then lr need not be borrowed
/*
 * We need an extra register for this loop - save the return address and
 * use the LR
 */
    str     lr, [sp, #-4]!
    mov     ip, r1
    mov     lr, r1

2:  subs    r2, r2, #32             @ 32 halfwords = 64 bytes per round
    stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
    stmgeia r0!, {r1, r3, ip, lr}
    stmgeia r0!, {r1, r3, ip, lr}
    stmgeia r0!, {r1, r3, ip, lr}
    bgt     2b
    ldmeqfd sp!, {pc}               @ count hit exactly 0: pop lr into pc
                                    @ Otherwise <64 bytes to go.
/*
 * No need to correct the count; subtracting 32 left bits 0-4 untouched and
 * we're only testing bits from now on
 */
    tst     r2, #16                 @ 16 halfwords = 32 bytes
    stmneia r0!, {r1, r3, ip, lr}
    stmneia r0!, {r1, r3, ip, lr}
    tst     r2, #8                  @ 8 halfwords = 16 bytes
    stmneia r0!, {r1, r3, ip, lr}
    ldr     lr, [sp], #4            @ restore the return address

4:  tst     r2, #4                  @ 4 halfwords = 8 bytes
    stmneia r0!, {r1, r3}
    tst     r2, #2                  @ 2 halfwords = one word
    strne   r1, [r0], #4

    tst     r2, #1                  @ trailing single halfword
    strneh  r1, [r0], #2
    bx      lr
    .size   memset16, .-memset16