From 9d2f7b5c6dd01e113abf5ff75fa24d389da1f244 Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Thu, 7 Sep 2006 00:16:04 +0000 Subject: Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657 --- firmware/SOURCES | 2 +- firmware/common/memset16.c | 6 +-- firmware/include/memory.h | 2 +- firmware/target/arm/memset16-arm.S | 80 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 7 deletions(-) create mode 100755 firmware/target/arm/memset16-arm.S diff --git a/firmware/SOURCES b/firmware/SOURCES index 9001535150..545227fbdf 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -53,7 +53,7 @@ target/sh/memset-sh.S common/memcpy.c common/memmove.c target/arm/memset-arm.S -common/memset16.c +target/arm/memset16-arm.S #else common/memcpy.c common/memmove.c diff --git a/firmware/common/memset16.c b/firmware/common/memset16.c index bc187a5d25..5f0fc3fe08 100755 --- a/firmware/common/memset16.c +++ b/firmware/common/memset16.c @@ -22,15 +22,13 @@ #define UNALIGNED(X) ((long)X & (sizeof(long) - 1)) #define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) -void *memset16(void *dst, int val, size_t len) +void memset16(void *dst, int val, size_t len) { #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) unsigned short *p = (unsigned short *)dst; while (len--) *p++ = val; - - return dst; #else unsigned short *p = (unsigned short *)dst; unsigned int i; @@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len) while (len--) *p++ = val; - - return dst; #endif /* not PREFER_SIZE_OVER_SPEED */ } diff --git a/firmware/include/memory.h b/firmware/include/memory.h index 2b2a60c7b4..559c6ed96a 100755 --- a/firmware/include/memory.h +++ b/firmware/include/memory.h @@ -22,6 +22,6 @@ #include -void *memset16(void *dst, int val, size_t len); +void memset16(void *dst, int val, size_t len); 
#endif /* _MEMORY_H_ */
diff --git a/firmware/target/arm/memset16-arm.S b/firmware/target/arm/memset16-arm.S
new file mode 100755
index 0000000000..13213c57f0
--- /dev/null
+++ b/firmware/target/arm/memset16-arm.S
@@ -0,0 +1,99 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by Thom Johansen
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+
+    .section    .icode,"ax",%progbits
+
+    .align      2
+
+/* The following code is based on code from the Linux kernel version 2.6.15.3,
+ * linux/arch/arm/lib/memset.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+
+/*
+ * void memset16(void *dst, int val, size_t len)
+ *
+ * Fill len halfwords (16-bit units, not bytes) starting at dst with the low
+ * 16 bits of val.
+ *
+ * In:   r0 = dst  (only bit 1 is tested, so at least halfword alignment is
+ *                  expected)
+ *       r1 = val
+ *       r2 = len  (count of halfwords)
+ * Out:  nothing; the C prototype returns void
+ * Uses: r0-r3 and ip may be modified. lr doubles as a fourth data register
+ *       in the bulk loop; it is saved on the stack first and restored.
+ */
+    .global     memset16
+    .type       memset16,%function
+memset16:
+    tst     r0, #2           @ unaligned?
+    cmpne   r2, #0           @ ...and is there anything to store at all?
+    strneh  r1, [r0], #2     @ store one halfword to align
+    subne   r2, r2, #1
+
+/*
+ * r0 is now word aligned, or nothing is left to store. Build the fill word
+ * from the low halfword of val only: shifting left first drops bits 16-31,
+ * so the word stores below write the same 16 bits as the halfword stores
+ * even when val has bits above bit 15 set.
+ */
+    mov     r1, r1, lsl #16
+    orr     r1, r1, r1, lsr #16
+    mov     r3, r1
+    cmp     r2, #8
+    blt     4f               @ short fill: tail code only, lr not yet touched
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+    str     lr, [sp, #-4]!
+    mov     ip, r1
+    mov     lr, r1
+
+2:  subs    r2, r2, #32             @ 32 halfwords per iteration
+    stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
+    stmgeia r0!, {r1, r3, ip, lr}
+    stmgeia r0!, {r1, r3, ip, lr}
+    stmgeia r0!, {r1, r3, ip, lr}
+    bgt     2b
+    ldmeqfd sp!, {pc}               @ Done if count hit 0, else <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on.
+ * Subtracting multiples of 32 left bits 0-4 of the count unchanged.
+ */
+    tst     r2, #16                 @ 16 halfwords = 32 bytes
+    stmneia r0!, {r1, r3, ip, lr}
+    stmneia r0!, {r1, r3, ip, lr}
+    tst     r2, #8                  @ 8 halfwords = 16 bytes
+    stmneia r0!, {r1, r3, ip, lr}
+    ldr     lr, [sp], #4            @ restore the return address
+
+4:  tst     r2, #4                  @ 4 halfwords = 8 bytes
+    stmneia r0!, {r1, r3}
+    tst     r2, #2                  @ 2 halfwords = one word
+    strne   r1, [r0], #4
+
+    tst     r2, #1                  @ final odd halfword
+    strneh  r1, [r0], #2
+    bx      lr
+.end:
+    .size   memset16,.end-memset16
-- 
cgit v1.2.3