/*
 * Provenance: recovered from a cgit v1.2.3 patch view of the commit below.
 * The diff envelope and the "+" line prefixes were removed so that the file
 * assembles again; the routine has been revised since (see the memset16
 * header comment further down for the list of changes).
 *
 *   Commit:     9d2f7b5c6dd01e113abf5ff75fa24d389da1f244
 *   From:       Jens Arnold
 *   Date:       Thu, 7 Sep 2006 00:16:04 +0000
 *   Subject:    Assembler optimised memset16() for ARM, by Thom Johansen.
 *               Should speed up LCD clearing and solid rectangle drawing on
 *               colour iPods somewhat.
 *   git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900
 *               a1c6a512-1295-4272-9138-f99709370657
 *   File:       firmware/target/arm/memset16-arm.S
 *               (new file, mode 100755, blob 13213c57f0, 80 lines as committed)
 */
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2006 by Thom Johansen
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
#include "config.h"

/* NOTE(review): .icode is presumably mapped to fast internal RAM by the
 * linker script - confirm against the target's link file. */
        .section .icode,"ax",%progbits

        .align  2                       @ 2^2 = 4 byte boundary

/* The following code is based on code from the Linux kernel version 2.6.15.3,
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 */

/*
 * memset16 - fill a buffer with a repeated 16-bit value.
 *
 * Presumed C prototype (not visible in this file - confirm in the header):
 *     void memset16(void *dst, int val, size_t len);
 *
 * Syntax: GNU as, ARM state, pre-UAL ("divided") mnemonics.
 *
 * In:   r0 = destination. Only bit 1 is examined, so the pointer is assumed
 *            to be at least halfword aligned (bit 0 clear).
 *       r1 = fill value. Only bits 15..0 are used; bits 31..16 are ignored.
 *       r2 = number of halfwords (not bytes) to write. The count is checked
 *            with signed conditions, so values of 0x80000000 and above are
 *            not supported.
 * Out:  nothing. r0 is left pointing just past the last halfword written.
 * Clobbers: r0-r3, ip, flags. lr is used as a data register in the bulk
 *       loop and restored before returning.
 * Stack: 4 bytes (saved lr), only when at least 8 halfwords remain after
 *       the alignment step.
 *
 * Changes relative to the originally committed version:
 *   - The 32-bit store pattern is built from the low halfword of r1 only.
 *     The old "orr r1, r1, r1, lsl #16" merged any set upper bits of val
 *     into every second halfword, while the single-halfword stores wrote the
 *     plain low halfword, giving an inconsistent fill for such values.
 *   - The early exit returns with bx like the normal exit does. On ARMv4T,
 *     loading pc with LDM does not switch to Thumb state, so the old
 *     "ldmeqfd sp!, {pc}" was not interworking-safe while "bx lr" was.
 *   - The size is computed with the location counter instead of a label
 *     named ".end", which shared its name with the gas .end directive.
 */
        .global memset16
        .type   memset16,%function
memset16:
        tst     r0, #2                  @ destination on an odd halfword?
        cmpne   r2, #0                  @ ...and is there anything to write?
        strneh  r1, [r0], #2            @ store one halfword to align
        subne   r2, r2, #1

/*
 * The pointer in r0 is now aligned to a word boundary (given a halfword
 * aligned dst). Replicate the low halfword of the value into both halves of
 * r1 and r3 so that word and multi-word stores can be used.
 */
        mov     r1, r1, lsl #16         @ discard bits 31..16 of the value
        orr     r1, r1, r1, lsr #16     @ r1 = val16 : val16
        mov     r3, r1
        cmp     r2, #8
        blt     4f                      @ fewer than 8 halfwords: tail only
/*
 * We need an extra register for this loop - save the return address and
 * use the LR
 */
        str     lr, [sp, #-4]!
        mov     ip, r1
        mov     lr, r1

2:      subs    r2, r2, #32             @ 32 halfwords per iteration
        stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        bgt     2b
        ldreq   lr, [sp], #4            @ count hit exactly zero: all done
        bxeq    lr                      @ (ldr leaves the flags untouched)
/*
 * Now <64 bytes to go.
 * No need to correct the count; we're only testing bits from now on
 * (subtracting 32 does not change bits 4..0 of r2).
 */
        tst     r2, #16                 @ 16 halfwords = 2 x 16 bytes
        stmneia r0!, {r1, r3, ip, lr}
        stmneia r0!, {r1, r3, ip, lr}
        tst     r2, #8                  @ 8 halfwords = 16 bytes
        stmneia r0!, {r1, r3, ip, lr}
        ldr     lr, [sp], #4            @ lr is a return address again

4:      tst     r2, #4                  @ 4 halfwords = 8 bytes
        stmneia r0!, {r1, r3}
        tst     r2, #2                  @ 2 halfwords = one word
        strne   r1, [r0], #4

        tst     r2, #1                  @ final odd halfword
        strneh  r1, [r0], #2
        bx      lr

        .size   memset16, . - memset16