From 1010b76c72a560e50243f6805e70fe091fd2fb1c Mon Sep 17 00:00:00 2001 From: Daniel Ankers Date: Sat, 13 Jan 2007 23:57:14 +0000 Subject: ARM optimised memcpy/memmove from glibc. This should give increased performance on all ARM targets, especially iPod 5G git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12000 a1c6a512-1295-4272-9138-f99709370657 --- firmware/SOURCES | 4 +- firmware/target/arm/memcpy-arm.S | 174 +++++++++++++++++++++++++++++++++++ firmware/target/arm/memmove-arm.S | 188 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 2 deletions(-) create mode 100644 firmware/target/arm/memcpy-arm.S create mode 100644 firmware/target/arm/memmove-arm.S diff --git a/firmware/SOURCES b/firmware/SOURCES index 4f8e531a2e..3fdbdbb820 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -248,8 +248,8 @@ target/coldfire/i2c-coldfire.c #elif defined(CPU_PP) || defined(CPU_ARM) /* CPU_PP => CPU_ARM, CPU_ARM !=> CPU_PP */ -common/memcpy.c -common/memmove.c +target/arm/memcpy-arm.S +target/arm/memmove-arm.S common/strlen.c #ifndef SIMULATOR target/arm/memset-arm.S diff --git a/firmware/target/arm/memcpy-arm.S b/firmware/target/arm/memcpy-arm.S new file mode 100644 index 0000000000..b8cbff02d7 --- /dev/null +++ b/firmware/target/arm/memcpy-arm.S @@ -0,0 +1,174 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Free Software Foundation, Inc. + * This file was originally part of the GNU C Library + * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre) + * Adapted for Rockbox by Daniel Ankers + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#else +#define pull lsl +#define push lsr +#endif + +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ + + .section .icode,"ax",%progbits + + .align 2 + .global memcpy + .type memcpy,%function + +memcpy: + stmfd sp!, {r0, r4, lr} + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + +2: +3: +4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: nop + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr lr, [r1], #4 + + add pc, pc, ip + nop + nop + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + str r7, [r0], #4 + str r8, [r0], #4 + str lr, [r0], #4 + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1], #1 + ldrcsb r4, [r1], #1 + ldrcsb ip, [r1] + strneb r3, [r0], #1 + strcsb r4, [r0], #1 + strcsb ip, [r0] + + ldmfd sp!, {r0, r4, pc} + +9: rsb ip, ip, #4 + cmp ip, #2 + ldrgtb r3, [r1], #1 + ldrgeb r4, [r1], #1 + ldrb lr, [r1], #1 + strgtb r3, [r0], #1 + strgeb r4, [r0], #1 + subs r2, r2, ip + strb lr, [r0], #1 + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr lr, [r1], #4 + beq 17f + bgt 18f + + + .macro forward_copy_shift pull push + + subs r2, r2, #28 + blt 14f + +11: stmfd sp!, {r5 - r9} + +12: +13: ldmia r1!, {r4, r5, r6, r7} + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldmia r1!, {r8, r9, ip, lr} + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} + bge 12b + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, pull #\pull + ldr lr, [r1], #4 + subs ip, ip, #4 + orr r3, r3, lr, push #\push + str r3, [r0], #4 + bgt 15b + +16: sub r1, r1, #(\push / 8) + b 8b + + .endm + + + forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 + diff --git a/firmware/target/arm/memmove-arm.S b/firmware/target/arm/memmove-arm.S new file mode 100644 index 0000000000..94103c0c35 --- /dev/null +++ b/firmware/target/arm/memmove-arm.S @@ -0,0 +1,188 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Free Software Foundation, Inc. + * This file was originally part of the GNU C Library + * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre) + * Adapted for Rockbox by Daniel Ankers + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#else +#define pull lsl +#define push lsr +#endif + + .text + +/* + * Prototype: void *memmove(void *dest, const void *src, size_t n); + * + * Note: + * + * If the memory regions don't overlap, we simply branch to memcpy which is + * normally a bit faster. Otherwise the copy is done going downwards. + */ + + .section .icode,"ax",%progbits + + .align 2 + .global memmove + .type memmove,%function + +memmove: + + subs ip, r0, r1 + cmphi r2, ip + bls memcpy + + stmfd sp!, {r0, r4, lr} + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + +2: +3: +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: nop + ldr r3, [r1, #-4]! + ldr r4, [r1, #-4]! + ldr r5, [r1, #-4]! + ldr r6, [r1, #-4]! + ldr r7, [r1, #-4]! + ldr r8, [r1, #-4]! + ldr lr, [r1, #-4]! + + add pc, pc, ip + nop + nop + str r3, [r0, #-4]! + str r4, [r0, #-4]! + str r5, [r0, #-4]! + str r6, [r0, #-4]! + str r7, [r0, #-4]! + str r8, [r0, #-4]! + str lr, [r0, #-4]! + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! + strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} + +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f + + + .macro backward_copy_shift push pull + + subs r2, r2, #28 + blt 14f + +11: stmfd sp!, {r5 - r9} + +12: +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, push #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, pull #\pull + mov ip, ip, push #\push + orr ip, ip, r9, pull #\pull + mov r9, r9, push #\push + orr r9, r9, r8, pull #\pull + mov r8, r8, push #\push + orr r8, r8, r7, pull #\pull + mov r7, r7, push #\push + orr r7, r7, r6, pull #\pull + mov r6, r6, push #\push + orr r6, r6, r5, pull #\pull + mov r5, r5, push #\push + orr r5, r5, r4, pull #\pull + mov r4, r4, push #\push + orr r4, r4, r3, pull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov lr, r3, push #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, pull #\pull + str lr, [r0, #-4]! + bgt 15b + +16: add r1, r1, #(\pull / 8) + b 8b + + .endm + + + backward_copy_shift push=8 pull=24 + +17: backward_copy_shift push=16 pull=16 + +18: backward_copy_shift push=24 pull=8 + + -- cgit v1.2.3