/*
 * Provenance: recovered from a whitespace-collapsed patch (cgit v1.2.3)
 * that creates firmware/common/memset_a.S
 * (new file mode 100644, index 0000000000..bce8936089, 132 insertions).
 *
 * From 0310f16005a1e98c441221bc0f0f7586d0b19763 Mon Sep 17 00:00:00 2001
 * From: Jens Arnold
 * Date: Fri, 21 Jan 2005 22:43:02 +0000
 * Subject: Revived C implementations of memcpy() and memset() for platforms
 *  without asm optimized versions (gmini), replacing the intermediate
 *  strings.c. Moved the asm optimized versions 'out of the way' for the
 *  implicit 'make' rules by renaming them to *_a.S (for
 *  assembler/alternative).
 *
 * git-svn-id: svn://svn.rockbox.org/rockbox/trunk@5628
 *             a1c6a512-1295-4272-9138-f99709370657
 *
 * Review note: the MCF5249 routine below differs from that commit. It now
 * returns the start address (the committed version returned with the fill
 * value in %d0 and the end address in %a0) and carries a .size directive.
 * The SH7034 instruction sequence is unchanged.
 */
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2004 by Jens Arnold
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
#include "config.h"

    .section    .icode,"ax",@progbits

    .align      2
#if CONFIG_CPU == SH7034
    .global     _memset
    .type       _memset,@function

/* Fills a memory region with specified byte value
 * This version is optimized for speed
 *
 * arguments:
 *  r4 - start address
 *  r5 - data
 *  r6 - length
 *
 * return value:
 *  r0 - start address (like ANSI version)
 *
 * register usage:
 *  r0 - temporary
 *  r1 - bit mask for rounding to long bounds
 *  r2 - last / first long bound (only if >= 12 bytes)
 *  r4 - start address
 *  r5 - data (spread to all 4 bytes if >= 12 bytes)
 *  r6 - current address (runs down from end to start)
 *
 * The instruction order below is devised in a way to utilize the pipelining
 * of the SH1 to the max. The routine fills memory from end to start in
 * order to utilize the auto-decrementing store instructions.
 *
 * Structure (in execution order, i.e. from the highest address downwards):
 *  1. byte stores from the end address down to the last long bound
 *  2. long stores, two per pass, down to the first long bound (+ 0 or 4)
 *  3. at most one more single long store
 *  4. byte stores down to the start address
 * Regions shorter than 12 bytes skip steps 1-3 and are filled by step 4
 * alone.
 *
 * Why 12: with length >= 12 the distance between the first long bound
 * (start rounded up, at most start + 3) and the last long bound (end rounded
 * down, at least end - 3) is a multiple of 4 and at least 12 - 6 = 6, hence
 * at least 8. The two-longs-per-pass loop stores two longs unconditionally
 * on its first pass, so it relies on that guarantee.
 */

_memset:
    add     r4,r6           /* r6 = end_address */

    mov     r6,r0
    add     #-12,r0         /* r0 = r6 - 12; don't go below 12 here! */
    cmp/hs  r4,r0           /* >= 12 bytes to fill? (unsigned r0 >= r4) */
    bf      .start_b2       /* no, jump directly to byte loop */

    extu.b  r5,r5           /* start: spread data to all 4 bytes */
    swap.b  r5,r0
    or      r0,r5           /* data now in 2 lower bytes of r5 */
    swap.w  r5,r0
    or      r0,r5           /* data now in all 4 bytes of r5 */

    mov     #-4,r1          /* r1 = 0xFFFFFFFC */

    mov     r6,r2
    bra     .start_b1
    and     r1,r2           /* (delay slot) r2 = last long bound */

    /* leading byte loop: sets 0..3 bytes
     * ("leading" in execution order; these are the highest addresses) */
.loop_b1:
    mov.b   r5,@-r6         /* store byte */
.start_b1:
    cmp/hi  r2,r6           /* runs r6 down to last long bound */
    bt      .loop_b1

    mov     r4,r2
    add     #11,r2          /* combined for rounding and offset */
    and     r1,r2           /* r2 = first long bound + 8 */

    /* main loop: set 2 longs per pass
     * The compare sits between the two stores, so it tests r6 after the
     * first store only; the second store does not change the T bit that
     * bt examines. The loop exits with 0 or 1 long still to be stored. */
.loop2_l:
    mov.l   r5,@-r6         /* store first long */
    cmp/hi  r2,r6           /* runs r6 down to first or second long bound */
    mov.l   r5,@-r6         /* store second long */
    bt      .loop2_l

    add     #-8,r2          /* correct offset: r2 = first long bound */
    cmp/hi  r2,r6           /* 1 long left? */
    bf      .start_b2       /* no, jump to trailing byte loop */

    bra     .start_b2       /* jump to trailing byte loop */
    mov.l   r5,@-r6         /* (delay slot) store last long */

    /* trailing byte loop
     * ("trailing" in execution order; these are the lowest addresses).
     * Also the only loop executed for regions shorter than 12 bytes; r5 is
     * not spread in that case, but mov.b stores its low byte only. */
    .align  2
.loop_b2:
    mov.b   r5,@-r6         /* store byte */
.start_b2:
    cmp/hi  r4,r6           /* runs r6 down to the start address */
    bt      .loop_b2

    rts
    mov     r4,r0           /* (delay slot) return start address */

.end:
    .size   _memset,.end-_memset
#elif CONFIG_CPU == MCF5249
    .global     memset
    .type       memset,@function

/* Fills a memory region with specified byte value
 * This version is not optimized at all
 *
 * arguments (on the stack, above the return address):
 *  (4,%sp)  - start address
 *  (8,%sp)  - data
 *  (12,%sp) - length
 *
 * return value:
 *  %d0 - start address (like ANSI version, and like the SH7034 variant)
 *        %a0 holds the same value on return, so the result is also right
 *        for a toolchain that returns pointers in %a0.
 *
 * register usage:
 *  %d0 - data; start address on return
 *  %d1 - length (only used to compute the end address)
 *  %a0 - start address (never modified)
 *  %a1 - current address (runs down from end to start)
 *
 * The region is filled from end to start so that %a0 still holds the
 * start address when the loop is done. A length of 0 stores nothing.
 */
memset:
    move.l  (4,%sp),%a0     /* Start address */
    move.l  (8,%sp),%d0     /* Value */
    move.l  (12,%sp),%d1    /* Length */
    lea.l   (%d1,%a0),%a1   /* a1 = a0+d1 = end address */

    bra.b   .byteloopend    /* test first: length may be 0 */

.byteloop:
    move.b  %d0,-(%a1)      /* store byte, moving downwards */
.byteloopend:
    cmp.l   %a0,%a1         /* runs a1 down to the start address */
    bne.b   .byteloop

    move.l  %a0,%d0         /* return start address */
    rts

.end:
    .size   memset,.end-memset
#endif