From a035261089403de259e74ce4dd196e2715138ed2 Mon Sep 17 00:00:00 2001 From: Thomas Martitz Date: Sat, 7 Jan 2012 19:56:09 +0100 Subject: Move optimized memcpy and friends and strlen to firmware/asm, using the new automatic-asm-picking infrastructure. --- firmware/asm/memmove.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 firmware/asm/memmove.c (limited to 'firmware/asm/memmove.c') diff --git a/firmware/asm/memmove.c b/firmware/asm/memmove.c new file mode 100644 index 0000000000..5f423964bb --- /dev/null +++ b/firmware/asm/memmove.c @@ -0,0 +1,147 @@ +/* +FUNCTION + <>---move possibly overlapping memory + +INDEX + memmove + +ANSI_SYNOPSIS + #include + void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>); + +TRAD_SYNOPSIS + #include + void *memmove(<[dst]>, <[src]>, <[length]>) + void *<[dst]>; + void *<[src]>; + size_t <[length]>; + +DESCRIPTION + This function moves <[length]> characters from the block of + memory starting at <<*<[src]>>> to the memory starting at + <<*<[dst]>>>. <> reproduces the characters correctly + at <<*<[dst]>>> even if the two areas overlap. + + +RETURNS + The function returns <[dst]> as passed. + +PORTABILITY +<> is ANSI C. + +<> requires no supporting OS subroutines. + +QUICKREF + memmove ansi pure +*/ + +#include "config.h" +#include <_ansi.h> +#include + +/* Nonzero if either X or Y is not aligned on a "long" boundary. */ +#define UNALIGNED(X, Y) \ + (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1))) + +/* How many bytes are copied each iteration of the 4X unrolled loop. */ +#define BIGBLOCKSIZE (sizeof (long) << 2) + +/* How many bytes are copied each iteration of the word copy loop. */ +#define LITTLEBLOCKSIZE (sizeof (long)) + +/* Threshhold for punting to the byte copier. */ +#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE) + +_PTR +_DEFUN (memmove, (dst_void, src_void, length), + _PTR dst_void _AND + _CONST _PTR src_void _AND + size_t length) ICODE_ATTR; + +_PTR +_DEFUN (memmove, (dst_void, src_void, length), + _PTR dst_void _AND + _CONST _PTR src_void _AND + size_t length) +{ +#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) + char *dst = dst_void; + _CONST char *src = src_void; + + if (src < dst && dst < src + length) + { + /* Have to copy backwards */ + src += length; + dst += length; + while (length--) + { + *--dst = *--src; + } + } + else + { + while (length--) + { + *dst++ = *src++; + } + } + + return dst_void; +#else + char *dst = dst_void; + _CONST char *src = src_void; + long *aligned_dst; + _CONST long *aligned_src; + unsigned int len = length; + + if (src < dst && dst < src + len) + { + /* Destructive overlap...have to copy backwards */ + src += len; + dst += len; + while (len--) + { + *--dst = *--src; + } + } + else + { + /* Use optimizing algorithm for a non-destructive copy to closely + match memcpy. If the size is small or either SRC or DST is unaligned, + then punt into the byte copy loop. This should be rare. */ + if (!TOO_SMALL(len) && !UNALIGNED (src, dst)) + { + aligned_dst = (long*)dst; + aligned_src = (long*)src; + + /* Copy 4X long words at a time if possible. */ + while (len >= BIGBLOCKSIZE) + { + *aligned_dst++ = *aligned_src++; + *aligned_dst++ = *aligned_src++; + *aligned_dst++ = *aligned_src++; + *aligned_dst++ = *aligned_src++; + len -= BIGBLOCKSIZE; + } + + /* Copy one long word at a time if possible. */ + while (len >= LITTLEBLOCKSIZE) + { + *aligned_dst++ = *aligned_src++; + len -= LITTLEBLOCKSIZE; + } + + /* Pick up any residual with a byte copier. */ + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (len--) + { + *dst++ = *src++; + } + } + + return dst_void; +#endif /* not PREFER_SIZE_OVER_SPEED */ +} -- cgit v1.2.3