From 77b3625763ae4d5aa6aaa9d44fbc1bfec6b29335 Mon Sep 17 00:00:00 2001
From: Michael Sevakis
Date: Wed, 6 Aug 2014 04:26:52 -0400
Subject: Add mempcpy implementation

A GNU extension that returns dst + size instead of dst. It's a nice
shortcut when copying strings with a known size or back-to-back blocks
and you have to do it often.

May of course be called directly or alternately through
__builtin_mempcpy in some compiler versions.

For ASM on native targets, it is implemented as an alternate entrypoint
to memcpy which adds minimal code and overhead.

Change-Id: I4cbb3483f6df3c1007247fe0a95fd7078737462b
---
 firmware/asm/SOURCES       |  4 ++++
 firmware/asm/arm/memcpy.S  | 10 +++++++++-
 firmware/asm/m68k/memcpy.S | 10 ++++++++++
 firmware/asm/mempcpy.c     | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 firmware/asm/mips/memcpy.S | 11 ++++++++++-
 firmware/asm/sh/memcpy.S   |  8 ++++++++
 6 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 firmware/asm/mempcpy.c

(limited to 'firmware/asm')

diff --git a/firmware/asm/SOURCES b/firmware/asm/SOURCES
index a9293b4297..ebb6951071 100644
--- a/firmware/asm/SOURCES
+++ b/firmware/asm/SOURCES
@@ -7,6 +7,10 @@ memset.c
 strlen.c
 #endif
 
+#if defined(WIN32) || defined(APPLICATION)
+mempcpy.c
+#endif
+
 #if (defined(SANSA_E200) || defined(GIGABEAT_F) || defined(GIGABEAT_S) || \
 defined(CREATIVE_ZVx) || defined(SANSA_CONNECT) || defined(SANSA_FUZEPLUS) || \
 defined(COWON_D2) || defined(MINI2440) || defined(SAMSUNG_YPR0) || \
diff --git a/firmware/asm/arm/memcpy.S b/firmware/asm/arm/memcpy.S
index 2a55fb5656..83d43293e6 100644
--- a/firmware/asm/arm/memcpy.S
+++ b/firmware/asm/arm/memcpy.S
@@ -36,17 +36,25 @@
 #endif
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+/* Prototype: void *mempcpy(void *dest, const void *src, size_t n); */
 
         .section    .icode,"ax",%progbits
 
         .align      2
         .global     memcpy
         .type       memcpy,%function
+        .global     mempcpy
+        .type       mempcpy,%function
+
+mempcpy:
+        add     r3, r0, r2              /* r3 = dest + n, the mempcpy result */
+        stmfd   sp!, {r3, r4, lr}       /* memcpy's frame, r3 in r0's slot; presumably popped into r0 on exit (not shown here) */
+        b       0f                      /* skip memcpy's own register push */
 
 memcpy:
         stmfd   sp!, {r0, r4, lr}
 
-        subs    r2, r2, #4
+0:      subs    r2, r2, #4              /* 0: mempcpy joins the common path here */
         blt     8f
         ands    ip, r0, #3
         bne     9f
diff --git a/firmware/asm/m68k/memcpy.S b/firmware/asm/m68k/memcpy.S
index 9762e31e02..a88ac3d091 100644
--- a/firmware/asm/m68k/memcpy.S
+++ b/firmware/asm/m68k/memcpy.S
@@ -27,6 +27,8 @@
     .global     memcpy
     .global     __memcpy_fwd_entry
     .type       memcpy,@function
+    .global     mempcpy
+    .type       mempcpy,@function
 
 /* Copies <length> bytes of data in memory from <source> to <dest>
  * This version is optimized for speed
@@ -53,6 +55,14 @@
  * long+3) it writes longwords only. Same goes for word aligned destinations
  * if FULLSPEED is undefined.
  */
+mempcpy:
+    move.l  (4,%sp),%a1     /* Destination */
+    move.l  (8,%sp),%a0     /* Source */
+    move.l  (12,%sp),%d1    /* Length */
+
+    add.l   %d1,(4,%sp)     /* retval=Destination + Length; overwrites the dest argument slot on the stack */
+    bra.b   __memcpy_fwd_entry  /* continue at the entry label exported by this file (not shown in this hunk) */
+
 memcpy:
     move.l  (4,%sp),%a1     /* Destination */
     move.l  (8,%sp),%a0     /* Source */
diff --git a/firmware/asm/mempcpy.c b/firmware/asm/mempcpy.c
new file mode 100644
index 0000000000..2b1ccecbe8
--- /dev/null
+++ b/firmware/asm/mempcpy.c
@@ -0,0 +1,47 @@
+/*
+FUNCTION
+        <<mempcpy>>---copy memory regions and return end pointer
+
+ANSI_SYNOPSIS
+        #include <string.h>
+        void* mempcpy(void *<[out]>, const void *<[in]>, size_t <[n]>);
+
+TRAD_SYNOPSIS
+        void *mempcpy(<[out]>, <[in]>, <[n]>)
+        void *<[out]>;
+        void *<[in]>;
+        size_t <[n]>;
+
+DESCRIPTION
+        This function copies <[n]> bytes from the memory region
+        pointed to by <[in]> to the memory region pointed to by
+        <[out]>.
+
+        If the regions overlap, the behavior is undefined.
+
+RETURNS
+        <<mempcpy>> returns a pointer to the byte following the
+        last byte copied to the <[out]> region.
+
+PORTABILITY
+<<mempcpy>> is a GNU extension.
+
+<<mempcpy>> requires no supporting OS subroutines.
+
+        */
+
+#include "config.h"
+#include "_ansi.h" /* for _DEFUN */
+#include <string.h>
+
+/* This may be conjoined with memcpy in <cpu>/memcpy.S to get it nearly for
+   free */
+
+_PTR
+_DEFUN (mempcpy, (dst0, src0, len0),
+        _PTR dst0 _AND
+        _CONST _PTR src0 _AND
+        size_t len0)
+{
+    return (char *)memcpy(dst0, src0, len0) + len0; /* char *: ISO C has no arithmetic on void * */
+}
diff --git a/firmware/asm/mips/memcpy.S b/firmware/asm/mips/memcpy.S
index edbf5ac5eb..ec1625bb4f 100644
--- a/firmware/asm/mips/memcpy.S
+++ b/firmware/asm/mips/memcpy.S
@@ -43,8 +43,16 @@
 
     .global     memcpy
     .type       memcpy, %function
+    .global     mempcpy
+    .type       mempcpy, %function
 
     .set        noreorder
+mempcpy:
+    slti    t0, a2, 8               # Less than 8?
+    bne     t0, zero, last8         # Yes: take the last8 path
+    addu    v0, a0, a2              # exit value = s1 + n (bne delay slot)
+    b       1f                      # Rejoin memcpy past its own setup
+    xor     t0, a1, a0              # Find a0/a1 displacement (fill delay)
 
 memcpy:
     slti    t0, a2, 8               # Less than 8?
@@ -52,7 +60,8 @@ memcpy:
     move    v0, a0                  # Setup exit value before too late
 
     xor     t0, a1, a0              # Find a0/a1 displacement
-    andi    t0, 0x3
+
+1:  andi    t0, 0x3                 # 1: mempcpy joins here
     bne     t0, zero, shift         # Go handle the unaligned case
     subu    t1, zero, a1
     andi    t1, 0x3                 # a0/a1 are aligned, but are we
diff --git a/firmware/asm/sh/memcpy.S b/firmware/asm/sh/memcpy.S
index 59c5801ac0..3d623c48cd 100644
--- a/firmware/asm/sh/memcpy.S
+++ b/firmware/asm/sh/memcpy.S
@@ -24,8 +24,10 @@
 
     .align      2
     .global     _memcpy
+    .global     _mempcpy
     .global     ___memcpy_fwd_entry
     .type       _memcpy,@function
+    .type       _mempcpy,@function
 
 /* Copies <length> bytes of data in memory from <source> to <dest>
  * This version is optimized for speed
@@ -51,6 +53,10 @@
  * The instruction order is devised in a way to utilize the pipelining
  * of the SH1 to the max. The routine also tries to utilize fast page mode.
  */
+_mempcpy:
+    mov     r4,r7       /* store dest + length for returning */
+    bra     ___memcpy_fwd_entry /* delayed branch: the add below still executes */
+    add     r6,r7       /* delay slot: r7 = dest + length */
 
 _memcpy:
     mov     r4,r7       /* store dest for returning */
@@ -217,3 +223,5 @@ ___memcpy_fwd_entry:
     mov     r7,r0       /* return dest start address */
 .end:
     .size   _memcpy,.end-_memcpy
+    .size   _mempcpy,.end-_mempcpy
+
-- 
cgit v1.2.3