From a035261089403de259e74ce4dd196e2715138ed2 Mon Sep 17 00:00:00 2001 From: Thomas Martitz Date: Sat, 7 Jan 2012 19:56:09 +0100 Subject: Move optimized memcpy and friends and strlen to firmware/asm, using the new automatic-asm-picking infrastructure. --- firmware/target/mips/memcpy-mips.S | 143 ---------------------- firmware/target/mips/memset-mips.S | 239 ------------------------------------- 2 files changed, 382 deletions(-) delete mode 100644 firmware/target/mips/memcpy-mips.S delete mode 100644 firmware/target/mips/memset-mips.S (limited to 'firmware/target/mips') diff --git a/firmware/target/mips/memcpy-mips.S b/firmware/target/mips/memcpy-mips.S deleted file mode 100644 index 2e7f245c69..0000000000 --- a/firmware/target/mips/memcpy-mips.S +++ /dev/null @@ -1,143 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2002, 2003 Free Software Foundation, Inc. - * This file was originally part of the GNU C Library - * Contributed to glibc by Hartvig Ekner , 2002 - * Adapted for Rockbox by Maurus Cuelenaere, 2009 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied.
- * - ****************************************************************************/ - -#include "config.h" -#include "mips.h" - -/* void *memcpy(void *s1, const void *s2, size_t n); - * - * a0: destination (s1), a1: source (s2), a2: byte count (n); v0 returns s1. - * Copies shorter than 8 bytes are done one byte at a time (last8). When a0 and - * a1 agree in their low two bits the bulk is moved 8 words, then 1 word, at a - * time; otherwise (shift) every word is read with an LWHI/LWLO pair. - */ - -#ifdef ROCKBOX_BIG_ENDIAN -# define LWHI lwl /* high part is left in big-endian */ -# define SWHI swl /* high part is left in big-endian */ -# define LWLO lwr /* low part is right in big-endian */ -# define SWLO swr /* low part is right in big-endian */ -#else -# define LWHI lwr /* high part is right in little-endian */ -# define SWHI swr /* high part is right in little-endian */ -# define LWLO lwl /* low part is left in little-endian */ -# define SWLO swl /* low part is left in little-endian */ -#endif - - .section .icode, "ax", %progbits - - .global memcpy - .type memcpy, %function - - .set noreorder - -memcpy: - slti t0, a2, 8 # Less than 8? - bne t0, zero, last8 - move v0, a0 # Setup exit value before too late - - xor t0, a1, a0 # Find a0/a1 displacement - andi t0, 0x3 - bne t0, zero, shift # Go handle the unaligned case - subu t1, zero, a1 - andi t1, 0x3 # a0/a1 are aligned, but are we - beq t1, zero, chk8w # starting in the middle of a word? - subu a2, t1 - LWHI t0, 0(a1) # Yes we are... take care of that - addu a1, t1 - SWHI t0, 0(a0) - addu a0, t1 - -chk8w: - andi t0, a2, 0x1f # 32 or more bytes left? - beq t0, a2, chk1w - subu a3, a2, t0 # Yes - addu a3, a1 # a3 = end address of loop - move a2, t0 # a2 = what will be left after loop -lop8w: - lw t0, 0(a1) # Loop taking 8 words at a time - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 16(a1) - lw t5, 20(a1) - lw t6, 24(a1) - lw t7, 28(a1) - addiu a0, 32 - addiu a1, 32 - sw t0, -32(a0) - sw t1, -28(a0) - sw t2, -24(a0) - sw t3, -20(a0) - sw t4, -16(a0) - sw t5, -12(a0) - sw t6, -8(a0) - bne a1, a3, lop8w - sw t7, -4(a0) - -chk1w: - andi t0, a2, 0x3 # 4 or more bytes left?
- beq t0, a2, last8 - subu a3, a2, t0 # Yes, handle them one word at a time - addu a3, a1 # a3 again end address - move a2, t0 -lop1w: - lw t0, 0(a1) - addiu a0, 4 - addiu a1, 4 - bne a1, a3, lop1w - sw t0, -4(a0) - -last8: - blez a2, lst8e # Handle last 8 bytes, one at a time - addu a3, a2, a1 -lst8l: - lb t0, 0(a1) - addiu a0, 1 - addiu a1, 1 - bne a1, a3, lst8l - sb t0, -1(a0) -lst8e: - jr ra # Bye, bye - nop - -shift: - subu a3, zero, a0 # Src and Dest unaligned - andi a3, 0x3 # (unoptimized case...) - beq a3, zero, shft1 - subu a2, a3 # a2 = bytes left - LWHI t0, 0(a1) # Take care of first odd part - LWLO t0, 3(a1) - addu a1, a3 - SWHI t0, 0(a0) - addu a0, a3 -shft1: - andi t0, a2, 0x3 - subu a3, a2, t0 - addu a3, a1 -shfth: - LWHI t1, 0(a1) # Limp through, word by word - LWLO t1, 3(a1) - addiu a0, 4 - addiu a1, 4 - bne a1, a3, shfth - sw t1, -4(a0) - b last8 # Handle anything which may be left - move a2, t0 - - .set reorder diff --git a/firmware/target/mips/memset-mips.S b/firmware/target/mips/memset-mips.S deleted file mode 100644 index 8db76d9123..0000000000 --- a/firmware/target/mips/memset-mips.S +++ /dev/null @@ -1,239 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * This file was originally part of the Linux/MIPS GNU C Library - * Copyright (C) 1998 by Ralf Baechle - * Adapted for Rockbox by Maurus Cuelenaere, 2009 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version.
- * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ****************************************************************************/ - -#include "config.h" -#include "mips.h" - -/* FILLn(dst, offset, val): store the word val to the n bytes starting at - * offset(dst), one sw per word. */ -#define FILL256(dst, offset, val) \ - sw val, (offset + 0x00)(dst); \ - sw val, (offset + 0x04)(dst); \ - sw val, (offset + 0x08)(dst); \ - sw val, (offset + 0x0c)(dst); \ - sw val, (offset + 0x10)(dst); \ - sw val, (offset + 0x14)(dst); \ - sw val, (offset + 0x18)(dst); \ - sw val, (offset + 0x1c)(dst); \ - sw val, (offset + 0x20)(dst); \ - sw val, (offset + 0x24)(dst); \ - sw val, (offset + 0x28)(dst); \ - sw val, (offset + 0x2c)(dst); \ - sw val, (offset + 0x30)(dst); \ - sw val, (offset + 0x34)(dst); \ - sw val, (offset + 0x38)(dst); \ - sw val, (offset + 0x3c)(dst); \ - sw val, (offset + 0x40)(dst); \ - sw val, (offset + 0x44)(dst); \ - sw val, (offset + 0x48)(dst); \ - sw val, (offset + 0x4c)(dst); \ - sw val, (offset + 0x50)(dst); \ - sw val, (offset + 0x54)(dst); \ - sw val, (offset + 0x58)(dst); \ - sw val, (offset + 0x5c)(dst); \ - sw val, (offset + 0x60)(dst); \ - sw val, (offset + 0x64)(dst); \ - sw val, (offset + 0x68)(dst); \ - sw val, (offset + 0x6c)(dst); \ - sw val, (offset + 0x70)(dst); \ - sw val, (offset + 0x74)(dst); \ - sw val, (offset + 0x78)(dst); \ - sw val, (offset + 0x7c)(dst); \ - sw val, (offset + 0x80)(dst); \ - sw val, (offset + 0x84)(dst); \ - sw val, (offset + 0x88)(dst); \ - sw val, (offset + 0x8c)(dst); \ - sw val, (offset + 0x90)(dst); \ - sw val, (offset + 0x94)(dst); \ - sw val, (offset + 0x98)(dst); \ - sw val, (offset + 0x9c)(dst); \ - sw val, (offset + 0xa0)(dst); \ - sw val, (offset + 0xa4)(dst); \ - sw val, (offset + 0xa8)(dst); \ - sw val, (offset + 0xac)(dst); \ - sw val, (offset + 0xb0)(dst); \ - sw val, (offset + 0xb4)(dst); \ - sw val, (offset + 0xb8)(dst); \ - sw val, (offset + 0xbc)(dst); \ - sw val, (offset + 0xc0)(dst); \ - sw val, (offset + 0xc4)(dst); \ - sw val,
(offset + 0xc8)(dst); \ - sw val, (offset + 0xcc)(dst); \ - sw val, (offset + 0xd0)(dst); \ - sw val, (offset + 0xd4)(dst); \ - sw val, (offset + 0xd8)(dst); \ - sw val, (offset + 0xdc)(dst); \ - sw val, (offset + 0xe0)(dst); \ - sw val, (offset + 0xe4)(dst); \ - sw val, (offset + 0xe8)(dst); \ - sw val, (offset + 0xec)(dst); \ - sw val, (offset + 0xf0)(dst); \ - sw val, (offset + 0xf4)(dst); \ - sw val, (offset + 0xf8)(dst); \ - sw val, (offset + 0xfc)(dst); - -#define FILL128(dst, offset, val) \ - sw val, (offset + 0x00)(dst); \ - sw val, (offset + 0x04)(dst); \ - sw val, (offset + 0x08)(dst); \ - sw val, (offset + 0x0c)(dst); \ - sw val, (offset + 0x10)(dst); \ - sw val, (offset + 0x14)(dst); \ - sw val, (offset + 0x18)(dst); \ - sw val, (offset + 0x1c)(dst); \ - sw val, (offset + 0x20)(dst); \ - sw val, (offset + 0x24)(dst); \ - sw val, (offset + 0x28)(dst); \ - sw val, (offset + 0x2c)(dst); \ - sw val, (offset + 0x30)(dst); \ - sw val, (offset + 0x34)(dst); \ - sw val, (offset + 0x38)(dst); \ - sw val, (offset + 0x3c)(dst); \ - sw val, (offset + 0x40)(dst); \ - sw val, (offset + 0x44)(dst); \ - sw val, (offset + 0x48)(dst); \ - sw val, (offset + 0x4c)(dst); \ - sw val, (offset + 0x50)(dst); \ - sw val, (offset + 0x54)(dst); \ - sw val, (offset + 0x58)(dst); \ - sw val, (offset + 0x5c)(dst); \ - sw val, (offset + 0x60)(dst); \ - sw val, (offset + 0x64)(dst); \ - sw val, (offset + 0x68)(dst); \ - sw val, (offset + 0x6c)(dst); \ - sw val, (offset + 0x70)(dst); \ - sw val, (offset + 0x74)(dst); \ - sw val, (offset + 0x78)(dst); \ - sw val, (offset + 0x7c)(dst); - -#define FILL64(dst, offset, val) \ - sw val, (offset + 0x00)(dst); \ - sw val, (offset + 0x04)(dst); \ - sw val, (offset + 0x08)(dst); \ - sw val, (offset + 0x0c)(dst); \ - sw val, (offset + 0x10)(dst); \ - sw val, (offset + 0x14)(dst); \ - sw val, (offset + 0x18)(dst); \ - sw val, (offset + 0x1c)(dst); \ - sw val, (offset + 0x20)(dst); \ - sw val, (offset + 0x24)(dst); \ - sw val, (offset + 0x28)(dst);
\ - sw val, (offset + 0x2c)(dst); \ - sw val, (offset + 0x30)(dst); \ - sw val, (offset + 0x34)(dst); \ - sw val, (offset + 0x38)(dst); \ - sw val, (offset + 0x3c)(dst); - -#define FILL32(dst, offset, val) \ - sw val, (offset + 0x00)(dst); \ - sw val, (offset + 0x04)(dst); \ - sw val, (offset + 0x08)(dst); \ - sw val, (offset + 0x0c)(dst); \ - sw val, (offset + 0x10)(dst); \ - sw val, (offset + 0x14)(dst); \ - sw val, (offset + 0x18)(dst); \ - sw val, (offset + 0x1c)(dst); - -/* Bytes filled per iteration of the main loop, and the macro filling one such block. */ -#define FILL 64 -#define F_FILL FILL64 - - -/* SWHI stores from the given address to the end of its word (used for the - * unaligned head); SWLO stores from the start of the word up to the given - * address (used for the 1..3 byte tail). */ -#ifdef ROCKBOX_BIG_ENDIAN -# define SWHI swl /* high part is left in big-endian */ -# define SWLO swr /* low part is right in big-endian */ -#else -# define SWHI swr /* high part is right in little-endian */ -# define SWLO swl /* low part is left in little-endian */ -#endif - -/* - * memset(void *s, int c, size_t n) - * - * a0: start of area to clear - * a1: char to fill with - * a2: size of area to clear - * v0: returns the original a0 - * - * The low byte of a1 is replicated into all four bytes of the fill word. - * Areas shorter than 4 bytes are filled bytewise (small_memset). Otherwise the - * unaligned head is written with SWHI, whole FILL-byte blocks in a loop, the - * remaining whole words by jumping into the middle of an F_FILL sequence, and - * the final 1..3 bytes with SWLO. - */ - .section .icode, "ax", %progbits - - .global memset - .type memset, %function - - .set noreorder - .align 5 -memset: - beqz a1, 1f - move v0, a0 /* result */ - - andi a1, 0xff /* spread fillword */ - sll t1, a1, 8 - or a1, t1 - sll t1, a1, 16 - or a1, t1 -1: - - sltiu t0, a2, 4 /* very small region? */ - bnez t0, small_memset - andi t0, a0, 3 /* aligned? */ - - beqz t0, 1f - subu t0, 4 /* alignment in bytes */ - - SWHI a1, (a0) /* make word aligned */ - subu a0, t0 /* word align ptr */ - addu a2, t0 /* correct size */ - -1: ori t1, a2, (FILL-1) /* # of full blocks */ - xori t1, (FILL-1) - beqz t1, memset_partial /* no block to fill */ - andi t0, a2, (FILL-4) - - addu t1, a0 /* end address */ - .set reorder -1: addiu a0, FILL - F_FILL( a0, -FILL, a1 ) - bne t1, a0, 1b - .set noreorder - -memset_partial: - la t1, 2f /* where to start */ - subu t1, t0 /* one 4-byte sw per 4 bytes left, so back up t0 bytes of code */ - jr t1 - addu a0, t0 /* dest ptr */ - - F_FILL( a0, -FILL, a1 ) /* ... but first do words ...
*/ -2: andi a2, 3 /* 0 <= n <= 3 to go */ - - beqz a2, 1f - addu a0, a2 /* What's left */ - SWLO a1, -1(a0) /* a0-1 is the last byte: fill from the word start up to it (SWHI would run to the end of the word, past the area) */ -1: jr ra - move a2, zero - -small_memset: - beqz a2, 2f - addu t1, a0, a2 /* t1 = end address */ - -1: addiu a0, 1 /* fill bytewise */ - bne t1, a0, 1b - sb a1, -1(a0) - -2: jr ra /* done */ - move a2, zero - - .set reorder -- cgit v1.2.3