From 9b13a5d151a14ba7a5b8c502763cb56356260ceb Mon Sep 17 00:00:00 2001 From: Maurus Cuelenaere Date: Wed, 4 Feb 2009 17:33:19 +0000 Subject: MIPS: * Add assembly optimised variants for memcpy, memset and find_first_set_bit * Add option to map_address in MMU to set caching algorithm git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19920 a1c6a512-1295-4272-9138-f99709370657 --- firmware/SOURCES | 10 +- firmware/target/mips/ffs-mips.S | 54 +++++++++ firmware/target/mips/memcpy-mips.S | 143 ++++++++++++++++++++++ firmware/target/mips/memset-mips.S | 239 +++++++++++++++++++++++++++++++++++++ firmware/target/mips/mmu-mips.c | 11 +- 5 files changed, 446 insertions(+), 11 deletions(-) create mode 100644 firmware/target/mips/ffs-mips.S create mode 100644 firmware/target/mips/memcpy-mips.S create mode 100644 firmware/target/mips/memset-mips.S diff --git a/firmware/SOURCES b/firmware/SOURCES index 46c46c2ce2..c08d8f2d66 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -400,15 +400,13 @@ target/arm/crt0.S #elif defined(CPU_MIPS) #undef mips -/*target/mips/memcpy.S -target/mips/memset.S -common/memset16.c -target/mips/strlen.S*/ -common/memcpy.c +/*target/mips/strlen.S*/ common/memmove.c -common/memset.c common/memset16.c common/strlen.c +target/mips/ffs-mips.S +target/mips/memcpy-mips.S +target/mips/memset-mips.S target/mips/mmu-mips.c #if CONFIG_CPU==JZ4732 target/mips/ingenic_jz47xx/crt0.S diff --git a/firmware/target/mips/ffs-mips.S b/firmware/target/mips/ffs-mips.S new file mode 100644 index 0000000000..4f798720a5 --- /dev/null +++ b/firmware/target/mips/ffs-mips.S @@ -0,0 +1,54 @@ +/*************************************************************************** + * __________ __ ___. 
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2009 by Maurus Cuelenaere
+ * based on ffs-arm.S by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "mips.h"
+
+/****************************************************************************
+ * int find_first_set_bit(uint32_t val);
+ *
+ * Find the index of the least significant set bit in the 32-bit word.
+ *
+ * return values:
+ *   0  - bit 0 is set
+ *   1  - bit 1 is set
+ *   ...
+ *   31 - bit 31 is set
+ *   32 - no bits set
+ ****************************************************************************/
+    .align  2
+    .global find_first_set_bit
+    .type   find_first_set_bit, %function
+
+find_first_set_bit:
+    beqz    a0, no_bits_set     # If val == 0 branch to no_bits_set
+    subu    t0, zero, a0        # t0 = -val
+    and     t0, a0, t0          # t0 = val & -val: only the lowest set bit remains
+    clz     v0, t0              # v0 = 31 - (index of that single bit)
+    xori    v0, v0, 31          # v0 is within 0..31 here, so v0 ^ 31 == 31 - v0
+    jr      ra                  # Return
+    nop
+
+no_bits_set:
+    li      v0, 32              # Return value = 32
+    jr      ra                  # Return
+    nop
diff --git a/firmware/target/mips/memcpy-mips.S b/firmware/target/mips/memcpy-mips.S
new file mode 100644
index 0000000000..2e7f245c69
--- /dev/null
+++ b/firmware/target/mips/memcpy-mips.S
@@ -0,0 +1,143 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ * This file was originally part of the GNU C Library
+ * Contributed to glibc by Hartvig Ekner, 2002
+ * Adapted for Rockbox by Maurus Cuelenaere, 2009
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "mips.h"
+
+/* void *memcpy(void *s1, const void *s2, size_t n);
+ * a0 = s1 (dest), a1 = s2 (src), a2 = n; s1 is returned in v0. */
+#ifdef ROCKBOX_BIG_ENDIAN
+#  define LWHI    lwl     /* high part is left in big-endian */
+#  define SWHI    swl     /* high part is left in big-endian */
+#  define LWLO    lwr     /* low part is right in big-endian */
+#  define SWLO    swr     /* low part is right in big-endian */
+#else
+#  define LWHI    lwr     /* high part is right in little-endian */
+#  define SWHI    swr     /* high part is right in little-endian */
+#  define LWLO    lwl     /* low part is left in little-endian */
+#  define SWLO    swl     /* low part is left in little-endian */
+#endif
+
+    .section .icode, "ax", %progbits
+
+    .global memcpy
+    .type   memcpy, %function
+
+    .set    noreorder               # Delay slots below are filled by hand
+
+memcpy:
+    slti    t0, a2, 8               # Less than 8?
+    bne     t0, zero, last8         # Yes: copy everything bytewise
+    move    v0, a0                  # Setup exit value before too late
+
+    xor     t0, a1, a0              # Find a0/a1 displacement
+    andi    t0, 0x3
+    bne     t0, zero, shift         # Go handle the unaligned case
+    subu    t1, zero, a1            # (delay slot) t1 = -src
+    andi    t1, 0x3                 # a0/a1 are aligned, but are we
+    beq     t1, zero, chk8w         #  starting in the middle of a word?
+    subu    a2, t1                  # (delay slot) count -= t1; t1 is 0 if taken
+    LWHI    t0, 0(a1)               # Yes we are... take care of that
+    addu    a1, t1
+    SWHI    t0, 0(a0)
+    addu    a0, t1
+
+chk8w:
+    andi    t0, a2, 0x1f            # 32 or more bytes left?
+    beq     t0, a2, chk1w
+    subu    a3, a2, t0              # Yes
+    addu    a3, a1                  # a3 = end address of loop
+    move    a2, t0                  # a2 = what will be left after loop
+lop8w:
+    lw      t0,  0(a1)              # Loop taking 8 words at a time
+    lw      t1,  4(a1)
+    lw      t2,  8(a1)
+    lw      t3, 12(a1)
+    lw      t4, 16(a1)
+    lw      t5, 20(a1)
+    lw      t6, 24(a1)
+    lw      t7, 28(a1)
+    addiu   a0, 32                  # Pointers advance first, so the stores
+    addiu   a1, 32                  #  below use negative offsets
+    sw      t0, -32(a0)
+    sw      t1, -28(a0)
+    sw      t2, -24(a0)
+    sw      t3, -20(a0)
+    sw      t4, -16(a0)
+    sw      t5, -12(a0)
+    sw      t6,  -8(a0)
+    bne     a1, a3, lop8w
+    sw      t7,  -4(a0)             # (delay slot) last store of the group
+
+chk1w:
+    andi    t0, a2, 0x3             # 4 or more bytes left?
+    beq     t0, a2, last8
+    subu    a3, a2, t0              # Yes, handle them one word at a time
+    addu    a3, a1                  # a3 again end address
+    move    a2, t0                  # a2 = bytes left after the word loop
+lop1w:
+    lw      t0, 0(a1)
+    addiu   a0, 4
+    addiu   a1, 4
+    bne     a1, a3, lop1w
+    sw      t0, -4(a0)              # (delay slot) store the word just loaded
+
+last8:
+    blez    a2, lst8e               # Handle last 8 bytes, one at a time
+    addu    a3, a2, a1              # (delay slot) a3 = src end address
+lst8l:
+    lb      t0, 0(a1)
+    addiu   a0, 1
+    addiu   a1, 1
+    bne     a1, a3, lst8l
+    sb      t0, -1(a0)              # (delay slot) store the byte just loaded
+lst8e:
+    jr      ra                      # Bye, bye
+    nop
+
+shift:
+    subu    a3, zero, a0            # Src and Dest unaligned
+    andi    a3, 0x3                 #  (unoptimized case...)
+    beq     a3, zero, shft1
+    subu    a2, a3                  # a2 = bytes left
+    LWHI    t0, 0(a1)               # Take care of first odd part
+    LWLO    t0, 3(a1)
+    addu    a1, a3
+    SWHI    t0, 0(a0)
+    addu    a0, a3
+shft1:
+    andi    t0, a2, 0x3             # t0 = bytes left after the word loop
+    subu    a3, a2, t0
+    addu    a3, a1                  # a3 = src end address of the word loop
+shfth:
+    LWHI    t1, 0(a1)               # Limp through, word by word
+    LWLO    t1, 3(a1)
+    addiu   a0, 4
+    addiu   a1, 4
+    bne     a1, a3, shfth
+    sw      t1, -4(a0)              # (delay slot) dest is word aligned here
+    b       last8                   # Handle anything which may be left
+    move    a2, t0                  # (delay slot) a2 = remaining byte count
+
+    .set    reorder
diff --git a/firmware/target/mips/memset-mips.S b/firmware/target/mips/memset-mips.S
new file mode 100644
index 0000000000..80103385f5
--- /dev/null
+++ b/firmware/target/mips/memset-mips.S
@@ -0,0 +1,239 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * This file was originally part of the Linux/MIPS GNU C Library
+ * Copyright (C) 1998 by Ralf Baechle
+ * Adapted for Rockbox by Maurus Cuelenaere, 2009
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "mips.h"
+
+#define FILL256(dst, offset, val) \
+    sw      val, (offset + 0x00)(dst); \
+    sw      val, (offset + 0x04)(dst); \
+    sw      val, (offset + 0x08)(dst); \
+    sw      val, (offset + 0x0c)(dst); \
+    sw      val, (offset + 0x10)(dst); \
+    sw      val, (offset + 0x14)(dst); \
+    sw      val, (offset + 0x18)(dst); \
+    sw      val, (offset + 0x1c)(dst); \
+    sw      val, (offset + 0x20)(dst); \
+    sw      val, (offset + 0x24)(dst); \
+    sw      val, (offset + 0x28)(dst); \
+    sw      val, (offset + 0x2c)(dst); \
+    sw      val, (offset + 0x30)(dst); \
+    sw      val, (offset + 0x34)(dst); \
+    sw      val, (offset + 0x38)(dst); \
+    sw      val, (offset + 0x3c)(dst); \
+    sw      val, (offset + 0x40)(dst); \
+    sw      val, (offset + 0x44)(dst); \
+    sw      val, (offset + 0x48)(dst); \
+    sw      val, (offset + 0x4c)(dst); \
+    sw      val, (offset + 0x50)(dst); \
+    sw      val, (offset + 0x54)(dst); \
+    sw      val, (offset + 0x58)(dst); \
+    sw      val, (offset + 0x5c)(dst); \
+    sw      val, (offset + 0x60)(dst); \
+    sw      val, (offset + 0x64)(dst); \
+    sw      val, (offset + 0x68)(dst); \
+    sw      val, (offset + 0x6c)(dst); \
+    sw      val, (offset + 0x70)(dst); \
+    sw      val, (offset + 0x74)(dst); \
+    sw      val, (offset + 0x78)(dst); \
+    sw      val, (offset + 0x7c)(dst); \
+    sw      val, (offset + 0x80)(dst); \
+    sw      val, (offset + 0x84)(dst); \
+    sw      val, (offset + 0x88)(dst); \
+    sw      val, (offset + 0x8c)(dst); \
+    sw      val, (offset + 0x90)(dst); \
+    sw      val, (offset + 0x94)(dst); \
+    sw      val, (offset + 0x98)(dst); \
+    sw      val, (offset + 0x9c)(dst); \
+    sw      val, (offset + 0xa0)(dst); \
+    sw      val, (offset + 0xa4)(dst); \
+    sw      val, (offset + 0xa8)(dst); \
+    sw      val, (offset + 0xac)(dst); \
+    sw      val, (offset + 0xb0)(dst); \
+    sw      val, (offset + 0xb4)(dst); \
+    sw      val, (offset + 0xb8)(dst); \
+    sw      val, (offset + 0xbc)(dst); \
+    sw      val, (offset + 0xc0)(dst); \
+    sw      val, (offset + 0xc4)(dst); \
+    sw      val, (offset + 0xc8)(dst); \
+    sw      val, (offset + 0xcc)(dst); \
+    sw      val, (offset + 0xd0)(dst); \
+    sw      val, (offset + 0xd4)(dst); \
+    sw      val, (offset + 0xd8)(dst); \
+    sw      val, (offset + 0xdc)(dst); \
+    sw      val, (offset + 0xe0)(dst); \
+    sw      val, (offset + 0xe4)(dst); \
+    sw      val, (offset + 0xe8)(dst); \
+    sw      val, (offset + 0xec)(dst); \
+    sw      val, (offset + 0xf0)(dst); \
+    sw      val, (offset + 0xf4)(dst); \
+    sw      val, (offset + 0xf8)(dst); \
+    sw      val, (offset + 0xfc)(dst);
+
+#define FILL128(dst, offset, val) \
+    sw      val, (offset + 0x00)(dst); \
+    sw      val, (offset + 0x04)(dst); \
+    sw      val, (offset + 0x08)(dst); \
+    sw      val, (offset + 0x0c)(dst); \
+    sw      val, (offset + 0x10)(dst); \
+    sw      val, (offset + 0x14)(dst); \
+    sw      val, (offset + 0x18)(dst); \
+    sw      val, (offset + 0x1c)(dst); \
+    sw      val, (offset + 0x20)(dst); \
+    sw      val, (offset + 0x24)(dst); \
+    sw      val, (offset + 0x28)(dst); \
+    sw      val, (offset + 0x2c)(dst); \
+    sw      val, (offset + 0x30)(dst); \
+    sw      val, (offset + 0x34)(dst); \
+    sw      val, (offset + 0x38)(dst); \
+    sw      val, (offset + 0x3c)(dst); \
+    sw      val, (offset + 0x40)(dst); \
+    sw      val, (offset + 0x44)(dst); \
+    sw      val, (offset + 0x48)(dst); \
+    sw      val, (offset + 0x4c)(dst); \
+    sw      val, (offset + 0x50)(dst); \
+    sw      val, (offset + 0x54)(dst); \
+    sw      val, (offset + 0x58)(dst); \
+    sw      val, (offset + 0x5c)(dst); \
+    sw      val, (offset + 0x60)(dst); \
+    sw      val, (offset + 0x64)(dst); \
+    sw      val, (offset + 0x68)(dst); \
+    sw      val, (offset + 0x6c)(dst); \
+    sw      val, (offset + 0x70)(dst); \
+    sw      val, (offset + 0x74)(dst); \
+    sw      val, (offset + 0x78)(dst); \
+    sw      val, (offset + 0x7c)(dst);
+
+#define FILL64(dst, offset, val) \
+    sw      val, (offset + 0x00)(dst); \
+    sw      val, (offset + 0x04)(dst); \
+    sw      val, (offset + 0x08)(dst); \
+    sw      val, (offset + 0x0c)(dst); \
+    sw      val, (offset + 0x10)(dst); \
+    sw      val, (offset + 0x14)(dst); \
+    sw      val, (offset + 0x18)(dst); \
+    sw      val, (offset + 0x1c)(dst); \
+    sw      val, (offset + 0x20)(dst); \
+    sw      val, (offset + 0x24)(dst); \
+    sw      val, (offset + 0x28)(dst); \
+    sw      val, (offset + 0x2c)(dst); \
+    sw      val, (offset + 0x30)(dst); \
+    sw      val, (offset + 0x34)(dst); \
+    sw      val, (offset + 0x38)(dst); \
+    sw      val, (offset + 0x3c)(dst);
+
+#define FILL32(dst, offset, val) \
+    sw      val, (offset + 0x00)(dst); \
+    sw      val, (offset + 0x04)(dst); \
+    sw      val, (offset + 0x08)(dst); \
+    sw      val, (offset + 0x0c)(dst); \
+    sw      val, (offset + 0x10)(dst); \
+    sw      val, (offset + 0x14)(dst); \
+    sw      val, (offset + 0x18)(dst); \
+    sw      val, (offset + 0x1c)(dst);
+
+#define FILL   32
+#define F_FILL FILL32
+
+#ifdef ROCKBOX_BIG_ENDIAN
+#  define SWHI    swl     /* high part is left in big-endian */
+#  define SWLO    swr     /* low part is right in big-endian */
+#else
+#  define SWHI    swr     /* high part is right in little-endian */
+#  define SWLO    swl     /* low part is left in little-endian */
+#endif
+
+/*
+ * void *memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to fill (s); the original value is returned in v0
+ * a1: char to fill with (c); only the low 8 bits are used
+ * a2: size of area to fill, in bytes (n)
+ */
+    .section .icode, "ax", %progbits
+
+    .global memset
+    .type   memset, %function
+
+    .set    noreorder
+    .align  5
+memset:
+    beqz    a1, 1f
+    move    v0, a0                  /* result */
+
+    andi    a1, 0xff                /* spread fillword */
+    sll     t1, a1, 8
+    or      a1, t1
+    sll     t1, a1, 16
+    or      a1, t1
+
+1:  sltiu   t0, a2, 4               /* very small region? */
+    bnez    t0, small_memset
+    andi    t0, a0, 3               /* aligned? */
+
+    beqz    t0, 1f
+    subu    t0, 4                   /* alignment in bytes */
+
+    SWHI    a1, (a0)                /* make word aligned */
+    subu    a0, t0                  /* word align ptr */
+    addu    a2, t0                  /* correct size */
+
+1:  ori     t1, a2, (FILL-1)        /* # of full blocks */
+    xori    t1, (FILL-1)
+    beqz    t1, memset_partial      /* no block to fill */
+    andi    t0, a2, (FILL-4)
+
+    addu    t1, a0                  /* end address */
+    .set    reorder
+1:  addiu   a0, FILL
+    F_FILL( a0, -FILL, a1 )
+    bne     t1, a0, 1b
+    .set    noreorder
+
+memset_partial:
+    la      t1, 2f                  /* where to start */
+    subu    t1, t0                  /* back up one sw (4 bytes) per word to fill */
+    jr      t1
+    addu    a0, t0                  /* dest ptr */
+
+    F_FILL( a0, -FILL, a1 )         /* ... but first do words ... */
+2:  andi    a2, 3                   /* 0 <= n <= 3 to go */
+
+    beqz    a2, 1f
+    addu    a0, a2                  /* What's left */
+    SWLO    a1, -1(a0)              /* word start .. last byte; SWHI here ran past the end */
+1:  jr      ra
+    move    a2, zero
+
+small_memset:
+    beqz    a2, 2f
+    addu    t1, a0, a2              /* end address */
+
+1:  addiu   a0, 1                   /* fill bytewise */
+    bne     t1, a0, 1b
+    sb      a1, -1(a0)
+
+2:  jr      ra                      /* done */
+    move    a2, zero
+
+    .set    reorder
diff --git a/firmware/target/mips/mmu-mips.c b/firmware/target/mips/mmu-mips.c
index 3c1b932325..570b209e3a 100644
--- a/firmware/target/mips/mmu-mips.c
+++ b/firmware/target/mips/mmu-mips.c
@@ -99,14 +99,15 @@ static void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
     restore_irq(old_irq);
 }
 
-void map_address(unsigned long virtual, unsigned long physical, unsigned long length)
+void map_address(unsigned long virtual, unsigned long physical,
+                 unsigned long length, unsigned int cache_flags)
 {
     unsigned long entry0 = (physical & PFN_MASK) << PFN_SHIFT;
     unsigned long entry1 = ((physical+length) & PFN_MASK) << PFN_SHIFT;
     unsigned long entryhi = virtual & ~VPN2_SHIFT;
 
-    entry0 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) );
-    entry1 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) );
+    entry0 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
+    entry1 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
 
     add_wired_entry(entry0, entry1, entryhi, DEFAULT_PAGE_MASK);
 }
@@ -119,7 +120,7 @@ void tlb_init(void)
local_flush_tlb_all(); /* - map_address(0x80000000, 0x80000000, 0x4000); - map_address(0x80004000, 0x80004000, MEM * 0x100000); + map_address(0x80000000, 0x80000000, 0x4000, K_CacheAttrC); + map_address(0x80004000, 0x80004000, MEM * 0x100000, K_CacheAttrC); */ } -- cgit v1.2.3