summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaurus Cuelenaere <mcuelenaere@gmail.com>2009-02-04 17:33:19 +0000
committerMaurus Cuelenaere <mcuelenaere@gmail.com>2009-02-04 17:33:19 +0000
commit9b13a5d151a14ba7a5b8c502763cb56356260ceb (patch)
tree854f056a73a8eceb06ce08a3a74de75121b37350
parent01bd736e000856ded49023ccdd4ed62b96f300ff (diff)
downloadrockbox-9b13a5d151a14ba7a5b8c502763cb56356260ceb.tar.gz
rockbox-9b13a5d151a14ba7a5b8c502763cb56356260ceb.zip
MIPS:
* Add assembly optimised variants for memcpy, memset and find_first_set_bit
* Add option to map_address in MMU to set caching algorithm
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19920 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/SOURCES10
-rw-r--r--firmware/target/mips/ffs-mips.S54
-rw-r--r--firmware/target/mips/memcpy-mips.S143
-rw-r--r--firmware/target/mips/memset-mips.S239
-rw-r--r--firmware/target/mips/mmu-mips.c11
5 files changed, 446 insertions, 11 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 46c46c2ce2..c08d8f2d66 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -400,15 +400,13 @@ target/arm/crt0.S
400 400
401#elif defined(CPU_MIPS) 401#elif defined(CPU_MIPS)
402#undef mips 402#undef mips
403/*target/mips/memcpy.S 403/*target/mips/strlen.S*/
404target/mips/memset.S
405common/memset16.c
406target/mips/strlen.S*/
407common/memcpy.c
408common/memmove.c 404common/memmove.c
409common/memset.c
410common/memset16.c 405common/memset16.c
411common/strlen.c 406common/strlen.c
407target/mips/ffs-mips.S
408target/mips/memcpy-mips.S
409target/mips/memset-mips.S
412target/mips/mmu-mips.c 410target/mips/mmu-mips.c
413#if CONFIG_CPU==JZ4732 411#if CONFIG_CPU==JZ4732
414target/mips/ingenic_jz47xx/crt0.S 412target/mips/ingenic_jz47xx/crt0.S
diff --git a/firmware/target/mips/ffs-mips.S b/firmware/target/mips/ffs-mips.S
new file mode 100644
index 0000000000..4f798720a5
--- /dev/null
+++ b/firmware/target/mips/ffs-mips.S
@@ -0,0 +1,54 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2009 by Maurus Cuelenaere
11 * based on ffs-arm.S by Michael Sevakis
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ****************************************************************************/
22
23#include "config.h"
24#include "mips.h"
25
26/****************************************************************************
27 * int find_first_set_bit(uint32_t val);
28 *
29 * Find the index of the least significant set bit in the 32-bit word.
30 *
31 * return values:
32 * 0 - bit 0 is set
33 * 1 - bit 1 is set
34 * ...
35 * 31 - bit 31 is set
36 * 32 - no bits set
37 ****************************************************************************/
38 .align 2
39 .global find_first_set_bit
40 .type find_first_set_bit, %function
41
42find_first_set_bit:
43 beqz a0, no_bits_set # val == 0: there is no set bit to report
44 subu t0, zero, a0 # t0 = -val (two's complement)
45 and t0, a0, t0 # t0 = val & -val: keeps only the lowest set bit
46 clz v0, t0 # v0 = number of zero bits above that single bit
47 li t1, 31 # t1 = index of the top bit
48 sub v0, t1, v0 # Return value = 31 - clz = index of lowest set bit
49 jr ra # Return
50 nop
51no_bits_set:
52 li v0, 32 # Return value = 32 (documented "no bits set" result)
53 jr ra # Return
54 nop
diff --git a/firmware/target/mips/memcpy-mips.S b/firmware/target/mips/memcpy-mips.S
new file mode 100644
index 0000000000..2e7f245c69
--- /dev/null
+++ b/firmware/target/mips/memcpy-mips.S
@@ -0,0 +1,143 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2002, 2003 Free Software Foundation, Inc.
11 * This file was originally part of the GNU C Library
12 * Contributed to glibc by Hartvig Ekner <hartvige@mips.com>, 2002
13 * Adapted for Rockbox by Maurus Cuelenaere, 2009
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
19 *
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
22 *
23 ****************************************************************************/
24
25#include "config.h"
26#include "mips.h"
27
28/* void *memcpy(void *s1, const void *s2, size_t n); */
29
30#ifdef ROCKBOX_BIG_ENDIAN
31# define LWHI lwl /* high part is left in big-endian */
32# define SWHI swl /* high part is left in big-endian */
33# define LWLO lwr /* low part is right in big-endian */
34# define SWLO swr /* low part is right in big-endian */
35#else
36# define LWHI lwr /* high part is right in little-endian */
37# define SWHI swr /* high part is right in little-endian */
38# define LWLO lwl /* low part is left in little-endian */
39# define SWLO swl /* low part is left in little-endian */
40#endif
41
42 .section .icode, "ax", %progbits
43
44 .global memcpy
45 .type memcpy, %function
46
47 .set noreorder
48
49memcpy: # a0 = dst, a1 = src, a2 = n; returns dst in v0. Runs under .set noreorder: the line after each branch is its delay slot
50 slti t0, a2, 8 # Less than 8?
51 bne t0, zero, last8 # Yes: copy everything one byte at a time
52 move v0, a0 # (delay slot) Setup exit value before too late
53
54 xor t0, a1, a0 # Find a0/a1 displacement
55 andi t0, 0x3 # Non-zero when src and dst differ in their low two address bits
56 bne t0, zero, shift # Go handle the unaligned case
57 subu t1, zero, a1 # (delay slot) t1 = -src
58 andi t1, 0x3 # a0/a1 are aligned, but are we
59 beq t1, zero, chk8w # starting in the middle of a word?
60 subu a2, t1 # (delay slot) n -= bytes up to the next word boundary (0 if already aligned)
61 LWHI t0, 0(a1) # Yes we are... take care of that
62 addu a1, t1 # Advance src to the word boundary
63 SWHI t0, 0(a0) # Partial-word store of the leading bytes
64 addu a0, t1 # Advance dst to the word boundary
65
66chk8w:
67 andi t0, a2, 0x1f # 32 or more bytes left?
68 beq t0, a2, chk1w # No (n == n % 32): skip the 8-word loop
69 subu a3, a2, t0 # Yes
70 addu a3, a1 # a3 = end address of loop
71 move a2, t0 # a2 = what will be left after loop
72lop8w:
73 lw t0, 0(a1) # Loop taking 8 words at a time
74 lw t1, 4(a1)
75 lw t2, 8(a1)
76 lw t3, 12(a1)
77 lw t4, 16(a1)
78 lw t5, 20(a1)
79 lw t6, 24(a1)
80 lw t7, 28(a1)
81 addiu a0, 32 # Pointers are bumped first, so the stores use negative offsets
82 addiu a1, 32
83 sw t0, -32(a0)
84 sw t1, -28(a0)
85 sw t2, -24(a0)
86 sw t3, -20(a0)
87 sw t4, -16(a0)
88 sw t5, -12(a0)
89 sw t6, -8(a0)
90 bne a1, a3, lop8w # Repeat until src reaches the loop end address
91 sw t7, -4(a0) # (delay slot) last store of the iteration
92
93chk1w:
94 andi t0, a2, 0x3 # 4 or more bytes left?
95 beq t0, a2, last8 # No: only trailing bytes remain
96 subu a3, a2, t0 # Yes, handle them one word at a time
97 addu a3, a1 # a3 again end address
98 move a2, t0 # a2 = bytes left after the word loop
99lop1w:
100 lw t0, 0(a1)
101 addiu a0, 4
102 addiu a1, 4
103 bne a1, a3, lop1w
104 sw t0, -4(a0) # (delay slot) store the word just loaded
105
106last8:
107 blez a2, lst8e # Handle last 8 bytes, one at a time
108 addu a3, a2, a1 # (delay slot) a3 = end address of the byte loop
109lst8l:
110 lb t0, 0(a1)
111 addiu a0, 1
112 addiu a1, 1
113 bne a1, a3, lst8l
114 sb t0, -1(a0) # (delay slot) store the byte just loaded
115lst8e:
116 jr ra # Bye, bye
117 nop
118
119shift:
120 subu a3, zero, a0 # Src and Dest unaligned
121 andi a3, 0x3 # (unoptimized case...)
122 beq a3, zero, shft1 # dst already word aligned: no leading partial store
123 subu a2, a3 # a2 = bytes left
124 LWHI t0, 0(a1) # Take care of first odd part
125 LWLO t0, 3(a1) # Together with the LWHI above: one unaligned word load
126 addu a1, a3
127 SWHI t0, 0(a0) # Partial-word store up to dst's word boundary
128 addu a0, a3
129shft1:
130 andi t0, a2, 0x3 # t0 = trailing bytes left after whole words
131 subu a3, a2, t0
132 addu a3, a1 # a3 = src address where the word loop stops
133shfth:
134 LWHI t1, 0(a1) # Limp through, word by word
135 LWLO t1, 3(a1)
136 addiu a0, 4
137 addiu a1, 4
138 bne a1, a3, shfth
139 sw t1, -4(a0) # (delay slot) aligned store to dst
140 b last8 # Handle anything which may be left
141 move a2, t0 # (delay slot) a2 = trailing byte count for last8
142
143 .set reorder
diff --git a/firmware/target/mips/memset-mips.S b/firmware/target/mips/memset-mips.S
new file mode 100644
index 0000000000..80103385f5
--- /dev/null
+++ b/firmware/target/mips/memset-mips.S
@@ -0,0 +1,239 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * This file was originally part of the Linux/MIPS GNU C Library
11 * Copyright (C) 1998 by Ralf Baechle
12 * Adapted for Rockbox by Maurus Cuelenaere, 2009
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23
24#include "config.h"
25#include "mips.h"
26
27#define FILL256(dst, offset, val) \
28 sw val, (offset + 0x00)(dst); \
29 sw val, (offset + 0x04)(dst); \
30 sw val, (offset + 0x08)(dst); \
31 sw val, (offset + 0x0c)(dst); \
32 sw val, (offset + 0x10)(dst); \
33 sw val, (offset + 0x14)(dst); \
34 sw val, (offset + 0x18)(dst); \
35 sw val, (offset + 0x1c)(dst); \
36 sw val, (offset + 0x20)(dst); \
37 sw val, (offset + 0x24)(dst); \
38 sw val, (offset + 0x28)(dst); \
39 sw val, (offset + 0x2c)(dst); \
40 sw val, (offset + 0x30)(dst); \
41 sw val, (offset + 0x34)(dst); \
42 sw val, (offset + 0x38)(dst); \
43 sw val, (offset + 0x3c)(dst); \
44 sw val, (offset + 0x40)(dst); \
45 sw val, (offset + 0x44)(dst); \
46 sw val, (offset + 0x48)(dst); \
47 sw val, (offset + 0x4c)(dst); \
48 sw val, (offset + 0x50)(dst); \
49 sw val, (offset + 0x54)(dst); \
50 sw val, (offset + 0x58)(dst); \
51 sw val, (offset + 0x5c)(dst); \
52 sw val, (offset + 0x60)(dst); \
53 sw val, (offset + 0x64)(dst); \
54 sw val, (offset + 0x68)(dst); \
55 sw val, (offset + 0x6c)(dst); \
56 sw val, (offset + 0x70)(dst); \
57 sw val, (offset + 0x74)(dst); \
58 sw val, (offset + 0x78)(dst); \
59 sw val, (offset + 0x7c)(dst); \
60 sw val, (offset + 0x80)(dst); \
61 sw val, (offset + 0x84)(dst); \
62 sw val, (offset + 0x88)(dst); \
63 sw val, (offset + 0x8c)(dst); \
64 sw val, (offset + 0x90)(dst); \
65 sw val, (offset + 0x94)(dst); \
66 sw val, (offset + 0x98)(dst); \
67 sw val, (offset + 0x9c)(dst); \
68 sw val, (offset + 0xa0)(dst); \
69 sw val, (offset + 0xa4)(dst); \
70 sw val, (offset + 0xa8)(dst); \
71 sw val, (offset + 0xac)(dst); \
72 sw val, (offset + 0xb0)(dst); \
73 sw val, (offset + 0xb4)(dst); \
74 sw val, (offset + 0xb8)(dst); \
75 sw val, (offset + 0xbc)(dst); \
76 sw val, (offset + 0xc0)(dst); \
77 sw val, (offset + 0xc4)(dst); \
78 sw val, (offset + 0xc8)(dst); \
79 sw val, (offset + 0xcc)(dst); \
80 sw val, (offset + 0xd0)(dst); \
81 sw val, (offset + 0xd4)(dst); \
82 sw val, (offset + 0xd8)(dst); \
83 sw val, (offset + 0xdc)(dst); \
84 sw val, (offset + 0xe0)(dst); \
85 sw val, (offset + 0xe4)(dst); \
86 sw val, (offset + 0xe8)(dst); \
87 sw val, (offset + 0xec)(dst); \
88 sw val, (offset + 0xf0)(dst); \
89 sw val, (offset + 0xf4)(dst); \
90 sw val, (offset + 0xf8)(dst); \
91 sw val, (offset + 0xfc)(dst);
92
93#define FILL128(dst, offset, val) \
94 sw val, (offset + 0x00)(dst); \
95 sw val, (offset + 0x04)(dst); \
96 sw val, (offset + 0x08)(dst); \
97 sw val, (offset + 0x0c)(dst); \
98 sw val, (offset + 0x10)(dst); \
99 sw val, (offset + 0x14)(dst); \
100 sw val, (offset + 0x18)(dst); \
101 sw val, (offset + 0x1c)(dst); \
102 sw val, (offset + 0x20)(dst); \
103 sw val, (offset + 0x24)(dst); \
104 sw val, (offset + 0x28)(dst); \
105 sw val, (offset + 0x2c)(dst); \
106 sw val, (offset + 0x30)(dst); \
107 sw val, (offset + 0x34)(dst); \
108 sw val, (offset + 0x38)(dst); \
109 sw val, (offset + 0x3c)(dst); \
110 sw val, (offset + 0x40)(dst); \
111 sw val, (offset + 0x44)(dst); \
112 sw val, (offset + 0x48)(dst); \
113 sw val, (offset + 0x4c)(dst); \
114 sw val, (offset + 0x50)(dst); \
115 sw val, (offset + 0x54)(dst); \
116 sw val, (offset + 0x58)(dst); \
117 sw val, (offset + 0x5c)(dst); \
118 sw val, (offset + 0x60)(dst); \
119 sw val, (offset + 0x64)(dst); \
120 sw val, (offset + 0x68)(dst); \
121 sw val, (offset + 0x6c)(dst); \
122 sw val, (offset + 0x70)(dst); \
123 sw val, (offset + 0x74)(dst); \
124 sw val, (offset + 0x78)(dst); \
125 sw val, (offset + 0x7c)(dst);
126
127#define FILL64(dst, offset, val) \
128 sw val, (offset + 0x00)(dst); \
129 sw val, (offset + 0x04)(dst); \
130 sw val, (offset + 0x08)(dst); \
131 sw val, (offset + 0x0c)(dst); \
132 sw val, (offset + 0x10)(dst); \
133 sw val, (offset + 0x14)(dst); \
134 sw val, (offset + 0x18)(dst); \
135 sw val, (offset + 0x1c)(dst); \
136 sw val, (offset + 0x20)(dst); \
137 sw val, (offset + 0x24)(dst); \
138 sw val, (offset + 0x28)(dst); \
139 sw val, (offset + 0x2c)(dst); \
140 sw val, (offset + 0x30)(dst); \
141 sw val, (offset + 0x34)(dst); \
142 sw val, (offset + 0x38)(dst); \
143 sw val, (offset + 0x3c)(dst);
144
145#define FILL32(dst, offset, val) /* eight word stores: writes val to the 32 bytes at dst+offset */ \
146 sw val, (offset + 0x00)(dst); \
147 sw val, (offset + 0x04)(dst); \
148 sw val, (offset + 0x08)(dst); \
149 sw val, (offset + 0x0c)(dst); \
150 sw val, (offset + 0x10)(dst); \
151 sw val, (offset + 0x14)(dst); \
152 sw val, (offset + 0x18)(dst); \
153 sw val, (offset + 0x1c)(dst);
154
155#define FILL 32 /* bytes written by one F_FILL expansion */
156#define F_FILL FILL32 /* block-fill macro selected for memset; must match FILL */
157
158#ifdef ROCKBOX_BIG_ENDIAN
159# define SWHI swl /* high part is left in big-endian */
160# define SWLO swr /* low part is right in big-endian */
161#else
162# define SWHI swr /* high part is right in little-endian */
163# define SWLO swl /* low part is left in little-endian */
164#endif
165/*
166 * void *memset(void *s, int c, size_t n)
167 *
168 * a0: start of area to fill (returned unchanged in v0)
169 * a1: value to fill with; only the low byte is used
170 * a2: number of bytes to fill
171 */
172 .section .icode, "ax", %progbits
173
174 .global memset
175 .type memset, %function
176
177 .set noreorder
178 .align 5
179memset:
180 beqz a1, 1f /* a zero fill value needs no spreading */
181 move v0, a0 /* (delay slot) result */
182
183 andi a1, 0xff /* spread fillword */
184 sll t1, a1, 8
185 or a1, t1
186 sll t1, a1, 16
187 or a1, t1 /* a1 = fill byte replicated into all four bytes */
1881:
189
190 sltiu t0, a2, 4 /* very small region? */
191 bnez t0, small_memset
192 andi t0, a0, 3 /* (delay slot) aligned? */
193
194 beqz t0, 1f
195 subu t0, 4 /* (delay slot) t0 = -(bytes up to the next word boundary) */
196
197 SWHI a1, (a0) /* fill from a0 to the end of its word */
198 subu a0, t0 /* word align ptr */
199 addu a2, t0 /* correct size */
200
2011: ori t1, a2, (FILL-1) /* # of full blocks */
202 xori t1, (FILL-1) /* t1 = size rounded down to a multiple of FILL */
203 beqz t1, memset_partial /* no block to fill */
204 andi t0, a2, (FILL-4) /* (delay slot) t0 = bytes of whole words left after the blocks */
205
206 addu t1, a0 /* end address */
207 .set reorder
2081: addiu a0, FILL
209 F_FILL( a0, -FILL, a1 )
210 bne t1, a0, 1b
211 .set noreorder
212
213memset_partial:
214 la t1, 2f /* where to start */
215 subu t1, t0 /* each sw is 4 bytes of code and stores 4 bytes, so back up t0 bytes */
216 jr t1 /* jump into the unrolled stores below */
217 addu a0, t0 /* (delay slot) dest ptr */
218
219 F_FILL( a0, -FILL, a1 ) /* ... but first do words ... */
2202: andi a2, 3 /* 0 <= n <= 3 to go */
221
222 beqz a2, 1f
223 addu a0, a2 /* (delay slot) What's left */
224 SWLO a1, -1(a0) /* fill from the start of the last word up to the final byte only */
2251: jr ra
226 move a2, zero
227
228small_memset:
229 beqz a2, 2f
230 addu t1, a0, a2 /* (delay slot) t1 = end address */
231
2321: addiu a0, 1 /* fill bytewise */
233 bne t1, a0, 1b
234 sb a1, -1(a0) /* (delay slot) store at the byte just stepped over */
235
2362: jr ra /* done */
237 move a2, zero
238
239 .set reorder
diff --git a/firmware/target/mips/mmu-mips.c b/firmware/target/mips/mmu-mips.c
index 3c1b932325..570b209e3a 100644
--- a/firmware/target/mips/mmu-mips.c
+++ b/firmware/target/mips/mmu-mips.c
@@ -99,14 +99,15 @@ static void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
99 restore_irq(old_irq); 99 restore_irq(old_irq);
100} 100}
101 101
102void map_address(unsigned long virtual, unsigned long physical, unsigned long length) 102void map_address(unsigned long virtual, unsigned long physical,
103 unsigned long length, unsigned int cache_flags)
103{ 104{
104 unsigned long entry0 = (physical & PFN_MASK) << PFN_SHIFT; 105 unsigned long entry0 = (physical & PFN_MASK) << PFN_SHIFT;
105 unsigned long entry1 = ((physical+length) & PFN_MASK) << PFN_SHIFT; 106 unsigned long entry1 = ((physical+length) & PFN_MASK) << PFN_SHIFT;
106 unsigned long entryhi = virtual & ~VPN2_SHIFT; 107 unsigned long entryhi = virtual & ~VPN2_SHIFT;
107 108
108 entry0 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) ); 109 entry0 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
109 entry1 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) ); 110 entry1 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
110 111
111 add_wired_entry(entry0, entry1, entryhi, DEFAULT_PAGE_MASK); 112 add_wired_entry(entry0, entry1, entryhi, DEFAULT_PAGE_MASK);
112} 113}
@@ -119,7 +120,7 @@ void tlb_init(void)
119 120
120 local_flush_tlb_all(); 121 local_flush_tlb_all();
121/* 122/*
122 map_address(0x80000000, 0x80000000, 0x4000); 123 map_address(0x80000000, 0x80000000, 0x4000, K_CacheAttrC);
123 map_address(0x80004000, 0x80004000, MEM * 0x100000); 124 map_address(0x80004000, 0x80004000, MEM * 0x100000, K_CacheAttrC);
124*/ 125*/
125} 126}