summaryrefslogtreecommitdiff
path: root/firmware/asm
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/asm')
-rw-r--r--firmware/asm/SOURCES8
-rw-r--r--firmware/asm/arm/memcpy.S176
-rw-r--r--firmware/asm/arm/memmove.S190
-rw-r--r--firmware/asm/arm/memset.S98
-rw-r--r--firmware/asm/arm/memset16.S82
-rw-r--r--firmware/asm/m68k/memcpy.S682
-rw-r--r--firmware/asm/m68k/memmove.S670
-rw-r--r--firmware/asm/m68k/memset.S152
-rw-r--r--firmware/asm/m68k/memset16.S146
-rw-r--r--firmware/asm/m68k/strlen.S71
-rw-r--r--firmware/asm/memcpy.c117
-rw-r--r--firmware/asm/memmove.c147
-rw-r--r--firmware/asm/memset.c110
-rw-r--r--firmware/asm/memset16.c78
-rw-r--r--firmware/asm/mips/memcpy.S143
-rw-r--r--firmware/asm/mips/memset.S239
-rw-r--r--firmware/asm/sh/memcpy.S219
-rw-r--r--firmware/asm/sh/memmove.S222
-rw-r--r--firmware/asm/sh/memset.S109
-rw-r--r--firmware/asm/sh/strlen.S96
-rw-r--r--firmware/asm/strlen.c93
21 files changed, 3847 insertions, 1 deletions
diff --git a/firmware/asm/SOURCES b/firmware/asm/SOURCES
index 8b13789179..805727ea93 100644
--- a/firmware/asm/SOURCES
+++ b/firmware/asm/SOURCES
@@ -1 +1,7 @@
1 1memset16.c
2#if (CONFIG_PLATFORM & PLATFORM_NATIVE) || defined(HAVE_ROCKBOX_C_LIBRARY)
3memcpy.c
4memmove.c
5memset.c
6strlen.c
7#endif
diff --git a/firmware/asm/arm/memcpy.S b/firmware/asm/arm/memcpy.S
new file mode 100644
index 0000000000..2a55fb5656
--- /dev/null
+++ b/firmware/asm/arm/memcpy.S
@@ -0,0 +1,176 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 Free Software Foundation, Inc.
11 * This file was originally part of the GNU C Library
12 * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
13 * Adapted for Rockbox by Daniel Ankers
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
19 *
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
22 *
23 ****************************************************************************/
24
25#include "config.h"
26
27/*
28 * Endian independent macros for shifting bytes within registers.
29 */
30#ifndef __ARMEB__
31#define pull lsr
32#define push lsl
33#else
34#define pull lsl
35#define push lsr
36#endif
37
38/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
39
40 .section .icode,"ax",%progbits
41
42 .align 2
43 .global memcpy
44 .type memcpy,%function
45
46memcpy: /* r0 = dest, r1 = src, r2 = byte count (see prototype above) */
47 stmfd sp!, {r0, r4, lr} /* save the original dest (r0), r4 and the return address */
48
49 subs r2, r2, #4 /* r2 = count - 4 */
50 blt 8f /* fewer than 4 bytes: straight to the byte tail */
51 ands ip, r0, #3 /* ip = dest & 3 */
52 bne 9f /* dest not word aligned: copy leading bytes first */
53 ands ip, r1, #3 /* ip = src & 3 */
54 bne 10f /* src not word aligned: shifted word copy */
55
561: subs r2, r2, #(28) /* count is now biased by -32 in total */
57 stmfd sp!, {r5 - r8} /* free r5-r8 for the 8-word transfers */
58 blt 5f /* fewer than 32 bytes: skip the block loop */
59
602:
613:
624: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load 32 bytes, advance src */
63 subs r2, r2, #32
64 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store 32 bytes, advance dest */
65 bge 3b /* repeat while at least 32 more bytes remain */
66
675: ands ip, r2, #28 /* ip = bytes in the remaining whole words (0, 4, ... 28) */
68 rsb ip, ip, #32 /* ip = bytes of ldr (and later str) instructions to skip */
69 addne pc, pc, ip @ C is always clear here
70 b 7f /* no whole words left */
716: nop
72 ldr r3, [r1], #4
73 ldr r4, [r1], #4
74 ldr r5, [r1], #4
75 ldr r6, [r1], #4
76 ldr r7, [r1], #4
77 ldr r8, [r1], #4
78 ldr lr, [r1], #4
79
80 add pc, pc, ip /* same computed jump, now into the store sequence */
81 nop
82 nop
83 str r3, [r0], #4
84 str r4, [r0], #4
85 str r5, [r0], #4
86 str r6, [r0], #4
87 str r7, [r0], #4
88 str r8, [r0], #4
89 str lr, [r0], #4
90
917: ldmfd sp!, {r5 - r8} /* restore the extra scratch registers */
92
938: movs r2, r2, lsl #31 /* count bit 0 -> result non-zero (ne), count bit 1 -> carry (cs) */
94 ldrneb r3, [r1], #1 /* bit 0 set: one byte */
95 ldrcsb r4, [r1], #1 /* bit 1 set: two more bytes */
96 ldrcsb ip, [r1]
97 strneb r3, [r0], #1
98 strcsb r4, [r0], #1
99 strcsb ip, [r0]
100
101 ldmpc regs="r0, r4" /* ldmpc is a macro defined outside this file; presumably pops r0/r4 and returns - confirm */
102
1039: rsb ip, ip, #4 /* ip = bytes needed to word-align dest (1..3) */
104 cmp ip, #2
105 ldrgtb r3, [r1], #1 /* only when 3 bytes are needed */
106 ldrgeb r4, [r1], #1 /* when 2 or 3 bytes are needed */
107 ldrb lr, [r1], #1 /* always at least one byte */
108 strgtb r3, [r0], #1
109 strgeb r4, [r0], #1
110 subs r2, r2, ip /* account for the alignment bytes just copied */
111 strb lr, [r0], #1
112 blt 8b /* fewer than 4 bytes left: byte tail */
113 ands ip, r1, #3 /* re-check src alignment */
114 beq 1b /* both pointers aligned now: take the fast path */
115
11610: bic r1, r1, #3 /* round src down to a word boundary */
117 cmp ip, #2 /* ip = src & 3 (1..3) */
118 ldr lr, [r1], #4 /* preload the word containing the first source bytes */
119 beq 17f /* src offset 2 */
120 bgt 18f /* src offset 3; offset 1 falls through */
121
122
123 .macro forward_copy_shift pull push /* pull/push are shift amounts in bits; lr carries the previously loaded source word */
124
125 subs r2, r2, #28
126 blt 14f /* fewer than 32 bytes: use the word-at-a-time loop */
127
12811: stmfd sp!, {r5 - r9}
129
13012:
13113: ldmia r1!, {r4, r5, r6, r7}
132 mov r3, lr, pull #\pull /* bytes left over from the previous word */
133 subs r2, r2, #32
134 ldmia r1!, {r8, r9, ip, lr}
135 orr r3, r3, r4, push #\push /* merge with the leading bytes of the next word */
136 mov r4, r4, pull #\pull
137 orr r4, r4, r5, push #\push
138 mov r5, r5, pull #\pull
139 orr r5, r5, r6, push #\push
140 mov r6, r6, pull #\pull
141 orr r6, r6, r7, push #\push
142 mov r7, r7, pull #\pull
143 orr r7, r7, r8, push #\push
144 mov r8, r8, pull #\pull
145 orr r8, r8, r9, push #\push
146 mov r9, r9, pull #\pull
147 orr r9, r9, ip, push #\push
148 mov ip, ip, pull #\pull
149 orr ip, ip, lr, push #\push
150 stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} /* store 8 merged words */
151 bge 12b /* flags still come from the subs above */
152
153 ldmfd sp!, {r5 - r9}
154
15514: ands ip, r2, #28 /* bytes in the remaining whole words */
156 beq 16f
157
15815: mov r3, lr, pull #\pull
159 ldr lr, [r1], #4
160 subs ip, ip, #4
161 orr r3, r3, lr, push #\push
162 str r3, [r0], #4
163 bgt 15b /* one merged word per pass */
164
16516: sub r1, r1, #(\push / 8) /* step src back to the first byte not yet copied */
166 b 8b /* finish with the byte tail */
167
168 .endm
169
170
171 forward_copy_shift pull=8 push=24
172
17317: forward_copy_shift pull=16 push=16
174
17518: forward_copy_shift pull=24 push=8
176
diff --git a/firmware/asm/arm/memmove.S b/firmware/asm/arm/memmove.S
new file mode 100644
index 0000000000..d8cab048be
--- /dev/null
+++ b/firmware/asm/arm/memmove.S
@@ -0,0 +1,190 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 Free Software Foundation, Inc.
11 * This file was originally part of the GNU C Library
12 * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
13 * Adapted for Rockbox by Daniel Ankers
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
19 *
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
22 *
23 ****************************************************************************/
24
25#include "config.h"
26
27/*
28 * Endian independent macros for shifting bytes within registers.
29 */
30#ifndef __ARMEB__
31#define pull lsr
32#define push lsl
33#else
34#define pull lsl
35#define push lsr
36#endif
37
38 .text
39
40/*
41 * Prototype: void *memmove(void *dest, const void *src, size_t n);
42 *
43 * Note:
44 *
45 * If the memory regions don't overlap, we simply branch to memcpy which is
46 * normally a bit faster. Otherwise the copy is done going downwards.
47 */
48
49 .section .icode,"ax",%progbits
50
51 .align 2
52 .global memmove
53 .type memmove,%function
54
55memmove: /* r0 = dest, r1 = src, r2 = byte count (see prototype above) */
56
57 subs ip, r0, r1 /* ip = dest - src */
58 cmphi r2, ip /* dest above src: compare the count with that distance */
59 bls memcpy /* dest <= src, or count <= distance: a forward copy is safe */
60
61 stmfd sp!, {r0, r4, lr} /* save the original dest (r0), r4 and the return address */
62 add r1, r1, r2 /* point both pointers just past the end ... */
63 add r0, r0, r2 /* ... the copy below runs downwards */
64 subs r2, r2, #4 /* r2 = count - 4 */
65 blt 8f /* fewer than 4 bytes: straight to the byte tail */
66 ands ip, r0, #3 /* ip = dest end & 3 */
67 bne 9f /* dest end not word aligned: copy trailing bytes first */
68 ands ip, r1, #3 /* ip = src end & 3 */
69 bne 10f /* src end not word aligned: shifted word copy */
70
711: subs r2, r2, #(28) /* count is now biased by -32 in total */
72 stmfd sp!, {r5 - r8} /* free r5-r8 for the 8-word transfers */
73 blt 5f /* fewer than 32 bytes: skip the block loop */
74
752:
763:
774: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load 32 bytes below src, move src down */
78 subs r2, r2, #32
79 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store 32 bytes below dest, move dest down */
80 bge 3b /* repeat while at least 32 more bytes remain */
81
825: ands ip, r2, #28 /* ip = bytes in the remaining whole words (0, 4, ... 28) */
83 rsb ip, ip, #32 /* ip = bytes of ldr (and later str) instructions to skip */
84 addne pc, pc, ip @ C is always clear here
85 b 7f /* no whole words left */
866: nop
87 ldr r3, [r1, #-4]!
88 ldr r4, [r1, #-4]!
89 ldr r5, [r1, #-4]!
90 ldr r6, [r1, #-4]!
91 ldr r7, [r1, #-4]!
92 ldr r8, [r1, #-4]!
93 ldr lr, [r1, #-4]!
94
95 add pc, pc, ip /* same computed jump, now into the store sequence */
96 nop
97 nop
98 str r3, [r0, #-4]!
99 str r4, [r0, #-4]!
100 str r5, [r0, #-4]!
101 str r6, [r0, #-4]!
102 str r7, [r0, #-4]!
103 str r8, [r0, #-4]!
104 str lr, [r0, #-4]!
105
1067: ldmfd sp!, {r5 - r8} /* restore the extra scratch registers */
107
1088: movs r2, r2, lsl #31 /* count bit 0 -> result non-zero (ne), count bit 1 -> carry (cs) */
109 ldrneb r3, [r1, #-1]! /* bit 0 set: one byte */
110 ldrcsb r4, [r1, #-1]! /* bit 1 set: two more bytes */
111 ldrcsb ip, [r1, #-1]
112 strneb r3, [r0, #-1]!
113 strcsb r4, [r0, #-1]!
114 strcsb ip, [r0, #-1]
115 ldmpc regs="r0, r4" /* ldmpc is a macro defined outside this file; presumably pops r0/r4 and returns - confirm */
116
1179: cmp ip, #2 /* ip = dest end & 3 = bytes to copy to reach word alignment */
118 ldrgtb r3, [r1, #-1]! /* only when 3 bytes are needed */
119 ldrgeb r4, [r1, #-1]! /* when 2 or 3 bytes are needed */
120 ldrb lr, [r1, #-1]! /* always at least one byte */
121 strgtb r3, [r0, #-1]!
122 strgeb r4, [r0, #-1]!
123 subs r2, r2, ip /* account for the alignment bytes just copied */
124 strb lr, [r0, #-1]!
125 blt 8b /* fewer than 4 bytes left: byte tail */
126 ands ip, r1, #3 /* re-check src alignment */
127 beq 1b /* both pointers aligned now: take the fast path */
128
12910: bic r1, r1, #3 /* round src down to a word boundary */
130 cmp ip, #2 /* ip = src end & 3 (1..3) */
131 ldr r3, [r1, #0] /* preload the word containing the last source bytes */
132 beq 17f /* src offset 2 */
133 blt 18f /* src offset 1; offset 3 falls through */
134
135
136 .macro backward_copy_shift push pull /* push/pull are shift amounts in bits; r3 carries the previously loaded source word */
137
138 subs r2, r2, #28
139 blt 14f /* fewer than 32 bytes: use the word-at-a-time loop */
140
14111: stmfd sp!, {r5 - r9}
142
14312:
14413: ldmdb r1!, {r7, r8, r9, ip}
145 mov lr, r3, push #\push /* bytes left over from the previous (higher) word */
146 subs r2, r2, #32
147 ldmdb r1!, {r3, r4, r5, r6}
148 orr lr, lr, ip, pull #\pull /* merge with the trailing bytes of the next lower word */
149 mov ip, ip, push #\push
150 orr ip, ip, r9, pull #\pull
151 mov r9, r9, push #\push
152 orr r9, r9, r8, pull #\pull
153 mov r8, r8, push #\push
154 orr r8, r8, r7, pull #\pull
155 mov r7, r7, push #\push
156 orr r7, r7, r6, pull #\pull
157 mov r6, r6, push #\push
158 orr r6, r6, r5, pull #\pull
159 mov r5, r5, push #\push
160 orr r5, r5, r4, pull #\pull
161 mov r4, r4, push #\push
162 orr r4, r4, r3, pull #\pull
163 stmdb r0!, {r4 - r9, ip, lr} /* store 8 merged words below dest */
164 bge 12b /* flags still come from the subs above */
165
166 ldmfd sp!, {r5 - r9}
167
16814: ands ip, r2, #28 /* bytes in the remaining whole words */
169 beq 16f
170
17115: mov lr, r3, push #\push
172 ldr r3, [r1, #-4]!
173 subs ip, ip, #4
174 orr lr, lr, r3, pull #\pull
175 str lr, [r0, #-4]!
176 bgt 15b /* one merged word per pass */
177
17816: add r1, r1, #(\pull / 8) /* step src up to just past the last byte not yet copied */
179 b 8b /* finish with the byte tail */
180
181 .endm
182
183
184 backward_copy_shift push=8 pull=24
185
18617: backward_copy_shift push=16 pull=16
187
18818: backward_copy_shift push=24 pull=8
189
190
diff --git a/firmware/asm/arm/memset.S b/firmware/asm/arm/memset.S
new file mode 100644
index 0000000000..682da874ce
--- /dev/null
+++ b/firmware/asm/arm/memset.S
@@ -0,0 +1,98 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Thom Johansen
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",%progbits
24
25 .align 2
26
27/* The following code is based on code found in Linux kernel version 2.6.15.3
28 * linux/arch/arm/lib/memset.S
29 *
30 * Copyright (C) 1995-2000 Russell King
31 */
32
33/* This code will align a pointer for memset, if needed */
341: cmp r2, #4 @ 1 do we have enough
35 blt 5f @ 1 bytes to align with?
36 cmp r3, #2 @ 1
37 strgtb r1, [r0, #-1]! @ 1
38 strgeb r1, [r0, #-1]! @ 1
39 strb r1, [r0, #-1]! @ 1
40 sub r2, r2, r3 @ 1 r2 = r2 - r3
41 b 2f /* r0 is now word aligned: continue with the word fill */
42
43 .global memset
44 .type memset,%function
45memset: /* as used below: r0 = start address, r1 = fill value, r2 = byte count */
46 add r0, r0, r2 @ we'll write backwards in memory
47 ands r3, r0, #3 @ 1 unaligned?
48 bne 1b @ 1
492:
50/*
51 * we know that the pointer in r0 is aligned to a word boundary.
52 */
53 orr r1, r1, r1, lsl #8 /* replicate the fill byte across the word (assumes r1 holds an 8-bit value - TODO confirm) */
54 orr r1, r1, r1, lsl #16
55 mov r3, r1 /* second copy for the multi-register stores */
56 cmp r2, #16
57 blt 5f /* fewer than 16 bytes: skip the 16-byte block stores */
58/*
59 * We need an extra register for this loop - save the return address and
60 * use the LR
61 */
62 str lr, [sp, #-4]!
63 mov ip, r1
64 mov lr, r1
65
663: subs r2, r2, #64
67 stmgedb r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
68 stmgedb r0!, {r1, r3, ip, lr}
69 stmgedb r0!, {r1, r3, ip, lr}
70 stmgedb r0!, {r1, r3, ip, lr}
71 bgt 3b /* loop while bytes remain; ldrpc below is a macro from outside this file, presumably popping the saved lr into pc when the count hit exactly zero - confirm */
72 ldrpc cond=eq @ Now <64 bytes to go.
73/*
74 * No need to correct the count; we're only testing bits from now on
75 */
76 tst r2, #32 /* 32 bytes left? two 16-byte stores */
77 stmnedb r0!, {r1, r3, ip, lr}
78 stmnedb r0!, {r1, r3, ip, lr}
79 tst r2, #16 /* 16 bytes left? */
80 stmnedb r0!, {r1, r3, ip, lr}
81 ldr lr, [sp], #4 /* restore the return address saved before the loop */
82
835: tst r2, #8 /* 8 bytes left? */
84 stmnedb r0!, {r1, r3}
85 tst r2, #4 /* 4 bytes left? */
86 strne r1, [r0, #-4]!
87/*
88 * When we get here, we've got less than 4 bytes to zero. We
89 * may have an unaligned pointer as well.
90 */
916: tst r2, #2 /* 2 bytes left? */
92 strneb r1, [r0, #-1]!
93 strneb r1, [r0, #-1]!
94 tst r2, #1 /* final odd byte? */
95 strneb r1, [r0, #-1]!
96 bx lr /* every store pre-decrements r0, so r0 should be back at the start address here */
97.end:
98 .size memset,.end-memset
diff --git a/firmware/asm/arm/memset16.S b/firmware/asm/arm/memset16.S
new file mode 100644
index 0000000000..5c787b1bed
--- /dev/null
+++ b/firmware/asm/arm/memset16.S
@@ -0,0 +1,82 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Thom Johansen
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",%progbits
24
25 .align 2
26
27/* The following code is based on code from the Linux kernel version 2.6.15.3,
28 * linux/arch/arm/lib/memset.S
29 *
30 * Copyright (C) 1995-2000 Russell King
31 */
32
33 .global memset16
34 .type memset16,%function
35memset16: /* as used below: r0 = destination, r1 = 16-bit fill value, r2 = count in halfwords */
36 tst r0, #2 @ unaligned?
37 cmpne r2, #0 /* ... and at least one halfword to write? */
38 strneh r1, [r0], #2 @ store one halfword to align
39 subne r2, r2, #1 /* one halfword done */
40
41/*
42 * we know that the pointer in r0 is aligned to a word boundary.
43 */
44 orr r1, r1, r1, lsl #16 /* duplicate the halfword into the upper half (assumes bits 16-31 of r1 are clear - TODO confirm) */
45 mov r3, r1 /* second copy for the multi-register stores */
46 cmp r2, #8
47 blt 4f /* fewer than 8 halfwords (16 bytes): skip the block stores */
48/*
49 * We need an extra register for this loop - save the return address and
50 * use the LR
51 */
52 str lr, [sp, #-4]!
53 mov ip, r1
54 mov lr, r1
55
562: subs r2, r2, #32 /* 32 halfwords per pass */
57 stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
58 stmgeia r0!, {r1, r3, ip, lr}
59 stmgeia r0!, {r1, r3, ip, lr}
60 stmgeia r0!, {r1, r3, ip, lr}
61 bgt 2b /* loop while halfwords remain; ldrpc below is a macro from outside this file, presumably popping the saved lr into pc when the count hit exactly zero - confirm */
62 ldrpc cond=eq @ Now <64 bytes to go.
63/*
64 * No need to correct the count; we're only testing bits from now on
65 */
66 tst r2, #16 /* 16 halfwords (32 bytes) left? two 16-byte stores */
67 stmneia r0!, {r1, r3, ip, lr}
68 stmneia r0!, {r1, r3, ip, lr}
69 tst r2, #8 /* 8 halfwords (16 bytes) left? */
70 stmneia r0!, {r1, r3, ip, lr}
71 ldr lr, [sp], #4 /* restore the return address saved before the loop */
72
734: tst r2, #4 /* 4 halfwords (8 bytes) left? */
74 stmneia r0!, {r1, r3}
75 tst r2, #2 /* 2 halfwords (one word) left? */
76 strne r1, [r0], #4
77
78 tst r2, #1 /* final odd halfword? */
79 strneh r1, [r0], #2
80 bx lr
81.end:
82 .size memset16,.end-memset16
diff --git a/firmware/asm/m68k/memcpy.S b/firmware/asm/m68k/memcpy.S
new file mode 100644
index 0000000000..9762e31e02
--- /dev/null
+++ b/firmware/asm/m68k/memcpy.S
@@ -0,0 +1,682 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004-2005 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25#define FULLSPEED /* use burst writing for word aligned destinations */
26 .align 2
27 .global memcpy
28 .global __memcpy_fwd_entry
29 .type memcpy,@function
30
31/* Copies <length> bytes of data in memory from <source> to <dest>
32 * This version is optimized for speed
33 *
34 * arguments:
35 * (4,%sp) - destination address
36 * (8,%sp) - source address
37 * (12,%sp) - length
38 *
39 * return value:
40 * %d0 - destination address (like ANSI version)
41 *
42 * register usage:
43 * %a0 - current source address
44 * %a1 - current dest address
45 * %a2 - source end address (in line-copy loops)
46 * %d0 - data / scratch
47 * %d1 - source end address (byte and longword copy) / data / scratch
48 * %d2 - data / scratch
49 * %d3..%d7 - data
50 *
51 * For maximum speed this routine reads and writes whole lines using burst
52 * move (movem.l) where possible. For byte aligned destinations (long+1 and
53 * long+3) it writes longwords only. Same goes for word aligned destinations
54 * if FULLSPEED is undefined.
55 */
56memcpy:
57 move.l (4,%sp),%a1 /* Destination */
58 move.l (8,%sp),%a0 /* Source */
59 move.l (12,%sp),%d1 /* Length */
60
61__memcpy_fwd_entry:
62 add.l %a0,%d1 /* %d1 = source end */
63
64 move.l %a0,%d0
65 addq.l #7,%d0
66 and.l #0xFFFFFFFC,%d0 /* %d0 = first source long bound + 4 */
67 cmp.l %d0,%d1 /* at least one aligned longword to copy? */
68 blo.w .bytes2_start /* no, jump directly to trailing byte loop */
69
70 subq.l #4,%d0 /* %d0 = first source long bound */
71 cmp.l %a0,%d0 /* any bytes to copy? */
72 jls .bytes1_end /* no: skip byte loop */
73
74 /* leading byte loop: copies 0..3 bytes */
75.bytes1_loop:
76 move.b (%a0)+,(%a1)+ /* copy byte */
77 cmp.l %a0,%d0 /* runs %a0 up to first long bound */
78 jhi .bytes1_loop
79
80.bytes1_end:
81 moveq.l #31,%d0
82 add.l %a0,%d0
83 and.l #0xFFFFFFF0,%d0 /* %d0 = first source line bound + 16 */
84 cmp.l %d0,%d1 /* at least one aligned line to copy? */
85 blo.w .long_start /* no: jump to longword copy loop */
86
87 lea.l (-28,%sp),%sp /* free up some registers */
88 movem.l %d2-%d7/%a2,(%sp)
89
90 moveq.l #16,%d2
91 sub.l %d2,%d0 /* %d0 = first source line bound */
92 move.l %d1,%a2 /* %a2 = end address */
93 lea.l (-15,%a2),%a2 /* adjust end address for loops doing 16 bytes/ pass */
94 move.l %a1,%d1
95 moveq.l #3,%d2 /* mask */
96 and.l %d2,%d1
97 jmp.l (2,%pc,%d1.l*4) /* switch (dest_addr & 3) */
98 bra.w .lines_do0_start
99 bra.w .lines_do1_start
100 bra.w .lines_do2_start
101 /* bra.w .lines_do3_start implicit */
102
103 /* byte aligned destination (long + 3): use line burst reads in main loop */
104.lines_do3_start:
105 moveq.l #24,%d1 /* shift count for shifting by 3 bytes */
106 cmp.l %a0,%d0 /* any leading longwords? */
107 jhi .lines_do3_head_start /* yes: leading longword copy */
108
109 movem.l (%a0),%d4-%d7 /* load first line */
110 lea.l (16,%a0),%a0
111 move.l %d4,%d2
112 lsr.l %d1,%d2 /* get high byte of first longword */
113 move.b %d2,(%a1)+ /* store byte */
114 jra .lines_do3_entry /* jump into main loop */
115
116.lines_do3_head_start:
117 move.l (%a0)+,%d7 /* load first longword */
118 move.l %d7,%d2
119 lsr.l %d1,%d2 /* get high byte */
120 move.b %d2,(%a1)+ /* store byte */
121 jra .lines_do3_head_entry /* jump into leading longword loop */
122
123.lines_do3_head_loop:
124 move.l %d7,%d6 /* move old longword away */
125 move.l (%a0)+,%d7 /* load new longword */
126 move.l %d7,%d2
127 lsr.l %d1,%d2 /* get high byte */
128 or.l %d2,%d6 /* combine with old lower 3 bytes */
129 move.l %d6,(%a1)+ /* store longword */
130.lines_do3_head_entry:
131 lsl.l #8,%d7 /* shift up lower 3 bytes */
132 cmp.l %a0,%d0 /* runs %a0 up to first line bound */
133 jhi .lines_do3_head_loop
134
135.lines_do3_loop:
136 move.l %d7,%d3 /* move last longword of old line away */
137 movem.l (%a0),%d4-%d7 /* load new line */
138 lea.l (16,%a0),%a0
139 move.l %d4,%d2
140 lsr.l %d1,%d2 /* get high byte of 1st longword */
141 or.l %d2,%d3 /* combine with old lower 3 bytes */
142 move.l %d3,(%a1)+ /* store longword */
143.lines_do3_entry:
144 lsl.l #8,%d4 /* shift up lower 3 bytes */
145 move.l %d5,%d2
146 lsr.l %d1,%d2 /* get high byte of 2nd longword */
147 or.l %d2,%d4 /* combine with 1st lower 3 bytes */
148 move.l %d4,(%a1)+ /* store longword */
149 lsl.l #8,%d5 /* shift up lower 3 bytes */
150 move.l %d6,%d2
151 lsr.l %d1,%d2 /* get high byte of 3rd longword */
152 or.l %d2,%d5 /* combine with 2nd lower 3 bytes */
153 move.l %d5,(%a1)+ /* store longword */
154 lsl.l #8,%d6 /* shift up lower 3 bytes */
155 move.l %d7,%d2
156 lsr.l %d1,%d2 /* get high byte of 4th longword */
157 or.l %d2,%d6 /* combine with 3rd lower 3 bytes */
158 move.l %d6,(%a1)+ /* store longword */
159 lsl.l #8,%d7 /* shift up lower 3 bytes */
160 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
161 jhi .lines_do3_loop
162
163 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
164 cmp.l %a0,%a2 /* any trailing longwords? */
165 jls .lines_do3_tail_end /* no: just store last lower 3 bytes */
166
167.lines_do3_tail_loop:
168 move.l %d7,%d6 /* move old longword away */
169 move.l (%a0)+,%d7 /* load new longword */
170 move.l %d7,%d2
171 lsr.l %d1,%d2 /* get high byte */
172 or.l %d2,%d6 /* combine with old lower 3 bytes */
173 move.l %d6,(%a1)+ /* store longword */
174 lsl.l #8,%d7 /* shift up lower 3 bytes */
175 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
176 jhi .lines_do3_tail_loop
177
178.lines_do3_tail_end:
179 swap %d7 /* get high word */
180 move.w %d7,(%a1)+ /* store word */
181 lsr.l %d1,%d7 /* get moved-up low byte */
182 move.b %d7,(%a1)+ /* store byte */
183 jra .lines_end
184
185 /* byte aligned destination (long + 1): use line burst reads in main loop */
186.lines_do1_start:
187 moveq.l #24,%d1 /* shift count for shifting by 3 bytes */
188 cmp.l %a0,%d0 /* any leading longwords? */
189 jhi .lines_do1_head_start /* yes: leading longword copy */
190
191 movem.l (%a0),%d4-%d7 /* load first line */
192 lea.l (16,%a0),%a0
193 move.l %d4,%d2 /* first longword, bytes 3210 */
194 lsr.l #8,%d2 /* first longword, bytes .321 */
195 swap %d2 /* first longword, bytes 21.3 */
196 move.b %d2,(%a1)+ /* store byte */
197 swap %d2 /* first longword, bytes .321 */
198 move.w %d2,(%a1)+ /* store word */
199 jra .lines_do1_entry
200
201.lines_do1_head_start:
202 move.l (%a0)+,%d7 /* load first longword */
203 move.l %d7,%d2 /* first longword, bytes 3210 */
204 lsr.l #8,%d2 /* first longword, bytes .321 */
205 swap %d2 /* first longword, bytes 21.3 */
206 move.b %d2,(%a1)+ /* store byte */
207 swap %d2 /* first longword, bytes .321 */
208 move.w %d2,(%a1)+ /* store word */
209 jra .lines_do1_head_entry
210
211.lines_do1_head_loop:
212 move.l %d7,%d6 /* move old longword away */
213 move.l (%a0)+,%d7 /* load new longword */
214 move.l %d7,%d2
215 lsr.l #8,%d2 /* get upper 3 bytes */
216 or.l %d2,%d6 /* combine with old low byte */
217 move.l %d6,(%a1)+ /* store longword */
218.lines_do1_head_entry:
219 lsl.l %d1,%d7 /* shift up low byte */
220 cmp.l %a0,%d0 /* runs %a0 up to first line bound */
221 jhi .lines_do1_head_loop
222
223.lines_do1_loop:
224 move.l %d7,%d3 /* move last longword of old line away */
225 movem.l (%a0),%d4-%d7 /* load new line */
226 lea.l (16,%a0),%a0
227 move.l %d4,%d2
228 lsr.l #8,%d2 /* get upper 3 bytes of 1st longword */
229 or.l %d2,%d3 /* combine with low byte of old longword */
230 move.l %d3,(%a1)+ /* store longword */
231.lines_do1_entry:
232 lsl.l %d1,%d4 /* shift up low byte */
233 move.l %d5,%d2
234 lsr.l #8,%d2 /* get upper 3 bytes of 2nd longword */
235 or.l %d2,%d4 /* combine with low byte of 1st longword */
236 move.l %d4,(%a1)+ /* store longword */
237 lsl.l %d1,%d5 /* shift up low byte */
238 move.l %d6,%d2
239 lsr.l #8,%d2 /* get upper 3 bytes of 3rd longword */
240 or.l %d2,%d5 /* combine with low byte of 2nd longword */
241 move.l %d5,(%a1)+ /* store longword */
242 lsl.l %d1,%d6 /* shift up low byte */
243 move.l %d7,%d2
244 lsr.l #8,%d2 /* get upper 3 bytes of 4th longword */
245 or.l %d2,%d6 /* combine with low byte of 3rd longword */
246 move.l %d6,(%a1)+ /* store longword */
247 lsl.l %d1,%d7 /* shift up low byte */
248 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
249 jhi .lines_do1_loop
250
251 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
252 cmp.l %a0,%a2 /* any trailing longwords? */
253 jls .lines_do1_tail_end /* no: just store last low byte */
254
255.lines_do1_tail_loop:
256 move.l %d7,%d6 /* move old longword away */
257 move.l (%a0)+,%d7 /* load new longword */
258 move.l %d7,%d2
259 lsr.l #8,%d2 /* get upper 3 bytes */
260 or.l %d2,%d6 /* combine with old low byte */
261 move.l %d6,(%a1)+ /* store longword */
262 lsl.l %d1,%d7 /* shift up low byte */
263 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
264 jhi .lines_do1_tail_loop
265
266.lines_do1_tail_end:
267 lsr.l %d1,%d7 /* get shifted-up low byte */
268 move.b %d7,(%a1)+ /* store byte */
269 jra .lines_end
270
271 /* long aligned destination (line + 0/4/8/12): head */
272.lines_do0_head_loop:
273 move.l (%a0)+,(%a1)+ /* copy longword */
274.lines_do0_start:
275 cmp.l %a0,%d0 /* runs %a0 up to first line bound */
276 jhi .lines_do0_head_loop
277
278.lines_do0_head_end:
279 move.l %a1,%d1
280 lsr.l #2,%d1
281 moveq.l #3,%d0 /* mask */
282 and.l %d0,%d1
283 moveq.l #16,%d0 /* address increment for one main loop pass */
284 jmp.l (2,%pc,%d1.l*2) /* switch ((dest_addr >> 2) & 3) */
285 bra.b .lines_lo0_start
286 bra.b .lines_lo4_start
287 bra.b .lines_lo8_start
288 /* bra.b .lines_lo12_start implicit */
289
290 /* long aligned destination (line + 12): use line bursts in the loop */
291.lines_lo12_start:
292 movem.l (%a0),%d4-%d7 /* load first line */
293 add.l %d0,%a0
294 move.l %d4,(%a1)+ /* store 1st longword */
295 cmp.l %a0,%a2 /* any full lines? */
296 jls .lines_lo12_end /* no: skip main loop */
297
298.lines_lo12_loop:
299 move.l %d5,%d1 /* move last 3 longwords of old line away */
300 move.l %d6,%d2
301 move.l %d7,%d3
302 movem.l (%a0),%d4-%d7 /* load new line */
303 add.l %d0,%a0
304 movem.l %d1-%d4,(%a1) /* store line (3 old + 1 new longwords) */
305 add.l %d0,%a1
306 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
307 jhi .lines_lo12_loop
308
309 /* long aligned destination (line + 0/4/8/12): tail */
310.lines_lo12_end:
311 move.l %d5,(%a1)+ /* store 3rd last longword */
312.lines_lo8_end:
313 move.l %d6,(%a1)+ /* store 2nd last longword */
314.lines_lo4_end:
315 move.l %d7,(%a1)+ /* store last longword */
316.lines_lo0_end:
317 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
318 cmp.l %a0,%a2 /* any trailing longwords? */
319 jls .lines_end /* no: get outta here */
320
321.lines_do0_tail_loop:
322 move.l (%a0)+,(%a1)+ /* copy longword */
323 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
324 jhi .lines_do0_tail_loop
325
326 jra .lines_end
327
328 /* line aligned destination: use line bursts in the loop */
329.lines_lo0_start:
330.lines_lo0_loop:
331 movem.l (%a0),%d4-%d7 /* load line */
332 add.l %d0,%a0
333 movem.l %d4-%d7,(%a1) /* store line */
334 add.l %d0,%a1
335 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
336 jhi .lines_lo0_loop
337
338 jra .lines_lo0_end /* handle trailing longwords */
339
340 /* long aligned destination (line + 4): use line bursts in the loop */
341.lines_lo4_start:
342 movem.l (%a0),%d4-%d7 /* load first line */
343 add.l %d0,%a0
344 move.l %d4,(%a1)+ /* store 1st longword */
345 move.l %d5,(%a1)+ /* store 2nd longword */
346 move.l %d6,(%a1)+ /* store 3rd longword */
347 cmp.l %a0,%a2 /* any full lines? */
348 jls .lines_lo4_end /* no: skip main loop */
349
350.lines_lo4_loop:
351 move.l %d7,%d3 /* move last longword of old line away */
352 movem.l (%a0),%d4-%d7 /* load new line */
353 add.l %d0,%a0
354 movem.l %d3-%d6,(%a1) /* store line (1 old + 3 new longwords) */
355 add.l %d0,%a1
356 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
357 jhi .lines_lo4_loop
358
359 jra .lines_lo4_end /* handle trailing longwords */
360
361 /* long aligned destination (line + 8): use line bursts in the loop */
362.lines_lo8_start:
363 movem.l (%a0),%d4-%d7 /* load first line */
364 add.l %d0,%a0
365 move.l %d4,(%a1)+ /* store 1st longword */
366 move.l %d5,(%a1)+ /* store 2nd longword */
367 cmp.l %a0,%a2
368 jls .lines_lo8_end
369
370.lines_lo8_loop:
371 move.l %d6,%d2 /* move last 2 longwords of old line away */
372 move.l %d7,%d3
373 movem.l (%a0),%d4-%d7 /* load new line */
374 add.l %d0,%a0
375 movem.l %d2-%d5,(%a1) /* store line (2 old + 2 new longwords) */
376 add.l %d0,%a1
377 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
378 jhi .lines_lo8_loop
379
380 jra .lines_lo8_end /* handle trailing longwords */
381
382#ifdef FULLSPEED
383
384 /* word aligned destination (line + 2/6/10/14): head */
385.lines_do2_start:
386 cmp.l %a0,%d0 /* any leading longwords? */
387 jls .lines_do2_selector /* no: jump to mainloop selector */
388
389 move.l (%a0)+,%d7 /* load first longword */
390 swap %d7 /* swap words */
391 move.w %d7,(%a1)+ /* store high word */
392 cmp.l %a0,%d0 /* any more longword? */
393 jls .lines_do2_head_end /* no: skip head loop */
394
395.lines_do2_head_loop:
396 move.l %d7,%d6 /* move old longword away */
397 move.l (%a0)+,%d7 /* load new longword */
398 swap %d7 /* swap words */
399 move.w %d7,%d6 /* combine high word with old low word */
400 move.l %d6,(%a1)+ /* store longword */
401 cmp.l %a0,%d0 /* runs %a0 up to first line bound */
402 jhi .lines_do2_head_loop
403
404.lines_do2_head_end:
405 swap %d7 /* undo swap */
406 move.w %d7,(%a1)+ /* store word */
407
408.lines_do2_selector:
409 move.l %a1,%d1
410 lsr.l #2,%d1
411 moveq.l #3,%d0 /* mask */
412 and.l %d0,%d1
413 moveq.l #16,%d0 /* address increment for one main loop pass */
414 jmp.l (2,%pc,%d1.l*4) /* switch ((dest_addr >> 2) & 3) */
415 bra.w .lines_lo2_start
416 bra.w .lines_lo6_start
417 bra.w .lines_lo10_start
418 /* bra.w .lines_lo14_start implicit */
419
420 /* word aligned destination (line + 14): use line bursts in the loop */
421.lines_lo14_start:
422 movem.l (%a0),%d4-%d7 /* load first line */
423 add.l %d0,%a0
424 swap %d4 /* swap words of 1st long */
425 move.w %d4,(%a1)+ /* store word */
426 jra .lines_lo14_entry /* jump into main loop */
427
428.lines_lo14_loop:
429 move.l %d4,%d0 /* move old line away */
430 move.l %d5,%d1
431 move.l %d6,%d2
432 move.l %d7,%d3
433 movem.l (%a0),%d4-%d7 /* load new line */
434 lea.l (16,%a0),%a0
435 swap %d4 /* swap words of 1st long */
436 move.w %d4,%d3 /* combine 1st high word with old low word */
437 movem.l %d0-%d3,(%a1) /* store line */
438 lea.l (16,%a1),%a1
439.lines_lo14_entry:
440 swap %d5 /* swap words of 2nd long */
441 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
442 swap %d6 /* swap words of 3rd long */
443 move.w %d6,%d5 /* combine 3rd high word with 2nd low word */
444 swap %d7 /* swap words of 4th long */
445 move.w %d7,%d6 /* combine 4th high word with 3rd low word */
446 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
447 jhi .lines_lo14_loop
448
449 /* word aligned destination (line + 2/6/10/14): tail */
450.lines_lo14_end:
451 move.l %d4,(%a1)+ /* store third last longword */
452.lines_lo10_end:
453 move.l %d5,(%a1)+ /* store second last longword */
454.lines_lo6_end:
455 move.l %d6,(%a1)+ /* store last longword */
456.lines_lo2_end:
457 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
458 cmp.l %a0,%a2 /* any trailing longwords? */
459 jls .lines_do2_tail_end /* no: skip tail loop */
460
461.lines_do2_tail_loop:
462 move.l %d7,%d6 /* move old longword away */
463 move.l (%a0)+,%d7 /* load new longword */
464 swap %d7 /* swap words */
465 move.w %d7,%d6 /* combine high word with old low word */
466 move.l %d6,(%a1)+ /* store longword */
467 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
468 jhi .lines_do2_tail_loop
469
470.lines_do2_tail_end:
471 swap %d7 /* undo swap */
472 move.w %d7,(%a1)+ /* store last word */
473 jra .lines_end
474
475 /* word aligned destination (line + 2): use line bursts in the loop */
476.lines_lo2_start:
477 movem.l (%a0),%d4-%d7 /* load first line */
478 add.l %d0,%a0
479 swap %d4 /* swap words of 1st long */
480 move.w %d4,(%a1)+ /* store high word */
481 swap %d5 /* swap words of 2nd long */
482 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
483 swap %d6 /* swap words of 3rd long */
484 move.w %d6,%d5 /* combine 3nd high word with 2nd low word */
485 swap %d7 /* swap words of 4th long */
486 move.w %d7,%d6 /* combine 4th high word with 3rd low word */
487 move.l %d4,(%a1)+ /* store 1st longword */
488 move.l %d5,(%a1)+ /* store 2nd longword */
489 move.l %d6,(%a1)+ /* store 3rd longword */
490 cmp.l %a0,%a2 /* any full lines? */
491 jls .lines_lo2_end /* no: skip main loop */
492
493.lines_lo2_loop:
494 move.l %d7,%d3 /* move last longword of old line away */
495 movem.l (%a0),%d4-%d7 /* load line */
496 add.l %d0,%a0
497 swap %d4 /* swap words of 1st long */
498 move.w %d4,%d3 /* combine 1st high word with old low word */
499 swap %d5 /* swap words of 2nd long */
500 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
501 swap %d6 /* swap words of 3rd long */
502 move.w %d6,%d5 /* combine 3rd high word with 2nd low word */
503 swap %d7 /* swap words of 4th long */
504 move.w %d7,%d6 /* combine 4th high word with 3rd low word */
505 movem.l %d3-%d6,(%a1) /* store line */
506 add.l %d0,%a1
507 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
508 jhi .lines_lo2_loop
509
510 jra .lines_lo2_end /* handle trailing longwords */
511
512 /* word aligned destination (line + 6): use line bursts in the loop */
513.lines_lo6_start:
514 movem.l (%a0),%d4-%d7 /* load first line */
515 add.l %d0,%a0
516 swap %d4 /* swap words of 1st long */
517 move.w %d4,(%a1)+ /* store high word */
518 swap %d5 /* swap words of 2nd long */
519 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
520 swap %d6 /* swap words of 3rd long */
521 move.w %d6,%d5 /* combine 3rd high word with 2nd low word */
522 move.l %d4,(%a1)+ /* store 1st longword */
523 move.l %d5,(%a1)+ /* store 2nd longword */
524 jra .lines_lo6_entry /* jump into main loop */
525
526.lines_lo6_loop:
527 move.l %d6,%d2 /* move last 2 longwords of old line away */
528 move.l %d7,%d3
529 movem.l (%a0),%d4-%d7 /* load line */
530 add.l %d0,%a0
531 swap %d4 /* swap words of 1st long */
532 move.w %d4,%d3 /* combine 1st high word with old low word */
533 swap %d5 /* swap words of 2nd long */
534 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
535 swap %d6 /* swap words of 3rd long */
536 move.w %d6,%d5 /* combine 3rd high word with 2nd low word */
537 movem.l %d2-%d5,(%a1) /* store line */
538 add.l %d0,%a1
539.lines_lo6_entry:
540 swap %d7 /* swap words of 4th long */
541 move.w %d7,%d6 /* combine 4th high word with 3rd low word */
542 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
543 jhi .lines_lo6_loop
544
545 jra .lines_lo6_end /* handle trailing longwords */
546
547 /* word aligned destination (line + 10): use line bursts in the loop */
548.lines_lo10_start:
549 movem.l (%a0),%d4-%d7 /* load first line */
550 add.l %d0,%a0
551 swap %d4 /* swap words of 1st long */
552 move.w %d4,(%a1)+ /* store high word */
553 swap %d5 /* swap words of 2nd long */
554 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
555 move.l %d4,(%a1)+ /* store 1st longword */
556 jra .lines_lo10_entry /* jump into main loop */
557
558.lines_lo10_loop:
559 move.l %d5,%d1 /* move last 3 longwords of old line away */
560 move.l %d6,%d2
561 move.l %d7,%d3
562 movem.l (%a0),%d4-%d7 /* load line */
563 add.l %d0,%a0
564 swap %d4 /* swap words of 1st long */
565 move.w %d4,%d3 /* combine 1st high word with old low word */
566 swap %d5 /* swap words of 2nd long */
567 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
568 movem.l %d1-%d4,(%a1) /* store line */
569 add.l %d0,%a1
570.lines_lo10_entry:
571 swap %d6 /* swap words of 3rd long */
572 move.w %d6,%d5 /* combine 3rd high word with 2nd low word */
573 swap %d7 /* swap words of 4th long */
574 move.w %d7,%d6 /* combine 4th high word with 3rd low word */
575 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
576 jhi .lines_lo10_loop
577
578 jra .lines_lo10_end /* handle trailing longwords */
579
580#else /* !FULLSPEED */
581
582 /* word aligned destination (long + 2): use line burst reads in the loop */
583.lines_do2_start:
584 cmp.l %a0,%d0 /* any leading longwords? */
585 jhi .lines_do2_head_start /* yes: leading longword copy */
586
587 movem.l (%a0),%d4-%d7 /* load first line */
588 lea.l (16,%a0),%a0
589 swap %d4 /* swap words of 1st long */
590 move.w %d4,(%a1)+ /* store high word */
591 jra .lines_do2_entry /* jump into main loop */
592
593.lines_do2_head_start:
594 move.l (%a0)+,%d7 /* load first longword */
595 swap %d7 /* swap words */
596 move.w %d7,(%a1)+ /* store high word */
597 cmp.l %a0,%d0 /* any full longword? */
598 jls .lines_do2_loop /* no: skip head loop */
599
600.lines_do2_head_loop:
601 move.l %d7,%d6 /* move old longword away */
602 move.l (%a0)+,%d7 /* load new longword */
603 swap %d7 /* swap words */
604 move.w %d7,%d6 /* combine high word with old low word */
605 move.l %d6,(%a1)+ /* store longword */
606 cmp.l %a0,%d0 /* runs %a0 up to first line bound */
607 jhi .lines_do2_head_loop
608
609.lines_do2_loop:
610 move.l %d7,%d3 /* move last longword of old line away */
611 movem.l (%a0),%d4-%d7 /* load line */
612 lea.l (16,%a0),%a0
613 swap %d4 /* swap words of 1st long */
614 move.w %d4,%d3 /* combine 1st high word with old low word */
615 move.l %d3,(%a1)+ /* store 1st longword */
616.lines_do2_entry:
617 swap %d5 /* swap words of 2nd long */
618 move.w %d5,%d4 /* combine 2nd high word with 1st low word */
619 move.l %d4,(%a1)+ /* store 2nd longword */
620 swap %d6 /* swap words of 3rd long */
621 move.w %d6,%d5 /* combine 3rd high word with 2nd low word */
622 move.l %d5,(%a1)+ /* store 3rd longword */
623 swap %d7 /* swap words of 4th long */
624 move.w %d7,%d6 /* combine 4th high word with 3rd low word */
625 move.l %d6,(%a1)+ /* store 4th longword */
626 cmp.l %a0,%a2 /* runs %a0 up to last line bound */
627 jhi .lines_do2_loop
628
629.lines_do2_end:
630 lea.l (12,%a2),%a2 /* readjust end address for doing longwords */
631 cmp.l %a0,%a2 /* any trailing longwords? */
632 jls .lines_do2_tail_end /* no: skip tail loop */
633
634.lines_do2_tail_loop:
635 move.l %d7,%d6 /* move old longword away */
636 move.l (%a0)+,%d7 /* load new longword */
637 swap %d7 /* swap words */
638 move.w %d7,%d6 /* combine high word with old low word */
639 move.l %d6,(%a1)+ /* store longword */
640 cmp.l %a0,%a2 /* runs %a0 up to last long bound */
641 jhi .lines_do2_tail_loop
642
643.lines_do2_tail_end:
644 swap %d7 /* undo swap */
645 move.w %d7,(%a1)+ /* store last word */
646 /* jra .lines_end implicit */
647
648#endif /* !FULLSPEED */
649
650.lines_end:
651 addq.l #3,%a2 /* readjust end address */
652 move.l %a2,%d1 /* end address in %d1 again */
653 movem.l (%sp),%d2-%d7/%a2 /* restore registers */
654 lea.l (28,%sp),%sp
655 jra .bytes2_start /* jump to trailing byte loop */
656
657.long_start:
658 subq.l #3,%d1 /* adjust end address for doing 4 bytes/ pass */
659
660 /* longword copy loop - no lines */
661.long_loop:
662 move.l (%a0)+,(%a1)+ /* copy longword (write can be unaligned) */
663 cmp.l %a0,%d1 /* runs %a0 up to last long bound */
664 jhi .long_loop
665
666 addq.l #3,%d1 /* readjust end address */
667 cmp.l %a0,%d1 /* any bytes left? */
668 jls .bytes2_end /* no: skip trailing byte loop */
669
670 /* trailing byte loop */
671.bytes2_loop:
672 move.b (%a0)+,(%a1)+ /* copy byte */
673.bytes2_start:
674 cmp.l %a0,%d1 /* runs %a0 up to end address */
675 jhi .bytes2_loop
676
677.bytes2_end:
678 move.l (4,%sp),%d0 /* return destination */
679 rts
680
681.end:
682 .size memcpy,.end-memcpy
diff --git a/firmware/asm/m68k/memmove.S b/firmware/asm/m68k/memmove.S
new file mode 100644
index 0000000000..736cd619e1
--- /dev/null
+++ b/firmware/asm/m68k/memmove.S
@@ -0,0 +1,670 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25#define FULLSPEED /* use burst writing for word aligned destinations */
26 .align 2
27 .global memmove
28 .type memmove,@function
29
30/* Moves <length> bytes of data in memory from <source> to <dest>
31 * Regions may overlap.
32 * This version is optimized for speed, and needs the corresponding memcpy
33 * implementation for the forward copy branch.
34 *
35 * arguments:
36 * (4,%sp) - destination address
37 * (8,%sp) - source address
38 * (12,%sp) - length
39 *
40 * return value:
41 * %d0 - destination address (like ANSI version)
42 *
43 * register usage:
44 * %a0 - current source address
45 * %a1 - current dest address
46 * %a2 - source start address (in line-copy loops)
47 * %d0 - source start address (byte and longword copy) / data / scratch
48 * %d1 - data / scratch
49 * %d2 - data / scratch
50 * %d3..%d7 - data
51 *
52 * For maximum speed this routine reads and writes whole lines using burst
53 * move (movem.l) where possible. For byte aligned destinations (long-1 and
54 * long-3) it writes longwords only. Same goes for word aligned destinations
55 * if FULLSPEED is undefined.
56 */
57memmove: /* entry point; all three arguments are read from the stack */
58 move.l (4,%sp),%a1 /* Destination */
59 move.l (8,%sp),%a0 /* Source */
60 move.l (12,%sp),%d1 /* Length */
61
62 cmp.l %a0,%a1 /* compare destination against source */
63 bhi.b .backward /* dest > src -> backward copy */
64 jmp __memcpy_fwd_entry /* dest <= src: forward copy, handled by the memcpy code */
65
66.backward:
67 move.l %a0,%d0 /* %d0 = source start */
68 add.l %d1,%a0 /* %a0 = source end */
69 add.l %d1,%a1 /* %a1 = destination end */
70
71 move.l %a0,%d1 /* %d1 = source end */
72 and.l #0xFFFFFFFC,%d1 /* %d1 = last source long bound */
73 subq.l #4,%d1 /* %d1 = last source long bound - 4 */
74 cmp.l %d0,%d1 /* at least one aligned longword to copy? */
75 blo.w .bytes2r_start /* no: do everything in the trailing byte loop */
76
77 addq.l #4,%d1 /* %d1 = last source long bound */
78 cmp.l %d1,%a0 /* any leading bytes to copy? */
79 jls .bytes1r_end /* no: skip byte loop */
80
81 /* leading byte loop: copies 0..3 bytes */
82.bytes1r_loop:
83 move.b -(%a0),-(%a1) /* copy byte */
84 cmp.l %d1,%a0 /* runs %a0 down to last long bound */
85 jhi .bytes1r_loop
86
87.bytes1r_end:
88 moveq.l #-16,%d1 /* %d1 = -16 */
89 add.l %a0,%d1 /* %d1 = current source address - 16 */
90 and.l #0xFFFFFFF0,%d1 /* %d1 = last source line bound - 16 */
91 cmp.l %d0,%d1 /* at least one aligned line to copy? */
92 blo.w .longr_start /* no: jump to longword copy loop */
93
94 lea.l (-28,%sp),%sp /* free up some registers */
95 movem.l %d2-%d7/%a2,(%sp) /* save %d2-%d7/%a2 (7 longwords = 28 bytes) */
96
97 moveq.l #16,%d2 /* line size */
98 add.l %d2,%d1 /* %d1 = last source line bound */
99 move.l %d0,%a2 /* %a2 = start address */
100 lea.l (15,%a2),%a2 /* adjust start address for loops doing 16 bytes/pass */
101 move.l %a1,%d0 /* %d0 = current destination address */
102 moveq.l #3,%d2 /* mask */
103 and.l %d2,%d0 /* %d0 = dest_addr & 3 */
104 jmp.l (2,%pc,%d0.l*4) /* switch (dest_addr & 3): index * 4 into the bra.w table below */
105 bra.w .lines_do0r_start
106 bra.w .lines_do1r_start
107 bra.w .lines_do2r_start
108 /* bra.w .lines_do3r_start implicit: case 3 lands directly behind the table */
109
110 /* byte aligned destination (long - 1): use line burst reads in main loop */
111.lines_do3r_start:
112 moveq.l #24,%d0 /* shift count for shifting by 3 bytes */
113 cmp.l %d1,%a0 /* any leading longwords? */
114 jhi .lines_do3r_head_start /* yes: leading longword copy */
115
116 lea.l (-16,%a0),%a0 /* step source back by one line */
117 movem.l (%a0),%d3-%d6 /* load initial line */
118 move.l %d6,%d2 /* last longword, bytes 3210 */
119 move.b %d2,-(%a1) /* store byte */
120 lsr.l #8,%d2 /* last longword, bytes .321 */
121 move.w %d2,-(%a1) /* store word */
122 jra .lines_do3r_entry /* jump into main loop */
123
124.lines_do3r_head_start:
125 move.l -(%a0),%d3 /* load initial longword */
126 move.l %d3,%d2 /* bytes 3210 */
127 move.b %d2,-(%a1) /* store byte */
128 lsr.l #8,%d2 /* bytes .321 */
129 move.w %d2,-(%a1) /* store word */
130 jra .lines_do3r_head_entry /* jump into head loop */
131
132.lines_do3r_head_loop:
133 move.l %d3,%d4 /* move old longword away */
134 move.l -(%a0),%d3 /* load new longword */
135 move.l %d3,%d2 /* work on a copy, %d3 is still needed */
136 lsl.l #8,%d2 /* get bytes 210. */
137 or.l %d2,%d4 /* combine with old high byte */
138 move.l %d4,-(%a1) /* store longword */
139.lines_do3r_head_entry:
140 lsr.l %d0,%d3 /* shift down high byte */
141 cmp.l %d1,%a0 /* run %a0 down to last line bound */
142 jhi .lines_do3r_head_loop
143
144.lines_do3r_loop:
145 move.l %d3,%d7 /* move first longword of old line away */
146 lea.l (-16,%a0),%a0 /* step source back by one line */
147 movem.l (%a0),%d3-%d6 /* load new line */
148 move.l %d6,%d2 /* copy of 4th longword */
149 lsl.l #8,%d2 /* get bytes 210. of 4th longword */
150 or.l %d2,%d7 /* combine with high byte of old longword */
151 move.l %d7,-(%a1) /* store longword */
152.lines_do3r_entry:
153 lsr.l %d0,%d6 /* shift down high byte */
154 move.l %d5,%d2 /* copy of 3rd longword */
155 lsl.l #8,%d2 /* get bytes 210. of 3rd longword */
156 or.l %d2,%d6 /* combine with high byte of 4th longword */
157 move.l %d6,-(%a1) /* store longword */
158 lsr.l %d0,%d5 /* shift down high byte */
159 move.l %d4,%d2 /* copy of 2nd longword */
160 lsl.l #8,%d2 /* get bytes 210. of 2nd longword */
161 or.l %d2,%d5 /* combine with high byte of 3rd longword */
162 move.l %d5,-(%a1) /* store longword */
163 lsr.l %d0,%d4 /* shift down high byte */
164 move.l %d3,%d2 /* copy of 1st longword */
165 lsl.l #8,%d2 /* get bytes 210. of 1st longword */
166 or.l %d2,%d4 /* combine with high byte of 2nd longword */
167 move.l %d4,-(%a1) /* store longword */
168 lsr.l %d0,%d3 /* shift down high byte */
169 cmp.l %a2,%a0 /* run %a0 down to first line bound */
170 jhi .lines_do3r_loop
171
172 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
173 cmp.l %a2,%a0 /* any trailing longwords? */
174 jls .lines_do3r_tail_end /* no: just store last high byte */
175
176.lines_do3r_tail_loop:
177 move.l %d3,%d4 /* move old longword away */
178 move.l -(%a0),%d3 /* load new longword */
179 move.l %d3,%d2 /* work on a copy, %d3 is still needed */
180 lsl.l #8,%d2 /* get bytes 210. */
181 or.l %d2,%d4 /* combine with old high byte */
182 move.l %d4,-(%a1) /* store longword */
183 lsr.l %d0,%d3 /* shift down high byte */
184 cmp.l %a2,%a0 /* run %a0 down to first long bound */
185 jhi .lines_do3r_tail_loop
186
187.lines_do3r_tail_end:
188 move.b %d3,-(%a1) /* store shifted-down high byte */
189 jra .linesr_end /* common exit: restore registers, copy trailing bytes */
190
191 /* byte aligned destination (long - 3): use line burst reads in main loop */
192.lines_do1r_start:
193 moveq.l #24,%d0 /* shift count for shifting by 3 bytes */
194 cmp.l %d1,%a0 /* any leading longwords? */
195 jhi .lines_do1r_head_start /* yes: leading longword copy */
196
197 lea.l (-16,%a0),%a0 /* step source back by one line */
198 movem.l (%a0),%d3-%d6 /* load initial line */
199 move.b %d6,-(%a1) /* store low byte of last longword */
200 jra .lines_do1r_entry /* jump into main loop */
201
202.lines_do1r_head_start:
203 move.l -(%a0),%d3 /* load initial longword */
204 move.b %d3,-(%a1) /* store low byte */
205 jra .lines_do1r_head_entry /* jump into head loop */
206
207.lines_do1r_head_loop:
208 move.l %d3,%d4 /* move old longword away */
209 move.l -(%a0),%d3 /* load new longword */
210 move.l %d3,%d2 /* work on a copy, %d3 is still needed */
211 lsl.l %d0,%d2 /* get low byte */
212 or.l %d2,%d4 /* combine with old bytes .321 */
213 move.l %d4,-(%a1) /* store longword */
214.lines_do1r_head_entry:
215 lsr.l #8,%d3 /* get bytes .321 */
216 cmp.l %d1,%a0 /* run %a0 down to last line bound */
217 jhi .lines_do1r_head_loop
218
219.lines_do1r_loop:
220 move.l %d3,%d7 /* move first longword of old line away */
221 lea.l (-16,%a0),%a0 /* step source back by one line */
222 movem.l (%a0),%d3-%d6 /* load new line */
223 move.l %d6,%d2 /* copy of 4th longword */
224 lsl.l %d0,%d2 /* get low byte of 4th longword */
225 or.l %d2,%d7 /* combine with bytes .321 of old longword */
226 move.l %d7,-(%a1) /* store longword */
227.lines_do1r_entry:
228 lsr.l #8,%d6 /* get bytes .321 */
229 move.l %d5,%d2 /* copy of 3rd longword */
230 lsl.l %d0,%d2 /* get low byte of 3rd longword */
231 or.l %d2,%d6 /* combine with bytes .321 of 4th longword */
232 move.l %d6,-(%a1) /* store longword */
233 lsr.l #8,%d5 /* get bytes .321 */
234 move.l %d4,%d2 /* copy of 2nd longword */
235 lsl.l %d0,%d2 /* get low byte of 2nd longword */
236 or.l %d2,%d5 /* combine with bytes .321 of 3rd longword */
237 move.l %d5,-(%a1) /* store longword */
238 lsr.l #8,%d4 /* get bytes .321 */
239 move.l %d3,%d2 /* copy of 1st longword */
240 lsl.l %d0,%d2 /* get low byte of 1st longword */
241 or.l %d2,%d4 /* combine with bytes .321 of 2nd longword */
242 move.l %d4,-(%a1) /* store longword */
243 lsr.l #8,%d3 /* get bytes .321 */
244 cmp.l %a2,%a0 /* run %a0 down to first line bound */
245 jhi .lines_do1r_loop
246
247 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
248 cmp.l %a2,%a0 /* any trailing longwords? */
249 jls .lines_do1r_tail_end /* no: just store the remaining bytes .321 */
250
251.lines_do1r_tail_loop:
252 move.l %d3,%d4 /* move old longword away */
253 move.l -(%a0),%d3 /* load new longword */
254 move.l %d3,%d2 /* work on a copy, %d3 is still needed */
255 lsl.l %d0,%d2 /* get low byte */
256 or.l %d2,%d4 /* combine with old bytes .321 */
257 move.l %d4,-(%a1) /* store longword */
258 lsr.l #8,%d3 /* get bytes .321 */
259 cmp.l %a2,%a0 /* run %a0 down to first long bound */
260 jhi .lines_do1r_tail_loop
261
262.lines_do1r_tail_end:
263 move.w %d3,-(%a1) /* store word 21 */
264 swap %d3 /* bring byte 3 down into the low word */
265 move.b %d3,-(%a1) /* store byte 3 */
266 jra .linesr_end /* common exit: restore registers, copy trailing bytes */
267
268 /* long aligned destination (line - 0/4/8/12): head */
269.lines_do0r_head_loop:
270 move.l -(%a0),-(%a1) /* copy longword */
271.lines_do0r_start:
272 cmp.l %d1,%a0 /* run %a0 down to last line bound */
273 jhi .lines_do0r_head_loop
274
275.lines_do0r_head_end:
276 move.l %a1,%d1 /* %d1 = current destination address */
277 lsr.l #2,%d1 /* drop the two low address bits */
278 moveq.l #3,%d0 /* mask */
279 and.l %d0,%d1 /* %d1 = (dest_addr >> 2) & 3 */
280 moveq.l #16,%d0 /* address decrement for one main loop pass */
281 jmp.l (2,%pc,%d1.l*2) /* switch ((dest_addr >> 2) & 3): index * 2 into the bra.b table below */
282 bra.b .lines_lo0r_start
283 bra.b .lines_lo4r_start
284 bra.b .lines_lo8r_start
285 /* bra.b .lines_lo12r_start implicit: case 3 lands directly behind the table */
286
287 /* long aligned destination (line - 4): use line bursts in the loop */
288.lines_lo12r_start:
289 sub.l %d0,%a0 /* step source back by one line */
290 movem.l (%a0),%d1-%d4 /* load initial line */
291 move.l %d4,-(%a1) /* store 4th longword */
292 move.l %d3,-(%a1) /* store 3rd longword */
293 move.l %d2,-(%a1) /* store 2nd longword */
294 cmp.l %a2,%a0 /* any full lines? */
295 jls .lines_lo12r_end /* no: skip main loop */
296
297.lines_lo12r_loop:
298 move.l %d1,%d5 /* move first longword of old line away */
299 sub.l %d0,%a0 /* step source back by one line */
300 movem.l (%a0),%d1-%d4 /* load new line */
301 sub.l %d0,%a1 /* step destination back by one line */
302 movem.l %d2-%d5,(%a1) /* store line (1 old + 3 new longwords) */
303 cmp.l %a2,%a0 /* run %a0 down to first line bound */
304 jhi .lines_lo12r_loop
305
306 jra .lines_lo12r_end /* handle trailing longwords */
307
308 /* line aligned destination: use line bursts in the loop */
309.lines_lo0r_start:
310.lines_lo0r_loop:
311 sub.l %d0,%a0 /* step source back by one line */
312 movem.l (%a0),%d1-%d4 /* load line */
313 sub.l %d0,%a1 /* step destination back by one line */
314 movem.l %d1-%d4,(%a1) /* store line */
315 cmp.l %a2,%a0 /* run %a0 down to first line bound */
316 jhi .lines_lo0r_loop
317
318 jra .lines_lo0r_end /* handle trailing longwords */
319
320 /* long aligned destination (line - 8): use line bursts in the loop */
321.lines_lo8r_start:
322 sub.l %d0,%a0 /* step source back by one line */
323 movem.l (%a0),%d1-%d4 /* load initial line */
324 move.l %d4,-(%a1) /* store 4th longword */
325 move.l %d3,-(%a1) /* store 3rd longword */
326 cmp.l %a2,%a0 /* any full lines? */
327 jls .lines_lo8r_end /* no: skip main loop */
328
329.lines_lo8r_loop:
330 move.l %d2,%d6 /* move first 2 longwords of old line away */
331 move.l %d1,%d5
332 sub.l %d0,%a0 /* step source back by one line */
333 movem.l (%a0),%d1-%d4 /* load new line */
334 sub.l %d0,%a1 /* step destination back by one line */
335 movem.l %d3-%d6,(%a1) /* store line (2 old + 2 new longwords) */
336 cmp.l %a2,%a0 /* run %a0 down to first line bound */
337 jhi .lines_lo8r_loop
338
339 jra .lines_lo8r_end /* handle trailing longwords */
340
341 /* long aligned destination (line - 12): use line bursts in the loop */
342.lines_lo4r_start:
343 sub.l %d0,%a0 /* step source back by one line */
344 movem.l (%a0),%d1-%d4 /* load initial line */
345 move.l %d4,-(%a1) /* store 4th longword */
346 cmp.l %a2,%a0 /* any full lines? */
347 jls .lines_lo4r_end /* no: skip main loop */
348
349.lines_lo4r_loop:
350 move.l %d3,%d7 /* move first 3 longwords of old line away */
351 move.l %d2,%d6
352 move.l %d1,%d5
353 sub.l %d0,%a0 /* step source back by one line */
354 movem.l (%a0),%d1-%d4 /* load new line */
355 sub.l %d0,%a1 /* step destination back by one line */
356 movem.l %d4-%d7,(%a1) /* store line (3 old + 1 new longwords) */
357 cmp.l %a2,%a0 /* run %a0 down to first line bound */
358 jhi .lines_lo4r_loop
359
360 /* long aligned destination (line - 0/4/8/12): tail */
361.lines_lo4r_end:
362 move.l %d3,-(%a1) /* store 3rd last longword */
363.lines_lo8r_end:
364 move.l %d2,-(%a1) /* store 2nd last longword */
365.lines_lo12r_end:
366 move.l %d1,-(%a1) /* store last longword */
367.lines_lo0r_end:
368 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
369 cmp.l %a2,%a0 /* any trailing longwords? */
370 jls .linesr_end /* no: get outta here */
371
372.lines_do0r_tail_loop:
373 move.l -(%a0),-(%a1) /* copy longword */
374 cmp.l %a2,%a0 /* run %a0 down to first long bound */
375 jhi .lines_do0r_tail_loop
376
377 jra .linesr_end /* common exit: restore registers, copy trailing bytes */
378
379#ifdef FULLSPEED /* variant using burst writes for word aligned destinations */
380 /* word aligned destination (line - 2/6/10/14): head */
381.lines_do2r_start:
382 cmp.l %d1,%a0 /* any leading longwords? */
383 jls .lines_do2r_selector /* no: jump to mainloop selector */
384
385 move.l -(%a0),%d3 /* load initial longword */
386 move.w %d3,-(%a1) /* store low word */
387 cmp.l %d1,%a0 /* any more longwords? */
388 jls .lines_do2r_head_end /* no: skip head loop */
389
390.lines_do2r_head_loop:
391 move.l %d3,%d4 /* move old longword away */
392 move.l -(%a0),%d3 /* load new longword */
393 move.w %d3,%d4 /* combine low word with old high word */
394 swap %d4 /* swap words */
395 move.l %d4,-(%a1) /* store longword */
396 cmp.l %d1,%a0 /* run %a0 down to last line bound */
397 jhi .lines_do2r_head_loop
398
399.lines_do2r_head_end:
400 swap %d3 /* get high word */
401 move.w %d3,-(%a1) /* and store it */
402
403.lines_do2r_selector:
404 move.l %a1,%d1 /* %d1 = current destination address */
405 lsr.l #2,%d1 /* drop the two low address bits */
406 moveq.l #3,%d0 /* mask */
407 and.l %d0,%d1 /* %d1 = (dest_addr >> 2) & 3 */
408 moveq.l #16,%d7 /* address decrement for one main loop pass */
409 jmp.l (2,%pc,%d1.l*4) /* switch ((dest_addr >> 2) & 3): index * 4 into the bra.w table below */
410 bra.w .lines_lo2r_start
411 bra.w .lines_lo6r_start
412 bra.w .lines_lo10r_start
413 /* bra.w .lines_lo14r_start implicit: case 3 lands directly behind the table */
414
415 /* word aligned destination (line - 2): use line bursts in the loop */
416.lines_lo14r_start:
417 sub.l %d7,%a0 /* step source back by one line */
418 movem.l (%a0),%d0-%d3 /* load initial line */
419 move.w %d3,-(%a1) /* store last low word */
420 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
421 swap %d3 /* swap words of 3rd long */
422 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
423 swap %d2 /* swap words of 2nd long */
424 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
425 swap %d1 /* swap words of 1st long */
426 move.l %d3,-(%a1) /* store 3rd longword */
427 move.l %d2,-(%a1) /* store 2nd longword */
428 move.l %d1,-(%a1) /* store 1st longword */
429 cmp.l %a2,%a0 /* any full lines? */
430 jls .lines_lo14r_end /* no: skip main loop */
431
432.lines_lo14r_loop:
433 move.l %d0,%d4 /* move first longword of old line away */
434 sub.l %d7,%a0 /* step source back by one line */
435 movem.l (%a0),%d0-%d3 /* load line */
436 move.w %d3,%d4 /* combine 4th low word with old high word */
437 swap %d4 /* swap words of 4th long */
438 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
439 swap %d3 /* swap words of 3rd long */
440 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
441 swap %d2 /* swap words of 2nd long */
442 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
443 swap %d1 /* swap words of 1st long */
444 sub.l %d7,%a1 /* step destination back by one line */
445 movem.l %d1-%d4,(%a1) /* store line */
446 cmp.l %a2,%a0 /* run %a0 down to first line bound */
447 jhi .lines_lo14r_loop
448
449 jra .lines_lo14r_end /* handle trailing longwords */
450
451 /* word aligned destination (line - 6): use line bursts in the loop */
452.lines_lo10r_start:
453 sub.l %d7,%a0 /* step source back by one line */
454 movem.l (%a0),%d0-%d3 /* load initial line */
455 move.w %d3,-(%a1) /* store last low word */
456 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
457 swap %d3 /* swap words of 3rd long */
458 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
459 swap %d2 /* swap words of 2nd long */
460 move.l %d3,-(%a1) /* store 3rd longword */
461 move.l %d2,-(%a1) /* store 2nd longword */
462 jra .lines_lo10r_entry /* jump into main loop */
463
464.lines_lo10r_loop:
465 move.l %d0,%d4 /* move first 2 longwords of old line away */
466 move.l %d1,%d5
467 sub.l %d7,%a0 /* step source back by one line */
468 movem.l (%a0),%d0-%d3 /* load line */
469 move.w %d3,%d4 /* combine 4th low word with old high word */
470 swap %d4 /* swap words of 4th long */
471 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
472 swap %d3 /* swap words of 3rd long */
473 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
474 swap %d2 /* swap words of 2nd long */
475 sub.l %d7,%a1 /* step destination back by one line */
476 movem.l %d2-%d5,(%a1) /* store line */
477.lines_lo10r_entry:
478 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
479 swap %d1 /* swap words of 1st long */
480 cmp.l %a2,%a0 /* run %a0 down to first line bound */
481 jhi .lines_lo10r_loop
482
483 jra .lines_lo10r_end /* handle trailing longwords */
484
485 /* word aligned destination (line - 10): use line bursts in the loop */
486.lines_lo6r_start:
487 sub.l %d7,%a0 /* step source back by one line */
488 movem.l (%a0),%d0-%d3 /* load initial line */
489 move.w %d3,-(%a1) /* store last low word */
490 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
491 swap %d3 /* swap words of 3rd long */
492 move.l %d3,-(%a1) /* store 3rd longword */
493 jra .lines_lo6r_entry /* jump into main loop */
494
495.lines_lo6r_loop:
496 move.l %d0,%d4 /* move first 3 longwords of old line away */
497 move.l %d1,%d5
498 move.l %d2,%d6
499 sub.l %d7,%a0 /* step source back by one line */
500 movem.l (%a0),%d0-%d3 /* load line */
501 move.w %d3,%d4 /* combine 4th low word with old high word */
502 swap %d4 /* swap words of 4th long */
503 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
504 swap %d3 /* swap words of 3rd long */
505 sub.l %d7,%a1 /* step destination back by one line */
506 movem.l %d3-%d6,(%a1) /* store line */
507.lines_lo6r_entry:
508 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
509 swap %d2 /* swap words of 2nd long */
510 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
511 swap %d1 /* swap words of 1st long */
512 cmp.l %a2,%a0 /* run %a0 down to first line bound */
513 jhi .lines_lo6r_loop
514
515 jra .lines_lo6r_end /* handle trailing longwords */
516
517 /* word aligned destination (line - 14): use line bursts in the loop */
518.lines_lo2r_start:
519 sub.l %d7,%a0 /* step source back by one line */
520 movem.l (%a0),%d0-%d3 /* load initial line */
521 move.w %d3,-(%a1) /* store last low word */
522 jra .lines_lo2r_entry /* jump into main loop */
523
524.lines_lo2r_loop:
525 move.l %d0,%d4 /* move old line away */
526 move.l %d1,%d5
527 move.l %d2,%d6
528 move.l %d3,%d7 /* from here on %d7 holds data, not the decrement */
529 lea.l (-16,%a0),%a0 /* step source back by one line (constant, %d7 is in use) */
530 movem.l (%a0),%d0-%d3 /* load line */
531 move.w %d3,%d4 /* combine 4th low word with old high word */
532 swap %d4 /* swap words of 4th long */
533 lea.l (-16,%a1),%a1 /* step destination back by one line */
534 movem.l %d4-%d7,(%a1) /* store line */
535.lines_lo2r_entry:
536 move.w %d2,%d3 /* combine 3rd low word with 4th high word */
537 swap %d3 /* swap words of 3rd long */
538 move.w %d1,%d2 /* combine 2nd low word with 3rd high word */
539 swap %d2 /* swap words of 2nd long */
540 move.w %d0,%d1 /* combine 1st low word with 2nd high word */
541 swap %d1 /* swap words of 1st long */
542 cmp.l %a2,%a0 /* run %a0 down to first line bound */
543 jhi .lines_lo2r_loop
544
545 /* word aligned destination (line - 2/6/10/14): tail */
546.lines_lo2r_end:
547 move.l %d3,-(%a1) /* store third last longword */
548.lines_lo6r_end:
549 move.l %d2,-(%a1) /* store second last longword */
550.lines_lo10r_end:
551 move.l %d1,-(%a1) /* store last longword */
552.lines_lo14r_end:
553 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
554 cmp.l %a2,%a0 /* any trailing longwords? */
555 jls .lines_do2r_tail_end /* no: skip tail loop */
556
557.lines_do2r_tail_loop:
558 move.l %d0,%d1 /* move old longword away */
559 move.l -(%a0),%d0 /* load new longword */
560 move.w %d0,%d1 /* combine low word with old high word */
561 swap %d1 /* swap words */
562 move.l %d1,-(%a1) /* store longword */
563 cmp.l %a2,%a0 /* run %a0 down to first long bound */
564 jhi .lines_do2r_tail_loop
565
566.lines_do2r_tail_end:
567 swap %d0 /* get final high word */
568 move.w %d0,-(%a1) /* store it */
569 /* jra .linesr_end implicit */
570
571#else /* !FULLSPEED */
572
573 /* word aligned destination (long - 2): use line burst reads in the loop */
574.lines_do2r_start:
575 cmp.l %d1,%a0 /* any leading longwords? */
576 jhi .lines_do2r_head_start /* yes: leading longword copy */
577
578 lea.l (-16,%a0),%a0 /* step source back by one line */
579 movem.l (%a0),%d3-%d6 /* load initial line */
580 move.w %d6,-(%a1) /* store last low word */
581 jra .lines_do2r_entry /* jump into main loop */
582
583.lines_do2r_head_start:
584 move.l -(%a0),%d3 /* load initial longword */
585 move.w %d3,-(%a1) /* store low word */
586 cmp.l %d1,%a0 /* any full longword? */
587 jls .lines_do2r_loop /* no: skip head loop */
588
589.lines_do2r_head_loop:
590 move.l %d3,%d4 /* move old longword away */
591 move.l -(%a0),%d3 /* load new longword */
592 move.w %d3,%d4 /* combine low word with old high word */
593 swap %d4 /* swap words */
594 move.l %d4,-(%a1) /* store longword */
595 cmp.l %d1,%a0 /* run %a0 down to last line bound */
596 jhi .lines_do2r_head_loop
597
598.lines_do2r_loop:
599 move.l %d3,%d7 /* move first longword of old line away */
600 lea.l (-16,%a0),%a0 /* step source back by one line */
601 movem.l (%a0),%d3-%d6 /* load line */
602 move.w %d6,%d7 /* combine 4th low word with old high word */
603 swap %d7 /* swap words of 4th long */
604 move.l %d7,-(%a1) /* store 4th longword */
605.lines_do2r_entry:
606 move.w %d5,%d6 /* combine 3rd low word with 4th high word */
607 swap %d6 /* swap words of 3rd long */
608 move.l %d6,-(%a1) /* store 3rd longword */
609 move.w %d4,%d5 /* combine 2nd low word with 3rd high word */
610 swap %d5 /* swap words of 2nd long */
611 move.l %d5,-(%a1) /* store 2nd longword */
612 move.w %d3,%d4 /* combine 1st low word with 2nd high word */
613 swap %d4 /* swap words of 1st long */
614 move.l %d4,-(%a1) /* store 1st longword */
615 cmp.l %a2,%a0 /* run %a0 down to first line bound */
616 jhi .lines_do2r_loop
617
618.lines_do2r_end:
619 lea.l (-12,%a2),%a2 /* readjust start address for doing longwords */
620 cmp.l %a2,%a0 /* any trailing longwords? */
621 jls .lines_do2r_tail_end /* no: skip tail loop */
622
623.lines_do2r_tail_loop:
624 move.l %d3,%d4 /* move old longword away */
625 move.l -(%a0),%d3 /* load new longword */
626 move.w %d3,%d4 /* combine low word with old high word */
627 swap %d4 /* swap words */
628 move.l %d4,-(%a1) /* store longword */
629 cmp.l %a2,%a0 /* run %a0 down to first long bound */
630 jhi .lines_do2r_tail_loop
631
632.lines_do2r_tail_end:
633 swap %d3 /* get final high word (still pending from the last load) */
634 move.w %d3,-(%a1) /* store it */
635 /* jra .linesr_end implicit */
636
637#endif /* !FULLSPEED */
638
639.linesr_end: /* common exit of all line-copy variants */
640 subq.l #3,%a2 /* readjust start address (the -12 was applied before, 15 - 12 - 3 = 0) */
641 move.l %a2,%d0 /* source start address in %d0 again */
642 movem.l (%sp),%d2-%d7/%a2 /* restore registers */
643 lea.l (28,%sp),%sp /* release the 28 byte register save area */
644 jra .bytes2r_start /* jump to trailing byte loop */
645
646.longr_start:
647 addq.l #3,%d0 /* adjust start address for doing 4 bytes/ pass */
648
649 /* longword copy loop - no lines */
650.longr_loop:
651 move.l -(%a0),-(%a1) /* copy longword (write can be unaligned) */
652 cmp.l %d0,%a0 /* runs %a0 down to first long bound */
653 jhi .longr_loop
654
655 subq.l #3,%d0 /* readjust start address */
656 cmp.l %d0,%a0 /* any bytes left? */
657 jls .bytes2r_end /* no: skip trailing byte loop */
658
659 /* trailing byte loop */
660.bytes2r_loop:
661 move.b -(%a0),-(%a1) /* copy byte */
662.bytes2r_start:
663 cmp.l %d0,%a0 /* runs %a0 down to source start address */
664 jhi .bytes2r_loop
665
666.bytes2r_end: /* %d0 still holds the SOURCE start here; the stack is balanced on every path */
667 move.l (4,%sp),%d0 /* return destination (as memcpy does, like ANSI version) */
668 rts
669.end:
670 .size memmove,.end-memmove
diff --git a/firmware/asm/m68k/memset.S b/firmware/asm/m68k/memset.S
new file mode 100644
index 0000000000..839b305a05
--- /dev/null
+++ b/firmware/asm/m68k/memset.S
@@ -0,0 +1,152 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .align 2
26 .global memset
27 .type memset,@function
28
29/* Fills a memory region with specified byte value
30 * This version is optimized for speed
31 *
32 * arguments:
33 * (4,%sp) - start address
34 * (8,%sp) - data
35 * (12,%sp) - length
36 *
37 * return value:
38 * %d0 - start address (like ANSI version)
39 *
40 * register usage:
41 * %d0 - data (spread to all 4 bytes when using long stores)
42 * %d1 - temporary / data (for burst transfer)
43 * %d2 - data (for burst transfer)
44 * %d3 - data (for burst transfer)
45 * %a0 - start address
46 * %a1 - current address (runs down from end to start)
47 *
48 * For maximum speed this routine uses both long stores and burst mode,
49 * storing whole lines with movem.l. The routine fills memory from end
50 * to start in order to ease returning the start address.
51 */
/* memset entry point. The region is filled from its end address downwards:
 * odd bytes above the last long bound, longwords down to the last line
 * bound, whole 16-byte lines with movem.l, then the remaining longwords
 * and bytes above the start address. */
52memset:
53 move.l (4,%sp),%a0 /* %a0 = start address (1st stack argument) */
54 move.l (8,%sp),%d0 /* %d0 = fill data (2nd stack argument) */
55 move.l (12,%sp),%a1 /* length (3rd stack argument) ... */
56 add.l %a0,%a1 /* ... turned into %a1 = end address */
57
58 move.l %a0,%d1 /* compute (start + 7) & ~3 */
59 addq.l #7,%d1
60 and.l #0xFFFFFFFC,%d1 /* %d1 = first long bound + 4 */
61 cmp.l %d1,%a1 /* at least one aligned longword to fill? */
62 blo.b .no_longs /* no, jump directly to byte loop (the data is
   * not spread yet, but move.b only stores the
   * low byte of %d0) */
63
64 and.l #0xFF,%d0 /* start: spread data to all 4 bytes */
65 move.l %d0,%d1
66 lsl.l #8,%d1
67 or.l %d1,%d0 /* data now in 2 lower bytes of %d0 */
68 move.l %d0,%d1
69 swap %d0 /* move the byte pair into the upper word */
70 or.l %d1,%d0 /* data now in all 4 bytes of %d0 */
71
72 move.l %a1,%d1
73 and.l #0xFFFFFFFC,%d1 /* %d1 = last long bound (end & ~3) */
74 cmp.l %d1,%a1 /* any bytes to set above that bound? */
75 bls.b .end_b1 /* no: skip byte loop */
76
77 /* leading byte loop: sets 0..3 bytes at the top of the region */
78.loop_b1:
79 move.b %d0,-(%a1) /* store byte (pre-decrementing %a1) */
80 cmp.l %d1,%a1 /* runs %a1 down to last long bound */
81 bhi.b .loop_b1
82
83.end_b1:
84 moveq.l #31,%d1 /* compute (start + 31) & ~15 */
85 add.l %a0,%d1
86 and.l #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */
87 cmp.l %d1,%a1 /* at least one full line to fill? */
88 blo.b .no_lines /* no, jump to longword loop */
89
   /* NOTE(review): "mov.l" differs from the "move.l" spelling used in the
    * rest of this routine; presumably an alias the assembler accepts --
    * confirm. */
90 mov.l %a1,%d1
91 and.l #0xFFFFFFF0,%d1 /* %d1 = last line bound (%a1 & ~15) */
92 cmp.l %d1,%a1 /* any longwords to set above that bound? */
93 bls.b .end_l1 /* no: skip longword loop */
94
95 /* leading longword loop: sets 0..3 longwords above the last line bound */
96.loop_l1:
97 move.l %d0,-(%a1) /* store longword */
98 cmp.l %d1,%a1 /* runs %a1 down to last line bound */
99 bhi.b .loop_l1
100
101.end_l1:
102 move.l %d2,-(%sp) /* save %d2/%d3 on the stack to free them */
103 move.l %d3,-(%sp)
104
105 move.l %d0,%d1 /* spread data to 4 data registers */
106 move.l %d0,%d2
107 move.l %d0,%d3
108 lea.l (15,%a0),%a0 /* start address += 15, acct. for trl. data:
    * the loop below then stops once fewer than
    * 16 bytes remain above the real start */
109
110 /* main loop: set whole lines (4 longwords) utilising burst mode */
111.loop_line:
112 lea.l (-16,%a1),%a1 /* pre-decrement by one line */
113 movem.l %d0-%d3,(%a1) /* store line */
114 cmp.l %a0,%a1 /* runs %a1 down to first line bound */
115 bhi.b .loop_line
116
117 lea.l (-15,%a0),%a0 /* correct start address (undo the +15 bias) */
118 move.l (%sp)+,%d3 /* restore registers in reverse push order */
119 move.l (%sp)+,%d2
120
121 move.l %a0,%d1 /* %d1 = start address ... */
122 addq.l #3,%d1 /* ... +3, account for possible trailing bytes */
123 cmp.l %d1,%a1 /* any longwords left */
124 bhi.b .loop_l2 /* yes: jump to longword loop */
125 bra.b .no_longs /* no: skip loop */
126
127.no_lines:
128 move.l %a0,%d1 /* %d1 = start address ... */
129 addq.l #3,%d1 /* ... +3, account for possible trailing bytes */
130
131 /* trailing longword loop: fills down to the first long bound */
132.loop_l2:
133 move.l %d0,-(%a1) /* store longword */
134 cmp.l %d1,%a1 /* runs %a1 down to first long bound */
135 bhi.b .loop_l2
136
137.no_longs:
138 cmp.l %a0,%a1 /* any bytes left? */
139 bls.b .end_b2 /* no: skip loop */
140
141 /* trailing byte loop: fills the bytes between start and first long bound */
142.loop_b2:
143 move.b %d0,-(%a1) /* store byte */
144 cmp.l %a0,%a1 /* runs %a1 down to start address */
145 bhi.b .loop_b2
146
147.end_b2:
148 move.l %a0,%d0 /* return start address in %d0 */
149 rts
150
151.end:
152 .size memset,.end-memset
diff --git a/firmware/asm/m68k/memset16.S b/firmware/asm/m68k/memset16.S
new file mode 100644
index 0000000000..1673038d03
--- /dev/null
+++ b/firmware/asm/m68k/memset16.S
@@ -0,0 +1,146 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .global memset16
26 .type memset16,@function
27
28/* Fills a memory region with specified word value
29 * Start address must be word aligned, length is in words
30 * This version is optimized for speed
31 *
32 * arguments:
33 * (4,%sp) - start address
34 * (8,%sp) - data
35 * (12,%sp) - length
36 *
37 * return value:
38 * %d0 - start address
39 *
40 * register usage:
41 * %d0 - data (spread to both words when using long stores)
42 * %d1 - temporary / data (for burst transfer)
43 * %d2 - data (for burst transfer)
44 * %d3 - data (for burst transfer)
45 * %a0 - start address
46 * %a1 - current address (runs down from end to start)
47 *
48 * For maximum speed this routine uses both long stores and burst mode,
49 * storing whole lines with movem.l. The routine fills memory from end
50 * to start in order to ease returning the start address.
51 */
/* memset16 entry point. Same end-to-start scheme as a byte fill, but the
 * unit is a 16-bit word: one odd word above the last long bound, longwords
 * down to the last line bound, whole 16-byte lines with movem.l, then the
 * remaining longwords and at most one word above the start address. */
52memset16:
53 move.l (4,%sp),%a0 /* %a0 = start address (1st stack argument) */
54 move.l (8,%sp),%d0 /* %d0 = fill data (2nd stack argument) */
55 move.l (12,%sp),%a1 /* length in words (3rd stack argument) */
56 add.l %a1,%a1 /* doubled: length in bytes */
57 add.l %a0,%a1 /* %a1 = end address */
58
59 move.l %a0,%d1 /* compute (start + 6) & ~3 */
60 addq.l #6,%d1
61 and.l #0xFFFFFFFC,%d1 /* %d1 = first long bound + 4 */
62 cmp.l %d1,%a1 /* at least one aligned longword to fill? */
63 blo.b .no_longs /* no, jump directly to word loop (the data is
   * not spread yet, but move.w only stores the
   * low word of %d0) */
64
65 and.l #0xFFFF,%d0 /* start: spread data to both words */
66 move.l %d0,%d1
67 swap %d1 /* copy of the word in the upper half */
68 or.l %d1,%d0 /* data now in both words */
69
70 move.l %a1,%d1
71 and.l #0xFFFFFFFC,%d1 /* %d1 = last long bound (end & ~3) */
72 cmp.l %d1,%a1 /* one extra word? */
73 bls.b .end_w1 /* no: skip */
74
75 move.w %d0,-(%a1) /* set leading word; %a1 is now long aligned */
76
77.end_w1:
78 moveq.l #30,%d1 /* compute (start + 30) & ~15 */
79 add.l %a0,%d1
80 and.l #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */
81 cmp.l %d1,%a1 /* at least one full line to fill? */
82 blo.b .no_lines /* no, jump to longword loop */
83
   /* NOTE(review): "mov.l" differs from the "move.l" spelling used in the
    * rest of this routine; presumably an alias the assembler accepts --
    * confirm. */
84 mov.l %a1,%d1
85 and.l #0xFFFFFFF0,%d1 /* %d1 = last line bound (%a1 & ~15) */
86 cmp.l %d1,%a1 /* any longwords to set above that bound? */
87 bls.b .end_l1 /* no: skip longword loop */
88
89 /* leading longword loop: sets 0..3 longwords above the last line bound */
90.loop_l1:
91 move.l %d0,-(%a1) /* store longword */
92 cmp.l %d1,%a1 /* runs %a1 down to last line bound */
93 bhi.b .loop_l1
94
95.end_l1:
96 move.l %d2,-(%sp) /* save %d2/%d3 on the stack to free them */
97 move.l %d3,-(%sp)
98
99 move.l %d0,%d1 /* spread data to 4 data registers */
100 move.l %d0,%d2
101 move.l %d0,%d3
102 lea.l (14,%a0),%a0 /* start address += 14, acct. for trl. data:
    * the loop below then stops once fewer than
    * 16 bytes remain above the real start */
103
104 /* main loop: set whole lines (4 longwords) utilising burst mode */
105.loop_line:
106 lea.l (-16,%a1),%a1 /* pre-decrement by one line */
107 movem.l %d0-%d3,(%a1) /* store line */
108 cmp.l %a0,%a1 /* runs %a1 down to first line bound */
109 bhi.b .loop_line
110
111 lea.l (-14,%a0),%a0 /* correct start address (undo the +14 bias) */
112 move.l (%sp)+,%d3 /* restore registers in reverse push order */
113 move.l (%sp)+,%d2
114
115 move.l %a0,%d1 /* %d1 = start address ... */
116 addq.l #2,%d1 /* ... +2, account for possible trailing word */
117 cmp.l %d1,%a1 /* any longwords left */
118 bhi.b .loop_l2 /* yes: jump to longword loop */
119 bra.b .no_longs /* no: skip loop */
120
121.no_lines:
122 move.l %a0,%d1 /* %d1 = start address ... */
123 addq.l #2,%d1 /* ... +2, account for possible trailing word */
124
125 /* trailing longword loop: fills down to the first long bound */
126.loop_l2:
127 move.l %d0,-(%a1) /* store longword */
128 cmp.l %d1,%a1 /* runs %a1 down to first long bound */
129 bhi.b .loop_l2
130
131.no_longs:
132 cmp.l %a0,%a1 /* any words left? */
133 bls.b .end_w2 /* no: skip loop */
134
135 /* trailing word loop: fills whatever lies between start and %a1 */
136.loop_w2:
137 move.w %d0,-(%a1) /* store word */
138 cmp.l %a0,%a1 /* runs %a1 down to start address */
139 bhi.b .loop_w2
140
141.end_w2:
142 move.l %a0,%d0 /* return start address in %d0 */
143 rts
144
145.end:
146 .size memset16,.end-memset16
diff --git a/firmware/asm/m68k/strlen.S b/firmware/asm/m68k/strlen.S
new file mode 100644
index 0000000000..765969da04
--- /dev/null
+++ b/firmware/asm/m68k/strlen.S
@@ -0,0 +1,71 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 Nils Wallménius
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21
22/* size_t strlen(const char *str) */
23
24 .section .text,"ax",@progbits
25 .align 2
26 .globl strlen
27 .type strlen, @function
28
/* strlen: returns in %d0 the number of bytes before the first zero byte of
 * the string whose address is passed on the stack. Bytes up to the next
 * long bound are tested one by one, then the string is scanned a longword
 * at a time. */
29strlen:
30 move.l 4(%sp), %a0 /* %a0 = str (the pointer itself, not *str) */
31 move.l %a0, %a1 /* %a1 = start address, kept for the final subtraction */
32 move.l %a0, %d0
33 andi.l #3, %d0 /* %d0 = %a0 & 3 */
34 beq.b 1f /* already aligned */
   /* Misaligned by %d0 = 1..3: jump into the run of byte tests below so
    * that only the bytes up to the next long bound are tested. The target
    * is PC-relative with %d0 scaled by 4; presumably each tst.b/beq.b pair
    * assembles to 4 bytes, so a larger %d0 skips more pairs -- TODO
    * confirm against the assembled encoding. */
35 jmp.l (-2,%pc,%d0.l*4)
36 tst.b (%a0)+
37 beq.b .done
38 tst.b (%a0)+
39 beq.b .done
40 tst.b (%a0)+
41 beq.b .done
42
43 1:
44 move.l (%a0)+, %d0 /* load %d0 increment %a0 */
45 /* use trick to test the whole word for null bytes:
      (x - 0x01010101) & ~x & 0x80808080 is non-zero when x has a zero byte */
46 move.l %d0, %d1
47 subi.l #0x01010101, %d1 /* %d1 = x - 0x01010101 */
48 not.l %d0 /* %d0 = ~x */
49 and.l %d1, %d0
50 andi.l #0x80808080, %d0 /* keep only the top bit of every byte */
51 beq.b 1b /* if the test was false repeat */
52
53 /* ok, so the last word contained a 0 byte, test individual bytes */
54 subq.l #4, %a0 /* step back to the start of that longword */
55 tst.b (%a0)+
56 beq.b .done
57 tst.b (%a0)+
58 beq.b .done
59 tst.b (%a0)+
60 beq.b .done
61 /* last byte must be 0 so we don't need to load it, so we don't increment a0
62 so we jump past the subq instr */
63 .word 0x51fa /* trapf.w, shadow next instr: the subq.l at .done is
   * consumed as this instruction's extension word, so
   * it is not executed on the fall-through path */
64
65.done:
66 subq.l #1, %a0 /* %a0 is 1 too large due to the last increment */
67 sub.l %a1, %a0 /* distance from the start address = string length */
68 move.l %a0, %d0 /* return value in %d0 */
69 rts
70 .size strlen, .-strlen
71
diff --git a/firmware/asm/memcpy.c b/firmware/asm/memcpy.c
new file mode 100644
index 0000000000..c5456ab41f
--- /dev/null
+++ b/firmware/asm/memcpy.c
@@ -0,0 +1,117 @@
1/*
2FUNCTION
3 <<memcpy>>---copy memory regions
4
5ANSI_SYNOPSIS
6 #include <string.h>
7 void* memcpy(void *<[out]>, const void *<[in]>, size_t <[n]>);
8
9TRAD_SYNOPSIS
10 void *memcpy(<[out]>, <[in]>, <[n]>)
11 void *<[out]>;
12 void *<[in]>;
13 size_t <[n]>;
14
15DESCRIPTION
16 This function copies <[n]> bytes from the memory region
17 pointed to by <[in]> to the memory region pointed to by
18 <[out]>.
19
20 If the regions overlap, the behavior is undefined.
21
22RETURNS
23 <<memcpy>> returns a pointer to the first byte of the <[out]>
24 region.
25
26PORTABILITY
27<<memcpy>> is ANSI C.
28
29<<memcpy>> requires no supporting OS subroutines.
30
31QUICKREF
32 memcpy ansi pure
33 */
34
35#include "config.h"
36#include "_ansi.h" /* for _DEFUN */
37#include <string.h>
38
/* Nonzero if either X or Y is not aligned on a "long" boundary.
   The arguments are parenthesized so that the cast applies to the whole
   argument expression (e.g. UNALIGNED (p + 1, q)), not just its first
   operand. */
#define UNALIGNED(X, Y) \
  (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))

/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE    (sizeof (long) << 2)

/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof (long))

/* Threshold for punting to the byte copier. */
#define TOO_SMALL(LEN)  ((LEN) < BIGBLOCKSIZE)
51
52_PTR
53_DEFUN (memcpy, (dst0, src0, len0),
54 _PTR dst0 _AND
55 _CONST _PTR src0 _AND
56 size_t len0) ICODE_ATTR;
57
58_PTR
59_DEFUN (memcpy, (dst0, src0, len0),
60 _PTR dst0 _AND
61 _CONST _PTR src0 _AND
62 size_t len0)
63{
64#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
65 char *dst = (char *) dst0;
66 char *src = (char *) src0;
67
68 _PTR save = dst0;
69
70 while (len0--)
71 {
72 *dst++ = *src++;
73 }
74
75 return save;
76#else
77 char *dst = dst0;
78 _CONST char *src = src0;
79 long *aligned_dst;
80 _CONST long *aligned_src;
81 unsigned int len = len0;
82
83 /* If the size is small, or either SRC or DST is unaligned,
84 then punt into the byte copy loop. This should be rare. */
85 if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
86 {
87 aligned_dst = (long*)dst;
88 aligned_src = (long*)src;
89
90 /* Copy 4X long words at a time if possible. */
91 while (len >= BIGBLOCKSIZE)
92 {
93 *aligned_dst++ = *aligned_src++;
94 *aligned_dst++ = *aligned_src++;
95 *aligned_dst++ = *aligned_src++;
96 *aligned_dst++ = *aligned_src++;
97 len -= (unsigned int)BIGBLOCKSIZE;
98 }
99
100 /* Copy one long word at a time if possible. */
101 while (len >= LITTLEBLOCKSIZE)
102 {
103 *aligned_dst++ = *aligned_src++;
104 len -= LITTLEBLOCKSIZE;
105 }
106
107 /* Pick up any residual with a byte copier. */
108 dst = (char*)aligned_dst;
109 src = (char*)aligned_src;
110 }
111
112 while (len--)
113 *dst++ = *src++;
114
115 return dst0;
116#endif /* not PREFER_SIZE_OVER_SPEED */
117}
diff --git a/firmware/asm/memmove.c b/firmware/asm/memmove.c
new file mode 100644
index 0000000000..5f423964bb
--- /dev/null
+++ b/firmware/asm/memmove.c
@@ -0,0 +1,147 @@
1/*
2FUNCTION
3 <<memmove>>---move possibly overlapping memory
4
5INDEX
6 memmove
7
8ANSI_SYNOPSIS
9 #include <string.h>
10 void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
11
12TRAD_SYNOPSIS
13 #include <string.h>
14 void *memmove(<[dst]>, <[src]>, <[length]>)
15 void *<[dst]>;
16 void *<[src]>;
17 size_t <[length]>;
18
19DESCRIPTION
20 This function moves <[length]> characters from the block of
21 memory starting at <<*<[src]>>> to the memory starting at
22 <<*<[dst]>>>. <<memmove>> reproduces the characters correctly
23 at <<*<[dst]>>> even if the two areas overlap.
24
25
26RETURNS
27 The function returns <[dst]> as passed.
28
29PORTABILITY
30<<memmove>> is ANSI C.
31
32<<memmove>> requires no supporting OS subroutines.
33
34QUICKREF
35 memmove ansi pure
36*/
37
38#include "config.h"
39#include <_ansi.h>
40#include <string.h>
41
/* Nonzero if either X or Y is not aligned on a "long" boundary.
   The arguments are parenthesized so that the cast applies to the whole
   argument expression (e.g. UNALIGNED (p + 1, q)), not just its first
   operand. */
#define UNALIGNED(X, Y) \
  (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))

/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE    (sizeof (long) << 2)

/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof (long))

/* Threshold for punting to the byte copier. */
#define TOO_SMALL(LEN)  ((LEN) < BIGBLOCKSIZE)
54
55_PTR
56_DEFUN (memmove, (dst_void, src_void, length),
57 _PTR dst_void _AND
58 _CONST _PTR src_void _AND
59 size_t length) ICODE_ATTR;
60
61_PTR
62_DEFUN (memmove, (dst_void, src_void, length),
63 _PTR dst_void _AND
64 _CONST _PTR src_void _AND
65 size_t length)
66{
67#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
68 char *dst = dst_void;
69 _CONST char *src = src_void;
70
71 if (src < dst && dst < src + length)
72 {
73 /* Have to copy backwards */
74 src += length;
75 dst += length;
76 while (length--)
77 {
78 *--dst = *--src;
79 }
80 }
81 else
82 {
83 while (length--)
84 {
85 *dst++ = *src++;
86 }
87 }
88
89 return dst_void;
90#else
91 char *dst = dst_void;
92 _CONST char *src = src_void;
93 long *aligned_dst;
94 _CONST long *aligned_src;
95 unsigned int len = length;
96
97 if (src < dst && dst < src + len)
98 {
99 /* Destructive overlap...have to copy backwards */
100 src += len;
101 dst += len;
102 while (len--)
103 {
104 *--dst = *--src;
105 }
106 }
107 else
108 {
109 /* Use optimizing algorithm for a non-destructive copy to closely
110 match memcpy. If the size is small or either SRC or DST is unaligned,
111 then punt into the byte copy loop. This should be rare. */
112 if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
113 {
114 aligned_dst = (long*)dst;
115 aligned_src = (long*)src;
116
117 /* Copy 4X long words at a time if possible. */
118 while (len >= BIGBLOCKSIZE)
119 {
120 *aligned_dst++ = *aligned_src++;
121 *aligned_dst++ = *aligned_src++;
122 *aligned_dst++ = *aligned_src++;
123 *aligned_dst++ = *aligned_src++;
124 len -= BIGBLOCKSIZE;
125 }
126
127 /* Copy one long word at a time if possible. */
128 while (len >= LITTLEBLOCKSIZE)
129 {
130 *aligned_dst++ = *aligned_src++;
131 len -= LITTLEBLOCKSIZE;
132 }
133
134 /* Pick up any residual with a byte copier. */
135 dst = (char*)aligned_dst;
136 src = (char*)aligned_src;
137 }
138
139 while (len--)
140 {
141 *dst++ = *src++;
142 }
143 }
144
145 return dst_void;
146#endif /* not PREFER_SIZE_OVER_SPEED */
147}
diff --git a/firmware/asm/memset.c b/firmware/asm/memset.c
new file mode 100644
index 0000000000..7b8d2137e8
--- /dev/null
+++ b/firmware/asm/memset.c
@@ -0,0 +1,110 @@
1/*
2FUNCTION
3 <<memset>>---set an area of memory
4
5INDEX
6 memset
7
8ANSI_SYNOPSIS
9 #include <string.h>
10 void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
11
12TRAD_SYNOPSIS
13 #include <string.h>
14 void *memset(<[dst]>, <[c]>, <[length]>)
15 void *<[dst]>;
16 int <[c]>;
17 size_t <[length]>;
18
19DESCRIPTION
20 This function converts the argument <[c]> into an unsigned
21 char and fills the first <[length]> characters of the array
22 pointed to by <[dst]> to the value.
23
24RETURNS
25 <<memset>> returns the value of <[dst]>.
26
27PORTABILITY
28<<memset>> is ANSI C.
29
30 <<memset>> requires no supporting OS subroutines.
31
32QUICKREF
33 memset ansi pure
34*/
35
36#include <string.h>
37#include "_ansi.h"
38
/* Number of bytes stored by one long-sized write. */
#define LBLOCKSIZE (sizeof(long))
/* Nonzero if X is not aligned on a "long" boundary.  X is parenthesized so
   the cast applies to the whole argument expression. */
#define UNALIGNED(X)   ((long)(X) & (LBLOCKSIZE - 1))
/* Nonzero if LEN is too short for even one long-sized write. */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
42
43_PTR
44_DEFUN (memset, (m, c, n),
45 _PTR m _AND
46 int c _AND
47 size_t n)
48{
49#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
50 char *s = (char *) m;
51
52 while (n-- != 0)
53 {
54 *s++ = (char) c;
55 }
56
57 return m;
58#else
59 char *s = (char *) m;
60 unsigned int i;
61 unsigned long buffer;
62 unsigned long *aligned_addr;
63
64 if (!TOO_SMALL (n) && !UNALIGNED (m))
65 {
66 /* If we get this far, we know that n is large and m is word-aligned. */
67
68 aligned_addr = (unsigned long*)m;
69
70 /* Store C into each char sized location in BUFFER so that
71 we can set large blocks quickly. */
72 c &= 0xff;
73 if (LBLOCKSIZE == 4)
74 {
75 buffer = (c << 8) | c;
76 buffer |= (buffer << 16);
77 }
78 else
79 {
80 buffer = 0;
81 for (i = 0; i < LBLOCKSIZE; i++)
82 buffer = (buffer << 8) | c;
83 }
84
85 while (n >= LBLOCKSIZE*4)
86 {
87 *aligned_addr++ = buffer;
88 *aligned_addr++ = buffer;
89 *aligned_addr++ = buffer;
90 *aligned_addr++ = buffer;
91 n -= 4*LBLOCKSIZE;
92 }
93
94 while (n >= LBLOCKSIZE)
95 {
96 *aligned_addr++ = buffer;
97 n -= LBLOCKSIZE;
98 }
99 /* Pick up the remainder with a bytewise loop. */
100 s = (char*)aligned_addr;
101 }
102
103 while (n--)
104 {
105 *s++ = (char)c;
106 }
107
108 return m;
109#endif /* not PREFER_SIZE_OVER_SPEED */
110}
diff --git a/firmware/asm/memset16.c b/firmware/asm/memset16.c
new file mode 100644
index 0000000000..7e31df0cdd
--- /dev/null
+++ b/firmware/asm/memset16.c
@@ -0,0 +1,78 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21
22#include "string-extra.h" /* memset16() */
23
/* Number of 16-bit words held by one long. */
#define LBLOCKSIZE (sizeof(long)/2)
/* Nonzero if X is not aligned on a "long" boundary.  X is parenthesized so
   the cast applies to the whole argument expression. */
#define UNALIGNED(X)   ((long)(X) & (sizeof(long) - 1))
/* Nonzero if LEN (in words) is too short for even one long-sized store. */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)

/* Fill len 16-bit words starting at dst with the low 16 bits of val.
 *
 * dst - destination; it is written through an unsigned short pointer, and
 *       through an unsigned long pointer when it is long-aligned.
 * val - fill value; only its low 16 bits are stored.
 * len - number of 16-bit words (not bytes) to write.
 */
void memset16(void *dst, int val, size_t len)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
    unsigned short *p = (unsigned short *)dst;

    while (len--)
        *p++ = val;
#else
    unsigned short *p = (unsigned short *)dst;
    unsigned int i;
    unsigned long word;
    unsigned long buffer;
    unsigned long *aligned_addr;

    if (!TOO_SMALL(len) && !UNALIGNED(dst))
    {
        aligned_addr = (unsigned long *)dst;

        val &= 0xffff;
        /* Build the pattern in unsigned arithmetic: shifting the int
           itself left by 16 overflows (undefined behaviour) whenever
           bit 15 of the value is set and int is 32 bits wide. */
        word = (unsigned long)val;
        if (LBLOCKSIZE == 2)
        {
            buffer = (word << 16) | word;
        }
        else
        {
            buffer = 0;
            for (i = 0; i < LBLOCKSIZE; i++)
                buffer = (buffer << 16) | word;
        }

        /* Four longs per pass while enough words remain. */
        while (len >= LBLOCKSIZE*4)
        {
            *aligned_addr++ = buffer;
            *aligned_addr++ = buffer;
            *aligned_addr++ = buffer;
            *aligned_addr++ = buffer;
            len -= 4*LBLOCKSIZE;
        }

        /* Then single longs. */
        while (len >= LBLOCKSIZE)
        {
            *aligned_addr++ = buffer;
            len -= LBLOCKSIZE;
        }

        /* The word loop below finishes whatever is left. */
        p = (unsigned short *)aligned_addr;
    }

    while (len--)
        *p++ = val;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
diff --git a/firmware/asm/mips/memcpy.S b/firmware/asm/mips/memcpy.S
new file mode 100644
index 0000000000..2e7f245c69
--- /dev/null
+++ b/firmware/asm/mips/memcpy.S
@@ -0,0 +1,143 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2002, 2003 Free Software Foundation, Inc.
11 * This file was originally part of the GNU C Library
12 * Contributed to glibc by Hartvig Ekner <hartvige@mips.com>, 2002
13 * Adapted for Rockbox by Maurus Cuelenaere, 2009
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
19 *
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
22 *
23 ****************************************************************************/
24
25#include "config.h"
26#include "mips.h"
27
28/* void *memcpy(void *s1, const void *s2, size_t n); */
29
30#ifdef ROCKBOX_BIG_ENDIAN
31# define LWHI lwl /* high part is left in big-endian */
32# define SWHI swl /* high part is left in big-endian */
33# define LWLO lwr /* low part is right in big-endian */
34# define SWLO swr /* low part is right in big-endian */
35#else
36# define LWHI lwr /* high part is right in little-endian */
37# define SWHI swr /* high part is right in little-endian */
38# define LWLO lwl /* low part is left in little-endian */
39# define SWLO swl /* low part is left in little-endian */
40#endif
41
42 .section .icode, "ax", %progbits
43
44 .global memcpy
45 .type memcpy, %function
46
/* With noreorder in effect the assembler leaves the instruction order
 * alone, so the instruction written after each branch or jump below sits
 * in that branch's delay slot. */
47 .set noreorder
48
/* memcpy: a0 = destination, a1 = source, a2 = byte count; the original
 * destination is returned in v0. Copies of fewer than 8 bytes go straight
 * to the byte loop at last8. If source and destination share the same
 * alignment within a word, the copy is word aligned first and then done
 * with 8-word and 1-word loops; otherwise the "shift" path reads the
 * source with LWHI/LWLO pairs. */
49memcpy:
50 slti t0, a2, 8 # Less than 8?
51 bne t0, zero, last8
52 move v0, a0 # Setup exit value before too late
53
54 xor t0, a1, a0 # Find a0/a1 displacement
55 andi t0, 0x3 # non-zero: the low two address bits differ
56 bne t0, zero, shift # Go handle the unaligned case
57 subu t1, zero, a1 # t1 = -a1 ...
58 andi t1, 0x3 # a0/a1 are aligned, but are we
59 beq t1, zero, chk8w # starting in the middle of a word?
60 subu a2, t1 # count minus the t1 bytes up to the word bound
61 LWHI t0, 0(a1) # Yes we are... take care of that
62 addu a1, t1
63 SWHI t0, 0(a0)
64 addu a0, t1
65
66chk8w:
67 andi t0, a2, 0x1f # 32 or more bytes left?
68 beq t0, a2, chk1w
69 subu a3, a2, t0 # Yes
70 addu a3, a1 # a3 = end address of loop
71 move a2, t0 # a2 = what will be left after loop
72lop8w:
73 lw t0, 0(a1) # Loop taking 8 words at a time
74 lw t1, 4(a1)
75 lw t2, 8(a1)
76 lw t3, 12(a1)
77 lw t4, 16(a1)
78 lw t5, 20(a1)
79 lw t6, 24(a1)
80 lw t7, 28(a1)
81 addiu a0, 32 # pointers are advanced first, so the ...
82 addiu a1, 32
83 sw t0, -32(a0) # ... stores use negative offsets
84 sw t1, -28(a0)
85 sw t2, -24(a0)
86 sw t3, -20(a0)
87 sw t4, -16(a0)
88 sw t5, -12(a0)
89 sw t6, -8(a0)
90 bne a1, a3, lop8w
91 sw t7, -4(a0) # last store of the group, after the branch
92
93chk1w:
94 andi t0, a2, 0x3 # 4 or more bytes left?
95 beq t0, a2, last8
96 subu a3, a2, t0 # Yes, handle them one word at a time
97 addu a3, a1 # a3 again end address
98 move a2, t0 # a2 = bytes left after the word loop
99lop1w:
100 lw t0, 0(a1)
101 addiu a0, 4
102 addiu a1, 4
103 bne a1, a3, lop1w
104 sw t0, -4(a0)
105
106last8:
107 blez a2, lst8e # Handle last 8 bytes, one at a time
108 addu a3, a2, a1 # a3 = source end address
109lst8l:
110 lb t0, 0(a1)
111 addiu a0, 1
112 addiu a1, 1
113 bne a1, a3, lst8l
114 sb t0, -1(a0)
115lst8e:
116 jr ra # Bye, bye
117 nop
118
119shift:
120 subu a3, zero, a0 # Src and Dest unaligned
121 andi a3, 0x3 # (unoptimized case...)
122 beq a3, zero, shft1 # destination already word aligned
123 subu a2, a3 # a2 = bytes left
124 LWHI t0, 0(a1) # Take care of first odd part
125 LWLO t0, 3(a1)
126 addu a1, a3
127 SWHI t0, 0(a0)
128 addu a0, a3
129shft1:
130 andi t0, a2, 0x3 # t0 = bytes left over after whole words
131 subu a3, a2, t0
132 addu a3, a1 # a3 = source end address of the word loop
133shfth:
134 LWHI t1, 0(a1) # Limp through, word by word
135 LWLO t1, 3(a1)
136 addiu a0, 4
137 addiu a1, 4
138 bne a1, a3, shfth
139 sw t1, -4(a0) # destination is word aligned here: plain store
140 b last8 # Handle anything which may be left
141 move a2, t0
142
143 .set reorder
diff --git a/firmware/asm/mips/memset.S b/firmware/asm/mips/memset.S
new file mode 100644
index 0000000000..8db76d9123
--- /dev/null
+++ b/firmware/asm/mips/memset.S
@@ -0,0 +1,239 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * This file was originally part of the Linux/MIPS GNU C Library
11 * Copyright (C) 1998 by Ralf Baechle
12 * Adapted for Rockbox by Maurus Cuelenaere, 2009
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23
24#include "config.h"
25#include "mips.h"
26
/*
 * FILLnn(dst, offset, val): store the word register val to nn consecutive
 * bytes at (offset)(dst), as nn/4 "sw" instructions in ascending address
 * order. Each size is assembled from two copies of the next smaller one.
 */
#define FILL32(dst, offset, val) \
    sw val, (offset + 0x00)(dst); \
    sw val, (offset + 0x04)(dst); \
    sw val, (offset + 0x08)(dst); \
    sw val, (offset + 0x0c)(dst); \
    sw val, (offset + 0x10)(dst); \
    sw val, (offset + 0x14)(dst); \
    sw val, (offset + 0x18)(dst); \
    sw val, (offset + 0x1c)(dst);

#define FILL64(dst, offset, val) \
    FILL32(dst, offset, val) \
    FILL32(dst, offset + 0x20, val)

#define FILL128(dst, offset, val) \
    FILL64(dst, offset, val) \
    FILL64(dst, offset + 0x40, val)

#define FILL256(dst, offset, val) \
    FILL128(dst, offset, val) \
    FILL128(dst, offset + 0x80, val)

/* Block size in bytes, and the matching fill macro. */
#define FILL 64
#define F_FILL FILL64
157
158
#ifdef ROCKBOX_BIG_ENDIAN
# define SWHI   swl     /* high part is left in big-endian */
# define SWLO   swr     /* low part is right in big-endian */
#else
# define SWHI   swr     /* high part is right in little-endian */
# define SWLO   swl     /* low part is left in little-endian */
#endif

/*
 * memset(void *s, int c, size_t n)
 *
 * a0: start of area to clear
 * a1: char to fill with
 * a2: size of area to clear
 *
 * Returns the start address in v0.
 *
 * Regions shorter than 4 bytes are filled bytewise. Otherwise an unaligned
 * head is filled with one partial-word store (SWHI: from the given address
 * to the end of its word), whole FILL-byte blocks with F_FILL, the
 * remaining whole words by jumping into the middle of a second F_FILL
 * expansion, and the last 1..3 bytes with one partial-word store (SWLO:
 * from the start of the word up to the given address).
 *
 * With noreorder in effect, the instruction written after each branch or
 * jump (indented by one extra space) sits in that branch's delay slot.
 */
    .section    .icode, "ax", %progbits

    .global     memset
    .type       memset, %function

    .set        noreorder
    .align      5
memset:
    beqz    a1, 1f              /* zero needs no spreading */
     move   v0, a0              /* result */

    andi    a1, 0xff            /* spread fillword */
    sll     t1, a1, 8
    or      a1, t1
    sll     t1, a1, 16
    or      a1, t1
1:

    sltiu   t0, a2, 4           /* very small region? */
    bnez    t0, small_memset
     andi   t0, a0, 3           /* aligned? */

    beqz    t0, 1f
     subu   t0, 4               /* alignment in bytes */

    SWHI    a1, (a0)            /* make word aligned */
    subu    a0, t0              /* word align ptr */
    addu    a2, t0              /* correct size */

1:  ori     t1, a2, (FILL-1)    /* # of full blocks */
    xori    t1, (FILL-1)
    beqz    t1, memset_partial  /* no block to fill */
     andi   t0, a2, (FILL-4)    /* bytes in the remaining whole words */

    addu    t1, a0              /* end address */
    .set    reorder
1:  addiu   a0, FILL
    F_FILL( a0, -FILL, a1 )
    bne     t1, a0, 1b
    .set    noreorder

memset_partial:
    la      t1, 2f              /* where to start */
    subu    t1, t0              /* back up one store per word still to write */
    jr      t1
     addu   a0, t0              /* dest ptr */

    F_FILL( a0, -FILL, a1 )     /* ... but first do words ... */
2:  andi    a2, 3               /* 0 <= n <= 3 to go */

    beqz    a2, 1f
     addu   a0, a2              /* What's left */
    /* The tail must be written from the start of its word up to the last
     * byte. SWHI would instead write from the last byte to the end of the
     * word, i.e. past the end of the region. */
    SWLO    a1, -1(a0)
1:  jr      ra
     move   a2, zero

small_memset:
    beqz    a2, 2f
     addu   t1, a0, a2          /* end address */

1:  addiu   a0, 1               /* fill bytewise */
    bne     t1, a0, 1b
     sb     a1, -1(a0)

2:  jr      ra                  /* done */
     move   a2, zero

    .set    reorder
diff --git a/firmware/asm/sh/memcpy.S b/firmware/asm/sh/memcpy.S
new file mode 100644
index 0000000000..e23a579b05
--- /dev/null
+++ b/firmware/asm/sh/memcpy.S
@@ -0,0 +1,219 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004-2005 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .align 2
26 .global _memcpy
27 .global ___memcpy_fwd_entry
28 .type _memcpy,@function
29
30/* Copies <length> bytes of data in memory from <source> to <dest>
31 * This version is optimized for speed
32 *
33 * arguments:
34 * r4 - destination address
35 * r5 - source address
36 * r6 - length
37 *
38 * return value:
39 * r0 - destination address (like ANSI version)
40 *
41 * register usage:
42 * r0 - data / scratch
43 * r1 - 2nd data / scratch
44 * r2 - scratch
45 * r3 - first long bound / adjusted end address (only if >= 11 bytes)
46 * r4 - current dest address
47 * r5 - current source address
48 * r6 - source end address
49 * r7 - stored dest start address
50 *
51 * The instruction order is devised in a way to utilize the pipelining
52 * of the SH1 to the max. The routine also tries to utilize fast page mode.
53 */
54
55_memcpy: /* copy r6 bytes from r5 to r4; returns the original dest in r0 */
56 mov r4,r7 /* store dest for returning */
57___memcpy_fwd_entry: /* exported 2nd entry: expects the dest already saved in r7 */
58 add #-8,r4 /* offset for early increment (max. 2 longs) */
59 mov #11,r0 /* minimum length for the long-wise path */
60 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */
61 add r5,r6 /* r6 = source_end */
62 bf .start_b2 /* no: jump directly to byte loop */
63
64 mov #3,r0
65 neg r5,r3
66 and r0,r3 /* r3 = (4 - align_offset) % 4 */
67 tst r3,r3 /* already aligned? */
68 bt .end_b1 /* yes: skip leading byte loop */
69
70 add r5,r3 /* r3 = first source long bound */
71
72 /* leading byte loop: copies 0..3 bytes */
73.loop_b1:
74 mov.b @r5+,r0 /* load byte & increment source addr */
75 add #1,r4 /* increment dest addr */
76 mov.b r0,@(7,r4) /* store byte; 7 plus the increment above cancels the -8 offset */
77 cmp/hi r5,r3 /* runs r5 up to first long bound */
78 bt .loop_b1
79 /* now r5 is always at a long boundary */
80 /* -> memory reading is done in longs for all dest alignments */
81
82 /* selector for main copy loop */
83.end_b1:
84 mov #3,r1
85 and r4,r1 /* r1 = dest alignment offset */
86 mova .jmptab,r0 /* r0 = address of the jump table */
87 mov.b @(r0,r1),r1 /* select appropriate main loop */
88 add r0,r1 /* r1 = table address + table entry = loop address */
89 mov r6,r3 /* move end address to r3 */
90 jmp @r1 /* and jump to it */
91 add #-7,r3 /* adjust end addr for main loops doing 2 longs/pass (jmp delay slot) */
92
93 /** main loops, copying 2 longs per pass to profit from fast page mode **/
94
95 /* long aligned destination (fastest) */
96 .align 2
97.loop_do0:
98 mov.l @r5+,r1 /* load first long & increment source addr */
99 add #16,r4 /* increment dest addr & account for decrementing stores */
100 mov.l @r5+,r0 /* load second long & increment source addr */
101 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
102 mov.l r0,@-r4 /* store second long */
103 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
104 bt .loop_do0
105
106 add #4,r3 /* readjust end address */
107 cmp/hi r5,r3 /* one long left? */
108 bf .start_b2 /* no, jump to trailing byte loop */
109
110 mov.l @r5+,r0 /* load last long & increment source addr */
111 add #4,r4 /* increment dest addr */
112 bra .start_b2 /* jump to trailing byte loop */
113 mov.l r0,@(4,r4) /* store last long (bra delay slot) */
114
115 /* word aligned destination (long + 2) */
116 .align 2
117.loop_do2:
118 mov.l @r5+,r1 /* load first long & increment source addr */
119 add #16,r4 /* increment dest addr */
120 mov.l @r5+,r0 /* load second long & increment source addr */
121 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
122 mov.w r0,@-r4 /* store low word of second long */
123 xtrct r1,r0 /* extract low word of first long & high word of second long */
124 mov.l r0,@-r4 /* and store as long */
125 swap.w r1,r0 /* get high word of first long */
126 mov.w r0,@-r4 /* and store it */
127 bt .loop_do2
128
129 add #4,r3 /* readjust end address */
130 cmp/hi r5,r3 /* one long left? */
131 bf .start_b2 /* no, jump to trailing byte loop */
132
133 mov.l @r5+,r0 /* load last long & increment source addr */
134 add #4,r4 /* increment dest addr */
135 mov.w r0,@(6,r4) /* store low word */
136 shlr16 r0 /* get high word */
137 bra .start_b2 /* jump to trailing byte loop */
138 mov.w r0,@(4,r4) /* and store it (bra delay slot) */
139
140 /* jumptable for loop selector */
141 .align 2
142.jmptab:
143 .byte .loop_do0 - .jmptab /* placed in the middle because the SH1 */
144 .byte .loop_do1 - .jmptab /* loads bytes sign-extended. Otherwise */
145 .byte .loop_do2 - .jmptab /* the last loop would be out of reach */
146 .byte .loop_do3 - .jmptab /* of the offset range. */
147
148 /* byte aligned destination (long + 1) */
149 .align 2
150.loop_do1:
151 mov.l @r5+,r1 /* load first long & increment source addr */
152 add #16,r4 /* increment dest addr */
153 mov.l @r5+,r0 /* load second long & increment source addr */
154 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
155 mov.b r0,@-r4 /* store low byte of second long */
156 shlr8 r0 /* get upper 3 bytes */
157 mov r1,r2 /* copy first long */
158 shll16 r2 /* move low byte of first long all the way up, .. */
159 shll8 r2 /* (16 + 8 = 24 bit positions in total) */
160 or r2,r0 /* ..combine with the 3 bytes of second long.. */
161 mov.l r0,@-r4 /* ..and store as long */
162 shlr8 r1 /* get middle 2 bytes */
163 mov.w r1,@-r4 /* store as word */
164 shlr16 r1 /* get upper byte */
165 mov.b r1,@-r4 /* and store */
166 bt .loop_do1 /* tests the T bit set by the cmp/hi above */
167
168 add #4,r3 /* readjust end address */
169.last_do13: /* last-long handling shared by the (long + 1) and (long + 3) loops */
170 cmp/hi r5,r3 /* one long left? */
171 bf .start_b2 /* no, jump to trailing byte loop */
172
173 mov.l @r5+,r0 /* load last long & increment source addr */
174 add #12,r4 /* increment dest addr */
175 mov.b r0,@-r4 /* store low byte */
176 shlr8 r0 /* get middle 2 bytes */
177 mov.w r0,@-r4 /* store as word */
178 shlr16 r0 /* get upper byte */
179 mov.b r0,@-r4 /* and store */
180 bra .start_b2 /* jump to trailing byte loop */
181 add #-4,r4 /* readjust destination (bra delay slot) */
182
183 /* byte aligned destination (long + 3) */
184 .align 2
185.loop_do3:
186 mov.l @r5+,r1 /* load first long & increment source addr */
187 add #16,r4 /* increment dest addr */
188 mov.l @r5+,r0 /* load second long & increment source addr */
189 mov r1,r2 /* copy first long */
190 mov.b r0,@-r4 /* store low byte of second long */
191 shlr8 r0 /* get middle 2 bytes */
192 mov.w r0,@-r4 /* store as word */
193 shlr16 r0 /* get upper byte */
194 shll8 r2 /* move lower 3 bytes of first long one up.. */
195 or r2,r0 /* ..combine with the 1 byte of second long.. */
196 mov.l r0,@-r4 /* ..and store as long */
197 shlr16 r1 /* get upper byte of first long.. */
198 shlr8 r1 /* (16 + 8 = 24 bit positions in total) */
199 cmp/hi r5,r3 /* runs r5 up to last or second last long bound */
200 mov.b r1,@-r4 /* ..and store */
201 bt .loop_do3
202
203 bra .last_do13 /* handle last longword: reuse routine for (long + 1) */
204 add #4,r3 /* readjust end address (bra delay slot) */
205
206 /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */
207 .align 2
208.loop_b2:
209 mov.b @r5+,r0 /* load byte & increment source addr */
210 add #1,r4 /* increment dest addr */
211 mov.b r0,@(7,r4) /* store byte; 7 plus the increment above cancels the -8 offset */
212.start_b2:
213 cmp/hi r5,r6 /* runs r5 up to end address */
214 bt .loop_b2
215
216 rts
217 mov r7,r0 /* return dest start address (rts delay slot) */
218.end:
219 .size _memcpy,.end-_memcpy
diff --git a/firmware/asm/sh/memmove.S b/firmware/asm/sh/memmove.S
new file mode 100644
index 0000000000..d5a7160043
--- /dev/null
+++ b/firmware/asm/sh/memmove.S
@@ -0,0 +1,222 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .align 2
26 .global _memmove
27 .type _memmove,@function
28
29/* Moves <length> bytes of data in memory from <source> to <dest>
30 * Regions may overlap.
31 * This version is optimized for speed, and needs the corresponding memcpy
32 * implementation for the forward copy branch.
33 *
34 * arguments:
35 * r4 - destination address
36 * r5 - source address
37 * r6 - length
38 *
39 * return value:
40 * r0 - destination address (like ANSI version)
41 *
42 * register usage:
43 * r0 - data / scratch
44 * r1 - 2nd data / scratch
45 * r2 - scratch
46 * r3 - last long bound / adjusted start address (only if >= 11 bytes)
47 * r4 - current dest address
48 * r5 - source start address
49 * r6 - current source address
50 *
51 * The instruction order is devised in a way to utilize the pipelining
52 * of the SH1 to the max. The routine also tries to utilize fast page mode.
53 */
54
55_memmove: /* move r6 bytes from r5 to r4, regions may overlap; returns dest in r0 */
56 cmp/hi r4,r5 /* source > destination? */
57 bf .backward /* no: backward copy */
58 mov.l .memcpy_fwd,r0 /* yes: forward copy, done by the 2nd entry point of memcpy */
59 jmp @r0
60 mov r4,r7 /* store dest for returning (jmp delay slot) */
61
62 .align 2
63.memcpy_fwd:
64 .long ___memcpy_fwd_entry
65
66.backward:
67 add r6,r4 /* r4 = destination end */
68 mov #11,r0 /* minimum length for the long-wise path */
69 cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */
70 add #-8,r5 /* adjust for late decrement (max. 2 longs) */
71 add r5,r6 /* r6 = source end - 8 */
72 bf .start_b2r /* no: jump directly to byte loop */
73
74 mov #-4,r3 /* r3 = 0xfffffffc */
75 and r6,r3 /* r3 = last source long bound */
76 cmp/hi r3,r6 /* already aligned? */
77 bf .end_b1r /* yes: skip leading byte loop */
78
79.loop_b1r:
80 mov.b @(7,r6),r0 /* load byte; 7 minus the decrement below cancels the -8 offset */
81 add #-1,r6 /* decrement source addr */
82 mov.b r0,@-r4 /* store byte */
83 cmp/hi r3,r6 /* runs r6 down to last long bound */
84 bt .loop_b1r
85
86.end_b1r:
87 mov #3,r1
88 and r4,r1 /* r1 = dest alignment offset */
89 mova .jmptab_r,r0 /* r0 = address of the jump table */
90 mov.b @(r0,r1),r1 /* select appropriate main loop.. */
91 add r0,r1 /* r1 = table address + table entry = loop address */
92 mov r5,r3 /* copy start address to r3 */
93 jmp @r1 /* ..and jump to it */
94 add #7,r3 /* adjust start addr for main loops doing 2 longs/pass (jmp delay slot) */
95
96 /** main loops, copying 2 longs per pass to profit from fast page mode **/
97
98 /* long aligned destination (fastest) */
99 .align 2
100.loop_do0r:
101 mov.l @r6,r1 /* load first long */
102 add #-8,r6 /* decrement source addr */
103 mov.l @(12,r6),r0 /* load second long */
104 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
105 mov.l r0,@-r4 /* store second long */
106 mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */
107 bt .loop_do0r
108
109 add #-4,r3 /* readjust start address */
110 cmp/hi r3,r6 /* first long left? */
111 bf .start_b2r /* no, jump to trailing byte loop */
112
113 mov.l @(4,r6),r0 /* load first long */
114 add #-4,r6 /* decrement source addr */
115 bra .start_b2r /* jump to trailing byte loop */
116 mov.l r0,@-r4 /* store first long (bra delay slot) */
117
118 /* word aligned destination (long + 2) */
119 .align 2
120.loop_do2r:
121 mov.l @r6,r1 /* load first long */
122 add #-8,r6 /* decrement source addr */
123 mov.l @(12,r6),r0 /* load second long */
124 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
125 mov.w r0,@-r4 /* store low word of second long */
126 xtrct r1,r0 /* extract low word of first long & high word of second long */
127 mov.l r0,@-r4 /* and store as long */
128 shlr16 r1 /* get high word of first long */
129 mov.w r1,@-r4 /* and store it */
130 bt .loop_do2r
131
132 add #-4,r3 /* readjust start address */
133 cmp/hi r3,r6 /* first long left? */
134 bf .start_b2r /* no, jump to trailing byte loop */
135
136 mov.l @(4,r6),r0 /* load first long */
137 add #-4,r6 /* decrement source addr */
138 mov.w r0,@-r4 /* store low word */
139 shlr16 r0 /* get high word */
140 bra .start_b2r /* jump to trailing byte loop */
141 mov.w r0,@-r4 /* and store it (bra delay slot) */
142
143 /* jumptable for loop selector */
144 .align 2
145.jmptab_r:
146 .byte .loop_do0r - .jmptab_r /* placed in the middle because the SH1 */
147 .byte .loop_do1r - .jmptab_r /* loads bytes sign-extended. Otherwise */
148 .byte .loop_do2r - .jmptab_r /* the last loop would be out of reach */
149 .byte .loop_do3r - .jmptab_r /* of the offset range. */
150
151 /* byte aligned destination (long + 1) */
152 .align 2
153.loop_do1r:
154 mov.l @r6,r1 /* load first long */
155 add #-8,r6 /* decrement source addr */
156 mov.l @(12,r6),r0 /* load second long */
157 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
158 mov.b r0,@-r4 /* store low byte of second long */
159 shlr8 r0 /* get upper 3 bytes */
160 mov r1,r2 /* copy first long */
161 shll16 r2 /* move low byte of first long all the way up, .. */
162 shll8 r2 /* (16 + 8 = 24 bit positions in total) */
163 or r2,r0 /* ..combine with the 3 bytes of second long.. */
164 mov.l r0,@-r4 /* ..and store as long */
165 shlr8 r1 /* get middle 2 bytes */
166 mov.w r1,@-r4 /* store as word */
167 shlr16 r1 /* get upper byte */
168 mov.b r1,@-r4 /* and store */
169 bt .loop_do1r /* tests the T bit set by the cmp/hi above */
170
171 add #-4,r3 /* readjust start address */
172.last_do13r: /* first-long handling shared by the (long + 1) and (long + 3) loops */
173 cmp/hi r3,r6 /* first long left? */
174 bf .start_b2r /* no, jump to trailing byte loop */
175
176 nop /* alignment */
177 mov.l @(4,r6),r0 /* load first long */
178 add #-4,r6 /* decrement source addr */
179 mov.b r0,@-r4 /* store low byte */
180 shlr8 r0 /* get middle 2 bytes */
181 mov.w r0,@-r4 /* store as word */
182 shlr16 r0 /* get upper byte */
183 bra .start_b2r /* jump to trailing byte loop */
184 mov.b r0,@-r4 /* and store (bra delay slot) */
185
186 /* byte aligned destination (long + 3) */
187 .align 2
188.loop_do3r:
189 mov.l @r6,r1 /* load first long */
190 add #-8,r6 /* decrement source addr */
191 mov.l @(12,r6),r0 /* load second long */
192 mov r1,r2 /* copy first long */
193 mov.b r0,@-r4 /* store low byte of second long */
194 shlr8 r0 /* get middle 2 bytes */
195 mov.w r0,@-r4 /* store as word */
196 shlr16 r0 /* get upper byte */
197 shll8 r2 /* move lower 3 bytes of first long one up.. */
198 or r2,r0 /* ..combine with the 1 byte of second long.. */
199 mov.l r0,@-r4 /* ..and store as long */
200 shlr16 r1 /* get upper byte of first long */
201 shlr8 r1 /* (16 + 8 = 24 bit positions in total) */
202 cmp/hi r3,r6 /* runs r6 down to first or second long bound */
203 mov.b r1,@-r4 /* ..and store */
204 bt .loop_do3r
205
206 bra .last_do13r /* handle first longword: reuse routine for (long + 1) */
207 add #-4,r3 /* readjust start address (bra delay slot) */
208
209 /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */
210 .align 2
211.loop_b2r:
212 mov.b @(7,r6),r0 /* load byte; 7 minus the decrement below cancels the -8 offset */
213 add #-1,r6 /* decrement source addr */
214 mov.b r0,@-r4 /* store byte */
215.start_b2r:
216 cmp/hi r5,r6 /* runs r6 down to start address */
217 bt .loop_b2r
218
219 rts
220 mov r4,r0 /* return dest start address: r4 has run down to it (rts delay slot) */
221.end:
222 .size _memmove,.end-_memmove
diff --git a/firmware/asm/sh/memset.S b/firmware/asm/sh/memset.S
new file mode 100644
index 0000000000..8cae1ea112
--- /dev/null
+++ b/firmware/asm/sh/memset.S
@@ -0,0 +1,109 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .align 2
26 .global _memset
27 .type _memset,@function
28
29/* Fills a memory region with specified byte value
30 * This version is optimized for speed
31 *
32 * arguments:
33 * r4 - start address
34 * r5 - data
35 * r6 - length
36 *
37 * return value:
38 * r0 - start address (like ANSI version)
39 *
40 * register usage:
41 * r0 - temporary
42 * r1 - start address +11 for main loop
43 * r4 - start address
44 * r5 - data (spread to all 4 bytes when using long stores)
45 * r6 - current address (runs down from end to start)
46 *
47 * The instruction order below is devised in a way to utilize the pipelining
48 * of the SH1 to the max. The routine fills memory from end to start in
49 * order to utilize the auto-decrementing store instructions.
50 */
51
52_memset: /* fill r6 bytes at r4 with the low byte of r5; returns r4 in r0 */
53 neg r4,r0
54 and #3,r0 /* r0 = (4 - align_offset) % 4 */
55 add #4,r0 /* r0 = bytes needed to reach and fill one aligned long */
56 cmp/hs r0,r6 /* at least one aligned longword to fill? */
57 add r4,r6 /* r6 = end_address */
58 bf .no_longs /* no, jump directly to byte loop */
59
60 extu.b r5,r5 /* start: spread data to all 4 bytes */
61 swap.b r5,r0
62 or r0,r5 /* data now in 2 lower bytes of r5 */
63 swap.w r5,r0
64 or r0,r5 /* data now in all 4 bytes of r5 */
65
66 mov r6,r0
67 tst #3,r0 /* r0 already long aligned? */
68 bt .end_b1 /* yes: skip loop */
69
70 /* leading byte loop: sets 0..3 bytes */
71.loop_b1:
72 mov.b r5,@-r0 /* store byte */
73 tst #3,r0 /* r0 long aligned? */
74 bf .loop_b1 /* runs r0 down until long aligned */
75
76 mov r0,r6 /* r6 = last long bound */
77 nop /* keep alignment */
78
79.end_b1:
80 mov r4,r1 /* r1 = start_address... */
81 add #11,r1 /* ... + 11, combined for rounding and offset */
82 xor r1,r0 /* r0 = last long bound XOR (start + 11) */
83 tst #4,r0 /* bit 2 tells whether an even or odd number of */
84 bf .loop_odd /* longwords to set */
85
86 /* main loop: set 2 longs per pass */
87.loop_2l:
88 mov.l r5,@-r6 /* store first long */
89.loop_odd: /* entry that skips the first store when the bit 2 test is nonzero */
90 cmp/hi r1,r6 /* runs r6 down to first long bound */
91 mov.l r5,@-r6 /* store second long */
92 bt .loop_2l /* tests the T bit set by the cmp/hi above */
93
94.no_longs:
95 cmp/hi r4,r6 /* any bytes left? */
96 bf .end_b2 /* no: skip loop */
97
98 /* trailing byte loop */
99.loop_b2:
100 mov.b r5,@-r6 /* store byte */
101 cmp/hi r4,r6 /* runs r6 down to the start address */
102 bt .loop_b2
103
104.end_b2:
105 rts
106 mov r4,r0 /* return start address (rts delay slot) */
107
108.end:
109 .size _memset,.end-_memset
diff --git a/firmware/asm/sh/strlen.S b/firmware/asm/sh/strlen.S
new file mode 100644
index 0000000000..e7169e25db
--- /dev/null
+++ b/firmware/asm/sh/strlen.S
@@ -0,0 +1,96 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2005 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .align 2
26 .global _strlen
27 .type _strlen,@function
28
29/* Works out the length of a string
30 * This version is optimized for speed
31 *
32 * arguments:
33 * r4 - start address
34 *
35 * return value:
36 * r0 - string length
37 *
38 * register usage:
39 * r0 - current address
40 * r1 - current value (byte/long)
41 * r2 - mask for alignment / zero (for cmp/str)
42 * r4 - start address
43 *
44 */
45
46_strlen: /* returns in r0 the number of bytes before the first zero byte at r4 */
47 mov r4,r0 /* r0 = start address */
48 tst #3,r0 /* long aligned? */
49 bt .start_l /* yes, jump directly to the longword loop */
50
51 /* not long aligned: check the first 3 bytes */
52 mov.b @r0+,r1 /* fetch first byte */
53 tst r1,r1 /* byte == 0 ? */
54 bt .hitzero /* yes, string end found */
55 mov.b @r0+,r1 /* fetch second byte */
56 mov #3,r2 /* prepare mask: r2 = 0..00000011b */
57 tst r1,r1 /* byte == 0 ? */
58 bt .hitzero /* yes, string end found */
59 mov.b @r0+,r1 /* fetch third byte */
60 not r2,r2 /* prepare mask: r2 = 1..11111100b */
61 tst r1,r1 /* byte == 0 ? */
62 bt .hitzero /* yes, string end found */
63
64 /* not yet found, fall through into longword loop */
65 and r2,r0 /* align down to long bound; bytes at or above it that were already checked are rescanned */
66
67 /* main loop: check longwords */
68.start_l:
69 mov #0,r2 /* zero longword for cmp/str */
70.loop_l:
71 mov.l @r0+,r1 /* fetch long word */
72 cmp/str r1,r2 /* any zero byte within? */
73 bf .loop_l /* no, loop */
74 add #-4,r0 /* set address back to start of this longword */
75
76 /* the last longword contains the string end: figure out the byte */
77 mov.b @r0+,r1 /* fetch first byte */
78 tst r1,r1 /* byte == 0 ? */
79 bt .hitzero /* yes, string end found */
80 mov.b @r0+,r1 /* fetch second byte */
81 tst r1,r1 /* byte == 0 ? */
82 bt .hitzero /* yes, string end found */
83 mov.b @r0+,r1 /* fetch third byte */
84 tst r1,r1 /* byte == 0 ? */
85 bt .hitzero /* yes, string end found */
86 rts /* must be the fourth byte */
87 sub r4,r0 /* len = string_end - string_start (rts delay slot) */
88
89.hitzero: /* r0 points one past the zero byte here */
90 add #-1,r0 /* undo address increment */
91 rts
92 sub r4,r0 /* len = string_end - string_start (rts delay slot) */
93
94.end:
95 .size _strlen,.end-_strlen
96
diff --git a/firmware/asm/strlen.c b/firmware/asm/strlen.c
new file mode 100644
index 0000000000..649df6764b
--- /dev/null
+++ b/firmware/asm/strlen.c
@@ -0,0 +1,93 @@
1/*
2FUNCTION
3 <<strlen>>---character string length
4
5INDEX
6 strlen
7
8ANSI_SYNOPSIS
9 #include <string.h>
10 size_t strlen(const char *<[str]>);
11
12TRAD_SYNOPSIS
13 #include <string.h>
14 size_t strlen(<[str]>)
15 char *<[str]>;
16
17DESCRIPTION
18 The <<strlen>> function works out the length of the string
19 starting at <<*<[str]>>> by counting characters until it
20 reaches a <<NULL>> character.
21
22RETURNS
23 <<strlen>> returns the character count.
24
25PORTABILITY
26<<strlen>> is ANSI C.
27
28<<strlen>> requires no supporting OS subroutines.
29
30QUICKREF
31 strlen ansi pure
32*/
33
34#include "config.h"
35#include "_ansi.h"
36#include <string.h>
37#include <limits.h>
38
39#define LBLOCKSIZE (sizeof (long)) /* bytes examined per word-sized step */
40#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1)) /* nonzero if X is not long-aligned; X is not parenthesized, so pass a simple expression */
41
42#if LONG_MAX == 2147483647L
43#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) /* 32-bit long: nonzero if X contains a zero byte */
44#else
45#if LONG_MAX == 9223372036854775807L
46/* Nonzero if X (a 64-bit long int) contains a zero byte. */
47#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
48#else
49#error long int is not a 32bit or 64bit type.
50#endif
51#endif
52
53#ifndef DETECTNULL /* defensive re-check: every path above either defines DETECTNULL or hits #error */
54#error long int is not a 32bit or 64bit byte
55#endif /* NOTE(review): the message above presumably means "type", as on the earlier #error - confirm */
56
57size_t
58_DEFUN (strlen, (str),
59 _CONST char *str) ICODE_ATTR;
60
61/* Return the number of characters in STR that precede its terminating
62   zero byte. Unless a size-optimized build is requested, a long-aligned
63   STR is first scanned one long at a time. */
64size_t
65_DEFUN (strlen, (str),
66 _CONST char *str)
67{
68  _CONST char *begin = str;
69
70#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
71  if (!UNALIGNED (str))
72    {
73      unsigned long *word_ptr = (unsigned long*)str;
74
75      /* Step over every word-sized block that holds no zero byte. */
76      for (; !DETECTNULL (*word_ptr); word_ptr++)
77        ;
78
79      /* The terminator is somewhere in this block; the byte loop below
80         finds its exact position. */
81      str = (char*)word_ptr;
82    }
83#endif /* not PREFER_SIZE_OVER_SPEED */
84
85  /* Byte-wise scan up to the terminator. */
86  while (*str)
87    str++;
88
89  return str - begin;
90}