Diffstat (limited to 'firmware/target/sh/memset-sh.S')
-rwxr-xr-x  firmware/target/sh/memset-sh.S  107
1 file changed, 107 insertions, 0 deletions
diff --git a/firmware/target/sh/memset-sh.S b/firmware/target/sh/memset-sh.S
new file mode 100755
index 0000000000..9b96b93f27
--- /dev/null
+++ b/firmware/target/sh/memset-sh.S
@@ -0,0 +1,107 @@
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                      \/            \/     \/    \/           \/
 * $Id$
 *
 * Copyright (C) 2004 by Jens Arnold
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
#include "config.h"

    .section    .icode,"ax",@progbits

    .align      2
    .global     _memset
    .type       _memset,@function
/* Fills a memory region with a specified byte value
 * This version is optimized for speed
 *
 * arguments:
 *  r4 - start address
 *  r5 - data
 *  r6 - length
 *
 * return value:
 *  r0 - start address (like ANSI version)
 *
 * register usage:
 *  r0 - temporary
 *  r1 - start address + 11 for main loop
 *  r4 - start address
 *  r5 - data (spread to all 4 bytes when using long stores)
 *  r6 - current address (runs down from end to start)
 *
 * The instruction order below is devised in a way to utilize the pipelining
 * of the SH1 to the max. The routine fills memory from end to start in
 * order to utilize the auto-decrementing store instructions.
 */
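
/* Roughly equivalent C, for illustration only (not assembled or built);
 * the plain longword loop below stands in for the unrolled
 * two-longs-per-pass loop used in the assembly:
 *
 *   void *memset(void *dst, int data, size_t len)
 *   {
 *       unsigned char *start = dst;
 *       unsigned char *p = start + len;     // fill backwards from the end
 *       unsigned long  d = (unsigned char)data;
 *
 *       if (len >= ((-(unsigned long)start) & 3) + 4)
 *       {                                   // at least one aligned longword
 *           d |= d << 8;                    // spread byte to all 4 bytes
 *           d |= d << 16;
 *           while ((unsigned long)p & 3)    // unaligned bytes at the end
 *               *--p = d;
 *           while (p - start >= 4)          // aligned longword stores
 *           {
 *               p -= 4;
 *               *(unsigned long *)p = d;
 *           }
 *       }
 *       while (p > start)                   // leftover bytes at the start
 *           *--p = d;
 *       return start;
 *   }
 */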

_memset:
    neg     r4,r0
    and     #3,r0       /* r0 = (4 - align_offset) % 4 */
    add     #4,r0
    cmp/hs  r0,r6       /* at least one aligned longword to fill? */
    add     r4,r6       /* r6 = end_address */
    bf      .no_longs   /* no, jump directly to byte loop */
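    /* Example: for a start address ending in binary ...10, r0 becomes
     * 2 + 4 = 6, so any length below 6 bytes goes straight to the byte loop. */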

    extu.b  r5,r5       /* start: spread data to all 4 bytes */
    swap.b  r5,r0
    or      r0,r5       /* data now in 2 lower bytes of r5 */
    swap.w  r5,r0
    or      r0,r5       /* data now in all 4 bytes of r5 */
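    /* Example with data = 0xAB: extu.b gives 0x000000AB, swap.b/or give
     * 0x0000ABAB, swap.w/or give 0xABABABAB. */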

    mov     r6,r0
    tst     #3,r0       /* r0 already long aligned? */
    bt      .end_b1     /* yes: skip loop */

    /* leading byte loop: sets 0..3 bytes */
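    /* ("leading" refers to execution order; since the fill runs backwards,
     *  these are the unaligned bytes at the end of the region) */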
.loop_b1:
    mov.b   r5,@-r0     /* store byte */
    tst     #3,r0       /* r0 long aligned? */
    bf      .loop_b1    /* runs r0 down until long aligned */

    mov     r0,r6       /* r6 = last long bound */
    nop                 /* keep alignment */

.end_b1:
    mov     r4,r1       /* r1 = start_address... */
    add     #11,r1      /* ... + 11, combined for rounding and offset */
    xor     r1,r0
    tst     #4,r0       /* bit 2 tells whether an even or odd number of */
    bf      .loop_odd   /* longwords to set */
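    /* (The constant 11 is 3 to round the start address up to the next
     * longword boundary, plus 8 of headroom because the compare sits between
     * the two stores of a loop pass. Both r6 and the rounded-up start are
     * long aligned, so bit 2 of the xor is the parity of the longword count
     * and selects the entry point into the unrolled loop below.) */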

    /* main loop: set 2 longs per pass */
.loop_2l:
    mov.l   r5,@-r6     /* store first long */
.loop_odd:
    cmp/hi  r1,r6       /* runs r6 down to first long bound */
    mov.l   r5,@-r6     /* store second long */
    bt      .loop_2l

.no_longs:
    cmp/hi  r4,r6       /* any bytes left? */
    bf      .end_b2     /* no: skip loop */

    /* trailing byte loop */
.loop_b2:
    mov.b   r5,@-r6     /* store byte */
    cmp/hi  r4,r6       /* runs r6 down to the start address */
    bt      .loop_b2

.end_b2:
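    /* the mov below sits in the delay slot of rts and executes before the
     * return */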
    rts
    mov     r4,r0       /* return start address */

.end:
    .size   _memset,.end-_memset