summaryrefslogtreecommitdiff
path: root/firmware/common/memset.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/common/memset.S')
-rw-r--r--firmware/common/memset.S108
1 files changed, 108 insertions, 0 deletions
diff --git a/firmware/common/memset.S b/firmware/common/memset.S
new file mode 100644
index 0000000000..038915c475
--- /dev/null
+++ b/firmware/common/memset.S
@@ -0,0 +1,108 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20 .section .icode,"ax",@progbits
21
22 .align 2
23 .global _memset
24 .type _memset,@function
25
26/* Fills a memory region with specified byte value
27 * This version is optimized for speed
28 *
29 * arguments:
30 * r4 - start address
31 * r5 - data
32 * r6 - length
33 *
34 * return value:
35 * r0 - start address (like ANSI version)
36 *
37 * register usage:
38 * r0 - temporary
39 * r1 - bit mask for rounding to long bounds
40 * r2 - last / first long bound (only if >= 12 bytes)
41 * r4 - start address
42 * r5 - data (spread to all 4 bytes if >= 12 bytes)
43 * r6 - current address (runs down from end to start)
44 *
45 * The instruction order below is devised in a way to utilize the pipelining
46 * of the SH1 to the max. The routine fills memory from end to start in
47 * order to utilize the auto-decrementing store instructions.
48 */
49
50_memset: /* in: r4 = start address, r5 = fill value, r6 = length in bytes */
 /* Overall flow: fills shorter than 12 bytes go straight to the trailing
  * byte loop. Longer fills store single bytes down to the last long bound,
  * then longs (two per pass) down to the first long bound, then single
  * bytes down to the start address. r4 is never modified and is returned
  * in r0. */
51 add r4,r6 /* r6 = end address (start + length, one past the last byte) */
52
53 mov r6,r0 /* work on a copy; r6 must keep the end address */
54 add #-12,r0 /* r0 = r6 - 12; don't go below 12 here! (the main loop below always stores two longs per pass, so the region must be large enough to hold them after alignment) */
55 cmp/hs r4,r0 /* unsigned: end - 12 >= start, i.e. >= 12 bytes to fill? */
 /* NOTE(review): if the end address is below 12 the subtraction above wraps
  * and this unsigned test passes even for a short fill; presumably such low
  * addresses are never passed to this routine - confirm. */
56 bf .start_b2 /* no, jump directly to the trailing byte loop (it only stores the low byte of r5, so r5 needs no spreading) */
57
58 extu.b r5,r5 /* start spreading data to all 4 bytes: r5 = 0x000000XX */
59 swap.b r5,r0 /* r0 = 0x0000XX00 (two low bytes of r5 swapped) */
60 or r0,r5 /* data now in 2 lower bytes of r5 (0x0000XXXX) */
61 swap.w r5,r0 /* r0 = 0xXXXX0000 (upper and lower words of r5 swapped) */
62 or r0,r5 /* data now in all 4 bytes of r5 (0xXXXXXXXX) */
63
64 mov #-4,r1 /* r1 = 0xFFFFFFFC, mask that clears the two low address bits */
65
66 mov r6,r2 /* r2 = end address, rounded down in the delay slot below */
67 bra .start_b1 /* enter the leading byte loop at its test, so 0 bytes is possible */
68 and r1,r2 /* (delay slot, executed before the branch lands) r2 = last long bound */
69
70 /* leading byte loop: sets 0..3 bytes (those above the last long bound, at the high end of the region, because filling runs downwards) */
71.loop_b1:
72 mov.b r5,@-r6 /* pre-decrement r6, then store the low byte of r5 */
73.start_b1:
74 cmp/hi r2,r6 /* unsigned r6 > r2: runs r6 down to last long bound */
75 bt .loop_b1 /* still above the bound: store another byte */
76
77 mov r4,r2 /* r2 = start address */
78 add #11,r2 /* combined for rounding (+3) and offset (+8) */
79 and r1,r2 /* r2 = first long bound + 8 (start rounded up to a long bound, plus 8) */
80
81 /* main loop: set 2 longs per pass; r6 is long aligned here and at least 2 longs remain on every entry */
82.loop2_l:
83 mov.l r5,@-r6 /* store first long */
84 cmp/hi r2,r6 /* tested after the first store only: r6 > first long bound + 8 means 2 more longs still fit after the store below; runs r6 down to first or second long bound */
85 mov.l r5,@-r6 /* store second long (placed between compare and branch) */
86 bt .loop2_l /* repeat while the compare above was true */
87
88 add #-8,r2 /* correct offset: r2 = first long bound */
89 cmp/hi r2,r6 /* r6 > first long bound: 1 long left? */
90 bf .start_b2 /* no, jump to trailing byte loop */
91
92 bra .start_b2 /* jump to trailing byte loop */
93 mov.l r5,@-r6 /* (delay slot) store last long; r6 = first long bound afterwards */
94
95 /* trailing byte loop: sets the bytes between the start address and r6; it is also the complete fill for regions shorter than 12 bytes */
96 .align 2 /* any padding emitted here follows an unconditional branch and its delay slot, so it is never executed */
97.loop_b2:
98 mov.b r5,@-r6 /* pre-decrement r6, then store the low byte of r5 */
99.start_b2:
100 cmp/hi r4,r6 /* unsigned r6 > r4: runs r6 down to the start address (a zero length stores nothing) */
101 bt .loop_b2 /* still above the start address: store another byte */
102
103 rts /* return to caller; the instruction below is its delay slot */
104 mov r4,r0 /* (delay slot) return start address */
105
106.end: /* end marker used for the size calculation below */
107 .size _memset,.end-_memset
108