summaryrefslogtreecommitdiff
path: root/firmware/common/memset_a.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/common/memset_a.S')
-rw-r--r--firmware/common/memset_a.S132
1 files changed, 132 insertions, 0 deletions
diff --git a/firmware/common/memset_a.S b/firmware/common/memset_a.S
new file mode 100644
index 0000000000..bce8936089
--- /dev/null
+++ b/firmware/common/memset_a.S
@@ -0,0 +1,132 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19#include "config.h"
20
21 .section .icode,"ax",@progbits
22
23 .align 2
24#if CONFIG_CPU == SH7034
25 .global _memset
26 .type _memset,@function
27
28/* Fills a memory region with specified byte value
29 * This version is optimized for speed
30 *
31 * arguments:
32 * r4 - start address
33 * r5 - data
34 * r6 - length
35 *
36 * return value:
37 * r0 - start address (like ANSI version)
38 *
39 * register usage:
40 * r0 - temporary
41 * r1 - bit mask for rounding to long bounds
42 * r2 - last / first long bound (only if >= 12 bytes)
43 * r4 - start address
44 * r5 - data (spread to all 4 bytes if >= 12 bytes)
45 * r6 - current address (runs down from end to start)
46 *
47 * The instruction order below is devised in a way to utilize the pipelining
48 * of the SH1 to the max. The routine fills memory from end to start in
49 * order to utilize the auto-decrementing store instructions.
50 */
51
52_memset:
53 add r4,r6 /* r6 = end_address (start + length, i.e. one past the last byte) */
54 /* size check: the long-wise path below is only taken for >= 12 bytes */
55 mov r6,r0 /* work on a copy so that r6 keeps the end address */
56 add #-12,r0 /* r0 = r6 - 12; don't go below 12 here! (up to 3 bytes may be lost to alignment at each end, and the main loop always stores 2 longs, so 12 is the minimum that is safe) */
57 cmp/hs r4,r0 /* unsigned compare: end - 12 >= start, i.e. >= 12 bytes to fill? */
58 bf .start_b2 /* no, jump directly to byte loop (r5 is used as is; mov.b stores its low byte) */
59
60 extu.b r5,r5 /* start: spread data to all 4 bytes; first keep only the low byte of the value */
61 swap.b r5,r0 /* r0 = r5 with its two low bytes exchanged, i.e. the value in bits 8..15 */
62 or r0,r5 /* data now in 2 lower bytes of r5 */
63 swap.w r5,r0 /* r0 = r5 with its upper and lower words exchanged */
64 or r0,r5 /* data now in all 4 bytes of r5 */
65
66 mov #-4,r1 /* r1 = 0xFFFFFFFC (mask that clears the 2 low address bits) */
67
68 mov r6,r2 /* r2 = end address, masked in the delay slot below */
69 bra .start_b1 /* enter the leading byte loop at its test, so 0 bytes is possible */
70 and r1,r2 /* r2 = last long bound (end address rounded down to a multiple of 4); executes in the delay slot of the bra above */
71
72 /* leading byte loop: sets 0..3 bytes (those between the last long bound and the end address; "leading" in execution order, since filling runs from end to start) */
73.loop_b1:
74 mov.b r5,@-r6 /* store byte (pre-decrements r6 by 1) */
75.start_b1:
76 cmp/hi r2,r6 /* runs r6 down to last long bound (unsigned r6 > r2?) */
77 bt .loop_b1 /* yes, another byte to store */
78 /* r6 is now long aligned */
79 mov r4,r2 /* r2 = start address */
80 add #11,r2 /* combined for rounding and offset (+3 rounds up, +8 is the loop offset) */
81 and r1,r2 /* r2 = first long bound + 8 */
82
83 /* main loop: set 2 longs per pass (the >= 12 byte check above guarantees at least 2 longs between the long bounds, so the first pass is unconditional) */
84.loop2_l:
85 mov.l r5,@-r6 /* store first long (pre-decrements r6 by 4) */
86 cmp/hi r2,r6 /* runs r6 down to first or second long bound; tested between the two stores: are at least 2 more longs left after this pass? */
87 mov.l r5,@-r6 /* store second long (the branch below still uses the compare result from before this store) */
88 bt .loop2_l /* yes, do another pass */
89 /* here r6 = first long bound, or first long bound + 4 */
90 add #-8,r2 /* correct offset: r2 = first long bound */
91 cmp/hi r2,r6 /* 1 long left? */
92 bf .start_b2 /* no, jump to trailing byte loop (the bra below is skipped when this branch is taken) */
93
94 bra .start_b2 /* jump to trailing byte loop */
95 mov.l r5,@-r6 /* store last long (delay slot of the bra above, so it executes before the jump) */
96
97 /* trailing byte loop (sets the bytes between the start address and the first long bound; also fills the whole region when it is shorter than 12 bytes) */
98 .align 2 /* any padding inserted here is never executed: the code above always branches away */
99.loop_b2:
100 mov.b r5,@-r6 /* store byte (pre-decrements r6 by 1) */
101.start_b2:
102 cmp/hi r4,r6 /* runs r6 down to the start address (unsigned r6 > r4?) */
103 bt .loop_b2 /* yes, another byte to store */
104
105 rts /* return to caller; the next instruction executes in the delay slot */
106 mov r4,r0 /* return start address */
107
108.end:
109 .size _memset,.end-_memset /* symbol size = bytes from _memset up to .end */
110#elif CONFIG_CPU == MCF5249
111 .global memset
112 .type memset,@function
113
/* Fills a memory region with specified byte value
 * This version is not optimized at all
 *
 * arguments (read from the stack below):
 *  (4,%sp)  - start address
 *  (8,%sp)  - data (only the low byte is stored)
 *  (12,%sp) - length
 *
 * return value:
 *  %d0 - start address (like ANSI version)
 *  NOTE(review): assumes the calling convention in use returns pointers
 *  in %d0 - confirm for the toolchain.
 *
 * register usage:
 *  %a0 - current address (runs up from start to end)
 *  %a1 - end address
 *  %d0 - data while filling, start address on return
 *  %d1 - length
 */
memset:
    move.l  (4,%sp),%a0         /* Start address */
    move.l  (8,%sp),%d0         /* Value */
    move.l  (12,%sp),%d1        /* Length */
    lea.l   (%d1,%a0),%a1       /* a1 = a0+d1 = end address */

    bra.b   .byteloopend        /* test first, so a length of 0 stores nothing */

.byteloop:
    move.b  %d0,(%a0)+          /* store byte and advance */
.byteloopend:
    cmp.l   %a0,%a1             /* end address reached? */
    bne.b   .byteloop           /* no, store another byte */

    /* %d0 still holds the fill value and %a0 now equals the end address,
     * so the start address has to be reloaded for the return value. */
    move.l  (4,%sp),%d0         /* return start address */
    rts

    .size   memset,.-memset
132#endif