summaryrefslogtreecommitdiff
path: root/firmware/asm/sh/memset.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/asm/sh/memset.S')
-rw-r--r--firmware/asm/sh/memset.S109
1 file changed, 109 insertions, 0 deletions
diff --git a/firmware/asm/sh/memset.S b/firmware/asm/sh/memset.S
new file mode 100644
index 0000000000..8cae1ea112
--- /dev/null
+++ b/firmware/asm/sh/memset.S
@@ -0,0 +1,109 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#include "config.h"
22
23 .section .icode,"ax",@progbits
24
25 .align 2
26 .global _memset
27 .type _memset,@function
28
29/* Fills a memory region with specified byte value
30 * This version is optimized for speed
31 *
32 * arguments:
33 * r4 - start address
34 * r5 - data
35 * r6 - length
36 *
37 * return value:
38 * r0 - start address (like ANSI version)
39 *
40 * register usage:
41 * r0 - temporary
42 * r1 - start address +11 for main loop
43 * r4 - start address
44 * r5 - data (spread to all 4 bytes when using long stores)
45 * r6 - current address (runs down from end to start)
46 *
47 * The instruction order below is devised in a way to utilize the pipelining
48 * of the SH1 to the max. The routine fills memory from end to start in
49 * order to utilize the auto-decrementing store instructions.
50 */
51
52_memset:
	/* Decide up front whether the region contains at least one
	 * long-aligned longword; if not, fall through to a pure byte fill. */
53	neg	r4,r0
54	and	#3,r0		/* r0 = (4 - align_offset) % 4 = bytes needed to long-align */
55	add	#4,r0		/* + 4: minimum length that still holds one aligned long */
56	cmp/hs	r0,r6		/* at least one aligned longword to fill? */
57	add	r4,r6		/* r6 = end_address */
58	bf	.no_longs	/* no, jump directly to byte loop */
59
60	extu.b	r5,r5		/* start: spread data to all 4 bytes */
61	swap.b	r5,r0		/* r0 = data byte duplicated into byte 1 position */
62	or	r0,r5		/* data now in 2 lower bytes of r5 */
63	swap.w	r5,r0		/* r0 = low halfword copied to upper halfword */
64	or	r0,r5		/* data now in all 4 bytes of r5 */
65
66	mov	r6,r0		/* r0 = working copy of end address */
67	tst	#3,r0		/* r0 already long aligned? */
68	bt	.end_b1		/* yes: skip loop */
69
	/* "Leading" in execution order only: the fill runs end -> start, so
	 * these are the unaligned bytes at the *end* of the region. */
70	/* leading byte loop: sets 0..3 bytes */
71.loop_b1:
72	mov.b	r5,@-r0		/* store byte (low byte of spread r5), pre-decrement */
73	tst	#3,r0		/* r0 long aligned? */
74	bf	.loop_b1	/* runs r0 down until long aligned */
75
76	mov	r0,r6		/* r6 = last long bound */
77	nop			/* keep alignment */
78
79.end_b1:
	/* Let B = first long bound at or above start = (start + 3) & ~3.
	 * Since B - start is 0..3, start + 11 always lies in [B+8, B+11].
	 * The mid-loop cmp/hi below therefore stays true for r6 >= B+12 and
	 * goes false at r6 = B+4, so the store after the final compare lands
	 * exactly at B — one threshold works for every start misalignment. */
80	mov	r4,r1		/* r1 = start_address... */
81	add	#11,r1		/* ... + 11, combined for rounding and offset */
82	xor	r1,r0		/* r0 = aligned_end ^ (start + 11) */
83	tst	#4,r0		/* bit 2 tells whether an even or odd number of */
84	bf	.loop_odd	/* longwords to set; odd: skip the first store */
85
	/* The compare sits between the two stores on purpose: mov.l does not
	 * modify the T bit, so the (non-delayed) bt consumes the cmp/hi result
	 * from two instructions earlier while both stores stay pipelined.
	 * Entering at .loop_odd makes the first pass store only one long,
	 * fixing up an odd longword count. */
86	/* main loop: set 2 longs per pass */
87.loop_2l:
88	mov.l	r5,@-r6	/* store first long */
89.loop_odd:
90	cmp/hi	r1,r6	/* runs r6 down to first long bound */
91	mov.l	r5,@-r6	/* store second long */
92	bt	.loop_2l
93
	/* Here r6 = B (after the long loop) or r6 = end (long path skipped);
	 * 0..3 bytes down to the start address remain in either case. */
94.no_longs:
95	cmp/hi	r4,r6	/* any bytes left? */
96	bf	.end_b2	/* no: skip loop */
97
98	/* trailing byte loop */
99.loop_b2:
100	mov.b	r5,@-r6	/* store byte */
101	cmp/hi	r4,r6	/* runs r6 down to the start address */
102	bt	.loop_b2
103
104.end_b2:
105	rts
106	mov	r4,r0	/* return start address (executes in the rts delay slot) */
107
108.end:
109	.size	_memset,.end-_memset