summary refs log tree commit diff
path: root/firmware
diff options
context:
space:
mode:
Diffstat (limited to 'firmware')
-rw-r--r-- firmware/common/memset_a.S 55
1 files changed, 27 insertions, 28 deletions
diff --git a/firmware/common/memset_a.S b/firmware/common/memset_a.S
index bce8936089..e555683474 100644
--- a/firmware/common/memset_a.S
+++ b/firmware/common/memset_a.S
@@ -39,7 +39,7 @@
39 * register usage: 39 * register usage:
40 * r0 - temporary 40 * r0 - temporary
41 * r1 - bit mask for rounding to long bounds 41 * r1 - bit mask for rounding to long bounds
42 * r2 - last / first long bound (only if >= 12 bytes) 42 * r2 - start address +11 for main loop
43 * r4 - start address 43 * r4 - start address
44 * r5 - data (spread to all 4 bytes if >= 12 bytes) 44 * r5 - data (spread to all 4 bytes if >= 12 bytes)
45 * r6 - current address (runs down from end to start) 45 * r6 - current address (runs down from end to start)
@@ -50,58 +50,57 @@
50 */ 50 */
51 51
52_memset: 52_memset:
53 neg r4,r0
54 and #3,r0 /* r0 = (4 - align_offset) % 4 */
55 add #4,r0
56 cmp/hs r0,r6 /* at least one aligned longword to fill? */
53 add r4,r6 /* r6 = end_address */ 57 add r4,r6 /* r6 = end_address */
54 58 bf .no_longs /* no, jump directly to byte loop */
55 mov r6,r0
56 add #-12,r0 /* r0 = r6 - 12; don't go below 12 here! */
57 cmp/hs r4,r0 /* >= 12 bytes to fill? */
58 bf .start_b2 /* no, jump directly to byte loop */
59 59
60 extu.b r5,r5 /* start: spread data to all 4 bytes */ 60 extu.b r5,r5 /* start: spread data to all 4 bytes */
61 swap.b r5,r0 61 swap.b r5,r0
62 or r0,r5 /* data now in 2 lower bytes of r5 */ 62 or r0,r5 /* data now in 2 lower bytes of r5 */
63 swap.w r5,r0 63 swap.w r5,r0
64 or r0,r5 /* data now in all 4 bytes of r5 */ 64 or r0,r5 /* data now in all 4 bytes of r5 */
65 65
66 mov #-4,r1 /* r1 = 0xFFFFFFFC */ 66 mov #-4,r1 /* r1 = 0xFFFFFFFC */
67 67 mov r6,r0
68 mov r6,r2 68 and r1,r0 /* r0 = last long bound */
69 bra .start_b1 69 cmp/hi r0,r6 /* any leading byte? */
70 and r1,r2 /* r2 = last long bound */ 70 bf .end_b1 /* no: skip loop */
71 71
72 /* leading byte loop: sets 0..3 bytes */ 72 /* leading byte loop: sets 0..3 bytes */
73.loop_b1: 73.loop_b1:
74 mov.b r5,@-r6 /* store byte */ 74 mov.b r5,@-r6 /* store byte */
75.start_b1: 75 cmp/hi r0,r6
76 cmp/hi r2,r6 /* runs r6 down to last long bound */ 76 bt .loop_b1 /* runs r6 down to last long bound */
77 bt .loop_b1
78 77
79 mov r4,r2 78.end_b1:
80 add #11,r2 /* combined for rounding and offset */ 79 mov r4,r2 /* r2 = start_address... */
81 and r1,r2 /* r2 = first long bound + 8 */ 80 add #11,r2 /* ... + 11, combined for rounding and offset */
81 xor r2,r0
82 tst #4,r0 /* bit 2 tells whether an even or odd number of */
83 bf .loop_odd /* longwords to set */
82 84
83 /* main loop: set 2 longs per pass */ 85 /* main loop: set 2 longs per pass */
84.loop2_l: 86.loop_2l:
85 mov.l r5,@-r6 /* store first long */ 87 mov.l r5,@-r6 /* store first long */
86 cmp/hi r2,r6 /* runs r6 down to first or second long bound */ 88.loop_odd:
89 cmp/hi r2,r6 /* runs r6 down to first long bound */
87 mov.l r5,@-r6 /* store second long */ 90 mov.l r5,@-r6 /* store second long */
88 bt .loop2_l 91 bt .loop_2l
89
90 add #-8,r2 /* correct offset */
91 cmp/hi r2,r6 /* 1 long left? */
92 bf .start_b2 /* no, jump to trailing byte loop */
93 92
94 bra .start_b2 /* jump to trailing byte loop */ 93.no_longs:
95 mov.l r5,@-r6 /* store last long */ 94 cmp/hi r4,r6 /* any bytes left? */
95 bf .end_b2 /* no: skip loop */
96 96
97 /* trailing byte loop */ 97 /* trailing byte loop */
98 .align 2
99.loop_b2: 98.loop_b2:
100 mov.b r5,@-r6 /* store byte */ 99 mov.b r5,@-r6 /* store byte */
101.start_b2:
102 cmp/hi r4,r6 /* runs r6 down to the start address */ 100 cmp/hi r4,r6 /* runs r6 down to the start address */
103 bt .loop_b2 101 bt .loop_b2
104 102
103.end_b2:
105 rts 104 rts
106 mov r4,r0 /* return start address */ 105 mov r4,r0 /* return start address */
107 106