diff options
Diffstat (limited to 'firmware/asm')
-rw-r--r--	firmware/asm/mips/memcpy.S	| 22
-rw-r--r--	firmware/asm/sh/memcpy.S	| 10
2 files changed, 16 insertions, 16 deletions
diff --git a/firmware/asm/mips/memcpy.S b/firmware/asm/mips/memcpy.S
index 2e7f245c69..edbf5ac5eb 100644
--- a/firmware/asm/mips/memcpy.S
+++ b/firmware/asm/mips/memcpy.S
@@ -63,13 +63,13 @@ memcpy:
 63		SWHI	t0, 0(a0)
 64		addu	a0, t1
 65
 66	chk8w:
 67		andi	t0, a2, 0x1f		# 32 or more bytes left?
 68		beq	t0, a2, chk1w
 69		subu	a3, a2, t0		# Yes
 70		addu	a3, a1			# a3 = end address of loop
 71		move	a2, t0			# a2 = what will be left after loop
 72	lop8w:
 73		lw	t0, 0(a1)		# Loop taking 8 words at a time
 74		lw	t1, 4(a1)
 75		lw	t2, 8(a1)
@@ -90,34 +90,34 @@ lop8w:
 90		bne	a1, a3, lop8w
 91		sw	t7, -4(a0)
 92
 93	chk1w:
 94		andi	t0, a2, 0x3		# 4 or more bytes left?
 95		beq	t0, a2, last8
 96		subu	a3, a2, t0		# Yes, handle them one word at a time
 97		addu	a3, a1			# a3 again end address
 98		move	a2, t0
 99	lop1w:
 100		lw	t0, 0(a1)
 101		addiu	a0, 4
 102		addiu	a1, 4
 103		bne	a1, a3, lop1w
 104		sw	t0, -4(a0)
 105
 106	last8:
 107		blez	a2, lst8e		# Handle last 8 bytes, one at a time
 108		addu	a3, a2, a1
 109	lst8l:
 110		lb	t0, 0(a1)
 111		addiu	a0, 1
 112		addiu	a1, 1
 113		bne	a1, a3, lst8l
 114		sb	t0, -1(a0)
 115	lst8e:
 116		jr	ra			# Bye, bye
 117		nop
 118
 119	shift:
 120		subu	a3, zero, a0		# Src and Dest unaligned
 121		andi	a3, 0x3			# (unoptimized case...)
 122		beq	a3, zero, shft1
 123		subu	a2, a3			# a2 = bytes left
@@ -126,11 +126,11 @@ shift:
 126		addu	a1, a3
 127		SWHI	t0, 0(a0)
 128		addu	a0, a3
 129	shft1:
 130		andi	t0, a2, 0x3
 131		subu	a3, a2, t0
 132		addu	a3, a1
 133	shfth:
 134		LWHI	t1, 0(a1)		# Limp through, word by word
 135		LWLO	t1, 3(a1)
 136		addiu	a0, 4
diff --git a/firmware/asm/sh/memcpy.S b/firmware/asm/sh/memcpy.S
index e23a579b05..59c5801ac0 100644
--- a/firmware/asm/sh/memcpy.S
+++ b/firmware/asm/sh/memcpy.S
@@ -60,13 +60,13 @@ ___memcpy_fwd_entry:
 60		cmp/hs	r0,r6		/* at least 11 bytes to copy? (ensures 2 aligned longs) */
 61		add	r5,r6		/* r6 = source_end */
 62		bf	.start_b2	/* no: jump directly to byte loop */
 63
 64		mov	#3,r0
 65		neg	r5,r3
 66		and	r0,r3		/* r3 = (4 - align_offset) % 4 */
 67		tst	r3,r3		/* already aligned? */
 68		bt	.end_b1		/* yes: skip leading byte loop */
 69
 70		add	r5,r3		/* r3 = first source long bound */
 71
 72		/* leading byte loop: copies 0..3 bytes */
@@ -89,7 +89,7 @@ ___memcpy_fwd_entry:
 89		mov	r6,r3		/* move end address to r3 */
 90		jmp	@r1		/* and jump to it */
 91		add	#-7,r3		/* adjust end addr for main loops doing 2 longs/pass */
 92
 93	/** main loops, copying 2 longs per pass to profit from fast page mode **/
 94
 95	/* long aligned destination (fastest) */
@@ -102,11 +102,11 @@ ___memcpy_fwd_entry:
 102		mov.l	r0,@-r4		/* store second long */
 103		mov.l	r1,@-r4		/* store first long; NOT ALIGNED - no speed loss here! */
 104		bt	.loop_do0
 105
 106		add	#4,r3		/* readjust end address */
 107		cmp/hi	r5,r3		/* one long left? */
 108		bf	.start_b2	/* no, jump to trailing byte loop */
 109
 110		mov.l	@r5+,r0		/* load last long & increment source addr */
 111		add	#4,r4		/* increment dest addr */
 112		bra	.start_b2	/* jump to trailing byte loop */