diff options
author | Thom Johansen <thomj@rockbox.org> | 2005-04-01 20:12:29 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2005-04-01 20:12:29 +0000 |
commit | 5fb6c64ffc8e6ab7512d805d2860831e492e5c52 (patch) | |
tree | f56971d85fa4af3b4f6b9bb282472cb3979f2ec2 | |
parent | d2041832cdb4296b31805dda0598aeed458b1d2e (diff) | |
download | rockbox-5fb6c64ffc8e6ab7512d805d2860831e492e5c52.tar.gz rockbox-5fb6c64ffc8e6ab7512d805d2860831e492e5c52.zip |
Smaller code size due to use of post-increment addressing. Move code out of iram. Cosmetic changes.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6242 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | apps/codecs/libFLAC/coldfire.S | 263 |
1 files changed, 131 insertions, 132 deletions
diff --git a/apps/codecs/libFLAC/coldfire.S b/apps/codecs/libFLAC/coldfire.S index b36f00eede..ad4b417c9e 100644 --- a/apps/codecs/libFLAC/coldfire.S +++ b/apps/codecs/libFLAC/coldfire.S | |||
@@ -18,10 +18,10 @@ | |||
18 | ****************************************************************************/ | 18 | ****************************************************************************/ |
19 | 19 | ||
20 | /* The following is a first attempt at an assembler optimized version of | 20 | /* The following is a first attempt at an assembler optimized version of |
21 | FLAC__lpc_restore_signal programmed for MFC5249 or any other similar | 21 | FLAC__lpc_restore_signal programmed for MCF5249 or any other similar |
22 | ColdFire V2 core with the EMAC unit. | 22 | ColdFire V2 core with the EMAC unit. |
23 | */ | 23 | */ |
24 | .section .icode,"ax",@progbits | 24 | .text |
25 | .global FLAC__lpc_restore_signal_mcf5249 | 25 | .global FLAC__lpc_restore_signal_mcf5249 |
26 | .align 2 | 26 | .align 2 |
27 | FLAC__lpc_restore_signal_mcf5249: | 27 | FLAC__lpc_restore_signal_mcf5249: |
@@ -43,174 +43,173 @@ FLAC__lpc_restore_signal_mcf5249: | |||
43 | clr.l %d3 | 43 | clr.l %d3 |
44 | move.l %d3, %macsr /* we'll need integer mode for this */ | 44 | move.l %d3, %macsr /* we'll need integer mode for this */ |
45 | tst.l %d0 | 45 | tst.l %d0 |
46 | jeq .Lexit /* zero samples to process */ | 46 | jeq .exit /* zero samples to process */ |
47 | movq.l #8, %d3 | 47 | moveq.l #8, %d3 |
48 | cmp.l %d3, %d2 /* coldfire v2 only has long cmp version */ | 48 | cmp.l %d3, %d2 |
49 | jgt .Ldefault /* order is over 8, jump to default case */ | 49 | jgt .default /* order is over 8, jump to default case */ |
50 | lea.l .Ljumptable, %a4 | 50 | lea.l .jumptable, %a4 |
51 | move.l (%a4, %d2.l*4), %a4 | 51 | move.l (%a4, %d2.l*4), %a4 |
52 | jmp (%a4) | 52 | jmp (%a4) |
53 | .align 4 /* avoid unaligned fetch */ | 53 | .align 4 /* avoid unaligned fetch */ |
54 | .Ljumptable: | 54 | .jumptable: |
55 | .long .Lexit | 55 | .long .exit |
56 | .long .Lorder1 | 56 | .long .order1 |
57 | .long .Lorder2 | 57 | .long .order2 |
58 | .long .Lorder3 | 58 | .long .order3 |
59 | .long .Lorder4 | 59 | .long .order4 |
60 | .long .Lorder5 | 60 | .long .order5 |
61 | .long .Lorder6 | 61 | .long .order6 |
62 | .long .Lorder7 | 62 | .long .order7 |
63 | .long .Lorder8 | 63 | .long .order8 |
64 | 64 | ||
65 | .Lorder8: | 65 | .order8: |
66 | movem.l (%a1), %d3-%d7/%a3-%a5 /* load lpc coefs */ | 66 | movem.l (%a1), %d3-%d7/%a3-%a5 /* load lpc coefs */ |
67 | movea.l (%a2), %a6 /* load first history sample */ | 67 | move.l (%a2)+, %a6 /* load first history sample */ |
68 | .Lloop8: | 68 | .loop8: |
69 | mac.l %a6, %a5, (1*4, %a2), %a6, %acc0 | 69 | mac.l %a6, %a5, (%a2)+, %a6, %acc0 |
70 | mac.l %a6, %a4, (2*4, %a2), %a6, %acc0 | 70 | mac.l %a6, %a4, (%a2)+, %a6, %acc0 |
71 | mac.l %a6, %a3, (3*4, %a2), %a6, %acc0 | 71 | mac.l %a6, %a3, (%a2)+, %a6, %acc0 |
72 | mac.l %a6, %d7, (4*4, %a2), %a6, %acc0 | 72 | mac.l %a6, %d7, (%a2)+, %a6, %acc0 |
73 | mac.l %a6, %d6, (5*4, %a2), %a6, %acc0 | 73 | mac.l %a6, %d6, (%a2)+, %a6, %acc0 |
74 | mac.l %a6, %d5, (6*4, %a2), %a6, %acc0 | 74 | mac.l %a6, %d5, (%a2)+, %a6, %acc0 |
75 | mac.l %a6, %d4, (7*4, %a2), %a6, %acc0 | 75 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
76 | mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* load for the next iteration */ | 76 | mac.l %a6, %d3, (-7*4, %a2), %a6, %acc0 /* load for the next iteration */ |
77 | addq.l #4, %a2 /* increment history pointer */ | 77 | movclr.l %acc0, %d2 /* get sum */ |
78 | movclr.l %acc0, %d2 /* get sum */ | 78 | asr.l %d1, %d2 /* shift sum by lp_quantization bits */ |
79 | asr.l %d1, %d2 /* shift sum by lp_quantization bits */ | 79 | add.l (%a0)+, %d2 /* add residual and increment residual pointer */ |
80 | add.l (%a0)+, %d2 /* add residual and increment residual pointer */ | 80 | move.l %d2, (%a2) /* save result to data */ |
81 | move.l %d2, (28, %a2) /* save result to data */ | 81 | lea.l (-6*4, %a2), %a2 /* history pointer points at second element */ |
82 | subq.l #1, %d0 /* decrement counter */ | 82 | subq.l #1, %d0 /* decrement counter */ |
83 | jne .Lloop8 /* are we done? */ | 83 | jne .loop8 /* are we done? */ |
84 | jra .Lexit | 84 | jra .exit |
85 | 85 | ||
86 | .Lorder7: | 86 | .order7: |
87 | movem.l (%a1), %d3-%d7/%a3-%a4 | 87 | movem.l (%a1), %d3-%d7/%a3-%a4 |
88 | movea.l (%a2), %a6 | 88 | move.l (%a2)+, %a6 |
89 | .Lloop7: | 89 | .loop7: |
90 | mac.l %a6, %a4, (1*4, %a2), %a6, %acc0 | 90 | mac.l %a6, %a4, (%a2)+, %a6, %acc0 |
91 | mac.l %a6, %a3, (2*4, %a2), %a6, %acc0 | 91 | mac.l %a6, %a3, (%a2)+, %a6, %acc0 |
92 | mac.l %a6, %d7, (3*4, %a2), %a6, %acc0 | 92 | mac.l %a6, %d7, (%a2)+, %a6, %acc0 |
93 | mac.l %a6, %d6, (4*4, %a2), %a6, %acc0 | 93 | mac.l %a6, %d6, (%a2)+, %a6, %acc0 |
94 | mac.l %a6, %d5, (5*4, %a2), %a6, %acc0 | 94 | mac.l %a6, %d5, (%a2)+, %a6, %acc0 |
95 | mac.l %a6, %d4, (6*4, %a2), %a6, %acc0 | 95 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
96 | mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 | 96 | mac.l %a6, %d3, (-6*4, %a2), %a6, %acc0 |
97 | addq.l #4, %a2 | ||
98 | movclr.l %acc0, %d2 | 97 | movclr.l %acc0, %d2 |
99 | asr.l %d1, %d2 | 98 | asr.l %d1, %d2 |
100 | add.l (%a0)+, %d2 | 99 | add.l (%a0)+, %d2 |
101 | move.l %d2, (24, %a2) | 100 | move.l %d2, (%a2) |
101 | lea.l (-5*4, %a2), %a2 | ||
102 | subq.l #1, %d0 | 102 | subq.l #1, %d0 |
103 | jne .Lloop7 | 103 | jne .loop7 |
104 | jra .Lexit | 104 | jra .exit |
105 | 105 | ||
106 | .Lorder6: | 106 | .order6: |
107 | movem.l (%a1), %d3-%d7/%a3 | 107 | movem.l (%a1), %d3-%d7/%a3 |
108 | movea.l (%a2), %a6 | 108 | move.l (%a2)+, %a6 |
109 | .Lloop6: | 109 | .loop6: |
110 | mac.l %a6, %a3, (1*4, %a2), %a6, %acc0 | 110 | mac.l %a6, %a3, (%a2)+, %a6, %acc0 |
111 | mac.l %a6, %d7, (2*4, %a2), %a6, %acc0 | 111 | mac.l %a6, %d7, (%a2)+, %a6, %acc0 |
112 | mac.l %a6, %d6, (3*4, %a2), %a6, %acc0 | 112 | mac.l %a6, %d6, (%a2)+, %a6, %acc0 |
113 | mac.l %a6, %d5, (4*4, %a2), %a6, %acc0 | 113 | mac.l %a6, %d5, (%a2)+, %a6, %acc0 |
114 | mac.l %a6, %d4, (5*4, %a2), %a6, %acc0 | 114 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
115 | mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 | 115 | mac.l %a6, %d3, (-5*4, %a2), %a6, %acc0 |
116 | addq.l #4, %a2 | ||
117 | movclr.l %acc0, %d2 | 116 | movclr.l %acc0, %d2 |
118 | asr.l %d1, %d2 | 117 | asr.l %d1, %d2 |
119 | add.l (%a0)+, %d2 | 118 | add.l (%a0)+, %d2 |
120 | move.l %d2, (20, %a2) | 119 | move.l %d2, (%a2) |
120 | lea.l (-4*4, %a2), %a2 | ||
121 | subq.l #1, %d0 | 121 | subq.l #1, %d0 |
122 | jne .Lloop6 | 122 | jne .loop6 |
123 | jra .Lexit | 123 | jra .exit |
124 | 124 | ||
125 | .Lorder5: | 125 | .order5: |
126 | movem.l (%a1), %d3-%d7 | 126 | movem.l (%a1), %d3-%d7 |
127 | movea.l (%a2), %a6 | 127 | move.l (%a2)+, %a6 |
128 | .Lloop5: | 128 | .loop5: |
129 | mac.l %a6, %d7, (1*4, %a2), %a6, %acc0 | 129 | mac.l %a6, %d7, (%a2)+, %a6, %acc0 |
130 | mac.l %a6, %d6, (2*4, %a2), %a6, %acc0 | 130 | mac.l %a6, %d6, (%a2)+, %a6, %acc0 |
131 | mac.l %a6, %d5, (3*4, %a2), %a6, %acc0 | 131 | mac.l %a6, %d5, (%a2)+, %a6, %acc0 |
132 | mac.l %a6, %d4, (4*4, %a2), %a6, %acc0 | 132 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
133 | mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 | 133 | mac.l %a6, %d3, (-4*4, %a2), %a6, %acc0 |
134 | addq.l #4, %a2 | ||
135 | movclr.l %acc0, %d2 | 134 | movclr.l %acc0, %d2 |
136 | asr.l %d1, %d2 | 135 | asr.l %d1, %d2 |
137 | add.l (%a0)+, %d2 | 136 | add.l (%a0)+, %d2 |
138 | move.l %d2, (16, %a2) | 137 | move.l %d2, (%a2) |
138 | lea.l (-3*4, %a2), %a2 | ||
139 | subq.l #1, %d0 | 139 | subq.l #1, %d0 |
140 | jne .Lloop5 | 140 | jne .loop5 |
141 | jra .Lexit | 141 | jra .exit |
142 | 142 | ||
143 | .Lorder4: | 143 | .order4: |
144 | movem.l (%a1), %d3-%d6 | 144 | movem.l (%a1), %d3-%d6 |
145 | movea.l (%a2), %a6 | 145 | move.l (%a2)+, %a6 |
146 | .Lloop4: | 146 | .loop4: |
147 | mac.l %a6, %d6, (1*4, %a2), %a6, %acc0 | 147 | mac.l %a6, %d6, (%a2)+, %a6, %acc0 |
148 | mac.l %a6, %d5, (2*4, %a2), %a6, %acc0 | 148 | mac.l %a6, %d5, (%a2)+, %a6, %acc0 |
149 | mac.l %a6, %d4, (3*4, %a2), %a6, %acc0 | 149 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
150 | mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 | 150 | mac.l %a6, %d3, (-3*4, %a2), %a6, %acc0 |
151 | addq.l #4, %a2 | ||
152 | movclr.l %acc0, %d2 | 151 | movclr.l %acc0, %d2 |
153 | asr.l %d1, %d2 | 152 | asr.l %d1, %d2 |
154 | add.l (%a0)+, %d2 | 153 | add.l (%a0)+, %d2 |
155 | move.l %d2, (12, %a2) | 154 | move.l %d2, (%a2) |
155 | subq.l #8, %a2 | ||
156 | subq.l #1, %d0 | 156 | subq.l #1, %d0 |
157 | jne .Lloop4 | 157 | jne .loop4 |
158 | jra .Lexit | 158 | jra .exit |
159 | 159 | ||
160 | .Lorder3: | 160 | .order3: |
161 | movem.l (%a1), %d3-%d5 | 161 | movem.l (%a1), %d3-%d5 |
162 | movea.l (%a2), %a6 | 162 | move.l (%a2)+, %a6 |
163 | .Lloop3: | 163 | .loop3: |
164 | mac.l %a6, %d5, (1*4, %a2), %a6, %acc0 | 164 | mac.l %a6, %d5, (%a2)+, %a6, %acc0 |
165 | mac.l %a6, %d4, (2*4, %a2), %a6, %acc0 | 165 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
166 | mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 | 166 | mac.l %a6, %d3, (-2*4, %a2), %a6, %acc0 |
167 | addq.l #4, %a2 | ||
168 | movclr.l %acc0, %d2 | 167 | movclr.l %acc0, %d2 |
169 | asr.l %d1, %d2 | 168 | asr.l %d1, %d2 |
170 | add.l (%a0)+, %d2 | 169 | add.l (%a0)+, %d2 |
171 | move.l %d2, (8, %a2) | 170 | move.l %d2, (%a2) |
171 | subq.l #4, %a2 | ||
172 | subq.l #1, %d0 | 172 | subq.l #1, %d0 |
173 | jne .Lloop3 | 173 | jne .loop3 |
174 | jra .Lexit | 174 | jra .exit |
175 | 175 | ||
176 | .Lorder2: | 176 | .order2: |
177 | movem.l (%a1), %d3-%d4 | 177 | movem.l (%a1), %d3-%d4 |
178 | movea.l (%a2), %a6 | 178 | move.l (%a2)+, %a6 |
179 | .Lloop2: | 179 | .loop2: |
180 | mac.l %a6, %d4, (1*4, %a2), %a6, %acc0 | 180 | mac.l %a6, %d4, (%a2)+, %a6, %acc0 |
181 | mac.l %a6, %d3, %acc0 /* data for next iteration is already loaded */ | 181 | mac.l %a6, %d3, %acc0 /* data for next iteration is already loaded */ |
182 | addq.l #4, %a2 | ||
183 | movclr.l %acc0, %d2 | 182 | movclr.l %acc0, %d2 |
184 | asr.l %d1, %d2 | 183 | asr.l %d1, %d2 |
185 | add.l (%a0)+, %d2 | 184 | add.l (%a0)+, %d2 |
186 | move.l %d2, (4, %a2) | 185 | move.l %d2, (%a2) |
187 | subq.l #1, %d0 | 186 | subq.l #1, %d0 |
188 | jne .Lloop2 | 187 | jne .loop2 |
189 | jra .Lexit | 188 | jra .exit |
190 | 189 | ||
191 | .Lorder1: | 190 | .order1: |
192 | /* no point in using mac here */ | 191 | /* no point in using mac here */ |
193 | move.l (%a1), %d3 | 192 | move.l (%a1), %d3 |
194 | .Lloop1: | 193 | .loop1: |
195 | move.l %d3, %d2 | 194 | move.l %d3, %d2 |
196 | muls.l (%a2)+, %d2 | 195 | muls.l (%a2)+, %d2 |
197 | asr.l %d1, %d2 | 196 | asr.l %d1, %d2 |
198 | add.l (%a0)+, %d2 | 197 | add.l (%a0)+, %d2 |
199 | move.l %d2, (%a2) | 198 | move.l %d2, (%a2) |
200 | subq.l #1, %d0 | 199 | subq.l #1, %d0 |
201 | jne .Lloop1 | 200 | jne .loop1 |
202 | jra .Lexit | 201 | jra .exit |
203 | 202 | ||
204 | .Ldefault: | 203 | .default: |
205 | /* we do the filtering in an unrolled by 4 loop as far as we can, and then | 204 | /* we do the filtering in an unrolled by 4 loop as far as we can, and then |
206 | do the rest in an ordinary on by one sample loop. | 205 | do the rest in an ordinary one by one sample loop. |
207 | */ | 206 | */ |
208 | lea.l (%a1, %d2.l*4), %a3 /* need to start in the other end of coefs */ | 207 | lea.l (%a1, %d2.l*4), %a3 /* need to start in the other end of coefs */ |
209 | movea.l %a2, %a4 /* working copy of history pointer */ | 208 | move.l %a2, %a4 /* working copy of history pointer */ |
210 | move.l %d2, %d3 | 209 | move.l %d2, %d3 |
211 | lsr.l #2, %d3 /* coefs/4, number of iterations needed in next loop */ | 210 | lsr.l #2, %d3 /* coefs/4, number of iterations needed in next loop */ |
212 | movea.l (%a4)+, %a6 /* preload first history sample for loop */ | 211 | move.l (%a4)+, %a6 /* preload first history sample for loop */ |
213 | .Ldloop1: | 212 | .dloop1: |
214 | lea.l (-16, %a3), %a3 /* move lpc coef pointer four samples backwards */ | 213 | lea.l (-16, %a3), %a3 /* move lpc coef pointer four samples backwards */ |
215 | movem.l (%a3), %d4-%d7 /* load four coefs */ | 214 | movem.l (%a3), %d4-%d7 /* load four coefs */ |
216 | mac.l %a6, %d7, (%a4)+, %a6, %acc0 | 215 | mac.l %a6, %d7, (%a4)+, %a6, %acc0 |
@@ -218,28 +217,28 @@ FLAC__lpc_restore_signal_mcf5249: | |||
218 | mac.l %a6, %d5, (%a4)+, %a6, %acc0 | 217 | mac.l %a6, %d5, (%a4)+, %a6, %acc0 |
219 | mac.l %a6, %d4, (%a4)+, %a6, %acc0 | 218 | mac.l %a6, %d4, (%a4)+, %a6, %acc0 |
220 | subq.l #1, %d3 /* any more unrolled loop operations left? */ | 219 | subq.l #1, %d3 /* any more unrolled loop operations left? */ |
221 | jne .Ldloop1 | 220 | jne .dloop1 |
222 | 221 | ||
223 | move.l %d2, %d3 | 222 | move.l %d2, %d3 |
224 | movq.l #3, %d4 /* mask 0x00000003 */ | 223 | moveq.l #3, %d4 /* mask 0x00000003 */ |
225 | and.l %d4, %d3 /* get the remaining samples to be filtered */ | 224 | and.l %d4, %d3 /* get the remaining samples to be filtered */ |
226 | jeq .Ldsave /* no remaining samples */ | 225 | jeq .dsave /* no remaining samples */ |
227 | .Ldloop2: | 226 | .dloop2: |
228 | move.l -(%a3), %d4 /* get lpc coef */ | 227 | move.l -(%a3), %d4 /* get lpc coef */ |
229 | mac.l %a6, %d4, (%a4)+, %a6, %acc0 | 228 | mac.l %a6, %d4, (%a4)+, %a6, %acc0 |
230 | subq.l #1, %d3 /* any more iterations left? */ | 229 | subq.l #1, %d3 /* any more iterations left? */ |
231 | jne .Ldloop2 | 230 | jne .dloop2 |
232 | .Ldsave: | 231 | .dsave: |
233 | movclr.l %acc0, %d3 /* get result */ | 232 | movclr.l %acc0, %d3 /* get result */ |
234 | asr.l %d1, %d3 /* shift lp_quantization bits right */ | 233 | asr.l %d1, %d3 /* shift lp_quantization bits right */ |
235 | add.l (%a0)+, %d3 /* add residual */ | 234 | add.l (%a0)+, %d3 /* add residual */ |
236 | move.l %d3, (-4, %a4) /* history pointer is one sample past data pointer */ | 235 | move.l %d3, (-4, %a4) /* history pointer is one sample past data pointer */ |
237 | addq.l #4, %a2 /* increment history pointer */ | 236 | addq.l #4, %a2 /* increment history pointer */ |
238 | subq.l #1, %d0 /* decrement data_len */ | 237 | subq.l #1, %d0 /* decrement data_len */ |
239 | jne .Ldefault /* are we done? */ | 238 | jne .default /* are we done? */ |
240 | /* if so, fall through to exit */ | 239 | /* if so, fall through to exit */ |
241 | 240 | ||
242 | .Lexit: | 241 | .exit: |
243 | movem.l (%sp), %d2-%d7/%a2-%a6 | 242 | movem.l (%sp), %d2-%d7/%a2-%a6 |
244 | lea.l (44, %sp), %sp | 243 | lea.l (44, %sp), %sp |
245 | rts | 244 | rts |