summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2005-04-01 20:12:29 +0000
committerThom Johansen <thomj@rockbox.org>2005-04-01 20:12:29 +0000
commit5fb6c64ffc8e6ab7512d805d2860831e492e5c52 (patch)
treef56971d85fa4af3b4f6b9bb282472cb3979f2ec2
parentd2041832cdb4296b31805dda0598aeed458b1d2e (diff)
downloadrockbox-5fb6c64ffc8e6ab7512d805d2860831e492e5c52.tar.gz
rockbox-5fb6c64ffc8e6ab7512d805d2860831e492e5c52.zip
Smaller code size due to use of post-increment addressing. Move code out of iram. Cosmetic changes.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6242 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libFLAC/coldfire.S263
1 file changed, 131 insertions, 132 deletions
diff --git a/apps/codecs/libFLAC/coldfire.S b/apps/codecs/libFLAC/coldfire.S
index b36f00eede..ad4b417c9e 100644
--- a/apps/codecs/libFLAC/coldfire.S
+++ b/apps/codecs/libFLAC/coldfire.S
@@ -18,10 +18,10 @@
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20/* The following is a first attempt at an assembler optimized version of 20/* The following is a first attempt at an assembler optimized version of
21 FLAC__lpc_restore_signal programmed for MFC5249 or any other similar 21 FLAC__lpc_restore_signal programmed for MCF5249 or any other similar
22 ColdFire V2 core with the EMAC unit. 22 ColdFire V2 core with the EMAC unit.
23*/ 23 */
24 .section .icode,"ax",@progbits 24 .text
25 .global FLAC__lpc_restore_signal_mcf5249 25 .global FLAC__lpc_restore_signal_mcf5249
26 .align 2 26 .align 2
27FLAC__lpc_restore_signal_mcf5249: 27FLAC__lpc_restore_signal_mcf5249:
@@ -43,174 +43,173 @@ FLAC__lpc_restore_signal_mcf5249:
43 clr.l %d3 43 clr.l %d3
44 move.l %d3, %macsr /* we'll need integer mode for this */ 44 move.l %d3, %macsr /* we'll need integer mode for this */
45 tst.l %d0 45 tst.l %d0
46 jeq .Lexit /* zero samples to process */ 46 jeq .exit /* zero samples to process */
47 movq.l #8, %d3 47 moveq.l #8, %d3
48 cmp.l %d3, %d2 /* coldfire v2 only has long cmp version */ 48 cmp.l %d3, %d2
49 jgt .Ldefault /* order is over 8, jump to default case */ 49 jgt .default /* order is over 8, jump to default case */
50 lea.l .Ljumptable, %a4 50 lea.l .jumptable, %a4
51 move.l (%a4, %d2.l*4), %a4 51 move.l (%a4, %d2.l*4), %a4
52 jmp (%a4) 52 jmp (%a4)
53 .align 4 /* avoid unaligned fetch */ 53 .align 4 /* avoid unaligned fetch */
54.Ljumptable: 54.jumptable:
55 .long .Lexit 55 .long .exit
56 .long .Lorder1 56 .long .order1
57 .long .Lorder2 57 .long .order2
58 .long .Lorder3 58 .long .order3
59 .long .Lorder4 59 .long .order4
60 .long .Lorder5 60 .long .order5
61 .long .Lorder6 61 .long .order6
62 .long .Lorder7 62 .long .order7
63 .long .Lorder8 63 .long .order8
64 64
65.Lorder8: 65.order8:
66 movem.l (%a1), %d3-%d7/%a3-%a5 /* load lpc coefs */ 66 movem.l (%a1), %d3-%d7/%a3-%a5 /* load lpc coefs */
67 movea.l (%a2), %a6 /* load first history sample */ 67 move.l (%a2)+, %a6 /* load first history sample */
68.Lloop8: 68.loop8:
69 mac.l %a6, %a5, (1*4, %a2), %a6, %acc0 69 mac.l %a6, %a5, (%a2)+, %a6, %acc0
70 mac.l %a6, %a4, (2*4, %a2), %a6, %acc0 70 mac.l %a6, %a4, (%a2)+, %a6, %acc0
71 mac.l %a6, %a3, (3*4, %a2), %a6, %acc0 71 mac.l %a6, %a3, (%a2)+, %a6, %acc0
72 mac.l %a6, %d7, (4*4, %a2), %a6, %acc0 72 mac.l %a6, %d7, (%a2)+, %a6, %acc0
73 mac.l %a6, %d6, (5*4, %a2), %a6, %acc0 73 mac.l %a6, %d6, (%a2)+, %a6, %acc0
74 mac.l %a6, %d5, (6*4, %a2), %a6, %acc0 74 mac.l %a6, %d5, (%a2)+, %a6, %acc0
75 mac.l %a6, %d4, (7*4, %a2), %a6, %acc0 75 mac.l %a6, %d4, (%a2)+, %a6, %acc0
76 mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* load for the next iteration */ 76 mac.l %a6, %d3, (-7*4, %a2), %a6, %acc0 /* load for the next iteration */
77 addq.l #4, %a2 /* increment history pointer */ 77 movclr.l %acc0, %d2 /* get sum */
78 movclr.l %acc0, %d2 /* get sum */ 78 asr.l %d1, %d2 /* shift sum by lp_quantization bits */
79 asr.l %d1, %d2 /* shift sum by lp_quantization bits */ 79 add.l (%a0)+, %d2 /* add residual and increment residual pointer */
80 add.l (%a0)+, %d2 /* add residual and increment residual pointer */ 80 move.l %d2, (%a2) /* save result to data */
81 move.l %d2, (28, %a2) /* save result to data */ 81 lea.l (-6*4, %a2), %a2 /* history pointer points at second element */
82 subq.l #1, %d0 /* decrement counter */ 82 subq.l #1, %d0 /* decrement counter */
83 jne .Lloop8 /* are we done? */ 83 jne .loop8 /* are we done? */
84 jra .Lexit 84 jra .exit
85 85
86.Lorder7: 86.order7:
87 movem.l (%a1), %d3-%d7/%a3-%a4 87 movem.l (%a1), %d3-%d7/%a3-%a4
88 movea.l (%a2), %a6 88 move.l (%a2)+, %a6
89.Lloop7: 89.loop7:
90 mac.l %a6, %a4, (1*4, %a2), %a6, %acc0 90 mac.l %a6, %a4, (%a2)+, %a6, %acc0
91 mac.l %a6, %a3, (2*4, %a2), %a6, %acc0 91 mac.l %a6, %a3, (%a2)+, %a6, %acc0
92 mac.l %a6, %d7, (3*4, %a2), %a6, %acc0 92 mac.l %a6, %d7, (%a2)+, %a6, %acc0
93 mac.l %a6, %d6, (4*4, %a2), %a6, %acc0 93 mac.l %a6, %d6, (%a2)+, %a6, %acc0
94 mac.l %a6, %d5, (5*4, %a2), %a6, %acc0 94 mac.l %a6, %d5, (%a2)+, %a6, %acc0
95 mac.l %a6, %d4, (6*4, %a2), %a6, %acc0 95 mac.l %a6, %d4, (%a2)+, %a6, %acc0
96 mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 96 mac.l %a6, %d3, (-6*4, %a2), %a6, %acc0
97 addq.l #4, %a2
98 movclr.l %acc0, %d2 97 movclr.l %acc0, %d2
99 asr.l %d1, %d2 98 asr.l %d1, %d2
100 add.l (%a0)+, %d2 99 add.l (%a0)+, %d2
101 move.l %d2, (24, %a2) 100 move.l %d2, (%a2)
101 lea.l (-5*4, %a2), %a2
102 subq.l #1, %d0 102 subq.l #1, %d0
103 jne .Lloop7 103 jne .loop7
104 jra .Lexit 104 jra .exit
105 105
106.Lorder6: 106.order6:
107 movem.l (%a1), %d3-%d7/%a3 107 movem.l (%a1), %d3-%d7/%a3
108 movea.l (%a2), %a6 108 move.l (%a2)+, %a6
109.Lloop6: 109.loop6:
110 mac.l %a6, %a3, (1*4, %a2), %a6, %acc0 110 mac.l %a6, %a3, (%a2)+, %a6, %acc0
111 mac.l %a6, %d7, (2*4, %a2), %a6, %acc0 111 mac.l %a6, %d7, (%a2)+, %a6, %acc0
112 mac.l %a6, %d6, (3*4, %a2), %a6, %acc0 112 mac.l %a6, %d6, (%a2)+, %a6, %acc0
113 mac.l %a6, %d5, (4*4, %a2), %a6, %acc0 113 mac.l %a6, %d5, (%a2)+, %a6, %acc0
114 mac.l %a6, %d4, (5*4, %a2), %a6, %acc0 114 mac.l %a6, %d4, (%a2)+, %a6, %acc0
115 mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 115 mac.l %a6, %d3, (-5*4, %a2), %a6, %acc0
116 addq.l #4, %a2
117 movclr.l %acc0, %d2 116 movclr.l %acc0, %d2
118 asr.l %d1, %d2 117 asr.l %d1, %d2
119 add.l (%a0)+, %d2 118 add.l (%a0)+, %d2
120 move.l %d2, (20, %a2) 119 move.l %d2, (%a2)
120 lea.l (-4*4, %a2), %a2
121 subq.l #1, %d0 121 subq.l #1, %d0
122 jne .Lloop6 122 jne .loop6
123 jra .Lexit 123 jra .exit
124 124
125.Lorder5: 125.order5:
126 movem.l (%a1), %d3-%d7 126 movem.l (%a1), %d3-%d7
127 movea.l (%a2), %a6 127 move.l (%a2)+, %a6
128.Lloop5: 128.loop5:
129 mac.l %a6, %d7, (1*4, %a2), %a6, %acc0 129 mac.l %a6, %d7, (%a2)+, %a6, %acc0
130 mac.l %a6, %d6, (2*4, %a2), %a6, %acc0 130 mac.l %a6, %d6, (%a2)+, %a6, %acc0
131 mac.l %a6, %d5, (3*4, %a2), %a6, %acc0 131 mac.l %a6, %d5, (%a2)+, %a6, %acc0
132 mac.l %a6, %d4, (4*4, %a2), %a6, %acc0 132 mac.l %a6, %d4, (%a2)+, %a6, %acc0
133 mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 133 mac.l %a6, %d3, (-4*4, %a2), %a6, %acc0
134 addq.l #4, %a2
135 movclr.l %acc0, %d2 134 movclr.l %acc0, %d2
136 asr.l %d1, %d2 135 asr.l %d1, %d2
137 add.l (%a0)+, %d2 136 add.l (%a0)+, %d2
138 move.l %d2, (16, %a2) 137 move.l %d2, (%a2)
138 lea.l (-3*4, %a2), %a2
139 subq.l #1, %d0 139 subq.l #1, %d0
140 jne .Lloop5 140 jne .loop5
141 jra .Lexit 141 jra .exit
142 142
143.Lorder4: 143.order4:
144 movem.l (%a1), %d3-%d6 144 movem.l (%a1), %d3-%d6
145 movea.l (%a2), %a6 145 move.l (%a2)+, %a6
146.Lloop4: 146.loop4:
147 mac.l %a6, %d6, (1*4, %a2), %a6, %acc0 147 mac.l %a6, %d6, (%a2)+, %a6, %acc0
148 mac.l %a6, %d5, (2*4, %a2), %a6, %acc0 148 mac.l %a6, %d5, (%a2)+, %a6, %acc0
149 mac.l %a6, %d4, (3*4, %a2), %a6, %acc0 149 mac.l %a6, %d4, (%a2)+, %a6, %acc0
150 mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 150 mac.l %a6, %d3, (-3*4, %a2), %a6, %acc0
151 addq.l #4, %a2
152 movclr.l %acc0, %d2 151 movclr.l %acc0, %d2
153 asr.l %d1, %d2 152 asr.l %d1, %d2
154 add.l (%a0)+, %d2 153 add.l (%a0)+, %d2
155 move.l %d2, (12, %a2) 154 move.l %d2, (%a2)
155 subq.l #8, %a2
156 subq.l #1, %d0 156 subq.l #1, %d0
157 jne .Lloop4 157 jne .loop4
158 jra .Lexit 158 jra .exit
159 159
160.Lorder3: 160.order3:
161 movem.l (%a1), %d3-%d5 161 movem.l (%a1), %d3-%d5
162 movea.l (%a2), %a6 162 move.l (%a2)+, %a6
163.Lloop3: 163.loop3:
164 mac.l %a6, %d5, (1*4, %a2), %a6, %acc0 164 mac.l %a6, %d5, (%a2)+, %a6, %acc0
165 mac.l %a6, %d4, (2*4, %a2), %a6, %acc0 165 mac.l %a6, %d4, (%a2)+, %a6, %acc0
166 mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 166 mac.l %a6, %d3, (-2*4, %a2), %a6, %acc0
167 addq.l #4, %a2
168 movclr.l %acc0, %d2 167 movclr.l %acc0, %d2
169 asr.l %d1, %d2 168 asr.l %d1, %d2
170 add.l (%a0)+, %d2 169 add.l (%a0)+, %d2
171 move.l %d2, (8, %a2) 170 move.l %d2, (%a2)
171 subq.l #4, %a2
172 subq.l #1, %d0 172 subq.l #1, %d0
173 jne .Lloop3 173 jne .loop3
174 jra .Lexit 174 jra .exit
175 175
176.Lorder2: 176.order2:
177 movem.l (%a1), %d3-%d4 177 movem.l (%a1), %d3-%d4
178 movea.l (%a2), %a6 178 move.l (%a2)+, %a6
179.Lloop2: 179.loop2:
180 mac.l %a6, %d4, (1*4, %a2), %a6, %acc0 180 mac.l %a6, %d4, (%a2)+, %a6, %acc0
181 mac.l %a6, %d3, %acc0 /* data for next iteration is already loaded */ 181 mac.l %a6, %d3, %acc0 /* data for next iteration is already loaded */
182 addq.l #4, %a2
183 movclr.l %acc0, %d2 182 movclr.l %acc0, %d2
184 asr.l %d1, %d2 183 asr.l %d1, %d2
185 add.l (%a0)+, %d2 184 add.l (%a0)+, %d2
186 move.l %d2, (4, %a2) 185 move.l %d2, (%a2)
187 subq.l #1, %d0 186 subq.l #1, %d0
188 jne .Lloop2 187 jne .loop2
189 jra .Lexit 188 jra .exit
190 189
191.Lorder1: 190.order1:
192 /* no point in using mac here */ 191 /* no point in using mac here */
193 move.l (%a1), %d3 192 move.l (%a1), %d3
194.Lloop1: 193.loop1:
195 move.l %d3, %d2 194 move.l %d3, %d2
196 muls.l (%a2)+, %d2 195 muls.l (%a2)+, %d2
197 asr.l %d1, %d2 196 asr.l %d1, %d2
198 add.l (%a0)+, %d2 197 add.l (%a0)+, %d2
199 move.l %d2, (%a2) 198 move.l %d2, (%a2)
200 subq.l #1, %d0 199 subq.l #1, %d0
201 jne .Lloop1 200 jne .loop1
202 jra .Lexit 201 jra .exit
203 202
204.Ldefault: 203.default:
205 /* we do the filtering in an unrolled by 4 loop as far as we can, and then 204 /* we do the filtering in an unrolled by 4 loop as far as we can, and then
206 do the rest in an ordinary on by one sample loop. 205 do the rest in an ordinary one by one sample loop.
207 */ 206 */
208 lea.l (%a1, %d2.l*4), %a3 /* need to start in the other end of coefs */ 207 lea.l (%a1, %d2.l*4), %a3 /* need to start in the other end of coefs */
209 movea.l %a2, %a4 /* working copy of history pointer */ 208 move.l %a2, %a4 /* working copy of history pointer */
210 move.l %d2, %d3 209 move.l %d2, %d3
211 lsr.l #2, %d3 /* coefs/4, number of iterations needed in next loop */ 210 lsr.l #2, %d3 /* coefs/4, number of iterations needed in next loop */
212 movea.l (%a4)+, %a6 /* preload lpc coef for loop */ 211 move.l (%a4)+, %a6 /* preload lpc coef for loop */
213.Ldloop1: 212.dloop1:
214 lea.l (-16, %a3), %a3 /* move lpc coef pointer four samples backwards */ 213 lea.l (-16, %a3), %a3 /* move lpc coef pointer four samples backwards */
215 movem.l (%a3), %d4-%d7 /* load four coefs */ 214 movem.l (%a3), %d4-%d7 /* load four coefs */
216 mac.l %a6, %d7, (%a4)+, %a6, %acc0 215 mac.l %a6, %d7, (%a4)+, %a6, %acc0
@@ -218,28 +217,28 @@ FLAC__lpc_restore_signal_mcf5249:
218 mac.l %a6, %d5, (%a4)+, %a6, %acc0 217 mac.l %a6, %d5, (%a4)+, %a6, %acc0
219 mac.l %a6, %d4, (%a4)+, %a6, %acc0 218 mac.l %a6, %d4, (%a4)+, %a6, %acc0
220 subq.l #1, %d3 /* any more unrolled loop operations left? */ 219 subq.l #1, %d3 /* any more unrolled loop operations left? */
221 jne .Ldloop1 220 jne .dloop1
222 221
223 move.l %d2, %d3 222 move.l %d2, %d3
224 movq.l #3, %d4 /* mask 0x00000003 */ 223 moveq.l #3, %d4 /* mask 0x00000003 */
225 and.l %d4, %d3 /* get the remaining samples to be filtered */ 224 and.l %d4, %d3 /* get the remaining samples to be filtered */
226 jeq .Ldsave /* no remaining samples */ 225 jeq .dsave /* no remaining samples */
227.Ldloop2: 226.dloop2:
228 move.l -(%a3), %d4 /* get lpc coef */ 227 move.l -(%a3), %d4 /* get lpc coef */
229 mac.l %a6, %d4, (%a4)+, %a6, %acc0 228 mac.l %a6, %d4, (%a4)+, %a6, %acc0
230 subq.l #1, %d3 /* any more iterations left? */ 229 subq.l #1, %d3 /* any more iterations left? */
231 jne .Ldloop2 230 jne .dloop2
232.Ldsave: 231.dsave:
233 movclr.l %acc0, %d3 /* get result */ 232 movclr.l %acc0, %d3 /* get result */
234 asr.l %d1, %d3 /* shift lp_quantization bits right */ 233 asr.l %d1, %d3 /* shift lp_quantization bits right */
235 add.l (%a0)+, %d3 /* add residual */ 234 add.l (%a0)+, %d3 /* add residual */
236 move.l %d3, (-4, %a4) /* history pointer is one sample past data pointer */ 235 move.l %d3, (-4, %a4) /* history pointer is one sample past data pointer */
237 addq.l #4, %a2 /* increment history pointer */ 236 addq.l #4, %a2 /* increment history pointer */
238 subq.l #1, %d0 /* decrement data_len */ 237 subq.l #1, %d0 /* decrement data_len */
239 jne .Ldefault /* are we done? */ 238 jne .default /* are we done? */
240 /* if so, fall through to exit */ 239 /* if so, fall through to exit */
241 240
242.Lexit: 241.exit:
243 movem.l (%sp), %d2-%d7/%a2-%a6 242 movem.l (%sp), %d2-%d7/%a2-%a6
244 lea.l (44, %sp), %sp 243 lea.l (44, %sp), %sp
245 rts 244 rts