diff options
author | Jens Arnold <amiconn@rockbox.org> | 2007-10-19 22:57:19 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2007-10-19 22:57:19 +0000 |
commit | 87f5359d604a3d51526965a97c74896f392ed444 (patch) | |
tree | b24323088f7b2b7e470d5b61feca19794f7b22a8 /apps | |
parent | 152f405cce90a285b9c4f1bcaba27dbccc51fdf2 (diff) | |
download | rockbox-87f5359d604a3d51526965a97c74896f392ed444.tar.gz rockbox-87f5359d604a3d51526965a97c74896f392ed444.zip |
Shuffle some instructions around for that extra percent of performance. Fix a bunch of comments.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15216 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/demac/libdemac/predictor-cf.S | 243 |
1 files changed, 123 insertions, 120 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S | |||
index 19873420c3..3b9489e791 100644 | |||
--- a/apps/codecs/demac/libdemac/predictor-cf.S | |||
+++ b/apps/codecs/demac/libdemac/predictor-cf.S | |||
@@ -97,6 +97,8 @@ predictor_decode_stereo: | |||
97 | sub.l %d3, %d2 | 97 | sub.l %d3, %d2 |
98 | neg.l %d2 | %d2 = %d3 - %d2 | 98 | neg.l %d2 | %d2 = %d3 - %d2 |
99 | 99 | ||
100 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
101 | |||
100 | movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] | 102 | movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] |
101 | | %a1 = p->YcoeffsA[1] | 103 | | %a1 = p->YcoeffsA[1] |
102 | | %a2 = p->YcoeffsA[2] | 104 | | %a2 = p->YcoeffsA[2] |
@@ -107,10 +109,7 @@ predictor_decode_stereo: | |||
107 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | 109 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] |
108 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | 110 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] |
109 | 111 | ||
110 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
111 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | 112 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 |
112 | |||
113 | movclr.l %acc0, %d0 | ||
114 | 113 | ||
115 | tst.l %d2 | 114 | tst.l %d2 |
116 | beq.s 1f | 115 | beq.s 1f |
@@ -118,16 +117,18 @@ predictor_decode_stereo: | |||
118 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | 117 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 |
119 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | 118 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 |
120 | 1: | %d2 = SIGN(%d2) | 119 | 1: | %d2 = SIGN(%d2) |
120 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
121 | |||
121 | tst.l %d3 | 122 | tst.l %d3 |
122 | beq.s 1f | 123 | beq.s 1f |
123 | spl.b %d3 | 124 | spl.b %d3 |
124 | extb.l %d3 | 125 | extb.l %d3 |
125 | or.l #1, %d3 | 126 | or.l #1, %d3 |
126 | 1: | %d3 = SIGN(%d3) | 127 | 1: | %d3 = SIGN(%d3) |
127 | |||
128 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
129 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | 128 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 |
130 | 129 | ||
130 | movclr.l %acc0, %d0 | ||
131 | |||
131 | | NOTE: %d0 now contains predictionA - don't overwrite. | 132 | | NOTE: %d0 now contains predictionA - don't overwrite. |
132 | 133 | ||
133 | | Predictor Y, Filter B | 134 | | Predictor Y, Filter B |
@@ -149,11 +150,13 @@ predictor_decode_stereo: | |||
149 | sub.l %d3, %d7 | 150 | sub.l %d3, %d7 |
150 | neg.l %d7 | %d7 = %d3 - %d7 | 151 | neg.l %d7 | %d7 = %d3 - %d7 |
151 | 152 | ||
152 | movem.l (YcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->YcoeffsB[0] | 153 | move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7 |
153 | | %a0 = p->YcoeffsB[1] | 154 | |
154 | | %a1 = p->YcoeffsB[2] | 155 | movem.l (YcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->YcoeffsB[0] |
155 | | %a2 = p->YcoeffsB[3] | 156 | | %a0 = p->YcoeffsB[1] |
156 | | %a3 = p->YcoeffsB[4] | 157 | | %a1 = p->YcoeffsB[2] |
158 | | %a2 = p->YcoeffsB[3] | ||
159 | | %a3 = p->YcoeffsB[4] | ||
157 | 160 | ||
158 | mac.l %d3, %d2, %acc0 | %acc0 = p->buf[YDELAYB] * p->YcoeffsB[0] | 161 | mac.l %d3, %d2, %acc0 | %acc0 = p->buf[YDELAYB] * p->YcoeffsB[0] |
159 | mac.l %d7, %a0, %acc0 | %acc0 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | 162 | mac.l %d7, %a0, %acc0 | %acc0 += p->buf[YDELAYB-1] * p->YcoeffsB[1] |
@@ -161,26 +164,24 @@ predictor_decode_stereo: | |||
161 | mac.l %d5, %a2, %acc0 | %acc0 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | 164 | mac.l %d5, %a2, %acc0 | %acc0 += p->buf[YDELAYB-3] * p->YcoeffsB[3] |
162 | mac.l %d4, %a3, %acc0 | %acc0 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | 165 | mac.l %d4, %a3, %acc0 | %acc0 += p->buf[YDELAYB-4] * p->YcoeffsB[4] |
163 | 166 | ||
164 | move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7 | ||
165 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 | 167 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 |
166 | 168 | ||
167 | movclr.l %acc0, %d1 | ||
168 | |||
169 | tst.l %d7 | 169 | tst.l %d7 |
170 | beq.s 1f | 170 | beq.s 1f |
171 | spl.b %d7 | 171 | spl.b %d7 |
172 | extb.l %d7 | 172 | extb.l %d7 |
173 | or.l #1, %d7 | 173 | or.l #1, %d7 |
174 | 1: | %d7 = SIGN(%d7) | 174 | 1: | %d7 = SIGN(%d7) |
175 | move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7 | ||
175 | tst.l %d3 | 176 | tst.l %d3 |
176 | beq.s 1f | 177 | beq.s 1f |
177 | spl.b %d3 | 178 | spl.b %d3 |
178 | extb.l %d3 | 179 | extb.l %d3 |
179 | or.l #1, %d3 | 180 | or.l #1, %d3 |
180 | 1: | %d3 = SIGN(%d3) | 181 | 1: | %d3 = SIGN(%d3) |
181 | |||
182 | move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7 | ||
183 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 | 182 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 |
183 | |||
184 | movclr.l %acc0, %d1 | ||
184 | 185 | ||
185 | | %d0 still contains predictionA | 186 | | %d0 still contains predictionA |
186 | | %d1 contains predictionB | 187 | | %d1 contains predictionB |
@@ -215,70 +216,70 @@ predictor_decode_stereo: | |||
215 | move.l %d1, (%a4)+ | *(decoded0++) = %d1 (p->YfilterA) | 216 | move.l %d1, (%a4)+ | *(decoded0++) = %d1 (p->YfilterA) |
216 | move.l %a4, (%sp) | save decoded0 | 217 | move.l %a4, (%sp) | save decoded0 |
217 | tst.l %d5 | 218 | tst.l %d5 |
218 | beq.s 2f | 219 | beq.s 3f |
219 | 220 | ||
220 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | d4 = p->buf[YADAPTCOEFFSB-4] | 221 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4] |
221 | | d5 = p->buf[YADAPTCOEFFSB-3] | 222 | | %d5 = p->buf[YADAPTCOEFFSB-3] |
222 | | d6 = p->buf[YADAPTCOEFFSB-2] | 223 | | %d6 = p->buf[YADAPTCOEFFSB-2] |
223 | 224 | ||
224 | bmi.s 1f | flags still valid here | 225 | bmi.s 1f | flags still valid here |
225 | 226 | ||
226 | | *decoded0 > 0 | 227 | | *decoded0 > 0 |
227 | 228 | ||
228 | sub.l %d3, %d2 | d2 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | 229 | sub.l %d3, %d2 | %d2 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] |
229 | sub.l %d7, %a0 | a0 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | 230 | sub.l %d7, %a0 | %a0 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] |
230 | sub.l %d6, %a1 | a1 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | 231 | sub.l %d6, %a1 | %a1 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] |
231 | sub.l %d5, %a2 | a2 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | 232 | sub.l %d5, %a2 | %a2 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] |
232 | sub.l %d4, %a3 | a3 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | 233 | sub.l %d4, %a3 | %a3 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] |
233 | 234 | ||
234 | movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] | 235 | movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] |
235 | 236 | ||
236 | movem.l (YcoeffsA,%a6), %d4-%d7 | d4 = p->YcoeffsA[0] | 237 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] |
237 | | d5 = p->YcoeffsA[1] | 238 | | %d5 = p->YcoeffsA[1] |
238 | | d6 = p->YcoeffsA[2] | 239 | | %d6 = p->YcoeffsA[2] |
239 | | d7 = p->YcoeffsA[3] | 240 | | %d7 = p->YcoeffsA[3] |
240 | 241 | ||
241 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[YADAPTCOEFFSA-3] | 242 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[YADAPTCOEFFSA-3] |
242 | | a0 = p->buf[YADAPTCOEFFSA-2] | 243 | | %a0 = p->buf[YADAPTCOEFFSA-2] |
243 | | a1 = p->buf[YADAPTCOEFFSA-1] | 244 | | %a1 = p->buf[YADAPTCOEFFSA-1] |
244 | | a2 = p->buf[YADAPTCOEFFSA] | 245 | | %a2 = p->buf[YADAPTCOEFFSA] |
245 | 246 | ||
246 | sub.l %a2, %d4 | d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | 247 | sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] |
247 | sub.l %a1, %d5 | d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | 248 | sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] |
248 | sub.l %a0, %d6 | d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | 249 | sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] |
249 | sub.l %d2, %d7 | d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | 250 | sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] |
250 | 251 | ||
251 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
252 | bra.s 2f | 252 | bra.s 2f |
253 | 253 | ||
254 | 1: | *decoded0 < 0 | 254 | 1: | *decoded0 < 0 |
255 | 255 | ||
256 | add.l %d3, %d2 | d2 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | 256 | add.l %d3, %d2 | %d2 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] |
257 | add.l %d7, %a0 | a0 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | 257 | add.l %d7, %a0 | %a0 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] |
258 | add.l %d6, %a1 | a1 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | 258 | add.l %d6, %a1 | %a1 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] |
259 | add.l %d5, %a2 | a2 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | 259 | add.l %d5, %a2 | %a2 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] |
260 | add.l %d4, %a3 | a3 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | 260 | add.l %d4, %a3 | %a3 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] |
261 | 261 | ||
262 | movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] | 262 | movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] |
263 | 263 | ||
264 | movem.l (YcoeffsA,%a6), %d4-%d7 | d4 = p->YcoeffsA[0] | 264 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] |
265 | | d5 = p->YcoeffsA[1] | 265 | | %d5 = p->YcoeffsA[1] |
266 | | d6 = p->YcoeffsA[2] | 266 | | %d6 = p->YcoeffsA[2] |
267 | | d7 = p->YcoeffsA[3] | 267 | | %d7 = p->YcoeffsA[3] |
268 | 268 | ||
269 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[YADAPTCOEFFSA-3] | 269 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[YADAPTCOEFFSA-3] |
270 | | a0 = p->buf[YADAPTCOEFFSA-2] | 270 | | %a0 = p->buf[YADAPTCOEFFSA-2] |
271 | | a1 = p->buf[YADAPTCOEFFSA-1] | 271 | | %a1 = p->buf[YADAPTCOEFFSA-1] |
272 | | a2 = p->buf[YADAPTCOEFFSA] | 272 | | %a2 = p->buf[YADAPTCOEFFSA] |
273 | 273 | ||
274 | add.l %a2, %d4 | d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | 274 | add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] |
275 | add.l %a1, %d5 | d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | 275 | add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] |
276 | add.l %a0, %d6 | d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | 276 | add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] |
277 | add.l %d2, %d7 | d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | 277 | add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] |
278 | |||
279 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
280 | 278 | ||
281 | 2: | 279 | 2: |
280 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
281 | |||
282 | 3: | ||
282 | 283 | ||
283 | | ***** PREDICTOR X ***** | 284 | | ***** PREDICTOR X ***** |
284 | 285 | ||
@@ -293,6 +294,8 @@ predictor_decode_stereo: | |||
293 | sub.l %d3, %d2 | 294 | sub.l %d3, %d2 |
294 | neg.l %d2 | %d2 = %d3 -%d2 | 295 | neg.l %d2 | %d2 = %d3 -%d2 |
295 | 296 | ||
297 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 | ||
298 | |||
296 | movem.l (XcoeffsA,%a6), %a0-%a3 | %a0 = p->XcoeffsA[0] | 299 | movem.l (XcoeffsA,%a6), %a0-%a3 | %a0 = p->XcoeffsA[0] |
297 | | %a1 = p->XcoeffsA[1] | 300 | | %a1 = p->XcoeffsA[1] |
298 | | %a2 = p->XcoeffsA[2] | 301 | | %a2 = p->XcoeffsA[2] |
@@ -303,27 +306,26 @@ predictor_decode_stereo: | |||
303 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | 306 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] |
304 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | 307 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] |
305 | 308 | ||
306 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 | ||
307 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 | 309 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 |
308 | 310 | ||
309 | movclr.l %acc0, %d0 | ||
310 | |||
311 | tst.l %d2 | 311 | tst.l %d2 |
312 | beq.s 1f | 312 | beq.s 1f |
313 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | 313 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 |
314 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | 314 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 |
315 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | 315 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 |
316 | 1: | %d2 = SIGN(%d2) | 316 | 1: | %d2 = SIGN(%d2) |
317 | move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2 | ||
318 | |||
317 | tst.l %d3 | 319 | tst.l %d3 |
318 | beq.s 1f | 320 | beq.s 1f |
319 | spl.b %d3 | 321 | spl.b %d3 |
320 | extb.l %d3 | 322 | extb.l %d3 |
321 | or.l #1, %d3 | 323 | or.l #1, %d3 |
322 | 1: | %d3 = SIGN(%d3) | 324 | 1: | %d3 = SIGN(%d3) |
325 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 | ||
326 | |||
327 | movclr.l %acc0, %d0 | ||
323 | 328 | ||
324 | move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = r2 | ||
325 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = r3 | ||
326 | |||
327 | | NOTE: %d0 now contains predictionA - don't overwrite. | 329 | | NOTE: %d0 now contains predictionA - don't overwrite. |
328 | 330 | ||
329 | | Predictor X, Filter B | 331 | | Predictor X, Filter B |
@@ -345,11 +347,13 @@ predictor_decode_stereo: | |||
345 | sub.l %d3, %d7 | 347 | sub.l %d3, %d7 |
346 | neg.l %d7 | %d7 = %d3 - %d7 | 348 | neg.l %d7 | %d7 = %d3 - %d7 |
347 | 349 | ||
348 | movem.l (XcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->XcoeffsB[0] | 350 | move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7 |
349 | | %a0 = p->XcoeffsB[1] | 351 | |
350 | | %a1 = p->XcoeffsB[2] | 352 | movem.l (XcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->XcoeffsB[0] |
351 | | %a2 = p->XcoeffsB[3] | 353 | | %a0 = p->XcoeffsB[1] |
352 | | %a3 = p->XcoeffsB[4] | 354 | | %a1 = p->XcoeffsB[2] |
355 | | %a2 = p->XcoeffsB[3] | ||
356 | | %a3 = p->XcoeffsB[4] | ||
353 | 357 | ||
354 | mac.l %d3, %d2, %acc0 | %acc0 = p->buf[XDELAYB] * p->XcoeffsB[0] | 358 | mac.l %d3, %d2, %acc0 | %acc0 = p->buf[XDELAYB] * p->XcoeffsB[0] |
355 | mac.l %d7, %a0, %acc0 | %acc0 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | 359 | mac.l %d7, %a0, %acc0 | %acc0 += p->buf[XDELAYB-1] * p->XcoeffsB[1] |
@@ -357,27 +361,26 @@ predictor_decode_stereo: | |||
357 | mac.l %d5, %a2, %acc0 | %acc0 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | 361 | mac.l %d5, %a2, %acc0 | %acc0 += p->buf[XDELAYB-3] * p->XcoeffsB[3] |
358 | mac.l %d4, %a3, %acc0 | %acc0 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | 362 | mac.l %d4, %a3, %acc0 | %acc0 += p->buf[XDELAYB-4] * p->XcoeffsB[4] |
359 | 363 | ||
360 | move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7 | ||
361 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 | 364 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 |
362 | 365 | ||
363 | movclr.l %acc0, %d1 | ||
364 | |||
365 | tst.l %d7 | 366 | tst.l %d7 |
366 | beq.s 1f | 367 | beq.s 1f |
367 | spl.b %d7 | 368 | spl.b %d7 |
368 | extb.l %d7 | 369 | extb.l %d7 |
369 | or.l #1, %d7 | 370 | or.l #1, %d7 |
370 | 1: | %d7 = SIGN(%d7) | 371 | 1: | %d7 = SIGN(%d7) |
372 | move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7 | ||
373 | |||
371 | tst.l %d3 | 374 | tst.l %d3 |
372 | beq.s 1f | 375 | beq.s 1f |
373 | spl.b %d3 | 376 | spl.b %d3 |
374 | extb.l %d3 | 377 | extb.l %d3 |
375 | or.l #1, %d3 | 378 | or.l #1, %d3 |
376 | 1: | %d3 = SIGN(%d3) | 379 | 1: | %d3 = SIGN(%d3) |
377 | |||
378 | move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7 | ||
379 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 | 380 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 |
380 | 381 | ||
382 | movclr.l %acc0, %d1 | ||
383 | |||
381 | | %d0 still contains predictionA | 384 | | %d0 still contains predictionA |
382 | | %d1 contains predictionB | 385 | | %d1 contains predictionB |
383 | 386 | ||
@@ -405,76 +408,76 @@ predictor_decode_stereo: | |||
405 | | %a4 contains decoded1 | 408 | | %a4 contains decoded1 |
406 | | %d5 contains *decoded1 | 409 | | %d5 contains *decoded1 |
407 | 410 | ||
408 | | %d2, %a0, %a1, %a2, %a31 contain p->XcoeffsB[0..4] | 411 | | %d2, %a0, %a1, %a2, %a3 contain p->XcoeffsB[0..4] |
409 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | 412 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] |
410 | 413 | ||
411 | move.l %d1, (%a4)+ | *(decoded1++) = %d1 (p->XfilterA) | 414 | move.l %d1, (%a4)+ | *(decoded1++) = %d1 (p->XfilterA) |
412 | move.l %a4, (4,%sp) | save decoded1 | 415 | move.l %a4, (4,%sp) | save decoded1 |
413 | tst.l %d5 | 416 | tst.l %d5 |
414 | beq.s 2f | 417 | beq.s 3f |
415 | 418 | ||
416 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | d4 = p->buf[XADAPTCOEFFSB-4] | 419 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4] |
417 | | d5 = p->buf[XADAPTCOEFFSB-3] | 420 | | %d5 = p->buf[XADAPTCOEFFSB-3] |
418 | | d6 = p->buf[XADAPTCOEFFSB-2] | 421 | | %d6 = p->buf[XADAPTCOEFFSB-2] |
419 | 422 | ||
420 | bmi.s 1f | flags still valid here | 423 | bmi.s 1f | flags still valid here |
421 | 424 | ||
422 | | *decoded1 > 0 | 425 | | *decoded1 > 0 |
423 | 426 | ||
424 | sub.l %d3, %d2 | d2 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | 427 | sub.l %d3, %d2 | %d2 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] |
425 | sub.l %d7, %a0 | a0 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | 428 | sub.l %d7, %a0 | %a0 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] |
426 | sub.l %d6, %a1 | a1 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | 429 | sub.l %d6, %a1 | %a1 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] |
427 | sub.l %d5, %a2 | a2 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | 430 | sub.l %d5, %a2 | %a2 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] |
428 | sub.l %d4, %a3 | a3 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | 431 | sub.l %d4, %a3 | %a3 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] |
429 | 432 | ||
430 | movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] | 433 | movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] |
431 | 434 | ||
432 | movem.l (XcoeffsA,%a6), %d4-%d7 | d4 = p->XcoeffsA[0] | 435 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] |
433 | | d5 = p->XcoeffsA[1] | 436 | | %d5 = p->XcoeffsA[1] |
434 | | d6 = p->XcoeffsA[2] | 437 | | %d6 = p->XcoeffsA[2] |
435 | | d7 = p->XcoeffsA[3] | 438 | | %d7 = p->XcoeffsA[3] |
436 | 439 | ||
437 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[XADAPTCOEFFSA-3] | 440 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[XADAPTCOEFFSA-3] |
438 | | a0 = p->buf[XADAPTCOEFFSA-2] | 441 | | %a0 = p->buf[XADAPTCOEFFSA-2] |
439 | | a1 = p->buf[XADAPTCOEFFSA-1] | 442 | | %a1 = p->buf[XADAPTCOEFFSA-1] |
440 | | a2 = p->buf[XADAPTCOEFFSA] | 443 | | %a2 = p->buf[XADAPTCOEFFSA] |
441 | 444 | ||
442 | sub.l %a2, %d4 | d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | 445 | sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] |
443 | sub.l %a1, %d5 | d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | 446 | sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] |
444 | sub.l %a0, %d6 | d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | 447 | sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] |
445 | sub.l %d2, %d7 | d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | 448 | sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] |
446 | 449 | ||
447 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
448 | bra.s 2f | 450 | bra.s 2f |
449 | 451 | ||
450 | 1: | *decoded1 < 0 | 452 | 1: | *decoded1 < 0 |
451 | 453 | ||
452 | add.l %d3, %d2 | d2 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | 454 | add.l %d3, %d2 | %d2 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] |
453 | add.l %d7, %a0 | a0 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | 455 | add.l %d7, %a0 | %a0 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] |
454 | add.l %d6, %a1 | a1 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | 456 | add.l %d6, %a1 | %a1 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] |
455 | add.l %d5, %a2 | a2 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | 457 | add.l %d5, %a2 | %a2 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] |
456 | add.l %d4, %a3 | a3 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | 458 | add.l %d4, %a3 | %a3 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] |
457 | 459 | ||
458 | movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] | 460 | movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] |
459 | 461 | ||
460 | movem.l (XcoeffsA,%a6), %d4-%d7 | d4 = p->XcoeffsA[0] | 462 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] |
461 | | d5 = p->XcoeffsA[1] | 463 | | %d5 = p->XcoeffsA[1] |
462 | | d6 = p->XcoeffsA[2] | 464 | | %d6 = p->XcoeffsA[2] |
463 | | d7 = p->XcoeffsA[3] | 465 | | %d7 = p->XcoeffsA[3] |
464 | 466 | ||
465 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[XADAPTCOEFFSA-3] | 467 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[XADAPTCOEFFSA-3] |
466 | | a0 = p->buf[XADAPTCOEFFSA-2] | 468 | | %a0 = p->buf[XADAPTCOEFFSA-2] |
467 | | a1 = p->buf[XADAPTCOEFFSA-1] | 469 | | %a1 = p->buf[XADAPTCOEFFSA-1] |
468 | | a2 = p->buf[XADAPTCOEFFSA] | 470 | | %a2 = p->buf[XADAPTCOEFFSA] |
469 | 471 | ||
470 | add.l %a2, %d4 | d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | 472 | add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] |
471 | add.l %a1, %d5 | d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | 473 | add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] |
472 | add.l %a0, %d6 | d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | 474 | add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] |
473 | add.l %d2, %d7 | d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | 475 | add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] |
474 | |||
475 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
476 | 476 | ||
477 | 2: | 477 | 2: |
478 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
479 | |||
480 | 3: | ||
478 | 481 | ||
479 | | ***** COMMON ***** | 482 | | ***** COMMON ***** |
480 | 483 | ||
@@ -484,10 +487,10 @@ predictor_decode_stereo: | |||
484 | 487 | ||
485 | cmp.l %a3, %a5 | 488 | cmp.l %a3, %a5 |
486 | bne.s .endofloop | 489 | bne.s .endofloop |
487 | 490 | ||
488 | | The history buffer is full, we need to do a memmove: | 491 | | The history buffer is full, we need to do a memmove: |
489 | 492 | ||
490 | lea.l (historybuffer,%a6), %a3 | 493 | lea.l (historybuffer,%a6), %a3 |
491 | 494 | ||
492 | | dest = %a3 (p->historybuffer) | 495 | | dest = %a3 (p->historybuffer) |
493 | | src = %a5 (p->buf) | 496 | | src = %a5 (p->buf) |
@@ -515,7 +518,7 @@ predictor_decode_stereo: | |||
515 | lea.l (40,%a3), %a3 | 518 | lea.l (40,%a3), %a3 |
516 | 519 | ||
517 | lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0] | 520 | lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0] |
518 | 521 | ||
519 | .endofloop: | 522 | .endofloop: |
520 | subq.l #1, (8,%sp) | decrease loop count | 523 | subq.l #1, (8,%sp) | decrease loop count |
521 | bne.w .loop | 524 | bne.w .loop |