summary refs log tree commit diff
diff options
context:
space:
mode:
author Jens Arnold <amiconn@rockbox.org> 2007-10-19 22:57:19 +0000
committer Jens Arnold <amiconn@rockbox.org> 2007-10-19 22:57:19 +0000
commit 87f5359d604a3d51526965a97c74896f392ed444 (patch)
tree b24323088f7b2b7e470d5b61feca19794f7b22a8
parent 152f405cce90a285b9c4f1bcaba27dbccc51fdf2 (diff)
download rockbox-87f5359d604a3d51526965a97c74896f392ed444.tar.gz
rockbox-87f5359d604a3d51526965a97c74896f392ed444.zip
Shuffle some instructions around for that extra percent of performance. Fix a bunch of comments.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15216 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/demac/libdemac/predictor-cf.S 243
1 files changed, 123 insertions, 120 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S
index 19873420c3..3b9489e791 100644
--- a/apps/codecs/demac/libdemac/predictor-cf.S
+++ b/apps/codecs/demac/libdemac/predictor-cf.S
@@ -97,6 +97,8 @@ predictor_decode_stereo:
97 sub.l %d3, %d2 97 sub.l %d3, %d2
98 neg.l %d2 | %d2 = %d3 - %d2 98 neg.l %d2 | %d2 = %d3 - %d2
99 99
100 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
101
100 movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] 102 movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0]
101 | %a1 = p->YcoeffsA[1] 103 | %a1 = p->YcoeffsA[1]
102 | %a2 = p->YcoeffsA[2] 104 | %a2 = p->YcoeffsA[2]
@@ -107,10 +109,7 @@ predictor_decode_stereo:
107 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] 109 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
108 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] 110 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
109 111
110 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
111 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 112 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
112
113 movclr.l %acc0, %d0
114 113
115 tst.l %d2 114 tst.l %d2
116 beq.s 1f 115 beq.s 1f
@@ -118,16 +117,18 @@ predictor_decode_stereo:
118 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 117 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
119 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 118 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
120 1: | %d2 = SIGN(%d2) 119 1: | %d2 = SIGN(%d2)
120 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
121
121 tst.l %d3 122 tst.l %d3
122 beq.s 1f 123 beq.s 1f
123 spl.b %d3 124 spl.b %d3
124 extb.l %d3 125 extb.l %d3
125 or.l #1, %d3 126 or.l #1, %d3
126 1: | %d3 = SIGN(%d3) 127 1: | %d3 = SIGN(%d3)
127
128 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
129 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 128 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
130 129
130 movclr.l %acc0, %d0
131
131 | NOTE: %d0 now contains predictionA - don't overwrite. 132 | NOTE: %d0 now contains predictionA - don't overwrite.
132 133
133 | Predictor Y, Filter B 134 | Predictor Y, Filter B
@@ -149,11 +150,13 @@ predictor_decode_stereo:
149 sub.l %d3, %d7 150 sub.l %d3, %d7
150 neg.l %d7 | %d7 = %d3 - %d7 151 neg.l %d7 | %d7 = %d3 - %d7
151 152
152 movem.l (YcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->YcoeffsB[0] 153 move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
153 | %a0 = p->YcoeffsB[1] 154
154 | %a1 = p->YcoeffsB[2] 155 movem.l (YcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->YcoeffsB[0]
155 | %a2 = p->YcoeffsB[3] 156 | %a0 = p->YcoeffsB[1]
156 | %a3 = p->YcoeffsB[4] 157 | %a1 = p->YcoeffsB[2]
158 | %a2 = p->YcoeffsB[3]
159 | %a3 = p->YcoeffsB[4]
157 160
158 mac.l %d3, %d2, %acc0 | %acc0 = p->buf[YDELAYB] * p->YcoeffsB[0] 161 mac.l %d3, %d2, %acc0 | %acc0 = p->buf[YDELAYB] * p->YcoeffsB[0]
159 mac.l %d7, %a0, %acc0 | %acc0 += p->buf[YDELAYB-1] * p->YcoeffsB[1] 162 mac.l %d7, %a0, %acc0 | %acc0 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
@@ -161,26 +164,24 @@ predictor_decode_stereo:
161 mac.l %d5, %a2, %acc0 | %acc0 += p->buf[YDELAYB-3] * p->YcoeffsB[3] 164 mac.l %d5, %a2, %acc0 | %acc0 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
162 mac.l %d4, %a3, %acc0 | %acc0 += p->buf[YDELAYB-4] * p->YcoeffsB[4] 165 mac.l %d4, %a3, %acc0 | %acc0 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
163 166
164 move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
165 move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 167 move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3
166 168
167 movclr.l %acc0, %d1
168
169 tst.l %d7 169 tst.l %d7
170 beq.s 1f 170 beq.s 1f
171 spl.b %d7 171 spl.b %d7
172 extb.l %d7 172 extb.l %d7
173 or.l #1, %d7 173 or.l #1, %d7
174 1: | %d7 = SIGN(%d7) 174 1: | %d7 = SIGN(%d7)
175 move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
175 tst.l %d3 176 tst.l %d3
176 beq.s 1f 177 beq.s 1f
177 spl.b %d3 178 spl.b %d3
178 extb.l %d3 179 extb.l %d3
179 or.l #1, %d3 180 or.l #1, %d3
180 1: | %d3 = SIGN(%d3) 181 1: | %d3 = SIGN(%d3)
181
182 move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
183 move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 182 move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3
183
184 movclr.l %acc0, %d1
184 185
185 | %d0 still contains predictionA 186 | %d0 still contains predictionA
186 | %d1 contains predictionB 187 | %d1 contains predictionB
@@ -215,70 +216,70 @@ predictor_decode_stereo:
215 move.l %d1, (%a4)+ | *(decoded0++) = %d1 (p->YfilterA) 216 move.l %d1, (%a4)+ | *(decoded0++) = %d1 (p->YfilterA)
216 move.l %a4, (%sp) | save decoded0 217 move.l %a4, (%sp) | save decoded0
217 tst.l %d5 218 tst.l %d5
218 beq.s 2f 219 beq.s 3f
219 220
220 movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | d4 = p->buf[YADAPTCOEFFSB-4] 221 movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4]
221 | d5 = p->buf[YADAPTCOEFFSB-3] 222 | %d5 = p->buf[YADAPTCOEFFSB-3]
222 | d6 = p->buf[YADAPTCOEFFSB-2] 223 | %d6 = p->buf[YADAPTCOEFFSB-2]
223 224
224 bmi.s 1f | flags still valid here 225 bmi.s 1f | flags still valid here
225 226
226 | *decoded0 > 0 227 | *decoded0 > 0
227 228
228 sub.l %d3, %d2 | d2 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] 229 sub.l %d3, %d2 | %d2 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
229 sub.l %d7, %a0 | a0 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] 230 sub.l %d7, %a0 | %a0 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
230 sub.l %d6, %a1 | a1 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] 231 sub.l %d6, %a1 | %a1 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
231 sub.l %d5, %a2 | a2 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] 232 sub.l %d5, %a2 | %a2 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
232 sub.l %d4, %a3 | a3 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] 233 sub.l %d4, %a3 | %a3 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
233 234
234 movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] 235 movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[]
235 236
236 movem.l (YcoeffsA,%a6), %d4-%d7 | d4 = p->YcoeffsA[0] 237 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
237 | d5 = p->YcoeffsA[1] 238 | %d5 = p->YcoeffsA[1]
238 | d6 = p->YcoeffsA[2] 239 | %d6 = p->YcoeffsA[2]
239 | d7 = p->YcoeffsA[3] 240 | %d7 = p->YcoeffsA[3]
240 241
241 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[YADAPTCOEFFSA-3] 242 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[YADAPTCOEFFSA-3]
242 | a0 = p->buf[YADAPTCOEFFSA-2] 243 | %a0 = p->buf[YADAPTCOEFFSA-2]
243 | a1 = p->buf[YADAPTCOEFFSA-1] 244 | %a1 = p->buf[YADAPTCOEFFSA-1]
244 | a2 = p->buf[YADAPTCOEFFSA] 245 | %a2 = p->buf[YADAPTCOEFFSA]
245 246
246 sub.l %a2, %d4 | d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] 247 sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
247 sub.l %a1, %d5 | d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] 248 sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
248 sub.l %a0, %d6 | d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] 249 sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
249 sub.l %d2, %d7 | d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] 250 sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
250 251
251 movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
252 bra.s 2f 252 bra.s 2f
253 253
254 1: | *decoded0 < 0 254 1: | *decoded0 < 0
255 255
256 add.l %d3, %d2 | d2 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] 256 add.l %d3, %d2 | %d2 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
257 add.l %d7, %a0 | a0 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] 257 add.l %d7, %a0 | %a0 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
258 add.l %d6, %a1 | a1 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] 258 add.l %d6, %a1 | %a1 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
259 add.l %d5, %a2 | a2 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] 259 add.l %d5, %a2 | %a2 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
260 add.l %d4, %a3 | a3 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] 260 add.l %d4, %a3 | %a3 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
261 261
262 movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] 262 movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[]
263 263
264 movem.l (YcoeffsA,%a6), %d4-%d7 | d4 = p->YcoeffsA[0] 264 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
265 | d5 = p->YcoeffsA[1] 265 | %d5 = p->YcoeffsA[1]
266 | d6 = p->YcoeffsA[2] 266 | %d6 = p->YcoeffsA[2]
267 | d7 = p->YcoeffsA[3] 267 | %d7 = p->YcoeffsA[3]
268 268
269 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[YADAPTCOEFFSA-3] 269 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[YADAPTCOEFFSA-3]
270 | a0 = p->buf[YADAPTCOEFFSA-2] 270 | %a0 = p->buf[YADAPTCOEFFSA-2]
271 | a1 = p->buf[YADAPTCOEFFSA-1] 271 | %a1 = p->buf[YADAPTCOEFFSA-1]
272 | a2 = p->buf[YADAPTCOEFFSA] 272 | %a2 = p->buf[YADAPTCOEFFSA]
273 273
274 add.l %a2, %d4 | d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] 274 add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
275 add.l %a1, %d5 | d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] 275 add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
276 add.l %a0, %d6 | d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] 276 add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
277 add.l %d2, %d7 | d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] 277 add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
278
279 movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
280 278
281 2: 279 2:
280 movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
281
282 3:
282 283
283 | ***** PREDICTOR X ***** 284 | ***** PREDICTOR X *****
284 285
@@ -293,6 +294,8 @@ predictor_decode_stereo:
293 sub.l %d3, %d2 294 sub.l %d3, %d2
294 neg.l %d2 | %d2 = %d3 -%d2 295 neg.l %d2 | %d2 = %d3 -%d2
295 296
297 move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
298
296 movem.l (XcoeffsA,%a6), %a0-%a3 | %a0 = p->XcoeffsA[0] 299 movem.l (XcoeffsA,%a6), %a0-%a3 | %a0 = p->XcoeffsA[0]
297 | %a1 = p->XcoeffsA[1] 300 | %a1 = p->XcoeffsA[1]
298 | %a2 = p->XcoeffsA[2] 301 | %a2 = p->XcoeffsA[2]
@@ -303,27 +306,26 @@ predictor_decode_stereo:
303 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] 306 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
304 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] 307 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
305 308
306 move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
307 move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 309 move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3
308 310
309 movclr.l %acc0, %d0
310
311 tst.l %d2 311 tst.l %d2
312 beq.s 1f 312 beq.s 1f
313 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 313 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
314 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 314 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
315 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 315 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
316 1: | %d2 = SIGN(%d2) 316 1: | %d2 = SIGN(%d2)
317 move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2
318
317 tst.l %d3 319 tst.l %d3
318 beq.s 1f 320 beq.s 1f
319 spl.b %d3 321 spl.b %d3
320 extb.l %d3 322 extb.l %d3
321 or.l #1, %d3 323 or.l #1, %d3
322 1: | %d3 = SIGN(%d3) 324 1: | %d3 = SIGN(%d3)
325 move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3
326
327 movclr.l %acc0, %d0
323 328
324 move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = r2
325 move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = r3
326
327 | NOTE: %d0 now contains predictionA - don't overwrite. 329 | NOTE: %d0 now contains predictionA - don't overwrite.
328 330
329 | Predictor X, Filter B 331 | Predictor X, Filter B
@@ -345,11 +347,13 @@ predictor_decode_stereo:
345 sub.l %d3, %d7 347 sub.l %d3, %d7
346 neg.l %d7 | %d7 = %d3 - %d7 348 neg.l %d7 | %d7 = %d3 - %d7
347 349
348 movem.l (XcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->XcoeffsB[0] 350 move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
349 | %a0 = p->XcoeffsB[1] 351
350 | %a1 = p->XcoeffsB[2] 352 movem.l (XcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->XcoeffsB[0]
351 | %a2 = p->XcoeffsB[3] 353 | %a0 = p->XcoeffsB[1]
352 | %a3 = p->XcoeffsB[4] 354 | %a1 = p->XcoeffsB[2]
355 | %a2 = p->XcoeffsB[3]
356 | %a3 = p->XcoeffsB[4]
353 357
354 mac.l %d3, %d2, %acc0 | %acc0 = p->buf[XDELAYB] * p->XcoeffsB[0] 358 mac.l %d3, %d2, %acc0 | %acc0 = p->buf[XDELAYB] * p->XcoeffsB[0]
355 mac.l %d7, %a0, %acc0 | %acc0 += p->buf[XDELAYB-1] * p->XcoeffsB[1] 359 mac.l %d7, %a0, %acc0 | %acc0 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
@@ -357,27 +361,26 @@ predictor_decode_stereo:
357 mac.l %d5, %a2, %acc0 | %acc0 += p->buf[XDELAYB-3] * p->XcoeffsB[3] 361 mac.l %d5, %a2, %acc0 | %acc0 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
358 mac.l %d4, %a3, %acc0 | %acc0 += p->buf[XDELAYB-4] * p->XcoeffsB[4] 362 mac.l %d4, %a3, %acc0 | %acc0 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
359 363
360 move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
361 move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 364 move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3
362 365
363 movclr.l %acc0, %d1
364
365 tst.l %d7 366 tst.l %d7
366 beq.s 1f 367 beq.s 1f
367 spl.b %d7 368 spl.b %d7
368 extb.l %d7 369 extb.l %d7
369 or.l #1, %d7 370 or.l #1, %d7
370 1: | %d7 = SIGN(%d7) 371 1: | %d7 = SIGN(%d7)
372 move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
373
371 tst.l %d3 374 tst.l %d3
372 beq.s 1f 375 beq.s 1f
373 spl.b %d3 376 spl.b %d3
374 extb.l %d3 377 extb.l %d3
375 or.l #1, %d3 378 or.l #1, %d3
376 1: | %d3 = SIGN(%d3) 379 1: | %d3 = SIGN(%d3)
377
378 move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
379 move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 380 move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3
380 381
382 movclr.l %acc0, %d1
383
381 | %d0 still contains predictionA 384 | %d0 still contains predictionA
382 | %d1 contains predictionB 385 | %d1 contains predictionB
383 386
@@ -405,76 +408,76 @@ predictor_decode_stereo:
405 | %a4 contains decoded1 408 | %a4 contains decoded1
406 | %d5 contains *decoded1 409 | %d5 contains *decoded1
407 410
408 | %d2, %a0, %a1, %a2, %a31 contain p->XcoeffsB[0..4] 411 | %d2, %a0, %a1, %a2, %a3 contain p->XcoeffsB[0..4]
409 | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] 412 | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
410 413
411 move.l %d1, (%a4)+ | *(decoded1++) = %d1 (p->XfilterA) 414 move.l %d1, (%a4)+ | *(decoded1++) = %d1 (p->XfilterA)
412 move.l %a4, (4,%sp) | save decoded1 415 move.l %a4, (4,%sp) | save decoded1
413 tst.l %d5 416 tst.l %d5
414 beq.s 2f 417 beq.s 3f
415 418
416 movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | d4 = p->buf[XADAPTCOEFFSB-4] 419 movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4]
417 | d5 = p->buf[XADAPTCOEFFSB-3] 420 | %d5 = p->buf[XADAPTCOEFFSB-3]
418 | d6 = p->buf[XADAPTCOEFFSB-2] 421 | %d6 = p->buf[XADAPTCOEFFSB-2]
419 422
420 bmi.s 1f | flags still valid here 423 bmi.s 1f | flags still valid here
421 424
422 | *decoded1 > 0 425 | *decoded1 > 0
423 426
424 sub.l %d3, %d2 | d2 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] 427 sub.l %d3, %d2 | %d2 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
425 sub.l %d7, %a0 | a0 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] 428 sub.l %d7, %a0 | %a0 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
426 sub.l %d6, %a1 | a1 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] 429 sub.l %d6, %a1 | %a1 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
427 sub.l %d5, %a2 | a2 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] 430 sub.l %d5, %a2 | %a2 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
428 sub.l %d4, %a3 | a3 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] 431 sub.l %d4, %a3 | %a3 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
429 432
430 movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] 433 movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[]
431 434
432 movem.l (XcoeffsA,%a6), %d4-%d7 | d4 = p->XcoeffsA[0] 435 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
433 | d5 = p->XcoeffsA[1] 436 | %d5 = p->XcoeffsA[1]
434 | d6 = p->XcoeffsA[2] 437 | %d6 = p->XcoeffsA[2]
435 | d7 = p->XcoeffsA[3] 438 | %d7 = p->XcoeffsA[3]
436 439
437 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[XADAPTCOEFFSA-3] 440 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[XADAPTCOEFFSA-3]
438 | a0 = p->buf[XADAPTCOEFFSA-2] 441 | %a0 = p->buf[XADAPTCOEFFSA-2]
439 | a1 = p->buf[XADAPTCOEFFSA-1] 442 | %a1 = p->buf[XADAPTCOEFFSA-1]
440 | a2 = p->buf[XADAPTCOEFFSA] 443 | %a2 = p->buf[XADAPTCOEFFSA]
441 444
442 sub.l %a2, %d4 | d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] 445 sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
443 sub.l %a1, %d5 | d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] 446 sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
444 sub.l %a0, %d6 | d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] 447 sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
445 sub.l %d2, %d7 | d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] 448 sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
446 449
447 movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
448 bra.s 2f 450 bra.s 2f
449 451
450 1: | *decoded1 < 0 452 1: | *decoded1 < 0
451 453
452 add.l %d3, %d2 | d2 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] 454 add.l %d3, %d2 | %d2 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
453 add.l %d7, %a0 | a0 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] 455 add.l %d7, %a0 | %a0 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
454 add.l %d6, %a1 | a1 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] 456 add.l %d6, %a1 | %a1 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
455 add.l %d5, %a2 | a2 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] 457 add.l %d5, %a2 | %a2 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
456 add.l %d4, %a3 | a3 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] 458 add.l %d4, %a3 | %a3 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
457 459
458 movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] 460 movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[]
459 461
460 movem.l (XcoeffsA,%a6), %d4-%d7 | d4 = p->XcoeffsA[0] 462 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
461 | d5 = p->XcoeffsA[1] 463 | %d5 = p->XcoeffsA[1]
462 | d6 = p->XcoeffsA[2] 464 | %d6 = p->XcoeffsA[2]
463 | d7 = p->XcoeffsA[3] 465 | %d7 = p->XcoeffsA[3]
464 466
465 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[XADAPTCOEFFSA-3] 467 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[XADAPTCOEFFSA-3]
466 | a0 = p->buf[XADAPTCOEFFSA-2] 468 | %a0 = p->buf[XADAPTCOEFFSA-2]
467 | a1 = p->buf[XADAPTCOEFFSA-1] 469 | %a1 = p->buf[XADAPTCOEFFSA-1]
468 | a2 = p->buf[XADAPTCOEFFSA] 470 | %a2 = p->buf[XADAPTCOEFFSA]
469 471
470 add.l %a2, %d4 | d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] 472 add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
471 add.l %a1, %d5 | d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] 473 add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
472 add.l %a0, %d6 | d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] 474 add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
473 add.l %d2, %d7 | d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] 475 add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
474
475 movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
476 476
477 2: 477 2:
478 movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
479
480 3:
478 481
479 | ***** COMMON ***** 482 | ***** COMMON *****
480 483
@@ -484,10 +487,10 @@ predictor_decode_stereo:
484 487
485 cmp.l %a3, %a5 488 cmp.l %a3, %a5
486 bne.s .endofloop 489 bne.s .endofloop
487 490
488 | The history buffer is full, we need to do a memmove: 491 | The history buffer is full, we need to do a memmove:
489 492
490 lea.l (historybuffer,%a6), %a3 493 lea.l (historybuffer,%a6), %a3
491 494
492 | dest = %a3 (p->historybuffer) 495 | dest = %a3 (p->historybuffer)
493 | src = %a5 (p->buf) 496 | src = %a5 (p->buf)
@@ -515,7 +518,7 @@ predictor_decode_stereo:
515 lea.l (40,%a3), %a3 518 lea.l (40,%a3), %a3
516 519
517 lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0] 520 lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0]
518 521
519 .endofloop: 522 .endofloop:
520 subq.l #1, (8,%sp) | decrease loop count 523 subq.l #1, (8,%sp) | decrease loop count
521 bne.w .loop 524 bne.w .loop