summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/predictor-cf.S')
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/predictor-cf.S660
1 files changed, 660 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
new file mode 100644
index 0000000000..fc1d901a59
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
@@ -0,0 +1,660 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9Coldfire predictor copyright (C) 2007 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
27#include "demac_config.h"
28
29/* NOTE: The following need to be kept in sync with parser.h */
30/* They are used as byte offsets from p->buf (%a5); each history entry is 4 bytes. */
31#define YDELAYA 200
32#define YDELAYB 168
33#define XDELAYA 136
34#define XDELAYB 104
35#define YADAPTCOEFFSA 72
36#define XADAPTCOEFFSA 56
37#define YADAPTCOEFFSB 40
38#define XADAPTCOEFFSB 20
39
40/* struct predictor_t members (byte offsets from p, which the code keeps in %a6): */
41#define buf 0 /* int32_t* buf */
42
43#define YlastA 4 /* int32_t YlastA; */
44#define XlastA 8 /* int32_t XlastA; */
45/* YfilterB/XfilterA are adjacent: the stereo Y filter B loads both with one movem.l */
46#define YfilterB 12 /* int32_t YfilterB; */
47#define XfilterA 16 /* int32_t XfilterA; */
48/* XfilterB/YfilterA are adjacent: the stereo X filter B loads both with one movem.l */
49#define XfilterB 20 /* int32_t XfilterB; */
50#define YfilterA 24 /* int32_t YfilterA; */
51
52#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
53#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
54#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
55#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
56
57#define historybuffer 100 /* int32_t historybuffer[] */
58
59
60 .text
61
62 .align 2
63
64 .global predictor_decode_stereo
65 .type predictor_decode_stereo,@function
66| Stereo predictor: for each of count samples, replaces *decoded0 / *decoded1 with the updated p->YfilterA / p->XfilterA.
67| void predictor_decode_stereo(struct predictor_t* p,
68| int32_t* decoded0,
69| int32_t* decoded1,
70| int count)
71| In the loop: %a3 = decoded0, %a4 = decoded1, %a5 = p->buf, %a6 = p, (%sp) = remaining count
72predictor_decode_stereo:
73 lea.l (-12*4,%sp), %sp | room for the loop counter + 11 saved registers
74 movem.l %d2-%d7/%a2-%a6, (4,%sp) | save registers above the counter slot at (%sp)
75
76 movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0
77 | %a4 = decoded1, %a5 = count (temporary)
78 move.l %a5, (%sp) | (%sp) = count
79
80 move.l #0, %macsr | signed integer mode
81 move.l (12*4+4,%sp), %a6 | %a6 = p
82 move.l (%a6), %a5 | %a5 = p->buf
83
84.loop:
85
86 | ***** PREDICTOR Y *****
87
88 | Predictor Y, Filter A
89
90 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
91
92 movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
93 | %d1 = p->buf[YDELAYA-2]
94 | %d2 = p->buf[YDELAYA-1]
95
96 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
97
98 sub.l %d3, %d2
99 neg.l %d2 | %d2 = %d3 - %d2
100
101 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
102
103 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
104 | %d5 = p->YcoeffsA[1]
105 | %d6 = p->YcoeffsA[2]
106 | %d7 = p->YcoeffsA[3]
107
108 mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
109 mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
110 mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
111 mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
112
113 tst.l %d2
114 beq.s 1f
115 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
116 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
117 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
1181: | %d2 = SIGN(%d2): 0 if zero, -1 if positive, +1 if negative
119 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
120
121 tst.l %d3
122 beq.s 1f
123 spl.b %d3
124 extb.l %d3
125 or.l #1, %d3
1261: | %d3 = SIGN(%d3)
127 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
128
129 | Predictor Y, Filter B
130
131 movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB
132 | %d3 = p->XfilterA
133 move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3
134
135 move.l %d2, %d1 | %d1 = %d2
136 lsl.l #5, %d2 | %d2 = %d2 * 32
137 sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
138 asr.l #5, %d2 | %d2 >>= 5
139 sub.l %d2, %d3 | %d3 -= %d2
140
141 movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4]
142 | %d5 = p->buf[YDELAYB-3]
143 | %d6 = p->buf[YDELAYB-2]
144 | %d7 = p->buf[YDELAYB-1]
145 sub.l %d3, %d7
146 neg.l %d7 | %d7 = %d3 - %d7
147
148 move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
149
150 movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0]
151 | %d2 = p->YcoeffsB[1]
152 | %a0 = p->YcoeffsB[2]
153 | %a1 = p->YcoeffsB[3]
154 | %a2 = p->YcoeffsB[4]
155
156 mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0]
157 mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
158 mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
159 mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
160 mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
161
162 move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3
163
164 tst.l %d7
165 beq.s 1f
166 spl.b %d7
167 extb.l %d7
168 or.l #1, %d7
1691: | %d7 = SIGN(%d7)
170 move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
171 tst.l %d3
172 beq.s 1f
173 spl.b %d3
174 extb.l %d3
175 or.l #1, %d3
1761: | %d3 = SIGN(%d3)
177 move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3
178
179 | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4]
180 | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
181
182 move.l (%a3), %d0 | %d0 = *decoded0
183 beq.s 3f | *decoded0 == 0: leave the coefficients unchanged
184
185 movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4]
186 | %d5 = p->buf[YADAPTCOEFFSB-3]
187 | %d6 = p->buf[YADAPTCOEFFSB-2]
188
189 bmi.s 1f | flags from the load of *decoded0 are still valid here
190
191 | *decoded0 > 0
192
193 sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
194 sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
195 sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
196 sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
197 sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
198
199 movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
200
201 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
202 | %d5 = p->YcoeffsA[1]
203 | %d6 = p->YcoeffsA[2]
204 | %d7 = p->YcoeffsA[3]
205
206 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
207 | %d2 = p->buf[YADAPTCOEFFSA-3]
208 | %a0 = p->buf[YADAPTCOEFFSA-2]
209 | %a1 = p->buf[YADAPTCOEFFSA-1]
210 | %a2 = p->buf[YADAPTCOEFFSA]
211
212 sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
213 sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
214 sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
215 sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
216
217 bra.s 2f
218
2191: | *decoded0 < 0
220
221 add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
222 add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
223 add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
224 add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
225 add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
226
227 movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
228
229 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
230 | %d5 = p->YcoeffsA[1]
231 | %d6 = p->YcoeffsA[2]
232 | %d7 = p->YcoeffsA[3]
233
234 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
235 | %d2 = p->buf[YADAPTCOEFFSA-3]
236 | %a0 = p->buf[YADAPTCOEFFSA-2]
237 | %a1 = p->buf[YADAPTCOEFFSA-1]
238 | %a2 = p->buf[YADAPTCOEFFSA]
239
240 add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
241 add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
242 add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
243 add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
244
2452:
246 movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
247
2483:
249 | Finish Predictor Y
250
251 movclr.l %acc0, %d1 | %d1 = predictionA
252 movclr.l %acc1, %d2 | %d2 = predictionB
253 asr.l #1, %d2
254 add.l %d2, %d1 | %d1 += (%d2 >> 1)
255 asr.l #8, %d1
256 asr.l #2, %d1 | %d1 >>= 10
257 add.l %d0, %d1 | %d1 += %d0
258 move.l %d1, (YlastA,%a6) | p->YlastA = %d1
259
260 move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
261 move.l %d2, %d0
262 lsl.l #5, %d2
263 sub.l %d0, %d2 | %d2 = 31 * %d2
264 asr.l #5, %d2 | %d2 >>= 5
265 add.l %d1, %d2
266 move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
267
268 | *decoded0 stored 2 instructions down, avoiding pipeline stall
269
270 | ***** PREDICTOR X *****
271
272 | Predictor X, Filter A
273
274 move.l (XlastA,%a6), %d3 | %d3 = p->XlastA
275
276 move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA)
277
278 movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3]
279 | %d1 = p->buf[XDELAYA-2]
280 | %d2 = p->buf[XDELAYA-1]
281
282 move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3
283
284 sub.l %d3, %d2
285 neg.l %d2 | %d2 = %d3 - %d2
286
287 move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
288
289 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
290 | %d5 = p->XcoeffsA[1]
291 | %d6 = p->XcoeffsA[2]
292 | %d7 = p->XcoeffsA[3]
293
294 mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0]
295 mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
296 mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
297 mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
298
299 tst.l %d2
300 beq.s 1f
301 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
302 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
303 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
3041: | %d2 = SIGN(%d2)
305 move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2
306
307 tst.l %d3
308 beq.s 1f
309 spl.b %d3
310 extb.l %d3
311 or.l #1, %d3
3121: | %d3 = SIGN(%d3)
313 move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3
314
315 | Predictor X, Filter B
316
317 movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB
318 | %d3 = p->YfilterA
319 move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3
320
321 move.l %d2, %d1 | %d1 = %d2
322 lsl.l #5, %d2 | %d2 = %d2 * 32
323 sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
324 asr.l #5, %d2 | %d2 >>= 5
325 sub.l %d2, %d3 | %d3 -= %d2
326
327 movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4]
328 | %d5 = p->buf[XDELAYB-3]
329 | %d6 = p->buf[XDELAYB-2]
330 | %d7 = p->buf[XDELAYB-1]
331 sub.l %d3, %d7
332 neg.l %d7 | %d7 = %d3 - %d7
333
334 move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
335
336 movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0]
337 | %d2 = p->XcoeffsB[1]
338 | %a0 = p->XcoeffsB[2]
339 | %a1 = p->XcoeffsB[3]
340 | %a2 = p->XcoeffsB[4]
341
342 mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0]
343 mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
344 mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
345 mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
346 mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
347
348 move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3
349
350 tst.l %d7
351 beq.s 1f
352 spl.b %d7
353 extb.l %d7
354 or.l #1, %d7
3551: | %d7 = SIGN(%d7)
356 move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
357
358 tst.l %d3
359 beq.s 1f
360 spl.b %d3
361 extb.l %d3
362 or.l #1, %d3
3631: | %d3 = SIGN(%d3)
364 move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3
365
366 | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4]
367 | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
368
369 move.l (%a4), %d0 | %d0 = *decoded1
370 beq.s 3f | *decoded1 == 0: leave the coefficients unchanged
371
372 movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4]
373 | %d5 = p->buf[XADAPTCOEFFSB-3]
374 | %d6 = p->buf[XADAPTCOEFFSB-2]
375
376 bmi.s 1f | flags from the load of *decoded1 are still valid here
377
378 | *decoded1 > 0
379
380 sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
381 sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
382 sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
383 sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
384 sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
385
386 movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
387
388 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
389 | %d5 = p->XcoeffsA[1]
390 | %d6 = p->XcoeffsA[2]
391 | %d7 = p->XcoeffsA[3]
392
393 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
394 | %d2 = p->buf[XADAPTCOEFFSA-3]
395 | %a0 = p->buf[XADAPTCOEFFSA-2]
396 | %a1 = p->buf[XADAPTCOEFFSA-1]
397 | %a2 = p->buf[XADAPTCOEFFSA]
398
399 sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
400 sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
401 sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
402 sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
403
404 bra.s 2f
405
4061: | *decoded1 < 0
407
408 add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
409 add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
410 add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
411 add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
412 add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
413
414 movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
415
416 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
417 | %d5 = p->XcoeffsA[1]
418 | %d6 = p->XcoeffsA[2]
419 | %d7 = p->XcoeffsA[3]
420
421 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
422 | %d2 = p->buf[XADAPTCOEFFSA-3]
423 | %a0 = p->buf[XADAPTCOEFFSA-2]
424 | %a1 = p->buf[XADAPTCOEFFSA-1]
425 | %a2 = p->buf[XADAPTCOEFFSA]
426
427 add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
428 add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
429 add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
430 add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
431
4322:
433 movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
434
4353:
436 | Finish Predictor X
437
438 movclr.l %acc0, %d1 | %d1 = predictionA
439 movclr.l %acc1, %d2 | %d2 = predictionB
440 asr.l #1, %d2
441 add.l %d2, %d1 | %d1 += (%d2 >> 1)
442 asr.l #8, %d1
443 asr.l #2, %d1 | %d1 >>= 10
444 add.l %d0, %d1 | %d1 += %d0
445 move.l %d1, (XlastA,%a6) | p->XlastA = %d1
446
447 move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA
448 move.l %d2, %d0
449 lsl.l #5, %d2
450 sub.l %d0, %d2 | %d2 = 31 * %d2
451 asr.l #5, %d2 | %d2 >>= 5
452 add.l %d1, %d2
453 move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2
454
455 | *decoded1 stored 3 instructions down, avoiding pipeline stall
456
457 | ***** COMMON *****
458
459 addq.l #4, %a5 | p->buf++
460 lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2
461 | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
462
463 move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA)
464
465 cmp.l %a2, %a5
466 beq.s .move_hist | History buffer is full, we need to do a memmove
467
468 subq.l #1, (%sp) | decrease loop count
469 bne.w .loop
470
471.done:
472 move.l %a5, (%a6) | Save value of p->buf
473 movem.l (4,%sp), %d2-%d7/%a2-%a6 | restore saved registers
474 lea.l (12*4,%sp), %sp | release the stack frame
475 rts
476
477.move_hist:
478 lea.l (historybuffer,%a6), %a2
479
480 | dest = %a2 (p->historybuffer)
481 | src = %a5 (p->buf)
482 | n = 200 bytes, copied as 5 blocks of 10 registers
483
484 movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
485 movem.l %d0-%d7/%a0-%a1, (%a2)
486 movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes
487 movem.l %d0-%d7/%a0-%a1, (40,%a2)
488 movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes
489 movem.l %d0-%d7/%a0-%a1, (80,%a2)
490 movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes
491 movem.l %d0-%d7/%a0-%a1, (120,%a2)
492 movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes
493 movem.l %d0-%d7/%a0-%a1, (160,%a2)
494
495 move.l %a2, %a5 | p->buf = &p->historybuffer[0]
496
497 subq.l #1, (%sp) | decrease loop count
498 bne.w .loop
499
500 bra.s .done
501 .size predictor_decode_stereo, .-predictor_decode_stereo
502
503
504 .global predictor_decode_mono
505 .type predictor_decode_mono,@function
506| Mono predictor: for each of count samples, replaces *decoded0 with the updated p->YfilterA.
507| void predictor_decode_mono(struct predictor_t* p,
508| int32_t* decoded0,
509| int count)
510| In the loop: %a4 = decoded0, %a5 = p->buf, %a6 = p, %d7 = remaining count, %d3 = current YlastA value
511predictor_decode_mono:
512 lea.l (-11*4,%sp), %sp | room for 11 saved registers
513 movem.l %d2-%d7/%a2-%a6, (%sp) | save registers
514
515 move.l #0, %macsr | signed integer mode
516
517 move.l (11*4+4,%sp), %a6 | %a6 = p
518 move.l (11*4+8,%sp), %a4 | %a4 = decoded0
519 move.l (11*4+12,%sp), %d7 | %d7 = count
520 move.l (%a6), %a5 | %a5 = p->buf
521
522 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA (carried in %d3 between iterations)
523
524.loopm:
525
526 | ***** PREDICTOR *****
527
528 movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
529 | %d1 = p->buf[YDELAYA-2]
530 | %d2 = p->buf[YDELAYA-1]
531
532 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
533
534 sub.l %d3, %d2
535 neg.l %d2 | %d2 = %d3 - %d2
536
537 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
538
539 movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0]
540 | %a1 = p->YcoeffsA[1]
541 | %a2 = p->YcoeffsA[2]
542 | %a3 = p->YcoeffsA[3]
543
544 mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
545 mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
546 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
547 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
548
549 tst.l %d2
550 beq.s 1f
551 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
552 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
553 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
5541: | %d2 = SIGN(%d2): 0 if zero, -1 if positive, +1 if negative
555 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
556
557 tst.l %d3
558 beq.s 1f
559 spl.b %d3
560 extb.l %d3
561 or.l #1, %d3
5621: | %d3 = SIGN(%d3)
563 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
564
565 move.l (%a4), %d0 | %d0 = *decoded0
566 beq.s 3f | *decoded0 == 0: leave the coefficients unchanged
567
568 movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3]
569 | %d5 = p->buf[YADAPTCOEFFSA-2]
570
571 bmi.s 1f | flags from the load of *decoded0 are still valid here
572
573 | *decoded0 > 0
574
575 sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
576 sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
577 sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
578 sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
579
580 bra.s 2f
581
5821: | *decoded0 < 0
583
584 add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
585 add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
586 add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
587 add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
588
5892:
590 movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[]
591
5923:
593 | Finish Predictor
594
595 movclr.l %acc0, %d3 | %d3 = predictionA
596 asr.l #8, %d3
597 asr.l #2, %d3 | %d3 >>= 10
598 add.l %d0, %d3 | %d3 += %d0 (new YlastA value, kept in %d3)
599
600 move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
601 move.l %d2, %d0
602 lsl.l #5, %d2
603 sub.l %d0, %d2 | %d2 = 31 * %d2
604 asr.l #5, %d2 | %d2 >>= 5
605 add.l %d3, %d2
606 move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
607
608 | *decoded0 stored 3 instructions down, avoiding pipeline stall
609
610 | ***** COMMON *****
611
612 addq.l #4, %a5 | p->buf++
613 lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3
614 | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
615
616 move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA)
617
618 cmp.l %a3, %a5
619 beq.s .move_histm | History buffer is full, we need to do a memmove
620
621 subq.l #1, %d7 | decrease loop count
622 bne.w .loopm
623
624 move.l %d3, (YlastA,%a6) | p->YlastA = %d3
625
626.donem:
627 move.l %a5, (%a6) | Save value of p->buf
628 movem.l (%sp), %d2-%d7/%a2-%a6 | restore saved registers
629 lea.l (11*4,%sp), %sp | release the stack frame
630 rts
631
632.move_histm:
633 move.l %d3, (YlastA,%a6) | p->YlastA = %d3 (%d3 is overwritten by the copy below)
634
635 lea.l (historybuffer,%a6), %a3
636
637 | dest = %a3 (p->historybuffer)
638 | src = %a5 (p->buf)
639 | n = 200 bytes, copied as 5 blocks of 10 registers (%d7 = count is left alone)
640
641 movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes
642 movem.l %d0-%d6/%a0-%a2, (%a3)
643 movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes
644 movem.l %d0-%d6/%a0-%a2, (40,%a3)
645 movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes
646 movem.l %d0-%d6/%a0-%a2, (80,%a3)
647 movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes
648 movem.l %d0-%d6/%a0-%a2, (120,%a3)
649 movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes
650 movem.l %d0-%d6/%a0-%a2, (160,%a3)
651
652 move.l %a3, %a5 | p->buf = &p->historybuffer[0]
653
654 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA (reload after the copy)
655
656 subq.l #1, %d7 | decrease loop count
657 bne.w .loopm
658
659 bra.s .donem
660 .size predictor_decode_mono, .-predictor_decode_mono