diff options
author | Sean Bartell <wingedtachikoma@gmail.com> | 2011-06-25 21:32:25 -0400 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-04-25 22:13:20 +0200 |
commit | f40bfc9267b13b54e6379dfe7539447662879d24 (patch) | |
tree | 9b20069d5e62809ff434061ad730096836f916f2 /lib/rbcodec/codecs/demac/libdemac/predictor-cf.S | |
parent | a0009907de7a0107d49040d8a180f140e2eff299 (diff) | |
download | rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.tar.gz rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.zip |
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/predictor-cf.S')
-rw-r--r-- | lib/rbcodec/codecs/demac/libdemac/predictor-cf.S | 660 |
1 files changed, 660 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S new file mode 100644 index 0000000000..fc1d901a59 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S | |||
@@ -0,0 +1,660 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | Coldfire predictor copyright (C) 2007 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #include "demac_config.h" | ||
28 | |||
29 | /* NOTE: The following byte offsets into p->buf need to be kept in sync with parser.h */ | ||
30 | |||
31 | #define YDELAYA 200 | ||
32 | #define YDELAYB 168 | ||
33 | #define XDELAYA 136 | ||
34 | #define XDELAYB 104 | ||
35 | #define YADAPTCOEFFSA 72 | ||
36 | #define XADAPTCOEFFSA 56 | ||
37 | #define YADAPTCOEFFSB 40 | ||
38 | #define XADAPTCOEFFSB 20 | ||
39 | |||
40 | /* struct predictor_t members (byte offsets from p): */ | ||
41 | #define buf 0 /* int32_t* buf */ | ||
42 | |||
43 | #define YlastA 4 /* int32_t YlastA; */ | ||
44 | #define XlastA 8 /* int32_t XlastA; */ | ||
45 | |||
46 | #define YfilterB 12 /* int32_t YfilterB; */ | ||
47 | #define XfilterA 16 /* int32_t XfilterA; */ | ||
48 | |||
49 | #define XfilterB 20 /* int32_t XfilterB; */ | ||
50 | #define YfilterA 24 /* int32_t YfilterA; */ | ||
51 | |||
52 | #define YcoeffsA 28 /* int32_t YcoeffsA[4]; */ | ||
53 | #define XcoeffsA 44 /* int32_t XcoeffsA[4]; */ | ||
54 | #define YcoeffsB 60 /* int32_t YcoeffsB[5]; */ | ||
55 | #define XcoeffsB 80 /* int32_t XcoeffsB[5]; */ | ||
56 | |||
57 | #define historybuffer 100 /* int32_t historybuffer[] */ | ||
58 | |||
59 | |||
60 | .text | ||
61 | |||
62 | .align 2 | ||
63 | |||
64 | .global predictor_decode_stereo | ||
65 | .type predictor_decode_stereo,@function | ||
66 | |||
67 | | void predictor_decode_stereo(struct predictor_t* p, | ||
68 | | int32_t* decoded0, | ||
69 | | int32_t* decoded1, | ||
70 | | int count) | ||
71 | |||
72 | predictor_decode_stereo: | ||
73 | lea.l (-12*4,%sp), %sp | ||
74 | movem.l %d2-%d7/%a2-%a6, (4,%sp) | ||
75 | |||
76 | movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0 | ||
77 | | %a4 = decoded1 | ||
78 | move.l %a5, (%sp) | (%sp) = count (third word loaded by the movem above) | ||
79 | |||
80 | move.l #0, %macsr | signed integer mode | ||
81 | move.l (12*4+4,%sp), %a6 | %a6 = p | ||
82 | move.l (%a6), %a5 | %a5 = p->buf | ||
83 | |||
84 | .loop: | ||
85 | |||
86 | | ***** PREDICTOR Y ***** | ||
87 | |||
88 | | Predictor Y, Filter A | ||
89 | |||
90 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
91 | |||
92 | movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] | ||
93 | | %d1 = p->buf[YDELAYA-2] | ||
94 | | %d2 = p->buf[YDELAYA-1] | ||
95 | |||
96 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
97 | |||
98 | sub.l %d3, %d2 | ||
99 | neg.l %d2 | %d2 = %d3 - %d2 | ||
100 | |||
101 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
102 | |||
103 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | ||
104 | | %d5 = p->YcoeffsA[1] | ||
105 | | %d6 = p->YcoeffsA[2] | ||
106 | | %d7 = p->YcoeffsA[3] | ||
107 | |||
108 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | ||
109 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
110 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
111 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
112 | |||
113 | tst.l %d2 | ||
114 | beq.s 1f | ||
115 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
116 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
117 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
118 | 1: | %d2 = SIGN(%d2): -1 if positive, +1 if negative, 0 if zero | ||
119 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
120 | |||
121 | tst.l %d3 | ||
122 | beq.s 1f | ||
123 | spl.b %d3 | ||
124 | extb.l %d3 | ||
125 | or.l #1, %d3 | ||
126 | 1: | %d3 = SIGN(%d3) | ||
127 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | ||
128 | |||
129 | | Predictor Y, Filter B | ||
130 | |||
131 | movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB | ||
132 | | %d3 = p->XfilterA | ||
133 | move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3 | ||
134 | |||
135 | move.l %d2, %d1 | %d1 = %d2 | ||
136 | lsl.l #5, %d2 | %d2 = %d2 * 32 | ||
137 | sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) | ||
138 | asr.l #5, %d2 | %d2 >>= 5 | ||
139 | sub.l %d2, %d3 | %d3 -= %d2 | ||
140 | |||
141 | movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4] | ||
142 | | %d5 = p->buf[YDELAYB-3] | ||
143 | | %d6 = p->buf[YDELAYB-2] | ||
144 | | %d7 = p->buf[YDELAYB-1] | ||
145 | sub.l %d3, %d7 | ||
146 | neg.l %d7 | %d7 = %d3 - %d7 | ||
147 | |||
148 | move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7 | ||
149 | |||
150 | movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0] | ||
151 | | %d2 = p->YcoeffsB[1] | ||
152 | | %a0 = p->YcoeffsB[2] | ||
153 | | %a1 = p->YcoeffsB[3] | ||
154 | | %a2 = p->YcoeffsB[4] | ||
155 | |||
156 | mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0] | ||
157 | mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | ||
158 | mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | ||
159 | mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | ||
160 | mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | ||
161 | |||
162 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 | ||
163 | |||
164 | tst.l %d7 | ||
165 | beq.s 1f | ||
166 | spl.b %d7 | ||
167 | extb.l %d7 | ||
168 | or.l #1, %d7 | ||
169 | 1: | %d7 = SIGN(%d7) | ||
170 | move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7 | ||
171 | tst.l %d3 | ||
172 | beq.s 1f | ||
173 | spl.b %d3 | ||
174 | extb.l %d3 | ||
175 | or.l #1, %d3 | ||
176 | 1: | %d3 = SIGN(%d3) | ||
177 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 | ||
178 | |||
179 | | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4] | ||
180 | | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] | ||
181 | |||
182 | move.l (%a3), %d0 | %d0 = *decoded0 | ||
183 | beq.s 3f | ||
184 | |||
185 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4] | ||
186 | | %d5 = p->buf[YADAPTCOEFFSB-3] | ||
187 | | %d6 = p->buf[YADAPTCOEFFSB-2] | ||
188 | |||
189 | bmi.s 1f | flags still valid here | ||
190 | |||
191 | | *decoded0 > 0 | ||
192 | |||
193 | sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | ||
194 | sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | ||
195 | sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | ||
196 | sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | ||
197 | sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | ||
198 | |||
199 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] | ||
200 | |||
201 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | ||
202 | | %d5 = p->YcoeffsA[1] | ||
203 | | %d6 = p->YcoeffsA[2] | ||
204 | | %d7 = p->YcoeffsA[3] | ||
205 | |||
206 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
207 | | %d2 = p->buf[YADAPTCOEFFSA-3] | ||
208 | | %a0 = p->buf[YADAPTCOEFFSA-2] | ||
209 | | %a1 = p->buf[YADAPTCOEFFSA-1] | ||
210 | | %a2 = p->buf[YADAPTCOEFFSA] | ||
211 | |||
212 | sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
213 | sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
214 | sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
215 | sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
216 | |||
217 | bra.s 2f | ||
218 | |||
219 | 1: | *decoded0 < 0 | ||
220 | |||
221 | add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | ||
222 | add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | ||
223 | add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | ||
224 | add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | ||
225 | add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | ||
226 | |||
227 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] | ||
228 | |||
229 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | ||
230 | | %d5 = p->YcoeffsA[1] | ||
231 | | %d6 = p->YcoeffsA[2] | ||
232 | | %d7 = p->YcoeffsA[3] | ||
233 | |||
234 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
235 | | %d2 = p->buf[YADAPTCOEFFSA-3] | ||
236 | | %a0 = p->buf[YADAPTCOEFFSA-2] | ||
237 | | %a1 = p->buf[YADAPTCOEFFSA-1] | ||
238 | | %a2 = p->buf[YADAPTCOEFFSA] | ||
239 | |||
240 | add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
241 | add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
242 | add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
243 | add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
244 | |||
245 | 2: | ||
246 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
247 | |||
248 | 3: | ||
249 | | Finish Predictor Y | ||
250 | |||
251 | movclr.l %acc0, %d1 | %d1 = predictionA | ||
252 | movclr.l %acc1, %d2 | %d2 = predictionB | ||
253 | asr.l #1, %d2 | ||
254 | add.l %d2, %d1 | %d1 += (%d2 >> 1) | ||
255 | asr.l #8, %d1 | ||
256 | asr.l #2, %d1 | %d1 >>= 10 | ||
257 | add.l %d0, %d1 | %d1 += %d0 | ||
258 | move.l %d1, (YlastA,%a6) | p->YlastA = %d1 | ||
259 | |||
260 | move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA | ||
261 | move.l %d2, %d0 | ||
262 | lsl.l #5, %d2 | ||
263 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
264 | asr.l #5, %d2 | %d2 >>= 5 | ||
265 | add.l %d1, %d2 | %d2 += %d1 | ||
266 | move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 | ||
267 | |||
268 | | *decoded0 stored 2 instructions down, avoiding pipeline stall | ||
269 | |||
270 | | ***** PREDICTOR X ***** | ||
271 | |||
272 | | Predictor X, Filter A | ||
273 | |||
274 | move.l (XlastA,%a6), %d3 | %d3 = p->XlastA | ||
275 | |||
276 | move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA) | ||
277 | |||
278 | movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3] | ||
279 | | %d1 = p->buf[XDELAYA-2] | ||
280 | | %d2 = p->buf[XDELAYA-1] | ||
281 | |||
282 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 | ||
283 | |||
284 | sub.l %d3, %d2 | ||
285 | neg.l %d2 | %d2 = %d3 - %d2 | ||
286 | |||
287 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 | ||
288 | |||
289 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | ||
290 | | %d5 = p->XcoeffsA[1] | ||
291 | | %d6 = p->XcoeffsA[2] | ||
292 | | %d7 = p->XcoeffsA[3] | ||
293 | |||
294 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0] | ||
295 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | ||
296 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | ||
297 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | ||
298 | |||
299 | tst.l %d2 | ||
300 | beq.s 1f | ||
301 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
302 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
303 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
304 | 1: | %d2 = SIGN(%d2) | ||
305 | move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2 | ||
306 | |||
307 | tst.l %d3 | ||
308 | beq.s 1f | ||
309 | spl.b %d3 | ||
310 | extb.l %d3 | ||
311 | or.l #1, %d3 | ||
312 | 1: | %d3 = SIGN(%d3) | ||
313 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 | ||
314 | |||
315 | | Predictor X, Filter B | ||
316 | |||
317 | movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB | ||
318 | | %d3 = p->YfilterA | ||
319 | move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3 | ||
320 | |||
321 | move.l %d2, %d1 | %d1 = %d2 | ||
322 | lsl.l #5, %d2 | %d2 = %d2 * 32 | ||
323 | sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) | ||
324 | asr.l #5, %d2 | %d2 >>= 5 | ||
325 | sub.l %d2, %d3 | %d3 -= %d2 | ||
326 | |||
327 | movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4] | ||
328 | | %d5 = p->buf[XDELAYB-3] | ||
329 | | %d6 = p->buf[XDELAYB-2] | ||
330 | | %d7 = p->buf[XDELAYB-1] | ||
331 | sub.l %d3, %d7 | ||
332 | neg.l %d7 | %d7 = %d3 - %d7 | ||
333 | |||
334 | move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7 | ||
335 | |||
336 | movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0] | ||
337 | | %d2 = p->XcoeffsB[1] | ||
338 | | %a0 = p->XcoeffsB[2] | ||
339 | | %a1 = p->XcoeffsB[3] | ||
340 | | %a2 = p->XcoeffsB[4] | ||
341 | |||
342 | mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0] | ||
343 | mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | ||
344 | mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | ||
345 | mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | ||
346 | mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | ||
347 | |||
348 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 | ||
349 | |||
350 | tst.l %d7 | ||
351 | beq.s 1f | ||
352 | spl.b %d7 | ||
353 | extb.l %d7 | ||
354 | or.l #1, %d7 | ||
355 | 1: | %d7 = SIGN(%d7) | ||
356 | move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7 | ||
357 | |||
358 | tst.l %d3 | ||
359 | beq.s 1f | ||
360 | spl.b %d3 | ||
361 | extb.l %d3 | ||
362 | or.l #1, %d3 | ||
363 | 1: | %d3 = SIGN(%d3) | ||
364 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 | ||
365 | |||
366 | | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4] | ||
367 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | ||
368 | |||
369 | move.l (%a4), %d0 | %d0 = *decoded1 | ||
370 | beq.s 3f | ||
371 | |||
372 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4] | ||
373 | | %d5 = p->buf[XADAPTCOEFFSB-3] | ||
374 | | %d6 = p->buf[XADAPTCOEFFSB-2] | ||
375 | |||
376 | bmi.s 1f | flags still valid here | ||
377 | |||
378 | | *decoded1 > 0 | ||
379 | |||
380 | sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | ||
381 | sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | ||
382 | sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | ||
383 | sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | ||
384 | sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | ||
385 | |||
386 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] | ||
387 | |||
388 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | ||
389 | | %d5 = p->XcoeffsA[1] | ||
390 | | %d6 = p->XcoeffsA[2] | ||
391 | | %d7 = p->XcoeffsA[3] | ||
392 | |||
393 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
394 | | %d2 = p->buf[XADAPTCOEFFSA-3] | ||
395 | | %a0 = p->buf[XADAPTCOEFFSA-2] | ||
396 | | %a1 = p->buf[XADAPTCOEFFSA-1] | ||
397 | | %a2 = p->buf[XADAPTCOEFFSA] | ||
398 | |||
399 | sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | ||
400 | sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | ||
401 | sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | ||
402 | sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | ||
403 | |||
404 | bra.s 2f | ||
405 | |||
406 | 1: | *decoded1 < 0 | ||
407 | |||
408 | add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | ||
409 | add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | ||
410 | add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | ||
411 | add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | ||
412 | add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | ||
413 | |||
414 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] | ||
415 | |||
416 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | ||
417 | | %d5 = p->XcoeffsA[1] | ||
418 | | %d6 = p->XcoeffsA[2] | ||
419 | | %d7 = p->XcoeffsA[3] | ||
420 | |||
421 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
422 | | %d2 = p->buf[XADAPTCOEFFSA-3] | ||
423 | | %a0 = p->buf[XADAPTCOEFFSA-2] | ||
424 | | %a1 = p->buf[XADAPTCOEFFSA-1] | ||
425 | | %a2 = p->buf[XADAPTCOEFFSA] | ||
426 | |||
427 | add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | ||
428 | add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | ||
429 | add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | ||
430 | add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | ||
431 | |||
432 | 2: | ||
433 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
434 | |||
435 | 3: | ||
436 | | Finish Predictor X | ||
437 | |||
438 | movclr.l %acc0, %d1 | %d1 = predictionA | ||
439 | movclr.l %acc1, %d2 | %d2 = predictionB | ||
440 | asr.l #1, %d2 | ||
441 | add.l %d2, %d1 | %d1 += (%d2 >> 1) | ||
442 | asr.l #8, %d1 | ||
443 | asr.l #2, %d1 | %d1 >>= 10 | ||
444 | add.l %d0, %d1 | %d1 += %d0 | ||
445 | move.l %d1, (XlastA,%a6) | p->XlastA = %d1 | ||
446 | |||
447 | move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA | ||
448 | move.l %d2, %d0 | ||
449 | lsl.l #5, %d2 | ||
450 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
451 | asr.l #5, %d2 | %d2 >>= 5 | ||
452 | add.l %d1, %d2 | %d2 += %d1 | ||
453 | move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2 | ||
454 | |||
455 | | *decoded1 stored 3 instructions down, avoiding pipeline stall | ||
456 | |||
457 | | ***** COMMON ***** | ||
458 | |||
459 | addq.l #4, %a5 | p->buf++ | ||
460 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2 | ||
461 | | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | ||
462 | |||
463 | move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA) | ||
464 | |||
465 | cmp.l %a2, %a5 | ||
466 | beq.s .move_hist | History buffer is full, we need to do a memmove | ||
467 | |||
468 | subq.l #1, (%sp) | decrease loop count | ||
469 | bne.w .loop | ||
470 | |||
471 | .done: | ||
472 | move.l %a5, (%a6) | Save value of p->buf | ||
473 | movem.l (4,%sp), %d2-%d7/%a2-%a6 | ||
474 | lea.l (12*4,%sp), %sp | ||
475 | rts | ||
476 | |||
477 | .move_hist: | ||
478 | lea.l (historybuffer,%a6), %a2 | ||
479 | |||
480 | | dest = %a2 (p->historybuffer) | ||
481 | | src = %a5 (p->buf) | ||
482 | | n = 200 | ||
483 | |||
484 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
485 | movem.l %d0-%d7/%a0-%a1, (%a2) | ||
486 | movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
487 | movem.l %d0-%d7/%a0-%a1, (40,%a2) | ||
488 | movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
489 | movem.l %d0-%d7/%a0-%a1, (80,%a2) | ||
490 | movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
491 | movem.l %d0-%d7/%a0-%a1, (120,%a2) | ||
492 | movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
493 | movem.l %d0-%d7/%a0-%a1, (160,%a2) | ||
494 | |||
495 | move.l %a2, %a5 | p->buf = &p->historybuffer[0] | ||
496 | |||
497 | subq.l #1, (%sp) | decrease loop count | ||
498 | bne.w .loop | ||
499 | |||
500 | bra.s .done | ||
501 | .size predictor_decode_stereo, .-predictor_decode_stereo | ||
502 | |||
503 | |||
504 | .global predictor_decode_mono | ||
505 | .type predictor_decode_mono,@function | ||
506 | |||
507 | | void predictor_decode_mono(struct predictor_t* p, | ||
508 | | int32_t* decoded0, | ||
509 | | int count) | ||
510 | |||
511 | predictor_decode_mono: | ||
512 | lea.l (-11*4,%sp), %sp | ||
513 | movem.l %d2-%d7/%a2-%a6, (%sp) | ||
514 | |||
515 | move.l #0, %macsr | signed integer mode | ||
516 | |||
517 | move.l (11*4+4,%sp), %a6 | %a6 = p | ||
518 | move.l (11*4+8,%sp), %a4 | %a4 = decoded0 | ||
519 | move.l (11*4+12,%sp), %d7 | %d7 = count | ||
520 | move.l (%a6), %a5 | %a5 = p->buf | ||
521 | |||
522 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
523 | |||
524 | .loopm: | ||
525 | |||
526 | | ***** PREDICTOR ***** | ||
527 | |||
528 | movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] | ||
529 | | %d1 = p->buf[YDELAYA-2] | ||
530 | | %d2 = p->buf[YDELAYA-1] | ||
531 | |||
532 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
533 | |||
534 | sub.l %d3, %d2 | ||
535 | neg.l %d2 | %d2 = %d3 - %d2 | ||
536 | |||
537 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
538 | |||
539 | movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] | ||
540 | | %a1 = p->YcoeffsA[1] | ||
541 | | %a2 = p->YcoeffsA[2] | ||
542 | | %a3 = p->YcoeffsA[3] | ||
543 | |||
544 | mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | ||
545 | mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
546 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
547 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
548 | |||
549 | tst.l %d2 | ||
550 | beq.s 1f | ||
551 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
552 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
553 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
554 | 1: | %d2 = SIGN(%d2): -1 if positive, +1 if negative, 0 if zero | ||
555 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
556 | |||
557 | tst.l %d3 | ||
558 | beq.s 1f | ||
559 | spl.b %d3 | ||
560 | extb.l %d3 | ||
561 | or.l #1, %d3 | ||
562 | 1: | %d3 = SIGN(%d3) | ||
563 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | ||
564 | |||
565 | move.l (%a4), %d0 | %d0 = *decoded0 | ||
566 | beq.s 3f | ||
567 | |||
568 | movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3] | ||
569 | | %d5 = p->buf[YADAPTCOEFFSA-2] | ||
570 | |||
571 | bmi.s 1f | flags still valid here | ||
572 | |||
573 | | *decoded0 > 0 | ||
574 | |||
575 | sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
576 | sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
577 | sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
578 | sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
579 | |||
580 | bra.s 2f | ||
581 | |||
582 | 1: | *decoded0 < 0 | ||
583 | |||
584 | add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
585 | add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
586 | add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
587 | add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
588 | |||
589 | 2: | ||
590 | movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[] | ||
591 | |||
592 | 3: | ||
593 | | Finish Predictor | ||
594 | |||
595 | movclr.l %acc0, %d3 | %d3 = predictionA | ||
596 | asr.l #8, %d3 | ||
597 | asr.l #2, %d3 | %d3 >>= 10 | ||
598 | add.l %d0, %d3 | %d3 += %d0 | ||
599 | |||
600 | move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA | ||
601 | move.l %d2, %d0 | ||
602 | lsl.l #5, %d2 | ||
603 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
604 | asr.l #5, %d2 | %d2 >>= 5 | ||
605 | add.l %d3, %d2 | %d2 += %d3 | ||
606 | move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 | ||
607 | |||
608 | | *decoded0 stored 3 instructions down, avoiding pipeline stall | ||
609 | |||
610 | | ***** COMMON ***** | ||
611 | |||
612 | addq.l #4, %a5 | p->buf++ | ||
613 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3 | ||
614 | | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | ||
615 | |||
616 | move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA) | ||
617 | |||
618 | cmp.l %a3, %a5 | ||
619 | beq.s .move_histm | History buffer is full, we need to do a memmove | ||
620 | |||
621 | subq.l #1, %d7 | decrease loop count | ||
622 | bne.w .loopm | ||
623 | |||
624 | move.l %d3, (YlastA,%a6) | p->YlastA = %d3 | ||
625 | |||
626 | .donem: | ||
627 | move.l %a5, (%a6) | Save value of p->buf | ||
628 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
629 | lea.l (11*4,%sp), %sp | ||
630 | rts | ||
631 | |||
632 | .move_histm: | ||
633 | move.l %d3, (YlastA,%a6) | p->YlastA = %d3 | ||
634 | |||
635 | lea.l (historybuffer,%a6), %a3 | ||
636 | |||
637 | | dest = %a3 (p->historybuffer) | ||
638 | | src = %a5 (p->buf) | ||
639 | | n = 200 | ||
640 | |||
641 | movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
642 | movem.l %d0-%d6/%a0-%a2, (%a3) | ||
643 | movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
644 | movem.l %d0-%d6/%a0-%a2, (40,%a3) | ||
645 | movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
646 | movem.l %d0-%d6/%a0-%a2, (80,%a3) | ||
647 | movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
648 | movem.l %d0-%d6/%a0-%a2, (120,%a3) | ||
649 | movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
650 | movem.l %d0-%d6/%a0-%a2, (160,%a3) | ||
651 | |||
652 | move.l %a3, %a5 | p->buf = &p->historybuffer[0] | ||
653 | |||
654 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
655 | |||
656 | subq.l #1, %d7 | decrease loop count | ||
657 | bne.w .loopm | ||
658 | |||
659 | bra.s .donem | ||
660 | .size predictor_decode_mono, .-predictor_decode_mono | ||