diff options
author | Jens Arnold <amiconn@rockbox.org> | 2007-10-19 21:35:07 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2007-10-19 21:35:07 +0000 |
commit | 5d066590cc1285f4cbefef60267f0942e58a4cb0 (patch) | |
tree | 9f58b9a50a2093fd686c8bb4a262fc2ff95361ff /apps | |
parent | aae1c89456f202312c1bb0fdd785054aa71d295f (diff) | |
download | rockbox-5d066590cc1285f4cbefef60267f0942e58a4cb0.tar.gz rockbox-5d066590cc1285f4cbefef60267f0942e58a4cb0.zip |
APE codec: Assembler optimised predictor for coldfire. Heavily based on the arm version atm, instruction reordering will probably allow for a bit more speedup soon. Speedup: -c1000: 177% -> 210%, -c2000: 135% -> 147%, -c3000: 97% -> 103%.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15211 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/demac/libdemac/SOURCES | 2 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/predictor-cf.S | 526 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/predictor.c | 10 |
3 files changed, 530 insertions, 8 deletions
diff --git a/apps/codecs/demac/libdemac/SOURCES b/apps/codecs/demac/libdemac/SOURCES index c68fff104e..5a4482376c 100644 --- a/apps/codecs/demac/libdemac/SOURCES +++ b/apps/codecs/demac/libdemac/SOURCES | |||
@@ -2,6 +2,8 @@ crc.c | |||
2 | predictor.c | 2 | predictor.c |
3 | #ifdef CPU_ARM | 3 | #ifdef CPU_ARM |
4 | predictor-arm.S | 4 | predictor-arm.S |
5 | #elif defined CPU_COLDFIRE | ||
6 | predictor-cf.S | ||
5 | #endif | 7 | #endif |
6 | entropy.c | 8 | entropy.c |
7 | decoder.c | 9 | decoder.c |
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S new file mode 100644 index 0000000000..19873420c3 --- /dev/null +++ b/apps/codecs/demac/libdemac/predictor-cf.S | |||
@@ -0,0 +1,526 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | Coldfire predictor copyright (C) 2007 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | .text | ||
28 | |||
29 | .align 2 | ||
30 | |||
31 | .global predictor_decode_stereo | ||
32 | .type predictor_decode_stereo,@function | ||
33 | |||
34 | /* NOTE: The following need to be kept in sync with parser.h */ | ||
35 | |||
36 | #define HISTORY_SIZE 512 | ||
37 | |||
38 | #define YDELAYA 200 | ||
39 | #define YDELAYB 168 | ||
40 | #define XDELAYA 136 | ||
41 | #define XDELAYB 104 | ||
42 | #define YADAPTCOEFFSA 72 | ||
43 | #define XADAPTCOEFFSA 56 | ||
44 | #define YADAPTCOEFFSB 40 | ||
45 | #define XADAPTCOEFFSB 20 | ||
46 | |||
47 | /* struct predictor_t members: */ | ||
48 | #define buf 0 /* int32_t* buf */ | ||
49 | |||
50 | #define YlastA 4 /* int32_t YlastA; */ | ||
51 | #define XlastA 8 /* int32_t XlastA; */ | ||
52 | |||
53 | #define YfilterB 12 /* int32_t YfilterB; */ | ||
54 | #define XfilterA 16 /* int32_t XfilterA; */ | ||
55 | |||
56 | #define XfilterB 20 /* int32_t XfilterB; */ | ||
57 | #define YfilterA 24 /* int32_t YfilterA; */ | ||
58 | |||
59 | #define YcoeffsA 28 /* int32_t YcoeffsA[4]; */ | ||
60 | #define XcoeffsA 44 /* int32_t XcoeffsA[4]; */ | ||
61 | #define YcoeffsB 60 /* int32_t YcoeffsB[5]; */ | ||
62 | #define XcoeffsB 80 /* int32_t XcoeffsB[5]; */ | ||
63 | |||
64 | #define historybuffer 100 /* int32_t historybuffer[] */ | ||
65 | |||
66 | |||
67 | | void predictor_decode_stereo(struct predictor_t* p, | ||
68 | | int32_t* decoded0, | ||
69 | | int32_t* decoded1, | ||
70 | | int count) | ||
71 | |||
72 | predictor_decode_stereo: | ||
73 | lea.l (-14*4,%sp), %sp | ||
74 | movem.l %d2-%d7/%a2-%a6, (3*4,%sp) | ||
75 | |||
76 | movem.l (14*4+8,%sp), %d0-%d2 | ||
77 | movem.l %d0-%d2, (%sp) | (%sp) = decoded0 | ||
78 | | (4,%sp) = decoded1 | ||
79 | | (8,%sp) = count | ||
80 | |||
81 | move.l #0, %macsr | signed integer mode | ||
82 | move.l (14*4+4,%sp), %a6 | %a6 = p | ||
83 | move.l (%a6), %a5 | %a5 = p->buf | ||
84 | |||
85 | .loop: | ||
86 | |||
87 | | ***** PREDICTOR Y ***** | ||
88 | |||
89 | | Predictor Y, Filter A | ||
90 | |||
91 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
92 | |||
93 | movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] | ||
94 | | %d1 = p->buf[YDELAYA-2] | ||
95 | | %d2 = p->buf[YDELAYA-1] | ||
96 | |||
97 | sub.l %d3, %d2 | ||
98 | neg.l %d2 | %d2 = %d3 - %d2 | ||
99 | |||
100 | movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] | ||
101 | | %a1 = p->YcoeffsA[1] | ||
102 | | %a2 = p->YcoeffsA[2] | ||
103 | | %a3 = p->YcoeffsA[3] | ||
104 | |||
105 | mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | ||
106 | mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
107 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
108 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
109 | |||
110 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
111 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
112 | |||
113 | movclr.l %acc0, %d0 | ||
114 | |||
115 | tst.l %d2 | ||
116 | beq.s 1f | ||
117 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
118 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
119 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
120 | 1: | %d2 = SIGN(%d2) | ||
121 | tst.l %d3 | ||
122 | beq.s 1f | ||
123 | spl.b %d3 | ||
124 | extb.l %d3 | ||
125 | or.l #1, %d3 | ||
126 | 1: | %d3 = SIGN(%d3) | ||
127 | |||
128 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
129 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | ||
130 | |||
131 | | NOTE: %d0 now contains predictionA - don't overwrite. | ||
132 | |||
133 | | Predictor Y, Filter B | ||
134 | |||
135 | movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB | ||
136 | | %d3 = p->XfilterA | ||
137 | move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3 | ||
138 | |||
139 | move.l %d2, %d1 | %d1 = %d2 | ||
140 | lsl.l #5, %d2 | %d2 = %d2 * 32 | ||
141 | sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) | ||
142 | asr.l #5, %d2 | %d2 >>= 5 | ||
143 | sub.l %d2, %d3 | %d3 -= %d2 | ||
144 | |||
145 | movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4] | ||
146 | | %d5 = p->buf[YDELAYB-3] | ||
147 | | %d6 = p->buf[YDELAYB-2] | ||
148 | | %d7 = p->buf[YDELAYB-1] | ||
149 | sub.l %d3, %d7 | ||
150 | neg.l %d7 | %d7 = %d3 - %d7 | ||
151 | |||
152 | movem.l (YcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->YcoeffsB[0] | ||
153 | | %a0 = p->YcoeffsB[1] | ||
154 | | %a1 = p->YcoeffsB[2] | ||
155 | | %a2 = p->YcoeffsB[3] | ||
156 | | %a3 = p->YcoeffsB[4] | ||
157 | |||
158 | mac.l %d3, %d2, %acc0 | %acc0 = p->buf[YDELAYB] * p->YcoeffsB[0] | ||
159 | mac.l %d7, %a0, %acc0 | %acc0 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | ||
160 | mac.l %d6, %a1, %acc0 | %acc0 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | ||
161 | mac.l %d5, %a2, %acc0 | %acc0 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | ||
162 | mac.l %d4, %a3, %acc0 | %acc0 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | ||
163 | |||
164 | move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7 | ||
165 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 | ||
166 | |||
167 | movclr.l %acc0, %d1 | ||
168 | |||
169 | tst.l %d7 | ||
170 | beq.s 1f | ||
171 | spl.b %d7 | ||
172 | extb.l %d7 | ||
173 | or.l #1, %d7 | ||
174 | 1: | %d7 = SIGN(%d7) | ||
175 | tst.l %d3 | ||
176 | beq.s 1f | ||
177 | spl.b %d3 | ||
178 | extb.l %d3 | ||
179 | or.l #1, %d3 | ||
180 | 1: | %d3 = SIGN(%d3) | ||
181 | |||
182 | move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7 | ||
183 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 | ||
184 | |||
185 | | %d0 still contains predictionA | ||
186 | | %d1 contains predictionB | ||
187 | |||
188 | | Finish Predictor Y | ||
189 | |||
190 | move.l (%sp), %a4 | %a4 = decoded0 | ||
191 | asr.l #1, %d1 | ||
192 | add.l %d1, %d0 | %d0 += (%d1 >> 1) | ||
193 | move.l (%a4), %d5 | %d5 = *decoded0 | ||
194 | move.l %d5, %d1 | %d1 = %d5 | ||
195 | asr.l #8, %d0 | ||
196 | asr.l #2, %d0 | %d0 >>= 10 | ||
197 | add.l %d0, %d1 | %d1 += %d0 | ||
198 | move.l %d1, (YlastA,%a6) | p->YlastA = %d1 | ||
199 | |||
200 | move.l (YfilterA,%a6), %d6 | %d6 = p->YfilterA | ||
201 | move.l %d6, %d0 | ||
202 | lsl.l #5, %d6 | ||
203 | sub.l %d0, %d6 | %d6 = 31 * %d6 | ||
204 | asr.l #5, %d6 | %d6 >>= 5 | ||
205 | add.l %d6, %d1 | ||
206 | move.l %d1, (YfilterA,%a6) | p->YfilterA = %d1 | ||
207 | |||
208 | | %d1 contains p->YfilterA | ||
209 | | %a4 contains decoded0 | ||
210 | | %d5 contains *decoded0 | ||
211 | |||
212 | | %d2, %a0, %a1, %a2, %a3 contain p->YcoeffsB[0..4] | ||
213 | | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] | ||
214 | |||
215 | move.l %d1, (%a4)+ | *(decoded0++) = %d1 (p->YfilterA) | ||
216 | move.l %a4, (%sp) | save decoded0 | ||
217 | tst.l %d5 | ||
218 | beq.s 2f | ||
219 | |||
220 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | d4 = p->buf[YADAPTCOEFFSB-4] | ||
221 | | d5 = p->buf[YADAPTCOEFFSB-3] | ||
222 | | d6 = p->buf[YADAPTCOEFFSB-2] | ||
223 | |||
224 | bmi.s 1f | flags still valid here | ||
225 | |||
226 | | *decoded0 > 0 | ||
227 | |||
228 | sub.l %d3, %d2 | d2 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | ||
229 | sub.l %d7, %a0 | a0 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | ||
230 | sub.l %d6, %a1 | a1 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | ||
231 | sub.l %d5, %a2 | a2 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | ||
232 | sub.l %d4, %a3 | a3 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | ||
233 | |||
234 | movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] | ||
235 | |||
236 | movem.l (YcoeffsA,%a6), %d4-%d7 | d4 = p->YcoeffsA[0] | ||
237 | | d5 = p->YcoeffsA[1] | ||
238 | | d6 = p->YcoeffsA[2] | ||
239 | | d7 = p->YcoeffsA[3] | ||
240 | |||
241 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[YADAPTCOEFFSA-3] | ||
242 | | a0 = p->buf[YADAPTCOEFFSA-2] | ||
243 | | a1 = p->buf[YADAPTCOEFFSA-1] | ||
244 | | a2 = p->buf[YADAPTCOEFFSA] | ||
245 | |||
246 | sub.l %a2, %d4 | d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
247 | sub.l %a1, %d5 | d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
248 | sub.l %a0, %d6 | d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
249 | sub.l %d2, %d7 | d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
250 | |||
251 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
252 | bra.s 2f | ||
253 | |||
254 | 1: | *decoded0 < 0 | ||
255 | |||
256 | add.l %d3, %d2 | d2 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | ||
257 | add.l %d7, %a0 | a0 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | ||
258 | add.l %d6, %a1 | a1 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | ||
259 | add.l %d5, %a2 | a2 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | ||
260 | add.l %d4, %a3 | a3 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | ||
261 | |||
262 | movem.l %d2/%a0-%a3, (YcoeffsB,%a6) | Save p->YcoeffsB[] | ||
263 | |||
264 | movem.l (YcoeffsA,%a6), %d4-%d7 | d4 = p->YcoeffsA[0] | ||
265 | | d5 = p->YcoeffsA[1] | ||
266 | | d6 = p->YcoeffsA[2] | ||
267 | | d7 = p->YcoeffsA[3] | ||
268 | |||
269 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[YADAPTCOEFFSA-3] | ||
270 | | a0 = p->buf[YADAPTCOEFFSA-2] | ||
271 | | a1 = p->buf[YADAPTCOEFFSA-1] | ||
272 | | a2 = p->buf[YADAPTCOEFFSA] | ||
273 | |||
274 | add.l %a2, %d4 | d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
275 | add.l %a1, %d5 | d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
276 | add.l %a0, %d6 | d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
277 | add.l %d2, %d7 | d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
278 | |||
279 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
280 | |||
281 | 2: | ||
282 | |||
283 | | ***** PREDICTOR X ***** | ||
284 | |||
285 | | Predictor X, Filter A | ||
286 | |||
287 | move.l (XlastA,%a6), %d3 | %d3 = p->XlastA | ||
288 | |||
289 | movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3] | ||
290 | | %d1 = p->buf[XDELAYA-2] | ||
291 | | %d2 = p->buf[XDELAYA-1] | ||
292 | |||
293 | sub.l %d3, %d2 | ||
294 | neg.l %d2 | %d2 = %d3 - %d2 | ||
295 | |||
296 | movem.l (XcoeffsA,%a6), %a0-%a3 | %a0 = p->XcoeffsA[0] | ||
297 | | %a1 = p->XcoeffsA[1] | ||
298 | | %a2 = p->XcoeffsA[2] | ||
299 | | %a3 = p->XcoeffsA[3] | ||
300 | |||
301 | mac.l %d3, %a0, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0] | ||
302 | mac.l %d2, %a1, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | ||
303 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | ||
304 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | ||
305 | |||
306 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 | ||
307 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 | ||
308 | |||
309 | movclr.l %acc0, %d0 | ||
310 | |||
311 | tst.l %d2 | ||
312 | beq.s 1f | ||
313 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
314 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
315 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
316 | 1: | %d2 = SIGN(%d2) | ||
317 | tst.l %d3 | ||
318 | beq.s 1f | ||
319 | spl.b %d3 | ||
320 | extb.l %d3 | ||
321 | or.l #1, %d3 | ||
322 | 1: | %d3 = SIGN(%d3) | ||
323 | |||
324 | move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2 | ||
325 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 | ||
326 | |||
327 | | NOTE: %d0 now contains predictionA - don't overwrite. | ||
328 | |||
329 | | Predictor X, Filter B | ||
330 | |||
331 | movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB | ||
332 | | %d3 = p->YfilterA | ||
333 | move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3 | ||
334 | |||
335 | move.l %d2, %d1 | %d1 = %d2 | ||
336 | lsl.l #5, %d2 | %d2 = %d2 * 32 | ||
337 | sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) | ||
338 | asr.l #5, %d2 | %d2 >>= 5 | ||
339 | sub.l %d2, %d3 | %d3 -= %d2 | ||
340 | |||
341 | movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4] | ||
342 | | %d5 = p->buf[XDELAYB-3] | ||
343 | | %d6 = p->buf[XDELAYB-2] | ||
344 | | %d7 = p->buf[XDELAYB-1] | ||
345 | sub.l %d3, %d7 | ||
346 | neg.l %d7 | %d7 = %d3 - %d7 | ||
347 | |||
348 | movem.l (XcoeffsB,%a6), %d2/%a0-%a3 | %d2 = p->XcoeffsB[0] | ||
349 | | %a0 = p->XcoeffsB[1] | ||
350 | | %a1 = p->XcoeffsB[2] | ||
351 | | %a2 = p->XcoeffsB[3] | ||
352 | | %a3 = p->XcoeffsB[4] | ||
353 | |||
354 | mac.l %d3, %d2, %acc0 | %acc0 = p->buf[XDELAYB] * p->XcoeffsB[0] | ||
355 | mac.l %d7, %a0, %acc0 | %acc0 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | ||
356 | mac.l %d6, %a1, %acc0 | %acc0 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | ||
357 | mac.l %d5, %a2, %acc0 | %acc0 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | ||
358 | mac.l %d4, %a3, %acc0 | %acc0 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | ||
359 | |||
360 | move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7 | ||
361 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 | ||
362 | |||
363 | movclr.l %acc0, %d1 | ||
364 | |||
365 | tst.l %d7 | ||
366 | beq.s 1f | ||
367 | spl.b %d7 | ||
368 | extb.l %d7 | ||
369 | or.l #1, %d7 | ||
370 | 1: | %d7 = SIGN(%d7) | ||
371 | tst.l %d3 | ||
372 | beq.s 1f | ||
373 | spl.b %d3 | ||
374 | extb.l %d3 | ||
375 | or.l #1, %d3 | ||
376 | 1: | %d3 = SIGN(%d3) | ||
377 | |||
378 | move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7 | ||
379 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 | ||
380 | |||
381 | | %d0 still contains predictionA | ||
382 | | %d1 contains predictionB | ||
383 | |||
384 | | Finish Predictor X | ||
385 | |||
386 | move.l (4,%sp), %a4 | %a4 = decoded1 | ||
387 | asr.l #1, %d1 | ||
388 | add.l %d1, %d0 | %d0 += (%d1 >> 1) | ||
389 | move.l (%a4), %d5 | %d5 = *decoded1 | ||
390 | move.l %d5, %d1 | %d1 = %d5 | ||
391 | asr.l #8, %d0 | ||
392 | asr.l #2, %d0 | %d0 >>= 10 | ||
393 | add.l %d0, %d1 | %d1 += %d0 | ||
394 | move.l %d1, (XlastA,%a6) | p->XlastA = %d1 | ||
395 | |||
396 | move.l (XfilterA,%a6), %d6 | %d6 = p->XfilterA | ||
397 | move.l %d6, %d0 | ||
398 | lsl.l #5, %d6 | ||
399 | sub.l %d0, %d6 | %d6 = 31 * %d6 | ||
400 | asr.l #5, %d6 | %d6 >>= 5 | ||
401 | add.l %d6, %d1 | ||
402 | move.l %d1, (XfilterA,%a6) | p->XfilterA = %d1 | ||
403 | |||
404 | | %d1 contains p->XfilterA | ||
405 | | %a4 contains decoded1 | ||
406 | | %d5 contains *decoded1 | ||
407 | |||
408 | | %d2, %a0, %a1, %a2, %a3 contain p->XcoeffsB[0..4] | ||
409 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | ||
410 | |||
411 | move.l %d1, (%a4)+ | *(decoded1++) = %d1 (p->XfilterA) | ||
412 | move.l %a4, (4,%sp) | save decoded1 | ||
413 | tst.l %d5 | ||
414 | beq.s 2f | ||
415 | |||
416 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | d4 = p->buf[XADAPTCOEFFSB-4] | ||
417 | | d5 = p->buf[XADAPTCOEFFSB-3] | ||
418 | | d6 = p->buf[XADAPTCOEFFSB-2] | ||
419 | |||
420 | bmi.s 1f | flags still valid here | ||
421 | |||
422 | | *decoded1 > 0 | ||
423 | |||
424 | sub.l %d3, %d2 | d2 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | ||
425 | sub.l %d7, %a0 | a0 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | ||
426 | sub.l %d6, %a1 | a1 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | ||
427 | sub.l %d5, %a2 | a2 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | ||
428 | sub.l %d4, %a3 | a3 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | ||
429 | |||
430 | movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] | ||
431 | |||
432 | movem.l (XcoeffsA,%a6), %d4-%d7 | d4 = p->XcoeffsA[0] | ||
433 | | d5 = p->XcoeffsA[1] | ||
434 | | d6 = p->XcoeffsA[2] | ||
435 | | d7 = p->XcoeffsA[3] | ||
436 | |||
437 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[XADAPTCOEFFSA-3] | ||
438 | | a0 = p->buf[XADAPTCOEFFSA-2] | ||
439 | | a1 = p->buf[XADAPTCOEFFSA-1] | ||
440 | | a2 = p->buf[XADAPTCOEFFSA] | ||
441 | |||
442 | sub.l %a2, %d4 | d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | ||
443 | sub.l %a1, %d5 | d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | ||
444 | sub.l %a0, %d6 | d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | ||
445 | sub.l %d2, %d7 | d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | ||
446 | |||
447 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
448 | bra.s 2f | ||
449 | |||
450 | 1: | *decoded1 < 0 | ||
451 | |||
452 | add.l %d3, %d2 | d2 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | ||
453 | add.l %d7, %a0 | a0 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | ||
454 | add.l %d6, %a1 | a1 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | ||
455 | add.l %d5, %a2 | a2 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | ||
456 | add.l %d4, %a3 | a3 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | ||
457 | |||
458 | movem.l %d2/%a0-%a3, (XcoeffsB,%a6) | Save p->XcoeffsB[] | ||
459 | |||
460 | movem.l (XcoeffsA,%a6), %d4-%d7 | d4 = p->XcoeffsA[0] | ||
461 | | d5 = p->XcoeffsA[1] | ||
462 | | d6 = p->XcoeffsA[2] | ||
463 | | d7 = p->XcoeffsA[3] | ||
464 | |||
465 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | d2 = p->buf[XADAPTCOEFFSA-3] | ||
466 | | a0 = p->buf[XADAPTCOEFFSA-2] | ||
467 | | a1 = p->buf[XADAPTCOEFFSA-1] | ||
468 | | a2 = p->buf[XADAPTCOEFFSA] | ||
469 | |||
470 | add.l %a2, %d4 | d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | ||
471 | add.l %a1, %d5 | d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | ||
472 | add.l %a0, %d6 | d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | ||
473 | add.l %d2, %d7 | d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | ||
474 | |||
475 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
476 | |||
477 | 2: | ||
478 | |||
479 | | ***** COMMON ***** | ||
480 | |||
481 | addq.l #4, %a5 | p->buf++ | ||
482 | |||
483 | lea.l (historybuffer+HISTORY_SIZE*4,%a6), %a3 | %a3 = &p->historybuffer[HISTORY_SIZE] | ||
484 | |||
485 | cmp.l %a3, %a5 | ||
486 | bne.s .endofloop | ||
487 | |||
488 | | The history buffer is full, we need to do a memmove: | ||
489 | |||
490 | lea.l (historybuffer,%a6), %a3 | ||
491 | |||
492 | | dest = %a3 (p->historybuffer) | ||
493 | | src = %a5 (p->buf) | ||
494 | | n = 200 bytes (5 x 40) | ||
495 | |||
496 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
497 | lea.l (40,%a5), %a5 | ||
498 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
499 | lea.l (40,%a3), %a3 | ||
500 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
501 | lea.l (40,%a5), %a5 | ||
502 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
503 | lea.l (40,%a3), %a3 | ||
504 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
505 | lea.l (40,%a5), %a5 | ||
506 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
507 | lea.l (40,%a3), %a3 | ||
508 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
509 | lea.l (40,%a5), %a5 | ||
510 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
511 | lea.l (40,%a3), %a3 | ||
512 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
513 | lea.l (40,%a5), %a5 | ||
514 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
515 | lea.l (40,%a3), %a3 | ||
516 | |||
517 | lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0] | ||
518 | |||
519 | .endofloop: | ||
520 | subq.l #1, (8,%sp) | decrease loop count | ||
521 | bne.w .loop | ||
522 | |||
523 | move.l %a5, (%a6) | Save value of p->buf | ||
524 | movem.l (3*4,%sp), %d2-%d7/%a2-%a6 | ||
525 | lea.l (14*4,%sp), %sp | ||
526 | rts | ||
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c index efc334e858..edf8b71575 100644 --- a/apps/codecs/demac/libdemac/predictor.c +++ b/apps/codecs/demac/libdemac/predictor.c | |||
@@ -66,16 +66,10 @@ void init_predictor_decoder(struct predictor_t* p) | |||
66 | p->XlastA = 0; | 66 | p->XlastA = 0; |
67 | } | 67 | } |
68 | 68 | ||
69 | #ifdef CPU_COLDFIRE | 69 | #if !defined(CPU_ARM) && !defined(CPU_COLDFIRE) |
70 | /* Putting this in IRAM makes a small speedup (e.g. 186% -> 187% | ||
71 | realtime for a -c1000 file on Coldfire, but is slower on PP. */ | ||
72 | int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR; | ||
73 | #endif | ||
74 | |||
75 | #ifndef CPU_ARM | ||
76 | int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) | 70 | int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) |
77 | { | 71 | { |
78 | int32_t predictionA, predictionB; | 72 | int32_t predictionA, predictionB; |
79 | 73 | ||
80 | while (count--) | 74 | while (count--) |
81 | { | 75 | { |