diff options
author | Jens Arnold <amiconn@rockbox.org> | 2008-12-02 02:26:04 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2008-12-02 02:26:04 +0000 |
commit | c1cd0469ca9f084b39d747ccca5d64442c3833ca (patch) | |
tree | 976efee8d7131013414583e5bd2ad2fe323c8063 | |
parent | 6c65b357bca384a3d65a6795edc2928b889254ee (diff) | |
download | rockbox-c1cd0469ca9f084b39d747ccca5d64442c3833ca.tar.gz rockbox-c1cd0469ca9f084b39d747ccca5d64442c3833ca.zip |
Implement mono predictor in assembler for coldfire, yielding a ~6% speedup for mono -c1000. Apply ideas gained from it back to the stereo predictor, saving 4 instructions. No speed increase for stereo, probably due to cache aliasing effects. * 80-column police.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19296 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | apps/codecs/demac/libdemac/predictor-cf.S | 435 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/predictor.c | 2 |
2 files changed, 291 insertions, 146 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S index cd2e07fd5e..c76d7f629a 100644 --- a/apps/codecs/demac/libdemac/predictor-cf.S +++ b/apps/codecs/demac/libdemac/predictor-cf.S | |||
@@ -25,13 +25,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
25 | */ | 25 | */ |
26 | #include "demac_config.h" | 26 | #include "demac_config.h" |
27 | 27 | ||
28 | .text | ||
29 | |||
30 | .align 2 | ||
31 | |||
32 | .global predictor_decode_stereo | ||
33 | .type predictor_decode_stereo,@function | ||
34 | |||
35 | /* NOTE: The following need to be kept in sync with parser.h */ | 28 | /* NOTE: The following need to be kept in sync with parser.h */ |
36 | 29 | ||
37 | #define YDELAYA 200 | 30 | #define YDELAYA 200 |
@@ -63,6 +56,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
63 | #define historybuffer 100 /* int32_t historybuffer[] */ | 56 | #define historybuffer 100 /* int32_t historybuffer[] */ |
64 | 57 | ||
65 | 58 | ||
59 | .text | ||
60 | |||
61 | .align 2 | ||
62 | |||
63 | .global predictor_decode_stereo | ||
64 | .type predictor_decode_stereo,@function | ||
65 | |||
66 | | void predictor_decode_stereo(struct predictor_t* p, | 66 | | void predictor_decode_stereo(struct predictor_t* p, |
67 | | int32_t* decoded0, | 67 | | int32_t* decoded0, |
68 | | int32_t* decoded1, | 68 | | int32_t* decoded1, |
@@ -92,6 +92,8 @@ predictor_decode_stereo: | |||
92 | | %d1 = p->buf[YDELAYA-2] | 92 | | %d1 = p->buf[YDELAYA-2] |
93 | | %d2 = p->buf[YDELAYA-1] | 93 | | %d2 = p->buf[YDELAYA-1] |
94 | 94 | ||
95 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
96 | |||
95 | sub.l %d3, %d2 | 97 | sub.l %d3, %d2 |
96 | neg.l %d2 | %d2 = %d3 - %d2 | 98 | neg.l %d2 | %d2 = %d3 - %d2 |
97 | 99 | ||
@@ -102,12 +104,10 @@ predictor_decode_stereo: | |||
102 | | %d6 = p->YcoeffsA[2] | 104 | | %d6 = p->YcoeffsA[2] |
103 | | %d7 = p->YcoeffsA[3] | 105 | | %d7 = p->YcoeffsA[3] |
104 | 106 | ||
105 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | 107 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] |
106 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | 108 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] |
107 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | 109 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] |
108 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | 110 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] |
109 | |||
110 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
111 | 111 | ||
112 | tst.l %d2 | 112 | tst.l %d2 |
113 | beq.s 1f | 113 | beq.s 1f |
@@ -125,10 +125,6 @@ predictor_decode_stereo: | |||
125 | 1: | %d3 = SIGN(%d3) | 125 | 1: | %d3 = SIGN(%d3) |
126 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | 126 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 |
127 | 127 | ||
128 | movclr.l %acc0, %d0 | ||
129 | |||
130 | | NOTE: %d0 now contains predictionA - don't overwrite. | ||
131 | |||
132 | | Predictor Y, Filter B | 128 | | Predictor Y, Filter B |
133 | 129 | ||
134 | movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB | 130 | movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB |
@@ -156,11 +152,11 @@ predictor_decode_stereo: | |||
156 | | %a1 = p->YcoeffsB[3] | 152 | | %a1 = p->YcoeffsB[3] |
157 | | %a2 = p->YcoeffsB[4] | 153 | | %a2 = p->YcoeffsB[4] |
158 | 154 | ||
159 | mac.l %d3, %d1, %acc0 | %acc0 = p->buf[YDELAYB] * p->YcoeffsB[0] | 155 | mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0] |
160 | mac.l %d7, %d2, %acc0 | %acc0 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | 156 | mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] |
161 | mac.l %d6, %a0, %acc0 | %acc0 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | 157 | mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] |
162 | mac.l %d5, %a1, %acc0 | %acc0 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | 158 | mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] |
163 | mac.l %d4, %a2, %acc0 | %acc0 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | 159 | mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] |
164 | 160 | ||
165 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 | 161 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 |
166 | 162 | ||
@@ -179,38 +175,10 @@ predictor_decode_stereo: | |||
179 | 1: | %d3 = SIGN(%d3) | 175 | 1: | %d3 = SIGN(%d3) |
180 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 | 176 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 |
181 | 177 | ||
182 | movclr.l %acc0, %d4 | ||
183 | |||
184 | | %d0 still contains predictionA | ||
185 | | %d4 contains predictionB | ||
186 | |||
187 | | Finish Predictor Y | ||
188 | |||
189 | asr.l #1, %d4 | ||
190 | add.l %d4, %d0 | %d0 += (%d1 >> 1) | ||
191 | move.l (%a3), %d5 | %d5 = *decoded0 | ||
192 | move.l %d5, %d4 | %d4 = %d5 | ||
193 | asr.l #8, %d0 | ||
194 | asr.l #2, %d0 | %d0 >>= 10 | ||
195 | add.l %d0, %d4 | %d4 += %d0 | ||
196 | move.l %d4, (YlastA,%a6) | p->YlastA = %d4 | ||
197 | |||
198 | move.l (YfilterA,%a6), %d6 | %d6 = p->YfilterA | ||
199 | move.l %d6, %d0 | ||
200 | lsl.l #5, %d6 | ||
201 | sub.l %d0, %d6 | %d6 = 31 * %d6 | ||
202 | asr.l #5, %d6 | %d6 >>= 5 | ||
203 | add.l %d6, %d4 | ||
204 | move.l %d4, (YfilterA,%a6) | p->YfilterA = %d4 | ||
205 | |||
206 | | %d4 contains p->YfilterA | ||
207 | | %d5 contains *decoded0 | ||
208 | |||
209 | | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4] | 178 | | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4] |
210 | | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] | 179 | | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] |
211 | 180 | ||
212 | move.l %d4, (%a3)+ | *(decoded0++) = %d1 (p->YfilterA) | 181 | move.l (%a3), %d0 | %d0 = *decoded0 |
213 | tst.l %d5 | ||
214 | beq.s 3f | 182 | beq.s 3f |
215 | 183 | ||
216 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4] | 184 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4] |
@@ -221,11 +189,11 @@ predictor_decode_stereo: | |||
221 | 189 | ||
222 | | *decoded0 > 0 | 190 | | *decoded0 > 0 |
223 | 191 | ||
224 | sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | 192 | sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] |
225 | sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | 193 | sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] |
226 | sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | 194 | sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] |
227 | sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | 195 | sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] |
228 | sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | 196 | sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] |
229 | 197 | ||
230 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] | 198 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] |
231 | 199 | ||
@@ -234,47 +202,69 @@ predictor_decode_stereo: | |||
234 | | %d6 = p->YcoeffsA[2] | 202 | | %d6 = p->YcoeffsA[2] |
235 | | %d7 = p->YcoeffsA[3] | 203 | | %d7 = p->YcoeffsA[3] |
236 | 204 | ||
237 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[YADAPTCOEFFSA-3] | 205 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 |
206 | | %d2 = p->buf[YADAPTCOEFFSA-3] | ||
238 | | %a0 = p->buf[YADAPTCOEFFSA-2] | 207 | | %a0 = p->buf[YADAPTCOEFFSA-2] |
239 | | %a1 = p->buf[YADAPTCOEFFSA-1] | 208 | | %a1 = p->buf[YADAPTCOEFFSA-1] |
240 | | %a2 = p->buf[YADAPTCOEFFSA] | 209 | | %a2 = p->buf[YADAPTCOEFFSA] |
241 | 210 | ||
242 | sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | 211 | sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] |
243 | sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | 212 | sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] |
244 | sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | 213 | sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] |
245 | sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | 214 | sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] |
246 | 215 | ||
247 | bra.s 2f | 216 | bra.s 2f |
248 | 217 | ||
249 | 1: | *decoded0 < 0 | 218 | 1: | *decoded0 < 0 |
250 | 219 | ||
251 | add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | 220 | add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] |
252 | add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | 221 | add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] |
253 | add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | 222 | add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] |
254 | add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | 223 | add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] |
255 | add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | 224 | add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] |
256 | 225 | ||
257 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] | 226 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] |
258 | 227 | ||
259 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | 228 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] |
260 | | %d5 = p->YcoeffsA[1] | 229 | | %d5 = p->YcoeffsA[1] |
261 | | %d6 = p->YcoeffsA[2] | 230 | | %d6 = p->YcoeffsA[2] |
262 | | %d7 = p->YcoeffsA[3] | 231 | | %d7 = p->YcoeffsA[3] |
263 | 232 | ||
264 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[YADAPTCOEFFSA-3] | 233 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 |
234 | | %d2 = p->buf[YADAPTCOEFFSA-3] | ||
265 | | %a0 = p->buf[YADAPTCOEFFSA-2] | 235 | | %a0 = p->buf[YADAPTCOEFFSA-2] |
266 | | %a1 = p->buf[YADAPTCOEFFSA-1] | 236 | | %a1 = p->buf[YADAPTCOEFFSA-1] |
267 | | %a2 = p->buf[YADAPTCOEFFSA] | 237 | | %a2 = p->buf[YADAPTCOEFFSA] |
268 | 238 | ||
269 | add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | 239 | add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] |
270 | add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | 240 | add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] |
271 | add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | 241 | add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] |
272 | add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | 242 | add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] |
273 | 243 | ||
274 | 2: | 244 | 2: |
275 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | 245 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] |
276 | 246 | ||
277 | 3: | 247 | 3: |
248 | | Finish Predictor Y | ||
249 | |||
250 | movclr.l %acc0, %d1 | %d1 = predictionA | ||
251 | movclr.l %acc1, %d2 | %d2 = predictionB | ||
252 | asr.l #1, %d2 | ||
253 | add.l %d2, %d1 | %d1 += (%d2 >> 1) | ||
254 | asr.l #8, %d1 | ||
255 | asr.l #2, %d1 | %d1 >>= 10 | ||
256 | add.l %d0, %d1 | %d1 += %d0 | ||
257 | move.l %d1, (YlastA,%a6) | p->YlastA = %d1 | ||
258 | |||
259 | move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA | ||
260 | move.l %d2, %d0 | ||
261 | lsl.l #5, %d2 | ||
262 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
263 | asr.l #5, %d2 | %d2 >>= 5 | ||
264 | add.l %d1, %d2 | ||
265 | move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 | ||
266 | |||
267 | | *decoded0 stored 2 instructions down, avoiding pipeline stall | ||
278 | 268 | ||
279 | | ***** PREDICTOR X ***** | 269 | | ***** PREDICTOR X ***** |
280 | 270 | ||
@@ -282,11 +272,15 @@ predictor_decode_stereo: | |||
282 | 272 | ||
283 | move.l (XlastA,%a6), %d3 | %d3 = p->XlastA | 273 | move.l (XlastA,%a6), %d3 | %d3 = p->XlastA |
284 | 274 | ||
275 | move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA) | ||
276 | |||
285 | movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3] | 277 | movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3] |
286 | | %d1 = p->buf[XDELAYA-2] | 278 | | %d1 = p->buf[XDELAYA-2] |
287 | | %d2 = p->buf[XDELAYA-1] | 279 | | %d2 = p->buf[XDELAYA-1] |
288 | 280 | ||
289 | sub.l %d3, %d2 | 281 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 |
282 | |||
283 | sub.l %d3, %d2 | ||
290 | neg.l %d2 | %d2 = %d3 -%d2 | 284 | neg.l %d2 | %d2 = %d3 -%d2 |
291 | 285 | ||
292 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 | 286 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 |
@@ -296,13 +290,11 @@ predictor_decode_stereo: | |||
296 | | %d6 = p->XcoeffsA[2] | 290 | | %d6 = p->XcoeffsA[2] |
297 | | %d7 = p->XcoeffsA[3] | 291 | | %d7 = p->XcoeffsA[3] |
298 | 292 | ||
299 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0] | 293 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0] |
300 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | 294 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] |
301 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | 295 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] |
302 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | 296 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] |
303 | 297 | ||
304 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 | ||
305 | |||
306 | tst.l %d2 | 298 | tst.l %d2 |
307 | beq.s 1f | 299 | beq.s 1f |
308 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | 300 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 |
@@ -319,10 +311,6 @@ predictor_decode_stereo: | |||
319 | 1: | %d3 = SIGN(%d3) | 311 | 1: | %d3 = SIGN(%d3) |
320 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 | 312 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 |
321 | 313 | ||
322 | movclr.l %acc0, %d0 | ||
323 | |||
324 | | NOTE: %d0 now contains predictionA - don't overwrite. | ||
325 | |||
326 | | Predictor X, Filter B | 314 | | Predictor X, Filter B |
327 | 315 | ||
328 | movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB | 316 | movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB |
@@ -350,11 +338,11 @@ predictor_decode_stereo: | |||
350 | | %a1 = p->XcoeffsB[3] | 338 | | %a1 = p->XcoeffsB[3] |
351 | | %a2 = p->XcoeffsB[4] | 339 | | %a2 = p->XcoeffsB[4] |
352 | 340 | ||
353 | mac.l %d3, %d1, %acc0 | %acc0 = p->buf[XDELAYB] * p->XcoeffsB[0] | 341 | mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0] |
354 | mac.l %d7, %d2, %acc0 | %acc0 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | 342 | mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] |
355 | mac.l %d6, %a0, %acc0 | %acc0 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | 343 | mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] |
356 | mac.l %d5, %a1, %acc0 | %acc0 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | 344 | mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] |
357 | mac.l %d4, %a2, %acc0 | %acc0 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | 345 | mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] |
358 | 346 | ||
359 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 | 347 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 |
360 | 348 | ||
@@ -374,38 +362,10 @@ predictor_decode_stereo: | |||
374 | 1: | %d3 = SIGN(%d3) | 362 | 1: | %d3 = SIGN(%d3) |
375 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 | 363 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 |
376 | 364 | ||
377 | movclr.l %acc0, %d4 | ||
378 | |||
379 | | %d0 still contains predictionA | ||
380 | | %d4 contains predictionB | ||
381 | |||
382 | | Finish Predictor X | ||
383 | |||
384 | asr.l #1, %d4 | ||
385 | add.l %d4, %d0 | %d0 += (%d1 >> 1) | ||
386 | move.l (%a4), %d5 | %d5 = *decoded1 | ||
387 | move.l %d5, %d4 | %d4 = %d5 | ||
388 | asr.l #8, %d0 | ||
389 | asr.l #2, %d0 | %d0 >>= 10 | ||
390 | add.l %d0, %d4 | %d4 += %d0 | ||
391 | move.l %d4, (XlastA,%a6) | p->XlastA = %d1 | ||
392 | |||
393 | move.l (XfilterA,%a6), %d6 | %d6 = p->XfilterA | ||
394 | move.l %d6, %d0 | ||
395 | lsl.l #5, %d6 | ||
396 | sub.l %d0, %d6 | %d6 = 31 * %d6 | ||
397 | asr.l #5, %d6 | %d6 >>= 5 | ||
398 | add.l %d6, %d4 | ||
399 | move.l %d4, (XfilterA,%a6) | p->XfilterA = %d6 | ||
400 | |||
401 | | %d4 contains p->XfilterA | ||
402 | | %d5 contains *decoded1 | ||
403 | |||
404 | | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4] | 365 | | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4] |
405 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | 366 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] |
406 | 367 | ||
407 | move.l %d4, (%a4)+ | *(decoded1++) = %d1 (p->XfilterA) | 368 | move.l (%a4), %d0 | %d0 = *decoded1 |
408 | tst.l %d5 | ||
409 | beq.s 3f | 369 | beq.s 3f |
410 | 370 | ||
411 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4] | 371 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4] |
@@ -416,38 +376,39 @@ predictor_decode_stereo: | |||
416 | 376 | ||
417 | | *decoded1 > 0 | 377 | | *decoded1 > 0 |
418 | 378 | ||
419 | sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | 379 | sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] |
420 | sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | 380 | sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] |
421 | sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | 381 | sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] |
422 | sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | 382 | sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] |
423 | sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | 383 | sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] |
424 | 384 | ||
425 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] | 385 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] |
426 | 386 | ||
427 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | 387 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] |
428 | | %d5 = p->XcoeffsA[1] | 388 | | %d5 = p->XcoeffsA[1] |
429 | | %d6 = p->XcoeffsA[2] | 389 | | %d6 = p->XcoeffsA[2] |
430 | | %d7 = p->XcoeffsA[3] | 390 | | %d7 = p->XcoeffsA[3] |
431 | 391 | ||
432 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[XADAPTCOEFFSA-3] | 392 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 |
393 | | %d2 = p->buf[XADAPTCOEFFSA-3] | ||
433 | | %a0 = p->buf[XADAPTCOEFFSA-2] | 394 | | %a0 = p->buf[XADAPTCOEFFSA-2] |
434 | | %a1 = p->buf[XADAPTCOEFFSA-1] | 395 | | %a1 = p->buf[XADAPTCOEFFSA-1] |
435 | | %a2 = p->buf[XADAPTCOEFFSA] | 396 | | %a2 = p->buf[XADAPTCOEFFSA] |
436 | 397 | ||
437 | sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | 398 | sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] |
438 | sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | 399 | sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] |
439 | sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | 400 | sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] |
440 | sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | 401 | sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] |
441 | 402 | ||
442 | bra.s 2f | 403 | bra.s 2f |
443 | 404 | ||
444 | 1: | *decoded1 < 0 | 405 | 1: | *decoded1 < 0 |
445 | 406 | ||
446 | add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | 407 | add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] |
447 | add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | 408 | add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] |
448 | add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | 409 | add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] |
449 | add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | 410 | add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] |
450 | add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | 411 | add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] |
451 | 412 | ||
452 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] | 413 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] |
453 | 414 | ||
@@ -456,31 +417,53 @@ predictor_decode_stereo: | |||
456 | | %d6 = p->XcoeffsA[2] | 417 | | %d6 = p->XcoeffsA[2] |
457 | | %d7 = p->XcoeffsA[3] | 418 | | %d7 = p->XcoeffsA[3] |
458 | 419 | ||
459 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | %d2 = p->buf[XADAPTCOEFFSA-3] | 420 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 |
421 | | %d2 = p->buf[XADAPTCOEFFSA-3] | ||
460 | | %a0 = p->buf[XADAPTCOEFFSA-2] | 422 | | %a0 = p->buf[XADAPTCOEFFSA-2] |
461 | | %a1 = p->buf[XADAPTCOEFFSA-1] | 423 | | %a1 = p->buf[XADAPTCOEFFSA-1] |
462 | | %a2 = p->buf[XADAPTCOEFFSA] | 424 | | %a2 = p->buf[XADAPTCOEFFSA] |
463 | 425 | ||
464 | add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | 426 | add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] |
465 | add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | 427 | add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] |
466 | add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | 428 | add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] |
467 | add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | 429 | add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] |
468 | 430 | ||
469 | 2: | 431 | 2: |
470 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | 432 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] |
471 | 433 | ||
472 | 3: | 434 | 3: |
435 | | Finish Predictor X | ||
436 | |||
437 | movclr.l %acc0, %d1 | %d1 = predictionA | ||
438 | movclr.l %acc1, %d2 | %d2 = predictionB | ||
439 | asr.l #1, %d2 | ||
440 | add.l %d2, %d1 | %d1 += (%d2 >> 1) | ||
441 | asr.l #8, %d1 | ||
442 | asr.l #2, %d1 | %d1 >>= 10 | ||
443 | add.l %d0, %d1 | %d1 += %d0 | ||
444 | move.l %d1, (XlastA,%a6) | p->XlastA = %d1 | ||
445 | |||
446 | move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA | ||
447 | move.l %d2, %d0 | ||
448 | lsl.l #5, %d2 | ||
449 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
450 | asr.l #5, %d2 | %d2 >>= 5 | ||
451 | add.l %d1, %d2 | ||
452 | move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2 | ||
453 | |||
454 | | *decoded1 stored 3 instructions down, avoiding pipeline stall | ||
473 | 455 | ||
474 | | ***** COMMON ***** | 456 | | ***** COMMON ***** |
475 | 457 | ||
476 | addq.l #4, %a5 | p->buf++ | 458 | addq.l #4, %a5 | p->buf++ |
477 | |||
478 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2 | 459 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2 |
479 | | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | 460 | | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] |
480 | 461 | ||
462 | move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA) | ||
463 | |||
481 | cmp.l %a2, %a5 | 464 | cmp.l %a2, %a5 |
482 | beq.s .move_hist | The history buffer is full, we need to do a memmove | 465 | beq.s .move_hist | History buffer is full, we need to do a memmove |
483 | 466 | ||
484 | subq.l #1, (%sp) | decrease loop count | 467 | subq.l #1, (%sp) | decrease loop count |
485 | bne.w .loop | 468 | bne.w .loop |
486 | 469 | ||
@@ -514,3 +497,163 @@ predictor_decode_stereo: | |||
514 | bne.w .loop | 497 | bne.w .loop |
515 | 498 | ||
516 | bra.s .done | 499 | bra.s .done |
500 | .size predictor_decode_stereo, .-predictor_decode_stereo | ||
501 | |||
502 | |||
503 | .global predictor_decode_mono | ||
504 | .type predictor_decode_mono,@function | ||
505 | |||
506 | | void predictor_decode_mono(struct predictor_t* p, | ||
507 | | int32_t* decoded0, | ||
508 | | int count) | ||
509 | |||
510 | predictor_decode_mono: | ||
511 | lea.l (-11*4,%sp), %sp | ||
512 | movem.l %d2-%d7/%a2-%a6, (%sp) | ||
513 | |||
514 | move.l #0, %macsr | signed integer mode | ||
515 | |||
516 | move.l (11*4+4,%sp), %a6 | %a6 = p | ||
517 | move.l (11*4+8,%sp), %a4 | %a4 = decoded0 | ||
518 | move.l (11*4+12,%sp), %d7 | %d7 = count | ||
519 | move.l (%a6), %a5 | %a5 = p->buf | ||
520 | |||
521 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
522 | |||
523 | .loopm: | ||
524 | |||
525 | | ***** PREDICTOR ***** | ||
526 | |||
527 | movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] | ||
528 | | %d1 = p->buf[YDELAYA-2] | ||
529 | | %d2 = p->buf[YDELAYA-1] | ||
530 | |||
531 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
532 | |||
533 | sub.l %d3, %d2 | ||
534 | neg.l %d2 | %d2 = %d3 - %d2 | ||
535 | |||
536 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
537 | |||
538 | movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] | ||
539 | | %a1 = p->YcoeffsA[1] | ||
540 | | %a2 = p->YcoeffsA[2] | ||
541 | | %a3 = p->YcoeffsA[3] | ||
542 | |||
543 | mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | ||
544 | mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
545 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
546 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
547 | |||
548 | tst.l %d2 | ||
549 | beq.s 1f | ||
550 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
551 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
552 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
553 | 1: | %d2 = SIGN(%d2) | ||
554 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
555 | |||
556 | tst.l %d3 | ||
557 | beq.s 1f | ||
558 | spl.b %d3 | ||
559 | extb.l %d3 | ||
560 | or.l #1, %d3 | ||
561 | 1: | %d3 = SIGN(%d3) | ||
562 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | ||
563 | |||
564 | move.l (%a4), %d0 | %d0 = *decoded0 | ||
565 | beq.s 3f | ||
566 | |||
567 | movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3] | ||
568 | | %d5 = p->buf[YADAPTCOEFFSA-2] | ||
569 | |||
570 | bmi.s 1f | flags still valid here | ||
571 | |||
572 | | *decoded0 > 0 | ||
573 | |||
574 | sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
575 | sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
576 | sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
577 | sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
578 | |||
579 | bra.s 2f | ||
580 | |||
581 | 1: | *decoded0 < 0 | ||
582 | |||
583 | add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
584 | add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
585 | add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
586 | add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
587 | |||
588 | 2: | ||
589 | movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[] | ||
590 | |||
591 | 3: | ||
592 | | Finish Predictor | ||
593 | |||
594 | movclr.l %acc0, %d3 | %d3 = predictionA | ||
595 | asr.l #8, %d3 | ||
596 | asr.l #2, %d3 | %d3 >>= 10 | ||
597 | add.l %d0, %d3 | %d3 += %d0 | ||
598 | |||
599 | move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA | ||
600 | move.l %d2, %d0 | ||
601 | lsl.l #5, %d2 | ||
602 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
603 | asr.l #5, %d2 | %d2 >>= 5 | ||
604 | add.l %d3, %d2 | ||
605 | move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 | ||
606 | |||
607 | | *decoded0 stored 3 instructions down, avoiding pipeline stall | ||
608 | |||
609 | | ***** COMMON ***** | ||
610 | |||
611 | addq.l #4, %a5 | p->buf++ | ||
612 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3 | ||
613 | | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | ||
614 | |||
615 | move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA) | ||
616 | |||
617 | cmp.l %a3, %a5 | ||
618 | beq.s .move_histm | History buffer is full, we need to do a memmove | ||
619 | |||
620 | subq.l #1, %d7 | decrease loop count | ||
621 | bne.w .loopm | ||
622 | |||
623 | move.l %d3, (YlastA,%a6) | p->YlastA = %d3 | ||
624 | |||
625 | .donem: | ||
626 | move.l %a5, (%a6) | Save value of p->buf | ||
627 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
628 | lea.l (11*4,%sp), %sp | ||
629 | rts | ||
630 | |||
631 | .move_histm: | ||
632 | move.l %d3, (YlastA,%a6) | p->YlastA = %d3 | ||
633 | |||
634 | lea.l (historybuffer,%a6), %a3 | ||
635 | |||
636 | | dest = %a3 (p->historybuffer) | ||
637 | | src = %a5 (p->buf) | ||
638 | | n = 200 | ||
639 | |||
640 | movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
641 | movem.l %d0-%d6/%a0-%a2, (%a3) | ||
642 | movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
643 | movem.l %d0-%d6/%a0-%a2, (40,%a3) | ||
644 | movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
645 | movem.l %d0-%d6/%a0-%a2, (80,%a3) | ||
646 | movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
647 | movem.l %d0-%d6/%a0-%a2, (120,%a3) | ||
648 | movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
649 | movem.l %d0-%d6/%a0-%a2, (160,%a3) | ||
650 | |||
651 | move.l %a3, %a5 | p->buf = &p->historybuffer[0] | ||
652 | |||
653 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
654 | |||
655 | subq.l #1, %d7 | decrease loop count | ||
656 | bne.w .loopm | ||
657 | |||
658 | bra.s .donem | ||
659 | .size predictor_decode_mono, .-predictor_decode_mono | ||
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c index d4f886fb8c..0d03d1d2fb 100644 --- a/apps/codecs/demac/libdemac/predictor.c +++ b/apps/codecs/demac/libdemac/predictor.c | |||
@@ -211,6 +211,7 @@ void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p, | |||
211 | } | 211 | } |
212 | #endif | 212 | #endif |
213 | 213 | ||
214 | #if !defined(CPU_COLDFIRE) | ||
214 | void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, | 215 | void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, |
215 | int32_t* decoded0, | 216 | int32_t* decoded0, |
216 | int count) | 217 | int count) |
@@ -269,3 +270,4 @@ void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, | |||
269 | 270 | ||
270 | p->YlastA = currentA; | 271 | p->YlastA = currentA; |
271 | } | 272 | } |
273 | #endif | ||