summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
authorDave Chapman <dave@dchapman.com>2007-06-13 22:02:34 +0000
committerDave Chapman <dave@dchapman.com>2007-06-13 22:02:34 +0000
commit6b713820c180f3796c07c95826e1b1e00fdbca4f (patch)
treeba4308ac951fa4adb4c7185af1f3eb26ac14ed52 /apps/codecs
parentc7f9ca4067f26ba3d0471d50ed3f06b047171b50 (diff)
downloadrockbox-6b713820c180f3796c07c95826e1b1e00fdbca4f.tar.gz
rockbox-6b713820c180f3796c07c95826e1b1e00fdbca4f.zip
ARM assembler predictor decoding function. This increases my -c1000 test track from around 94% realtime on an ipod to around 104% realtime, but yields only a tiny speedup (453% to 455%) on the Gigabeat. Including this optimisation, total decoding time for my 245.70s -c1000 test track on an ipod is 236.06s, with the predictor decoding taking 51.40s of that time - meaning the predictor decoding is only about 22% of the total decoding time.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13626 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r--apps/codecs/demac/libdemac/SOURCES3
-rw-r--r--apps/codecs/demac/libdemac/parser.h10
-rw-r--r--apps/codecs/demac/libdemac/predictor-arm.S507
-rw-r--r--apps/codecs/demac/libdemac/predictor.c2
4 files changed, 520 insertions, 2 deletions
diff --git a/apps/codecs/demac/libdemac/SOURCES b/apps/codecs/demac/libdemac/SOURCES
index 76b891a90d..c68fff104e 100644
--- a/apps/codecs/demac/libdemac/SOURCES
+++ b/apps/codecs/demac/libdemac/SOURCES
@@ -1,5 +1,8 @@
1crc.c 1crc.c
2predictor.c 2predictor.c
3#ifdef CPU_ARM
4predictor-arm.S
5#endif
3entropy.c 6entropy.c
4decoder.c 7decoder.c
5parser.c 8parser.c
diff --git a/apps/codecs/demac/libdemac/parser.h b/apps/codecs/demac/libdemac/parser.h
index 301cf4a5e1..4ef0977e6b 100644
--- a/apps/codecs/demac/libdemac/parser.h
+++ b/apps/codecs/demac/libdemac/parser.h
@@ -71,6 +71,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
71/* Total size of all predictor histories - 50 * sizeof(int32_t) */ 71/* Total size of all predictor histories - 50 * sizeof(int32_t) */
72#define PREDICTOR_SIZE 50 72#define PREDICTOR_SIZE 50
73 73
74
75/* NOTE: This struct is used in predictor-arm.S - any updates need to
76 be reflected there. */
77
74struct predictor_t 78struct predictor_t
75{ 79{
76 /* Filter histories */ 80 /* Filter histories */
@@ -79,10 +83,12 @@ struct predictor_t
79 int32_t YlastA; 83 int32_t YlastA;
80 int32_t XlastA; 84 int32_t XlastA;
81 85
82 int32_t YfilterA; 86 /* NOTE: The order of the next four fields is important for
83 int32_t XfilterA; 87 predictor-arm.S */
84 int32_t YfilterB; 88 int32_t YfilterB;
89 int32_t XfilterA;
85 int32_t XfilterB; 90 int32_t XfilterB;
91 int32_t YfilterA;
86 92
87 /* Adaption co-efficients */ 93 /* Adaption co-efficients */
88 int32_t YcoeffsA[4]; 94 int32_t YcoeffsA[4];
diff --git a/apps/codecs/demac/libdemac/predictor-arm.S b/apps/codecs/demac/libdemac/predictor-arm.S
new file mode 100644
index 0000000000..1a04b5d66a
--- /dev/null
+++ b/apps/codecs/demac/libdemac/predictor-arm.S
@@ -0,0 +1,507 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id: predictor.c 13597 2007-06-08 22:35:26Z dave $
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25 .section .text,"ax",%progbits
26
27 .align 2
28
29 .global predictor_decode_stereo
30 .type predictor_decode_stereo,%function
31
32
33/* NOTE: The following need to be kept in sync with parser.h. HISTORY_SIZE is a count of int32_t entries (it is multiplied by 4 where it is used); the *DELAY* and *ADAPTCOEFFS* values are byte offsets from p->buf, i.e. the int32_t index multiplied by 4. */
34
35#define HISTORY_SIZE 512
36
37#define YDELAYA 200
38#define YDELAYB 168
39#define XDELAYA 136
40#define XDELAYB 104
41#define YADAPTCOEFFSA 72
42#define XADAPTCOEFFSA 56
43#define YADAPTCOEFFSB 40
44#define XADAPTCOEFFSB 20
45
46/* struct predictor_t members, given as byte offsets from the start of the struct: */
47#define buf 0 /* int32_t* buf */
48
49#define YlastA 4 /* int32_t YlastA; */
50#define XlastA 8 /* int32_t XlastA; */
51
52#define YfilterB 12 /* int32_t YfilterB; */
53#define XfilterA 16 /* int32_t XfilterA; */
54
55#define XfilterB 20 /* int32_t XfilterB; */
56#define YfilterA 24 /* int32_t YfilterA; */
57
58#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
59#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
60#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
61#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
62
63#define historybuffer 100 /* int32_t historybuffer[] */
64
@ int predictor_decode_stereo(struct predictor_t* p,
@                             int32_t* decoded0,
@                             int32_t* decoded1,
@                             int count)
@
@ Runs the Y predictor over decoded0[] and the X predictor over decoded1[],
@ one sample of each per loop iteration, for "count" iterations.  Each
@ input sample is read from *decodedN and the filtered result is written
@ back to the same location before the pointer is advanced.
@
@ In:   r0 = p         predictor state (updated in place)
@       r1 = decoded0  Y channel samples (updated in place)
@       r2 = decoded1  X channel samples (updated in place)
@       r3 = count     number of samples per channel
@
@ Out:  r0 = 0 (always)
@
@ Register usage inside the loop:
@
@ r0-r11 - scratch
@ r12    - struct predictor_t* p
@ r14    - int32_t* p->buf
@
@ NOTE: The code relies on condition flags surviving across instructions
@ that do not set them (ldm/stm/add/mul/mla without an "s" suffix), so
@ instruction order must not be changed casually.

predictor_decode_stereo:
    cmp     r3, #0                   @ The loop below tests the count at the
    movle   r0, #0                   @ bottom, so count == 0 would wrap round
    movle   pc, lr                   @ to 0xffffffff - return 0 straight away.

    stmdb   sp!, {r1-r11, lr}

    @ r1 (decoded0) is [sp]
    @ r2 (decoded1) is [sp, #4]
    @ r3 (count)    is [sp, #8]

    mov     r12, r0                  @ r12 := p
    ldr     r14, [r0]                @ r14 := p->buf

loop:

@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y

@ Predictor Y, Filter A

    ldr     r10, [r12, #YlastA]      @ r10 := p->YlastA
    add     r11, r14, #YDELAYA-12    @ r11 := &p->buf[YDELAYA-3]

    ldmia   r11, { r2 - r4 }         @ r2 := p->buf[YDELAYA-3]
                                     @ r3 := p->buf[YDELAYA-2]
                                     @ r4 := p->buf[YDELAYA-1]

    subs    r4, r10, r4              @ r4 := r10 - r4 (flags used below)

    add     r1, r12, #YcoeffsA
    ldmia   r1, {r6 - r9}            @ r6 := p->YcoeffsA[0]
                                     @ r7 := p->YcoeffsA[1]
                                     @ r8 := p->YcoeffsA[2]
                                     @ r9 := p->YcoeffsA[3]

    mul     r0, r10, r6              @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
    mla     r0, r4, r7, r0           @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
    mla     r0, r3, r8, r0           @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
    mla     r0, r2, r9, r0           @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]

    add     r11, r14, #YDELAYA-4
    stmia   r11, { r4, r10 }         @ p->buf[YDELAYA-1] = r4
                                     @ p->buf[YDELAYA] = r10

    @ flags were set above, in the subs instruction
    @ SIGN: positive -> -1, negative -> 1, zero is left as 0
    mvngt   r4, #0
    movlt   r4, #1                   @ r4 := SIGN(r4) (see .c for SIGN macro)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                  @ r10 := SIGN(r10) (see .c for SIGN macro)

    add     r1, r14, #YADAPTCOEFFSA-4
    stmia   r1, {r4, r10}            @ p->buf[YADAPTCOEFFSA-1] := r4
                                     @ p->buf[YADAPTCOEFFSA] := r10

    @ NOTE: r0 now contains predictionA - don't overwrite.

@ Predictor Y, Filter B

    add     r2, r12, #YfilterB
    ldmia   r2, {r2, r11}            @ r2 := p->YfilterB
                                     @ r11 := p->XfilterA

    rsb     r2, r2, r2, lsl #5       @ r2 := r2 * 32 - r2 ( == r2*31)
    sub     r10, r11, r2, asr #5     @ r10 (p->buf[YDELAYB]) := r11 - (r2 >> 5)

    str     r11, [r12, #YfilterB]    @ p->YfilterB := r11 (p->XfilterA)

    add     r11, r14, #YDELAYB-16    @ r11 := &p->buf[YDELAYB-4]

    ldmia   r11, { r2 - r5 }         @ r2 := p->buf[YDELAYB-4]
                                     @ r3 := p->buf[YDELAYB-3]
                                     @ r4 := p->buf[YDELAYB-2]
                                     @ r5 := p->buf[YDELAYB-1]

    subs    r5, r10, r5              @ r5 := r10 - r5 (flags used below)

    add     r1, r12, #YcoeffsB
    ldmia   r1, {r6,r7,r8,r9,r11}    @ r6 := p->YcoeffsB[0]
                                     @ r7 := p->YcoeffsB[1]
                                     @ r8 := p->YcoeffsB[2]
                                     @ r9 := p->YcoeffsB[3]
                                     @ r11 := p->YcoeffsB[4]

    mul     r1, r10, r6              @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
    mla     r1, r5, r7, r1           @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
    mla     r1, r4, r8, r1           @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
    mla     r1, r3, r9, r1           @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
    mla     r1, r2, r11, r1          @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]

    add     r2, r14, #YDELAYB-4      @ r2 := &p->buf[YDELAYB-1]
    stmia   r2, { r5, r10 }          @ p->buf[YDELAYB-1] = r5
                                     @ p->buf[YDELAYB] = r10

    @ flags were set above, in the subs instruction
    mvngt   r5, #0
    movlt   r5, #1                   @ r5 := SIGN(r5) (see .c for SIGN macro)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                  @ r10 := SIGN(r10) (see .c for SIGN macro)

    add     r2, r14, #YADAPTCOEFFSB-4
    stmia   r2, {r5, r10}            @ p->buf[YADAPTCOEFFSB-1] := r5
                                     @ p->buf[YADAPTCOEFFSB] := r10

    @ r0 still contains predictionA
    @ r1 contains predictionB

    @ Finish Predictor Y

    ldr     r2, [sp]                 @ r2 := decoded0
    add     r0, r0, r1, asr #1       @ r0 := r0 + (r1 >> 1)
    ldr     r3, [r2]                 @ r3 := *decoded0
    add     r1, r3, r0, asr #10      @ r1 := r3 + (r0 >> 10)
    str     r1, [r12, #YlastA]       @ p->YlastA := r1

    ldr     r4, [r12, #YfilterA]     @ r4 := p->YfilterA
    rsb     r4, r4, r4, lsl #5       @ r4 := r4 * 32 - r4 ( == r4*31)
    add     r1, r1, r4, asr #5       @ r1 := r1 + (r4 >> 5)
    str     r1, [r12, #YfilterA]     @ p->YfilterA := r1

    @ r1 contains p->YfilterA
    @ r2 contains decoded0
    @ r3 contains *decoded0

    @ r6, r7, r8, r9, r11 contain p->YcoeffsB[0..4]
    @ r5, r10 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]

    cmp     r3, #0                   @ flags tested by the beq/blt below
    stmia   r2!, {r1}                @ *(decoded0++) := r1 (p->YfilterA)
    str     r2, [sp]                 @ save decoded0
    beq     2f                       @ *decoded0 == 0: no adaption

    add     r1, r14, #YADAPTCOEFFSB-16
    ldmia   r1, { r2, r3, r4 }       @ r2 := p->buf[YADAPTCOEFFSB-4]
                                     @ r3 := p->buf[YADAPTCOEFFSB-3]
                                     @ r4 := p->buf[YADAPTCOEFFSB-2]
    blt     1f

    @ *decoded0 > 0

    sub     r6, r6, r10              @ r6 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
    sub     r7, r7, r5               @ r7 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
    sub     r8, r8, r4               @ r8 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
    sub     r9, r9, r3               @ r9 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
    sub     r11, r11, r2             @ r11 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]

    add     r0, r12, #YcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}    @ Save p->YcoeffsB[]

    add     r1, r12, #YcoeffsA
    ldmia   r1, { r2-r5 }            @ r2 := p->YcoeffsA[0]
                                     @ r3 := p->YcoeffsA[1]
                                     @ r4 := p->YcoeffsA[2]
                                     @ r5 := p->YcoeffsA[3]

    add     r0, r14, #YADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}             @ r6 := p->buf[YADAPTCOEFFSA-3]
                                     @ r7 := p->buf[YADAPTCOEFFSA-2]
                                     @ r8 := p->buf[YADAPTCOEFFSA-1]
                                     @ r9 := p->buf[YADAPTCOEFFSA]

    sub     r2, r2, r9               @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
    sub     r3, r3, r8               @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
    sub     r4, r4, r7               @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
    sub     r5, r5, r6               @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}              @ Save p->YcoeffsA
    b       2f


1:  @ *decoded0 < 0

    add     r6, r6, r10              @ r6 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
    add     r7, r7, r5               @ r7 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
    add     r8, r8, r4               @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
    add     r9, r9, r3               @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
    add     r11, r11, r2             @ r11 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]

    add     r0, r12, #YcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}    @ Save p->YcoeffsB[]

    add     r1, r12, #YcoeffsA
    ldmia   r1, { r2-r5 }            @ r2 := p->YcoeffsA[0]
                                     @ r3 := p->YcoeffsA[1]
                                     @ r4 := p->YcoeffsA[2]
                                     @ r5 := p->YcoeffsA[3]

    add     r0, r14, #YADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}             @ r6 := p->buf[YADAPTCOEFFSA-3]
                                     @ r7 := p->buf[YADAPTCOEFFSA-2]
                                     @ r8 := p->buf[YADAPTCOEFFSA-1]
                                     @ r9 := p->buf[YADAPTCOEFFSA]

    add     r2, r2, r9               @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
    add     r3, r3, r8               @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
    add     r4, r4, r7               @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
    add     r5, r5, r6               @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}              @ Save p->YcoeffsA

2:

@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X

@ Predictor X, Filter A

    ldr     r10, [r12, #XlastA]      @ r10 := p->XlastA
    add     r11, r14, #XDELAYA-12    @ r11 := &p->buf[XDELAYA-3]

    ldmia   r11, { r2 - r4 }         @ r2 := p->buf[XDELAYA-3]
                                     @ r3 := p->buf[XDELAYA-2]
                                     @ r4 := p->buf[XDELAYA-1]

    subs    r4, r10, r4              @ r4 := r10 - r4 (flags used below)

    add     r1, r12, #XcoeffsA
    ldmia   r1, {r6 - r9}            @ r6 := p->XcoeffsA[0]
                                     @ r7 := p->XcoeffsA[1]
                                     @ r8 := p->XcoeffsA[2]
                                     @ r9 := p->XcoeffsA[3]

    mul     r0, r10, r6              @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
    mla     r0, r4, r7, r0           @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
    mla     r0, r3, r8, r0           @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
    mla     r0, r2, r9, r0           @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]

    add     r11, r14, #XDELAYA-4
    stmia   r11, { r4, r10 }         @ p->buf[XDELAYA-1] = r4
                                     @ p->buf[XDELAYA] = r10

    @ flags were set above, in the subs instruction
    mvngt   r4, #0
    movlt   r4, #1                   @ r4 := SIGN(r4) (see .c for SIGN macro)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                  @ r10 := SIGN(r10) (see .c for SIGN macro)

    add     r1, r14, #XADAPTCOEFFSA-4
    stmia   r1, {r4, r10}            @ p->buf[XADAPTCOEFFSA-1] := r4
                                     @ p->buf[XADAPTCOEFFSA] := r10

    @ NOTE: r0 now contains predictionA - don't overwrite.

@ Predictor X, Filter B

    add     r2, r12, #XfilterB
    ldmia   r2, {r2, r11}            @ r2 := p->XfilterB
                                     @ r11 := p->YfilterA

    rsb     r2, r2, r2, lsl #5       @ r2 := r2 * 32 - r2 ( == r2*31)
    sub     r10, r11, r2, asr #5     @ r10 (p->buf[XDELAYB]) := r11 - (r2 >> 5)

    str     r11, [r12, #XfilterB]    @ p->XfilterB := r11 (p->YfilterA)

    add     r11, r14, #XDELAYB-16    @ r11 := &p->buf[XDELAYB-4]

    ldmia   r11, { r2 - r5 }         @ r2 := p->buf[XDELAYB-4]
                                     @ r3 := p->buf[XDELAYB-3]
                                     @ r4 := p->buf[XDELAYB-2]
                                     @ r5 := p->buf[XDELAYB-1]

    subs    r5, r10, r5              @ r5 := r10 - r5 (flags used below)

    add     r1, r12, #XcoeffsB
    ldmia   r1, {r6,r7,r8,r9,r11}    @ r6 := p->XcoeffsB[0]
                                     @ r7 := p->XcoeffsB[1]
                                     @ r8 := p->XcoeffsB[2]
                                     @ r9 := p->XcoeffsB[3]
                                     @ r11 := p->XcoeffsB[4]

    mul     r1, r10, r6              @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
    mla     r1, r5, r7, r1           @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
    mla     r1, r4, r8, r1           @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
    mla     r1, r3, r9, r1           @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
    mla     r1, r2, r11, r1          @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]

    add     r2, r14, #XDELAYB-4      @ r2 := &p->buf[XDELAYB-1]
    stmia   r2, { r5, r10 }          @ p->buf[XDELAYB-1] = r5
                                     @ p->buf[XDELAYB] = r10

    @ flags were set above, in the subs instruction
    mvngt   r5, #0
    movlt   r5, #1                   @ r5 := SIGN(r5) (see .c for SIGN macro)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                  @ r10 := SIGN(r10) (see .c for SIGN macro)

    add     r2, r14, #XADAPTCOEFFSB-4
    stmia   r2, {r5, r10}            @ p->buf[XADAPTCOEFFSB-1] := r5
                                     @ p->buf[XADAPTCOEFFSB] := r10

    @ r0 still contains predictionA
    @ r1 contains predictionB

    @ Finish Predictor X

    ldr     r2, [sp, #4]             @ r2 := decoded1
    add     r0, r0, r1, asr #1       @ r0 := r0 + (r1 >> 1)
    ldr     r3, [r2]                 @ r3 := *decoded1
    add     r1, r3, r0, asr #10      @ r1 := r3 + (r0 >> 10)
    str     r1, [r12, #XlastA]       @ p->XlastA := r1

    ldr     r4, [r12, #XfilterA]     @ r4 := p->XfilterA
    rsb     r4, r4, r4, lsl #5       @ r4 := r4 * 32 - r4 ( == r4*31)
    add     r1, r1, r4, asr #5       @ r1 := r1 + (r4 >> 5)
    str     r1, [r12, #XfilterA]     @ p->XfilterA := r1

    @ r1 contains p->XfilterA
    @ r2 contains decoded1
    @ r3 contains *decoded1

    @ r6, r7, r8, r9, r11 contain p->XcoeffsB[0..4]
    @ r5, r10 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]

    cmp     r3, #0                   @ flags tested by the beq/blt below
    stmia   r2!, {r1}                @ *(decoded1++) := r1 (p->XfilterA)
    str     r2, [sp, #4]             @ save decoded1
    beq     2f                       @ *decoded1 == 0: no adaption

    add     r1, r14, #XADAPTCOEFFSB-16
    ldmia   r1, { r2, r3, r4 }       @ r2 := p->buf[XADAPTCOEFFSB-4]
                                     @ r3 := p->buf[XADAPTCOEFFSB-3]
                                     @ r4 := p->buf[XADAPTCOEFFSB-2]
    blt     1f

    @ *decoded1 > 0

    sub     r6, r6, r10              @ r6 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
    sub     r7, r7, r5               @ r7 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
    sub     r8, r8, r4               @ r8 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
    sub     r9, r9, r3               @ r9 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
    sub     r11, r11, r2             @ r11 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]

    add     r0, r12, #XcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}    @ Save p->XcoeffsB[]

    add     r1, r12, #XcoeffsA
    ldmia   r1, { r2-r5 }            @ r2 := p->XcoeffsA[0]
                                     @ r3 := p->XcoeffsA[1]
                                     @ r4 := p->XcoeffsA[2]
                                     @ r5 := p->XcoeffsA[3]

    add     r0, r14, #XADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}             @ r6 := p->buf[XADAPTCOEFFSA-3]
                                     @ r7 := p->buf[XADAPTCOEFFSA-2]
                                     @ r8 := p->buf[XADAPTCOEFFSA-1]
                                     @ r9 := p->buf[XADAPTCOEFFSA]

    sub     r2, r2, r9               @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
    sub     r3, r3, r8               @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
    sub     r4, r4, r7               @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
    sub     r5, r5, r6               @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}              @ Save p->XcoeffsA
    b       2f


1:  @ *decoded1 < 0

    add     r6, r6, r10              @ r6 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
    add     r7, r7, r5               @ r7 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
    add     r8, r8, r4               @ r8 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
    add     r9, r9, r3               @ r9 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
    add     r11, r11, r2             @ r11 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]

    add     r0, r12, #XcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}    @ Save p->XcoeffsB[]

    add     r1, r12, #XcoeffsA
    ldmia   r1, { r2-r5 }            @ r2 := p->XcoeffsA[0]
                                     @ r3 := p->XcoeffsA[1]
                                     @ r4 := p->XcoeffsA[2]
                                     @ r5 := p->XcoeffsA[3]

    add     r0, r14, #XADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}             @ r6 := p->buf[XADAPTCOEFFSA-3]
                                     @ r7 := p->buf[XADAPTCOEFFSA-2]
                                     @ r8 := p->buf[XADAPTCOEFFSA-1]
                                     @ r9 := p->buf[XADAPTCOEFFSA]

    add     r2, r2, r9               @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
    add     r3, r3, r8               @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
    add     r4, r4, r7               @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
    add     r5, r5, r6               @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}              @ Save p->XcoeffsA

2:

@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON

    add     r14, r14, #4             @ p->buf++

    add     r11, r12, #historybuffer @ r11 := &p->historybuffer[0]

    sub     r10, r14, #HISTORY_SIZE*4 @ r10 := p->buf - HISTORY_SIZE

    cmp     r10, r11                 @ p->buf == &p->historybuffer[HISTORY_SIZE] ?
    bne     endofloop

    @ The history buffer is full, we need to do a memmove:

    @ dest = r11 (p->historybuffer)
    @ src = r14 (p->buf)
    @ n = 200 bytes (5 transfers of 10 registers each)

    ldmia   r14!, {r0-r9}            @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}            @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}            @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}            @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}            @ 40 bytes
    stmia   r11!, {r0-r9}

    add     r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]


endofloop:
@ Check loop count
    ldr     r0, [sp, #8]
    subs    r0, r0, #1
    strne   r0, [sp, #8]
    bne     loop

done:
    @ r0 is 0 here (result of the subs above) - this is the return value.
    str     r14, [r12]               @ Save value of p->buf
    add     sp, sp, #12              @ Don't bother restoring r1-r3
    ldmia   sp!, {r4-r11, pc}
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c
index a7210bf014..90f24e416b 100644
--- a/apps/codecs/demac/libdemac/predictor.c
+++ b/apps/codecs/demac/libdemac/predictor.c
@@ -74,6 +74,7 @@ void init_predictor_decoder(struct predictor_t* p)
74int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR; 74int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR;
75#endif 75#endif
76 76
77#ifndef CPU_ARM
77int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) 78int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count)
78{ 79{
79 int32_t predictionA, predictionB; 80 int32_t predictionA, predictionB;
@@ -208,6 +209,7 @@ int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* d
208 209
209 return 0; 210 return 0;
210} 211}
212#endif
211 213
212int predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, int count) 214int predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, int count)
213{ 215{