summaryrefslogtreecommitdiff
path: root/apps/codecs/demac/libdemac/predictor-arm.S
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2008-11-27 20:52:23 +0000
committerJens Arnold <amiconn@rockbox.org>2008-11-27 20:52:23 +0000
commit6d34e33b94d6074b96917d792dc688c5fbd9356b (patch)
tree0157a2dee3b86024e7d3c02cc3147541c7e775e7 /apps/codecs/demac/libdemac/predictor-arm.S
parent92f34edf702fba5926608a55de68c33c65275420 (diff)
downloadrockbox-6d34e33b94d6074b96917d792dc688c5fbd9356b.tar.gz
rockbox-6d34e33b94d6074b96917d792dc688c5fbd9356b.zip
Speed up the predictor a little by using ldrd/strd on ARMv5+. This required shuffling around the register allocation somewhat. Performance on ARMv4 is unaffected.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19248 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/predictor-arm.S')
-rw-r--r--apps/codecs/demac/libdemac/predictor-arm.S304
1 files changed, 163 insertions, 141 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-arm.S b/apps/codecs/demac/libdemac/predictor-arm.S
index ca8a3f4736..6bb3ee1cf0 100644
--- a/apps/codecs/demac/libdemac/predictor-arm.S
+++ b/apps/codecs/demac/libdemac/predictor-arm.S
@@ -61,6 +61,30 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
61 61
62#define historybuffer 100 /* int32_t historybuffer[] */ 62#define historybuffer 100 /* int32_t historybuffer[] */
63 63
64@ Macro for loading 2 registers (reg1, reg2) from the two consecutive words at [base + offset], for various ARM versions.
65@ Registers must start with an even register, and must be consecutive. No scratch register is needed: when ARM_ARCH < 5, reg1 doubles as the address temporary.
66
67.macro LDR2OFS reg1, reg2, base, offset
68#if ARM_ARCH >= 5
69 ldrd \reg1, [\base, \offset] @ reg2 is not named: ldrd loads reg1 and the register following it
70#else
71 add \reg1, \base, \offset @ reg1 := address to load from (overwritten by the ldmia below)
72 ldmia \reg1, {\reg1, \reg2} @ reg1 := word at address, reg2 := following word; base is left unchanged
73#endif
74.endm
75
76@ Macro for storing 2 registers (reg1, reg2) to the two consecutive words at [base + offset], for various ARM versions.
77@ Registers must start with an even register, and must be consecutive. scratch is written only when ARM_ARCH < 5; callers must treat it as clobbered either way.
78
79.macro STR2OFS reg1, reg2, base, offset, scratch
80#if ARM_ARCH >= 5
81 strd \reg1, [\base, \offset] @ reg2 is not named: strd stores reg1 and the register following it; scratch is untouched
82#else
83 add \scratch, \base, \offset @ scratch := address to store to
84 stmia \scratch, {\reg1, \reg2} @ reg1 -> word at address, reg2 -> following word; base is left unchanged
85#endif
86.endm
87
64@ Register usage: 88@ Register usage:
65@ 89@
66@ r0-r11 - scratch 90@ r0-r11 - scratch
@@ -88,91 +112,90 @@ loop:
88 112
89@ Predictor Y, Filter A 113@ Predictor Y, Filter A
90 114
91 ldr r10, [r12, #YlastA] @ r10 := p->YlastA 115 ldr r11, [r12, #YlastA] @ r11 := p->YlastA
92 116
93 add r11, r14, #YDELAYA-12 @ r11 := &p->buf[YDELAYA-3] 117 add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
94 ldmia r11, { r2 - r4 } @ r2 := p->buf[YDELAYA-3] 118 ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
95 @ r3 := p->buf[YDELAYA-2] 119 @ r3 := p->buf[YDELAYA-2]
96 @ r4 := p->buf[YDELAYA-1] 120 @ r10 := p->buf[YDELAYA-1]
97 121
98 add r11, r12, #YcoeffsA 122 add r6, r12, #YcoeffsA
99 ldmia r11, {r6 - r9} @ r6 := p->YcoeffsA[0] 123 ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0]
100 @ r7 := p->YcoeffsA[1] 124 @ r7 := p->YcoeffsA[1]
101 @ r8 := p->YcoeffsA[2] 125 @ r8 := p->YcoeffsA[2]
102 @ r9 := p->YcoeffsA[3] 126 @ r9 := p->YcoeffsA[3]
103 127
104 subs r4, r10, r4 @ r4 := r10 - r4 128 subs r10, r11, r10 @ r10 := r11 - r10
105 129
106 add r11, r14, #YDELAYA-4 @ r11 := &p->buf[YDELAYA-1] 130 STR2OFS r10, r11, r14, #YDELAYA-4, r1 @ r1 -> scratch
107 stmia r11, { r4, r10 } @ p->buf[YDELAYA-1] = r4 131 @ p->buf[YDELAYA-1] = r10
108 @ p->buf[YDELAYA] = r10 132 @ p->buf[YDELAYA] = r11
109 133
110 mul r0, r10, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] 134 mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
111 mla r0, r4, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] 135 mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
112 mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] 136 mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
113 mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] 137 mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
114 138
115 @ flags were set above, in the subs instruction 139 @ flags were set above, in the subs instruction
116 mvngt r4, #0
117 movlt r4, #1 @ r4 := SIGN(r4) (see .c for SIGN macro)
118
119 cmp r10, #0
120 mvngt r10, #0 140 mvngt r10, #0
121 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) 141 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
122 142
123 add r1, r14, #YADAPTCOEFFSA-4 143 cmp r11, #0
124 stmia r1, {r4, r10} @ p->buf[YADAPTCOEFFSA-1] := r4 144 mvngt r11, #0
125 @ p->buf[YADAPTCOEFFSA] := r10 145 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
146
147 STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4, r1 @ r1 -> scratch
148 @ p->buf[YADAPTCOEFFSA-1] := r10
149 @ p->buf[YADAPTCOEFFSA] := r11
126 150
127 @ NOTE: r0 now contains predictionA - don't overwrite. 151 @ NOTE: r0 now contains predictionA - don't overwrite.
128 152
129@ Predictor Y, Filter B 153@ Predictor Y, Filter B
130 154
131 add r11, r12, #YfilterB 155 LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB
132 ldmia r11, {r6, r7} @ r6 := p->YfilterB
133 @ r7 := p->XfilterA 156 @ r7 := p->XfilterA
134 157
135 add r11, r14, #YDELAYB-16 @ r11 := &p->buf[YDELAYB-4] 158 add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4]
136 ldmia r11, { r2 - r5 } @ r2 := p->buf[YDELAYB-4] 159 ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4]
137 @ r3 := p->buf[YDELAYB-3] 160 @ r3 := p->buf[YDELAYB-3]
138 @ r4 := p->buf[YDELAYB-2] 161 @ r4 := p->buf[YDELAYB-2]
139 @ r5 := p->buf[YDELAYB-1] 162 @ r10 := p->buf[YDELAYB-1]
140 163
141 rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) 164 rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
142 sub r10, r7, r6, asr #5 @ r10 (p->buf[YDELAYB]) := r7 - (r6 >> 5) 165 sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5)
143 166
144 str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA) 167 str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA)
145 168
146 add r1, r12, #YcoeffsB 169 add r5, r12, #YcoeffsB
147 ldmia r1, {r6,r7,r8,r9,r11} @ r6 := p->YcoeffsB[0] 170 ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0]
148 @ r7 := p->YcoeffsB[1] 171 @ r6 := p->YcoeffsB[1]
149 @ r8 := p->YcoeffsB[2] 172 @ r7 := p->YcoeffsB[2]
150 @ r9 := p->YcoeffsB[3] 173 @ r8 := p->YcoeffsB[3]
151 @ r11 := p->YcoeffsB[4] 174 @ r9 := p->YcoeffsB[4]
152 175
153 subs r5, r10, r5 @ r5 := r10 - r5 176 subs r10, r11, r10 @ r10 := r11 - r10
154 177
155 add r1, r14, #YDELAYB-4 @ r1 := &p->buf[YDELAYB-1] 178 STR2OFS r10, r11, r14, #YDELAYB-4, r1 @ r1 -> scratch
156 stmia r1, { r5, r10 } @ p->buf[YDELAYB-1] = r5 179 @ p->buf[YDELAYB-1] = r10
157 @ p->buf[YDELAYB] = r10 180 @ p->buf[YDELAYB] = r11
158 181
159 mul r1, r10, r6 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0] 182 mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
160 mla r1, r5, r7, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] 183 mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
161 mla r1, r4, r8, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] 184 mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
162 mla r1, r3, r9, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] 185 mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
163 mla r1, r2, r11, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] 186 mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
164 187
165 @ flags were set above, in the subs instruction 188 @ flags were set above, in the subs instruction
166 mvngt r5, #0
167 movlt r5, #1 @ r5 := SIGN(r5) (see .c for SIGN macro)
168
169 cmp r10, #0
170 mvngt r10, #0 189 mvngt r10, #0
171 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) 190 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
191
192 cmp r11, #0
193 mvngt r11, #0
194 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
172 195
173 add r2, r14, #YADAPTCOEFFSB-4 196 STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4, r2 @ r2 -> scratch
174 stmia r2, {r5, r10} @ p->buf[YADAPTCOEFFSB-1] := r5 197 @ p->buf[YADAPTCOEFFSB-1] := r10
175 @ p->buf[YADAPTCOEFFSB] := r10 198 @ p->buf[YADAPTCOEFFSB] := r11
176 199
177 @ r0 still contains predictionA 200 @ r0 still contains predictionA
178 @ r1 contains predictionB 201 @ r1 contains predictionB
@@ -201,31 +224,31 @@ loop:
201 cmp r3, #0 224 cmp r3, #0
202 beq 3f 225 beq 3f
203 226
204 add r1, r14, #YADAPTCOEFFSB-16 227 add r2, r14, #YADAPTCOEFFSB-16
205 ldmia r1, { r2, r3, r4 } @ r2 := p->buf[YADAPTCOEFFSB-4] 228 ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4]
206 @ r3 := p->buf[YADAPTCOEFFSB-3] 229 @ r3 := p->buf[YADAPTCOEFFSB-3]
207 @ r4 := p->buf[YADAPTCOEFFSB-2] 230 @ r4 := p->buf[YADAPTCOEFFSB-2]
208 blt 1f 231 blt 1f
209 232
210 @ *decoded0 > 0 233 @ *decoded0 > 0
211 234
212 sub r6, r6, r10 @ r6 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] 235 sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
213 sub r7, r7, r5 @ r7 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] 236 sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
214 sub r11, r11, r2 @ r11 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] 237 sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
215 sub r9, r9, r3 @ r9 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] 238 sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
216 sub r8, r8, r4 @ r8 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] 239 sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
217 240
218 add r0, r12, #YcoeffsB 241 add r0, r12, #YcoeffsB
219 stmia r0, {r6,r7,r8,r9,r11} @ Save p->YcoeffsB[] 242 stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
220 243
221 add r1, r12, #YcoeffsA 244 add r1, r12, #YcoeffsA
222 ldmia r1, { r2-r5 } @ r2 := p->YcoeffsA[0] 245 ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
223 @ r3 := p->YcoeffsA[1] 246 @ r3 := p->YcoeffsA[1]
224 @ r4 := p->YcoeffsA[2] 247 @ r4 := p->YcoeffsA[2]
225 @ r5 := p->YcoeffsA[3] 248 @ r5 := p->YcoeffsA[3]
226 249
227 add r0, r14, #YADAPTCOEFFSA-12 250 add r6, r14, #YADAPTCOEFFSA-12
228 ldmia r0, { r6-r9} @ r6 := p->buf[YADAPTCOEFFSA-3] 251 ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
229 @ r7 := p->buf[YADAPTCOEFFSA-2] 252 @ r7 := p->buf[YADAPTCOEFFSA-2]
230 @ r8 := p->buf[YADAPTCOEFFSA-1] 253 @ r8 := p->buf[YADAPTCOEFFSA-1]
231 @ r9 := p->buf[YADAPTCOEFFSA] 254 @ r9 := p->buf[YADAPTCOEFFSA]
@@ -240,23 +263,23 @@ loop:
240 263
2411: @ *decoded0 < 0 2641: @ *decoded0 < 0
242 265
243 add r6, r6, r10 @ r6 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] 266 add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
244 add r7, r7, r5 @ r7 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] 267 add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
245 add r11, r11, r2 @ r11 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] 268 add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
246 add r9, r9, r3 @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] 269 add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
247 add r8, r8, r4 @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] 270 add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
248 271
249 add r0, r12, #YcoeffsB 272 add r0, r12, #YcoeffsB
250 stmia r0, {r6,r7,r8,r9,r11} @ Save p->YcoeffsB[] 273 stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
251 274
252 add r1, r12, #YcoeffsA 275 add r1, r12, #YcoeffsA
253 ldmia r1, { r2-r5 } @ r2 := p->YcoeffsA[0] 276 ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
254 @ r3 := p->YcoeffsA[1] 277 @ r3 := p->YcoeffsA[1]
255 @ r4 := p->YcoeffsA[2] 278 @ r4 := p->YcoeffsA[2]
256 @ r5 := p->YcoeffsA[3] 279 @ r5 := p->YcoeffsA[3]
257 280
258 add r0, r14, #YADAPTCOEFFSA-12 281 add r6, r14, #YADAPTCOEFFSA-12
259 ldmia r0, { r6-r9} @ r6 := p->buf[YADAPTCOEFFSA-3] 282 ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
260 @ r7 := p->buf[YADAPTCOEFFSA-2] 283 @ r7 := p->buf[YADAPTCOEFFSA-2]
261 @ r8 := p->buf[YADAPTCOEFFSA-1] 284 @ r8 := p->buf[YADAPTCOEFFSA-1]
262 @ r9 := p->buf[YADAPTCOEFFSA] 285 @ r9 := p->buf[YADAPTCOEFFSA]
@@ -267,7 +290,7 @@ loop:
267 add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] 290 add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
268 291
2692: 2922:
270 stmia r1, {r2-r5} @ Save p->YcoeffsA 293 stmia r1, {r2 - r5} @ Save p->YcoeffsA
271 294
2723: 2953:
273 296
@@ -275,91 +298,90 @@ loop:
275 298
276@ Predictor X, Filter A 299@ Predictor X, Filter A
277 300
278 ldr r10, [r12, #XlastA] @ r10 := p->XlastA 301 ldr r11, [r12, #XlastA] @ r11 := p->XlastA
279 302
280 add r11, r14, #XDELAYA-12 @ r11 := &p->buf[XDELAYA-3] 303 add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3]
281 ldmia r11, { r2 - r4 } @ r2 := p->buf[XDELAYA-3] 304 ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3]
282 @ r3 := p->buf[XDELAYA-2] 305 @ r3 := p->buf[XDELAYA-2]
283 @ r4 := p->buf[XDELAYA-1] 306 @ r10 := p->buf[XDELAYA-1]
284 307
285 add r11, r12, #XcoeffsA 308 add r6, r12, #XcoeffsA
286 ldmia r11, {r6 - r9} @ r6 := p->XcoeffsA[0] 309 ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0]
287 @ r7 := p->XcoeffsA[1] 310 @ r7 := p->XcoeffsA[1]
288 @ r8 := p->XcoeffsA[2] 311 @ r8 := p->XcoeffsA[2]
289 @ r9 := p->XcoeffsA[3] 312 @ r9 := p->XcoeffsA[3]
290 313
291 subs r4, r10, r4 @ r4 := r10 - r4 314 subs r10, r11, r10 @ r10 := r11 - r10
292 315
293 add r11, r14, #XDELAYA-4 @ r11 := &p->buf[XDELAYA-1] 316 STR2OFS r10, r11, r14, #XDELAYA-4, r1 @ r1 -> scratch
294 stmia r11, { r4, r10 } @ p->buf[XDELAYA-1] = r4 317 @ p->buf[XDELAYA-1] = r10
295 @ p->buf[XDELAYA] = r10 318 @ p->buf[XDELAYA] = r11
296 319
297 mul r0, r10, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0] 320 mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
298 mla r0, r4, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] 321 mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
299 mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] 322 mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
300 mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] 323 mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
301 324
302 @ flags were set above, in the subs instruction 325 @ flags were set above, in the subs instruction
303 mvngt r4, #0
304 movlt r4, #1 @ r4 := SIGN(r4) (see .c for SIGN macro)
305
306 cmp r10, #0
307 mvngt r10, #0 326 mvngt r10, #0
308 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) 327 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
328
329 cmp r11, #0
330 mvngt r11, #0
331 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
309 332
310 add r1, r14, #XADAPTCOEFFSA-4 333 STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4, r1 @ r1 -> scratch
311 stmia r1, {r4, r10} @ p->buf[XADAPTCOEFFSA-1] := r4 334 @ p->buf[XADAPTCOEFFSA-1] := r10
312 @ p->buf[XADAPTCOEFFSA] := r10 335 @ p->buf[XADAPTCOEFFSA] := r11
313 336
314 @ NOTE: r0 now contains predictionA - don't overwrite. 337 @ NOTE: r0 now contains predictionA - don't overwrite.
315 338
316@ Predictor X, Filter B 339@ Predictor X, Filter B
317 340
318 add r11, r12, #XfilterB 341 LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB
319 ldmia r11, {r6, r7} @ r6 := p->XfilterB
320 @ r7 := p->YfilterA 342 @ r7 := p->YfilterA
321 343
322 add r11, r14, #XDELAYB-16 @ r11 := &p->buf[XDELAYB-4] 344 add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4]
323 ldmia r11, { r2 - r5 } @ r2 := p->buf[XDELAYB-4] 345 ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4]
324 @ r3 := p->buf[XDELAYB-3] 346 @ r3 := p->buf[XDELAYB-3]
325 @ r4 := p->buf[XDELAYB-2] 347 @ r4 := p->buf[XDELAYB-2]
326 @ r5 := p->buf[XDELAYB-1] 348 @ r10 := p->buf[XDELAYB-1]
327 349
328 rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) 350 rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
329 sub r10, r7, r6, asr #5 @ r10 (p->buf[XDELAYB]) := r7 - (r6 >> 5) 351 sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5)
330 352
331 str r7, [r12, #XfilterB] @ p->XfilterB := r11 (p->YfilterA) 353 str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA)
332 354
333 add r1, r12, #XcoeffsB 355 add r5, r12, #XcoeffsB
334 ldmia r1, {r6,r7,r8,r9,r11} @ r6 := p->XcoeffsB[0] 356 ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0]
335 @ r7 := p->XcoeffsB[1] 357 @ r6 := p->XcoeffsB[1]
336 @ r8 := p->XcoeffsB[2] 358 @ r7 := p->XcoeffsB[2]
337 @ r9 := p->XcoeffsB[3] 359 @ r8 := p->XcoeffsB[3]
338 @ r11 := p->XcoeffsB[4] 360 @ r9 := p->XcoeffsB[4]
339 361
340 subs r5, r10, r5 @ r5 := r10 - r5 362 subs r10, r11, r10 @ r10 := r11 - r10
341 363
342 add r1, r14, #XDELAYB-4 @ r1 := &p->buf[XDELAYB-1] 364 STR2OFS r10, r11, r14, #XDELAYB-4, r1 @ r1 -> scratch
343 stmia r1, { r5, r10 } @ p->buf[XDELAYB-1] = r5 365 @ p->buf[XDELAYB-1] = r10
344 @ p->buf[XDELAYB] = r10 366 @ p->buf[XDELAYB] = r11
345 367
346 mul r1, r10, r6 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0] 368 mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
347 mla r1, r5, r7, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] 369 mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
348 mla r1, r4, r8, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] 370 mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
349 mla r1, r3, r9, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] 371 mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
350 mla r1, r2, r11, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] 372 mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
351 373
352 @ flags were set above, in the subs instruction 374 @ flags were set above, in the subs instruction
353 mvngt r5, #0
354 movlt r5, #1 @ r5 := SIGN(r5) (see .c for SIGN macro)
355
356 cmp r10, #0
357 mvngt r10, #0 375 mvngt r10, #0
358 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) 376 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
377
378 cmp r11, #0
379 mvngt r11, #0
380 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
359 381
360 add r2, r14, #XADAPTCOEFFSB-4 382 STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4, r2 @ r2 -> scratch
361 stmia r2, {r5, r10} @ p->buf[XADAPTCOEFFSB-1] := r5 383 @ p->buf[XADAPTCOEFFSB-1] := r10
362 @ p->buf[XADAPTCOEFFSB] := r10 384 @ p->buf[XADAPTCOEFFSB] := r11
363 385
364 @ r0 still contains predictionA 386 @ r0 still contains predictionA
365 @ r1 contains predictionB 387 @ r1 contains predictionB
@@ -388,31 +410,31 @@ loop:
388 cmp r3, #0 410 cmp r3, #0
389 beq 3f 411 beq 3f
390 412
391 add r1, r14, #XADAPTCOEFFSB-16 413 add r2, r14, #XADAPTCOEFFSB-16
392 ldmia r1, { r2, r3, r4 } @ r2 := p->buf[XADAPTCOEFFSB-4] 414 ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4]
393 @ r3 := p->buf[XADAPTCOEFFSB-3] 415 @ r3 := p->buf[XADAPTCOEFFSB-3]
394 @ r4 := p->buf[XADAPTCOEFFSB-2] 416 @ r4 := p->buf[XADAPTCOEFFSB-2]
395 blt 1f 417 blt 1f
396 418
397 @ *decoded1 > 0 419 @ *decoded1 > 0
398 420
399 sub r6, r6, r10 @ r6 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] 421 sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
400 sub r7, r7, r5 @ r7 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] 422 sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
401 sub r11, r11, r2 @ r11 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] 423 sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
402 sub r9, r9, r3 @ r9 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] 424 sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
403 sub r8, r8, r4 @ r8 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] 425 sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
404 426
405 add r0, r12, #XcoeffsB 427 add r0, r12, #XcoeffsB
406 stmia r0, {r6,r7,r8,r9,r11} @ Save p->XcoeffsB[] 428 stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
407 429
408 add r1, r12, #XcoeffsA 430 add r1, r12, #XcoeffsA
409 ldmia r1, { r2-r5 } @ r2 := p->XcoeffsA[0] 431 ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
410 @ r3 := p->XcoeffsA[1] 432 @ r3 := p->XcoeffsA[1]
411 @ r4 := p->XcoeffsA[2] 433 @ r4 := p->XcoeffsA[2]
412 @ r5 := p->XcoeffsA[3] 434 @ r5 := p->XcoeffsA[3]
413 435
414 add r0, r14, #XADAPTCOEFFSA-12 436 add r6, r14, #XADAPTCOEFFSA-12
415 ldmia r0, { r6-r9} @ r6 := p->buf[XADAPTCOEFFSA-3] 437 ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
416 @ r7 := p->buf[XADAPTCOEFFSA-2] 438 @ r7 := p->buf[XADAPTCOEFFSA-2]
417 @ r8 := p->buf[XADAPTCOEFFSA-1] 439 @ r8 := p->buf[XADAPTCOEFFSA-1]
418 @ r9 := p->buf[XADAPTCOEFFSA] 440 @ r9 := p->buf[XADAPTCOEFFSA]
@@ -427,23 +449,23 @@ loop:
427 449
4281: @ *decoded1 < 0 4501: @ *decoded1 < 0
429 451
430 add r6, r6, r10 @ r6 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] 452 add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
431 add r7, r7, r5 @ r7 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] 453 add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
432 add r11, r11, r2 @ r11 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] 454 add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
433 add r9, r9, r3 @ r9 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] 455 add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
434 add r8, r8, r4 @ r8 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] 456 add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
435 457
436 add r0, r12, #XcoeffsB 458 add r0, r12, #XcoeffsB
437 stmia r0, {r6,r7,r8,r9,r11} @ Save p->XcoeffsB[] 459 stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
438 460
439 add r1, r12, #XcoeffsA 461 add r1, r12, #XcoeffsA
440 ldmia r1, { r2-r5 } @ r2 := p->XcoeffsA[0] 462 ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
441 @ r3 := p->XcoeffsA[1] 463 @ r3 := p->XcoeffsA[1]
442 @ r4 := p->XcoeffsA[2] 464 @ r4 := p->XcoeffsA[2]
443 @ r5 := p->XcoeffsA[3] 465 @ r5 := p->XcoeffsA[3]
444 466
445 add r0, r14, #XADAPTCOEFFSA-12 467 add r6, r14, #XADAPTCOEFFSA-12
446 ldmia r0, { r6-r9} @ r6 := p->buf[XADAPTCOEFFSA-3] 468 ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
447 @ r7 := p->buf[XADAPTCOEFFSA-2] 469 @ r7 := p->buf[XADAPTCOEFFSA-2]
448 @ r8 := p->buf[XADAPTCOEFFSA-1] 470 @ r8 := p->buf[XADAPTCOEFFSA-1]
449 @ r9 := p->buf[XADAPTCOEFFSA] 471 @ r9 := p->buf[XADAPTCOEFFSA]
@@ -454,7 +476,7 @@ loop:
454 add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] 476 add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
455 477
4562: 4782:
457 stmia r1, {r2-r5} @ Save p->XcoeffsA 479 stmia r1, {r2 - r5} @ Save p->XcoeffsA
458 480
4593: 4813:
460 482
@@ -479,7 +501,7 @@ loop:
479done: 501done:
480 str r14, [r12] @ Save value of p->buf 502 str r14, [r12] @ Save value of p->buf
481 add sp, sp, #12 @ Don't bother restoring r1-r3 503 add sp, sp, #12 @ Don't bother restoring r1-r3
482 ldmia sp!, {r4-r11, pc} 504 ldmia sp!, {r4 - r11, pc}
483 505
484move_hist: 506move_hist:
485 @ dest = r11 (p->historybuffer) 507 @ dest = r11 (p->historybuffer)