diff options
Diffstat (limited to 'apps/codecs/demac/libdemac')
-rw-r--r-- | apps/codecs/demac/libdemac/predictor-arm.S | 304 |
1 files changed, 163 insertions, 141 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-arm.S b/apps/codecs/demac/libdemac/predictor-arm.S index ca8a3f4736..6bb3ee1cf0 100644 --- a/apps/codecs/demac/libdemac/predictor-arm.S +++ b/apps/codecs/demac/libdemac/predictor-arm.S | |||
@@ -61,6 +61,30 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
61 | 61 | ||
62 | #define historybuffer 100 /* int32_t historybuffer[] */ | 62 | #define historybuffer 100 /* int32_t historybuffer[] */ |
63 | 63 | ||
64 | @ Macro for loading 2 registers, for various ARM versions. | ||
65 | @ Registers must start with an even register, and must be consecutive. | ||
66 | |||
67 | .macro LDR2OFS reg1, reg2, base, offset | ||
68 | #if ARM_ARCH >= 5 | ||
69 | ldrd \reg1, [\base, \offset] | ||
70 | #else | ||
71 | add \reg1, \base, \offset | ||
72 | ldmia \reg1, {\reg1, \reg2} | ||
73 | #endif | ||
74 | .endm | ||
75 | |||
76 | @ Macro for storing 2 registers, for various ARM versions. | ||
77 | @ Registers must start with an even register, and must be consecutive. | ||
78 | |||
79 | .macro STR2OFS reg1, reg2, base, offset, scratch | ||
80 | #if ARM_ARCH >= 5 | ||
81 | strd \reg1, [\base, \offset] | ||
82 | #else | ||
83 | add \scratch, \base, \offset | ||
84 | stmia \scratch, {\reg1, \reg2} | ||
85 | #endif | ||
86 | .endm | ||
87 | |||
64 | @ Register usage: | 88 | @ Register usage: |
65 | @ | 89 | @ |
66 | @ r0-r11 - scratch | 90 | @ r0-r11 - scratch |
@@ -88,91 +112,90 @@ loop: | |||
88 | 112 | ||
89 | @ Predictor Y, Filter A | 113 | @ Predictor Y, Filter A |
90 | 114 | ||
91 | ldr r10, [r12, #YlastA] @ r10 := p->YlastA | 115 | ldr r11, [r12, #YlastA] @ r11 := p->YlastA |
92 | 116 | ||
93 | add r11, r14, #YDELAYA-12 @ r11 := &p->buf[YDELAYA-3] | 117 | add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] |
94 | ldmia r11, { r2 - r4 } @ r2 := p->buf[YDELAYA-3] | 118 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] |
95 | @ r3 := p->buf[YDELAYA-2] | 119 | @ r3 := p->buf[YDELAYA-2] |
96 | @ r4 := p->buf[YDELAYA-1] | 120 | @ r10 := p->buf[YDELAYA-1] |
97 | 121 | ||
98 | add r11, r12, #YcoeffsA | 122 | add r6, r12, #YcoeffsA |
99 | ldmia r11, {r6 - r9} @ r6 := p->YcoeffsA[0] | 123 | ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0] |
100 | @ r7 := p->YcoeffsA[1] | 124 | @ r7 := p->YcoeffsA[1] |
101 | @ r8 := p->YcoeffsA[2] | 125 | @ r8 := p->YcoeffsA[2] |
102 | @ r9 := p->YcoeffsA[3] | 126 | @ r9 := p->YcoeffsA[3] |
103 | 127 | ||
104 | subs r4, r10, r4 @ r4 := r10 - r4 | 128 | subs r10, r11, r10 @ r10 := r11 - r10 |
105 | 129 | ||
106 | add r11, r14, #YDELAYA-4 @ r11 := &p->buf[YDELAYA-1] | 130 | STR2OFS r10, r11, r14, #YDELAYA-4, r1 @ r1 -> scratch |
107 | stmia r11, { r4, r10 } @ p->buf[YDELAYA-1] = r4 | 131 | @ p->buf[YDELAYA-1] = r10 |
108 | @ p->buf[YDELAYA] = r10 | 132 | @ p->buf[YDELAYA] = r11 |
109 | 133 | ||
110 | mul r0, r10, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] | 134 | mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] |
111 | mla r0, r4, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | 135 | mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] |
112 | mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | 136 | mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] |
113 | mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | 137 | mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] |
114 | 138 | ||
115 | @ flags were set above, in the subs instruction | 139 | @ flags were set above, in the subs instruction |
116 | mvngt r4, #0 | ||
117 | movlt r4, #1 @ r4 := SIGN(r4) (see .c for SIGN macro) | ||
118 | |||
119 | cmp r10, #0 | ||
120 | mvngt r10, #0 | 140 | mvngt r10, #0 |
121 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | 141 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) |
122 | 142 | ||
123 | add r1, r14, #YADAPTCOEFFSA-4 | 143 | cmp r11, #0 |
124 | stmia r1, {r4, r10} @ p->buf[YADAPTCOEFFSA-1] := r4 | 144 | mvngt r11, #0 |
125 | @ p->buf[YADAPTCOEFFSA] := r10 | 145 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) |
146 | |||
147 | STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4, r1 @ r1 -> scratch | ||
148 | @ p->buf[YADAPTCOEFFSA-1] := r10 | ||
149 | @ p->buf[YADAPTCOEFFSA] := r11 | ||
126 | 150 | ||
127 | @ NOTE: r0 now contains predictionA - don't overwrite. | 151 | @ NOTE: r0 now contains predictionA - don't overwrite. |
128 | 152 | ||
129 | @ Predictor Y, Filter B | 153 | @ Predictor Y, Filter B |
130 | 154 | ||
131 | add r11, r12, #YfilterB | 155 | LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB |
132 | ldmia r11, {r6, r7} @ r6 := p->YfilterB | ||
133 | @ r7 := p->XfilterA | 156 | @ r7 := p->XfilterA |
134 | 157 | ||
135 | add r11, r14, #YDELAYB-16 @ r11 := &p->buf[YDELAYB-4] | 158 | add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4] |
136 | ldmia r11, { r2 - r5 } @ r2 := p->buf[YDELAYB-4] | 159 | ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4] |
137 | @ r3 := p->buf[YDELAYB-3] | 160 | @ r3 := p->buf[YDELAYB-3] |
138 | @ r4 := p->buf[YDELAYB-2] | 161 | @ r4 := p->buf[YDELAYB-2] |
139 | @ r5 := p->buf[YDELAYB-1] | 162 | @ r10 := p->buf[YDELAYB-1] |
140 | 163 | ||
141 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) | 164 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) |
142 | sub r10, r7, r6, asr #5 @ r10 (p->buf[YDELAYB]) := r7 - (r6 >> 5) | 165 | sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5) |
143 | 166 | ||
144 | str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA) | 167 | str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA) |
145 | 168 | ||
146 | add r1, r12, #YcoeffsB | 169 | add r5, r12, #YcoeffsB |
147 | ldmia r1, {r6,r7,r8,r9,r11} @ r6 := p->YcoeffsB[0] | 170 | ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0] |
148 | @ r7 := p->YcoeffsB[1] | 171 | @ r6 := p->YcoeffsB[1] |
149 | @ r8 := p->YcoeffsB[2] | 172 | @ r7 := p->YcoeffsB[2] |
150 | @ r9 := p->YcoeffsB[3] | 173 | @ r8 := p->YcoeffsB[3] |
151 | @ r11 := p->YcoeffsB[4] | 174 | @ r9 := p->YcoeffsB[4] |
152 | 175 | ||
153 | subs r5, r10, r5 @ r5 := r10 - r5 | 176 | subs r10, r11, r10 @ r10 := r11 - r10 |
154 | 177 | ||
155 | add r1, r14, #YDELAYB-4 @ r1 := &p->buf[YDELAYB-1] | 178 | STR2OFS r10, r11, r14, #YDELAYB-4, r1 @ r1 -> scratch |
156 | stmia r1, { r5, r10 } @ p->buf[YDELAYB-1] = r5 | 179 | @ p->buf[YDELAYB-1] = r10 |
157 | @ p->buf[YDELAYB] = r10 | 180 | @ p->buf[YDELAYB] = r11 |
158 | 181 | ||
159 | mul r1, r10, r6 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0] | 182 | mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0] |
160 | mla r1, r5, r7, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | 183 | mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] |
161 | mla r1, r4, r8, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | 184 | mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] |
162 | mla r1, r3, r9, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | 185 | mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] |
163 | mla r1, r2, r11, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | 186 | mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] |
164 | 187 | ||
165 | @ flags were set above, in the subs instruction | 188 | @ flags were set above, in the subs instruction |
166 | mvngt r5, #0 | ||
167 | movlt r5, #1 @ r5 := SIGN(r5) (see .c for SIGN macro) | ||
168 | |||
169 | cmp r10, #0 | ||
170 | mvngt r10, #0 | 189 | mvngt r10, #0 |
171 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | 190 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) |
191 | |||
192 | cmp r11, #0 | ||
193 | mvngt r11, #0 | ||
194 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
172 | 195 | ||
173 | add r2, r14, #YADAPTCOEFFSB-4 | 196 | STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4, r2 @ r2 -> scratch |
174 | stmia r2, {r5, r10} @ p->buf[YADAPTCOEFFSB-1] := r5 | 197 | @ p->buf[YADAPTCOEFFSB-1] := r10 |
175 | @ p->buf[YADAPTCOEFFSB] := r10 | 198 | @ p->buf[YADAPTCOEFFSB] := r11 |
176 | 199 | ||
177 | @ r0 still contains predictionA | 200 | @ r0 still contains predictionA |
178 | @ r1 contains predictionB | 201 | @ r1 contains predictionB |
@@ -201,31 +224,31 @@ loop: | |||
201 | cmp r3, #0 | 224 | cmp r3, #0 |
202 | beq 3f | 225 | beq 3f |
203 | 226 | ||
204 | add r1, r14, #YADAPTCOEFFSB-16 | 227 | add r2, r14, #YADAPTCOEFFSB-16 |
205 | ldmia r1, { r2, r3, r4 } @ r2 := p->buf[YADAPTCOEFFSB-4] | 228 | ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4] |
206 | @ r3 := p->buf[YADAPTCOEFFSB-3] | 229 | @ r3 := p->buf[YADAPTCOEFFSB-3] |
207 | @ r4 := p->buf[YADAPTCOEFFSB-2] | 230 | @ r4 := p->buf[YADAPTCOEFFSB-2] |
208 | blt 1f | 231 | blt 1f |
209 | 232 | ||
210 | @ *decoded0 > 0 | 233 | @ *decoded0 > 0 |
211 | 234 | ||
212 | sub r6, r6, r10 @ r6 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | 235 | sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] |
213 | sub r7, r7, r5 @ r7 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | 236 | sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] |
214 | sub r11, r11, r2 @ r11 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | 237 | sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] |
215 | sub r9, r9, r3 @ r9 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | 238 | sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] |
216 | sub r8, r8, r4 @ r8 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | 239 | sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] |
217 | 240 | ||
218 | add r0, r12, #YcoeffsB | 241 | add r0, r12, #YcoeffsB |
219 | stmia r0, {r6,r7,r8,r9,r11} @ Save p->YcoeffsB[] | 242 | stmia r0, {r5 - r9} @ Save p->YcoeffsB[] |
220 | 243 | ||
221 | add r1, r12, #YcoeffsA | 244 | add r1, r12, #YcoeffsA |
222 | ldmia r1, { r2-r5 } @ r2 := p->YcoeffsA[0] | 245 | ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] |
223 | @ r3 := p->YcoeffsA[1] | 246 | @ r3 := p->YcoeffsA[1] |
224 | @ r4 := p->YcoeffsA[2] | 247 | @ r4 := p->YcoeffsA[2] |
225 | @ r5 := p->YcoeffsA[3] | 248 | @ r5 := p->YcoeffsA[3] |
226 | 249 | ||
227 | add r0, r14, #YADAPTCOEFFSA-12 | 250 | add r6, r14, #YADAPTCOEFFSA-12 |
228 | ldmia r0, { r6-r9} @ r6 := p->buf[YADAPTCOEFFSA-3] | 251 | ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] |
229 | @ r7 := p->buf[YADAPTCOEFFSA-2] | 252 | @ r7 := p->buf[YADAPTCOEFFSA-2] |
230 | @ r8 := p->buf[YADAPTCOEFFSA-1] | 253 | @ r8 := p->buf[YADAPTCOEFFSA-1] |
231 | @ r9 := p->buf[YADAPTCOEFFSA] | 254 | @ r9 := p->buf[YADAPTCOEFFSA] |
@@ -240,23 +263,23 @@ loop: | |||
240 | 263 | ||
241 | 1: @ *decoded0 < 0 | 264 | 1: @ *decoded0 < 0 |
242 | 265 | ||
243 | add r6, r6, r10 @ r6 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | 266 | add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] |
244 | add r7, r7, r5 @ r7 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | 267 | add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] |
245 | add r11, r11, r2 @ r11 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | 268 | add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] |
246 | add r9, r9, r3 @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | 269 | add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] |
247 | add r8, r8, r4 @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | 270 | add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] |
248 | 271 | ||
249 | add r0, r12, #YcoeffsB | 272 | add r0, r12, #YcoeffsB |
250 | stmia r0, {r6,r7,r8,r9,r11} @ Save p->YcoeffsB[] | 273 | stmia r0, {r5 - r9} @ Save p->YcoeffsB[] |
251 | 274 | ||
252 | add r1, r12, #YcoeffsA | 275 | add r1, r12, #YcoeffsA |
253 | ldmia r1, { r2-r5 } @ r2 := p->YcoeffsA[0] | 276 | ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] |
254 | @ r3 := p->YcoeffsA[1] | 277 | @ r3 := p->YcoeffsA[1] |
255 | @ r4 := p->YcoeffsA[2] | 278 | @ r4 := p->YcoeffsA[2] |
256 | @ r5 := p->YcoeffsA[3] | 279 | @ r5 := p->YcoeffsA[3] |
257 | 280 | ||
258 | add r0, r14, #YADAPTCOEFFSA-12 | 281 | add r6, r14, #YADAPTCOEFFSA-12 |
259 | ldmia r0, { r6-r9} @ r6 := p->buf[YADAPTCOEFFSA-3] | 282 | ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] |
260 | @ r7 := p->buf[YADAPTCOEFFSA-2] | 283 | @ r7 := p->buf[YADAPTCOEFFSA-2] |
261 | @ r8 := p->buf[YADAPTCOEFFSA-1] | 284 | @ r8 := p->buf[YADAPTCOEFFSA-1] |
262 | @ r9 := p->buf[YADAPTCOEFFSA] | 285 | @ r9 := p->buf[YADAPTCOEFFSA] |
@@ -267,7 +290,7 @@ loop: | |||
267 | add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | 290 | add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] |
268 | 291 | ||
269 | 2: | 292 | 2: |
270 | stmia r1, {r2-r5} @ Save p->YcoeffsA | 293 | stmia r1, {r2 - r5} @ Save p->YcoeffsA |
271 | 294 | ||
272 | 3: | 295 | 3: |
273 | 296 | ||
@@ -275,91 +298,90 @@ loop: | |||
275 | 298 | ||
276 | @ Predictor X, Filter A | 299 | @ Predictor X, Filter A |
277 | 300 | ||
278 | ldr r10, [r12, #XlastA] @ r10 := p->XlastA | 301 | ldr r11, [r12, #XlastA] @ r11 := p->XlastA |
279 | 302 | ||
280 | add r11, r14, #XDELAYA-12 @ r11 := &p->buf[XDELAYA-3] | 303 | add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3] |
281 | ldmia r11, { r2 - r4 } @ r2 := p->buf[XDELAYA-3] | 304 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3] |
282 | @ r3 := p->buf[XDELAYA-2] | 305 | @ r3 := p->buf[XDELAYA-2] |
283 | @ r4 := p->buf[XDELAYA-1] | 306 | @ r10 := p->buf[XDELAYA-1] |
284 | 307 | ||
285 | add r11, r12, #XcoeffsA | 308 | add r6, r12, #XcoeffsA |
286 | ldmia r11, {r6 - r9} @ r6 := p->XcoeffsA[0] | 309 | ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0] |
287 | @ r7 := p->XcoeffsA[1] | 310 | @ r7 := p->XcoeffsA[1] |
288 | @ r8 := p->XcoeffsA[2] | 311 | @ r8 := p->XcoeffsA[2] |
289 | @ r9 := p->XcoeffsA[3] | 312 | @ r9 := p->XcoeffsA[3] |
290 | 313 | ||
291 | subs r4, r10, r4 @ r4 := r10 - r4 | 314 | subs r10, r11, r10 @ r10 := r11 - r10 |
292 | 315 | ||
293 | add r11, r14, #XDELAYA-4 @ r11 := &p->buf[XDELAYA-1] | 316 | STR2OFS r10, r11, r14, #XDELAYA-4, r1 @ r1 -> scratch |
294 | stmia r11, { r4, r10 } @ p->buf[XDELAYA-1] = r4 | 317 | @ p->buf[XDELAYA-1] = r10 |
295 | @ p->buf[XDELAYA] = r10 | 318 | @ p->buf[XDELAYA] = r11 |
296 | 319 | ||
297 | mul r0, r10, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0] | 320 | mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0] |
298 | mla r0, r4, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | 321 | mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] |
299 | mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | 322 | mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] |
300 | mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | 323 | mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] |
301 | 324 | ||
302 | @ flags were set above, in the subs instruction | 325 | @ flags were set above, in the subs instruction |
303 | mvngt r4, #0 | ||
304 | movlt r4, #1 @ r4 := SIGN(r4) (see .c for SIGN macro) | ||
305 | |||
306 | cmp r10, #0 | ||
307 | mvngt r10, #0 | 326 | mvngt r10, #0 |
308 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | 327 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) |
328 | |||
329 | cmp r11, #0 | ||
330 | mvngt r11, #0 | ||
331 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
309 | 332 | ||
310 | add r1, r14, #XADAPTCOEFFSA-4 | 333 | STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4, r1 @ r1 -> scratch |
311 | stmia r1, {r4, r10} @ p->buf[XADAPTCOEFFSA-1] := r4 | 334 | @ p->buf[XADAPTCOEFFSA-1] := r10 |
312 | @ p->buf[XADAPTCOEFFSA] := r10 | 335 | @ p->buf[XADAPTCOEFFSA] := r11 |
313 | 336 | ||
314 | @ NOTE: r0 now contains predictionA - don't overwrite. | 337 | @ NOTE: r0 now contains predictionA - don't overwrite. |
315 | 338 | ||
316 | @ Predictor X, Filter B | 339 | @ Predictor X, Filter B |
317 | 340 | ||
318 | add r11, r12, #XfilterB | 341 | LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB |
319 | ldmia r11, {r6, r7} @ r6 := p->XfilterB | ||
320 | @ r7 := p->YfilterA | 342 | @ r7 := p->YfilterA |
321 | 343 | ||
322 | add r11, r14, #XDELAYB-16 @ r11 := &p->buf[XDELAYB-4] | 344 | add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4] |
323 | ldmia r11, { r2 - r5 } @ r2 := p->buf[XDELAYB-4] | 345 | ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4] |
324 | @ r3 := p->buf[XDELAYB-3] | 346 | @ r3 := p->buf[XDELAYB-3] |
325 | @ r4 := p->buf[XDELAYB-2] | 347 | @ r4 := p->buf[XDELAYB-2] |
326 | @ r5 := p->buf[XDELAYB-1] | 348 | @ r10 := p->buf[XDELAYB-1] |
327 | 349 | ||
328 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) | 350 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) |
329 | sub r10, r7, r6, asr #5 @ r10 (p->buf[XDELAYB]) := r7 - (r6 >> 5) | 351 | sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5) |
330 | 352 | ||
331 | str r7, [r12, #XfilterB] @ p->XfilterB := r11 (p->YfilterA) | 353 | str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA) |
332 | 354 | ||
333 | add r1, r12, #XcoeffsB | 355 | add r5, r12, #XcoeffsB |
334 | ldmia r1, {r6,r7,r8,r9,r11} @ r6 := p->XcoeffsB[0] | 356 | ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0] |
335 | @ r7 := p->XcoeffsB[1] | 357 | @ r6 := p->XcoeffsB[1] |
336 | @ r8 := p->XcoeffsB[2] | 358 | @ r7 := p->XcoeffsB[2] |
337 | @ r9 := p->XcoeffsB[3] | 359 | @ r8 := p->XcoeffsB[3] |
338 | @ r11 := p->XcoeffsB[4] | 360 | @ r9 := p->XcoeffsB[4] |
339 | 361 | ||
340 | subs r5, r10, r5 @ r5 := r10 - r5 | 362 | subs r10, r11, r10 @ r10 := r11 - r10 |
341 | 363 | ||
342 | add r1, r14, #XDELAYB-4 @ r1 := &p->buf[XDELAYB-1] | 364 | STR2OFS r10, r11, r14, #XDELAYB-4, r1 @ r1 -> scratch |
343 | stmia r1, { r5, r10 } @ p->buf[XDELAYB-1] = r5 | 365 | @ p->buf[XDELAYB-1] = r10 |
344 | @ p->buf[XDELAYB] = r10 | 366 | @ p->buf[XDELAYB] = r11 |
345 | 367 | ||
346 | mul r1, r10, r6 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0] | 368 | mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0] |
347 | mla r1, r5, r7, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | 369 | mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] |
348 | mla r1, r4, r8, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | 370 | mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] |
349 | mla r1, r3, r9, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | 371 | mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] |
350 | mla r1, r2, r11, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | 372 | mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] |
351 | 373 | ||
352 | @ flags were set above, in the subs instruction | 374 | @ flags were set above, in the subs instruction |
353 | mvngt r5, #0 | ||
354 | movlt r5, #1 @ r5 := SIGN(r5) (see .c for SIGN macro) | ||
355 | |||
356 | cmp r10, #0 | ||
357 | mvngt r10, #0 | 375 | mvngt r10, #0 |
358 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | 376 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) |
377 | |||
378 | cmp r11, #0 | ||
379 | mvngt r11, #0 | ||
380 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
359 | 381 | ||
360 | add r2, r14, #XADAPTCOEFFSB-4 | 382 | STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4, r2 @ r2 -> scratch |
361 | stmia r2, {r5, r10} @ p->buf[XADAPTCOEFFSB-1] := r5 | 383 | @ p->buf[XADAPTCOEFFSB-1] := r10 |
362 | @ p->buf[XADAPTCOEFFSB] := r10 | 384 | @ p->buf[XADAPTCOEFFSB] := r11 |
363 | 385 | ||
364 | @ r0 still contains predictionA | 386 | @ r0 still contains predictionA |
365 | @ r1 contains predictionB | 387 | @ r1 contains predictionB |
@@ -388,31 +410,31 @@ loop: | |||
388 | cmp r3, #0 | 410 | cmp r3, #0 |
389 | beq 3f | 411 | beq 3f |
390 | 412 | ||
391 | add r1, r14, #XADAPTCOEFFSB-16 | 413 | add r2, r14, #XADAPTCOEFFSB-16 |
392 | ldmia r1, { r2, r3, r4 } @ r2 := p->buf[XADAPTCOEFFSB-4] | 414 | ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4] |
393 | @ r3 := p->buf[XADAPTCOEFFSB-3] | 415 | @ r3 := p->buf[XADAPTCOEFFSB-3] |
394 | @ r4 := p->buf[XADAPTCOEFFSB-2] | 416 | @ r4 := p->buf[XADAPTCOEFFSB-2] |
395 | blt 1f | 417 | blt 1f |
396 | 418 | ||
397 | @ *decoded1 > 0 | 419 | @ *decoded1 > 0 |
398 | 420 | ||
399 | sub r6, r6, r10 @ r6 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | 421 | sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] |
400 | sub r7, r7, r5 @ r7 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | 422 | sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] |
401 | sub r11, r11, r2 @ r11 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | 423 | sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] |
402 | sub r9, r9, r3 @ r9 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | 424 | sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] |
403 | sub r8, r8, r4 @ r8 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | 425 | sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] |
404 | 426 | ||
405 | add r0, r12, #XcoeffsB | 427 | add r0, r12, #XcoeffsB |
406 | stmia r0, {r6,r7,r8,r9,r11} @ Save p->XcoeffsB[] | 428 | stmia r0, {r5 - r9} @ Save p->XcoeffsB[] |
407 | 429 | ||
408 | add r1, r12, #XcoeffsA | 430 | add r1, r12, #XcoeffsA |
409 | ldmia r1, { r2-r5 } @ r2 := p->XcoeffsA[0] | 431 | ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] |
410 | @ r3 := p->XcoeffsA[1] | 432 | @ r3 := p->XcoeffsA[1] |
411 | @ r4 := p->XcoeffsA[2] | 433 | @ r4 := p->XcoeffsA[2] |
412 | @ r5 := p->XcoeffsA[3] | 434 | @ r5 := p->XcoeffsA[3] |
413 | 435 | ||
414 | add r0, r14, #XADAPTCOEFFSA-12 | 436 | add r6, r14, #XADAPTCOEFFSA-12 |
415 | ldmia r0, { r6-r9} @ r6 := p->buf[XADAPTCOEFFSA-3] | 437 | ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] |
416 | @ r7 := p->buf[XADAPTCOEFFSA-2] | 438 | @ r7 := p->buf[XADAPTCOEFFSA-2] |
417 | @ r8 := p->buf[XADAPTCOEFFSA-1] | 439 | @ r8 := p->buf[XADAPTCOEFFSA-1] |
418 | @ r9 := p->buf[XADAPTCOEFFSA] | 440 | @ r9 := p->buf[XADAPTCOEFFSA] |
@@ -427,23 +449,23 @@ loop: | |||
427 | 449 | ||
428 | 1: @ *decoded1 < 0 | 450 | 1: @ *decoded1 < 0 |
429 | 451 | ||
430 | add r6, r6, r10 @ r6 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | 452 | add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] |
431 | add r7, r7, r5 @ r7 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | 453 | add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] |
432 | add r11, r11, r2 @ r11 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | 454 | add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] |
433 | add r9, r9, r3 @ r9 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | 455 | add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] |
434 | add r8, r8, r4 @ r8 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | 456 | add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] |
435 | 457 | ||
436 | add r0, r12, #XcoeffsB | 458 | add r0, r12, #XcoeffsB |
437 | stmia r0, {r6,r7,r8,r9,r11} @ Save p->XcoeffsB[] | 459 | stmia r0, {r5 - r9} @ Save p->XcoeffsB[] |
438 | 460 | ||
439 | add r1, r12, #XcoeffsA | 461 | add r1, r12, #XcoeffsA |
440 | ldmia r1, { r2-r5 } @ r2 := p->XcoeffsA[0] | 462 | ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] |
441 | @ r3 := p->XcoeffsA[1] | 463 | @ r3 := p->XcoeffsA[1] |
442 | @ r4 := p->XcoeffsA[2] | 464 | @ r4 := p->XcoeffsA[2] |
443 | @ r5 := p->XcoeffsA[3] | 465 | @ r5 := p->XcoeffsA[3] |
444 | 466 | ||
445 | add r0, r14, #XADAPTCOEFFSA-12 | 467 | add r6, r14, #XADAPTCOEFFSA-12 |
446 | ldmia r0, { r6-r9} @ r6 := p->buf[XADAPTCOEFFSA-3] | 468 | ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] |
447 | @ r7 := p->buf[XADAPTCOEFFSA-2] | 469 | @ r7 := p->buf[XADAPTCOEFFSA-2] |
448 | @ r8 := p->buf[XADAPTCOEFFSA-1] | 470 | @ r8 := p->buf[XADAPTCOEFFSA-1] |
449 | @ r9 := p->buf[XADAPTCOEFFSA] | 471 | @ r9 := p->buf[XADAPTCOEFFSA] |
@@ -454,7 +476,7 @@ loop: | |||
454 | add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | 476 | add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] |
455 | 477 | ||
456 | 2: | 478 | 2: |
457 | stmia r1, {r2-r5} @ Save p->XcoeffsA | 479 | stmia r1, {r2 - r5} @ Save p->XcoeffsA |
458 | 480 | ||
459 | 3: | 481 | 3: |
460 | 482 | ||
@@ -479,7 +501,7 @@ loop: | |||
479 | done: | 501 | done: |
480 | str r14, [r12] @ Save value of p->buf | 502 | str r14, [r12] @ Save value of p->buf |
481 | add sp, sp, #12 @ Don't bother restoring r1-r3 | 503 | add sp, sp, #12 @ Don't bother restoring r1-r3 |
482 | ldmia sp!, {r4-r11, pc} | 504 | ldmia sp!, {r4 - r11, pc} |
483 | 505 | ||
484 | move_hist: | 506 | move_hist: |
485 | @ dest = r11 (p->historybuffer) | 507 | @ dest = r11 (p->historybuffer) |