diff options
Diffstat (limited to 'apps/codecs/demac')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_armv6.h | 147 |
1 files changed, 66 insertions, 81 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index bf50d9cabd..e180429193 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -39,36 +39,33 @@ static inline void vector_add(int16_t* v1, int16_t* v2) | |||
39 | 39 | ||
40 | "10: \n" | 40 | "10: \n" |
41 | "ldrh r4, [%[v2]], #2 \n" | 41 | "ldrh r4, [%[v2]], #2 \n" |
42 | "mov r4, r4, lsl #16 \n" | ||
42 | "1: \n" | 43 | "1: \n" |
43 | "ldmia %[v2]!, {r5-r8} \n" | 44 | "ldmia %[v2]!, {r5-r8} \n" |
44 | "ldmia %[v1], {r0-r3} \n" | 45 | "ldmia %[v1], {r0-r3} \n" |
45 | "mov r5, r5, ror #16 \n" | 46 | "mov r5, r5, ror #16 \n" |
46 | "pkhbt r4, r4, r5 \n" | 47 | "pkhtb r4, r5, r4, asr #16 \n" |
47 | "sadd16 r0, r0, r4 \n" | 48 | "sadd16 r0, r0, r4 \n" |
48 | "mov r6, r6, ror #16 \n" | 49 | "pkhbt r5, r5, r6, lsl #16 \n" |
49 | "pkhbt r5, r5, r6 \n" | ||
50 | "sadd16 r1, r1, r5 \n" | 50 | "sadd16 r1, r1, r5 \n" |
51 | "mov r7, r7, ror #16 \n" | 51 | "mov r7, r7, ror #16 \n" |
52 | "pkhbt r6, r6, r7 \n" | 52 | "pkhtb r6, r7, r6, asr #16 \n" |
53 | "sadd16 r2, r2, r6 \n" | 53 | "sadd16 r2, r2, r6 \n" |
54 | "mov r8, r8, ror #16 \n" | 54 | "pkhbt r7, r7, r8, lsl #16 \n" |
55 | "pkhbt r7, r7, r8 \n" | ||
56 | "sadd16 r3, r3, r7 \n" | 55 | "sadd16 r3, r3, r7 \n" |
57 | "stmia %[v1]!, {r0-r3} \n" | 56 | "stmia %[v1]!, {r0-r3} \n" |
58 | "mov r4, r8 \n" | 57 | "mov r4, r8 \n" |
59 | "ldmia %[v2]!, {r5-r8} \n" | 58 | "ldmia %[v2]!, {r5-r8} \n" |
60 | "ldmia %[v1], {r0-r3} \n" | 59 | "ldmia %[v1], {r0-r3} \n" |
61 | "mov r5, r5, ror #16 \n" | 60 | "mov r5, r5, ror #16 \n" |
62 | "pkhbt r4, r4, r5 \n" | 61 | "pkhtb r4, r5, r4, asr #16 \n" |
63 | "sadd16 r0, r0, r4 \n" | 62 | "sadd16 r0, r0, r4 \n" |
64 | "mov r6, r6, ror #16 \n" | 63 | "pkhbt r5, r5, r6, lsl #16 \n" |
65 | "pkhbt r5, r5, r6 \n" | ||
66 | "sadd16 r1, r1, r5 \n" | 64 | "sadd16 r1, r1, r5 \n" |
67 | "mov r7, r7, ror #16 \n" | 65 | "mov r7, r7, ror #16 \n" |
68 | "pkhbt r6, r6, r7 \n" | 66 | "pkhtb r6, r7, r6, asr #16 \n" |
69 | "sadd16 r2, r2, r6 \n" | 67 | "sadd16 r2, r2, r6 \n" |
70 | "mov r8, r8, ror #16 \n" | 68 | "pkhbt r7, r7, r8, lsl #16 \n" |
71 | "pkhbt r7, r7, r8 \n" | ||
72 | "sadd16 r3, r3, r7 \n" | 69 | "sadd16 r3, r3, r7 \n" |
73 | "stmia %[v1]!, {r0-r3} \n" | 70 | "stmia %[v1]!, {r0-r3} \n" |
74 | #if ORDER > 16 | 71 | #if ORDER > 16 |
@@ -128,36 +125,33 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
128 | 125 | ||
129 | "10: \n" | 126 | "10: \n" |
130 | "ldrh r4, [%[v2]], #2 \n" | 127 | "ldrh r4, [%[v2]], #2 \n" |
128 | "mov r4, r4, lsl #16 \n" | ||
131 | "1: \n" | 129 | "1: \n" |
132 | "ldmia %[v2]!, {r5-r8} \n" | 130 | "ldmia %[v2]!, {r5-r8} \n" |
133 | "ldmia %[v1], {r0-r3} \n" | 131 | "ldmia %[v1], {r0-r3} \n" |
134 | "mov r5, r5, ror #16 \n" | 132 | "mov r5, r5, ror #16 \n" |
135 | "pkhbt r4, r4, r5 \n" | 133 | "pkhtb r4, r5, r4, asr #16 \n" |
136 | "ssub16 r0, r0, r4 \n" | 134 | "ssub16 r0, r0, r4 \n" |
137 | "mov r6, r6, ror #16 \n" | 135 | "pkhbt r5, r5, r6, lsl #16 \n" |
138 | "pkhbt r5, r5, r6 \n" | ||
139 | "ssub16 r1, r1, r5 \n" | 136 | "ssub16 r1, r1, r5 \n" |
140 | "mov r7, r7, ror #16 \n" | 137 | "mov r7, r7, ror #16 \n" |
141 | "pkhbt r6, r6, r7 \n" | 138 | "pkhtb r6, r7, r6, asr #16 \n" |
142 | "ssub16 r2, r2, r6 \n" | 139 | "ssub16 r2, r2, r6 \n" |
143 | "mov r8, r8, ror #16 \n" | 140 | "pkhbt r7, r7, r8, lsl #16 \n" |
144 | "pkhbt r7, r7, r8 \n" | ||
145 | "ssub16 r3, r3, r7 \n" | 141 | "ssub16 r3, r3, r7 \n" |
146 | "stmia %[v1]!, {r0-r3} \n" | 142 | "stmia %[v1]!, {r0-r3} \n" |
147 | "mov r4, r8 \n" | 143 | "mov r4, r8 \n" |
148 | "ldmia %[v2]!, {r5-r8} \n" | 144 | "ldmia %[v2]!, {r5-r8} \n" |
149 | "ldmia %[v1], {r0-r3} \n" | 145 | "ldmia %[v1], {r0-r3} \n" |
150 | "mov r5, r5, ror #16 \n" | 146 | "mov r5, r5, ror #16 \n" |
151 | "pkhbt r4, r4, r5 \n" | 147 | "pkhtb r4, r5, r4, asr #16 \n" |
152 | "ssub16 r0, r0, r4 \n" | 148 | "ssub16 r0, r0, r4 \n" |
153 | "mov r6, r6, ror #16 \n" | 149 | "pkhbt r5, r5, r6, lsl #16 \n" |
154 | "pkhbt r5, r5, r6 \n" | ||
155 | "ssub16 r1, r1, r5 \n" | 150 | "ssub16 r1, r1, r5 \n" |
156 | "mov r7, r7, ror #16 \n" | 151 | "mov r7, r7, ror #16 \n" |
157 | "pkhbt r6, r6, r7 \n" | 152 | "pkhtb r6, r7, r6, asr #16 \n" |
158 | "ssub16 r2, r2, r6 \n" | 153 | "ssub16 r2, r2, r6 \n" |
159 | "mov r8, r8, ror #16 \n" | 154 | "pkhbt r7, r7, r8, lsl #16 \n" |
160 | "pkhbt r7, r7, r8 \n" | ||
161 | "ssub16 r3, r3, r7 \n" | 155 | "ssub16 r3, r3, r7 \n" |
162 | "stmia %[v1]!, {r0-r3} \n" | 156 | "stmia %[v1]!, {r0-r3} \n" |
163 | #if ORDER > 16 | 157 | #if ORDER > 16 |
@@ -217,80 +211,71 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
217 | "beq 20f \n" | 211 | "beq 20f \n" |
218 | 212 | ||
219 | "10: \n" | 213 | "10: \n" |
220 | "ldrh r2, [%[v2]], #2 \n" | 214 | "ldrh r7, [%[v2]], #2 \n" |
221 | "ldr r0, [%[v1]], #4 \n" | 215 | "ldmia %[v2]!, {r4-r5} \n" |
222 | "ldr r3, [%[v2]], #4 \n" | 216 | "ldmia %[v1]!, {r0-r1} \n" |
223 | "mov r2, r2, lsl #16 \n" | 217 | "mov r7, r7, lsl #16 \n" |
224 | "1: \n" | 218 | "1: \n" |
225 | "ldr r1, [%[v1]], #4 \n" | 219 | "pkhbt r8, r4, r7 \n" |
226 | "smlabt %[res], r0, r2, %[res] \n" | 220 | "ldmia %[v2]!, {r6-r7} \n" |
227 | "ldr r4, [%[v2]], #4 \n" | 221 | "smladx %[res], r0, r8, %[res] \n" |
228 | "smlatb %[res], r0, r3, %[res] \n" | 222 | "pkhbt r8, r5, r4 \n" |
229 | "ldr r0, [%[v1]], #4 \n" | 223 | "ldmia %[v1]!, {r2-r3} \n" |
230 | "smlabt %[res], r1, r3, %[res] \n" | 224 | "smladx %[res], r1, r8, %[res] \n" |
231 | "ldr r5, [%[v2]], #4 \n" | 225 | "pkhbt r8, r6, r5 \n" |
232 | "smlatb %[res], r1, r4, %[res] \n" | 226 | "ldmia %[v2]!, {r4-r5} \n" |
233 | "ldr r1, [%[v1]], #4 \n" | 227 | "smladx %[res], r2, r8, %[res] \n" |
234 | "smlabt %[res], r0, r4, %[res] \n" | 228 | "pkhbt r8, r7, r6 \n" |
235 | "ldr r6, [%[v2]], #4 \n" | 229 | "ldmia %[v1]!, {r0-r1} \n" |
236 | "smlatb %[res], r0, r5, %[res] \n" | 230 | "smladx %[res], r3, r8, %[res] \n" |
237 | "ldr r0, [%[v1]], #4 \n" | 231 | "pkhbt r8, r4, r7 \n" |
238 | "smlabt %[res], r1, r5, %[res] \n" | 232 | "ldmia %[v2]!, {r6-r7} \n" |
239 | "ldr r3, [%[v2]], #4 \n" | 233 | "smladx %[res], r0, r8, %[res] \n" |
240 | "smlatb %[res], r1, r6, %[res] \n" | 234 | "pkhbt r8, r5, r4 \n" |
241 | "mov r2, r6 \n" | 235 | "ldmia %[v1]!, {r2-r3} \n" |
242 | "ldr r1, [%[v1]], #4 \n" | 236 | "smladx %[res], r1, r8, %[res] \n" |
243 | "smlabt %[res], r0, r2, %[res] \n" | 237 | "pkhbt r8, r6, r5 \n" |
244 | "ldr r4, [%[v2]], #4 \n" | ||
245 | "smlatb %[res], r0, r3, %[res] \n" | ||
246 | "ldr r0, [%[v1]], #4 \n" | ||
247 | "smlabt %[res], r1, r3, %[res] \n" | ||
248 | "ldr r5, [%[v2]], #4 \n" | ||
249 | "smlatb %[res], r1, r4, %[res] \n" | ||
250 | "ldr r1, [%[v1]], #4 \n" | ||
251 | "smlabt %[res], r0, r4, %[res] \n" | ||
252 | "ldr r6, [%[v2]], #4 \n" | ||
253 | "smlatb %[res], r0, r5, %[res] \n" | ||
254 | #if ORDER > 16 | 238 | #if ORDER > 16 |
255 | "subs %[cnt], %[cnt], #1 \n" | 239 | "subs %[cnt], %[cnt], #1 \n" |
256 | "ldrne r0, [%[v1]], #4 \n" | 240 | "ldmneia %[v2]!, {r4-r5} \n" |
257 | "smlabt %[res], r1, r5, %[res] \n" | 241 | "smladx %[res], r2, r8, %[res] \n" |
258 | "ldrne r3, [%[v2]], #4 \n" | 242 | "pkhbt r8, r7, r6 \n" |
259 | "smlatb %[res], r1, r6, %[res] \n" | 243 | "ldmneia %[v1]!, {r0-r1} \n" |
260 | "mov r2, r6 \n" | 244 | "smladx %[res], r3, r8, %[res] \n" |
261 | "bne 1b \n" | 245 | "bne 1b \n" |
262 | #else | 246 | #else |
263 | "smlabt %[res], r1, r5, %[res] \n" | 247 | "pkhbt r7, r7, r6 \n" |
264 | "smlatb %[res], r1, r6, %[res] \n" | 248 | "smladx %[res], r2, r8, %[res] \n" |
249 | "smladx %[res], r3, r7, %[res] \n" | ||
265 | #endif | 250 | #endif |
266 | "b 99f \n" | 251 | "b 99f \n" |
267 | 252 | ||
268 | "20: \n" | 253 | "20: \n" |
269 | "ldmia %[v1]!, {r0-r1} \n" | 254 | "ldmia %[v1]!, {r0-r1} \n" |
270 | "ldmia %[v2]!, {r4-r5} \n" | 255 | "ldmia %[v2]!, {r5-r7} \n" |
271 | "1: \n" | 256 | "1: \n" |
272 | "ldmia %[v1]!, {r2-r3} \n" | 257 | "ldmia %[v1]!, {r2-r3} \n" |
273 | "smlad %[res], r0, r4, %[res] \n" | 258 | "smlad %[res], r0, r5, %[res] \n" |
274 | "ldmia %[v2]!, {r6-r7} \n" | ||
275 | "smlad %[res], r1, r5, %[res] \n" | ||
276 | "ldmia %[v1]!, {r0-r1} \n" | ||
277 | "smlad %[res], r2, r6, %[res] \n" | ||
278 | "ldmia %[v2]!, {r4-r5} \n" | 259 | "ldmia %[v2]!, {r4-r5} \n" |
279 | "smlad %[res], r3, r7, %[res] \n" | 260 | "smlad %[res], r1, r6, %[res] \n" |
280 | "ldmia %[v1]!, {r2-r3} \n" | 261 | "ldmia %[v1]!, {r0-r1} \n" |
281 | "smlad %[res], r0, r4, %[res] \n" | 262 | "smlad %[res], r2, r7, %[res] \n" |
282 | "ldmia %[v2]!, {r6-r7} \n" | 263 | "ldmia %[v2]!, {r6-r7} \n" |
283 | "smlad %[res], r1, r5, %[res] \n" | 264 | "smlad %[res], r3, r4, %[res] \n" |
265 | "ldmia %[v1]!, {r2-r3} \n" | ||
266 | "smlad %[res], r0, r5, %[res] \n" | ||
267 | "ldmia %[v2]!, {r4-r5} \n" | ||
268 | "smlad %[res], r1, r6, %[res] \n" | ||
284 | #if ORDER > 16 | 269 | #if ORDER > 16 |
285 | "subs %[cnt], %[cnt], #1 \n" | 270 | "subs %[cnt], %[cnt], #1 \n" |
286 | "ldmneia %[v1]!, {r0-r1} \n" | 271 | "ldmneia %[v1]!, {r0-r1} \n" |
287 | "smlad %[res], r2, r6, %[res] \n" | 272 | "smlad %[res], r2, r7, %[res] \n" |
288 | "ldmneia %[v2]!, {r4-r5} \n" | 273 | "ldmneia %[v2]!, {r6-r7} \n" |
289 | "smlad %[res], r3, r7, %[res] \n" | 274 | "smlad %[res], r3, r4, %[res] \n" |
290 | "bne 1b \n" | 275 | "bne 1b \n" |
291 | #else | 276 | #else |
292 | "smlad %[res], r2, r6, %[res] \n" | 277 | "smlad %[res], r2, r7, %[res] \n" |
293 | "smlad %[res], r3, r7, %[res] \n" | 278 | "smlad %[res], r3, r4, %[res] \n" |
294 | #endif | 279 | #endif |
295 | 280 | ||
296 | "99: \n" | 281 | "99: \n" |
@@ -303,8 +288,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
303 | [res]"+r"(res) | 288 | [res]"+r"(res) |
304 | : /* inputs */ | 289 | : /* inputs */ |
305 | : /* clobbers */ | 290 | : /* clobbers */ |
306 | "r0", "r1", "r2", "r3", | 291 | "r0", "r1", "r2", "r3", "r4", |
307 | "r4", "r5", "r6", "r7" | 292 | "r5", "r6", "r7", "r8" |
308 | ); | 293 | ); |
309 | return res; | 294 | return res; |
310 | } | 295 | } |