summary | refs | log | tree | commit | diff
path: root/apps/codecs/demac/libdemac/vector_math16_armv6.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv6.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv6.h 111
1 file changed, 54 insertions, 57 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index 49fa2ceb7d..cd27b271af 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -29,8 +29,14 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
29 * incorrect results (if ARM aligncheck is disabled). */ 29 * incorrect results (if ARM aligncheck is disabled). */
30static inline void vector_add(int16_t* v1, int16_t* v2) 30static inline void vector_add(int16_t* v1, int16_t* v2)
31{ 31{
32#if ORDER > 32
33 int cnt = ORDER>>5;
34#endif
35
32#if ORDER > 16 36#if ORDER > 16
33 int cnt = ORDER>>4; 37#define ADD_SUB_BLOCKS "4"
38#else
39#define ADD_SUB_BLOCKS "2"
34#endif 40#endif
35 41
36 asm volatile ( 42 asm volatile (
@@ -42,6 +48,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
42 "ldr r5, [%[v2]], #4 \n" 48 "ldr r5, [%[v2]], #4 \n"
43 "mov r4, r4, lsl #16 \n" 49 "mov r4, r4, lsl #16 \n"
44 "1: \n" 50 "1: \n"
51 ".rept " ADD_SUB_BLOCKS "\n"
45 "ldmia %[v2]!, {r6-r7} \n" 52 "ldmia %[v2]!, {r6-r7} \n"
46 "ldmia %[v1], {r0-r3} \n" 53 "ldmia %[v1], {r0-r3} \n"
47 "mov r5, r5, ror #16 \n" 54 "mov r5, r5, ror #16 \n"
@@ -56,21 +63,8 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
56 "pkhbt r7, r7, r4, lsl #16 \n" 63 "pkhbt r7, r7, r4, lsl #16 \n"
57 "sadd16 r3, r3, r7 \n" 64 "sadd16 r3, r3, r7 \n"
58 "stmia %[v1]!, {r0-r3} \n" 65 "stmia %[v1]!, {r0-r3} \n"
59 "ldmia %[v2]!, {r6-r7} \n" 66 ".endr \n"
60 "ldmia %[v1], {r0-r3} \n" 67#if ORDER > 32
61 "mov r5, r5, ror #16 \n"
62 "pkhtb r4, r5, r4, asr #16 \n"
63 "sadd16 r0, r0, r4 \n"
64 "pkhbt r5, r5, r6, lsl #16 \n"
65 "sadd16 r1, r1, r5 \n"
66 "ldmia %[v2]!, {r4-r5} \n"
67 "mov r7, r7, ror #16 \n"
68 "pkhtb r6, r7, r6, asr #16 \n"
69 "sadd16 r2, r2, r6 \n"
70 "pkhbt r7, r7, r4, lsl #16 \n"
71 "sadd16 r3, r3, r7 \n"
72 "stmia %[v1]!, {r0-r3} \n"
73#if ORDER > 16
74 "subs %[cnt], %[cnt], #1 \n" 68 "subs %[cnt], %[cnt], #1 \n"
75 "bne 1b \n" 69 "bne 1b \n"
76#endif 70#endif
@@ -78,6 +72,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
78 72
79 "20: \n" 73 "20: \n"
80 "1: \n" 74 "1: \n"
75 ".rept " ADD_SUB_BLOCKS "\n"
81 "ldmia %[v2]!, {r4-r7} \n" 76 "ldmia %[v2]!, {r4-r7} \n"
82 "ldmia %[v1], {r0-r3} \n" 77 "ldmia %[v1], {r0-r3} \n"
83 "sadd16 r0, r0, r4 \n" 78 "sadd16 r0, r0, r4 \n"
@@ -85,21 +80,15 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
85 "sadd16 r2, r2, r6 \n" 80 "sadd16 r2, r2, r6 \n"
86 "sadd16 r3, r3, r7 \n" 81 "sadd16 r3, r3, r7 \n"
87 "stmia %[v1]!, {r0-r3} \n" 82 "stmia %[v1]!, {r0-r3} \n"
88 "ldmia %[v2]!, {r4-r7} \n" 83 ".endr \n"
89 "ldmia %[v1], {r0-r3} \n" 84#if ORDER > 32
90 "sadd16 r0, r0, r4 \n"
91 "sadd16 r1, r1, r5 \n"
92 "sadd16 r2, r2, r6 \n"
93 "sadd16 r3, r3, r7 \n"
94 "stmia %[v1]!, {r0-r3} \n"
95#if ORDER > 16
96 "subs %[cnt], %[cnt], #1 \n" 85 "subs %[cnt], %[cnt], #1 \n"
97 "bne 1b \n" 86 "bne 1b \n"
98#endif 87#endif
99 88
100 "99: \n" 89 "99: \n"
101 : /* outputs */ 90 : /* outputs */
102#if ORDER > 16 91#if ORDER > 32
103 [cnt]"+r"(cnt), 92 [cnt]"+r"(cnt),
104#endif 93#endif
105 [v1] "+r"(v1), 94 [v1] "+r"(v1),
@@ -116,8 +105,8 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
116 * incorrect results (if ARM aligncheck is disabled). */ 105 * incorrect results (if ARM aligncheck is disabled). */
117static inline void vector_sub(int16_t* v1, int16_t* v2) 106static inline void vector_sub(int16_t* v1, int16_t* v2)
118{ 107{
119#if ORDER > 16 108#if ORDER > 32
120 int cnt = ORDER>>4; 109 int cnt = ORDER>>5;
121#endif 110#endif
122 111
123 asm volatile ( 112 asm volatile (
@@ -129,6 +118,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
129 "ldr r5, [%[v2]], #4 \n" 118 "ldr r5, [%[v2]], #4 \n"
130 "mov r4, r4, lsl #16 \n" 119 "mov r4, r4, lsl #16 \n"
131 "1: \n" 120 "1: \n"
121 ".rept " ADD_SUB_BLOCKS "\n"
132 "ldmia %[v2]!, {r6-r7} \n" 122 "ldmia %[v2]!, {r6-r7} \n"
133 "ldmia %[v1], {r0-r3} \n" 123 "ldmia %[v1], {r0-r3} \n"
134 "mov r5, r5, ror #16 \n" 124 "mov r5, r5, ror #16 \n"
@@ -143,21 +133,8 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
143 "pkhbt r7, r7, r4, lsl #16 \n" 133 "pkhbt r7, r7, r4, lsl #16 \n"
144 "ssub16 r3, r3, r7 \n" 134 "ssub16 r3, r3, r7 \n"
145 "stmia %[v1]!, {r0-r3} \n" 135 "stmia %[v1]!, {r0-r3} \n"
146 "ldmia %[v2]!, {r6-r7} \n" 136 ".endr \n"
147 "ldmia %[v1], {r0-r3} \n" 137#if ORDER > 32
148 "mov r5, r5, ror #16 \n"
149 "pkhtb r4, r5, r4, asr #16 \n"
150 "ssub16 r0, r0, r4 \n"
151 "pkhbt r5, r5, r6, lsl #16 \n"
152 "ssub16 r1, r1, r5 \n"
153 "ldmia %[v2]!, {r4-r5} \n"
154 "mov r7, r7, ror #16 \n"
155 "pkhtb r6, r7, r6, asr #16 \n"
156 "ssub16 r2, r2, r6 \n"
157 "pkhbt r7, r7, r4, lsl #16 \n"
158 "ssub16 r3, r3, r7 \n"
159 "stmia %[v1]!, {r0-r3} \n"
160#if ORDER > 16
161 "subs %[cnt], %[cnt], #1 \n" 138 "subs %[cnt], %[cnt], #1 \n"
162 "bne 1b \n" 139 "bne 1b \n"
163#endif 140#endif
@@ -165,6 +142,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
165 142
166 "20: \n" 143 "20: \n"
167 "1: \n" 144 "1: \n"
145 ".rept " ADD_SUB_BLOCKS "\n"
168 "ldmia %[v2]!, {r4-r7} \n" 146 "ldmia %[v2]!, {r4-r7} \n"
169 "ldmia %[v1], {r0-r3} \n" 147 "ldmia %[v1], {r0-r3} \n"
170 "ssub16 r0, r0, r4 \n" 148 "ssub16 r0, r0, r4 \n"
@@ -172,21 +150,15 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
172 "ssub16 r2, r2, r6 \n" 150 "ssub16 r2, r2, r6 \n"
173 "ssub16 r3, r3, r7 \n" 151 "ssub16 r3, r3, r7 \n"
174 "stmia %[v1]!, {r0-r3} \n" 152 "stmia %[v1]!, {r0-r3} \n"
175 "ldmia %[v2]!, {r4-r7} \n" 153 ".endr \n"
176 "ldmia %[v1], {r0-r3} \n" 154#if ORDER > 32
177 "ssub16 r0, r0, r4 \n"
178 "ssub16 r1, r1, r5 \n"
179 "ssub16 r2, r2, r6 \n"
180 "ssub16 r3, r3, r7 \n"
181 "stmia %[v1]!, {r0-r3} \n"
182#if ORDER > 16
183 "subs %[cnt], %[cnt], #1 \n" 155 "subs %[cnt], %[cnt], #1 \n"
184 "bne 1b \n" 156 "bne 1b \n"
185#endif 157#endif
186 158
187 "99: \n" 159 "99: \n"
188 : /* outputs */ 160 : /* outputs */
189#if ORDER > 16 161#if ORDER > 32
190 [cnt]"+r"(cnt), 162 [cnt]"+r"(cnt),
191#endif 163#endif
192 [v1] "+r"(v1), 164 [v1] "+r"(v1),
@@ -203,12 +175,21 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
203 * incorrect results (if ARM aligncheck is disabled). */ 175 * incorrect results (if ARM aligncheck is disabled). */
204static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) 176static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
205{ 177{
206 int res = 0; 178 int res;
179#if ORDER > 32
180 int cnt = ORDER>>5;
181#endif
182
207#if ORDER > 16 183#if ORDER > 16
208 int cnt = ORDER>>4; 184#define MLA_BLOCKS "3"
185#else
186#define MLA_BLOCKS "1"
209#endif 187#endif
210 188
211 asm volatile ( 189 asm volatile (
190#if ORDER > 32
191 "mov %[res], #0 \n"
192#endif
212 "tst %[v2], #2 \n" 193 "tst %[v2], #2 \n"
213 "beq 20f \n" 194 "beq 20f \n"
214 195
@@ -216,11 +197,18 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
216 "ldrh r7, [%[v2]], #2 \n" 197 "ldrh r7, [%[v2]], #2 \n"
217 "ldmia %[v2]!, {r4-r5} \n" 198 "ldmia %[v2]!, {r4-r5} \n"
218 "ldmia %[v1]!, {r0-r1} \n" 199 "ldmia %[v1]!, {r0-r1} \n"
200#if ORDER > 32
219 "mov r7, r7, lsl #16 \n" 201 "mov r7, r7, lsl #16 \n"
220 "1: \n" 202 "1: \n"
221 "pkhbt r8, r4, r7 \n" 203 "pkhbt r8, r4, r7 \n"
222 "ldmia %[v2]!, {r6-r7} \n" 204 "ldmia %[v2]!, {r6-r7} \n"
223 "smladx %[res], r0, r8, %[res] \n" 205 "smladx %[res], r0, r8, %[res] \n"
206#else
207 "pkhbt r8, r4, r7, lsl #16 \n"
208 "ldmia %[v2]!, {r6-r7} \n"
209 "smuadx %[res], r0, r8 \n"
210#endif
211 ".rept " MLA_BLOCKS "\n"
224 "pkhbt r8, r5, r4 \n" 212 "pkhbt r8, r5, r4 \n"
225 "ldmia %[v1]!, {r2-r3} \n" 213 "ldmia %[v1]!, {r2-r3} \n"
226 "smladx %[res], r1, r8, %[res] \n" 214 "smladx %[res], r1, r8, %[res] \n"
@@ -233,11 +221,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
233 "pkhbt r8, r4, r7 \n" 221 "pkhbt r8, r4, r7 \n"
234 "ldmia %[v2]!, {r6-r7} \n" 222 "ldmia %[v2]!, {r6-r7} \n"
235 "smladx %[res], r0, r8, %[res] \n" 223 "smladx %[res], r0, r8, %[res] \n"
224 ".endr \n"
225
236 "pkhbt r8, r5, r4 \n" 226 "pkhbt r8, r5, r4 \n"
237 "ldmia %[v1]!, {r2-r3} \n" 227 "ldmia %[v1]!, {r2-r3} \n"
238 "smladx %[res], r1, r8, %[res] \n" 228 "smladx %[res], r1, r8, %[res] \n"
239 "pkhbt r8, r6, r5 \n" 229 "pkhbt r8, r6, r5 \n"
240#if ORDER > 16 230#if ORDER > 32
241 "subs %[cnt], %[cnt], #1 \n" 231 "subs %[cnt], %[cnt], #1 \n"
242 "ldmneia %[v2]!, {r4-r5} \n" 232 "ldmneia %[v2]!, {r4-r5} \n"
243 "smladx %[res], r2, r8, %[res] \n" 233 "smladx %[res], r2, r8, %[res] \n"
@@ -257,7 +247,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
257 "ldmia %[v2]!, {r5-r7} \n" 247 "ldmia %[v2]!, {r5-r7} \n"
258 "1: \n" 248 "1: \n"
259 "ldmia %[v1]!, {r2-r3} \n" 249 "ldmia %[v1]!, {r2-r3} \n"
250#if ORDER > 32
260 "smlad %[res], r0, r5, %[res] \n" 251 "smlad %[res], r0, r5, %[res] \n"
252#else
253 "smuad %[res], r0, r5 \n"
254#endif
255 ".rept " MLA_BLOCKS "\n"
261 "ldmia %[v2]!, {r4-r5} \n" 256 "ldmia %[v2]!, {r4-r5} \n"
262 "smlad %[res], r1, r6, %[res] \n" 257 "smlad %[res], r1, r6, %[res] \n"
263 "ldmia %[v1]!, {r0-r1} \n" 258 "ldmia %[v1]!, {r0-r1} \n"
@@ -266,9 +261,11 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
266 "smlad %[res], r3, r4, %[res] \n" 261 "smlad %[res], r3, r4, %[res] \n"
267 "ldmia %[v1]!, {r2-r3} \n" 262 "ldmia %[v1]!, {r2-r3} \n"
268 "smlad %[res], r0, r5, %[res] \n" 263 "smlad %[res], r0, r5, %[res] \n"
264 ".endr \n"
265
269 "ldmia %[v2]!, {r4-r5} \n" 266 "ldmia %[v2]!, {r4-r5} \n"
270 "smlad %[res], r1, r6, %[res] \n" 267 "smlad %[res], r1, r6, %[res] \n"
271#if ORDER > 16 268#if ORDER > 32
272 "subs %[cnt], %[cnt], #1 \n" 269 "subs %[cnt], %[cnt], #1 \n"
273 "ldmneia %[v1]!, {r0-r1} \n" 270 "ldmneia %[v1]!, {r0-r1} \n"
274 "smlad %[res], r2, r7, %[res] \n" 271 "smlad %[res], r2, r7, %[res] \n"
@@ -282,12 +279,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
282 279
283 "99: \n" 280 "99: \n"
284 : /* outputs */ 281 : /* outputs */
285#if ORDER > 16 282#if ORDER > 32
286 [cnt]"+r"(cnt), 283 [cnt]"+r"(cnt),
287#endif 284#endif
288 [v1] "+r"(v1), 285 [v1] "+r"(v1),
289 [v2] "+r"(v2), 286 [v2] "+r"(v2),
290 [res]"+r"(res) 287 [res]"=r"(res)
291 : /* inputs */ 288 : /* inputs */
292 : /* clobbers */ 289 : /* clobbers */
293 "r0", "r1", "r2", "r3", "r4", 290 "r0", "r1", "r2", "r3", "r4",