summary | refs | log | tree | commit | diff
path: root/apps/codecs/demac/libdemac/vector_math16_armv5te.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv5te.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv5te.h | 36
1 files changed, 30 insertions, 6 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index a999c0333a..826aaa3f80 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -117,21 +117,35 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
117 * incorrect results (if ARM aligncheck is disabled). */ 117 * incorrect results (if ARM aligncheck is disabled). */
118static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) 118static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
119{ 119{
120 int res = 0; 120 int res;
121#if ORDER > 32
122 int cnt = ORDER>>5;
123#endif
124
121#if ORDER > 16 125#if ORDER > 16
122 int cnt = ORDER>>4; 126#define MLA_BLOCKS "3"
127#else
128#define MLA_BLOCKS "1"
123#endif 129#endif
124 130
125 asm volatile ( 131 asm volatile (
132#if ORDER > 32
133 "mov %[res], #0 \n"
134#endif
126 "tst %[v2], #2 \n" 135 "tst %[v2], #2 \n"
127 "beq 20f \n" 136 "beq 20f \n"
128 137
129 "10: \n" 138 "10: \n"
130 "ldrh r7, [%[v2]], #2 \n" 139 "ldrh r7, [%[v2]], #2 \n"
140#if ORDER > 32
131 "mov r7, r7, lsl #16 \n" 141 "mov r7, r7, lsl #16 \n"
132 "1: \n" 142 "1: \n"
133 "ldmia %[v1]!, {r0-r3} \n" 143 "ldmia %[v1]!, {r0-r3} \n"
134 "smlabt %[res], r0, r7, %[res] \n" 144 "smlabt %[res], r0, r7, %[res] \n"
145#else
146 "ldmia %[v1]!, {r0-r3} \n"
147 "smulbb %[res], r0, r7 \n"
148#endif
135 "ldmia %[v2]!, {r4-r7} \n" 149 "ldmia %[v2]!, {r4-r7} \n"
136 "smlatb %[res], r0, r4, %[res] \n" 150 "smlatb %[res], r0, r4, %[res] \n"
137 "smlabt %[res], r1, r4, %[res] \n" 151 "smlabt %[res], r1, r4, %[res] \n"
@@ -140,6 +154,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
140 "smlatb %[res], r2, r6, %[res] \n" 154 "smlatb %[res], r2, r6, %[res] \n"
141 "smlabt %[res], r3, r6, %[res] \n" 155 "smlabt %[res], r3, r6, %[res] \n"
142 "smlatb %[res], r3, r7, %[res] \n" 156 "smlatb %[res], r3, r7, %[res] \n"
157
158 ".rept " MLA_BLOCKS "\n"
143 "ldmia %[v1]!, {r0-r3} \n" 159 "ldmia %[v1]!, {r0-r3} \n"
144 "smlabt %[res], r0, r7, %[res] \n" 160 "smlabt %[res], r0, r7, %[res] \n"
145 "ldmia %[v2]!, {r4-r7} \n" 161 "ldmia %[v2]!, {r4-r7} \n"
@@ -150,7 +166,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
150 "smlatb %[res], r2, r6, %[res] \n" 166 "smlatb %[res], r2, r6, %[res] \n"
151 "smlabt %[res], r3, r6, %[res] \n" 167 "smlabt %[res], r3, r6, %[res] \n"
152 "smlatb %[res], r3, r7, %[res] \n" 168 "smlatb %[res], r3, r7, %[res] \n"
153#if ORDER > 16 169 ".endr \n"
170#if ORDER > 32
154 "subs %[cnt], %[cnt], #1 \n" 171 "subs %[cnt], %[cnt], #1 \n"
155 "bne 1b \n" 172 "bne 1b \n"
156#endif 173#endif
@@ -160,7 +177,11 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
160 "1: \n" 177 "1: \n"
161 "ldmia %[v1]!, {r0-r3} \n" 178 "ldmia %[v1]!, {r0-r3} \n"
162 "ldmia %[v2]!, {r4-r7} \n" 179 "ldmia %[v2]!, {r4-r7} \n"
180#if ORDER > 32
163 "smlabb %[res], r0, r4, %[res] \n" 181 "smlabb %[res], r0, r4, %[res] \n"
182#else
183 "smulbb %[res], r0, r4 \n"
184#endif
164 "smlatt %[res], r0, r4, %[res] \n" 185 "smlatt %[res], r0, r4, %[res] \n"
165 "smlabb %[res], r1, r5, %[res] \n" 186 "smlabb %[res], r1, r5, %[res] \n"
166 "smlatt %[res], r1, r5, %[res] \n" 187 "smlatt %[res], r1, r5, %[res] \n"
@@ -168,6 +189,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
168 "smlatt %[res], r2, r6, %[res] \n" 189 "smlatt %[res], r2, r6, %[res] \n"
169 "smlabb %[res], r3, r7, %[res] \n" 190 "smlabb %[res], r3, r7, %[res] \n"
170 "smlatt %[res], r3, r7, %[res] \n" 191 "smlatt %[res], r3, r7, %[res] \n"
192
193 ".rept " MLA_BLOCKS "\n"
171 "ldmia %[v1]!, {r0-r3} \n" 194 "ldmia %[v1]!, {r0-r3} \n"
172 "ldmia %[v2]!, {r4-r7} \n" 195 "ldmia %[v2]!, {r4-r7} \n"
173 "smlabb %[res], r0, r4, %[res] \n" 196 "smlabb %[res], r0, r4, %[res] \n"
@@ -178,19 +201,20 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
178 "smlatt %[res], r2, r6, %[res] \n" 201 "smlatt %[res], r2, r6, %[res] \n"
179 "smlabb %[res], r3, r7, %[res] \n" 202 "smlabb %[res], r3, r7, %[res] \n"
180 "smlatt %[res], r3, r7, %[res] \n" 203 "smlatt %[res], r3, r7, %[res] \n"
181#if ORDER > 16 204 ".endr \n"
205#if ORDER > 32
182 "subs %[cnt], %[cnt], #1 \n" 206 "subs %[cnt], %[cnt], #1 \n"
183 "bne 1b \n" 207 "bne 1b \n"
184#endif 208#endif
185 209
186 "99: \n" 210 "99: \n"
187 : /* outputs */ 211 : /* outputs */
188#if ORDER > 16 212#if ORDER > 32
189 [cnt]"+r"(cnt), 213 [cnt]"+r"(cnt),
190#endif 214#endif
191 [v1] "+r"(v1), 215 [v1] "+r"(v1),
192 [v2] "+r"(v2), 216 [v2] "+r"(v2),
193 [res]"+r"(res) 217 [res]"=r"(res)
194 : /* inputs */ 218 : /* inputs */
195 : /* clobbers */ 219 : /* clobbers */
196 "r0", "r1", "r2", "r3", 220 "r0", "r1", "r2", "r3",