diff options
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv5te.h')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_armv5te.h | 36 |
1 files changed, 30 insertions, 6 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h index a999c0333a..826aaa3f80 100644 --- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h +++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h | |||
@@ -117,21 +117,35 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
117 | * incorrect results (if ARM aligncheck is disabled). */ | 117 | * incorrect results (if ARM aligncheck is disabled). */ |
118 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | 118 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) |
119 | { | 119 | { |
120 | int res = 0; | 120 | int res; |
121 | #if ORDER > 32 | ||
122 | int cnt = ORDER>>5; | ||
123 | #endif | ||
124 | |||
121 | #if ORDER > 16 | 125 | #if ORDER > 16 |
122 | int cnt = ORDER>>4; | 126 | #define MLA_BLOCKS "3" |
127 | #else | ||
128 | #define MLA_BLOCKS "1" | ||
123 | #endif | 129 | #endif |
124 | 130 | ||
125 | asm volatile ( | 131 | asm volatile ( |
132 | #if ORDER > 32 | ||
133 | "mov %[res], #0 \n" | ||
134 | #endif | ||
126 | "tst %[v2], #2 \n" | 135 | "tst %[v2], #2 \n" |
127 | "beq 20f \n" | 136 | "beq 20f \n" |
128 | 137 | ||
129 | "10: \n" | 138 | "10: \n" |
130 | "ldrh r7, [%[v2]], #2 \n" | 139 | "ldrh r7, [%[v2]], #2 \n" |
140 | #if ORDER > 32 | ||
131 | "mov r7, r7, lsl #16 \n" | 141 | "mov r7, r7, lsl #16 \n" |
132 | "1: \n" | 142 | "1: \n" |
133 | "ldmia %[v1]!, {r0-r3} \n" | 143 | "ldmia %[v1]!, {r0-r3} \n" |
134 | "smlabt %[res], r0, r7, %[res] \n" | 144 | "smlabt %[res], r0, r7, %[res] \n" |
145 | #else | ||
146 | "ldmia %[v1]!, {r0-r3} \n" | ||
147 | "smulbb %[res], r0, r7 \n" | ||
148 | #endif | ||
135 | "ldmia %[v2]!, {r4-r7} \n" | 149 | "ldmia %[v2]!, {r4-r7} \n" |
136 | "smlatb %[res], r0, r4, %[res] \n" | 150 | "smlatb %[res], r0, r4, %[res] \n" |
137 | "smlabt %[res], r1, r4, %[res] \n" | 151 | "smlabt %[res], r1, r4, %[res] \n" |
@@ -140,6 +154,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
140 | "smlatb %[res], r2, r6, %[res] \n" | 154 | "smlatb %[res], r2, r6, %[res] \n" |
141 | "smlabt %[res], r3, r6, %[res] \n" | 155 | "smlabt %[res], r3, r6, %[res] \n" |
142 | "smlatb %[res], r3, r7, %[res] \n" | 156 | "smlatb %[res], r3, r7, %[res] \n" |
157 | |||
158 | ".rept " MLA_BLOCKS "\n" | ||
143 | "ldmia %[v1]!, {r0-r3} \n" | 159 | "ldmia %[v1]!, {r0-r3} \n" |
144 | "smlabt %[res], r0, r7, %[res] \n" | 160 | "smlabt %[res], r0, r7, %[res] \n" |
145 | "ldmia %[v2]!, {r4-r7} \n" | 161 | "ldmia %[v2]!, {r4-r7} \n" |
@@ -150,7 +166,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
150 | "smlatb %[res], r2, r6, %[res] \n" | 166 | "smlatb %[res], r2, r6, %[res] \n" |
151 | "smlabt %[res], r3, r6, %[res] \n" | 167 | "smlabt %[res], r3, r6, %[res] \n" |
152 | "smlatb %[res], r3, r7, %[res] \n" | 168 | "smlatb %[res], r3, r7, %[res] \n" |
153 | #if ORDER > 16 | 169 | ".endr \n" |
170 | #if ORDER > 32 | ||
154 | "subs %[cnt], %[cnt], #1 \n" | 171 | "subs %[cnt], %[cnt], #1 \n" |
155 | "bne 1b \n" | 172 | "bne 1b \n" |
156 | #endif | 173 | #endif |
@@ -160,7 +177,11 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
160 | "1: \n" | 177 | "1: \n" |
161 | "ldmia %[v1]!, {r0-r3} \n" | 178 | "ldmia %[v1]!, {r0-r3} \n" |
162 | "ldmia %[v2]!, {r4-r7} \n" | 179 | "ldmia %[v2]!, {r4-r7} \n" |
180 | #if ORDER > 32 | ||
163 | "smlabb %[res], r0, r4, %[res] \n" | 181 | "smlabb %[res], r0, r4, %[res] \n" |
182 | #else | ||
183 | "smulbb %[res], r0, r4 \n" | ||
184 | #endif | ||
164 | "smlatt %[res], r0, r4, %[res] \n" | 185 | "smlatt %[res], r0, r4, %[res] \n" |
165 | "smlabb %[res], r1, r5, %[res] \n" | 186 | "smlabb %[res], r1, r5, %[res] \n" |
166 | "smlatt %[res], r1, r5, %[res] \n" | 187 | "smlatt %[res], r1, r5, %[res] \n" |
@@ -168,6 +189,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
168 | "smlatt %[res], r2, r6, %[res] \n" | 189 | "smlatt %[res], r2, r6, %[res] \n" |
169 | "smlabb %[res], r3, r7, %[res] \n" | 190 | "smlabb %[res], r3, r7, %[res] \n" |
170 | "smlatt %[res], r3, r7, %[res] \n" | 191 | "smlatt %[res], r3, r7, %[res] \n" |
192 | |||
193 | ".rept " MLA_BLOCKS "\n" | ||
171 | "ldmia %[v1]!, {r0-r3} \n" | 194 | "ldmia %[v1]!, {r0-r3} \n" |
172 | "ldmia %[v2]!, {r4-r7} \n" | 195 | "ldmia %[v2]!, {r4-r7} \n" |
173 | "smlabb %[res], r0, r4, %[res] \n" | 196 | "smlabb %[res], r0, r4, %[res] \n" |
@@ -178,19 +201,20 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
178 | "smlatt %[res], r2, r6, %[res] \n" | 201 | "smlatt %[res], r2, r6, %[res] \n" |
179 | "smlabb %[res], r3, r7, %[res] \n" | 202 | "smlabb %[res], r3, r7, %[res] \n" |
180 | "smlatt %[res], r3, r7, %[res] \n" | 203 | "smlatt %[res], r3, r7, %[res] \n" |
181 | #if ORDER > 16 | 204 | ".endr \n" |
205 | #if ORDER > 32 | ||
182 | "subs %[cnt], %[cnt], #1 \n" | 206 | "subs %[cnt], %[cnt], #1 \n" |
183 | "bne 1b \n" | 207 | "bne 1b \n" |
184 | #endif | 208 | #endif |
185 | 209 | ||
186 | "99: \n" | 210 | "99: \n" |
187 | : /* outputs */ | 211 | : /* outputs */ |
188 | #if ORDER > 16 | 212 | #if ORDER > 32 |
189 | [cnt]"+r"(cnt), | 213 | [cnt]"+r"(cnt), |
190 | #endif | 214 | #endif |
191 | [v1] "+r"(v1), | 215 | [v1] "+r"(v1), |
192 | [v2] "+r"(v2), | 216 | [v2] "+r"(v2), |
193 | [res]"+r"(res) | 217 | [res]"=r"(res) |
194 | : /* inputs */ | 218 | : /* inputs */ |
195 | : /* clobbers */ | 219 | : /* clobbers */ |
196 | "r0", "r1", "r2", "r3", | 220 | "r0", "r1", "r2", "r3", |