diff options
Diffstat (limited to 'apps/codecs/demac')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_armv6.h | 49 |
1 files changed, 22 insertions, 27 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h index cd27b271af..61471103bd 100644 --- a/apps/codecs/demac/libdemac/vector_math16_armv6.h +++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h | |||
@@ -22,7 +22,7 @@ You should have received a copy of the GNU General Public License | |||
22 | along with this program; if not, write to the Free Software | 22 | along with this program; if not, write to the Free Software |
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | 23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA |
24 | 24 | ||
25 | */ | 25 | */ |
26 | 26 | ||
27 | /* This version fetches data as 32 bit words, and *requires* v1 to be | 27 | /* This version fetches data as 32 bit words, and *requires* v1 to be |
28 | * 32 bit aligned, otherwise it will result either in a data abort, or | 28 | * 32 bit aligned, otherwise it will result either in a data abort, or |
@@ -44,9 +44,8 @@ static inline void vector_add(int16_t* v1, int16_t* v2) | |||
44 | "beq 20f \n" | 44 | "beq 20f \n" |
45 | 45 | ||
46 | "10: \n" | 46 | "10: \n" |
47 | "ldrh r4, [%[v2]], #2 \n" | 47 | "bic %[v2], %[v2], #2 \n" |
48 | "ldr r5, [%[v2]], #4 \n" | 48 | "ldmia %[v2]!, {r4-r5} \n" |
49 | "mov r4, r4, lsl #16 \n" | ||
50 | "1: \n" | 49 | "1: \n" |
51 | ".rept " ADD_SUB_BLOCKS "\n" | 50 | ".rept " ADD_SUB_BLOCKS "\n" |
52 | "ldmia %[v2]!, {r6-r7} \n" | 51 | "ldmia %[v2]!, {r6-r7} \n" |
@@ -114,9 +113,8 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
114 | "beq 20f \n" | 113 | "beq 20f \n" |
115 | 114 | ||
116 | "10: \n" | 115 | "10: \n" |
117 | "ldrh r4, [%[v2]], #2 \n" | 116 | "bic %[v2], %[v2], #2 \n" |
118 | "ldr r5, [%[v2]], #4 \n" | 117 | "ldmia %[v2]!, {r4-r5} \n" |
119 | "mov r4, r4, lsl #16 \n" | ||
120 | "1: \n" | 118 | "1: \n" |
121 | ".rept " ADD_SUB_BLOCKS "\n" | 119 | ".rept " ADD_SUB_BLOCKS "\n" |
122 | "ldmia %[v2]!, {r6-r7} \n" | 120 | "ldmia %[v2]!, {r6-r7} \n" |
@@ -194,51 +192,48 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
194 | "beq 20f \n" | 192 | "beq 20f \n" |
195 | 193 | ||
196 | "10: \n" | 194 | "10: \n" |
197 | "ldrh r7, [%[v2]], #2 \n" | 195 | "bic %[v2], %[v2], #2 \n" |
198 | "ldmia %[v2]!, {r4-r5} \n" | 196 | "ldmia %[v2]!, {r5-r7} \n" |
199 | "ldmia %[v1]!, {r0-r1} \n" | 197 | "ldmia %[v1]!, {r0-r1} \n" |
200 | #if ORDER > 32 | ||
201 | "mov r7, r7, lsl #16 \n" | ||
202 | "1: \n" | 198 | "1: \n" |
203 | "pkhbt r8, r4, r7 \n" | 199 | "pkhbt r8, r6, r5 \n" |
204 | "ldmia %[v2]!, {r6-r7} \n" | 200 | "ldmia %[v2]!, {r4-r5} \n" |
201 | #if ORDER > 32 | ||
205 | "smladx %[res], r0, r8, %[res] \n" | 202 | "smladx %[res], r0, r8, %[res] \n" |
206 | #else | 203 | #else |
207 | "pkhbt r8, r4, r7, lsl #16 \n" | ||
208 | "ldmia %[v2]!, {r6-r7} \n" | ||
209 | "smuadx %[res], r0, r8 \n" | 204 | "smuadx %[res], r0, r8 \n" |
210 | #endif | 205 | #endif |
211 | ".rept " MLA_BLOCKS "\n" | 206 | ".rept " MLA_BLOCKS "\n" |
212 | "pkhbt r8, r5, r4 \n" | 207 | "pkhbt r8, r7, r6 \n" |
213 | "ldmia %[v1]!, {r2-r3} \n" | 208 | "ldmia %[v1]!, {r2-r3} \n" |
214 | "smladx %[res], r1, r8, %[res] \n" | 209 | "smladx %[res], r1, r8, %[res] \n" |
215 | "pkhbt r8, r6, r5 \n" | 210 | "pkhbt r8, r4, r7 \n" |
216 | "ldmia %[v2]!, {r4-r5} \n" | 211 | "ldmia %[v2]!, {r6-r7} \n" |
217 | "smladx %[res], r2, r8, %[res] \n" | 212 | "smladx %[res], r2, r8, %[res] \n" |
218 | "pkhbt r8, r7, r6 \n" | 213 | "pkhbt r8, r5, r4 \n" |
219 | "ldmia %[v1]!, {r0-r1} \n" | 214 | "ldmia %[v1]!, {r0-r1} \n" |
220 | "smladx %[res], r3, r8, %[res] \n" | 215 | "smladx %[res], r3, r8, %[res] \n" |
221 | "pkhbt r8, r4, r7 \n" | 216 | "pkhbt r8, r6, r5 \n" |
222 | "ldmia %[v2]!, {r6-r7} \n" | 217 | "ldmia %[v2]!, {r4-r5} \n" |
223 | "smladx %[res], r0, r8, %[res] \n" | 218 | "smladx %[res], r0, r8, %[res] \n" |
224 | ".endr \n" | 219 | ".endr \n" |
225 | 220 | ||
226 | "pkhbt r8, r5, r4 \n" | 221 | "pkhbt r8, r7, r6 \n" |
227 | "ldmia %[v1]!, {r2-r3} \n" | 222 | "ldmia %[v1]!, {r2-r3} \n" |
228 | "smladx %[res], r1, r8, %[res] \n" | 223 | "smladx %[res], r1, r8, %[res] \n" |
229 | "pkhbt r8, r6, r5 \n" | 224 | "pkhbt r8, r4, r7 \n" |
230 | #if ORDER > 32 | 225 | #if ORDER > 32 |
231 | "subs %[cnt], %[cnt], #1 \n" | 226 | "subs %[cnt], %[cnt], #1 \n" |
232 | "ldmneia %[v2]!, {r4-r5} \n" | 227 | "ldmneia %[v2]!, {r6-r7} \n" |
233 | "smladx %[res], r2, r8, %[res] \n" | 228 | "smladx %[res], r2, r8, %[res] \n" |
234 | "pkhbt r8, r7, r6 \n" | 229 | "pkhbt r8, r5, r4 \n" |
235 | "ldmneia %[v1]!, {r0-r1} \n" | 230 | "ldmneia %[v1]!, {r0-r1} \n" |
236 | "smladx %[res], r3, r8, %[res] \n" | 231 | "smladx %[res], r3, r8, %[res] \n" |
237 | "bne 1b \n" | 232 | "bne 1b \n" |
238 | #else | 233 | #else |
239 | "pkhbt r7, r7, r6 \n" | 234 | "pkhbt r5, r5, r4 \n" |
240 | "smladx %[res], r2, r8, %[res] \n" | 235 | "smladx %[res], r2, r8, %[res] \n" |
241 | "smladx %[res], r3, r7, %[res] \n" | 236 | "smladx %[res], r3, r5, %[res] \n" |
242 | #endif | 237 | #endif |
243 | "b 99f \n" | 238 | "b 99f \n" |
244 | 239 | ||