diff options
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math32_armv4.h')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math32_armv4.h | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h new file mode 100644 index 0000000000..b729bd3a0a --- /dev/null +++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h | |||
@@ -0,0 +1,210 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARMv4 vector math copyright (C) 2008 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | static inline void vector_add(int32_t* v1, int32_t* v2) | ||
28 | { | ||
29 | #if ORDER > 32 | ||
30 | int cnt = ORDER>>5; | ||
31 | #endif | ||
32 | |||
33 | #define ADDBLOCK4 \ | ||
34 | "ldmia %[v1], {r0-r3} \n" \ | ||
35 | "ldmia %[v2]!, {r4-r7} \n" \ | ||
36 | "add r0, r0, r4 \n" \ | ||
37 | "add r1, r1, r5 \n" \ | ||
38 | "add r2, r2, r6 \n" \ | ||
39 | "add r3, r3, r7 \n" \ | ||
40 | "stmia %[v1]!, {r0-r3} \n" | ||
41 | |||
42 | asm volatile ( | ||
43 | "1: \n" | ||
44 | ADDBLOCK4 | ||
45 | ADDBLOCK4 | ||
46 | ADDBLOCK4 | ||
47 | ADDBLOCK4 | ||
48 | #if ORDER > 16 | ||
49 | ADDBLOCK4 | ||
50 | ADDBLOCK4 | ||
51 | ADDBLOCK4 | ||
52 | ADDBLOCK4 | ||
53 | #endif | ||
54 | #if ORDER > 32 | ||
55 | "subs %[cnt], %[cnt], #1 \n" | ||
56 | "bne 1b \n" | ||
57 | #endif | ||
58 | : /* outputs */ | ||
59 | #if ORDER > 32 | ||
60 | [cnt]"+r"(cnt), | ||
61 | #endif | ||
62 | [v1] "+r"(v1), | ||
63 | [v2] "+r"(v2) | ||
64 | : /* inputs */ | ||
65 | : /* clobbers */ | ||
66 | "r0", "r1", "r2", "r3", "r4", | ||
67 | "r5", "r6", "r7", "memory" | ||
68 | ); | ||
69 | } | ||
70 | |||
71 | static inline void vector_sub(int32_t* v1, int32_t* v2) | ||
72 | { | ||
73 | #if ORDER > 32 | ||
74 | int cnt = ORDER>>5; | ||
75 | #endif | ||
76 | |||
77 | #define SUBBLOCK4 \ | ||
78 | "ldmia %[v1], {r0-r3} \n" \ | ||
79 | "ldmia %[v2]!, {r4-r7} \n" \ | ||
80 | "sub r0, r0, r4 \n" \ | ||
81 | "sub r1, r1, r5 \n" \ | ||
82 | "sub r2, r2, r6 \n" \ | ||
83 | "sub r3, r3, r7 \n" \ | ||
84 | "stmia %[v1]!, {r0-r3} \n" | ||
85 | |||
86 | asm volatile ( | ||
87 | "1: \n" | ||
88 | SUBBLOCK4 | ||
89 | SUBBLOCK4 | ||
90 | SUBBLOCK4 | ||
91 | SUBBLOCK4 | ||
92 | #if ORDER > 16 | ||
93 | SUBBLOCK4 | ||
94 | SUBBLOCK4 | ||
95 | SUBBLOCK4 | ||
96 | SUBBLOCK4 | ||
97 | #endif | ||
98 | #if ORDER > 32 | ||
99 | "subs %[cnt], %[cnt], #1 \n" | ||
100 | "bne 1b \n" | ||
101 | #endif | ||
102 | : /* outputs */ | ||
103 | #if ORDER > 32 | ||
104 | [cnt]"+r"(cnt), | ||
105 | #endif | ||
106 | [v1] "+r"(v1), | ||
107 | [v2] "+r"(v2) | ||
108 | : /* inputs */ | ||
109 | : /* clobbers */ | ||
110 | "r0", "r1", "r2", "r3", "r4", | ||
111 | "r5", "r6", "r7", "memory" | ||
112 | ); | ||
113 | } | ||
114 | |||
115 | static inline int32_t scalarproduct(int32_t* v1, int32_t* v2) | ||
116 | { | ||
117 | int res = 0; | ||
118 | #if ORDER > 32 | ||
119 | int cnt = ORDER>>5; | ||
120 | #endif | ||
121 | |||
122 | asm volatile ( | ||
123 | #if ORDER > 16 | ||
124 | "ldmia %[v2]!, {r6-r7} \n" | ||
125 | "1: \n" | ||
126 | "ldmia %[v1]!, {r0,r1,r3-r5} \n" | ||
127 | "mla %[res], r6, r0, %[res] \n" | ||
128 | "mla %[res], r7, r1, %[res] \n" | ||
129 | "ldmia %[v2]!, {r0-r2,r6-r8} \n" | ||
130 | "mla %[res], r0, r3, %[res] \n" | ||
131 | "mla %[res], r1, r4, %[res] \n" | ||
132 | "mla %[res], r2, r5, %[res] \n" | ||
133 | "ldmia %[v1]!, {r0-r4} \n" | ||
134 | "mla %[res], r6, r0, %[res] \n" | ||
135 | "mla %[res], r7, r1, %[res] \n" | ||
136 | "mla %[res], r8, r2, %[res] \n" | ||
137 | "ldmia %[v2]!, {r0,r1,r6-r8} \n" | ||
138 | "mla %[res], r0, r3, %[res] \n" | ||
139 | "mla %[res], r1, r4, %[res] \n" | ||
140 | "ldmia %[v1]!, {r0-r5} \n" | ||
141 | "mla %[res], r6, r0, %[res] \n" | ||
142 | "mla %[res], r7, r1, %[res] \n" | ||
143 | "mla %[res], r8, r2, %[res] \n" | ||
144 | "ldmia %[v2]!, {r0-r2,r6,r7} \n" | ||
145 | "mla %[res], r0, r3, %[res] \n" | ||
146 | "mla %[res], r1, r4, %[res] \n" | ||
147 | "mla %[res], r2, r5, %[res] \n" | ||
148 | "ldmia %[v1]!, {r0,r1,r3-r5} \n" | ||
149 | "mla %[res], r6, r0, %[res] \n" | ||
150 | "mla %[res], r7, r1, %[res] \n" | ||
151 | "ldmia %[v2]!, {r0-r2,r6-r8} \n" | ||
152 | "mla %[res], r0, r3, %[res] \n" | ||
153 | "mla %[res], r1, r4, %[res] \n" | ||
154 | "mla %[res], r2, r5, %[res] \n" | ||
155 | "ldmia %[v1]!, {r0-r4} \n" | ||
156 | "mla %[res], r6, r0, %[res] \n" | ||
157 | "mla %[res], r7, r1, %[res] \n" | ||
158 | "mla %[res], r8, r2, %[res] \n" | ||
159 | "ldmia %[v2]!, {r0,r1,r6-r8} \n" | ||
160 | "mla %[res], r0, r3, %[res] \n" | ||
161 | "mla %[res], r1, r4, %[res] \n" | ||
162 | "ldmia %[v1]!, {r0-r5} \n" | ||
163 | "mla %[res], r6, r0, %[res] \n" | ||
164 | "mla %[res], r7, r1, %[res] \n" | ||
165 | "mla %[res], r8, r2, %[res] \n" | ||
166 | #if ORDER > 32 | ||
167 | "ldmia %[v2]!, {r0-r2,r6,r7} \n" | ||
168 | #else | ||
169 | "ldmia %[v2]!, {r0-r2} \n" | ||
170 | #endif | ||
171 | "mla %[res], r0, r3, %[res] \n" | ||
172 | "mla %[res], r1, r4, %[res] \n" | ||
173 | "mla %[res], r2, r5, %[res] \n" | ||
174 | #if ORDER > 32 | ||
175 | "subs %[cnt], %[cnt], #1 \n" | ||
176 | "bne 1b \n" | ||
177 | #endif | ||
178 | |||
179 | #else /* ORDER <= 16 */ | ||
180 | |||
181 | #define MLABLOCK4 \ | ||
182 | "ldmia %[v1]!, {r0-r3} \n" \ | ||
183 | "ldmia %[v2]!, {r4-r7} \n" \ | ||
184 | "mla %[res], r4, r0, %[res] \n" \ | ||
185 | "mla %[res], r5, r1, %[res] \n" \ | ||
186 | "mla %[res], r6, r2, %[res] \n" \ | ||
187 | "mla %[res], r7, r3, %[res] \n" | ||
188 | |||
189 | MLABLOCK4 | ||
190 | MLABLOCK4 | ||
191 | MLABLOCK4 | ||
192 | MLABLOCK4 | ||
193 | #endif /* ORDER <= 16 */ | ||
194 | : /* outputs */ | ||
195 | #if ORDER > 32 | ||
196 | [cnt]"+r"(cnt), | ||
197 | #endif | ||
198 | [v1] "+r"(v1), | ||
199 | [v2] "+r"(v2), | ||
200 | [res]"+r"(res) | ||
201 | : /* inputs */ | ||
202 | : /* clobbers */ | ||
203 | "r0", "r1", "r2", "r3", | ||
204 | "r4", "r5", "r6", "r7" | ||
205 | #if ORDER > 16 | ||
206 | ,"r8" | ||
207 | #endif | ||
208 | ); | ||
209 | return res; | ||
210 | } | ||