summaryrefslogtreecommitdiff
path: root/apps/codecs/demac/libdemac/vector_math32_armv4.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math32_armv4.h')
-rw-r--r--apps/codecs/demac/libdemac/vector_math32_armv4.h210
1 files changed, 210 insertions, 0 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000000..b729bd3a0a
--- /dev/null
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,210 @@
/*

libdemac - A Monkey's Audio decoder

$Id$

Copyright (C) Dave Chapman 2007

ARMv4 vector math copyright (C) 2008 Jens Arnold

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA

*/
26
27static inline void vector_add(int32_t* v1, int32_t* v2)
28{
29#if ORDER > 32
30 int cnt = ORDER>>5;
31#endif
32
33#define ADDBLOCK4 \
34 "ldmia %[v1], {r0-r3} \n" \
35 "ldmia %[v2]!, {r4-r7} \n" \
36 "add r0, r0, r4 \n" \
37 "add r1, r1, r5 \n" \
38 "add r2, r2, r6 \n" \
39 "add r3, r3, r7 \n" \
40 "stmia %[v1]!, {r0-r3} \n"
41
42 asm volatile (
43 "1: \n"
44 ADDBLOCK4
45 ADDBLOCK4
46 ADDBLOCK4
47 ADDBLOCK4
48#if ORDER > 16
49 ADDBLOCK4
50 ADDBLOCK4
51 ADDBLOCK4
52 ADDBLOCK4
53#endif
54#if ORDER > 32
55 "subs %[cnt], %[cnt], #1 \n"
56 "bne 1b \n"
57#endif
58 : /* outputs */
59#if ORDER > 32
60 [cnt]"+r"(cnt),
61#endif
62 [v1] "+r"(v1),
63 [v2] "+r"(v2)
64 : /* inputs */
65 : /* clobbers */
66 "r0", "r1", "r2", "r3", "r4",
67 "r5", "r6", "r7", "memory"
68 );
69}
70
71static inline void vector_sub(int32_t* v1, int32_t* v2)
72{
73#if ORDER > 32
74 int cnt = ORDER>>5;
75#endif
76
77#define SUBBLOCK4 \
78 "ldmia %[v1], {r0-r3} \n" \
79 "ldmia %[v2]!, {r4-r7} \n" \
80 "sub r0, r0, r4 \n" \
81 "sub r1, r1, r5 \n" \
82 "sub r2, r2, r6 \n" \
83 "sub r3, r3, r7 \n" \
84 "stmia %[v1]!, {r0-r3} \n"
85
86 asm volatile (
87 "1: \n"
88 SUBBLOCK4
89 SUBBLOCK4
90 SUBBLOCK4
91 SUBBLOCK4
92#if ORDER > 16
93 SUBBLOCK4
94 SUBBLOCK4
95 SUBBLOCK4
96 SUBBLOCK4
97#endif
98#if ORDER > 32
99 "subs %[cnt], %[cnt], #1 \n"
100 "bne 1b \n"
101#endif
102 : /* outputs */
103#if ORDER > 32
104 [cnt]"+r"(cnt),
105#endif
106 [v1] "+r"(v1),
107 [v2] "+r"(v2)
108 : /* inputs */
109 : /* clobbers */
110 "r0", "r1", "r2", "r3", "r4",
111 "r5", "r6", "r7", "memory"
112 );
113}
114
115static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
116{
117 int res = 0;
118#if ORDER > 32
119 int cnt = ORDER>>5;
120#endif
121
122 asm volatile (
123#if ORDER > 16
124 "ldmia %[v2]!, {r6-r7} \n"
125 "1: \n"
126 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
127 "mla %[res], r6, r0, %[res] \n"
128 "mla %[res], r7, r1, %[res] \n"
129 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
130 "mla %[res], r0, r3, %[res] \n"
131 "mla %[res], r1, r4, %[res] \n"
132 "mla %[res], r2, r5, %[res] \n"
133 "ldmia %[v1]!, {r0-r4} \n"
134 "mla %[res], r6, r0, %[res] \n"
135 "mla %[res], r7, r1, %[res] \n"
136 "mla %[res], r8, r2, %[res] \n"
137 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
138 "mla %[res], r0, r3, %[res] \n"
139 "mla %[res], r1, r4, %[res] \n"
140 "ldmia %[v1]!, {r0-r5} \n"
141 "mla %[res], r6, r0, %[res] \n"
142 "mla %[res], r7, r1, %[res] \n"
143 "mla %[res], r8, r2, %[res] \n"
144 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
145 "mla %[res], r0, r3, %[res] \n"
146 "mla %[res], r1, r4, %[res] \n"
147 "mla %[res], r2, r5, %[res] \n"
148 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
149 "mla %[res], r6, r0, %[res] \n"
150 "mla %[res], r7, r1, %[res] \n"
151 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
152 "mla %[res], r0, r3, %[res] \n"
153 "mla %[res], r1, r4, %[res] \n"
154 "mla %[res], r2, r5, %[res] \n"
155 "ldmia %[v1]!, {r0-r4} \n"
156 "mla %[res], r6, r0, %[res] \n"
157 "mla %[res], r7, r1, %[res] \n"
158 "mla %[res], r8, r2, %[res] \n"
159 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
160 "mla %[res], r0, r3, %[res] \n"
161 "mla %[res], r1, r4, %[res] \n"
162 "ldmia %[v1]!, {r0-r5} \n"
163 "mla %[res], r6, r0, %[res] \n"
164 "mla %[res], r7, r1, %[res] \n"
165 "mla %[res], r8, r2, %[res] \n"
166#if ORDER > 32
167 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
168#else
169 "ldmia %[v2]!, {r0-r2} \n"
170#endif
171 "mla %[res], r0, r3, %[res] \n"
172 "mla %[res], r1, r4, %[res] \n"
173 "mla %[res], r2, r5, %[res] \n"
174#if ORDER > 32
175 "subs %[cnt], %[cnt], #1 \n"
176 "bne 1b \n"
177#endif
178
179#else /* ORDER <= 16 */
180
181#define MLABLOCK4 \
182 "ldmia %[v1]!, {r0-r3} \n" \
183 "ldmia %[v2]!, {r4-r7} \n" \
184 "mla %[res], r4, r0, %[res] \n" \
185 "mla %[res], r5, r1, %[res] \n" \
186 "mla %[res], r6, r2, %[res] \n" \
187 "mla %[res], r7, r3, %[res] \n"
188
189 MLABLOCK4
190 MLABLOCK4
191 MLABLOCK4
192 MLABLOCK4
193#endif /* ORDER <= 16 */
194 : /* outputs */
195#if ORDER > 32
196 [cnt]"+r"(cnt),
197#endif
198 [v1] "+r"(v1),
199 [v2] "+r"(v2),
200 [res]"+r"(res)
201 : /* inputs */
202 : /* clobbers */
203 "r0", "r1", "r2", "r3",
204 "r4", "r5", "r6", "r7"
205#if ORDER > 16
206 ,"r8"
207#endif
208 );
209 return res;
210}