summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h')
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h201
1 files changed, 201 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000000..d6bb9b0d9c
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,201 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARMv4 vector math copyright (C) 2008 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* Marker macro; presumably tested by the including code to pick the fused
   vector_sp_add()/vector_sp_sub() routines -- confirm against the includer. */
#define FUSED_VECTOR_MATH

/*
 * REPEAT_BLOCK(x) pastes x several times in a row. The routines in this
 * header hand it one 4-element group of asm text:
 *   ORDER <= 16       : 3 copies
 *   16 < ORDER <= 32  : 7 copies
 *   ORDER > 32        : 8 copies
 */
#if ORDER <= 16
#define REPEAT_BLOCK(x) x x x
#elif ORDER <= 32
#define REPEAT_BLOCK(x) x x x x x x x
#else
#define REPEAT_BLOCK(x) x x x x x x x x
#endif
36
37/* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
38static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
39{
40 int res;
41#if ORDER > 32
42 int cnt = ORDER>>5;
43#endif
44
45 asm volatile (
46#if ORDER > 32
47 "mov %[res], #0 \n"
48 "1: \n"
49#else
50 "ldmia %[v1], {r0-r3} \n"
51 "ldmia %[f2]!, {r4-r7} \n"
52 "mul %[res], r4, r0 \n"
53 "mla %[res], r5, r1, %[res] \n"
54 "mla %[res], r6, r2, %[res] \n"
55 "mla %[res], r7, r3, %[res] \n"
56 "ldmia %[s2]!, {r4-r7} \n"
57 "add r0, r0, r4 \n"
58 "add r1, r1, r5 \n"
59 "add r2, r2, r6 \n"
60 "add r3, r3, r7 \n"
61 "stmia %[v1]!, {r0-r3} \n"
62#endif
63 REPEAT_BLOCK(
64 "ldmia %[v1], {r0-r3} \n"
65 "ldmia %[f2]!, {r4-r7} \n"
66 "mla %[res], r4, r0, %[res] \n"
67 "mla %[res], r5, r1, %[res] \n"
68 "mla %[res], r6, r2, %[res] \n"
69 "mla %[res], r7, r3, %[res] \n"
70 "ldmia %[s2]!, {r4-r7} \n"
71 "add r0, r0, r4 \n"
72 "add r1, r1, r5 \n"
73 "add r2, r2, r6 \n"
74 "add r3, r3, r7 \n"
75 "stmia %[v1]!, {r0-r3} \n"
76 )
77#if ORDER > 32
78 "subs %[cnt], %[cnt], #1 \n"
79 "bne 1b \n"
80#endif
81 : /* outputs */
82#if ORDER > 32
83 [cnt]"+r"(cnt),
84#endif
85 [v1] "+r"(v1),
86 [f2] "+r"(f2),
87 [s2] "+r"(s2),
88 [res]"=r"(res)
89 : /* inputs */
90 : /* clobbers */
91 "r0", "r1", "r2", "r3", "r4",
92 "r5", "r6", "r7", "cc", "memory"
93 );
94 return res;
95}
96
97/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance) */
98static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
99{
100 int res;
101#if ORDER > 32
102 int cnt = ORDER>>5;
103#endif
104
105 asm volatile (
106#if ORDER > 32
107 "mov %[res], #0 \n"
108 "1: \n"
109#else
110 "ldmia %[v1], {r0-r3} \n"
111 "ldmia %[f2]!, {r4-r7} \n"
112 "mul %[res], r4, r0 \n"
113 "mla %[res], r5, r1, %[res] \n"
114 "mla %[res], r6, r2, %[res] \n"
115 "mla %[res], r7, r3, %[res] \n"
116 "ldmia %[s2]!, {r4-r7} \n"
117 "sub r0, r0, r4 \n"
118 "sub r1, r1, r5 \n"
119 "sub r2, r2, r6 \n"
120 "sub r3, r3, r7 \n"
121 "stmia %[v1]!, {r0-r3} \n"
122#endif
123 REPEAT_BLOCK(
124 "ldmia %[v1], {r0-r3} \n"
125 "ldmia %[f2]!, {r4-r7} \n"
126 "mla %[res], r4, r0, %[res] \n"
127 "mla %[res], r5, r1, %[res] \n"
128 "mla %[res], r6, r2, %[res] \n"
129 "mla %[res], r7, r3, %[res] \n"
130 "ldmia %[s2]!, {r4-r7} \n"
131 "sub r0, r0, r4 \n"
132 "sub r1, r1, r5 \n"
133 "sub r2, r2, r6 \n"
134 "sub r3, r3, r7 \n"
135 "stmia %[v1]!, {r0-r3} \n"
136 )
137#if ORDER > 32
138 "subs %[cnt], %[cnt], #1 \n"
139 "bne 1b \n"
140#endif
141 : /* outputs */
142#if ORDER > 32
143 [cnt]"+r"(cnt),
144#endif
145 [v1] "+r"(v1),
146 [f2] "+r"(f2),
147 [s2] "+r"(s2),
148 [res]"=r"(res)
149 : /* inputs */
150 : /* clobbers */
151 "r0", "r1", "r2", "r3", "r4",
152 "r5", "r6", "r7", "cc", "memory"
153 );
154 return res;
155}
156
157static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
158{
159 int res;
160#if ORDER > 32
161 int cnt = ORDER>>5;
162#endif
163
164 asm volatile (
165#if ORDER > 32
166 "mov %[res], #0 \n"
167 "1: \n"
168#else
169 "ldmia %[v1]!, {r0-r3} \n"
170 "ldmia %[v2]!, {r4-r7} \n"
171 "mul %[res], r4, r0 \n"
172 "mla %[res], r5, r1, %[res] \n"
173 "mla %[res], r6, r2, %[res] \n"
174 "mla %[res], r7, r3, %[res] \n"
175#endif
176 REPEAT_BLOCK(
177 "ldmia %[v1]!, {r0-r3} \n"
178 "ldmia %[v2]!, {r4-r7} \n"
179 "mla %[res], r4, r0, %[res] \n"
180 "mla %[res], r5, r1, %[res] \n"
181 "mla %[res], r6, r2, %[res] \n"
182 "mla %[res], r7, r3, %[res] \n"
183 )
184#if ORDER > 32
185 "subs %[cnt], %[cnt], #1 \n"
186 "bne 1b \n"
187#endif
188 : /* outputs */
189#if ORDER > 32
190 [cnt]"+r"(cnt),
191#endif
192 [v1] "+r"(v1),
193 [v2] "+r"(v2),
194 [res]"=r"(res)
195 : /* inputs */
196 : /* clobbers */
197 "r0", "r1", "r2", "r3",
198 "r4", "r5", "r6", "r7", "cc", "memory"
199 );
200 return res;
201}