summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h')
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h234
1 files changed, 234 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
new file mode 100644
index 0000000000..2177fe88ea
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
@@ -0,0 +1,234 @@
/*

libdemac - A Monkey's Audio decoder

$Id$

Copyright (C) Dave Chapman 2007

MMX vector math copyright (C) 2010 Jens Arnold

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA

*/
27#define FUSED_VECTOR_MATH
28
29#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
30#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
31#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
32
33#if ORDER == 16 /* 3 times */
34#define REPEAT_MB(x) REPEAT_MB3(x, 8)
35#elif ORDER == 32 /* 7 times */
36#define REPEAT_MB(x) REPEAT_MB7(x, 8)
37#elif ORDER == 64 /* 5*3 == 15 times */
38#define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
39 REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
40#elif ORDER == 256 /* 9*7 == 63 times */
41#define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \
42 REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
43 REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
44#elif ORDER == 1280 /* 8*8 == 64 times */
45#define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \
46 REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
47 REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
48#else
49#error unsupported order
50#endif
51
52
53static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
54{
55 int res, t;
56#if ORDER > 256
57 int cnt = ORDER>>8;
58#endif
59
60 asm volatile (
61#if ORDER > 256
62 "pxor %%mm2, %%mm2 \n"
63 "1: \n"
64#else
65 "movq (%[v1]), %%mm2 \n"
66 "movq %%mm2, %%mm0 \n"
67 "pmaddwd (%[f2]), %%mm2 \n"
68 "paddw (%[s2]), %%mm0 \n"
69 "movq %%mm0, (%[v1]) \n"
70#endif
71
72#define SP_ADD_BLOCK(n) \
73 "movq " #n "(%[v1]), %%mm1 \n" \
74 "movq %%mm1, %%mm0 \n" \
75 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
76 "paddw " #n "(%[s2]), %%mm0 \n" \
77 "movq %%mm0, " #n "(%[v1]) \n" \
78 "paddd %%mm1, %%mm2 \n"
79
80REPEAT_MB(SP_ADD_BLOCK)
81
82#if ORDER > 256
83 "add $512, %[v1] \n"
84 "add $512, %[s2] \n"
85 "add $512, %[f2] \n"
86 "dec %[cnt] \n"
87 "jne 1b \n"
88#endif
89
90 "movd %%mm2, %[t] \n"
91 "psrlq $32, %%mm2 \n"
92 "movd %%mm2, %[res] \n"
93 "add %[t], %[res] \n"
94 : /* outputs */
95#if ORDER > 256
96 [cnt]"+r"(cnt),
97 [s2] "+r"(s2),
98 [res]"=r"(res),
99 [t] "=r"(t)
100 : /* inputs */
101 [v1]"2"(v1),
102 [f2]"3"(f2)
103#else
104 [res]"=r"(res),
105 [t] "=r"(t)
106 : /* inputs */
107 [v1]"r"(v1),
108 [f2]"r"(f2),
109 [s2]"r"(s2)
110#endif
111 : /* clobbers */
112 "mm0", "mm1", "mm2"
113 );
114 return res;
115}
116
117static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
118{
119 int res, t;
120#if ORDER > 256
121 int cnt = ORDER>>8;
122#endif
123
124 asm volatile (
125#if ORDER > 256
126 "pxor %%mm2, %%mm2 \n"
127 "1: \n"
128#else
129 "movq (%[v1]), %%mm2 \n"
130 "movq %%mm2, %%mm0 \n"
131 "pmaddwd (%[f2]), %%mm2 \n"
132 "psubw (%[s2]), %%mm0 \n"
133 "movq %%mm0, (%[v1]) \n"
134#endif
135
136#define SP_SUB_BLOCK(n) \
137 "movq " #n "(%[v1]), %%mm1 \n" \
138 "movq %%mm1, %%mm0 \n" \
139 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
140 "psubw " #n "(%[s2]), %%mm0 \n" \
141 "movq %%mm0, " #n "(%[v1]) \n" \
142 "paddd %%mm1, %%mm2 \n"
143
144REPEAT_MB(SP_SUB_BLOCK)
145
146#if ORDER > 256
147 "add $512, %[v1] \n"
148 "add $512, %[s2] \n"
149 "add $512, %[f2] \n"
150 "dec %[cnt] \n"
151 "jne 1b \n"
152#endif
153
154 "movd %%mm2, %[t] \n"
155 "psrlq $32, %%mm2 \n"
156 "movd %%mm2, %[res] \n"
157 "add %[t], %[res] \n"
158 : /* outputs */
159#if ORDER > 256
160 [cnt]"+r"(cnt),
161 [s2] "+r"(s2),
162 [res]"=r"(res),
163 [t] "=r"(t)
164 : /* inputs */
165 [v1]"2"(v1),
166 [f2]"3"(f2)
167#else
168 [res]"=r"(res),
169 [t] "=r"(t)
170 : /* inputs */
171 [v1]"r"(v1),
172 [f2]"r"(f2),
173 [s2]"r"(s2)
174#endif
175 : /* clobbers */
176 "mm0", "mm1", "mm2"
177 );
178 return res;
179}
180
181static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
182{
183 int res, t;
184#if ORDER > 256
185 int cnt = ORDER>>8;
186#endif
187
188 asm volatile (
189#if ORDER > 256
190 "pxor %%mm1, %%mm1 \n"
191 "1: \n"
192#else
193 "movq (%[v1]), %%mm1 \n"
194 "pmaddwd (%[v2]), %%mm1 \n"
195#endif
196
197#define SP_BLOCK(n) \
198 "movq " #n "(%[v1]), %%mm0 \n" \
199 "pmaddwd " #n "(%[v2]), %%mm0 \n" \
200 "paddd %%mm0, %%mm1 \n"
201
202REPEAT_MB(SP_BLOCK)
203
204#if ORDER > 256
205 "add $512, %[v1] \n"
206 "add $512, %[v2] \n"
207 "dec %[cnt] \n"
208 "jne 1b \n"
209#endif
210
211 "movd %%mm1, %[t] \n"
212 "psrlq $32, %%mm1 \n"
213 "movd %%mm1, %[res] \n"
214 "add %[t], %[res] \n"
215 : /* outputs */
216#if ORDER > 256
217 [cnt]"+r"(cnt),
218 [res]"=r"(res),
219 [t] "=r"(t)
220 : /* inputs */
221 [v1]"1"(v1),
222 [v2]"2"(v2)
223#else
224 [res]"=r"(res),
225 [t] "=r"(t)
226 : /* inputs */
227 [v1]"r"(v1),
228 [v2]"r"(v2)
229#endif
230 : /* clobbers */
231 "mm0", "mm1"
232 );
233 return res;
234}