diff options
-rw-r--r-- | apps/codecs/demac/libdemac/filter.c | 3 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_mmx.h | 219 | ||||
-rwxr-xr-x | tools/configure | 22 |
3 files changed, 238 insertions, 6 deletions
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c index ed6f3c8dc6..275f12f6ae 100644 --- a/apps/codecs/demac/libdemac/filter.c +++ b/apps/codecs/demac/libdemac/filter.c | |||
@@ -46,6 +46,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
46 | #elif defined(CPU_ARM) && (ARM_ARCH >= 5) | 46 | #elif defined(CPU_ARM) && (ARM_ARCH >= 5) |
47 | /* Assume all our ARMv5 targets are ARMv5te(j) */ | 47 | /* Assume all our ARMv5 targets are ARMv5te(j) */ |
48 | #include "vector_math16_armv5te.h" | 48 | #include "vector_math16_armv5te.h" |
49 | #elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \ | ||
50 | || defined(__x86_64__) | ||
51 | #include "vector_math16_mmx.h" | ||
49 | #else | 52 | #else |
50 | #include "vector_math_generic.h" | 53 | #include "vector_math_generic.h" |
51 | #endif | 54 | #endif |
diff --git a/apps/codecs/demac/libdemac/vector_math16_mmx.h b/apps/codecs/demac/libdemac/vector_math16_mmx.h new file mode 100644 index 0000000000..a7f9c73af7 --- /dev/null +++ b/apps/codecs/demac/libdemac/vector_math16_mmx.h | |||
@@ -0,0 +1,219 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | MMX vector math copyright (C) 2010 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
/* Defined without a value. NOTE(review): presumably tells the including file
   that this header supplies the fused vector_sp_add()/vector_sp_sub()
   routines - confirm against filter.c. */
#define FUSED_VECTOR_MATH

/* __E turns its argument into a string literal as written. */
#define __E(__e) #__e
/* __S macro-expands its argument first and then stringifies the result, so
   __S(ORDER>>2 - 1) yields the numeric value of ORDER inside the string. */
#define __S(__e) __E(__e)
31 | |||
32 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2) | ||
33 | { | ||
34 | int res, t; | ||
35 | #if ORDER > 256 | ||
36 | int cnt = ORDER>>8; | ||
37 | #endif | ||
38 | |||
39 | asm volatile ( | ||
40 | #if ORDER > 256 | ||
41 | "pxor %%mm2, %%mm2 \n" | ||
42 | ".set ofs, 0 \n" | ||
43 | "1: \n" | ||
44 | ".rept 64 \n" | ||
45 | #else | ||
46 | "movq (%[v1]), %%mm2 \n" | ||
47 | "movq %%mm2, %%mm0 \n" | ||
48 | "pmaddwd (%[f2]), %%mm2 \n" | ||
49 | "paddw (%[s2]), %%mm0 \n" | ||
50 | "movq %%mm0, (%[v1]) \n" | ||
51 | ".set ofs, 8 \n" | ||
52 | |||
53 | ".rept " __S(ORDER>>2 - 1) "\n" | ||
54 | #endif | ||
55 | "movq ofs(%[v1]), %%mm1 \n" | ||
56 | "movq %%mm1, %%mm0 \n" | ||
57 | "pmaddwd ofs(%[f2]), %%mm1 \n" | ||
58 | "paddw ofs(%[s2]), %%mm0 \n" | ||
59 | "movq %%mm0, ofs(%[v1]) \n" | ||
60 | "paddd %%mm1, %%mm2 \n" | ||
61 | ".set ofs, ofs + 8 \n" | ||
62 | ".endr \n" | ||
63 | #if ORDER > 256 | ||
64 | "add $512, %[v1] \n" | ||
65 | "add $512, %[s2] \n" | ||
66 | "add $512, %[f2] \n" | ||
67 | "dec %[cnt] \n" | ||
68 | "jne 1b \n" | ||
69 | #endif | ||
70 | |||
71 | "movd %%mm2, %[t] \n" | ||
72 | "psrlq $32, %%mm2 \n" | ||
73 | "movd %%mm2, %[res] \n" | ||
74 | "add %[t], %[res] \n" | ||
75 | : /* outputs */ | ||
76 | #if ORDER > 256 | ||
77 | [cnt]"+r"(cnt), | ||
78 | [s2] "+r"(s2), | ||
79 | [res]"=r"(res), | ||
80 | [t] "=r"(t) | ||
81 | : /* inputs */ | ||
82 | [v1]"2"(v1), | ||
83 | [f2]"3"(f2) | ||
84 | #else | ||
85 | [res]"=r"(res), | ||
86 | [t] "=r"(t) | ||
87 | : /* inputs */ | ||
88 | [v1]"r"(v1), | ||
89 | [f2]"r"(f2), | ||
90 | [s2]"r"(s2) | ||
91 | #endif | ||
92 | : /* clobbers */ | ||
93 | "mm0", "mm1", "mm2" | ||
94 | ); | ||
95 | return res; | ||
96 | } | ||
97 | |||
98 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2) | ||
99 | { | ||
100 | int res, t; | ||
101 | #if ORDER > 256 | ||
102 | int cnt = ORDER>>8; | ||
103 | #endif | ||
104 | |||
105 | asm volatile ( | ||
106 | #if ORDER > 256 | ||
107 | "pxor %%mm2, %%mm2 \n" | ||
108 | ".set ofs, 0 \n" | ||
109 | "1: \n" | ||
110 | ".rept 64 \n" | ||
111 | #else | ||
112 | "movq (%[v1]), %%mm2 \n" | ||
113 | "movq %%mm2, %%mm0 \n" | ||
114 | "pmaddwd (%[f2]), %%mm2 \n" | ||
115 | "psubw (%[s2]), %%mm0 \n" | ||
116 | "movq %%mm0, (%[v1]) \n" | ||
117 | ".set ofs, 8 \n" | ||
118 | |||
119 | ".rept " __S(ORDER>>2 - 1) "\n" | ||
120 | #endif | ||
121 | "movq ofs(%[v1]), %%mm1 \n" | ||
122 | "movq %%mm1, %%mm0 \n" | ||
123 | "pmaddwd ofs(%[f2]), %%mm1 \n" | ||
124 | "psubw ofs(%[s2]), %%mm0 \n" | ||
125 | "movq %%mm0, ofs(%[v1]) \n" | ||
126 | "paddd %%mm1, %%mm2 \n" | ||
127 | ".set ofs, ofs + 8 \n" | ||
128 | ".endr \n" | ||
129 | #if ORDER > 256 | ||
130 | "add $512, %[v1] \n" | ||
131 | "add $512, %[s2] \n" | ||
132 | "add $512, %[f2] \n" | ||
133 | "dec %[cnt] \n" | ||
134 | "jne 1b \n" | ||
135 | #endif | ||
136 | |||
137 | "movd %%mm2, %[t] \n" | ||
138 | "psrlq $32, %%mm2 \n" | ||
139 | "movd %%mm2, %[res] \n" | ||
140 | "add %[t], %[res] \n" | ||
141 | : /* outputs */ | ||
142 | #if ORDER > 256 | ||
143 | [cnt]"+r"(cnt), | ||
144 | [s2] "+r"(s2), | ||
145 | [res]"=r"(res), | ||
146 | [t] "=r"(t) | ||
147 | : /* inputs */ | ||
148 | [v1]"2"(v1), | ||
149 | [f2]"3"(f2) | ||
150 | #else | ||
151 | [res]"=r"(res), | ||
152 | [t] "=r"(t) | ||
153 | : /* inputs */ | ||
154 | [v1]"r"(v1), | ||
155 | [f2]"r"(f2), | ||
156 | [s2]"r"(s2) | ||
157 | #endif | ||
158 | : /* clobbers */ | ||
159 | "mm0", "mm1", "mm2" | ||
160 | ); | ||
161 | return res; | ||
162 | } | ||
163 | |||
164 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
165 | { | ||
166 | int res, t; | ||
167 | #if ORDER > 256 | ||
168 | int cnt = ORDER>>8; | ||
169 | #endif | ||
170 | |||
171 | asm volatile ( | ||
172 | #if ORDER > 256 | ||
173 | "pxor %%mm1, %%mm1 \n" | ||
174 | ".set ofs, 0 \n" | ||
175 | "1: \n" | ||
176 | ".rept 64 \n" | ||
177 | #else | ||
178 | "movq (%[v1]), %%mm1 \n" | ||
179 | "pmaddwd (%[v2]), %%mm1 \n" | ||
180 | ".set ofs, 8 \n" | ||
181 | |||
182 | ".rept " __S(ORDER>>2 - 1) "\n" | ||
183 | #endif | ||
184 | "movq ofs(%[v1]), %%mm0 \n" | ||
185 | "pmaddwd ofs(%[v2]), %%mm0 \n" | ||
186 | "paddd %%mm0, %%mm1 \n" | ||
187 | ".set ofs, ofs + 8 \n" | ||
188 | ".endr \n" | ||
189 | #if ORDER > 256 | ||
190 | "add $512, %[v1] \n" | ||
191 | "add $512, %[v2] \n" | ||
192 | "dec %[cnt] \n" | ||
193 | "jne 1b \n" | ||
194 | #endif | ||
195 | |||
196 | "movd %%mm1, %[t] \n" | ||
197 | "psrlq $32, %%mm1 \n" | ||
198 | "movd %%mm1, %[res] \n" | ||
199 | "add %[t], %[res] \n" | ||
200 | : /* outputs */ | ||
201 | #if ORDER > 256 | ||
202 | [cnt]"+r"(cnt), | ||
203 | [res]"=r"(res), | ||
204 | [t] "=r"(t) | ||
205 | : /* inputs */ | ||
206 | [v1]"1"(v1), | ||
207 | [v2]"2"(v2) | ||
208 | #else | ||
209 | [res]"=r"(res), | ||
210 | [t] "=r"(t) | ||
211 | : /* inputs */ | ||
212 | [v1]"r"(v1), | ||
213 | [v2]"r"(v2) | ||
214 | #endif | ||
215 | : /* clobbers */ | ||
216 | "mm0", "mm1" | ||
217 | ); | ||
218 | return res; | ||
219 | } | ||
diff --git a/tools/configure b/tools/configure index 7a04cecc9c..4d61d8e903 100755 --- a/tools/configure +++ b/tools/configure | |||
@@ -171,12 +171,20 @@ simcc () { | |||
171 | GCCOPTS="$GCCOPTS -I\$(SIMDIR)" | 171 | GCCOPTS="$GCCOPTS -I\$(SIMDIR)" |
172 | 172 | ||
173 | if test "X$crosscompile" != "Xyes"; then | 173 | if test "X$crosscompile" != "Xyes"; then |
174 | if [ "`uname -m`" = "x86_64" ] || [ "`uname -m`" = "amd64" ]; then | 174 | case `uname -m` in |
175 | # fPIC is needed to make shared objects link | 175 | x86_64|amd64) |
176 | # setting visibility to hidden is necessary to avoid strange crashes | 176 | # fPIC is needed to make shared objects link |
177 | # due to symbol clashing | 177 | # setting visibility to hidden is necessary to avoid strange crashes |
178 | GCCOPTS="$GCCOPTS -fPIC -fvisibility=hidden" | 178 | # due to symbol clashing |
179 | fi | 179 | GCCOPTS="$GCCOPTS -fPIC -fvisibility=hidden" |
180 | # x86_64 supports MMX by default | ||
181 | ;; | ||
182 | |||
183 | i686) | ||
184 | echo "Enabling MMX support" | ||
185 | GCCOPTS="$GCCOPTS -mmmx" | ||
186 | ;; | ||
187 | esac | ||
180 | 188 | ||
181 | id=$$ | 189 | id=$$ |
182 | cat >$tmpdir/conftest-$id.c <<EOF | 190 | cat >$tmpdir/conftest-$id.c <<EOF |
@@ -218,6 +226,8 @@ EOF | |||
218 | LDOPTS="-mconsole $sdl_libs" | 226 | LDOPTS="-mconsole $sdl_libs" |
219 | output="rockboxui.exe" # use this as output binary name | 227 | output="rockboxui.exe" # use this as output binary name |
220 | endian="little" # windows is little endian | 228 | endian="little" # windows is little endian |
229 | echo "Enabling MMX support" | ||
230 | GCCOPTS="$GCCOPTS -mmmx" | ||
221 | fi | 231 | fi |
222 | } | 232 | } |
223 | 233 | ||