summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2010-02-15 01:27:04 +0000
committerJens Arnold <amiconn@rockbox.org>2010-02-15 01:27:04 +0000
commitb8eb272e48b98de6ce9fba04798e4652119e0a0a (patch)
treef55f5f0e2192d34f658000f537d96767bfa5533d /apps/codecs
parent1bef4c66501893b4b7e154979a80f5386cbe964f (diff)
downloadrockbox-b8eb272e48b98de6ce9fba04798e4652119e0a0a.tar.gz
rockbox-b8eb272e48b98de6ce9fba04798e4652119e0a0a.zip
libdemac: Add x86/x86_64 MMX asm for the filters. Not relevant for target but speeds up decoding on x86/x86_64 sims. Average speedup ranges from 25% for -c2000 to 3 times for -c5000; on Intel Atom it's even 45% for -c2000 to 6 times for -c5000.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24663 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r--apps/codecs/demac/libdemac/filter.c3
-rw-r--r--apps/codecs/demac/libdemac/vector_math16_mmx.h219
2 files changed, 222 insertions, 0 deletions
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c
index ed6f3c8dc6..275f12f6ae 100644
--- a/apps/codecs/demac/libdemac/filter.c
+++ b/apps/codecs/demac/libdemac/filter.c
@@ -46,6 +46,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
46#elif defined(CPU_ARM) && (ARM_ARCH >= 5) 46#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
47/* Assume all our ARMv5 targets are ARMv5te(j) */ 47/* Assume all our ARMv5 targets are ARMv5te(j) */
48#include "vector_math16_armv5te.h" 48#include "vector_math16_armv5te.h"
49#elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \
50 || defined(__x86_64__)
51#include "vector_math16_mmx.h"
49#else 52#else
50#include "vector_math_generic.h" 53#include "vector_math_generic.h"
51#endif 54#endif
diff --git a/apps/codecs/demac/libdemac/vector_math16_mmx.h b/apps/codecs/demac/libdemac/vector_math16_mmx.h
new file mode 100644
index 0000000000..a7f9c73af7
--- /dev/null
+++ b/apps/codecs/demac/libdemac/vector_math16_mmx.h
@@ -0,0 +1,219 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9MMX vector math copyright (C) 2010 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* Feature marker for the code that includes this header.
 * NOTE(review): presumably announces that the combined vector_sp_add() /
 * vector_sp_sub() routines are provided here -- confirm against filter.c. */
27#define FUSED_VECTOR_MATH
28
/* Stringification helpers. __E turns its argument into a string literal
 * exactly as written; __S goes through __E so that the argument is
 * macro-expanded first. The functions in this header use __S to paste an
 * ORDER-derived expression into the assembler text of a ".rept" directive. */
29#define __E(__e) #__e
30#define __S(__e) __E(__e)
31
32static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
33{
34 int res, t;
35#if ORDER > 256
36 int cnt = ORDER>>8;
37#endif
38
39 asm volatile (
40#if ORDER > 256
41 "pxor %%mm2, %%mm2 \n"
42 ".set ofs, 0 \n"
43 "1: \n"
44 ".rept 64 \n"
45#else
46 "movq (%[v1]), %%mm2 \n"
47 "movq %%mm2, %%mm0 \n"
48 "pmaddwd (%[f2]), %%mm2 \n"
49 "paddw (%[s2]), %%mm0 \n"
50 "movq %%mm0, (%[v1]) \n"
51 ".set ofs, 8 \n"
52
53 ".rept " __S(ORDER>>2 - 1) "\n"
54#endif
55 "movq ofs(%[v1]), %%mm1 \n"
56 "movq %%mm1, %%mm0 \n"
57 "pmaddwd ofs(%[f2]), %%mm1 \n"
58 "paddw ofs(%[s2]), %%mm0 \n"
59 "movq %%mm0, ofs(%[v1]) \n"
60 "paddd %%mm1, %%mm2 \n"
61 ".set ofs, ofs + 8 \n"
62 ".endr \n"
63#if ORDER > 256
64 "add $512, %[v1] \n"
65 "add $512, %[s2] \n"
66 "add $512, %[f2] \n"
67 "dec %[cnt] \n"
68 "jne 1b \n"
69#endif
70
71 "movd %%mm2, %[t] \n"
72 "psrlq $32, %%mm2 \n"
73 "movd %%mm2, %[res] \n"
74 "add %[t], %[res] \n"
75 : /* outputs */
76#if ORDER > 256
77 [cnt]"+r"(cnt),
78 [s2] "+r"(s2),
79 [res]"=r"(res),
80 [t] "=r"(t)
81 : /* inputs */
82 [v1]"2"(v1),
83 [f2]"3"(f2)
84#else
85 [res]"=r"(res),
86 [t] "=r"(t)
87 : /* inputs */
88 [v1]"r"(v1),
89 [f2]"r"(f2),
90 [s2]"r"(s2)
91#endif
92 : /* clobbers */
93 "mm0", "mm1", "mm2"
94 );
95 return res;
96}
97
98static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
99{
100 int res, t;
101#if ORDER > 256
102 int cnt = ORDER>>8;
103#endif
104
105 asm volatile (
106#if ORDER > 256
107 "pxor %%mm2, %%mm2 \n"
108 ".set ofs, 0 \n"
109 "1: \n"
110 ".rept 64 \n"
111#else
112 "movq (%[v1]), %%mm2 \n"
113 "movq %%mm2, %%mm0 \n"
114 "pmaddwd (%[f2]), %%mm2 \n"
115 "psubw (%[s2]), %%mm0 \n"
116 "movq %%mm0, (%[v1]) \n"
117 ".set ofs, 8 \n"
118
119 ".rept " __S(ORDER>>2 - 1) "\n"
120#endif
121 "movq ofs(%[v1]), %%mm1 \n"
122 "movq %%mm1, %%mm0 \n"
123 "pmaddwd ofs(%[f2]), %%mm1 \n"
124 "psubw ofs(%[s2]), %%mm0 \n"
125 "movq %%mm0, ofs(%[v1]) \n"
126 "paddd %%mm1, %%mm2 \n"
127 ".set ofs, ofs + 8 \n"
128 ".endr \n"
129#if ORDER > 256
130 "add $512, %[v1] \n"
131 "add $512, %[s2] \n"
132 "add $512, %[f2] \n"
133 "dec %[cnt] \n"
134 "jne 1b \n"
135#endif
136
137 "movd %%mm2, %[t] \n"
138 "psrlq $32, %%mm2 \n"
139 "movd %%mm2, %[res] \n"
140 "add %[t], %[res] \n"
141 : /* outputs */
142#if ORDER > 256
143 [cnt]"+r"(cnt),
144 [s2] "+r"(s2),
145 [res]"=r"(res),
146 [t] "=r"(t)
147 : /* inputs */
148 [v1]"2"(v1),
149 [f2]"3"(f2)
150#else
151 [res]"=r"(res),
152 [t] "=r"(t)
153 : /* inputs */
154 [v1]"r"(v1),
155 [f2]"r"(f2),
156 [s2]"r"(s2)
157#endif
158 : /* clobbers */
159 "mm0", "mm1", "mm2"
160 );
161 return res;
162}
163
164static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
165{
166 int res, t;
167#if ORDER > 256
168 int cnt = ORDER>>8;
169#endif
170
171 asm volatile (
172#if ORDER > 256
173 "pxor %%mm1, %%mm1 \n"
174 ".set ofs, 0 \n"
175 "1: \n"
176 ".rept 64 \n"
177#else
178 "movq (%[v1]), %%mm1 \n"
179 "pmaddwd (%[v2]), %%mm1 \n"
180 ".set ofs, 8 \n"
181
182 ".rept " __S(ORDER>>2 - 1) "\n"
183#endif
184 "movq ofs(%[v1]), %%mm0 \n"
185 "pmaddwd ofs(%[v2]), %%mm0 \n"
186 "paddd %%mm0, %%mm1 \n"
187 ".set ofs, ofs + 8 \n"
188 ".endr \n"
189#if ORDER > 256
190 "add $512, %[v1] \n"
191 "add $512, %[v2] \n"
192 "dec %[cnt] \n"
193 "jne 1b \n"
194#endif
195
196 "movd %%mm1, %[t] \n"
197 "psrlq $32, %%mm1 \n"
198 "movd %%mm1, %[res] \n"
199 "add %[t], %[res] \n"
200 : /* outputs */
201#if ORDER > 256
202 [cnt]"+r"(cnt),
203 [res]"=r"(res),
204 [t] "=r"(t)
205 : /* inputs */
206 [v1]"1"(v1),
207 [v2]"2"(v2)
208#else
209 [res]"=r"(res),
210 [t] "=r"(t)
211 : /* inputs */
212 [v1]"r"(v1),
213 [v2]"r"(v2)
214#endif
215 : /* clobbers */
216 "mm0", "mm1"
217 );
218 return res;
219}