diff options
author | Sean Bartell <wingedtachikoma@gmail.com> | 2011-06-25 21:32:25 -0400 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-04-25 22:13:20 +0200 |
commit | f40bfc9267b13b54e6379dfe7539447662879d24 (patch) | |
tree | 9b20069d5e62809ff434061ad730096836f916f2 /lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h | |
parent | a0009907de7a0107d49040d8a180f140e2eff299 (diff) | |
download | rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.tar.gz rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.zip |
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h')
-rw-r--r-- | lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h | 234 |
1 files changed, 234 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h new file mode 100644 index 0000000000..2177fe88ea --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h | |||
@@ -0,0 +1,234 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | MMX vector math copyright (C) 2010 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH | ||
28 | |||
29 | #define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16) | ||
30 | #define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48) | ||
31 | #define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56) | ||
32 | |||
33 | #if ORDER == 16 /* 3 times */ | ||
34 | #define REPEAT_MB(x) REPEAT_MB3(x, 8) | ||
35 | #elif ORDER == 32 /* 7 times */ | ||
36 | #define REPEAT_MB(x) REPEAT_MB7(x, 8) | ||
37 | #elif ORDER == 64 /* 5*3 == 15 times */ | ||
38 | #define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \ | ||
39 | REPEAT_MB3(x, 80) REPEAT_MB3(x, 104) | ||
40 | #elif ORDER == 256 /* 9*7 == 63 times */ | ||
41 | #define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \ | ||
42 | REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \ | ||
43 | REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456) | ||
44 | #elif ORDER == 1280 /* 8*8 == 64 times */ | ||
45 | #define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \ | ||
46 | REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \ | ||
47 | REPEAT_MB8(x, 384) REPEAT_MB8(x, 448) | ||
48 | #else | ||
49 | #error unsupported order | ||
50 | #endif | ||
51 | |||
52 | |||
53 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2) | ||
54 | { | ||
55 | int res, t; | ||
56 | #if ORDER > 256 | ||
57 | int cnt = ORDER>>8; | ||
58 | #endif | ||
59 | |||
60 | asm volatile ( | ||
61 | #if ORDER > 256 | ||
62 | "pxor %%mm2, %%mm2 \n" | ||
63 | "1: \n" | ||
64 | #else | ||
65 | "movq (%[v1]), %%mm2 \n" | ||
66 | "movq %%mm2, %%mm0 \n" | ||
67 | "pmaddwd (%[f2]), %%mm2 \n" | ||
68 | "paddw (%[s2]), %%mm0 \n" | ||
69 | "movq %%mm0, (%[v1]) \n" | ||
70 | #endif | ||
71 | |||
72 | #define SP_ADD_BLOCK(n) \ | ||
73 | "movq " #n "(%[v1]), %%mm1 \n" \ | ||
74 | "movq %%mm1, %%mm0 \n" \ | ||
75 | "pmaddwd " #n "(%[f2]), %%mm1 \n" \ | ||
76 | "paddw " #n "(%[s2]), %%mm0 \n" \ | ||
77 | "movq %%mm0, " #n "(%[v1]) \n" \ | ||
78 | "paddd %%mm1, %%mm2 \n" | ||
79 | |||
80 | REPEAT_MB(SP_ADD_BLOCK) | ||
81 | |||
82 | #if ORDER > 256 | ||
83 | "add $512, %[v1] \n" | ||
84 | "add $512, %[s2] \n" | ||
85 | "add $512, %[f2] \n" | ||
86 | "dec %[cnt] \n" | ||
87 | "jne 1b \n" | ||
88 | #endif | ||
89 | |||
90 | "movd %%mm2, %[t] \n" | ||
91 | "psrlq $32, %%mm2 \n" | ||
92 | "movd %%mm2, %[res] \n" | ||
93 | "add %[t], %[res] \n" | ||
94 | : /* outputs */ | ||
95 | #if ORDER > 256 | ||
96 | [cnt]"+r"(cnt), | ||
97 | [s2] "+r"(s2), | ||
98 | [res]"=r"(res), | ||
99 | [t] "=r"(t) | ||
100 | : /* inputs */ | ||
101 | [v1]"2"(v1), | ||
102 | [f2]"3"(f2) | ||
103 | #else | ||
104 | [res]"=r"(res), | ||
105 | [t] "=r"(t) | ||
106 | : /* inputs */ | ||
107 | [v1]"r"(v1), | ||
108 | [f2]"r"(f2), | ||
109 | [s2]"r"(s2) | ||
110 | #endif | ||
111 | : /* clobbers */ | ||
112 | "mm0", "mm1", "mm2" | ||
113 | ); | ||
114 | return res; | ||
115 | } | ||
116 | |||
117 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2) | ||
118 | { | ||
119 | int res, t; | ||
120 | #if ORDER > 256 | ||
121 | int cnt = ORDER>>8; | ||
122 | #endif | ||
123 | |||
124 | asm volatile ( | ||
125 | #if ORDER > 256 | ||
126 | "pxor %%mm2, %%mm2 \n" | ||
127 | "1: \n" | ||
128 | #else | ||
129 | "movq (%[v1]), %%mm2 \n" | ||
130 | "movq %%mm2, %%mm0 \n" | ||
131 | "pmaddwd (%[f2]), %%mm2 \n" | ||
132 | "psubw (%[s2]), %%mm0 \n" | ||
133 | "movq %%mm0, (%[v1]) \n" | ||
134 | #endif | ||
135 | |||
136 | #define SP_SUB_BLOCK(n) \ | ||
137 | "movq " #n "(%[v1]), %%mm1 \n" \ | ||
138 | "movq %%mm1, %%mm0 \n" \ | ||
139 | "pmaddwd " #n "(%[f2]), %%mm1 \n" \ | ||
140 | "psubw " #n "(%[s2]), %%mm0 \n" \ | ||
141 | "movq %%mm0, " #n "(%[v1]) \n" \ | ||
142 | "paddd %%mm1, %%mm2 \n" | ||
143 | |||
144 | REPEAT_MB(SP_SUB_BLOCK) | ||
145 | |||
146 | #if ORDER > 256 | ||
147 | "add $512, %[v1] \n" | ||
148 | "add $512, %[s2] \n" | ||
149 | "add $512, %[f2] \n" | ||
150 | "dec %[cnt] \n" | ||
151 | "jne 1b \n" | ||
152 | #endif | ||
153 | |||
154 | "movd %%mm2, %[t] \n" | ||
155 | "psrlq $32, %%mm2 \n" | ||
156 | "movd %%mm2, %[res] \n" | ||
157 | "add %[t], %[res] \n" | ||
158 | : /* outputs */ | ||
159 | #if ORDER > 256 | ||
160 | [cnt]"+r"(cnt), | ||
161 | [s2] "+r"(s2), | ||
162 | [res]"=r"(res), | ||
163 | [t] "=r"(t) | ||
164 | : /* inputs */ | ||
165 | [v1]"2"(v1), | ||
166 | [f2]"3"(f2) | ||
167 | #else | ||
168 | [res]"=r"(res), | ||
169 | [t] "=r"(t) | ||
170 | : /* inputs */ | ||
171 | [v1]"r"(v1), | ||
172 | [f2]"r"(f2), | ||
173 | [s2]"r"(s2) | ||
174 | #endif | ||
175 | : /* clobbers */ | ||
176 | "mm0", "mm1", "mm2" | ||
177 | ); | ||
178 | return res; | ||
179 | } | ||
180 | |||
181 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
182 | { | ||
183 | int res, t; | ||
184 | #if ORDER > 256 | ||
185 | int cnt = ORDER>>8; | ||
186 | #endif | ||
187 | |||
188 | asm volatile ( | ||
189 | #if ORDER > 256 | ||
190 | "pxor %%mm1, %%mm1 \n" | ||
191 | "1: \n" | ||
192 | #else | ||
193 | "movq (%[v1]), %%mm1 \n" | ||
194 | "pmaddwd (%[v2]), %%mm1 \n" | ||
195 | #endif | ||
196 | |||
197 | #define SP_BLOCK(n) \ | ||
198 | "movq " #n "(%[v1]), %%mm0 \n" \ | ||
199 | "pmaddwd " #n "(%[v2]), %%mm0 \n" \ | ||
200 | "paddd %%mm0, %%mm1 \n" | ||
201 | |||
202 | REPEAT_MB(SP_BLOCK) | ||
203 | |||
204 | #if ORDER > 256 | ||
205 | "add $512, %[v1] \n" | ||
206 | "add $512, %[v2] \n" | ||
207 | "dec %[cnt] \n" | ||
208 | "jne 1b \n" | ||
209 | #endif | ||
210 | |||
211 | "movd %%mm1, %[t] \n" | ||
212 | "psrlq $32, %%mm1 \n" | ||
213 | "movd %%mm1, %[res] \n" | ||
214 | "add %[t], %[res] \n" | ||
215 | : /* outputs */ | ||
216 | #if ORDER > 256 | ||
217 | [cnt]"+r"(cnt), | ||
218 | [res]"=r"(res), | ||
219 | [t] "=r"(t) | ||
220 | : /* inputs */ | ||
221 | [v1]"1"(v1), | ||
222 | [v2]"2"(v2) | ||
223 | #else | ||
224 | [res]"=r"(res), | ||
225 | [t] "=r"(t) | ||
226 | : /* inputs */ | ||
227 | [v1]"r"(v1), | ||
228 | [v2]"r"(v2) | ||
229 | #endif | ||
230 | : /* clobbers */ | ||
231 | "mm0", "mm1" | ||
232 | ); | ||
233 | return res; | ||
234 | } | ||