summary | refs | log | tree | commit | diff
path: root/apps/codecs/demac/libdemac/vector_math16_mmx.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_mmx.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_mmx.h 91
1 file changed, 53 insertions, 38 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_mmx.h b/apps/codecs/demac/libdemac/vector_math16_mmx.h
index a7f9c73af7..2177fe88ea 100644
--- a/apps/codecs/demac/libdemac/vector_math16_mmx.h
+++ b/apps/codecs/demac/libdemac/vector_math16_mmx.h
@@ -26,8 +26,29 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
26 26
27#define FUSED_VECTOR_MATH 27#define FUSED_VECTOR_MATH
28 28
29#define __E(__e) #__e 29#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
30#define __S(__e) __E(__e) 30#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
31#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
32
33#if ORDER == 16 /* 3 times */
34#define REPEAT_MB(x) REPEAT_MB3(x, 8)
35#elif ORDER == 32 /* 7 times */
36#define REPEAT_MB(x) REPEAT_MB7(x, 8)
37#elif ORDER == 64 /* 5*3 == 15 times */
38#define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
39 REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
40#elif ORDER == 256 /* 9*7 == 63 times */
41#define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \
42 REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
43 REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
44#elif ORDER == 1280 /* 8*8 == 64 times */
45#define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \
46 REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
47 REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
48#else
49#error unsupported order
50#endif
51
31 52
32static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2) 53static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
33{ 54{
@@ -39,27 +60,25 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
39 asm volatile ( 60 asm volatile (
40#if ORDER > 256 61#if ORDER > 256
41 "pxor %%mm2, %%mm2 \n" 62 "pxor %%mm2, %%mm2 \n"
42 ".set ofs, 0 \n"
43 "1: \n" 63 "1: \n"
44 ".rept 64 \n"
45#else 64#else
46 "movq (%[v1]), %%mm2 \n" 65 "movq (%[v1]), %%mm2 \n"
47 "movq %%mm2, %%mm0 \n" 66 "movq %%mm2, %%mm0 \n"
48 "pmaddwd (%[f2]), %%mm2 \n" 67 "pmaddwd (%[f2]), %%mm2 \n"
49 "paddw (%[s2]), %%mm0 \n" 68 "paddw (%[s2]), %%mm0 \n"
50 "movq %%mm0, (%[v1]) \n" 69 "movq %%mm0, (%[v1]) \n"
51 ".set ofs, 8 \n"
52
53 ".rept " __S(ORDER>>2 - 1) "\n"
54#endif 70#endif
55 "movq ofs(%[v1]), %%mm1 \n" 71
56 "movq %%mm1, %%mm0 \n" 72#define SP_ADD_BLOCK(n) \
57 "pmaddwd ofs(%[f2]), %%mm1 \n" 73 "movq " #n "(%[v1]), %%mm1 \n" \
58 "paddw ofs(%[s2]), %%mm0 \n" 74 "movq %%mm1, %%mm0 \n" \
59 "movq %%mm0, ofs(%[v1]) \n" 75 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
60 "paddd %%mm1, %%mm2 \n" 76 "paddw " #n "(%[s2]), %%mm0 \n" \
61 ".set ofs, ofs + 8 \n" 77 "movq %%mm0, " #n "(%[v1]) \n" \
62 ".endr \n" 78 "paddd %%mm1, %%mm2 \n"
79
80REPEAT_MB(SP_ADD_BLOCK)
81
63#if ORDER > 256 82#if ORDER > 256
64 "add $512, %[v1] \n" 83 "add $512, %[v1] \n"
65 "add $512, %[s2] \n" 84 "add $512, %[s2] \n"
@@ -105,27 +124,25 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
105 asm volatile ( 124 asm volatile (
106#if ORDER > 256 125#if ORDER > 256
107 "pxor %%mm2, %%mm2 \n" 126 "pxor %%mm2, %%mm2 \n"
108 ".set ofs, 0 \n"
109 "1: \n" 127 "1: \n"
110 ".rept 64 \n"
111#else 128#else
112 "movq (%[v1]), %%mm2 \n" 129 "movq (%[v1]), %%mm2 \n"
113 "movq %%mm2, %%mm0 \n" 130 "movq %%mm2, %%mm0 \n"
114 "pmaddwd (%[f2]), %%mm2 \n" 131 "pmaddwd (%[f2]), %%mm2 \n"
115 "psubw (%[s2]), %%mm0 \n" 132 "psubw (%[s2]), %%mm0 \n"
116 "movq %%mm0, (%[v1]) \n" 133 "movq %%mm0, (%[v1]) \n"
117 ".set ofs, 8 \n"
118
119 ".rept " __S(ORDER>>2 - 1) "\n"
120#endif 134#endif
121 "movq ofs(%[v1]), %%mm1 \n" 135
122 "movq %%mm1, %%mm0 \n" 136#define SP_SUB_BLOCK(n) \
123 "pmaddwd ofs(%[f2]), %%mm1 \n" 137 "movq " #n "(%[v1]), %%mm1 \n" \
124 "psubw ofs(%[s2]), %%mm0 \n" 138 "movq %%mm1, %%mm0 \n" \
125 "movq %%mm0, ofs(%[v1]) \n" 139 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
126 "paddd %%mm1, %%mm2 \n" 140 "psubw " #n "(%[s2]), %%mm0 \n" \
127 ".set ofs, ofs + 8 \n" 141 "movq %%mm0, " #n "(%[v1]) \n" \
128 ".endr \n" 142 "paddd %%mm1, %%mm2 \n"
143
144REPEAT_MB(SP_SUB_BLOCK)
145
129#if ORDER > 256 146#if ORDER > 256
130 "add $512, %[v1] \n" 147 "add $512, %[v1] \n"
131 "add $512, %[s2] \n" 148 "add $512, %[s2] \n"
@@ -171,21 +188,19 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
171 asm volatile ( 188 asm volatile (
172#if ORDER > 256 189#if ORDER > 256
173 "pxor %%mm1, %%mm1 \n" 190 "pxor %%mm1, %%mm1 \n"
174 ".set ofs, 0 \n"
175 "1: \n" 191 "1: \n"
176 ".rept 64 \n"
177#else 192#else
178 "movq (%[v1]), %%mm1 \n" 193 "movq (%[v1]), %%mm1 \n"
179 "pmaddwd (%[v2]), %%mm1 \n" 194 "pmaddwd (%[v2]), %%mm1 \n"
180 ".set ofs, 8 \n"
181
182 ".rept " __S(ORDER>>2 - 1) "\n"
183#endif 195#endif
184 "movq ofs(%[v1]), %%mm0 \n" 196
185 "pmaddwd ofs(%[v2]), %%mm0 \n" 197#define SP_BLOCK(n) \
186 "paddd %%mm0, %%mm1 \n" 198 "movq " #n "(%[v1]), %%mm0 \n" \
187 ".set ofs, ofs + 8 \n" 199 "pmaddwd " #n "(%[v2]), %%mm0 \n" \
188 ".endr \n" 200 "paddd %%mm0, %%mm1 \n"
201
202REPEAT_MB(SP_BLOCK)
203
189#if ORDER > 256 204#if ORDER > 256
190 "add $512, %[v1] \n" 205 "add $512, %[v1] \n"
191 "add $512, %[v2] \n" 206 "add $512, %[v2] \n"