summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h')
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h234
1 files changed, 234 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
new file mode 100644
index 0000000000..2177fe88ea
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
@@ -0,0 +1,234 @@
/*

libdemac - A Monkey's Audio decoder

$Id$

Copyright (C) Dave Chapman 2007

MMX vector math copyright (C) 2010 Jens Arnold

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA

*/
27#define FUSED_VECTOR_MATH
28
29#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
30#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
31#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
32
33#if ORDER == 16 /* 3 times */
34#define REPEAT_MB(x) REPEAT_MB3(x, 8)
35#elif ORDER == 32 /* 7 times */
36#define REPEAT_MB(x) REPEAT_MB7(x, 8)
37#elif ORDER == 64 /* 5*3 == 15 times */
38#define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
39 REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
40#elif ORDER == 256 /* 9*7 == 63 times */
41#define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \
42 REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
43 REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
44#elif ORDER == 1280 /* 8*8 == 64 times */
45#define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \
46 REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
47 REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
48#else
49#error unsupported order
50#endif
51
52
53static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
54{
55 int res, t;
56#if ORDER > 256
57 int cnt = ORDER>>8;
58#endif
59
60 asm volatile (
61#if ORDER > 256
62 "pxor %%mm2, %%mm2 \n"
63 "1: \n"
64#else
65 "movq (%[v1]), %%mm2 \n"
66 "movq %%mm2, %%mm0 \n"
67 "pmaddwd (%[f2]), %%mm2 \n"
68 "paddw (%[s2]), %%mm0 \n"
69 "movq %%mm0, (%[v1]) \n"
70#endif
71
72#define SP_ADD_BLOCK(n) \
73 "movq " #n "(%[v1]), %%mm1 \n" \
74 "movq %%mm1, %%mm0 \n" \
75 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
76 "paddw " #n "(%[s2]), %%mm0 \n" \
77 "movq %%mm0, " #n "(%[v1]) \n" \
78 "paddd %%mm1, %%mm2 \n"
79
80REPEAT_MB(SP_ADD_BLOCK)
81
82#if ORDER > 256
83 "add $512, %[v1] \n"
84 "add $512, %[s2] \n"
85 "add $512, %[f2] \n"
86 "dec %[cnt] \n"
87 "jne 1b \n"
88#endif
89
90 "movd %%mm2, %[t] \n"
91 "psrlq $32, %%mm2 \n"
92 "movd %%mm2, %[res] \n"
93 "add %[t], %[res] \n"
94 : /* outputs */
95#if ORDER > 256
96 [cnt]"+r"(cnt),
97 [s2] "+r"(s2),
98 [res]"=r"(res),
99 [t] "=r"(t)
100 : /* inputs */
101 [v1]"2"(v1),
102 [f2]"3"(f2)
103#else
104 [res]"=r"(res),
105 [t] "=r"(t)
106 : /* inputs */
107 [v1]"r"(v1),
108 [f2]"r"(f2),
109 [s2]"r"(s2)
110#endif
111 : /* clobbers */
112 "mm0", "mm1", "mm2"
113 );
114 return res;
115}
116
117static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
118{
119 int res, t;
120#if ORDER > 256
121 int cnt = ORDER>>8;
122#endif
123
124 asm volatile (
125#if ORDER > 256
126 "pxor %%mm2, %%mm2 \n"
127 "1: \n"
128#else
129 "movq (%[v1]), %%mm2 \n"
130 "movq %%mm2, %%mm0 \n"
131 "pmaddwd (%[f2]), %%mm2 \n"
132 "psubw (%[s2]), %%mm0 \n"
133 "movq %%mm0, (%[v1]) \n"
134#endif
135
136#define SP_SUB_BLOCK(n) \
137 "movq " #n "(%[v1]), %%mm1 \n" \
138 "movq %%mm1, %%mm0 \n" \
139 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
140 "psubw " #n "(%[s2]), %%mm0 \n" \
141 "movq %%mm0, " #n "(%[v1]) \n" \
142 "paddd %%mm1, %%mm2 \n"
143
144REPEAT_MB(SP_SUB_BLOCK)
145
146#if ORDER > 256
147 "add $512, %[v1] \n"
148 "add $512, %[s2] \n"
149 "add $512, %[f2] \n"
150 "dec %[cnt] \n"
151 "jne 1b \n"
152#endif
153
154 "movd %%mm2, %[t] \n"
155 "psrlq $32, %%mm2 \n"
156 "movd %%mm2, %[res] \n"
157 "add %[t], %[res] \n"
158 : /* outputs */
159#if ORDER > 256
160 [cnt]"+r"(cnt),
161 [s2] "+r"(s2),
162 [res]"=r"(res),
163 [t] "=r"(t)
164 : /* inputs */
165 [v1]"2"(v1),
166 [f2]"3"(f2)
167#else
168 [res]"=r"(res),
169 [t] "=r"(t)
170 : /* inputs */
171 [v1]"r"(v1),
172 [f2]"r"(f2),
173 [s2]"r"(s2)
174#endif
175 : /* clobbers */
176 "mm0", "mm1", "mm2"
177 );
178 return res;
179}
180
181static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
182{
183 int res, t;
184#if ORDER > 256
185 int cnt = ORDER>>8;
186#endif
187
188 asm volatile (
189#if ORDER > 256
190 "pxor %%mm1, %%mm1 \n"
191 "1: \n"
192#else
193 "movq (%[v1]), %%mm1 \n"
194 "pmaddwd (%[v2]), %%mm1 \n"
195#endif
196
197#define SP_BLOCK(n) \
198 "movq " #n "(%[v1]), %%mm0 \n" \
199 "pmaddwd " #n "(%[v2]), %%mm0 \n" \
200 "paddd %%mm0, %%mm1 \n"
201
202REPEAT_MB(SP_BLOCK)
203
204#if ORDER > 256
205 "add $512, %[v1] \n"
206 "add $512, %[v2] \n"
207 "dec %[cnt] \n"
208 "jne 1b \n"
209#endif
210
211 "movd %%mm1, %[t] \n"
212 "psrlq $32, %%mm1 \n"
213 "movd %%mm1, %[res] \n"
214 "add %[t], %[res] \n"
215 : /* outputs */
216#if ORDER > 256
217 [cnt]"+r"(cnt),
218 [res]"=r"(res),
219 [t] "=r"(t)
220 : /* inputs */
221 [v1]"1"(v1),
222 [v2]"2"(v2)
223#else
224 [res]"=r"(res),
225 [t] "=r"(t)
226 : /* inputs */
227 [v1]"r"(v1),
228 [v2]"r"(v2)
229#endif
230 : /* clobbers */
231 "mm0", "mm1"
232 );
233 return res;
234}