1 files changed, 201 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000000..d6bb9b0d9c
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,201 @@
+/*
+libdemac - A Monkey's Audio decoder
+$Id$
+Copyright (C) Dave Chapman 2007
+ARMv4 vector math copyright (C) 2008 Jens Arnold
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+*/
+#define FUSED_VECTOR_MATH /* NOTE(review): presumably tells the including code
+                           * that the fused vector_sp_add()/vector_sp_sub()
+                           * routines below are available - confirm at the
+                           * include site. */
+#if ORDER > 32 /* REPEAT_BLOCK(x) pastes x several times to unroll the asm
+                * below; every pasted block handles 4 elements. ORDER is not
+                * defined in this header, it is expected from the includer.
+                * This branch: 8 blocks = 32 elements per loop pass, and the
+                * loop runs ORDER>>5 times.
+                * NOTE(review): the element counts only match ORDER exactly
+                * for 16, 32 or a multiple of 32 - confirm callers. */
+#define REPEAT_BLOCK(x) x x x x x x x x
+#elif ORDER > 16 /* no loop: 1 leading block + 7 repeats = 32 elements */
+#define REPEAT_BLOCK(x) x x x x x x x
+#else /* no loop: 1 leading block + 3 repeats = 16 elements */
+#define REPEAT_BLOCK(x) x x x
+#endif
+/* Calculate scalarproduct, then add a 2nd vector (fused for performance).
+ *
+ * For every element i the asm accumulates v1[i] * f2[i] into the result,
+ * using the value of v1[i] from before the update, and then stores
+ * v1[i] + s2[i] back into v1[i].
+ *
+ * v1: vector that is read and updated in place
+ * f2: vector multiplied with v1 for the scalar product (only read)
+ * s2: vector added to v1 (only read)
+ *
+ * Returns the scalar product, accumulated in a 32-bit register with mul/mla.
+ * The element count is fixed at compile time: 32 per loop pass times
+ * ORDER>>5 passes when ORDER > 32, 32 when ORDER > 16, otherwise 16.
+ */
+static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
+{
+    int res; /* scalar product accumulator, produced by the asm */
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
+#endif
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* looped variant: start the sum at 0 */
+    "1:                                  \n" /* loop head, branched to by bne below */
+#else
+        "ldmia   %[v1],  {r0-r3}         \n" /* r0-r3 = v1[0..3]; no writeback, v1 is stored to below */
+        "ldmia   %[f2]!, {r4-r7}         \n" /* r4-r7 = f2[0..3], f2 advances */
+        "mul     %[res], r4, r0          \n" /* first product initialises res, so no clear is needed */
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n" /* r4-r7 reused for s2[0..3], s2 advances */
+        "add     r0, r0, r4              \n" /* v1[i] + s2[i] */
+        "add     r1, r1, r5              \n"
+        "add     r2, r2, r6              \n"
+        "add     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n" /* store the sums back, v1 advances */
+#endif
+        REPEAT_BLOCK( /* same steps per 4 elements, accumulating with mla only */
+        "ldmia   %[v1],  {r0-r3}         \n"
+        "ldmia   %[f2]!, {r4-r7}         \n"
+        "mla     %[res], r4, r0, %[res]  \n"
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n"
+        "add     r0, r0, r4              \n"
+        "add     r1, r1, r5              \n"
+        "add     r2, r2, r6              \n"
+        "add     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n" /* one pass of 32 elements done */
+        "bne     1b                      \n" /* repeat until cnt reaches 0 */
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1), /* read-write operands: the asm advances these pointers */
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4", /* scratch registers used by name in the asm */
+        "r5", "r6", "r7", "cc", "memory" /* cc: subs sets the flags in the looped variant; memory: v1 is written through */
+    );
+    return res;
+}
+/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance).
+ *
+ * For every element i the asm accumulates v1[i] * f2[i] into the result,
+ * using the value of v1[i] from before the update, and then stores
+ * v1[i] - s2[i] back into v1[i].
+ *
+ * v1: vector that is read and updated in place
+ * f2: vector multiplied with v1 for the scalar product (only read)
+ * s2: vector subtracted from v1 (only read)
+ *
+ * Returns the scalar product, accumulated in a 32-bit register with mul/mla.
+ * The element count is fixed at compile time: 32 per loop pass times
+ * ORDER>>5 passes when ORDER > 32, 32 when ORDER > 16, otherwise 16.
+ */
+static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
+{
+    int res; /* scalar product accumulator, produced by the asm */
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
+#endif
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* looped variant: start the sum at 0 */
+    "1:                                  \n" /* loop head, branched to by bne below */
+#else
+        "ldmia   %[v1],  {r0-r3}         \n" /* r0-r3 = v1[0..3]; no writeback, v1 is stored to below */
+        "ldmia   %[f2]!, {r4-r7}         \n" /* r4-r7 = f2[0..3], f2 advances */
+        "mul     %[res], r4, r0          \n" /* first product initialises res, so no clear is needed */
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n" /* r4-r7 reused for s2[0..3], s2 advances */
+        "sub     r0, r0, r4              \n" /* v1[i] - s2[i] */
+        "sub     r1, r1, r5              \n"
+        "sub     r2, r2, r6              \n"
+        "sub     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n" /* store the differences back, v1 advances */
+#endif
+        REPEAT_BLOCK( /* same steps per 4 elements, accumulating with mla only */
+        "ldmia   %[v1],  {r0-r3}         \n"
+        "ldmia   %[f2]!, {r4-r7}         \n"
+        "mla     %[res], r4, r0, %[res]  \n"
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n"
+        "sub     r0, r0, r4              \n"
+        "sub     r1, r1, r5              \n"
+        "sub     r2, r2, r6              \n"
+        "sub     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n" /* one pass of 32 elements done */
+        "bne     1b                      \n" /* repeat until cnt reaches 0 */
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1), /* read-write operands: the asm advances these pointers */
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4", /* scratch registers used by name in the asm */
+        "r5", "r6", "r7", "cc", "memory" /* cc: subs sets the flags in the looped variant; memory: v1 is written through */
+    );
+    return res;
+}
+static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
+{   /* Returns the sum of v1[i] * v2[i], accumulated in a 32-bit register
+     * with mul/mla. Both vectors are only read by the asm.
+     * The element count is fixed at compile time: 32 per loop pass times
+     * ORDER>>5 passes when ORDER > 32, 32 when ORDER > 16, otherwise 16.
+     */
+    int res; /* scalar product accumulator, produced by the asm */
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
+#endif
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* looped variant: start the sum at 0 */
+    "1:                                  \n" /* loop head, branched to by bne below */
+#else
+        "ldmia   %[v1]!, {r0-r3}         \n" /* r0-r3 = v1[0..3], v1 advances */
+        "ldmia   %[v2]!, {r4-r7}         \n" /* r4-r7 = v2[0..3], v2 advances */
+        "mul     %[res], r4, r0          \n" /* first product initialises res, so no clear is needed */
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+#endif
+        REPEAT_BLOCK( /* same steps per 4 elements, accumulating with mla only */
+        "ldmia   %[v1]!, {r0-r3}         \n"
+        "ldmia   %[v2]!, {r4-r7}         \n"
+        "mla     %[res], r4, r0, %[res]  \n"
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n" /* one pass of 32 elements done */
+        "bne     1b                      \n" /* repeat until cnt reaches 0 */
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1), /* read-write operands: the asm advances these pointers */
+        [v2] "+r"(v2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", /* scratch registers used by name in the asm */
+        "r4", "r5", "r6", "r7", "cc", "memory" /* cc: subs sets the flags in the looped variant; memory: the asm reads through v1/v2 */
+    );
+    return res;
+}

diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h new file mode 100644 index 0000000000..d6bb9b0d9c --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,201 @@
	1	/*
	2
	3	libdemac - A Monkey's Audio decoder
	4
	5	$Id$
	6
	7	Copyright (C) Dave Chapman 2007
	8
	9	ARMv4 vector math copyright (C) 2008 Jens Arnold
	10
	11	This program is free software; you can redistribute it and/or modify
	12	it under the terms of the GNU General Public License as published by
	13	the Free Software Foundation; either version 2 of the License, or
	14	(at your option) any later version.
	15
	16	This program is distributed in the hope that it will be useful,
	17	but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	GNU General Public License for more details.
	20
	21	You should have received a copy of the GNU General Public License
	22	along with this program; if not, write to the Free Software
	23	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
	24
	25	*/
	26
	27	#define FUSED_VECTOR_MATH /* NOTE(review): presumably tells the including code that the fused routines below are available - confirm at the include site */
	28
	29	#if ORDER > 32 /* REPEAT_BLOCK(x) pastes x several times to unroll the asm below, 4 elements per pasted block; here 8 blocks = 32 elements per loop pass */
	30	#define REPEAT_BLOCK(x) x x x x x x x x
	31	#elif ORDER > 16 /* no loop: 1 leading block + 7 repeats = 32 elements */
	32	#define REPEAT_BLOCK(x) x x x x x x x
	33	#else /* no loop: 1 leading block + 3 repeats = 16 elements */
	34	#define REPEAT_BLOCK(x) x x x
	35	#endif
	36
	37	/* Calculate scalarproduct, then add a 2nd vector (fused for performance): accumulates v1[i] * f2[i] using the old v1[i], then stores v1[i] + s2[i] into v1[i]; the element count is fixed at compile time by ORDER */
	38	static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
	39	{
	40	int res; /* scalar product accumulator, produced by the asm */
	41	#if ORDER > 32
	42	int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
	43	#endif
	44
	45	asm volatile (
	46	#if ORDER > 32
	47	"mov %[res], #0 \n" /* looped variant: start the sum at 0 */
	48	"1: \n" /* loop head, branched to by bne below */
	49	#else
	50	"ldmia %[v1], {r0-r3} \n" /* r0-r3 = v1[0..3]; no writeback, v1 is stored to below */
	51	"ldmia %[f2]!, {r4-r7} \n" /* r4-r7 = f2[0..3], f2 advances */
	52	"mul %[res], r4, r0 \n" /* first product initialises res, so no clear is needed */
	53	"mla %[res], r5, r1, %[res] \n"
	54	"mla %[res], r6, r2, %[res] \n"
	55	"mla %[res], r7, r3, %[res] \n"
	56	"ldmia %[s2]!, {r4-r7} \n" /* r4-r7 reused for s2[0..3], s2 advances */
	57	"add r0, r0, r4 \n" /* v1[i] + s2[i] */
	58	"add r1, r1, r5 \n"
	59	"add r2, r2, r6 \n"
	60	"add r3, r3, r7 \n"
	61	"stmia %[v1]!, {r0-r3} \n" /* store the sums back, v1 advances */
	62	#endif
	63	REPEAT_BLOCK( /* same steps per 4 elements, accumulating with mla only */
	64	"ldmia %[v1], {r0-r3} \n"
	65	"ldmia %[f2]!, {r4-r7} \n"
	66	"mla %[res], r4, r0, %[res] \n"
	67	"mla %[res], r5, r1, %[res] \n"
	68	"mla %[res], r6, r2, %[res] \n"
	69	"mla %[res], r7, r3, %[res] \n"
	70	"ldmia %[s2]!, {r4-r7} \n"
	71	"add r0, r0, r4 \n"
	72	"add r1, r1, r5 \n"
	73	"add r2, r2, r6 \n"
	74	"add r3, r3, r7 \n"
	75	"stmia %[v1]!, {r0-r3} \n"
	76	)
	77	#if ORDER > 32
	78	"subs %[cnt], %[cnt], #1 \n" /* one pass of 32 elements done */
	79	"bne 1b \n" /* repeat until cnt reaches 0 */
	80	#endif
	81	: /* outputs */
	82	#if ORDER > 32
	83	[cnt]"+r"(cnt),
	84	#endif
	85	[v1] "+r"(v1), /* read-write operands: the asm advances these pointers */
	86	[f2] "+r"(f2),
	87	[s2] "+r"(s2),
	88	[res]"=r"(res)
	89	: /* inputs */
	90	: /* clobbers */
	91	"r0", "r1", "r2", "r3", "r4", /* scratch registers used by name in the asm */
	92	"r5", "r6", "r7", "cc", "memory" /* cc: subs sets the flags in the looped variant; memory: v1 is written through */
	93	);
	94	return res;
	95	}
	96
	97	/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance): accumulates v1[i] * f2[i] using the old v1[i], then stores v1[i] - s2[i] into v1[i]; the element count is fixed at compile time by ORDER */
	98	static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
	99	{
	100	int res; /* scalar product accumulator, produced by the asm */
	101	#if ORDER > 32
	102	int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
	103	#endif
	104
	105	asm volatile (
	106	#if ORDER > 32
	107	"mov %[res], #0 \n" /* looped variant: start the sum at 0 */
	108	"1: \n" /* loop head, branched to by bne below */
	109	#else
	110	"ldmia %[v1], {r0-r3} \n" /* r0-r3 = v1[0..3]; no writeback, v1 is stored to below */
	111	"ldmia %[f2]!, {r4-r7} \n" /* r4-r7 = f2[0..3], f2 advances */
	112	"mul %[res], r4, r0 \n" /* first product initialises res, so no clear is needed */
	113	"mla %[res], r5, r1, %[res] \n"
	114	"mla %[res], r6, r2, %[res] \n"
	115	"mla %[res], r7, r3, %[res] \n"
	116	"ldmia %[s2]!, {r4-r7} \n" /* r4-r7 reused for s2[0..3], s2 advances */
	117	"sub r0, r0, r4 \n" /* v1[i] - s2[i] */
	118	"sub r1, r1, r5 \n"
	119	"sub r2, r2, r6 \n"
	120	"sub r3, r3, r7 \n"
	121	"stmia %[v1]!, {r0-r3} \n" /* store the differences back, v1 advances */
	122	#endif
	123	REPEAT_BLOCK( /* same steps per 4 elements, accumulating with mla only */
	124	"ldmia %[v1], {r0-r3} \n"
	125	"ldmia %[f2]!, {r4-r7} \n"
	126	"mla %[res], r4, r0, %[res] \n"
	127	"mla %[res], r5, r1, %[res] \n"
	128	"mla %[res], r6, r2, %[res] \n"
	129	"mla %[res], r7, r3, %[res] \n"
	130	"ldmia %[s2]!, {r4-r7} \n"
	131	"sub r0, r0, r4 \n"
	132	"sub r1, r1, r5 \n"
	133	"sub r2, r2, r6 \n"
	134	"sub r3, r3, r7 \n"
	135	"stmia %[v1]!, {r0-r3} \n"
	136	)
	137	#if ORDER > 32
	138	"subs %[cnt], %[cnt], #1 \n" /* one pass of 32 elements done */
	139	"bne 1b \n" /* repeat until cnt reaches 0 */
	140	#endif
	141	: /* outputs */
	142	#if ORDER > 32
	143	[cnt]"+r"(cnt),
	144	#endif
	145	[v1] "+r"(v1), /* read-write operands: the asm advances these pointers */
	146	[f2] "+r"(f2),
	147	[s2] "+r"(s2),
	148	[res]"=r"(res)
	149	: /* inputs */
	150	: /* clobbers */
	151	"r0", "r1", "r2", "r3", "r4", /* scratch registers used by name in the asm */
	152	"r5", "r6", "r7", "cc", "memory" /* cc: subs sets the flags in the looped variant; memory: v1 is written through */
	153	);
	154	return res;
	155	}
	156
	157	static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
	158	{ /* returns the sum of v1[i] * v2[i], accumulated in a 32-bit register; both vectors are only read; the element count is fixed at compile time by ORDER */
	159	int res; /* scalar product accumulator, produced by the asm */
	160	#if ORDER > 32
	161	int cnt = ORDER>>5; /* number of loop passes, 32 elements each */
	162	#endif
	163
	164	asm volatile (
	165	#if ORDER > 32
	166	"mov %[res], #0 \n" /* looped variant: start the sum at 0 */
	167	"1: \n" /* loop head, branched to by bne below */
	168	#else
	169	"ldmia %[v1]!, {r0-r3} \n" /* r0-r3 = v1[0..3], v1 advances */
	170	"ldmia %[v2]!, {r4-r7} \n" /* r4-r7 = v2[0..3], v2 advances */
	171	"mul %[res], r4, r0 \n" /* first product initialises res, so no clear is needed */
	172	"mla %[res], r5, r1, %[res] \n"
	173	"mla %[res], r6, r2, %[res] \n"
	174	"mla %[res], r7, r3, %[res] \n"
	175	#endif
	176	REPEAT_BLOCK( /* same steps per 4 elements, accumulating with mla only */
	177	"ldmia %[v1]!, {r0-r3} \n"
	178	"ldmia %[v2]!, {r4-r7} \n"
	179	"mla %[res], r4, r0, %[res] \n"
	180	"mla %[res], r5, r1, %[res] \n"
	181	"mla %[res], r6, r2, %[res] \n"
	182	"mla %[res], r7, r3, %[res] \n"
	183	)
	184	#if ORDER > 32
	185	"subs %[cnt], %[cnt], #1 \n" /* one pass of 32 elements done */
	186	"bne 1b \n" /* repeat until cnt reaches 0 */
	187	#endif
	188	: /* outputs */
	189	#if ORDER > 32
	190	[cnt]"+r"(cnt),
	191	#endif
	192	[v1] "+r"(v1), /* read-write operands: the asm advances these pointers */
	193	[v2] "+r"(v2),
	194	[res]"=r"(res)
	195	: /* inputs */
	196	: /* clobbers */
	197	"r0", "r1", "r2", "r3", /* scratch registers used by name in the asm */
	198	"r4", "r5", "r6", "r7", "cc", "memory" /* cc: subs sets the flags in the looped variant; memory: the asm reads through v1/v2 */
	199	);
	200	return res;
	201	}