1 files changed, 210 insertions, 0 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000000..b729bd3a0a
--- /dev/null
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,210 @@
+/*
+libdemac - A Monkey's Audio decoder
+$Id$
+Copyright (C) Dave Chapman 2007
+ARMv4 vector math copyright (C) 2008 Jens Arnold
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+*/
+/*
+ * vector_add - in-place element-wise addition, v1[i] += v2[i].
+ *
+ * v1: first operand and destination; read and overwritten.
+ * v2: second operand; only read.
+ *
+ * The element count is fixed at compile time by the ORDER macro, which is
+ * not defined in this header: 16 elements are processed when ORDER <= 16,
+ * 32 when 16 < ORDER <= 32, and (ORDER >> 5) passes of 32 elements when
+ * ORDER > 32.
+ * NOTE(review): this presumably relies on ORDER being 16, 32 or a multiple
+ * of 32 - confirm against the includer.
+ */
+static inline void vector_add(int32_t* v1, int32_t* v2)
+{
+#if ORDER > 32
+    /* Number of 32-element passes through the unrolled loop body. */
+    int cnt = ORDER>>5;
+#endif
+/* Adds four consecutive words: loads 4 from v1 (no writeback) and 4 from v2
+ * (advancing v2), sums them pairwise, then stores to v1 and advances it. */
+#define ADDBLOCK4                        \
+        "ldmia   %[v1],  {r0-r3}     \n" \
+        "ldmia   %[v2]!, {r4-r7}     \n" \
+        "add     r0, r0, r4          \n" \
+        "add     r1, r1, r5          \n" \
+        "add     r2, r2, r6          \n" \
+        "add     r3, r3, r7          \n" \
+        "stmia   %[v1]!, {r0-r3}     \n"
+    asm volatile (
+    "1:                              \n"
+        ADDBLOCK4
+        ADDBLOCK4
+        ADDBLOCK4
+        ADDBLOCK4
+#if ORDER > 16
+        ADDBLOCK4
+        ADDBLOCK4
+        ADDBLOCK4
+        ADDBLOCK4
+#endif
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1  \n"
+        "bne     1b                  \n"
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2)
+        : /* inputs */
+        : /* clobbers */
+        /* "cc": subs updates the condition flags when ORDER > 32.
+         * "memory": the asm reads and writes the arrays behind v1/v2. */
+        "r0", "r1", "r2", "r3", "r4",
+        "r5", "r6", "r7", "cc", "memory"
+    );
+}
+/*
+ * vector_sub - in-place element-wise subtraction, v1[i] -= v2[i].
+ *
+ * v1: minuend and destination; read and overwritten.
+ * v2: subtrahend; only read.
+ *
+ * The element count is fixed at compile time by the ORDER macro, which is
+ * not defined in this header: 16 elements are processed when ORDER <= 16,
+ * 32 when 16 < ORDER <= 32, and (ORDER >> 5) passes of 32 elements when
+ * ORDER > 32.
+ * NOTE(review): this presumably relies on ORDER being 16, 32 or a multiple
+ * of 32 - confirm against the includer.
+ */
+static inline void vector_sub(int32_t* v1, int32_t* v2)
+{
+#if ORDER > 32
+    /* Number of 32-element passes through the unrolled loop body. */
+    int cnt = ORDER>>5;
+#endif
+/* Subtracts four consecutive words: loads 4 from v1 (no writeback) and 4
+ * from v2 (advancing v2), computes v1 - v2, then stores to v1 and advances
+ * it. */
+#define SUBBLOCK4                        \
+        "ldmia   %[v1],  {r0-r3}     \n" \
+        "ldmia   %[v2]!, {r4-r7}     \n" \
+        "sub     r0, r0, r4          \n" \
+        "sub     r1, r1, r5          \n" \
+        "sub     r2, r2, r6          \n" \
+        "sub     r3, r3, r7          \n" \
+        "stmia   %[v1]!, {r0-r3}     \n"
+    asm volatile (
+    "1:                              \n"
+        SUBBLOCK4
+        SUBBLOCK4
+        SUBBLOCK4
+        SUBBLOCK4
+#if ORDER > 16
+        SUBBLOCK4
+        SUBBLOCK4
+        SUBBLOCK4
+        SUBBLOCK4
+#endif
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1  \n"
+        "bne     1b                  \n"
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2)
+        : /* inputs */
+        : /* clobbers */
+        /* "cc": subs updates the condition flags when ORDER > 32.
+         * "memory": the asm reads and writes the arrays behind v1/v2. */
+        "r0", "r1", "r2", "r3", "r4",
+        "r5", "r6", "r7", "cc", "memory"
+    );
+}
+/*
+ * scalarproduct - dot product of two int32_t vectors.
+ *
+ * v1, v2: the two operand arrays; both are only read.
+ * Returns the sum of v1[i] * v2[i], accumulated with mla in a 32-bit
+ * register (so only the low 32 bits of the sum are kept).
+ *
+ * The element count is fixed at compile time by the ORDER macro, which is
+ * not defined in this header: 16 elements when ORDER <= 16, 32 when
+ * 16 < ORDER <= 32, and (ORDER >> 5) passes of 32 elements when ORDER > 32.
+ * NOTE(review): this presumably relies on ORDER being 16, 32 or a multiple
+ * of 32 - confirm against the includer.
+ * NOTE(review): in the ORDER > 32 loop the last ldmia of each pass also
+ * fetches r6/r7 for the following pass, so the final pass reads two words
+ * beyond the 32 * cnt words of v2 that are actually multiplied - confirm
+ * the caller's buffer allows this.
+ */
+static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
+{
+    int res = 0;
+#if ORDER > 32
+    /* Number of 32-element passes through the unrolled loop body. */
+    int cnt = ORDER>>5;
+#endif
+    asm volatile (
+#if ORDER > 16
+        /* Preload the first two v2 words; the loop body expects them in
+         * r6/r7 on entry. */
+        "ldmia   %[v2]!, {r6-r7}         \n"
+    "1:                                  \n"
+        /* 32 multiply-accumulates, with v1 and v2 loads interleaved in
+         * uneven chunks so each load feeds the products that follow it. */
+        "ldmia   %[v1]!, {r0,r1,r3-r5}   \n"
+        "mla     %[res], r6, r0, %[res]  \n"
+        "mla     %[res], r7, r1, %[res]  \n"
+        "ldmia   %[v2]!, {r0-r2,r6-r8}   \n"
+        "mla     %[res], r0, r3, %[res]  \n"
+        "mla     %[res], r1, r4, %[res]  \n"
+        "mla     %[res], r2, r5, %[res]  \n"
+        "ldmia   %[v1]!, {r0-r4}         \n"
+        "mla     %[res], r6, r0, %[res]  \n"
+        "mla     %[res], r7, r1, %[res]  \n"
+        "mla     %[res], r8, r2, %[res]  \n"
+        "ldmia   %[v2]!, {r0,r1,r6-r8}   \n"
+        "mla     %[res], r0, r3, %[res]  \n"
+        "mla     %[res], r1, r4, %[res]  \n"
+        "ldmia   %[v1]!, {r0-r5}         \n"
+        "mla     %[res], r6, r0, %[res]  \n"
+        "mla     %[res], r7, r1, %[res]  \n"
+        "mla     %[res], r8, r2, %[res]  \n"
+        "ldmia   %[v2]!, {r0-r2,r6,r7}   \n"
+        "mla     %[res], r0, r3, %[res]  \n"
+        "mla     %[res], r1, r4, %[res]  \n"
+        "mla     %[res], r2, r5, %[res]  \n"
+        "ldmia   %[v1]!, {r0,r1,r3-r5}   \n"
+        "mla     %[res], r6, r0, %[res]  \n"
+        "mla     %[res], r7, r1, %[res]  \n"
+        "ldmia   %[v2]!, {r0-r2,r6-r8}   \n"
+        "mla     %[res], r0, r3, %[res]  \n"
+        "mla     %[res], r1, r4, %[res]  \n"
+        "mla     %[res], r2, r5, %[res]  \n"
+        "ldmia   %[v1]!, {r0-r4}         \n"
+        "mla     %[res], r6, r0, %[res]  \n"
+        "mla     %[res], r7, r1, %[res]  \n"
+        "mla     %[res], r8, r2, %[res]  \n"
+        "ldmia   %[v2]!, {r0,r1,r6-r8}   \n"
+        "mla     %[res], r0, r3, %[res]  \n"
+        "mla     %[res], r1, r4, %[res]  \n"
+        "ldmia   %[v1]!, {r0-r5}         \n"
+        "mla     %[res], r6, r0, %[res]  \n"
+        "mla     %[res], r7, r1, %[res]  \n"
+        "mla     %[res], r8, r2, %[res]  \n"
+#if ORDER > 32
+        /* Looping: also fetch r6/r7 for the next pass. */
+        "ldmia   %[v2]!, {r0-r2,r6,r7}   \n"
+#else
+        /* Single pass: only the last three v2 words are needed. */
+        "ldmia   %[v2]!, {r0-r2}         \n"
+#endif
+        "mla     %[res], r0, r3, %[res]  \n"
+        "mla     %[res], r1, r4, %[res]  \n"
+        "mla     %[res], r2, r5, %[res]  \n"
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+#else /* ORDER <= 16 */
+/* Four multiply-accumulates: loads 4 words from each vector (advancing both
+ * pointers) and adds the four products to res. */
+#define MLABLOCK4                            \
+        "ldmia   %[v1]!, {r0-r3}         \n" \
+        "ldmia   %[v2]!, {r4-r7}         \n" \
+        "mla     %[res], r4, r0, %[res]  \n" \
+        "mla     %[res], r5, r1, %[res]  \n" \
+        "mla     %[res], r6, r2, %[res]  \n" \
+        "mla     %[res], r7, r3, %[res]  \n"
+        MLABLOCK4
+        MLABLOCK4
+        MLABLOCK4
+        MLABLOCK4
+#endif /* ORDER <= 16 */
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2),
+        [res]"+r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3",
+        "r4", "r5", "r6", "r7",
+#if ORDER > 16
+        "r8",
+#endif
+        /* "cc": subs updates the condition flags when ORDER > 32.
+         * "memory": the asm reads *v1 and *v2, which are not listed as
+         * memory operands, so pending stores must be visible first. */
+        "cc", "memory"
+    );
+    return res;
+}

diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h new file mode 100644 index 0000000000..b729bd3a0a --- /dev/null +++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,210 @@
	1	/*
	2
	3	libdemac - A Monkey's Audio decoder
	4
	5	$Id$
	6
	7	Copyright (C) Dave Chapman 2007
	8
	9	ARMv4 vector math copyright (C) 2008 Jens Arnold
	10
	11	This program is free software; you can redistribute it and/or modify
	12	it under the terms of the GNU General Public License as published by
	13	the Free Software Foundation; either version 2 of the License, or
	14	(at your option) any later version.
	15
	16	This program is distributed in the hope that it will be useful,
	17	but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	GNU General Public License for more details.
	20
	21	You should have received a copy of the GNU General Public License
	22	along with this program; if not, write to the Free Software
	23	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
	24
	25	*/
	26
	27	static inline void vector_add(int32_t* v1, int32_t* v2)
	28	{
	29	#if ORDER > 32
	30	    int cnt = ORDER>>5;
	31	#endif
	32	/* In place: v1[i] += v2[i]. Handles 16 elements if ORDER <= 16, 32 if 16 < ORDER <= 32, else ORDER>>5 passes of 32. */
	33	#define ADDBLOCK4 \
	34	        "ldmia %[v1], {r0-r3} \n" \
	35	        "ldmia %[v2]!, {r4-r7} \n" \
	36	        "add r0, r0, r4 \n" \
	37	        "add r1, r1, r5 \n" \
	38	        "add r2, r2, r6 \n" \
	39	        "add r3, r3, r7 \n" \
	40	        "stmia %[v1]!, {r0-r3} \n"
	41	/* ADDBLOCK4 adds four words and advances both pointers; the asm repeats it 4 or 8 times per pass. */
	42	    asm volatile (
	43	    "1: \n"
	44	        ADDBLOCK4
	45	        ADDBLOCK4
	46	        ADDBLOCK4
	47	        ADDBLOCK4
	48	#if ORDER > 16
	49	        ADDBLOCK4
	50	        ADDBLOCK4
	51	        ADDBLOCK4
	52	        ADDBLOCK4
	53	#endif
	54	#if ORDER > 32
	55	        "subs %[cnt], %[cnt], #1 \n"
	56	        "bne 1b \n"
	57	#endif
	58	        : /* outputs */
	59	#if ORDER > 32
	60	        [cnt]"+r"(cnt),
	61	#endif
	62	        [v1] "+r"(v1),
	63	        [v2] "+r"(v2)
	64	        : /* inputs */
	65	        : /* clobbers ("cc" because subs updates the flags when ORDER > 32) */
	66	        "r0", "r1", "r2", "r3", "r4",
	67	        "r5", "r6", "r7", "cc", "memory"
	68	    );
	69	}
	70
	71	static inline void vector_sub(int32_t* v1, int32_t* v2)
	72	{
	73	#if ORDER > 32
	74	    int cnt = ORDER>>5;
	75	#endif
	76	/* In place: v1[i] -= v2[i]. Handles 16 elements if ORDER <= 16, 32 if 16 < ORDER <= 32, else ORDER>>5 passes of 32. */
	77	#define SUBBLOCK4 \
	78	        "ldmia %[v1], {r0-r3} \n" \
	79	        "ldmia %[v2]!, {r4-r7} \n" \
	80	        "sub r0, r0, r4 \n" \
	81	        "sub r1, r1, r5 \n" \
	82	        "sub r2, r2, r6 \n" \
	83	        "sub r3, r3, r7 \n" \
	84	        "stmia %[v1]!, {r0-r3} \n"
	85	/* SUBBLOCK4 subtracts four words and advances both pointers; the asm repeats it 4 or 8 times per pass. */
	86	    asm volatile (
	87	    "1: \n"
	88	        SUBBLOCK4
	89	        SUBBLOCK4
	90	        SUBBLOCK4
	91	        SUBBLOCK4
	92	#if ORDER > 16
	93	        SUBBLOCK4
	94	        SUBBLOCK4
	95	        SUBBLOCK4
	96	        SUBBLOCK4
	97	#endif
	98	#if ORDER > 32
	99	        "subs %[cnt], %[cnt], #1 \n"
	100	        "bne 1b \n"
	101	#endif
	102	        : /* outputs */
	103	#if ORDER > 32
	104	        [cnt]"+r"(cnt),
	105	#endif
	106	        [v1] "+r"(v1),
	107	        [v2] "+r"(v2)
	108	        : /* inputs */
	109	        : /* clobbers ("cc" because subs updates the flags when ORDER > 32) */
	110	        "r0", "r1", "r2", "r3", "r4",
	111	        "r5", "r6", "r7", "cc", "memory"
	112	    );
	113	}
	114
	115	static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
	116	{
	117	    int res = 0;
	118	#if ORDER > 32
	119	    int cnt = ORDER>>5;
	120	#endif
	121	/* Returns the sum of v1[i]*v2[i], accumulated by mla in a 32-bit register; 16 elements if ORDER <= 16, else 32 per pass. */
	122	    asm volatile (
	123	#if ORDER > 16
	124	        "ldmia %[v2]!, {r6-r7} \n"
	125	    "1: \n"
	126	        "ldmia %[v1]!, {r0,r1,r3-r5} \n"
	127	        "mla %[res], r6, r0, %[res] \n"
	128	        "mla %[res], r7, r1, %[res] \n"
	129	        "ldmia %[v2]!, {r0-r2,r6-r8} \n"
	130	        "mla %[res], r0, r3, %[res] \n"
	131	        "mla %[res], r1, r4, %[res] \n"
	132	        "mla %[res], r2, r5, %[res] \n"
	133	        "ldmia %[v1]!, {r0-r4} \n"
	134	        "mla %[res], r6, r0, %[res] \n"
	135	        "mla %[res], r7, r1, %[res] \n"
	136	        "mla %[res], r8, r2, %[res] \n"
	137	        "ldmia %[v2]!, {r0,r1,r6-r8} \n"
	138	        "mla %[res], r0, r3, %[res] \n"
	139	        "mla %[res], r1, r4, %[res] \n"
	140	        "ldmia %[v1]!, {r0-r5} \n"
	141	        "mla %[res], r6, r0, %[res] \n"
	142	        "mla %[res], r7, r1, %[res] \n"
	143	        "mla %[res], r8, r2, %[res] \n"
	144	        "ldmia %[v2]!, {r0-r2,r6,r7} \n"
	145	        "mla %[res], r0, r3, %[res] \n"
	146	        "mla %[res], r1, r4, %[res] \n"
	147	        "mla %[res], r2, r5, %[res] \n"
	148	        "ldmia %[v1]!, {r0,r1,r3-r5} \n"
	149	        "mla %[res], r6, r0, %[res] \n"
	150	        "mla %[res], r7, r1, %[res] \n"
	151	        "ldmia %[v2]!, {r0-r2,r6-r8} \n"
	152	        "mla %[res], r0, r3, %[res] \n"
	153	        "mla %[res], r1, r4, %[res] \n"
	154	        "mla %[res], r2, r5, %[res] \n"
	155	        "ldmia %[v1]!, {r0-r4} \n"
	156	        "mla %[res], r6, r0, %[res] \n"
	157	        "mla %[res], r7, r1, %[res] \n"
	158	        "mla %[res], r8, r2, %[res] \n"
	159	        "ldmia %[v2]!, {r0,r1,r6-r8} \n"
	160	        "mla %[res], r0, r3, %[res] \n"
	161	        "mla %[res], r1, r4, %[res] \n"
	162	        "ldmia %[v1]!, {r0-r5} \n"
	163	        "mla %[res], r6, r0, %[res] \n"
	164	        "mla %[res], r7, r1, %[res] \n"
	165	        "mla %[res], r8, r2, %[res] \n"
	166	#if ORDER > 32
	167	        "ldmia %[v2]!, {r0-r2,r6,r7} \n"
	168	#else
	169	        "ldmia %[v2]!, {r0-r2} \n"
	170	#endif
	171	        "mla %[res], r0, r3, %[res] \n"
	172	        "mla %[res], r1, r4, %[res] \n"
	173	        "mla %[res], r2, r5, %[res] \n"
	174	#if ORDER > 32
	175	        "subs %[cnt], %[cnt], #1 \n"
	176	        "bne 1b \n"
	177	#endif
	178	/* NOTE(review): with ORDER > 32 the last ldmia of each pass also fetches r6/r7 for the next pass, so the final pass reads two words past the v2 words actually multiplied. */
	179	#else /* ORDER <= 16 */
	180	/* Simple variant: four blocks, each loading four words per vector and accumulating the four products. */
	181	#define MLABLOCK4 \
	182	        "ldmia %[v1]!, {r0-r3} \n" \
	183	        "ldmia %[v2]!, {r4-r7} \n" \
	184	        "mla %[res], r4, r0, %[res] \n" \
	185	        "mla %[res], r5, r1, %[res] \n" \
	186	        "mla %[res], r6, r2, %[res] \n" \
	187	        "mla %[res], r7, r3, %[res] \n"
	188	
	189	        MLABLOCK4
	190	        MLABLOCK4
	191	        MLABLOCK4
	192	        MLABLOCK4
	193	#endif /* ORDER <= 16 */
	194	        : /* outputs */
	195	#if ORDER > 32
	196	        [cnt]"+r"(cnt),
	197	#endif
	198	        [v1] "+r"(v1),
	199	        [v2] "+r"(v2),
	200	        [res]"+r"(res)
	201	        : /* inputs */
	202	        : /* clobbers ("cc": subs updates the flags when ORDER > 32; "memory": the asm reads *v1 and *v2, which are not memory operands) */
	203	        "r0", "r1", "r2", "r3",
	204	        "r4", "r5", "r6", "r7", "cc", "memory"
	205	#if ORDER > 16
	206	        ,"r8"
	207	#endif
	208	    );
	209	    return res;
	210	}