1 files changed, 257 insertions, 0 deletions
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h
new file mode 100644
index 0000000000..09c74671bc
--- /dev/null
+++ b/apps/codecs/Tremor/asm_mcf5249.h
@@ -0,0 +1,257 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2005 by Pedro Vasconcelos
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+/* asm routines for wide math on the MCF5249 */
+#include "os_types.h"
+#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
+#ifndef _V_WIDE_MATH
+#define _V_WIDE_MATH
+//#define MB() asm volatile ("" : : : "memory")
+#define MB() /* currently expands to nothing; the empty-asm memory-clobber form is kept above, commented out */
+static inline void mcf5249_init_mac(void) { /* Prepare the MAC unit for the routines in this header:
+   * writes 0x20 to MACSR and leaves all four accumulators (acc0-acc3) cleared. */
+  int r;                                  // scratch target for movclr; the value is discarded
+  asm volatile ("move.l #0x20, %%macsr;"  // frac, truncate, no saturation
+                "movclr.l %%acc0, %[r];"  // read acc0 into r and clear it
+                "move.l %%acc0, %%acc1;"  // copy the now-zero acc0 into acc1..acc3
+                "move.l %%acc0, %%acc2;"
+                "move.l %%acc0, %%acc3;"
+                : [r] "=r" (r));
+}
+static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { /* Multiply x by y on the MAC unit and
+   * return the value read back from acc0, arithmetically shifted right by one bit.
+   * NOTE(review): presumably relies on mcf5249_init_mac() having selected fractional mode
+   * and on acc0 being zero on entry (mac.l accumulates into it) - confirm against callers. */
+  ogg_int32_t r;
+  asm volatile ("mac.l %[x], %[y], %%acc0;"  // multiply into acc
+                "movclr.l %%acc0, %[r];"     // move & clear acc, so acc0 is zero again afterwards
+                "asr.l #1, %[r];"            // halve the result; no overflow test
+                : [r] "=d" (r)               // r is shifted in place, so it is kept in a data register
+                : [x] "r" (x), [y] "r" (y)
+                : "cc");
+  return r;
+}
+static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { /* Multiply x by y on the MAC unit and
+   * return the value read back from acc0, with no further adjustment.
+   * NOTE(review): presumably relies on mcf5249_init_mac() having selected fractional mode
+   * and on acc0 being zero on entry (mac.l accumulates into it) - confirm against callers. */
+  ogg_int32_t r;
+  asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
+                "movclr.l %%acc0, %[r];"    // move and clear, so acc0 is zero again afterwards
+                : [r] "=r" (r)
+                : [x] "r" (x), [y] "r" (y)
+                : "cc");
+  return r;
+}
+static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { /* Build the result from two
+   * parts: the MAC result for x*y shifted left by 16, OR-ed with the low 32 bits of the
+   * integer product x*y shifted right (logically) by 15.
+   * NOTE(review): presumably this equals (x*y)>>15 when the MAC is in the fractional mode
+   * set by mcf5249_init_mac() and acc0 is zero on entry - confirm. */
+  ogg_int32_t r;
+  asm volatile ("mac.l %[x], %[y], %%acc0;"  // multiply
+                "movclr.l %%acc0, %[r];"     // get higher half
+                "mulu.l %[y], %[x];"         // get lower half (overwrites x)
+                "asl.l #8, %[r];"    // hi << 17 per the original note; the code shifts r left by 8 twice
+                "asl.l #8, %[r];"
+                "lsr.l #8, %[x];"    // (unsigned)lo >> 15, done as shifts by 8 and 7
+                "lsr.l #7, %[x];"
+                "or.l %[x], %[r];"   // or the two parts together
+                : [r] "=&d" (r), [x] "+d" (x)   // r is early-clobber: written before x and y are last read
+                : [y] "d" (y)
+                : "cc");
+  return r;
+}
+static inline 
+void XPROD31(ogg_int32_t  a, ogg_int32_t  b,   
+             ogg_int32_t  t, ogg_int32_t  v,
+             ogg_int32_t *x, ogg_int32_t *y)
+{ /* Cross product on the MAC unit: stores a*t + b*v to *x and b*t - a*v to *y, each as
+   * read back from its accumulator with movclr. Uses acc0 and acc1 and leaves both cleared.
+   * NOTE(review): presumably expects both accumulators to be zero on entry - confirm. */
+  ogg_int32_t r;
+  asm volatile ("mac.l %[a], %[t], %%acc0;"     // acc0 += a*t
+                "mac.l %[b], %[v], %%acc0;"     // acc0 += b*v
+                "mac.l %[b], %[t], %%acc1;"     // acc1 += b*t
+                "msac.l %[a], %[v], %%acc1;"    // acc1 -= a*v
+                "movclr.l %%acc0, %[r];"        // fetch and clear acc0
+                "move.l %[r], (%[x]);"          // store through x
+                "movclr.l %%acc1, %[r];"        // fetch and clear acc1
+                "move.l %[r], (%[y]);"          // store through y
+                : [r] "=&r" (r)                 // early-clobber: r is written while x and y are still needed
+                : [x] "a" (x), [y] "a" (y),
+                  [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
+                : "cc", "memory");
+}
+static inline
+void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,   
+              ogg_int32_t  t, ogg_int32_t  v,
+              ogg_int32_t *x, ogg_int32_t *y)
+{ /* Cross product with the signs swapped relative to XPROD31: stores a*t - b*v to *x and
+   * b*t + a*v to *y, each as read back from its accumulator with movclr. Uses acc0 and
+   * acc1 and leaves both cleared.
+   * NOTE(review): presumably expects both accumulators to be zero on entry - confirm. */
+  ogg_int32_t r;
+  asm volatile ("mac.l %[a], %[t], %%acc0;"     // acc0 += a*t
+                "msac.l %[b], %[v], %%acc0;"    // acc0 -= b*v
+                "mac.l %[b], %[t], %%acc1;"     // acc1 += b*t
+                "mac.l %[a], %[v], %%acc1;"     // acc1 += a*v
+                "movclr.l %%acc0, %[r];"        // fetch and clear acc0
+                "move.l %[r], (%[x]);"          // store through x
+                "movclr.l %%acc1, %[r];"        // fetch and clear acc1
+                "move.l %[r], (%[y]);"          // store through y
+                : [r] "=&r" (r)                 // early-clobber: r is written while x and y are still needed
+                : [x] "a" (x), [y] "a" (y),
+                  [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
+                : "cc", "memory");
+}
+/* XPROD32: assigns MULT32(_a,_t)+MULT32(_b,_v) to _x and MULT32(_b,_t)-MULT32(_a,_v) to _y.
+ * Unlike XPROD31, _x and _y are assigned directly (they must be lvalues), not written
+ * through pointers. The expansion is a bare brace block, and _a, _b, _t and _v each
+ * appear twice in it, so arguments with side effects are evaluated twice.
+ * Open question from the original author: no faster way of doing this using the MAC? */
+#define XPROD32(_a, _b, _t, _v, _x, _y)         \
+  { (_x)=MULT32(_a,_t)+MULT32(_b,_v);           \
+    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
+/* asm versions of vector multiplication for window.c */
+/* assumes MAC is initialized & accumulators cleared */
+static inline 
+void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+{ /* In-place vector multiply: for i in [0, n), data[i] is replaced by the MAC product of
+   * data[i] and window[i], with the window walked forwards. Groups of four go through
+   * acc0-acc3; the last 0-3 elements are done one at a time through acc0.
+   * NOTE(review): four data words and four window words are fetched before n is tested
+   * and again after every group, so up to four words past the end of each array are
+   * read (never written) - confirm the buffers tolerate that.
+   * NOTE(review): a negative n is not treated as zero; it falls into the tail code. */
+  asm volatile ("movem.l (%[d]), %%d0-%%d3;"  // loop start
+                "movem.l (%[w]), %%a0-%%a3;"  // pre-fetch registers
+                "lea.l (4*4, %[w]), %[w];"    // step w past the four window words just loaded
+                "bra 1f;"               // jump to loop condition
+                "0:" // loop body
+                // multiply and load next window values
+                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
+                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
+                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
+                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"              
+                "movclr.l %%acc0, %%d0;"  // get the products
+                "movclr.l %%acc1, %%d1;"
+                "movclr.l %%acc2, %%d2;"
+                "movclr.l %%acc3, %%d3;"
+                // store and advance
+                "movem.l %%d0-%%d3, (%[d]);"  
+                "lea.l (4*4, %[d]), %[d];"
+                "movem.l (%[d]), %%d0-%%d3;"  // fetch the next four data words
+                "subq.l #4, %[n];"     // done 4 elements
+                "1: cmpi.l #4, %[n];"  // loop while at least four elements remain
+                "bge 0b;"
+                // multiply final elements
+                "tst.l %[n];"
+                "beq 1f;"      // n=0
+                "mac.l %%d0, %%a0, %%acc0;"
+                "movclr.l %%acc0, %%d0;"
+                "move.l %%d0, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     // n=1
+                "mac.l %%d1, %%a1, %%acc0;"
+                "movclr.l %%acc0, %%d1;"
+                "move.l %%d1, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     // n=2
+                // otherwise n = 3
+                "mac.l %%d2, %%a2, %%acc0;"
+                "movclr.l %%acc0, %%d2;"
+                "move.l %%d2, (%[d])+;"
+                "1:"
+                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
+                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
+                    "cc", "memory");
+}
+static inline 
+void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+{ /* In-place vector multiply with the window walked backwards: for i in [0, n), data[i]
+   * is replaced by the MAC product of data[i] and window[-i], so window points at the
+   * coefficient used for data[0]. Groups of four go through acc0-acc3; the last 0-3
+   * elements are done one at a time through acc0.
+   * NOTE(review): four data words and four window words are fetched before n is tested
+   * and again after every group, so up to four words beyond the data end and below the
+   * lowest window word used are read (never written) - confirm the buffers tolerate that.
+   * NOTE(review): a negative n is not treated as zero; it falls into the tail code. */
+  asm volatile ("lea.l (-3*4, %[w]), %[w];"     // loop start
+                "movem.l (%[d]), %%d0-%%d3;"    // pre-fetch registers
+                "movem.l (%[w]), %%a0-%%a3;"    // a3 = window[0], a2 = window[-1], a1 = window[-2], a0 = window[-3]
+                "bra 1f;"               // jump to loop condition
+                "0:" // loop body
+                // multiply and load next window value
+                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
+                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
+                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
+                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"              
+                "movclr.l %%acc0, %%d0;"  // get the products
+                "movclr.l %%acc1, %%d1;"
+                "movclr.l %%acc2, %%d2;"
+                "movclr.l %%acc3, %%d3;"
+                // store and advance
+                "movem.l %%d0-%%d3, (%[d]);"  
+                "lea.l (4*4, %[d]), %[d];"
+                "movem.l (%[d]), %%d0-%%d3;"  // fetch the next four data words
+                "subq.l #4, %[n];"     // done 4 elements
+                "1: cmpi.l #4, %[n];"  // loop while at least four elements remain
+                "bge 0b;"
+                // multiply final elements
+                "tst.l %[n];"
+                "beq 1f;"      // n=0
+                "mac.l %%d0, %%a3, %%acc0;"
+                "movclr.l %%acc0, %%d0;"
+                "move.l %%d0, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     // n=1
+                "mac.l %%d1, %%a2, %%acc0;"
+                "movclr.l %%acc0, %%d1;"
+                "move.l %%d1, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     // n=2
+                // otherwise n = 3
+                "mac.l %%d2, %%a1, %%acc0;"
+                "movclr.l %%acc0, %%d2;"
+                "move.l %%d2, (%[d])+;"
+                "1:"
+                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
+                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
+                    "cc", "memory");
+}
+static inline 
+void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
+{ /* Zero n 32-bit words starting at ptr; n <= 0 stores nothing.
+   * Whole groups of four are written with one movem.l from the zeroed d0-d3; the
+   * remaining 0-3 words are cleared one at a time.
+   * The group loop only runs while at least four words remain (cmpi/bge, the same loop
+   * test the vect_mult routines use). The earlier tst/bgt form ran the four-word store
+   * whenever n > 0, writing past the buffer for any n that is not a multiple of four. */
+  asm volatile ("clr.l %%d0;"
+                "clr.l %%d1;"
+                "clr.l %%d2;"
+                "clr.l %%d3;"
+                // loop start
+                "bra 1f;"              // jump to loop condition
+                "0: movem.l %%d0-%%d3, (%[ptr]);"  // zero four words at once
+                "lea (4*4, %[ptr]), %[ptr];"
+                "subq.l #4, %[n];"     // done 4 elements
+                "1: cmpi.l #4, %[n];"  // loop while at least four elements remain
+                "bge 0b;"
+                // remaining elements (n < 4 here)
+                "tst.l %[n];"
+                "ble 1f;"      // n<=0
+                "clr.l (%[ptr])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     // n=1
+                "clr.l (%[ptr])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     // n=2
+                // otherwise n = 3
+                "clr.l (%[ptr])+;"
+                "1:"
+                : [n] "+d" (n), [ptr] "+a" (ptr)
+                :
+                : "%d0","%d1","%d2","%d3","cc","memory");
+}
+#endif
+#endif

diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h new file mode 100644 index 0000000000..09c74671bc --- /dev/null +++ b/apps/codecs/Tremor/asm_mcf5249.h
@@ -0,0 +1,257 @@
	1	/***************************************************************************
	2	* __________ __ ___.
	3	* Open \______ \ ____ ____ \| \| _\_ \|__ _______ ___
	4	* Source \| _// _ \_/ ___\\| \|/ /\| __ \ / _ \ \/ /
	5	* Jukebox \| \| ( <_> ) \___\| < \| \_\ ( <_> > < <
	6	* Firmware \|____\|_ /\____/ \___ >__\|_ \\|___ /\____/__/\_ \
	7	* \/ \/ \/ \/ \/
	8	*
	9	* Copyright (C) 2005 by Pedro Vasconcelos
	10	*
	11	* All files in this archive are subject to the GNU General Public License.
	12	* See the file COPYING in the source tree root for full license agreement.
	13	*
	14	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
	15	* KIND, either express or implied.
	16	*
	17	****************************************************************************/
	18	/* asm routines for wide math on the MCF5249 */
	19
	20	#include "os_types.h"
	21
	22	#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
	23
	24	#ifndef _V_WIDE_MATH
	25	#define _V_WIDE_MATH
	26
	27	//#define MB() asm volatile ("" : : : "memory")
	28	#define MB() /* currently expands to nothing; the empty-asm memory-clobber form is kept above, commented out */
	29
	30	static inline void mcf5249_init_mac(void) { /* write 0x20 to MACSR and leave acc0-acc3 cleared */
	31	int r; // scratch target for movclr; the value is discarded
	32	asm volatile ("move.l #0x20, %%macsr;" // frac, truncate, no saturation
	33	"movclr.l %%acc0, %[r];" // read acc0 into r and clear it
	34	"move.l %%acc0, %%acc1;" // copy the now-zero acc0 into acc1..acc3
	35	"move.l %%acc0, %%acc2;"
	36	"move.l %%acc0, %%acc3;"
	37	: [r] "=r" (r));
	38	}
	39
	40	static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { /* MAC product of x and y, read from acc0 and shifted right by one */
	41	ogg_int32_t r;
	42	asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply into acc (NOTE(review): presumably acc0 must be zero on entry)
	43	"movclr.l %%acc0, %[r];" // move & clear acc, so acc0 is zero again afterwards
	44	"asr.l #1, %[r];" // halve the result; no overflow test
	45	: [r] "=d" (r)
	46	: [x] "r" (x), [y] "r" (y)
	47	: "cc");
	48	return r;
	49	}
	50
	51	static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { /* MAC product of x and y, returned exactly as read from acc0 */
	52	ogg_int32_t r;
	53	asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply (NOTE(review): presumably acc0 must be zero on entry)
	54	"movclr.l %%acc0, %[r];" // move and clear, so acc0 is zero again afterwards
	55	: [r] "=r" (r)
	56	: [x] "r" (x), [y] "r" (y)
	57	: "cc");
	58	return r;
	59	}
	60
	61
	62	static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { /* (MAC result << 16) OR (low word of x*y >> 15) */
	63	ogg_int32_t r;
	64	asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
	65	"movclr.l %%acc0, %[r];" // get higher half
	66	"mulu.l %[y], %[x];" // get lower half (overwrites x)
	67	"asl.l #8, %[r];" // hi << 17 per the original note; the code shifts r left by 8 twice
	68	"asl.l #8, %[r];"
	69	"lsr.l #8, %[x];" // (unsigned)lo >> 15, done as shifts by 8 and 7
	70	"lsr.l #7, %[x];"
	71	"or.l %[x], %[r];" // or the two parts together
	72	: [r] "=&d" (r), [x] "+d" (x)
	73	: [y] "d" (y)
	74	: "cc");
	75	return r;
	76	}
	77
	78
	79	static inline
	80	void XPROD31(ogg_int32_t a, ogg_int32_t b,
	81	ogg_int32_t t, ogg_int32_t v,
	82	ogg_int32_t *x, ogg_int32_t *y)
	83	{ /* stores a*t + b*v to *x and b*t - a*v to *y, each read back from the MAC with movclr */
	84	ogg_int32_t r;
	85	asm volatile ("mac.l %[a], %[t], %%acc0;" // acc0 += a*t
	86	"mac.l %[b], %[v], %%acc0;" // acc0 += b*v
	87	"mac.l %[b], %[t], %%acc1;" // acc1 += b*t
	88	"msac.l %[a], %[v], %%acc1;" // acc1 -= a*v
	89	"movclr.l %%acc0, %[r];" // fetch and clear acc0
	90	"move.l %[r], (%[x]);" // store through x
	91	"movclr.l %%acc1, %[r];" // fetch and clear acc1
	92	"move.l %[r], (%[y]);" // store through y
	93	: [r] "=&r" (r)
	94	: [x] "a" (x), [y] "a" (y),
	95	[a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
	96	: "cc", "memory");
	97	}
	98
	99
	100	static inline
	101	void XNPROD31(ogg_int32_t a, ogg_int32_t b,
	102	ogg_int32_t t, ogg_int32_t v,
	103	ogg_int32_t *x, ogg_int32_t *y)
	104	{ /* stores a*t - b*v to *x and b*t + a*v to *y, each read back from the MAC with movclr */
	105	ogg_int32_t r;
	106	asm volatile ("mac.l %[a], %[t], %%acc0;" // acc0 += a*t
	107	"msac.l %[b], %[v], %%acc0;" // acc0 -= b*v
	108	"mac.l %[b], %[t], %%acc1;" // acc1 += b*t
	109	"mac.l %[a], %[v], %%acc1;" // acc1 += a*v
	110	"movclr.l %%acc0, %[r];" // fetch and clear acc0
	111	"move.l %[r], (%[x]);" // store through x
	112	"movclr.l %%acc1, %[r];" // fetch and clear acc1
	113	"move.l %[r], (%[y]);" // store through y
	114	: [r] "=&r" (r)
	115	: [x] "a" (x), [y] "a" (y),
	116	[a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
	117	: "cc", "memory");
	118	}
	119
	120
	121	/* XPROD32 assigns to the lvalues _x and _y and evaluates _a, _b, _t, _v twice each; no faster way of doing this using the MAC? */
	122	#define XPROD32(_a, _b, _t, _v, _x, _y) \
	123	{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
	124	(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
	125
	126
	127	/* asm versions of vector multiplication for window.c */
	128	/* assumes MAC is initialized & accumulators cleared */
	129	static inline
	130	void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
	131	{ /* in place: data[i] becomes the MAC product of data[i] and window[i] for i in [0, n); NOTE(review): prefetch reads up to four words past each array */
	132	asm volatile ("movem.l (%[d]), %%d0-%%d3;" // loop start
	133	"movem.l (%[w]), %%a0-%%a3;" // pre-fetch registers
	134	"lea.l (4*4, %[w]), %[w];" // step w past the four window words just loaded
	135	"bra 1f;" // jump to loop condition
	136	"0:" // loop body
	137	// multiply and load next window values
	138	"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
	139	"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
	140	"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
	141	"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
	142	"movclr.l %%acc0, %%d0;" // get the products
	143	"movclr.l %%acc1, %%d1;"
	144	"movclr.l %%acc2, %%d2;"
	145	"movclr.l %%acc3, %%d3;"
	146	// store and advance
	147	"movem.l %%d0-%%d3, (%[d]);"
	148	"lea.l (4*4, %[d]), %[d];"
	149	"movem.l (%[d]), %%d0-%%d3;" // fetch the next four data words
	150	"subq.l #4, %[n];" // done 4 elements
	151	"1: cmpi.l #4, %[n];" // loop while at least four elements remain
	152	"bge 0b;"
	153	// multiply final elements
	154	"tst.l %[n];"
	155	"beq 1f;" // n=0
	156	"mac.l %%d0, %%a0, %%acc0;"
	157	"movclr.l %%acc0, %%d0;"
	158	"move.l %%d0, (%[d])+;"
	159	"subq.l #1, %[n];"
	160	"beq 1f;" // n=1
	161	"mac.l %%d1, %%a1, %%acc0;"
	162	"movclr.l %%acc0, %%d1;"
	163	"move.l %%d1, (%[d])+;"
	164	"subq.l #1, %[n];"
	165	"beq 1f;" // n=2
	166	// otherwise n = 3
	167	"mac.l %%d2, %%a2, %%acc0;"
	168	"movclr.l %%acc0, %%d2;"
	169	"move.l %%d2, (%[d])+;"
	170	"1:"
	171	: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
	172	: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
	173	"cc", "memory");
	174	}
	175
	176	static inline
	177	void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
	178	{ /* in place: data[i] becomes the MAC product of data[i] and window[-i] for i in [0, n); NOTE(review): prefetch reads up to four words beyond the words used */
	179	asm volatile ("lea.l (-3*4, %[w]), %[w];" // loop start
	180	"movem.l (%[d]), %%d0-%%d3;" // pre-fetch registers
	181	"movem.l (%[w]), %%a0-%%a3;" // a3 = window[0], a2 = window[-1], a1 = window[-2], a0 = window[-3]
	182	"bra 1f;" // jump to loop condition
	183	"0:" // loop body
	184	// multiply and load next window value
	185	"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
	186	"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
	187	"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
	188	"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
	189	"movclr.l %%acc0, %%d0;" // get the products
	190	"movclr.l %%acc1, %%d1;"
	191	"movclr.l %%acc2, %%d2;"
	192	"movclr.l %%acc3, %%d3;"
	193	// store and advance
	194	"movem.l %%d0-%%d3, (%[d]);"
	195	"lea.l (4*4, %[d]), %[d];"
	196	"movem.l (%[d]), %%d0-%%d3;" // fetch the next four data words
	197	"subq.l #4, %[n];" // done 4 elements
	198	"1: cmpi.l #4, %[n];" // loop while at least four elements remain
	199	"bge 0b;"
	200	// multiply final elements
	201	"tst.l %[n];"
	202	"beq 1f;" // n=0
	203	"mac.l %%d0, %%a3, %%acc0;"
	204	"movclr.l %%acc0, %%d0;"
	205	"move.l %%d0, (%[d])+;"
	206	"subq.l #1, %[n];"
	207	"beq 1f;" // n=1
	208	"mac.l %%d1, %%a2, %%acc0;"
	209	"movclr.l %%acc0, %%d1;"
	210	"move.l %%d1, (%[d])+;"
	211	"subq.l #1, %[n];"
	212	"beq 1f;" // n=2
	213	// otherwise n = 3
	214	"mac.l %%d2, %%a1, %%acc0;"
	215	"movclr.l %%acc0, %%d2;"
	216	"move.l %%d2, (%[d])+;"
	217	"1:"
	218	: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
	219	: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
	220	"cc", "memory");
	221	}
	222
	223
	224	static inline
	225	void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
	226	{ /* zero n 32-bit words at ptr; n <= 0 stores nothing; the group loop now runs only while n >= 4 so it cannot overshoot */
	227	asm volatile ("clr.l %%d0;"
	228	"clr.l %%d1;"
	229	"clr.l %%d2;"
	230	"clr.l %%d3;"
	231	// loop start
	232	"bra 1f;" // jump to loop condition
	233	"0: movem.l %%d0-%%d3, (%[ptr]);" // zero four words at once
	234	"lea (4*4, %[ptr]), %[ptr];"
	235	"subq.l #4, %[n];" // done 4 elements
	236	"1: cmpi.l #4, %[n];" // loop while at least four elements remain
	237	"bge 0b;"
	238	// remaining elements (n < 4 here)
	239	"tst.l %[n];"
	240	"ble 1f;" // n<=0
	241	"clr.l (%[ptr])+;"
	242	"subq.l #1, %[n];"
	243	"beq 1f;" // n=1
	244	"clr.l (%[ptr])+;"
	245	"subq.l #1, %[n];"
	246	"beq 1f;" // n=2
	247	// otherwise n = 3
	248	"clr.l (%[ptr])+;"
	249	"1:"
	250	: [n] "+d" (n), [ptr] "+a" (ptr)
	251	:
	252	: "%d0","%d1","%d2","%d3","cc","memory");
	253	}
	254
	255
	256	#endif
	257	#endif