1 files changed, 0 insertions, 327 deletions
diff --git a/apps/codecs/libwma/asm_mcf5249.h b/apps/codecs/libwma/asm_mcf5249.h
deleted file mode 100644
index 6b5c600685..0000000000
--- a/apps/codecs/libwma/asm_mcf5249.h
+++ /dev/null
@@ -1,327 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- *
- * Copyright (C) 2005 by Pedro Vasconcelos
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-/* asm routines for wide math on the MCF5249 */
-//#include "os_types.h"
-#if defined(CPU_COLDFIRE)
-/* attribute for 16-byte alignment */
-#define LINE_ATTR   __attribute__ ((aligned (16)))
-#ifndef _V_WIDE_MATH
-#define _V_WIDE_MATH
-#define MB()
-static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
-  asm volatile ("mac.l %[x], %[y], %%acc0;"    /* multiply & shift  */
-                "movclr.l %%acc0, %[x];"       /* move & clear acc */
-                "asr.l #1, %[x];"              /* no overflow test */
-                : [x] "+&d" (x)
-                : [y] "r" (y)
-                : "cc");
-  return x;
-}
-static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
-                "movclr.l %%acc0, %[x];"    /* move and clear */
-                : [x] "+&r" (x)
-                : [y] "r" (y)
-                : "cc");
-  return x;
-}
-static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
-  ogg_int32_t r;
-  asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
-                "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
-                "movclr.l %%acc0, %[r];"     /* get higher half */
-                "asl.l #8, %[r];"            /* hi<<16, plus one free */
-                "asl.l #8, %[r];"
-                "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
-                "lsr.l #7, %[x];"
-                "or.l %[x], %[r];"           /* logical-or results */
-                : [r] "=&d" (r), [x] "+d" (x)
-                : [y] "d" (y)
-                : "cc");
-  return r;
-}
-static inline
-void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
-             ogg_int32_t  t, ogg_int32_t  v,
-             ogg_int32_t *x, ogg_int32_t *y)
-{
-  asm volatile ("mac.l %[a], %[t], %%acc0;"
-                "mac.l %[b], %[v], %%acc0;"
-                "mac.l %[b], %[t], %%acc1;"
-                "msac.l %[a], %[v], %%acc1;"
-                "movclr.l %%acc0, %[a];"
-                "move.l %[a], (%[x]);"
-                "movclr.l %%acc1, %[a];"
-                "move.l %[a], (%[y]);"
-                : [a] "+&r" (a)
-                : [x] "a" (x), [y] "a" (y),
-                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
-                : "cc", "memory");
-}
-static inline
-void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
-              ogg_int32_t  t, ogg_int32_t  v,
-              ogg_int32_t *x, ogg_int32_t *y)
-{
-  asm volatile ("mac.l %[a], %[t], %%acc0;"
-                "msac.l %[b], %[v], %%acc0;"
-                "mac.l %[b], %[t], %%acc1;"
-                "mac.l %[a], %[v], %%acc1;"
-                "movclr.l %%acc0, %[a];"
-                "move.l %[a], (%[x]);"
-                "movclr.l %%acc1, %[a];"
-                "move.l %[a], (%[y]);"
-                : [a] "+&r" (a)
-                : [x] "a" (x), [y] "a" (y),
-                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
-                : "cc", "memory");
-}
-#if 0    /* canonical Tremor definition */
-#define XPROD32(_a, _b, _t, _v, _x, _y)         \
-  { (_x)=MULT32(_a,_t)+MULT32(_b,_v);           \
-    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
-#endif
-/* this could lose the LSB by overflow, but i don't think it'll ever happen.
-   if anyone think they can hear a bug caused by this, please try the above
-   version. */
-#define XPROD32(_a, _b, _t, _v, _x, _y)     \
-  asm volatile ("mac.l %[a], %[t], %%acc0;" \
-                "mac.l %[b], %[v], %%acc0;" \
-                "mac.l %[b], %[t], %%acc1;" \
-                "msac.l %[a], %[v], %%acc1;" \
-                "movclr.l %%acc0, %[x];" \
-                "asr.l #1, %[x];" \
-                "movclr.l %%acc1, %[y];" \
-                "asr.l #1, %[y];" \
-                : [x] "=&d" (_x), [y] "=&d" (_y) \
-                : [a] "r" (_a), [b] "r" (_b), \
-                  [t] "r" (_t), [v] "r" (_v) \
-                : "cc");
-#ifndef _V_VECT_OPS
-#define _V_VECT_OPS
-/* asm versions of vector operations for block.c, window.c */
-/* assumes MAC is initialized & accumulators cleared */
-static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
-{
-  /* align to 16 bytes */
-  while(n>0 && (int)x&16) {
-    *x++ += *y++;
-    n--;
-  }
-  asm volatile ("bra 1f;"
-                "0:"                          /* loop start */
-                "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
-                "movem.l (%[y]), %%a0-%%a3;"
-                /* add */
-                "add.l %%a0, %%d0;"
-                "add.l %%a1, %%d1;"
-                "add.l %%a2, %%d2;"
-                "add.l %%a3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[x]);"
-                "lea.l (4*4, %[x]), %[x];"
-                "lea.l (4*4, %[y]), %[y];"
-                "subq.l #4, %[n];"     /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
-  /* add final elements */
-  while (n>0) {
-    *x++ += *y++;
-    n--;
-  }
-}
-static inline
-void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
-{
-  /* align to 16 bytes */
-  while(n>0 && (int)x&16) {
-    *x++ = *y++;
-    n--;
-  }
-  asm volatile ("bra 1f;"
-                "0:"                                    /* loop start */
-                "movem.l (%[y]), %%d0-%%d3;"            /* fetch values */
-                "movem.l %%d0-%%d3, (%[x]);"            /* store */
-                "lea.l (4*4, %[x]), %[x];"              /* advance */
-                "lea.l (4*4, %[y]), %[y];"
-                "subq.l #4, %[n];"                      /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
-                : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
-  /* copy final elements */
-  while (n>0) {
-    *x++ = *y++;
-    n--;
-  }
-}
-static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
-{
-  /* ensure data is aligned to 16-bytes */
-  while(n>0 && (int)data%16) {
-    *data = MULT31(*data, *window);
-    data++;
-    window++;
-    n--;
-  }
-  asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
-                "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
-                "lea.l (4*4, %[w]), %[w];"
-                "bra 1f;"               /* jump to loop condition */
-                "0:" /* loop body */
-                /* multiply and load next window values */
-                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
-                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
-                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
-                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
-                "movclr.l %%acc0, %%d0;"  /* get the products */
-                "movclr.l %%acc1, %%d1;"
-                "movclr.l %%acc2, %%d2;"
-                "movclr.l %%acc3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[d]);"
-                "lea.l (4*4, %[d]), %[d];"
-                "movem.l (%[d]), %%d0-%%d3;"
-                "subq.l #4, %[n];"     /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                /* multiply final elements */
-                "tst.l %[n];"
-                "beq 1f;"      /* n=0 */
-                "mac.l %%d0, %%a0, %%acc0;"
-                "movclr.l %%acc0, %%d0;"
-                "move.l %%d0, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"     /* n=1 */
-                "mac.l %%d1, %%a1, %%acc0;"
-                "movclr.l %%acc0, %%d1;"
-                "move.l %%d1, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"     /* n=2 */
-                /* otherwise n = 3 */
-                "mac.l %%d2, %%a2, %%acc0;"
-                "movclr.l %%acc0, %%d2;"
-                "move.l %%d2, (%[d])+;"
-                "1:"
-                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
-}
-static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
-{
-  /* ensure at least data is aligned to 16-bytes */
-  while(n>0 && (int)data%16) {
-    *data = MULT31(*data, *window);
-    data++;
-    window--;
-    n--;
-  }
-  asm volatile ("lea.l (-3*4, %[w]), %[w];"     /* loop start */
-                "movem.l (%[d]), %%d0-%%d3;"    /* pre-fetch registers */
-                "movem.l (%[w]), %%a0-%%a3;"
-                "bra 1f;"               /* jump to loop condition */
-                "0:" /* loop body */
-                /* multiply and load next window value */
-                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
-                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
-                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
-                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
-                "movclr.l %%acc0, %%d0;"  /* get the products */
-                "movclr.l %%acc1, %%d1;"
-                "movclr.l %%acc2, %%d2;"
-                "movclr.l %%acc3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[d]);"
-                "lea.l (4*4, %[d]), %[d];"
-                "movem.l (%[d]), %%d0-%%d3;"
-                "subq.l #4, %[n];"     /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                /* multiply final elements */
-                "tst.l %[n];"
-                "beq 1f;"      /* n=0 */
-                "mac.l %%d0, %%a3, %%acc0;"
-                "movclr.l %%acc0, %%d0;"
-                "move.l %%d0, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"     /* n=1 */
-                "mac.l %%d1, %%a2, %%acc0;"
-                "movclr.l %%acc0, %%d1;"
-                "move.l %%d1, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"     /* n=2 */
-                /* otherwise n = 3 */
-                "mac.l %%d2, %%a1, %%acc0;"
-                "movclr.l %%acc0, %%d2;"
-                "move.l %%d2, (%[d])+;"
-                "1:"
-                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
-}
-#endif
-#endif
-#ifndef _V_CLIP_MATH
-#define _V_CLIP_MATH
-/* this is portable C and simple; why not use this as default? */
-static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
-  register ogg_int32_t hi=32767, lo=-32768;
-  return (x>=hi ? hi : (x<=lo ? lo : x));
-}
-#endif
-#else
-#define LINE_ATTR
-#endif

diff --git a/apps/codecs/libwma/asm_mcf5249.h b/apps/codecs/libwma/asm_mcf5249.h
deleted file mode 100644
index 6b5c600685..0000000000
--- a/apps/codecs/libwma/asm_mcf5249.h
+++ /dev/null
@@ -1,327 +0,0 @@
1	/***************************************************************************
2	*             __________               __   ___.
3	*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4	*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5	*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6	*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7	*                     \/            \/     \/    \/            \/
8	*
9	* Copyright (C) 2005 by Pedro Vasconcelos
10	*
11	* This program is free software; you can redistribute it and/or
12	* modify it under the terms of the GNU General Public License
13	* as published by the Free Software Foundation; either version 2
14	* of the License, or (at your option) any later version.
15	*
16	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
17	* KIND, either express or implied.
18	*
19	****************************************************************************/
20	/* asm routines for wide math on the MCF5249 */
21
22	//#include "os_types.h"
23
24	#if defined(CPU_COLDFIRE)
25
26	/* attribute for 16-byte alignment */
27	#define LINE_ATTR __attribute__ ((aligned (16)))
28
29	#ifndef _V_WIDE_MATH
30	#define _V_WIDE_MATH
31
32	#define MB()
33
34	static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
35
36	asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
37	"movclr.l %%acc0, %[x];" /* move & clear acc */
38	"asr.l #1, %[x];" /* no overflow test */
39	: [x] "+&d" (x)
40	: [y] "r" (y)
41	: "cc");
42	return x;
43	}
44
45	static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
46
47	asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
48	"movclr.l %%acc0, %[x];" /* move and clear */
49	: [x] "+&r" (x)
50	: [y] "r" (y)
51	: "cc");
52	return x;
53	}
54
55
56	static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
57	ogg_int32_t r;
58
59	asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
60	"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
61	"movclr.l %%acc0, %[r];" /* get higher half */
62	"asl.l #8, %[r];" /* hi<<16, plus one free */
63	"asl.l #8, %[r];"
64	"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
65	"lsr.l #7, %[x];"
66	"or.l %[x], %[r];" /* logical-or results */
67	: [r] "=&d" (r), [x] "+d" (x)
68	: [y] "d" (y)
69	: "cc");
70	return r;
71	}
72
73
74	static inline
75	void XPROD31(ogg_int32_t a, ogg_int32_t b,
76	ogg_int32_t t, ogg_int32_t v,
77	ogg_int32_t *x, ogg_int32_t *y)
78	{
79	asm volatile ("mac.l %[a], %[t], %%acc0;"
80	"mac.l %[b], %[v], %%acc0;"
81	"mac.l %[b], %[t], %%acc1;"
82	"msac.l %[a], %[v], %%acc1;"
83	"movclr.l %%acc0, %[a];"
84	"move.l %[a], (%[x]);"
85	"movclr.l %%acc1, %[a];"
86	"move.l %[a], (%[y]);"
87	: [a] "+&r" (a)
88	: [x] "a" (x), [y] "a" (y),
89	[b] "r" (b), [t] "r" (t), [v] "r" (v)
90	: "cc", "memory");
91	}
92
93
94	static inline
95	void XNPROD31(ogg_int32_t a, ogg_int32_t b,
96	ogg_int32_t t, ogg_int32_t v,
97	ogg_int32_t *x, ogg_int32_t *y)
98	{
99	asm volatile ("mac.l %[a], %[t], %%acc0;"
100	"msac.l %[b], %[v], %%acc0;"
101	"mac.l %[b], %[t], %%acc1;"
102	"mac.l %[a], %[v], %%acc1;"
103	"movclr.l %%acc0, %[a];"
104	"move.l %[a], (%[x]);"
105	"movclr.l %%acc1, %[a];"
106	"move.l %[a], (%[y]);"
107	: [a] "+&r" (a)
108	: [x] "a" (x), [y] "a" (y),
109	[b] "r" (b), [t] "r" (t), [v] "r" (v)
110	: "cc", "memory");
111	}
112
113
114	#if 0 /* canonical Tremor definition */
115	#define XPROD32(_a, _b, _t, _v, _x, _y) \
116	{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
117	(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
118	#endif
119
120	/* this could lose the LSB by overflow, but i don't think it'll ever happen.
121	if anyone think they can hear a bug caused by this, please try the above
122	version. */
123	#define XPROD32(_a, _b, _t, _v, _x, _y) \
124	asm volatile ("mac.l %[a], %[t], %%acc0;" \
125	"mac.l %[b], %[v], %%acc0;" \
126	"mac.l %[b], %[t], %%acc1;" \
127	"msac.l %[a], %[v], %%acc1;" \
128	"movclr.l %%acc0, %[x];" \
129	"asr.l #1, %[x];" \
130	"movclr.l %%acc1, %[y];" \
131	"asr.l #1, %[y];" \
132	: [x] "=&d" (_x), [y] "=&d" (_y) \
133	: [a] "r" (_a), [b] "r" (_b), \
134	[t] "r" (_t), [v] "r" (_v) \
135	: "cc");
136
137	#ifndef _V_VECT_OPS
138	#define _V_VECT_OPS
139
140	/* asm versions of vector operations for block.c, window.c */
141	/* assumes MAC is initialized & accumulators cleared */
142	static inline
143	void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
144	{
145	/* align to 16 bytes */
146	while(n>0 && (int)x&16) {
147	*x++ += *y++;
148	n--;
149	}
150	asm volatile ("bra 1f;"
151	"0:" /* loop start */
152	"movem.l (%[x]), %%d0-%%d3;" /* fetch values */
153	"movem.l (%[y]), %%a0-%%a3;"
154	/* add */
155	"add.l %%a0, %%d0;"
156	"add.l %%a1, %%d1;"
157	"add.l %%a2, %%d2;"
158	"add.l %%a3, %%d3;"
159	/* store and advance */
160	"movem.l %%d0-%%d3, (%[x]);"
161	"lea.l (4*4, %[x]), %[x];"
162	"lea.l (4*4, %[y]), %[y];"
163	"subq.l #4, %[n];" /* done 4 elements */
164	"1: cmpi.l #4, %[n];"
165	"bge 0b;"
166	: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
167	: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
168	"cc", "memory");
169	/* add final elements */
170	while (n>0) {
171	*x++ += *y++;
172	n--;
173	}
174	}
175
176	static inline
177	void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
178	{
179	/* align to 16 bytes */
180	while(n>0 && (int)x&16) {
181	*x++ = *y++;
182	n--;
183	}
184	asm volatile ("bra 1f;"
185	"0:" /* loop start */
186	"movem.l (%[y]), %%d0-%%d3;" /* fetch values */
187	"movem.l %%d0-%%d3, (%[x]);" /* store */
188	"lea.l (4*4, %[x]), %[x];" /* advance */
189	"lea.l (4*4, %[y]), %[y];"
190	"subq.l #4, %[n];" /* done 4 elements */
191	"1: cmpi.l #4, %[n];"
192	"bge 0b;"
193	: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
194	: : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
195	/* copy final elements */
196	while (n>0) {
197	*x++ = *y++;
198	n--;
199	}
200	}
201
202
203	static inline
204	void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
205	{
206	/* ensure data is aligned to 16-bytes */
207	while(n>0 && (int)data%16) {
208	*data = MULT31(*data, *window);
209	data++;
210	window++;
211	n--;
212	}
213	asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
214	"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
215	"lea.l (4*4, %[w]), %[w];"
216	"bra 1f;" /* jump to loop condition */
217	"0:" /* loop body */
218	/* multiply and load next window values */
219	"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
220	"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
221	"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
222	"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
223	"movclr.l %%acc0, %%d0;" /* get the products */
224	"movclr.l %%acc1, %%d1;"
225	"movclr.l %%acc2, %%d2;"
226	"movclr.l %%acc3, %%d3;"
227	/* store and advance */
228	"movem.l %%d0-%%d3, (%[d]);"
229	"lea.l (4*4, %[d]), %[d];"
230	"movem.l (%[d]), %%d0-%%d3;"
231	"subq.l #4, %[n];" /* done 4 elements */
232	"1: cmpi.l #4, %[n];"
233	"bge 0b;"
234	/* multiply final elements */
235	"tst.l %[n];"
236	"beq 1f;" /* n=0 */
237	"mac.l %%d0, %%a0, %%acc0;"
238	"movclr.l %%acc0, %%d0;"
239	"move.l %%d0, (%[d])+;"
240	"subq.l #1, %[n];"
241	"beq 1f;" /* n=1 */
242	"mac.l %%d1, %%a1, %%acc0;"
243	"movclr.l %%acc0, %%d1;"
244	"move.l %%d1, (%[d])+;"
245	"subq.l #1, %[n];"
246	"beq 1f;" /* n=2 */
247	/* otherwise n = 3 */
248	"mac.l %%d2, %%a2, %%acc0;"
249	"movclr.l %%acc0, %%d2;"
250	"move.l %%d2, (%[d])+;"
251	"1:"
252	: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
253	: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
254	"cc", "memory");
255	}
256
257	static inline
258	void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
259	{
260	/* ensure at least data is aligned to 16-bytes */
261	while(n>0 && (int)data%16) {
262	*data = MULT31(*data, *window);
263	data++;
264	window--;
265	n--;
266	}
267	asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
268	"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
269	"movem.l (%[w]), %%a0-%%a3;"
270	"bra 1f;" /* jump to loop condition */
271	"0:" /* loop body */
272	/* multiply and load next window value */
273	"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
274	"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
275	"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
276	"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
277	"movclr.l %%acc0, %%d0;" /* get the products */
278	"movclr.l %%acc1, %%d1;"
279	"movclr.l %%acc2, %%d2;"
280	"movclr.l %%acc3, %%d3;"
281	/* store and advance */
282	"movem.l %%d0-%%d3, (%[d]);"
283	"lea.l (4*4, %[d]), %[d];"
284	"movem.l (%[d]), %%d0-%%d3;"
285	"subq.l #4, %[n];" /* done 4 elements */
286	"1: cmpi.l #4, %[n];"
287	"bge 0b;"
288	/* multiply final elements */
289	"tst.l %[n];"
290	"beq 1f;" /* n=0 */
291	"mac.l %%d0, %%a3, %%acc0;"
292	"movclr.l %%acc0, %%d0;"
293	"move.l %%d0, (%[d])+;"
294	"subq.l #1, %[n];"
295	"beq 1f;" /* n=1 */
296	"mac.l %%d1, %%a2, %%acc0;"
297	"movclr.l %%acc0, %%d1;"
298	"move.l %%d1, (%[d])+;"
299	"subq.l #1, %[n];"
300	"beq 1f;" /* n=2 */
301	/* otherwise n = 3 */
302	"mac.l %%d2, %%a1, %%acc0;"
303	"movclr.l %%acc0, %%d2;"
304	"move.l %%d2, (%[d])+;"
305	"1:"
306	: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
307	: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
308	"cc", "memory");
309	}
310
311	#endif
312
313	#endif
314
315	#ifndef _V_CLIP_MATH
316	#define _V_CLIP_MATH
317
318	/* this is portable C and simple; why not use this as default? */
319	static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
320	register ogg_int32_t hi=32767, lo=-32768;
321	return (x>=hi ? hi : (x<=lo ? lo : x));
322	}
323
324	#endif
325	#else
326	#define LINE_ATTR
327	#endif