Diffstat (limited to 'lib/rbcodec/codecs/libwmapro/wmapro_math.h')
-rw-r--r--  lib/rbcodec/codecs/libwmapro/wmapro_math.h  250
1 file changed, 250 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwmapro/wmapro_math.h b/lib/rbcodec/codecs/libwmapro/wmapro_math.h
new file mode 100644
index 0000000000..c78d6b627f
--- /dev/null
+++ b/lib/rbcodec/codecs/libwmapro/wmapro_math.h
@@ -0,0 +1,250 @@
#ifndef _WMAPRO_MATH_H_
#define _WMAPRO_MATH_H_

#include <inttypes.h>

/* rockbox: not used
#define fixtof16(x)  (float)((float)(x) / (float)(1 << 16))
#define fixtof31(x)  (float)((float)(x) / (float)(1 << 31))
#define ftofix16(x)  ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
#define ftofix31(x)  ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
*/
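
/* Illustrative note: the fixed-point formats used below are Q16.16
 * (16 fractional bits) and Q1.31 (31 fractional bits). For example,
 * 0x00008000 represents 0.5 in Q16.16 and 0x40000000 represents 0.5
 * in Q1.31. */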

#if defined(CPU_ARM)
/* Calculates: result = (X*Y)>>Z */
#define fixmulshift(X,Y,Z) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[lo], %[lo], lsr %[shr]   \n\t"  /* lo >>= Z */ \
        "orr %[lo], %[lo], %[hi], lsl %[shl]"  /* lo |= (hi << (32-Z)) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-(Z))); \
    lo; \
})

/* Calculates: result = (X*Y)>>16 */
#define fixmul16(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[lo], %[lo], lsr #16      \n\t"  /* lo >>= 16 */ \
        "orr %[lo], %[lo], %[hi], lsl #16"     /* lo |= (hi << 16) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y)); \
    lo; \
})

/* Calculates: result = (X*Y)>>24 */
#define fixmul24(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[lo], %[lo], lsr #24      \n\t"  /* lo >>= 24 */ \
        "orr %[lo], %[lo], %[hi], lsl #8"      /* lo |= (hi << 8) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y)); \
    lo; \
})

/* Calculates: result = (X*Y)>>31 (loses 1 bit of precision) */
#define fixmul31(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[hi], %[hi], lsl #1"             /* hi <<= 1 */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y)); \
    hi; \
})
#elif defined(CPU_COLDFIRE)
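/* The EMAC unit runs in fractional mode here, so each 32x32-bit product is
 * shifted left by one bit as it enters the accumulator: acc0 holds (X*Y)<<1,
 * and its high half is (X*Y)>>31. fixmul31 therefore reads its result
 * straight from the accumulator, while fixmulshift and fixmul16 compensate
 * for the extra bit (shifting by 31-Z instead of 32-Z, and the "lsr.l #1"). */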
/* Calculates: result = (X*Y)>>Z */
#define fixmulshift(X,Y,Z) \
({ \
    int32_t t1; \
    int32_t t2; \
    int32_t x = (X); /* local copy: the asm below overwrites this register */ \
    asm volatile ( \
        "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
        "mulu.l %[y],%[x] \n\t"      /* get lower half, avoid emac stall */ \
        "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
        "moveq.l #31,%[t2] \n\t" \
        "sub.l %[sh],%[t2] \n\t"     /* t2 = 31 - shift */ \
        "ble.s 1f \n\t" \
        "asl.l %[t2],%[t1] \n\t"     /* hi <<= 31 - shift */ \
        "lsr.l %[sh],%[x] \n\t"      /* (unsigned)lo >>= shift */ \
        "or.l %[x],%[t1] \n\t"       /* combine result */ \
        "bra.s 2f \n\t" \
        "1: \n\t" \
        "neg.l %[t2] \n\t"           /* t2 = shift - 31 */ \
        "asr.l %[t2],%[t1] \n\t"     /* hi >>= t2 */ \
        "2: \n" \
        : [t1]"=&d"(t1), [t2]"=&d"(t2), [x] "+d" (x) \
        : [y] "d" ((Y)), [sh]"d"((Z))); \
    t1; \
})

/* Calculates: result = (X*Y)>>16 */
#define fixmul16(X,Y) \
({ \
    int32_t t, x = (X); \
    asm volatile ( \
        "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
        "mulu.l %[y],%[x] \n\t"      /* get lower half, avoid emac stall */ \
        "movclr.l %%acc0,%[t] \n\t"  /* get higher half */ \
        "lsr.l #1,%[t] \n\t"         /* hi >>= 1 to compensate emac shift */ \
        "move.w %[t],%[x] \n\t"      /* combine halfwords */ \
        "swap %[x] \n\t" \
        : [t]"=&d"(t), [x] "+d" (x) \
        : [y] "d" ((Y))); \
    x; \
})

/* Calculates: result = (X*Y)>>31 (may lose msb to overflow) */
#define fixmul31(X,Y) \
({ \
    int32_t t; \
    asm volatile ( \
        "mac.l %[x], %[y], %%acc0\n\t" /* multiply */ \
        "movclr.l %%acc0, %[t]\n\t"    /* get higher half as result */ \
        : [t] "=d" (t) \
        : [x] "r" ((X)), [y] "r" ((Y))); \
    t; \
})
#else
static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= shamt;

    return (int32_t)temp;
}

static inline int32_t fixmul31(int32_t x, int32_t y)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= 31;

    return (int32_t)temp;
}

static inline int32_t fixmul24(int32_t x, int32_t y)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= 24;

    return (int32_t)temp;
}

static inline int32_t fixmul16(int32_t x, int32_t y)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= 16;

    return (int32_t)temp;
}
#endif /* CPU_COLDFIRE, CPU_ARM */
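
/* Worked example (illustrative): in Q16.16, 2.0 is 0x00020000 and 1.5 is
 * 0x00018000, so fixmul16(0x00020000, 0x00018000) == 0x00030000, i.e. 3.0.
 * Likewise in Q1.31, fixmul31(0x40000000, 0x40000000) == 0x20000000,
 * i.e. 0.5 * 0.5 == 0.25. */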

#if defined(CPU_COLDFIRE)
#define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
    asm volatile ( \
        "mac.l %[s0], %[wj], %%acc0 \n\t" \
        "msac.l %[s1], %[wi], %%acc0 \n\t" \
        "mac.l %[s0], %[wi], %%acc1 \n\t" \
        "mac.l %[s1], %[wj], %%acc1 \n\t" \
        "movclr.l %%acc0, %[s0] \n\t" \
        "move.l %[s0], (%[dst_i]) \n\t" \
        "movclr.l %%acc1, %[s0] \n\t" \
        "move.l %[s0], (%[dst_j]) \n\t" \
        : [s0] "+r" (s0) /* s0 is used as scratch, so declare it read/write */ \
        : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), \
          [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) \
        : "cc", "memory");
#else
#define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
    dst[i] = fixmul31(wj, s0) - fixmul31(wi, s1); \
    dst[j] = fixmul31(wi, s0) + fixmul31(wj, s1);
#endif /* CPU_COLDFIRE */

static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
                                        const int32_t *src1, const int32_t *win,
                                        int len)
{
    int i, j;
    dst  += len;
    win  += len;
    src0 += len;
    for (i = -len, j = len - 1; i < 0; i++, j--) {
        int32_t s0 =  src0[i]; /* s0 = src0[ 0 ... len-1] */
        int32_t s1 =  src1[j]; /* s1 = src1[2*len-1 ... len] */
        int32_t wi = -win[i];  /* wi = -win[ 0 ... len-1] */
        int32_t wj = -win[j];  /* wj = -win[2*len-1 ... len] */
        VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj);
    }
}
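
/* Usage sketch (illustrative; the buffer names are hypothetical): a caller
 * typically overlaps two adjacent half-blocks under a Q1.31 window,
 *
 *   vector_fixmul_window(out, saved, current, window, block_len / 2);
 *
 * which writes 2*len windowed samples to dst, reading src0 forward, src1
 * backward, and win from both ends. */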

#if defined(CPU_ARM)
#define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
    asm volatile ( \
        "ldmia %[src]!, {r1-r4} \n\t" \
        "smull r0, r5, r1, %[mul] \n\t" \
        "mov   r0, r0, lsr #16 \n\t" \
        "orr   r0, r0, r5, lsl #16 \n\t" \
        "smull r1, r5, r2, %[mul] \n\t" \
        "mov   r1, r1, lsr #16 \n\t" \
        "orr   r1, r1, r5, lsl #16 \n\t" \
        "smull r2, r5, r3, %[mul] \n\t" \
        "mov   r2, r2, lsr #16 \n\t" \
        "orr   r2, r2, r5, lsl #16 \n\t" \
        "smull r3, r5, r4, %[mul] \n\t" \
        "mov   r3, r3, lsr #16 \n\t" \
        "orr   r3, r3, r5, lsl #16 \n\t" \
        "stmia %[dst]!, {r0-r3} \n" \
        : [dst]"+r"(dst), [src]"+r"(src) \
        : [mul]"r"(mul) \
        : "r0", "r1", "r2", "r3", "r4", "r5", "memory");
#else
#define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
    dst[i  ] = fixmul16(src[i  ], mul); \
    dst[i+1] = fixmul16(src[i+1], mul); \
    dst[i+2] = fixmul16(src[i+2], mul); \
    dst[i+3] = fixmul16(src[i+3], mul);
#endif /* CPU_ARM */

static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                        int32_t mul, int len)
{
    /* len is _always_ a multiple of 4, because len is the difference of sfb's
     * which themselves are always a multiple of 4. */
    int i;
    for (i = 0; i < len; i += 4) {
        VECT_MUL_SCALAR_KERNEL(dst, src, mul);
    }
}
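
/* Usage sketch (illustrative; the names are hypothetical): scale a block of
 * coefficients by a Q16.16 gain, e.g. 0.5 == 0x00008000,
 *
 *   vector_fixmul_scalar(coeffs, coeffs, 0x00008000, num_coeffs);
 *
 * with num_coeffs a multiple of 4 as required above. In-place use
 * (dst == src) is safe: each element is read before it is written. */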

static inline int av_clip(int a, int amin, int amax)
{
    if      (a < amin) return amin;
    else if (a > amax) return amax;
    else               return a;
}
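
/* Illustrative: av_clip(300, -128, 127) == 127,
 * av_clip(-200, -128, 127) == -128. */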
#endif /* _WMAPRO_MATH_H_ */