Merge from branches/mdctexp - faster ifft+imdct in codec lib

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24712 a1c6a512-1295-4272-9138-f99709370657
author: Dave Hooper <dave@beermex.com> 2010-02-17 00:49:53 +0000
committer: Dave Hooper <dave@beermex.com> 2010-02-17 00:49:53 +0000
commit: 42774d3128b91d5a37344cb40d56d3c4d147e5f2 (patch)
tree: bf336b407992ec9a5e454556f3351e3f8a0d10de /apps/codecs/libwma
parent: 62257ebc38bc0a3095b25dd0f58c4c8215edf602 (diff)
download: rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.tar.gz
rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.zip
4 files changed, 9 insertions, 170 deletions
diff --git a/apps/codecs/libwma/wmadec.h b/apps/codecs/libwma/wmadec.h
index a547ece157..4efaa9b8a2 100644
--- a/apps/codecs/libwma/wmadec.h
+++ b/apps/codecs/libwma/wmadec.h
@@ -23,8 +23,6 @@
 #include "asf.h"
 #include "bitstream.h" /* For GetBitContext */
 #include "types.h"
-//#include "dsputil.h"  /* For MDCTContext */
 //#define TRACE
 /* size of blocks */
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index ae1a93ecf2..6ff6a176ee 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -452,17 +452,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
        }
    }
-    /*Not using the ffmpeg IMDCT anymore*/
-    /* mdct_init_global();
-    for(i = 0; i < s->nb_block_sizes; ++i)
-    {
-        ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
-    }
-    */
    /* ffmpeg uses malloc to only allocate as many window sizes as needed.  
    *  However, we're really only interested in the worst case memory usage.
    *  In the worst case you can have 5 window sizes, 128 doubling up 2048
@@ -1253,14 +1242,9 @@ static int wma_decode_block(WMADecodeContext *s, int32_t *scratch_buffer)
            n4 = s->block_len >>1;
-            /*faster IMDCT from Vorbis*/
+            ff_imdct_calc( (s->frame_len_bits - bsize + 1),
-            mdct_backward( (1 << (s->block_len_bits+1)), (int32_t*)(*(s->coefs))[ch], (int32_t*)scratch_buffer);
+                          (int32_t*)scratch_buffer,
+                          (*(s->coefs))[ch]);
-            /*slower but more easily understood IMDCT from FFMPEG*/
-            //ff_imdct_calc(&s->mdct_ctx[bsize],
-            //              output,
-            //              (*(s->coefs))[ch]);
            /* add in the frame */
            index = (s->frame_len / 2) + s->block_pos - n4;
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c
index 5569309145..1472ed081c 100644
--- a/apps/codecs/libwma/wmafixed.c
+++ b/apps/codecs/libwma/wmafixed.c
@@ -250,113 +250,3 @@ fixed64 fixdiv64(fixed64 x, fixed64 y)
    return (fixed32)(r << (PRECISION / 2));
 }
-/* Inverse gain of circular cordic rotation in s0.31 format. */
-static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
-/* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2 */
-static const unsigned long atan_table[] = {
-    0x1fffffff, /* +0.785398163 (or pi/4) */
-    0x12e4051d, /* +0.463647609 */
-    0x09fb385b, /* +0.244978663 */
-    0x051111d4, /* +0.124354995 */
-    0x028b0d43, /* +0.062418810 */
-    0x0145d7e1, /* +0.031239833 */
-    0x00a2f61e, /* +0.015623729 */
-    0x00517c55, /* +0.007812341 */
-    0x0028be53, /* +0.003906230 */
-    0x00145f2e, /* +0.001953123 */
-    0x000a2f98, /* +0.000976562 */
-    0x000517cc, /* +0.000488281 */
-    0x00028be6, /* +0.000244141 */
-    0x000145f3, /* +0.000122070 */
-    0x0000a2f9, /* +0.000061035 */
-    0x0000517c, /* +0.000030518 */
-    0x000028be, /* +0.000015259 */
-    0x0000145f, /* +0.000007629 */
-    0x00000a2f, /* +0.000003815 */
-    0x00000517, /* +0.000001907 */
-    0x0000028b, /* +0.000000954 */
-    0x00000145, /* +0.000000477 */
-    0x000000a2, /* +0.000000238 */
-    0x00000051, /* +0.000000119 */
-    0x00000028, /* +0.000000060 */
-    0x00000014, /* +0.000000030 */
-    0x0000000a, /* +0.000000015 */
-    0x00000005, /* +0.000000007 */
-    0x00000002, /* +0.000000004 */
-    0x00000001, /* +0.000000002 */
-    0x00000000, /* +0.000000001 */
-    0x00000000, /* +0.000000000 */
-};
-/*
-    Below here functions do not use standard fixed precision!
-*/
-/**
- * Implements sin and cos using CORDIC rotation.
- *
- * @param phase has range from 0 to 0xffffffff, representing 0 and
- *        2*pi respectively.
- * @param cos return address for cos
- * @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX,
- *         representing -1 and 1 respectively.
- *
- *        Gives at least 24 bits precision (last 2-8 bits or so are probably off)
- */
-long fsincos(unsigned long phase, fixed32 *cos)
-{
-    int32_t x, x1, y, y1;
-    unsigned long z, z1;
-    int i;
-    /* Setup initial vector */
-    x = cordic_circular_gain;
-    y = 0;
-    z = phase;
-    /* The phase has to be somewhere between 0..pi for this to work right */
-    if (z < 0xffffffff / 4) {
-        /* z in first quadrant, z += pi/2 to correct */
-        x = -x;
-        z += 0xffffffff / 4;
-    } else if (z < 3 * (0xffffffff / 4)) {
-        /* z in third quadrant, z -= pi/2 to correct */
-        z -= 0xffffffff / 4;
-    } else {
-        /* z in fourth quadrant, z -= 3pi/2 to correct */
-        x = -x;
-        z -= 3 * (0xffffffff / 4);
-    }
-    /* Each iteration adds roughly 1-bit of extra precision */
-    for (i = 0; i < 31; i++) {
-        x1 = x >> i;
-        y1 = y >> i;
-        z1 = atan_table[i];
-        /* Decided which direction to rotate vector. Pivot point is pi/2 */
-        if (z >= 0xffffffff / 4) {
-            x -= y1;
-            y += x1;
-            z -= z1;
-        } else {
-            x += y1;
-            y -= x1;
-            z += z1;
-        }
-    }
-    if (cos)
-        *cos = x;
-    return y;
-}
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index 6b5137e044..0ecdc5cfbc 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -52,10 +52,10 @@ fixed64 fixdiv64(fixed64 x, fixed64 y);
 fixed32 fixsqrt32(fixed32 x);
 long fsincos(unsigned long phase, fixed32 *cos);
 #ifdef CPU_ARM
 /*Sign-15.16 format */
 #define fixmul32(x, y)  \
    ({ int32_t __hi;  \
       uint32_t __lo;  \
@@ -70,18 +70,6 @@ long fsincos(unsigned long phase, fixed32 *cos);
       __result;  \
    })
-#define fixmul32b(x, y)  \
-    ({ int32_t __hi;  \
-       uint32_t __lo;  \
-       int32_t __result;  \
-       asm ("smull   %0, %1, %3, %4\n\t"  \
-            "movs    %2, %1, lsl #1"  \
-            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
-            : "%r" (x), "r" (y)  \
-            : "cc");  \
-       __result;  \
-    })
 #elif defined(CPU_COLDFIRE)
 static inline int32_t fixmul32(int32_t x, int32_t y)
@@ -91,9 +79,9 @@ static inline int32_t fixmul32(int32_t x, int32_t y)
 #endif
    int32_t t1;
    asm (
-        "mac.l   %[x], %[y], %%acc0  \n" /* multiply */
+        "mac.l   %[x], %[y], %%acc0  \n" // multiply
-        "mulu.l  %[y], %[x]      \n"     /* get lower half, avoid emac stall */
+        "mulu.l  %[y], %[x]      \n"     // get lower half, avoid emac stall
-        "movclr.l %%acc0, %[t1]  \n"     /* get higher half */
+        "movclr.l %%acc0, %[t1]  \n"     // get higher half
        "lsr.l   #1, %[t1]       \n"
        "move.w  %[t1], %[x]     \n"
        "swap    %[x]            \n"
@@ -103,17 +91,6 @@ static inline int32_t fixmul32(int32_t x, int32_t y)
    return x;
 }
-static inline int32_t fixmul32b(int32_t x, int32_t y)
-{
-    asm (
-        "mac.l   %[x], %[y], %%acc0  \n" /* multiply */
-        "movclr.l %%acc0, %[x]  \n"     /* get higher half */
-        : [x] "+d" (x)
-        : [y] "d"  (y)
-    );
-    return x;
-}
 #else
 static inline fixed32 fixmul32(fixed32 x, fixed32 y)
@@ -127,17 +104,7 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y)
    return (fixed32)temp;
 }
-static inline fixed32 fixmul32b(fixed32 x, fixed32 y)
-{
-    fixed64 temp;
-    temp = x;
-    temp *= y;
-    temp >>= 31;        //16+31-16 = 31 bits
-    return (fixed32)temp;
-}
 #endif
+/* get fixmul32b from codeclib */
author	Dave Hooper <dave@beermex.com>	2010-02-17 00:49:53 +0000
committer	Dave Hooper <dave@beermex.com>	2010-02-17 00:49:53 +0000
commit	42774d3128b91d5a37344cb40d56d3c4d147e5f2 (patch)
tree	bf336b407992ec9a5e454556f3351e3f8a0d10de /apps/codecs/libwma
parent	62257ebc38bc0a3095b25dd0f58c4c8215edf602 (diff)
download	rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.tar.gz rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.zip

diff --git a/apps/codecs/libwma/wmadec.h b/apps/codecs/libwma/wmadec.h index a547ece157..4efaa9b8a2 100644 --- a/apps/codecs/libwma/wmadec.h +++ b/apps/codecs/libwma/wmadec.h
@@ -23,8 +23,6 @@
23	#include "asf.h"	23	#include "asf.h"
24	#include "bitstream.h" /* For GetBitContext */	24	#include "bitstream.h" /* For GetBitContext */
25	#include "types.h"	25	#include "types.h"
26	//#include "dsputil.h" /* For MDCTContext */
27
28		26
29	//#define TRACE	27	//#define TRACE
30	/* size of blocks */	28	/* size of blocks */


diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index ae1a93ecf2..6ff6a176ee 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c
@@ -452,17 +452,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
452	}	452	}
453	}	453	}
454		454
455	/*Not using the ffmpeg IMDCT anymore*/
456
457	/* mdct_init_global();
458
459	for(i = 0; i < s->nb_block_sizes; ++i)
460	{
461	ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
462
463	}
464	*/
465
466	/* ffmpeg uses malloc to only allocate as many window sizes as needed.	455	/* ffmpeg uses malloc to only allocate as many window sizes as needed.
467	* However, we're really only interested in the worst case memory usage.	456	* However, we're really only interested in the worst case memory usage.
468	* In the worst case you can have 5 window sizes, 128 doubling up 2048	457	* In the worst case you can have 5 window sizes, 128 doubling up 2048
@@ -1253,14 +1242,9 @@ static int wma_decode_block(WMADecodeContext *s, int32_t *scratch_buffer)
1253		1242
1254	n4 = s->block_len >>1;	1243	n4 = s->block_len >>1;
1255		1244
1256	/*faster IMDCT from Vorbis*/	1245	ff_imdct_calc( (s->frame_len_bits - bsize + 1),
1257	mdct_backward( (1 << (s->block_len_bits+1)), (int32_t*)(*(s->coefs))[ch], (int32_t*)scratch_buffer);	1246	(int32_t*)scratch_buffer,
1258		1247	(*(s->coefs))[ch]);
1259	/*slower but more easily understood IMDCT from FFMPEG*/
1260	//ff_imdct_calc(&s->mdct_ctx[bsize],
1261	// output,
1262	// (*(s->coefs))[ch]);
1263
1264		1248
1265	/* add in the frame */	1249	/* add in the frame */
1266	index = (s->frame_len / 2) + s->block_pos - n4;	1250	index = (s->frame_len / 2) + s->block_pos - n4;


diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c index 5569309145..1472ed081c 100644 --- a/apps/codecs/libwma/wmafixed.c +++ b/apps/codecs/libwma/wmafixed.c
@@ -250,113 +250,3 @@ fixed64 fixdiv64(fixed64 x, fixed64 y)
250		250
251	return (fixed32)(r << (PRECISION / 2));	251	return (fixed32)(r << (PRECISION / 2));
252	}	252	}
253
254
255
256	/* Inverse gain of circular cordic rotation in s0.31 format. */
257	static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
258
259	/* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2 */
260	static const unsigned long atan_table[] = {
261	0x1fffffff, /* +0.785398163 (or pi/4) */
262	0x12e4051d, /* +0.463647609 */
263	0x09fb385b, /* +0.244978663 */
264	0x051111d4, /* +0.124354995 */
265	0x028b0d43, /* +0.062418810 */
266	0x0145d7e1, /* +0.031239833 */
267	0x00a2f61e, /* +0.015623729 */
268	0x00517c55, /* +0.007812341 */
269	0x0028be53, /* +0.003906230 */
270	0x00145f2e, /* +0.001953123 */
271	0x000a2f98, /* +0.000976562 */
272	0x000517cc, /* +0.000488281 */
273	0x00028be6, /* +0.000244141 */
274	0x000145f3, /* +0.000122070 */
275	0x0000a2f9, /* +0.000061035 */
276	0x0000517c, /* +0.000030518 */
277	0x000028be, /* +0.000015259 */
278	0x0000145f, /* +0.000007629 */
279	0x00000a2f, /* +0.000003815 */
280	0x00000517, /* +0.000001907 */
281	0x0000028b, /* +0.000000954 */
282	0x00000145, /* +0.000000477 */
283	0x000000a2, /* +0.000000238 */
284	0x00000051, /* +0.000000119 */
285	0x00000028, /* +0.000000060 */
286	0x00000014, /* +0.000000030 */
287	0x0000000a, /* +0.000000015 */
288	0x00000005, /* +0.000000007 */
289	0x00000002, /* +0.000000004 */
290	0x00000001, /* +0.000000002 */
291	0x00000000, /* +0.000000001 */
292	0x00000000, /* +0.000000000 */
293	};
294
295
296	/*
297
298	Below here functions do not use standard fixed precision!
299	*/
300
301
302	/**
303	* Implements sin and cos using CORDIC rotation.
304	*
305	* @param phase has range from 0 to 0xffffffff, representing 0 and
306	* 2*pi respectively.
307	* @param cos return address for cos
308	* @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX,
309	* representing -1 and 1 respectively.
310	*
311	* Gives at least 24 bits precision (last 2-8 bits or so are probably off)
312	*/
313	long fsincos(unsigned long phase, fixed32 *cos)
314	{
315	int32_t x, x1, y, y1;
316	unsigned long z, z1;
317	int i;
318
319	/* Setup initial vector */
320	x = cordic_circular_gain;
321	y = 0;
322	z = phase;
323
324	/* The phase has to be somewhere between 0..pi for this to work right */
325	if (z < 0xffffffff / 4) {
326	/* z in first quadrant, z += pi/2 to correct */
327	x = -x;
328	z += 0xffffffff / 4;
329	} else if (z < 3 * (0xffffffff / 4)) {
330	/* z in third quadrant, z -= pi/2 to correct */
331	z -= 0xffffffff / 4;
332	} else {
333	/* z in fourth quadrant, z -= 3pi/2 to correct */
334	x = -x;
335	z -= 3 * (0xffffffff / 4);
336	}
337
338	/* Each iteration adds roughly 1-bit of extra precision */
339	for (i = 0; i < 31; i++) {
340	x1 = x >> i;
341	y1 = y >> i;
342	z1 = atan_table[i];
343
344	/* Decided which direction to rotate vector. Pivot point is pi/2 */
345	if (z >= 0xffffffff / 4) {
346	x -= y1;
347	y += x1;
348	z -= z1;
349	} else {
350	x += y1;
351	y -= x1;
352	z += z1;
353	}
354	}
355
356	if (cos)
357	*cos = x;
358
359	return y;
360	}
361
362


diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h index 6b5137e044..0ecdc5cfbc 100644 --- a/apps/codecs/libwma/wmafixed.h +++ b/apps/codecs/libwma/wmafixed.h
@@ -52,10 +52,10 @@ fixed64 fixdiv64(fixed64 x, fixed64 y);
52	fixed32 fixsqrt32(fixed32 x);	52	fixed32 fixsqrt32(fixed32 x);
53	long fsincos(unsigned long phase, fixed32 *cos);	53	long fsincos(unsigned long phase, fixed32 *cos);
54		54
		55
55	#ifdef CPU_ARM	56	#ifdef CPU_ARM
56		57
57	/*Sign-15.16 format */	58	/*Sign-15.16 format */
58
59	#define fixmul32(x, y) \	59	#define fixmul32(x, y) \
60	({ int32_t __hi; \	60	({ int32_t __hi; \
61	uint32_t __lo; \	61	uint32_t __lo; \
@@ -70,18 +70,6 @@ long fsincos(unsigned long phase, fixed32 *cos);
70	__result; \	70	__result; \
71	})	71	})
72		72
73	#define fixmul32b(x, y) \
74	({ int32_t __hi; \
75	uint32_t __lo; \
76	int32_t __result; \
77	asm ("smull %0, %1, %3, %4\n\t" \
78	"movs %2, %1, lsl #1" \
79	: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
80	: "%r" (x), "r" (y) \
81	: "cc"); \
82	__result; \
83	})
84
85	#elif defined(CPU_COLDFIRE)	73	#elif defined(CPU_COLDFIRE)
86		74
87	static inline int32_t fixmul32(int32_t x, int32_t y)	75	static inline int32_t fixmul32(int32_t x, int32_t y)
@@ -91,9 +79,9 @@ static inline int32_t fixmul32(int32_t x, int32_t y)
91	#endif	79	#endif
92	int32_t t1;	80	int32_t t1;
93	asm (	81	asm (
94	"mac.l %[x], %[y], %%acc0 \n" /* multiply */	82	"mac.l %[x], %[y], %%acc0 \n" // multiply
95	"mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */	83	"mulu.l %[y], %[x] \n" // get lower half, avoid emac stall
96	"movclr.l %%acc0, %[t1] \n" /* get higher half */	84	"movclr.l %%acc0, %[t1] \n" // get higher half
97	"lsr.l #1, %[t1] \n"	85	"lsr.l #1, %[t1] \n"
98	"move.w %[t1], %[x] \n"	86	"move.w %[t1], %[x] \n"
99	"swap %[x] \n"	87	"swap %[x] \n"
@@ -103,17 +91,6 @@ static inline int32_t fixmul32(int32_t x, int32_t y)
103	return x;	91	return x;
104	}	92	}
105		93
106	static inline int32_t fixmul32b(int32_t x, int32_t y)
107	{
108	asm (
109	"mac.l %[x], %[y], %%acc0 \n" /* multiply */
110	"movclr.l %%acc0, %[x] \n" /* get higher half */
111	: [x] "+d" (x)
112	: [y] "d" (y)
113	);
114	return x;
115	}
116
117	#else	94	#else
118		95
119	static inline fixed32 fixmul32(fixed32 x, fixed32 y)	96	static inline fixed32 fixmul32(fixed32 x, fixed32 y)
@@ -127,17 +104,7 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y)
127	return (fixed32)temp;	104	return (fixed32)temp;
128	}	105	}
129		106
130	static inline fixed32 fixmul32b(fixed32 x, fixed32 y)
131	{
132	fixed64 temp;
133
134	temp = x;
135	temp *= y;
136
137	temp >>= 31; //16+31-16 = 31 bits
138
139	return (fixed32)temp;
140	}
141
142	#endif	107	#endif
143		108
		109
		110	/* get fixmul32b from codeclib */