Rearrange the MDCT library lookup tables so that codecs can access them. Access them in cook. Remove old cook trig tables. Replace old 32x16 bit fixed point multiply code with faster ASM ARM/Coldfire versions using full 32 bit precision. Improves performance significantly on PP5024 (~200% realtime for a 64k file) while saving a lot of memory and improving accuracy.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21939 a1c6a512-1295-4272-9138-f99709370657
author: Michael Giacomelli <giac2000@hotmail.com> 2009-07-17 23:53:40 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2009-07-17 23:53:40 +0000
commit: b957f7214be31bc5752625f9b9d60f96a77a9e34 (patch)
tree: 35987cd27988c0442dcb234912d3fb59468f5aad /apps/codecs/libcook/cook_fixpoint.h
parent: c247483ef18360668c997ab64dc7345eaf65209c (diff)
download: rockbox-b957f7214be31bc5752625f9b9d60f96a77a9e34.tar.gz
rockbox-b957f7214be31bc5752625f9b9d60f96a77a9e34.zip
1 files changed, 56 insertions, 9 deletions
diff --git a/apps/codecs/libcook/cook_fixpoint.h b/apps/codecs/libcook/cook_fixpoint.h
index 2e7f68913b..c2ab9299c6 100644
--- a/apps/codecs/libcook/cook_fixpoint.h
+++ b/apps/codecs/libcook/cook_fixpoint.h
@@ -73,14 +73,55 @@ static inline FIXP fixp_pow2(FIXP x, int i)
 * @param a                     fix point value
 * @param b                     fix point fraction, 0 <= b < 1
 */
 static inline FIXP fixp_mult_su(FIXP a, FIXPU b)
 {
+ 
    /* The product is built from two partial products so that no single
       multiply needs more than the low 16 bits / remaining high bits of a. */
    int32_t hb = (a >> 16) * b;        /* (a >> 16) times b: contributes at full weight */
    uint32_t lb = (a & 0xffff) * b;    /* low 16 bits of a times b: carries 16 extra fraction bits */
    /* Scale lb back down by 16 bits and add 1 when the highest discarded
       bit (bit 15) is set, i.e. round to nearest instead of truncating. */
    return hb + (lb >> 16) + ((lb & 0x8000) >> 15);
 }
+/* Faster version of the above using 32x32=64 bit multiply.
+ *
+ * fixmul31(x, y) has one implementation per target, selected at compile
+ * time by CPU_ARM / CPU_COLDFIRE, with a plain C fallback otherwise.
+ *
+ * ARM variant (GCC statement-expression macro): smull writes the signed
+ * 64-bit product of x and y into __lo (low word) and __hi (high word), and
+ * the macro evaluates to __hi << 1.  Bit 0 of the result is therefore
+ * always zero, whereas the C fallback's (x * y) >> 31 also keeps the top
+ * bit of the low word, so the two can differ in the least significant bit.
+ * movs updates the condition flags, which is why "cc" is listed as a
+ * clobber.  x and y each appear once, as asm input operands.
+ */
+#ifdef CPU_ARM
+#define fixmul31(x, y)  \
+    ({ int32_t __hi;  \
+       uint32_t __lo;  \
+       int32_t __result;  \
+       asm ("smull   %0, %1, %3, %4\n\t"  /* __lo, __hi = x * y */  \
+            "movs    %2, %1, lsl #1"  /* __result = __hi << 1 */  \
+            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+            : "%r" (x), "r" (y)  \
+            : "cc");  \
+       __result;  \
+    })
+#elif defined(CPU_COLDFIRE)
+static inline int32_t fixmul31(int32_t x, int32_t y)
+{   /* Coldfire variant of fixmul31: a multiply-accumulate into %acc0
+     * followed by a move-and-clear of that accumulator back into x, which
+     * is then returned.  x is a read-write data-register operand ("+d"),
+     * y is a read-only data-register operand ("d").
+     *
+     * NOTE(review): mac.l accumulates, so the value read back is the bare
+     * product only if %acc0 is zero on entry -- presumably guaranteed by
+     * earlier movclr.l uses; confirm.
+     * NOTE(review): which bits of the product movclr.l returns depends on
+     * how the MAC unit is configured; the "higher half" remark below
+     * presumably assumes a fractional mode set up elsewhere -- confirm.
+     * The asm statement declares no clobbers, so the compiler is not told
+     * that %acc0 is modified.
+     */
+    asm (
+        "mac.l   %[x], %[y], %%acc0  \n" /* multiply x by y, accumulating into acc0 */
+        "movclr.l %%acc0, %[x]  \n"     /* get higher half into x and clear acc0 */
+        : [x] "+d" (x)
+        : [y] "d"  (y)
+    );
+    return x;
+}
+#else
+static inline int32_t fixmul31(int32_t x, int32_t y)
+{   /* Portable fallback for fixmul31: forms the full 64-bit product of x
+     * and y, shifts it right by 31 and returns the low 32 bits of that,
+     * i.e. bits 31..62 of the product.  When y is a fraction scaled by
+     * 2^31 this is x * y expressed in the same scale as x.
+     *
+     * NOTE: >> on a negative int64_t is implementation-defined in C; an
+     * arithmetic (sign-preserving) shift is presumably relied on here.
+     */
+    int64_t temp;
+    temp = x;           /* widen first so the multiply below is done in 64 bits */
+    temp *= y;
+    temp >>= 31;        // discard 31 fraction bits (original note: 16+31-16 = 31 bits)
+    return (int32_t)temp;
+}
+#endif
 /* math functions taken from libavutil/common.h */
 static inline int av_log2(unsigned int v)
@@ -162,7 +203,7 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
    const int n = q->samples_per_channel;
    const int step = 4 << (10 - av_log2(n));
    int i = 0, j = step>>1;
+    
    cook_mdct_backward(2 * n, in, q->mono_mdct_output);
    do {
@@ -184,30 +225,35 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
 }
 #else
 #include <codecs/lib/codeclib.h>
+#include <codecs/lib/mdct_lookup.h>
 static inline void imlt_math(COOKContext *q, FIXP *in)
 {
    const int n = q->samples_per_channel;
-    const int step = 4 << (10 - av_log2(n));
+    const int step = 2 << (10 - av_log2(n));
-    int i = 0, j = step>>1;
+    int i = 0, j = 0;
+    
    mdct_backward(2 * n, in, q->mono_mdct_output);
    do {
        FIXP tmp = q->mono_mdct_output[i];
        
        q->mono_mdct_output[i] =
-          fixp_mult_su(-q->mono_mdct_output[n + i], sincos_lookup[j]);
+          fixmul31(-q->mono_mdct_output[n + i], (sincos_lookup0[j]));
-        q->mono_mdct_output[n + i] = fixp_mult_su(tmp, sincos_lookup[j+1]);
+          
+        q->mono_mdct_output[n + i] = fixmul31(tmp, (sincos_lookup0[j+1]) );
+            
        j += step;
+        
    } while (++i < n/2);
    do {
        FIXP tmp = q->mono_mdct_output[i];
        
        j -= step;
        q->mono_mdct_output[i] =
-          fixp_mult_su(-q->mono_mdct_output[n + i], sincos_lookup[j+1]);
+          fixmul31(-q->mono_mdct_output[n + i], (sincos_lookup0[j+1]) );
-        q->mono_mdct_output[n + i] = fixp_mult_su(tmp, sincos_lookup[j]);
+        q->mono_mdct_output[n + i] = fixmul31(tmp, (sincos_lookup0[j]) );
    } while (++i < n);
 }
 #endif
@@ -291,6 +337,7 @@ static inline void output_math(COOKContext *q, int16_t *out, int chan)
    int j;
    for (j = 0; j < q->samples_per_channel; j++) {
-        out[chan + q->nb_channels * j] = fixp_pow2(q->mono_mdct_output[j], -11);
+        out[chan + q->nb_channels * j] =
+          av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767);
    }
 }
author	Michael Giacomelli <giac2000@hotmail.com>	2009-07-17 23:53:40 +0000
committer	Michael Giacomelli <giac2000@hotmail.com>	2009-07-17 23:53:40 +0000
commit	b957f7214be31bc5752625f9b9d60f96a77a9e34 (patch)
tree	35987cd27988c0442dcb234912d3fb59468f5aad /apps/codecs/libcook/cook_fixpoint.h
parent	c247483ef18360668c997ab64dc7345eaf65209c (diff)
download	rockbox-b957f7214be31bc5752625f9b9d60f96a77a9e34.tar.gz rockbox-b957f7214be31bc5752625f9b9d60f96a77a9e34.zip

diff --git a/apps/codecs/libcook/cook_fixpoint.h b/apps/codecs/libcook/cook_fixpoint.h index 2e7f68913b..c2ab9299c6 100644 --- a/apps/codecs/libcook/cook_fixpoint.h +++ b/apps/codecs/libcook/cook_fixpoint.h
@@ -73,14 +73,55 @@ static inline FIXP fixp_pow2(FIXP x, int i)
73	* @param a fix point value	73	* @param a fix point value
74	* @param b fix point fraction, 0 <= b < 1	74	* @param b fix point fraction, 0 <= b < 1
75	*/	75	*/
		76
76	static inline FIXP fixp_mult_su(FIXP a, FIXPU b)	77	static inline FIXP fixp_mult_su(FIXP a, FIXPU b)
77	{	78	{
		79
78	int32_t hb = (a >> 16) * b;	80	int32_t hb = (a >> 16) * b;
79	uint32_t lb = (a & 0xffff) * b;	81	uint32_t lb = (a & 0xffff) * b;
80		82
81	return hb + (lb >> 16) + ((lb & 0x8000) >> 15);	83	return hb + (lb >> 16) + ((lb & 0x8000) >> 15);
82	}	84	}
83		85
		86	/* Faster version of the above using 32x32=64 bit multiply */
		87	#ifdef CPU_ARM
		88	#define fixmul31(x, y) \
		89	({ int32_t __hi; \
		90	uint32_t __lo; \
		91	int32_t __result; \
		92	asm ("smull %0, %1, %3, %4\n\t" \
		93	"movs %2, %1, lsl #1" \
		94	: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
		95	: "%r" (x), "r" (y) \
		96	: "cc"); \
		97	__result; \
		98	})
		99
		100	#elif defined(CPU_COLDFIRE)
		101	static inline int32_t fixmul31(int32_t x, int32_t y)
		102	{
		103	asm (
		104	"mac.l %[x], %[y], %%acc0 \n" /* multiply */
		105	"movclr.l %%acc0, %[x] \n" /* get higher half */
		106	: [x] "+d" (x)
		107	: [y] "d" (y)
		108	);
		109	return x;
		110	}
		111	#else
		112	static inline int32_t fixmul31(int32_t x, int32_t y)
		113	{
		114	int64_t temp;
		115
		116	temp = x;
		117	temp *= y;
		118
		119	temp >>= 31; //16+31-16 = 31 bits
		120
		121	return (int32_t)temp;
		122	}
		123	#endif
		124
84	/* math functions taken from libavutil/common.h */	125	/* math functions taken from libavutil/common.h */
85		126
86	static inline int av_log2(unsigned int v)	127	static inline int av_log2(unsigned int v)
@@ -162,7 +203,7 @@ static inline void imlt_math(COOKContext q, FIXP in)
162	const int n = q->samples_per_channel;	203	const int n = q->samples_per_channel;
163	const int step = 4 << (10 - av_log2(n));	204	const int step = 4 << (10 - av_log2(n));
164	int i = 0, j = step>>1;	205	int i = 0, j = step>>1;
165		206
166	cook_mdct_backward(2 * n, in, q->mono_mdct_output);	207	cook_mdct_backward(2 * n, in, q->mono_mdct_output);
167		208
168	do {	209	do {
@@ -184,30 +225,35 @@ static inline void imlt_math(COOKContext q, FIXP in)
184	}	225	}
185	#else	226	#else
186	#include <codecs/lib/codeclib.h>	227	#include <codecs/lib/codeclib.h>
		228	#include <codecs/lib/mdct_lookup.h>
187		229
188	static inline void imlt_math(COOKContext q, FIXP in)	230	static inline void imlt_math(COOKContext q, FIXP in)
189	{	231	{
190	const int n = q->samples_per_channel;	232	const int n = q->samples_per_channel;
191	const int step = 4 << (10 - av_log2(n));	233	const int step = 2 << (10 - av_log2(n));
192	int i = 0, j = step>>1;	234	int i = 0, j = 0;
193		235
194	mdct_backward(2 * n, in, q->mono_mdct_output);	236	mdct_backward(2 * n, in, q->mono_mdct_output);
195		237
196	do {	238	do {
197	FIXP tmp = q->mono_mdct_output[i];	239	FIXP tmp = q->mono_mdct_output[i];
198		240
199	q->mono_mdct_output[i] =	241	q->mono_mdct_output[i] =
200	fixp_mult_su(-q->mono_mdct_output[n + i], sincos_lookup[j]);	242	fixmul31(-q->mono_mdct_output[n + i], (sincos_lookup0[j]));
201	q->mono_mdct_output[n + i] = fixp_mult_su(tmp, sincos_lookup[j+1]);	243
		244	q->mono_mdct_output[n + i] = fixmul31(tmp, (sincos_lookup0[j+1]) );
		245
202	j += step;	246	j += step;
		247
203	} while (++i < n/2);	248	} while (++i < n/2);
		249
204	do {	250	do {
205	FIXP tmp = q->mono_mdct_output[i];	251	FIXP tmp = q->mono_mdct_output[i];
206		252
207	j -= step;	253	j -= step;
208	q->mono_mdct_output[i] =	254	q->mono_mdct_output[i] =
209	fixp_mult_su(-q->mono_mdct_output[n + i], sincos_lookup[j+1]);	255	fixmul31(-q->mono_mdct_output[n + i], (sincos_lookup0[j+1]) );
210	q->mono_mdct_output[n + i] = fixp_mult_su(tmp, sincos_lookup[j]);	256	q->mono_mdct_output[n + i] = fixmul31(tmp, (sincos_lookup0[j]) );
211	} while (++i < n);	257	} while (++i < n);
212	}	258	}
213	#endif	259	#endif
@@ -291,6 +337,7 @@ static inline void output_math(COOKContext q, int16_t out, int chan)
291	int j;	337	int j;
292		338
293	for (j = 0; j < q->samples_per_channel; j++) {	339	for (j = 0; j < q->samples_per_channel; j++) {
294	out[chan + q->nb_channels * j] = fixp_pow2(q->mono_mdct_output[j], -11);	340	out[chan + q->nb_channels * j] =
		341	av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767);
295	}	342	}
296	}	343	}