libmusepack: ARMv6 assembler for mpc_decoder_windowing_D; speeds up decoding of a 128 kbps sample file by 2 MHz, or 8%, on the Gigabeat S. The output differs from that of the C implementation and the other ARM implementation by +/-1 in less than 0.1% of the output samples.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28487 a1c6a512-1295-4272-9138-f99709370657
author: Nils Wallménius <nils@rockbox.org> 2010-11-05 11:20:50 +0000
committer: Nils Wallménius <nils@rockbox.org> 2010-11-05 11:20:50 +0000
commit: dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27 (patch)
tree: 7f18e2158a33462af7eae0d068e437be01ce5805 /apps/codecs/libmusepack/synth_filter.c
parent: 8404c53ee6d2f828fb7ea2b0713d2cd0afcfeeca (diff)
download: rockbox-dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27.tar.gz
rockbox-dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27.zip
1 files changed, 10 insertions, 4 deletions
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index 9a79328106..94c57eb213 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -46,10 +46,16 @@
 #if defined(MPC_FIXED_POINT)
    #if defined(CPU_ARM)
-      // do not up-scale D-values to achieve higher speed in smull/mlal
+      #if ARM_ARCH >= 6
-      // operations. saves ~14/8 = 1.75 cycles per multiplication
+        // on ARMv6 we use 32*32=64>>32 multiplies (smmul/smmla) so we need to scale up the D coefficients
-      #define D(value)  (value)
+        // the ARM11 multiplier doesn't have early termination so the magnitude of the multiplicands does not
-      
+        // matter for speed.
+        #define D(value)  (value << (14))
+      #else
+        // do not up-scale D-values to achieve higher speed in smull/mlal
+        // operations. saves ~14/8 = 1.75 cycles per multiplication
+        #define D(value)  (value)
+      #endif
      // in this configuration a post-shift by >>16 is needed after synthesis
    #else
      // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
author	Nils Wallménius <nils@rockbox.org>	2010-11-05 11:20:50 +0000
committer	Nils Wallménius <nils@rockbox.org>	2010-11-05 11:20:50 +0000
commit	dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27 (patch)
tree	7f18e2158a33462af7eae0d068e437be01ce5805 /apps/codecs/libmusepack/synth_filter.c
parent	8404c53ee6d2f828fb7ea2b0713d2cd0afcfeeca (diff)
download	rockbox-dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27.tar.gz rockbox-dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27.zip

diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index 9a79328106..94c57eb213 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -46,10 +46,16 @@
46		46
47	#if defined(MPC_FIXED_POINT)	47	#if defined(MPC_FIXED_POINT)
48	#if defined(CPU_ARM)	48	#if defined(CPU_ARM)
49	// do not up-scale D-values to achieve higher speed in smull/mlal	49	#if ARM_ARCH >= 6
50	// operations. saves ~14/8 = 1.75 cycles per multiplication	50	// on ARMv6 we use 32*32=64>>32 multiplies (smmul/smmla) so we need to scale up the D coefficients
51	#define D(value) (value)	51	// the ARM11 multiplier doesn't have early termination so the magnitude of the multiplicands does not
52		52	// matter for speed.
		53	#define D(value) (value << (14))
		54	#else
		55	// do not up-scale D-values to achieve higher speed in smull/mlal
		56	// operations. saves ~14/8 = 1.75 cycles per multiplication
		57	#define D(value) (value)
		58	#endif
53	// in this configuration a post-shift by >>16 is needed after synthesis	59	// in this configuration a post-shift by >>16 is needed after synthesis
54	#else	60	#else
55	// saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17	61	// saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17