From f52696ef8a6e46b8379a0b2bc3d0661df3f9312e Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Tue, 10 Jun 2008 06:11:50 +0000 Subject: Correct DSP_SET_SAMPLE_DEPTH to 29 for mpc (18.14 fixed point samples). Higher precision for fast 32x32=32 multiplication in dewindowing part of synthesis filter. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17708 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libmusepack/synth_filter.c | 32 ++++++++++++++++++------------ apps/codecs/libmusepack/synth_filter_arm.S | 5 +++-- apps/codecs/mpc.c | 4 +++- 3 files changed, 25 insertions(+), 16 deletions(-) (limited to 'apps') diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c index 44d7192664..ae94741368 100644 --- a/apps/codecs/libmusepack/synth_filter.c +++ b/apps/codecs/libmusepack/synth_filter.c @@ -44,11 +44,15 @@ #if defined(MPC_FIXED_POINT) #if defined(OPTIMIZE_FOR_SPEED) - // round to +/- 2^14 as pre-shift before 32=32x32-multiply + // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply #define D(value) (MPC_SHR_RND(value, 3)) - // round to +/- 2^17 as pre-shift before 32=32x32-multiply - #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 14) + // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply + // samples are 18.14 fixed point. 
30.2 after this shift, whereas the + // 15.2 bits are significant (not including sign) + #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12) + + // in this configuration a post-shift by >>1 is needed after synthesis #else // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17 #define D(value) (value << (14)) @@ -65,7 +69,7 @@ #define MPC_V_PRESHIFT(X) (X) #endif -// Di_opt coefficients are +/- 2^17 +// Di_opt coefficients are +/- 2^17 (pre-shifted by <<16) static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = { /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ /* 0 */ D( 0), -D( 29), D( 213), -D( 459), D(2037), -D(5153), D( 6574), -D(37489), D(75038), D(37489), D(6574), D(5153), D(2037), D(459), D(213), D(29), @@ -462,6 +466,7 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; + *Data >>= 1; // post shift to compensate for pre-shifting Data += 1; // total: 32 * (16 muls, 15 adds) } @@ -493,6 +498,7 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t" "mac.l %%d3, %%a5, %%acc0 \n\t" "movclr.l %%acc0, %%d0 \n\t" + "lsl.l #1, %%d0 \n\t" "move.l %%d0, (%[Data])+ \n" : [Data] "+a" (Data) : [V] "a" (V), [D] "a" (D) @@ -500,16 +506,16 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, } #else // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C - for ( k = 0; k < 32; k++, D += 16, V++ ) + for ( k = 0; k < 32; k++, D += 16, V++ ) { - *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) - + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) - + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) - + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) - + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) - + 
MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) - + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) - + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); + *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30) + + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30) + + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30) + + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30) + + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30) + + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30) + + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30) + + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30); Data += 1; // total: 16 muls, 15 adds, 16 shifts } diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S index ce668e888c..7b2d2dfd23 100755 --- a/apps/codecs/libmusepack/synth_filter_arm.S +++ b/apps/codecs/libmusepack/synth_filter_arm.S @@ -75,7 +75,8 @@ mpc_decoder_windowing_D: ldr r11, [r1, #896*4] /* 14 */ mla r12, r9, r11, r12 ldr r11, [r1, #992*4] /* 15 */ - mla r12, r10, r11, r12 + mla r12, r10, r11, r12 + mov r12, r12, asr #1 /* post shift to compensate for pre-shifting */ str r12, [r0], #4 /* store Data */ add r1, r1, #4 /* V++ */ @@ -142,7 +143,7 @@ mpc_decoder_windowing_D: smlal r11, r12, r9, r11 ldr r11, [r1, #992*4] /* 15 */ smlal r11, r12, r10, r11 - mov r4, r12, lsl #1 /* get result from hi-part */ + mov r4, r12, lsl #2 /* get result from hi-part, lose 2 bits */ str r4, [r0], #4 /* store Data */ add r1, r1, #4 /* V++ */ diff --git a/apps/codecs/mpc.c b/apps/codecs/mpc.c index 79264d3bfc..bdf675d169 100644 --- a/apps/codecs/mpc.c +++ b/apps/codecs/mpc.c @@ -76,7 +76,9 @@ enum codec_status codec_main(void) mpc_streaminfo info; int retval = CODEC_OK; - ci->configure(DSP_SET_SAMPLE_DEPTH, 28); + /* musepack's sample 
representation is 18.14 + * DSP_SET_SAMPLE_DEPTH = 14 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 29 */ + ci->configure(DSP_SET_SAMPLE_DEPTH, 29); /* Create a decoder instance */ reader.read = read_impl; -- cgit v1.2.3