From f52696ef8a6e46b8379a0b2bc3d0661df3f9312e Mon Sep 17 00:00:00 2001
From: Andree Buschmann <AndreeBuschmann@t-online.de>
Date: Tue, 10 Jun 2008 06:11:50 +0000
Subject: Correct DSP_SET_SAMPLE_DEPTH to 29 for mpc (18.14 fixed point
 samples). Higher precision for fast 32x32=32 multiplication in dewindowing
 part of synthesis filter.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17708 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/libmusepack/synth_filter.c     | 32 ++++++++++++++++++------------
 apps/codecs/libmusepack/synth_filter_arm.S |  5 +++--
 apps/codecs/mpc.c                          |  4 +++-
 3 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'apps')

diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index 44d7192664..ae94741368 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -44,11 +44,15 @@
 
 #if defined(MPC_FIXED_POINT)
    #if defined(OPTIMIZE_FOR_SPEED)
-      // round to +/- 2^14 as pre-shift before 32=32x32-multiply
+      // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply
       #define D(value)  (MPC_SHR_RND(value, 3))
       
-      // round to +/- 2^17 as pre-shift before 32=32x32-multiply
-      #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 14)
+      // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply
+      // samples are 18.14 fixed point; after this shift they are 30.2, of
+      // which only 15.2 bits are significant (not including sign)
+      #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12)
+      
+      // in this configuration a post-shift by >>1 is needed after synthesis
    #else
       // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
       #define D(value)  (value << (14))
@@ -65,7 +69,7 @@
    #define MPC_V_PRESHIFT(X) (X)
 #endif
     
-// Di_opt coefficients are +/- 2^17
+// Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
 static const MPC_SAMPLE_FORMAT  Di_opt [512] ICONST_ATTR = {
 /*           0        1        2         3         4         5          6          7         8         9       10        11       12       13      14     15  */
 /*  0 */  D( 0), -D( 29), D( 213), -D( 459),  D(2037), -D(5153),  D( 6574), -D(37489), D(75038),  D(37489), D(6574),  D(5153), D(2037),  D(459), D(213), D(29),
@@ -462,6 +466,7 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
                + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
                + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
                + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
+         *Data >>= 1; // post shift to compensate for pre-shifting
          Data += 1;
          // total: 32 * (16 muls, 15 adds)
       }
@@ -493,6 +498,7 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
                "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
                "mac.l %%d3, %%a5, %%acc0                     \n\t"
                "movclr.l %%acc0, %%d0                        \n\t"
+               "lsl.l #1, %%d0                               \n\t"
                "move.l %%d0, (%[Data])+                      \n"
                : [Data] "+a" (Data)
                : [V] "a" (V), [D] "a" (D)
@@ -500,16 +506,16 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
          }
       #else
          // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
-         for ( k = 0; k < 32; k++, D += 16, V++ ) 
+         for ( k = 0; k < 32; k++, D += 16, V++ )
          {
-            *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
-                  + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
-                  + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
-                  + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
-                  + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
-                  + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
-                  + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
-                  + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
+            *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
+                  + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
+                  + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
+                  + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
+                  + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
+                  + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
+                  + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
+                  + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
             Data += 1;
             // total: 16 muls, 15 adds, 16 shifts
          }
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
index ce668e888c..7b2d2dfd23 100755
--- a/apps/codecs/libmusepack/synth_filter_arm.S
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -75,7 +75,8 @@ mpc_decoder_windowing_D:
     ldr r11, [r1, #896*4]   /* 14 */
     mla r12, r9, r11, r12     
     ldr r11, [r1, #992*4]   /* 15 */
-    mla r12, r10, r11, r12     
+    mla r12, r10, r11, r12
+    mov r12, r12, asr #1    /* post shift to compensate for pre-shifting */
     str r12, [r0], #4       /* store Data */
     add r1, r1, #4          /* V++ */
     
@@ -142,7 +143,7 @@ mpc_decoder_windowing_D:
     smlal r11, r12, r9, r11  
     ldr r11, [r1, #992*4]   /* 15 */
     smlal r11, r12, r10, r11
-    mov r4, r12, lsl #1     /* get result from hi-part */
+    mov r4, r12, lsl #2     /* get result from hi-part, lose 2 bits */
     str r4, [r0], #4        /* store Data */
     add r1, r1, #4          /* V++ */
     
diff --git a/apps/codecs/mpc.c b/apps/codecs/mpc.c
index 79264d3bfc..bdf675d169 100644
--- a/apps/codecs/mpc.c
+++ b/apps/codecs/mpc.c
@@ -76,7 +76,9 @@ enum codec_status codec_main(void)
     mpc_streaminfo info;
     int retval = CODEC_OK;
     
-    ci->configure(DSP_SET_SAMPLE_DEPTH, 28);
+    /* musepack's sample representation is 18.14
+     * DSP_SET_SAMPLE_DEPTH = 14 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 29 */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 29);
     
     /* Create a decoder instance */
     reader.read = read_impl;
-- 
cgit v1.2.3