summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2007-08-30 11:41:05 +0000
committerThom Johansen <thomj@rockbox.org>2007-08-30 11:41:05 +0000
commit953348f1ba4845984980ad71b05c17d4ded84b30 (patch)
tree728605bcacc89b221e71065d0d092d1b06132799 /apps/codecs
parentc7b3f8be9b1ea505463c3d8ba17740af36c6df2c (diff)
downloadrockbox-953348f1ba4845984980ad71b05c17d4ded84b30.tar.gz
rockbox-953348f1ba4845984980ad71b05c17d4ded84b30.zip
FS #7166. Musepack optimizations and cleanups. Huge speedup on ARM. Note that all ARM (Ipod/Sansa/Gigabeat) targets will now play back Musepack files with somewhat reduced accuracy. Please get in touch via forums or Flyspray if you've got sample files where this is very apparent. Most users should hear no difference.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14531 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r--apps/codecs/libmusepack/Makefile8
-rw-r--r--apps/codecs/libmusepack/math.h350
-rw-r--r--apps/codecs/libmusepack/requant.c4
-rw-r--r--apps/codecs/libmusepack/synth_filter.c874
-rw-r--r--apps/codecs/mpc.c4
5 files changed, 723 insertions, 517 deletions
diff --git a/apps/codecs/libmusepack/Makefile b/apps/codecs/libmusepack/Makefile
index e50769719b..a70d33644a 100644
--- a/apps/codecs/libmusepack/Makefile
+++ b/apps/codecs/libmusepack/Makefile
@@ -14,7 +14,13 @@ ifdef APPEXTRA
14 INCLUDES += $(patsubst %,-I$(APPSDIR)/%,$(subst :, ,$(APPEXTRA))) 14 INCLUDES += $(patsubst %,-I$(APPSDIR)/%,$(subst :, ,$(APPEXTRA)))
15endif 15endif
16 16
17MUSEPACKOPTS = -O2 17# libmusepack is faster on ARM-targets with -O1 instead of -O2
18ifeq ($(CPU),arm)
19 MUSEPACKOPTS += -O1
20else
21 MUSEPACKOPTS += -O2
22endif
23
18CFLAGS = $(INCLUDES) $(GCCOPTS) $(TARGET_INC) $(MUSEPACKOPTS) $(TARGET) \ 24CFLAGS = $(INCLUDES) $(GCCOPTS) $(TARGET_INC) $(MUSEPACKOPTS) $(TARGET) \
19$(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} $(PROFILE_OPTS) 25$(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} $(PROFILE_OPTS)
20 26
diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h
index f9e9d08b08..a015d45cbb 100644
--- a/apps/codecs/libmusepack/math.h
+++ b/apps/codecs/libmusepack/math.h
@@ -44,174 +44,192 @@
44 44
45#ifdef MPC_FIXED_POINT 45#ifdef MPC_FIXED_POINT
46 46
47 #ifdef _WIN32_WCE
48 #include <cmnintrin.h>
49 #define MPC_HAVE_MULHIGH
50 #endif
51
52 #define MPC_FIXED_POINT_SCALE_SHIFT (MPC_FIXED_POINT_SHIFT + MPC_FIXED_POINT_FRACTPART)
53 #define MPC_FIXED_POINT_SCALE (1 << (MPC_FIXED_POINT_SCALE_SHIFT - 1))
54 //in fixedpoint mode, results in decode output buffer are in -MPC_FIXED_POINT_SCALE ... MPC_FIXED_POINT_SCALE range
55
56 #define MPC_FIXED_POINT_FRACTPART 14
57 typedef mpc_int32_t MPC_SAMPLE_FORMAT;
58 typedef mpc_int64_t MPC_SAMPLE_FORMAT_MULTIPLY;
59
60 #define MAKE_MPC_SAMPLE(X) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<MPC_FIXED_POINT_FRACTPART))
61 #define MAKE_MPC_SAMPLE_EX(X,Y) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<(Y)))
62
63 #define MPC_SHR_RND(X, Y) ((X+(1<<(Y-1)))>>Y)
64
65 #if defined(CPU_COLDFIRE)
66
67 #define MPC_MULTIPLY(X,Y) mpc_multiply((X), (Y))
68 #define MPC_MULTIPLY_EX(X,Y,Z) mpc_multiply_ex((X), (Y), (Z))
69
70 static inline MPC_SAMPLE_FORMAT mpc_multiply(MPC_SAMPLE_FORMAT x,
71 MPC_SAMPLE_FORMAT y)
72 {
73 MPC_SAMPLE_FORMAT t1, t2;
74 asm volatile (
75 "mac.l %[x],%[y],%%acc0\n" /* multiply */
76 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
77 "movclr.l %%acc0,%[t1] \n" /* get higher half */
78 "moveq.l #17,%[t2] \n"
79 "asl.l %[t2],%[t1] \n" /* hi <<= 17, plus one free */
80 "moveq.l #14,%[t2] \n"
81 "lsr.l %[t2],%[x] \n" /* (unsigned)lo >>= 14 */
82 "or.l %[x],%[t1] \n" /* combine result */
83 : /* outputs */
84 [t1]"=&d"(t1),
85 [t2]"=&d"(t2),
86 [x] "+d" (x)
87 : /* inputs */
88 [y] "d" (y)
89 );
90 return t1;
91 }
92
93 static inline MPC_SAMPLE_FORMAT mpc_multiply_ex(MPC_SAMPLE_FORMAT x,
94 MPC_SAMPLE_FORMAT y,
95 unsigned shift)
96 {
97 MPC_SAMPLE_FORMAT t1, t2;
98 asm volatile (
99 "mac.l %[x],%[y],%%acc0\n" /* multiply */
100 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
101 "movclr.l %%acc0,%[t1] \n" /* get higher half */
102 "moveq.l #31,%[t2] \n"
103 "sub.l %[sh],%[t2] \n" /* t2 = 31 - shift */
104 "ble.s 1f \n"
105 "asl.l %[t2],%[t1] \n" /* hi <<= 31 - shift */
106 "lsr.l %[sh],%[x] \n" /* (unsigned)lo >>= shift */
107 "or.l %[x],%[t1] \n" /* combine result */
108 "bra.s 2f \n"
109 "1: \n"
110 "neg.l %[t2] \n" /* t2 = shift - 31 */
111 "asr.l %[t2],%[t1] \n" /* hi >>= t2 */
112 "2: \n"
113 : /* outputs */
114 [t1]"=&d"(t1),
115 [t2]"=&d"(t2),
116 [x] "+d" (x)
117 : /* inputs */
118 [y] "d" (y),
119 [sh]"d" (shift)
120 );
121 return t1;
122 }
123 #elif defined(CPU_ARM)
124 // borrowed and adapted from libMAD
125 #define MPC_MULTIPLY(X,Y) \
126 ({ \
127 MPC_SAMPLE_FORMAT low; \
128 MPC_SAMPLE_FORMAT high; \
129 asm volatile ( /* will calculate: result = (X*Y)>>14 */ \
130 "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
131 "mov %0, %0, lsr #14 \n\t" /* %0 = %0 >> 14 */ \
132 "orr %0, %0, %1, lsl #18 \n\t"/* result = %0 OR (%1 << 18) */ \
133 : "=&r"(low), "=&r" (high) \
134 : "r"(X),"r"(Y)); \
135 low; \
136 })
137
138 // borrowed and adapted from libMAD
139 #define MPC_MULTIPLY_EX(X,Y,Z) \
140 ({ \
141 MPC_SAMPLE_FORMAT low; \
142 MPC_SAMPLE_FORMAT high; \
143 asm volatile ( /* will calculate: result = (X*Y)>>Z */ \
144 "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
145 "mov %0, %0, lsr %4 \n\t" /* %0 = %0 >> Z */ \
146 "orr %0, %0, %1, lsl %5 \n\t" /* result = %0 OR (%1 << (32-Z)) */ \
147 : "=&r"(low), "=&r" (high) \
148 : "r"(X),"r"(Y),"r"(Z),"r"(32-Z)); \
149 low; \
150 })
151 #else /* libmusepack standard */
152
153 #define MPC_MULTIPLY_NOTRUNCATE(X,Y) \
154 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> MPC_FIXED_POINT_FRACTPART)
155
156 #define MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z) \
157 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> (Z))
158
159 #ifdef _DEBUG
160 static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2)
161 {
162 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_NOTRUNCATE(item1,item2);
163 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
164 return (MPC_SAMPLE_FORMAT)temp;
165 }
166
167 static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY_EX(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2,unsigned shift)
168 {
169 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_EX_NOTRUNCATE(item1,item2,shift);
170 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
171 return (MPC_SAMPLE_FORMAT)temp;
172 }
173 #else
174 #define MPC_MULTIPLY(X,Y) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_NOTRUNCATE(X,Y))
175 #define MPC_MULTIPLY_EX(X,Y,Z) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z))
176 #endif
177
178 #endif
179
180 #ifdef MPC_HAVE_MULHIGH
181 #define MPC_MULTIPLY_FRACT(X,Y) _MulHigh(X,Y)
182 #else
183 #if defined(CPU_COLDFIRE)
184 /* loses one bit of accuracy. The rest of the macros won't be as easy as this... */
185 #define MPC_MULTIPLY_FRACT(X,Y) \
186 ({ \
187 MPC_SAMPLE_FORMAT t; \
188 asm volatile ( \
189 "mac.l %[A], %[B], %%acc0\n\t" \
190 "movclr.l %%acc0, %[t]\n\t" \
191 "asr.l #1, %[t]\n\t" \
192 : [t] "=d" (t) \
193 : [A] "r" ((X)), [B] "r" ((Y))); \
194 t; \
195 })
196 #elif defined(CPU_ARM)
197 // borrowed and adapted from libMAD
198 #define MPC_MULTIPLY_FRACT(X,Y) \
199 ({ \
200 MPC_SAMPLE_FORMAT low; \
201 MPC_SAMPLE_FORMAT high; \
202 asm volatile ( /* will calculate: result = (X*Y)>>32 */ \
203 "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
204 : "=&r"(low), "=&r" (high) /* result = %1 [32..63], saves the >>32 */ \
205 : "r"(X),"r"(Y)); \
206 high; \
207 })
208 #else
209 #define MPC_MULTIPLY_FRACT(X,Y) MPC_MULTIPLY_EX(X,Y,32)
210 #endif
211 #endif
212
213 #define MPC_MAKE_FRACT_CONST(X) (MPC_SAMPLE_FORMAT)((X) * (double)(((mpc_int64_t)1)<<32) )
214
215 #define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y))
47 216
48#ifdef _WIN32_WCE
49
50#include <cmnintrin.h>
51
52#define MPC_HAVE_MULHIGH
53
54#endif
55
56
57#define MPC_FIXED_POINT_SCALE_SHIFT (MPC_FIXED_POINT_SHIFT + MPC_FIXED_POINT_FRACTPART)
58#define MPC_FIXED_POINT_SCALE (1 << (MPC_FIXED_POINT_SCALE_SHIFT - 1))
59
60
61//in fixedpoint mode, results in decode output buffer are in -MPC_FIXED_POINT_SCALE ... MPC_FIXED_POINT_SCALE range
62
63#define MPC_FIXED_POINT_FRACTPART 14
64typedef mpc_int32_t MPC_SAMPLE_FORMAT;
65
66typedef mpc_int64_t MPC_SAMPLE_FORMAT_MULTIPLY;
67
68#define MAKE_MPC_SAMPLE(X) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<MPC_FIXED_POINT_FRACTPART))
69#define MAKE_MPC_SAMPLE_EX(X,Y) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<(Y)))
70
71#if defined(CPU_COLDFIRE)
72
73#define MPC_MULTIPLY(X,Y) mpc_multiply((X), (Y))
74#define MPC_MULTIPLY_EX(X,Y,Z) mpc_multiply_ex((X), (Y), (Z))
75
76static inline MPC_SAMPLE_FORMAT mpc_multiply(MPC_SAMPLE_FORMAT x,
77 MPC_SAMPLE_FORMAT y)
78{
79 MPC_SAMPLE_FORMAT t1, t2;
80 asm volatile (
81 "mac.l %[x],%[y],%%acc0\n" /* multiply */
82 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
83 "movclr.l %%acc0,%[t1] \n" /* get higher half */
84 "moveq.l #17,%[t2] \n"
85 "asl.l %[t2],%[t1] \n" /* hi <<= 17, plus one free */
86 "moveq.l #14,%[t2] \n"
87 "lsr.l %[t2],%[x] \n" /* (unsigned)lo >>= 14 */
88 "or.l %[x],%[t1] \n" /* combine result */
89 : /* outputs */
90 [t1]"=&d"(t1),
91 [t2]"=&d"(t2),
92 [x] "+d" (x)
93 : /* inputs */
94 [y] "d" (y)
95 );
96 return t1;
97}
98
99static inline MPC_SAMPLE_FORMAT mpc_multiply_ex(MPC_SAMPLE_FORMAT x,
100 MPC_SAMPLE_FORMAT y,
101 unsigned shift)
102{
103 MPC_SAMPLE_FORMAT t1, t2;
104 asm volatile (
105 "mac.l %[x],%[y],%%acc0\n" /* multiply */
106 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
107 "movclr.l %%acc0,%[t1] \n" /* get higher half */
108 "moveq.l #31,%[t2] \n"
109 "sub.l %[sh],%[t2] \n" /* t2 = 31 - shift */
110 "ble.s 1f \n"
111 "asl.l %[t2],%[t1] \n" /* hi <<= 31 - shift */
112 "lsr.l %[sh],%[x] \n" /* (unsigned)lo >>= shift */
113 "or.l %[x],%[t1] \n" /* combine result */
114 "bra.s 2f \n"
115 "1: \n"
116 "neg.l %[t2] \n" /* t2 = shift - 31 */
117 "asr.l %[t2],%[t1] \n" /* hi >>= t2 */
118 "2: \n"
119 : /* outputs */
120 [t1]"=&d"(t1),
121 [t2]"=&d"(t2),
122 [x] "+d" (x)
123 : /* inputs */
124 [y] "d" (y),
125 [sh]"d" (shift)
126 );
127 return t1;
128}
129#else /* libmusepack standard */
130
131#define MPC_MULTIPLY_NOTRUNCATE(X,Y) \
132 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> MPC_FIXED_POINT_FRACTPART)
133
134#define MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z) \
135 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> (Z))
136
137#ifdef _DEBUG
138static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2)
139{
140 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_NOTRUNCATE(item1,item2);
141 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
142 return (MPC_SAMPLE_FORMAT)temp;
143}
144
145static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY_EX(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2,unsigned shift)
146{
147 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_EX_NOTRUNCATE(item1,item2,shift);
148 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
149 return (MPC_SAMPLE_FORMAT)temp;
150}
151#else
152#define MPC_MULTIPLY(X,Y) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_NOTRUNCATE(X,Y))
153#define MPC_MULTIPLY_EX(X,Y,Z) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z))
154#endif
155
156#endif
157
158#ifdef MPC_HAVE_MULHIGH
159#define MPC_MULTIPLY_FRACT(X,Y) _MulHigh(X,Y)
160#else
161#if defined(CPU_COLDFIRE)
162/* loses one bit of accuracy.
163 the rest of the macros won't be as easy as this... */
164#define MPC_MULTIPLY_FRACT(X,Y) \
165 ({ \
166 MPC_SAMPLE_FORMAT t; \
167 asm volatile ( \
168 "mac.l %[A], %[B], %%acc0\n\t" \
169 "movclr.l %%acc0, %[t]\n\t" \
170 "asr.l #1, %[t]\n\t" \
171 : [t] "=d" (t) \
172 : [A] "r" ((X)), [B] "r" ((Y))); \
173 t; \
174 })
175#else 217#else
176#define MPC_MULTIPLY_FRACT(X,Y) MPC_MULTIPLY_EX(X,Y,32) 218 //in floating-point mode, decoded samples are in -1...1 range
177#endif 219
178#endif 220 typedef float MPC_SAMPLE_FORMAT;
179 221
180#define MPC_MAKE_FRACT_CONST(X) (MPC_SAMPLE_FORMAT)((X) * (double)(((mpc_int64_t)1)<<32) ) 222 #define MAKE_MPC_SAMPLE(X) ((MPC_SAMPLE_FORMAT)(X))
181#define MPC_MULTIPLY_FRACT_CONST(X,Y) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST(Y)) 223 #define MAKE_MPC_SAMPLE_EX(X,Y) ((MPC_SAMPLE_FORMAT)(X))
182#define MPC_MULTIPLY_FRACT_CONST_FIX(X,Y,Z) ( MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y / (1<<(Z)) )) << (Z) ) 224
183#define MPC_MULTIPLY_FRACT_CONST_SHR(X,Y,Z) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y / (1<<(Z)) )) 225 #define MPC_MULTIPLY_FRACT(X,Y) ((X)*(Y))
184 226 #define MPC_MAKE_FRACT_CONST(X) (X)
185#define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y)) 227
186#define MPC_SCALE_CONST(X,Y,Z) MPC_MULTIPLY_EX(X,MAKE_MPC_SAMPLE_EX(Y,Z),(Z)) 228 #define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y))
187#define MPC_SCALE_CONST_SHL(X,Y,Z,S) MPC_MULTIPLY_EX(X,MAKE_MPC_SAMPLE_EX(Y,Z),(Z)-(S)) 229 #define MPC_MULTIPLY(X,Y) ((X)*(Y))
188#define MPC_SCALE_CONST_SHR(X,Y,Z,S) MPC_MULTIPLY_EX(X,MAKE_MPC_SAMPLE_EX(Y,Z),(Z)+(S)) 230 #define MPC_MULTIPLY_EX(X,Y,Z) ((X)*(Y))
189#define MPC_SHR(X,Y) ((X)>>(Y)) 231
190#define MPC_SHL(X,Y) ((X)<<(Y)) 232 #define MPC_SHR_RND(X, Y) (X)
191
192#else
193
194//in floating-point mode, decoded samples are in -1...1 range
195
196typedef float MPC_SAMPLE_FORMAT;
197
198#define MAKE_MPC_SAMPLE(X) ((MPC_SAMPLE_FORMAT)(X))
199#define MAKE_MPC_SAMPLE_EX(X,Y) ((MPC_SAMPLE_FORMAT)(X))
200
201#define MPC_MULTIPLY_FRACT(X,Y) ((X)*(Y))
202#define MPC_MAKE_FRACT_CONST(X) (X)
203#define MPC_MULTIPLY_FRACT_CONST(X,Y) MPC_MULTPLY_FRACT(X,MPC_MAKE_FRACT_CONST(Y))
204#define MPC_MULTIPLY_FRACT_CONST_SHR(X,Y,Z) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y ))
205#define MPC_MULTIPLY_FRACT_CONST_FIX(X,Y,Z) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y ))
206
207#define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y))
208#define MPC_MULTIPLY(X,Y) ((X)*(Y))
209#define MPC_MULTIPLY_EX(X,Y,Z) ((X)*(Y))
210#define MPC_SCALE_CONST(X,Y,Z) ((X)*(Y))
211#define MPC_SCALE_CONST_SHL(X,Y,Z,S) ((X)*(Y))
212#define MPC_SCALE_CONST_SHR(X,Y,Z,S) ((X)*(Y))
213#define MPC_SHR(X,Y) (X)
214#define MPC_SHL(X,Y) (X)
215 233
216#endif 234#endif
217 235
diff --git a/apps/codecs/libmusepack/requant.c b/apps/codecs/libmusepack/requant.c
index a72a165786..6b77585d32 100644
--- a/apps/codecs/libmusepack/requant.c
+++ b/apps/codecs/libmusepack/requant.c
@@ -53,8 +53,8 @@ const mpc_uint32_t Res_bit [18] = {
53const MPC_SAMPLE_FORMAT __Cc [1 + 18] = { 53const MPC_SAMPLE_FORMAT __Cc [1 + 18] = {
54 _(111.285962475327f), // 32768/2/255*sqrt(3) 54 _(111.285962475327f), // 32768/2/255*sqrt(3)
55 _(65536.000000000000f), _(21845.333333333332f), _(13107.200000000001f), _(9362.285714285713f), 55 _(65536.000000000000f), _(21845.333333333332f), _(13107.200000000001f), _(9362.285714285713f),
56 _(7281.777777777777f), _(4369.066666666666f), _(2114.064516129032f), _(1040.253968253968f), 56 _(7281.777777777777f), _(4369.066666666666f), _(2114.064516129032f), _(1040.253968253968f),
57 _(516.031496062992f), _(257.003921568627f), _(128.250489236790f), _(64.062561094819f), 57 _(516.031496062992f), _(257.003921568627f), _(128.250489236790f), _(64.062561094819f),
58 _(32.015632633121f), _(16.003907203907f), _(8.000976681723f), _(4.000244155527f), 58 _(32.015632633121f), _(16.003907203907f), _(8.000976681723f), _(4.000244155527f),
59 _(2.000061037018f), _(1.000015259021f) 59 _(2.000061037018f), _(1.000015259021f)
60}; 60};
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index 0dabe59150..eca23804c5 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -39,19 +39,51 @@
39#include "musepack.h" 39#include "musepack.h"
40#include "internal.h" 40#include "internal.h"
41 41
42/* S E T T I N G S */
43// choose speed vs. accuracy for MPC_FIXED_POINT
44// the speed setting increases decoding speed on ARM only (+20%); the loss of accuracy is about 5 dB SNR (15-bit output precision)
45// to disable the speed optimization, comment out the OPTIMIZE_FOR_SPEED define below
46#if defined(MPC_FIXED_POINT)
47 #if defined(CPU_COLDFIRE)
48 // do nothing
49 #elif defined(CPU_ARM)
50 #define OPTIMIZE_FOR_SPEED
51 #else
52 #define OPTIMIZE_FOR_SPEED
53 #endif
54#else
55 // do nothing
56#endif
57
42/* C O N S T A N T S */ 58/* C O N S T A N T S */
43#undef _ 59#undef _
44 60
45#define MPC_FIXED_POINT_SYNTH_FIX 2 61#if defined(MPC_FIXED_POINT)
46 62 #if defined(OPTIMIZE_FOR_SPEED)
47#ifdef MPC_FIXED_POINT 63 // round to +/- 2^14 as pre-shift before 32=32x32-multiply
48#define _(value) MPC_MAKE_FRACT_CONST((double)value/(double)(0x40000)) 64 #define _(value) (MPC_SHR_RND(value, 3))
65
66 // round to +/- 2^17 as pre-shift before 32=32x32-multiply
67 #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 14)
68 #else
69 // scale up to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
70 #define _(value) (value << (14))
71
72 // do not perform pre-shift
73 #define MPC_V_PRESHIFT(X) (X)
74 #endif
49#else 75#else
50#define _(value) MAKE_MPC_SAMPLE((double)value/(double)(0x10000)) 76 // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
77 // by the correct amount to have proper scaled output
78 #define _(value) MAKE_MPC_SAMPLE((double)value*(double)(0x1000))
79
80 // do not perform pre-shift
81 #define MPC_V_PRESHIFT(X) (X)
51#endif 82#endif
52 83
53 84// Di_opt coefficients are +/- 2^17
54static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = { 85static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = {
86 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
55 { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) }, 87 { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) },
56 { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) }, 88 { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) },
57 { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) }, 89 { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) },
@@ -88,363 +120,513 @@ static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = {
88 120
89#undef _ 121#undef _
90 122
91static void Calculate_New_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V ) 123// V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
124#define V_COEFFICIENT_EXPAND 27
125
126#if defined(MPC_FIXED_POINT)
127 #if defined(OPTIMIZE_FOR_SPEED)
128 // define 32=32x32-multiplication for DCT-coefficients with samples, vcoef will be pre-shifted on creation
129 // samples are rounded to +/- 2^19 as pre-shift before 32=32x32-multiply
130 #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_SHR_RND(sample, 12) * vcoef )
131
132 // round to +/- 2^16 as pre-shift before 32=32x32-multiply
133 #define MPC_MAKE_INVCOS(value) (MPC_SHR_RND(value, 15))
134 #else
135 // define 64=32x32-multiplication for DCT-coefficients with samples. By using MPC_MULTIPLY_FRACT, highly optimized assembler may be used
136 // MPC_MULTIPLY_FRACT does >>32 after the multiplication; as the V-coefficients were expanded by V_COEFFICIENT_EXPAND, we correct for this on the result.
137 // Will lose 5 bits of accuracy in the fractional part of the result, without effect on the final audio result
138 #define MPC_MULTIPLY_V(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )
139
140 // directly use accurate 32bit-coefficients
141 #define MPC_MAKE_INVCOS(value) (value)
142 #endif
143#else
144 // for floating point use the standard multiplication macro
145 #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
146
147 // downscale the accurate 32bit-coefficients and convert to float
148 #define MPC_MAKE_INVCOS(value) MAKE_MPC_SAMPLE((double)value/(double)(1<<V_COEFFICIENT_EXPAND))
149#endif
150
151// define constants for DCT-synthesis
152// INVCOSxx = (0.5 / cos(xx*PI/64)) * 2^27; the scaling by 2^27 brings the coefficients close to the +/- 2^31 limit of a 32-bit integer
153#define INVCOS01 MPC_MAKE_INVCOS( 67189797)
154#define INVCOS02 MPC_MAKE_INVCOS( 67433575)
155#define INVCOS03 MPC_MAKE_INVCOS( 67843164)
156#define INVCOS04 MPC_MAKE_INVCOS( 68423604)
157#define INVCOS05 MPC_MAKE_INVCOS( 69182167)
158#define INVCOS06 MPC_MAKE_INVCOS( 70128577)
159#define INVCOS07 MPC_MAKE_INVCOS( 71275330)
160#define INVCOS08 MPC_MAKE_INVCOS( 72638111)
161#define INVCOS09 MPC_MAKE_INVCOS( 74236348)
162#define INVCOS10 MPC_MAKE_INVCOS( 76093940)
163#define INVCOS11 MPC_MAKE_INVCOS( 78240207)
164#define INVCOS12 MPC_MAKE_INVCOS( 80711144)
165#define INVCOS13 MPC_MAKE_INVCOS( 83551089)
166#define INVCOS14 MPC_MAKE_INVCOS( 86814950)
167#define INVCOS15 MPC_MAKE_INVCOS( 90571242)
168#define INVCOS16 MPC_MAKE_INVCOS( 94906266)
169#define INVCOS17 MPC_MAKE_INVCOS( 99929967)
170#define INVCOS18 MPC_MAKE_INVCOS( 105784321)
171#define INVCOS19 MPC_MAKE_INVCOS( 112655602)
172#define INVCOS20 MPC_MAKE_INVCOS( 120792764)
173#define INVCOS21 MPC_MAKE_INVCOS( 130535899)
174#define INVCOS22 MPC_MAKE_INVCOS( 142361749)
175#define INVCOS23 MPC_MAKE_INVCOS( 156959571)
176#define INVCOS24 MPC_MAKE_INVCOS( 175363913)
177#define INVCOS25 MPC_MAKE_INVCOS( 199201203)
178#define INVCOS26 MPC_MAKE_INVCOS( 231182936)
179#define INVCOS27 MPC_MAKE_INVCOS( 276190692)
180#define INVCOS28 MPC_MAKE_INVCOS( 343988688)
181#define INVCOS29 MPC_MAKE_INVCOS( 457361460)
182#define INVCOS30 MPC_MAKE_INVCOS( 684664578)
183#define INVCOS31 MPC_MAKE_INVCOS(1367679739)
184
185static inline void
186mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
92{ 187{
93 // Calculating new V-buffer values for left channel 188 // Calculating new V-buffer values for left channel
94 // calculate new V-values (ISO-11172-3, p. 39) 189 // calculate new V-values (ISO-11172-3, p. 39)
95 // based upon fast-MDCT algorithm by Byeong Gi Lee 190 // based upon fast-MDCT algorithm by Byeong Gi Lee
96 /*static*/ MPC_SAMPLE_FORMAT A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15; 191 MPC_SAMPLE_FORMAT A[16];
97 /*static*/ MPC_SAMPLE_FORMAT B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15; 192 MPC_SAMPLE_FORMAT B[16];
98 MPC_SAMPLE_FORMAT tmp; 193 MPC_SAMPLE_FORMAT tmp;
99 194
100 A00 = Sample[ 0] + Sample[31]; 195 A[ 0] = Sample[ 0] + Sample[31];
101 A01 = Sample[ 1] + Sample[30]; 196 A[ 1] = Sample[ 1] + Sample[30];
102 A02 = Sample[ 2] + Sample[29]; 197 A[ 2] = Sample[ 2] + Sample[29];
103 A03 = Sample[ 3] + Sample[28]; 198 A[ 3] = Sample[ 3] + Sample[28];
104 A04 = Sample[ 4] + Sample[27]; 199 A[ 4] = Sample[ 4] + Sample[27];
105 A05 = Sample[ 5] + Sample[26]; 200 A[ 5] = Sample[ 5] + Sample[26];
106 A06 = Sample[ 6] + Sample[25]; 201 A[ 6] = Sample[ 6] + Sample[25];
107 A07 = Sample[ 7] + Sample[24]; 202 A[ 7] = Sample[ 7] + Sample[24];
108 A08 = Sample[ 8] + Sample[23]; 203 A[ 8] = Sample[ 8] + Sample[23];
109 A09 = Sample[ 9] + Sample[22]; 204 A[ 9] = Sample[ 9] + Sample[22];
110 A10 = Sample[10] + Sample[21]; 205 A[10] = Sample[10] + Sample[21];
111 A11 = Sample[11] + Sample[20]; 206 A[11] = Sample[11] + Sample[20];
112 A12 = Sample[12] + Sample[19]; 207 A[12] = Sample[12] + Sample[19];
113 A13 = Sample[13] + Sample[18]; 208 A[13] = Sample[13] + Sample[18];
114 A14 = Sample[14] + Sample[17]; 209 A[14] = Sample[14] + Sample[17];
115 A15 = Sample[15] + Sample[16]; 210 A[15] = Sample[15] + Sample[16];
116 211 // 16 adds
117 B00 = A00 + A15; 212
118 B01 = A01 + A14; 213 B[ 0] = A[ 0] + A[15];
119 B02 = A02 + A13; 214 B[ 1] = A[ 1] + A[14];
120 B03 = A03 + A12; 215 B[ 2] = A[ 2] + A[13];
121 B04 = A04 + A11; 216 B[ 3] = A[ 3] + A[12];
122 B05 = A05 + A10; 217 B[ 4] = A[ 4] + A[11];
123 B06 = A06 + A09; 218 B[ 5] = A[ 5] + A[10];
124 B07 = A07 + A08;; 219 B[ 6] = A[ 6] + A[ 9];
125 B08 = MPC_SCALE_CONST((A00 - A15) , 0.5024192929f , 31); 220 B[ 7] = A[ 7] + A[ 8];;
126 B09 = MPC_SCALE_CONST((A01 - A14) , 0.5224986076f , 31); 221 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
127 B10 = MPC_SCALE_CONST((A02 - A13) , 0.5669440627f , 31); 222 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
128 B11 = MPC_SCALE_CONST((A03 - A12) , 0.6468217969f , 31); 223 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
129 B12 = MPC_SCALE_CONST((A04 - A11) , 0.7881546021f , 31); 224 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
130 B13 = MPC_SCALE_CONST((A05 - A10) , 1.0606776476f , 30); 225 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
131 B14 = MPC_SCALE_CONST((A06 - A09) , 1.7224471569f , 30); 226 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
132 B15 = MPC_SCALE_CONST((A07 - A08) , 5.1011486053f , 28); 227 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
133 228 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
134 A00 = B00 + B07; 229 // 8 adds, 8 subs, 8 muls, 8 shifts
135 A01 = B01 + B06; 230
136 A02 = B02 + B05; 231 A[ 0] = B[ 0] + B[ 7];
137 A03 = B03 + B04; 232 A[ 1] = B[ 1] + B[ 6];
138 A04 = MPC_SCALE_CONST((B00 - B07) , 0.5097956061f , 31); 233 A[ 2] = B[ 2] + B[ 5];
139 A05 = MPC_SCALE_CONST((B01 - B06) , 0.6013448834f , 31); 234 A[ 3] = B[ 3] + B[ 4];
140 A06 = MPC_SCALE_CONST((B02 - B05) , 0.8999761939f , 31); 235 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
141 A07 = MPC_SCALE_CONST((B03 - B04) , 2.5629155636f , 29); 236 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
142 A08 = B08 + B15; 237 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
143 A09 = B09 + B14; 238 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
144 A10 = B10 + B13; 239 A[ 8] = B[ 8] + B[15];
145 A11 = B11 + B12; 240 A[ 9] = B[ 9] + B[14];
146 A12 = MPC_SCALE_CONST((B08 - B15) , 0.5097956061f , 31); 241 A[10] = B[10] + B[13];
147 A13 = MPC_SCALE_CONST((B09 - B14) , 0.6013448834f , 31); 242 A[11] = B[11] + B[12];
148 A14 = MPC_SCALE_CONST((B10 - B13) , 0.8999761939f , 31); 243 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
149 A15 = MPC_SCALE_CONST((B11 - B12) , 2.5629155636f , 29); 244 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
150 245 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
151 B00 = A00 + A03; 246 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
152 B01 = A01 + A02; 247 // 8 adds, 8 subs, 8 muls, 8 shifts
153 B02 = MPC_MULTIPLY_FRACT_CONST_FIX((A00 - A03) , 0.5411961079f , 1); 248
154 B03 = MPC_MULTIPLY_FRACT_CONST_FIX((A01 - A02) , 1.3065630198f , 2); 249 B[ 0] = A[ 0] + A[ 3];
155 B04 = A04 + A07; 250 B[ 1] = A[ 1] + A[ 2];
156 B05 = A05 + A06; 251 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
157 B06 = MPC_MULTIPLY_FRACT_CONST_FIX((A04 - A07) , 0.5411961079f , 1); 252 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
158 B07 = MPC_MULTIPLY_FRACT_CONST_FIX((A05 - A06) , 1.3065630198f , 2); 253 B[ 4] = A[ 4] + A[ 7];
159 B08 = A08 + A11; 254 B[ 5] = A[ 5] + A[ 6];
160 B09 = A09 + A10; 255 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
161 B10 = MPC_MULTIPLY_FRACT_CONST_FIX((A08 - A11) , 0.5411961079f , 1); 256 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
162 B11 = MPC_MULTIPLY_FRACT_CONST_FIX((A09 - A10) , 1.3065630198f , 2); 257 B[ 8] = A[ 8] + A[11];
163 B12 = A12 + A15; 258 B[ 9] = A[ 9] + A[10];
164 B13 = A13 + A14; 259 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
165 B14 = MPC_MULTIPLY_FRACT_CONST_FIX((A12 - A15) , 0.5411961079f , 1); 260 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
166 B15 = MPC_MULTIPLY_FRACT_CONST_FIX((A13 - A14) , 1.3065630198f , 2); 261 B[12] = A[12] + A[15];
167 262 B[13] = A[13] + A[14];
168 A00 = B00 + B01; 263 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
169 A01 = MPC_MULTIPLY_FRACT_CONST_FIX((B00 - B01) , 0.7071067691f , 1); 264 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
170 A02 = B02 + B03; 265 // 8 adds, 8 subs, 8 muls, 8 shifts
171 A03 = MPC_MULTIPLY_FRACT_CONST_FIX((B02 - B03) , 0.7071067691f , 1); 266
172 A04 = B04 + B05; 267 A[ 0] = B[ 0] + B[ 1];
173 A05 = MPC_MULTIPLY_FRACT_CONST_FIX((B04 - B05) , 0.7071067691f , 1); 268 A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
174 A06 = B06 + B07; 269 A[ 2] = B[ 2] + B[ 3];
175 A07 = MPC_MULTIPLY_FRACT_CONST_FIX((B06 - B07) , 0.7071067691f , 1); 270 A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
176 A08 = B08 + B09; 271 A[ 4] = B[ 4] + B[ 5];
177 A09 = MPC_MULTIPLY_FRACT_CONST_FIX((B08 - B09) , 0.7071067691f , 1); 272 A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
178 A10 = B10 + B11; 273 A[ 6] = B[ 6] + B[ 7];
179 A11 = MPC_MULTIPLY_FRACT_CONST_FIX((B10 - B11) , 0.7071067691f , 1); 274 A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
180 A12 = B12 + B13; 275 A[ 8] = B[ 8] + B[ 9];
181 A13 = MPC_MULTIPLY_FRACT_CONST_FIX((B12 - B13) , 0.7071067691f , 1); 276 A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
182 A14 = B14 + B15; 277 A[10] = B[10] + B[11];
183 A15 = MPC_MULTIPLY_FRACT_CONST_FIX((B14 - B15) , 0.7071067691f , 1); 278 A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
184 279 A[12] = B[12] + B[13];
185 V[48] = -A00; 280 A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
186 V[ 0] = A01; 281 A[14] = B[14] + B[15];
187 V[40] = -A02 - (V[ 8] = A03); 282 A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
188 V[36] = -((V[ 4] = A05 + (V[12] = A07)) + A06); 283 // 8 adds, 8 subs, 8 muls, 8 shifts
189 V[44] = - A04 - A06 - A07; 284
190 V[ 6] = (V[10] = A11 + (V[14] = A15)) + A13; 285 // multiple used expressions: -(A[12] + A[14] + A[15])
191 V[38] = (V[34] = -(V[ 2] = A09 + A13 + A15) - A14) + A09 - A10 - A11; 286 V[48] = -A[ 0];
192 V[46] = (tmp = -(A12 + A14 + A15)) - A08; 287 V[ 0] = A[ 1];
193 V[42] = tmp - A10 - A11; 288 V[40] = -A[ 2] - (V[ 8] = A[ 3]);
194 289 V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
195 A00 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 0] - Sample[31]) , 0.5006030202f , MPC_FIXED_POINT_SYNTH_FIX); 290 V[44] = - A[ 4] - A[ 6] - A[ 7];
196 A01 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 1] - Sample[30]) , 0.5054709315f , MPC_FIXED_POINT_SYNTH_FIX); 291 V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
197 A02 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 2] - Sample[29]) , 0.5154473186f , MPC_FIXED_POINT_SYNTH_FIX); 292 V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
198 A03 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 3] - Sample[28]) , 0.5310425758f , MPC_FIXED_POINT_SYNTH_FIX); 293 V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
199 A04 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 4] - Sample[27]) , 0.5531039238f , MPC_FIXED_POINT_SYNTH_FIX); 294 V[42] = tmp - A[10] - A[11];
200 A05 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 5] - Sample[26]) , 0.5829349756f , MPC_FIXED_POINT_SYNTH_FIX); 295 // 9 adds, 9 subs
201 A06 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 6] - Sample[25]) , 0.6225041151f , MPC_FIXED_POINT_SYNTH_FIX); 296
202 A07 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 7] - Sample[24]) , 0.6748083234f , MPC_FIXED_POINT_SYNTH_FIX); 297 A[ 0] = MPC_MULTIPLY_V((Sample[ 0] - Sample[31]), INVCOS01);
203 A08 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 8] - Sample[23]) , 0.7445362806f , MPC_FIXED_POINT_SYNTH_FIX); 298 A[ 1] = MPC_MULTIPLY_V((Sample[ 1] - Sample[30]), INVCOS03);
204 A09 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 9] - Sample[22]) , 0.8393496275f , MPC_FIXED_POINT_SYNTH_FIX); 299 A[ 2] = MPC_MULTIPLY_V((Sample[ 2] - Sample[29]), INVCOS05);
205 A10 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[10] - Sample[21]) , 0.9725682139f , MPC_FIXED_POINT_SYNTH_FIX); 300 A[ 3] = MPC_MULTIPLY_V((Sample[ 3] - Sample[28]), INVCOS07);
206#if MPC_FIXED_POINT_SYNTH_FIX>=2 301 A[ 4] = MPC_MULTIPLY_V((Sample[ 4] - Sample[27]), INVCOS09);
207 A11 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[11] - Sample[20]) , 1.1694399118f , MPC_FIXED_POINT_SYNTH_FIX); 302 A[ 5] = MPC_MULTIPLY_V((Sample[ 5] - Sample[26]), INVCOS11);
208 A12 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[12] - Sample[19]) , 1.4841645956f , MPC_FIXED_POINT_SYNTH_FIX); 303 A[ 6] = MPC_MULTIPLY_V((Sample[ 6] - Sample[25]), INVCOS13);
209#else 304 A[ 7] = MPC_MULTIPLY_V((Sample[ 7] - Sample[24]), INVCOS15);
210 A11 = MPC_SCALE_CONST_SHR ((Sample[11] - Sample[20]) , 1.1694399118f , 30, MPC_FIXED_POINT_SYNTH_FIX); 305 A[ 8] = MPC_MULTIPLY_V((Sample[ 8] - Sample[23]), INVCOS17);
211 A12 = MPC_SCALE_CONST_SHR ((Sample[12] - Sample[19]) , 1.4841645956f , 30, MPC_FIXED_POINT_SYNTH_FIX); 306 A[ 9] = MPC_MULTIPLY_V((Sample[ 9] - Sample[22]), INVCOS19);
212#endif 307 A[10] = MPC_MULTIPLY_V((Sample[10] - Sample[21]), INVCOS21);
213 A13 = MPC_SCALE_CONST_SHR ((Sample[13] - Sample[18]) , 2.0577809811f , 29, MPC_FIXED_POINT_SYNTH_FIX); 308 A[11] = MPC_MULTIPLY_V((Sample[11] - Sample[20]), INVCOS23);
214 A14 = MPC_SCALE_CONST_SHR ((Sample[14] - Sample[17]) , 3.4076085091f , 29, MPC_FIXED_POINT_SYNTH_FIX); 309 A[12] = MPC_MULTIPLY_V((Sample[12] - Sample[19]), INVCOS25);
215 A15 = MPC_SCALE_CONST_SHR ((Sample[15] - Sample[16]) , 10.1900081635f, 27 ,MPC_FIXED_POINT_SYNTH_FIX); 310 A[13] = MPC_MULTIPLY_V((Sample[13] - Sample[18]), INVCOS27);
216 311 A[14] = MPC_MULTIPLY_V((Sample[14] - Sample[17]), INVCOS29);
217 B00 = A00 + A15; 312 A[15] = MPC_MULTIPLY_V((Sample[15] - Sample[16]), INVCOS31);
218 B01 = A01 + A14; 313 // 16 subs, 16 muls, 16 shifts
219 B02 = A02 + A13; 314
220 B03 = A03 + A12; 315 B[ 0] = A[ 0] + A[15];
221 B04 = A04 + A11; 316 B[ 1] = A[ 1] + A[14];
222 B05 = A05 + A10; 317 B[ 2] = A[ 2] + A[13];
223 B06 = A06 + A09; 318 B[ 3] = A[ 3] + A[12];
224 B07 = A07 + A08; 319 B[ 4] = A[ 4] + A[11];
225 B08 = MPC_SCALE_CONST((A00 - A15) , 0.5024192929f , 31); 320 B[ 5] = A[ 5] + A[10];
226 B09 = MPC_SCALE_CONST((A01 - A14) , 0.5224986076f , 31); 321 B[ 6] = A[ 6] + A[ 9];
227 B10 = MPC_SCALE_CONST((A02 - A13) , 0.5669440627f , 31); 322 B[ 7] = A[ 7] + A[ 8];
228 B11 = MPC_SCALE_CONST((A03 - A12) , 0.6468217969f , 31); 323 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
229 B12 = MPC_SCALE_CONST((A04 - A11) , 0.7881546021f , 31); 324 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
230 B13 = MPC_SCALE_CONST((A05 - A10) , 1.0606776476f , 30); 325 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
231 B14 = MPC_SCALE_CONST((A06 - A09) , 1.7224471569f , 30); 326 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
232 B15 = MPC_SCALE_CONST((A07 - A08) , 5.1011486053f , 28); 327 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
233 328 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
234 A00 = B00 + B07; 329 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
235 A01 = B01 + B06; 330 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
236 A02 = B02 + B05; 331 // 8 adds, 8 subs, 8 muls, 8 shift
237 A03 = B03 + B04; 332
238 A04 = MPC_SCALE_CONST((B00 - B07) , 0.5097956061f , 31); 333 A[ 0] = B[ 0] + B[ 7];
239 A05 = MPC_SCALE_CONST((B01 - B06) , 0.6013448834f , 31); 334 A[ 1] = B[ 1] + B[ 6];
240 A06 = MPC_SCALE_CONST((B02 - B05) , 0.8999761939f , 31); 335 A[ 2] = B[ 2] + B[ 5];
241 A07 = MPC_SCALE_CONST((B03 - B04) , 2.5629155636f , 29); 336 A[ 3] = B[ 3] + B[ 4];
242 A08 = B08 + B15; 337 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
243 A09 = B09 + B14; 338 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
244 A10 = B10 + B13; 339 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
245 A11 = B11 + B12; 340 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
246 A12 = MPC_SCALE_CONST((B08 - B15) , 0.5097956061f , 31); 341 A[ 8] = B[ 8] + B[15];
247 A13 = MPC_SCALE_CONST((B09 - B14) , 0.6013448834f , 31); 342 A[ 9] = B[ 9] + B[14];
248 A14 = MPC_SCALE_CONST((B10 - B13) , 0.8999761939f , 31); 343 A[10] = B[10] + B[13];
249 A15 = MPC_SCALE_CONST((B11 - B12) , 2.5629155636f , 29); 344 A[11] = B[11] + B[12];
250 345 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
251 B00 = A00 + A03; 346 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
252 B01 = A01 + A02; 347 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
253 B02 = MPC_SCALE_CONST((A00 - A03) , 0.5411961079f , 31); 348 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
254 B03 = MPC_SCALE_CONST((A01 - A02) , 1.3065630198f , 30); 349 // 8 adds, 8 subs, 8 muls, 8 shift
255 B04 = A04 + A07; 350
256 B05 = A05 + A06; 351 B[ 0] = A[ 0] + A[ 3];
257 B06 = MPC_SCALE_CONST((A04 - A07) , 0.5411961079f , 31); 352 B[ 1] = A[ 1] + A[ 2];
258 B07 = MPC_SCALE_CONST((A05 - A06) , 1.3065630198f , 30); 353 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
259 B08 = A08 + A11; 354 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
260 B09 = A09 + A10; 355 B[ 4] = A[ 4] + A[ 7];
261 B10 = MPC_SCALE_CONST((A08 - A11) , 0.5411961079f , 31); 356 B[ 5] = A[ 5] + A[ 6];
262 B11 = MPC_SCALE_CONST((A09 - A10) , 1.3065630198f , 30); 357 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
263 B12 = A12 + A15; 358 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
264 B13 = A13 + A14; 359 B[ 8] = A[ 8] + A[11];
265 B14 = MPC_SCALE_CONST((A12 - A15) , 0.5411961079f , 31); 360 B[ 9] = A[ 9] + A[10];
266 B15 = MPC_SCALE_CONST((A13 - A14) , 1.3065630198f , 30); 361 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
267 362 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
268 A00 = MPC_SHL(B00 + B01, MPC_FIXED_POINT_SYNTH_FIX); 363 B[12] = A[12] + A[15];
269 A01 = MPC_SCALE_CONST_SHL((B00 - B01) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 364 B[13] = A[13] + A[14];
270 A02 = MPC_SHL(B02 + B03, MPC_FIXED_POINT_SYNTH_FIX); 365 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
271 A03 = MPC_SCALE_CONST_SHL((B02 - B03) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 366 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
272 A04 = MPC_SHL(B04 + B05, MPC_FIXED_POINT_SYNTH_FIX); 367 // 8 adds, 8 subs, 8 muls, 8 shift
273 A05 = MPC_SCALE_CONST_SHL((B04 - B05) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 368
274 A06 = MPC_SHL(B06 + B07, MPC_FIXED_POINT_SYNTH_FIX); 369 A[ 0] = B[ 0] + B[ 1];
275 A07 = MPC_SCALE_CONST_SHL((B06 - B07) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 370 A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
276 A08 = MPC_SHL(B08 + B09, MPC_FIXED_POINT_SYNTH_FIX); 371 A[ 2] = B[ 2] + B[ 3];
277 A09 = MPC_SCALE_CONST_SHL((B08 - B09) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 372 A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
278 A10 = MPC_SHL(B10 + B11, MPC_FIXED_POINT_SYNTH_FIX); 373 A[ 4] = B[ 4] + B[ 5];
279 A11 = MPC_SCALE_CONST_SHL((B10 - B11) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 374 A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
280 A12 = MPC_SHL(B12 + B13, MPC_FIXED_POINT_SYNTH_FIX); 375 A[ 6] = B[ 6] + B[ 7];
281 A13 = MPC_SCALE_CONST_SHL((B12 - B13) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 376 A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
282 A14 = MPC_SHL(B14 + B15, MPC_FIXED_POINT_SYNTH_FIX); 377 A[ 8] = B[ 8] + B[ 9];
283 A15 = MPC_SCALE_CONST_SHL((B14 - B15) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 378 A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
284 379 A[10] = B[10] + B[11];
285 // mehrfach verwendete Ausdrücke: A04+A06+A07, A09+A13+A15 380 A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
286 V[ 5] = (V[11] = (V[13] = A07 + (V[15] = A15)) + A11) + A05 + A13; 381 A[12] = B[12] + B[13];
287 V[ 7] = (V[ 9] = A03 + A11 + A15) + A13; 382 A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
288 V[33] = -(V[ 1] = A01 + A09 + A13 + A15) - A14; 383 A[14] = B[14] + B[15];
289 V[35] = -(V[ 3] = A05 + A07 + A09 + A13 + A15) - A06 - A14; 384 A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
290 V[37] = (tmp = -(A10 + A11 + A13 + A14 + A15)) - A05 - A06 - A07; 385 // 8 adds, 8 subs, 8 muls, 8 shift
291 V[39] = tmp - A02 - A03; // abhängig vom Befehl drüber 386
292 V[41] = (tmp += A13 - A12) - A02 - A03; // abhängig vom Befehl 2 drüber 387 // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
293 V[43] = tmp - A04 - A06 - A07; // abhängig von Befehlen 1 und 3 drüber 388 V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
294 V[47] = (tmp = -(A08 + A12 + A14 + A15)) - A00; 389 V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
295 V[45] = tmp - A04 - A06 - A07; // abhängig vom Befehl drüber 390 V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
296 391 V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
297 V[32] = -V[ 0]; 392 V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
298 V[31] = -V[ 1]; 393 V[39] = tmp - A[ 2] - A[ 3];
299 V[30] = -V[ 2]; 394 V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
300 V[29] = -V[ 3]; 395 V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
301 V[28] = -V[ 4]; 396 V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
302 V[27] = -V[ 5]; 397 V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
303 V[26] = -V[ 6]; 398 // 22 adds, 18 subs
304 V[25] = -V[ 7]; 399
305 V[24] = -V[ 8]; 400 V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
306 V[23] = -V[ 9]; 401 V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
307 V[22] = -V[10]; 402 V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
308 V[21] = -V[11]; 403 V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
309 V[20] = -V[12]; 404 V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
310 V[19] = -V[13]; 405 V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
311 V[18] = -V[14]; 406 V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
312 V[17] = -V[15]; 407 V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
313 408 V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
314 V[63] = V[33]; 409 V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
315 V[62] = V[34]; 410 V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
316 V[61] = V[35]; 411 V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
317 V[60] = V[36]; 412 V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
318 V[59] = V[37]; 413 V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
319 V[58] = V[38]; 414 V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
320 V[57] = V[39]; 415 V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
321 V[56] = V[40]; 416 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
322 V[55] = V[41]; 417
323 V[54] = V[42]; 418 V[63] = (V[33] = MPC_V_PRESHIFT(V[33]));
324 V[53] = V[43]; 419 V[62] = (V[34] = MPC_V_PRESHIFT(V[34]));
325 V[52] = V[44]; 420 V[61] = (V[35] = MPC_V_PRESHIFT(V[35]));
326 V[51] = V[45]; 421 V[60] = (V[36] = MPC_V_PRESHIFT(V[36]));
327 V[50] = V[46]; 422 V[59] = (V[37] = MPC_V_PRESHIFT(V[37]));
328 V[49] = V[47]; 423 V[58] = (V[38] = MPC_V_PRESHIFT(V[38]));
424 V[57] = (V[39] = MPC_V_PRESHIFT(V[39]));
425 V[56] = (V[40] = MPC_V_PRESHIFT(V[40]));
426 V[55] = (V[41] = MPC_V_PRESHIFT(V[41]));
427 V[54] = (V[42] = MPC_V_PRESHIFT(V[42]));
428 V[53] = (V[43] = MPC_V_PRESHIFT(V[43]));
429 V[52] = (V[44] = MPC_V_PRESHIFT(V[44]));
430 V[51] = (V[45] = MPC_V_PRESHIFT(V[45]));
431 V[50] = (V[46] = MPC_V_PRESHIFT(V[46]));
432 V[49] = (V[47] = MPC_V_PRESHIFT(V[47]));
433 V[48] = (V[48] = MPC_V_PRESHIFT(V[48]));
434 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
435
436 // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
437 // total: 111 adds, 107 subs, 80 muls, 80 shifts
438}
439
440static inline void
441mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
442{
443 const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt;
444 mpc_int32_t k;
445
446 #if defined(OPTIMIZE_FOR_SPEED)
447 #if defined(CPU_ARM)
448 // 32=32x32-multiply assembler for ARM
449 for ( k = 0; k < 32; k++, V++ )
450 {
451 asm volatile (
452 "ldmia %[D]!, { r0-r3 } \n\t"
453 "ldr r4, [%[V]] \n\t"
454 "mul r5, r0, r4 \n\t"
455 "ldr r4, [%[V], #96*4] \n\t"
456 "mla r5, r1, r4, r5 \n\t"
457 "ldr r4, [%[V], #128*4] \n\t"
458 "mla r5, r2, r4, r5 \n\t"
459 "ldr r4, [%[V], #224*4] \n\t"
460 "mla r5, r3, r4, r5 \n\t"
461
462 "ldmia %[D]!, { r0-r3 } \n\t"
463 "ldr r4, [%[V], #256*4] \n\t"
464 "mla r5, r0, r4, r5 \n\t"
465 "ldr r4, [%[V], #352*4] \n\t"
466 "mla r5, r1, r4, r5 \n\t"
467 "ldr r4, [%[V], #384*4] \n\t"
468 "mla r5, r2, r4, r5 \n\t"
469 "ldr r4, [%[V], #480*4] \n\t"
470 "mla r5, r3, r4, r5 \n\t"
471
472 "ldmia %[D]!, { r0-r3 } \n\t"
473 "ldr r4, [%[V], #512*4] \n\t"
474 "mla r5, r0, r4, r5 \n\t"
475 "ldr r4, [%[V], #608*4] \n\t"
476 "mla r5, r1, r4, r5 \n\t"
477 "ldr r4, [%[V], #640*4] \n\t"
478 "mla r5, r2, r4, r5 \n\t"
479 "ldr r4, [%[V], #736*4] \n\t"
480 "mla r5, r3, r4, r5 \n\t"
481
482 "ldmia %[D]!, { r0-r3 } \n\t"
483 "ldr r4, [%[V], #768*4] \n\t"
484 "mla r5, r0, r4, r5 \n\t"
485 "ldr r4, [%[V], #864*4] \n\t"
486 "mla r5, r1, r4, r5 \n\t"
487 "ldr r4, [%[V], #896*4] \n\t"
488 "mla r5, r2, r4, r5 \n\t"
489 "ldr r4, [%[V], #992*4] \n\t"
490 "mla r5, r3, r4, r5 \n\t"
491 "str r5, [%[Data]], #4 \n"
492 : [Data] "+r" (Data), [D] "+r" (D)
493 : [V] "r" (V)
494 : "r0", "r1", "r2", "r3", "r4", "r5");
495 }
496 #else
497 // 32=32x32-multiply (FIXED_POINT)
498 for ( k = 0; k < 32; k++, D += 16, V++ )
499 {
500 *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
501 + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
502 + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
503 + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
504 Data += 1;
505 // total: 16 muls, 15 adds
506 }
507 #endif
508 #else
509 #if defined(CPU_COLDFIRE)
510 // 64=32x32-multiply assembler for Coldfire
511 for ( k = 0; k < 32; k++, D += 16, V++ )
512 {
513 asm volatile (
514 "movem.l (%[D]), %%d0-%%d3 \n\t"
515 "move.l (%[V]), %%a5 \n\t"
516 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
517 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
518 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
519 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
520 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
521 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
522 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
523 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
524 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
525 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
526 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
527 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
528 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
529 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
530 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
531 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
532 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
533 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
534 "mac.l %%d3, %%a5, %%acc0 \n\t"
535 "movclr.l %%acc0, %%d0 \n\t"
536 "move.l %%d0, (%[Data])+ \n"
537 : [Data] "+a" (Data)
538 : [V] "a" (V), [D] "a" (D)
539 : "d0", "d1", "d2", "d3", "a5");
540 }
541 #elif defined(CPU_ARM)
542 // 64=32x32-multiply assembler for ARM
543 for ( k = 0; k < 32; k++, V++ )
544 {
545 asm volatile (
546 "ldmia %[D]!, { r0-r3 } \n\t"
547 "ldr r4, [%[V]] \n\t"
548 "smull r5, r6, r0, r4 \n\t"
549 "ldr r4, [%[V], #96*4] \n\t"
550 "smlal r5, r6, r1, r4 \n\t"
551 "ldr r4, [%[V], #128*4] \n\t"
552 "smlal r5, r6, r2, r4 \n\t"
553 "ldr r4, [%[V], #224*4] \n\t"
554 "smlal r5, r6, r3, r4 \n\t"
555
556 "ldmia %[D]!, { r0-r3 } \n\t"
557 "ldr r4, [%[V], #256*4] \n\t"
558 "smlal r5, r6, r0, r4 \n\t"
559 "ldr r4, [%[V], #352*4] \n\t"
560 "smlal r5, r6, r1, r4 \n\t"
561 "ldr r4, [%[V], #384*4] \n\t"
562 "smlal r5, r6, r2, r4 \n\t"
563 "ldr r4, [%[V], #480*4] \n\t"
564 "smlal r5, r6, r3, r4 \n\t"
565
566 "ldmia %[D]!, { r0-r3 } \n\t"
567 "ldr r4, [%[V], #512*4] \n\t"
568 "smlal r5, r6, r0, r4 \n\t"
569 "ldr r4, [%[V], #608*4] \n\t"
570 "smlal r5, r6, r1, r4 \n\t"
571 "ldr r4, [%[V], #640*4] \n\t"
572 "smlal r5, r6, r2, r4 \n\t"
573 "ldr r4, [%[V], #736*4] \n\t"
574 "smlal r5, r6, r3, r4 \n\t"
575
576 "ldmia %[D]!, { r0-r3 } \n\t"
577 "ldr r4, [%[V], #768*4] \n\t"
578 "smlal r5, r6, r0, r4 \n\t"
579 "ldr r4, [%[V], #864*4] \n\t"
580 "smlal r5, r6, r1, r4 \n\t"
581 "ldr r4, [%[V], #896*4] \n\t"
582 "smlal r5, r6, r2, r4 \n\t"
583 "ldr r4, [%[V], #992*4] \n\t"
584 "smlal r5, r6, r3, r4 \n\t"
585 "mov r4, r6, lsl #1 \n\t"
586 "orr r4, r4, r5, lsr #31\n\t"
587 "str r4, [%[Data]], #4 \n"
588 : [Data] "+r" (Data), [D] "+r" (D)
589 : [V] "r" (V)
590 : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
591 }
592 #else
593 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
594 for ( k = 0; k < 32; k++, D += 16, V++ )
595 {
596 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
597 + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
598 + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
599 + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
600 Data += 1;
601 // total: 16 muls, 15 adds, 16 shifts
602 }
603 #endif
604 #endif
329} 605}
330 606
331static void Synthese_Filter_float_internal(MPC_SAMPLE_FORMAT * OutData,MPC_SAMPLE_FORMAT * V,const MPC_SAMPLE_FORMAT * Y) 607static void
608mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
332{ 609{
333 mpc_uint32_t n; 610 mpc_uint32_t n;
334 for ( n = 0; n < 36; n++, Y += 32 ) { 611
335 V -= 64; 612 if (NULL != OutData)
336 Calculate_New_V ( Y, V ); 613 {
337 if (OutData != NULL) 614 for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
338 { 615 {
339 MPC_SAMPLE_FORMAT * Data = OutData; 616 V -= 64;
340 const MPC_SAMPLE_FORMAT * D = (const MPC_SAMPLE_FORMAT *) &Di_opt; 617 mpc_calculate_new_V ( Y, V );
341 mpc_int32_t k; 618 mpc_decoder_windowing_D( OutData, V);
342 //mpc_int32_t tmp;
343
344
345
346 #if defined(CPU_COLDFIRE)
347 for ( k = 0; k < 32; k++, D += 16, V++ ) {
348 asm volatile (
349 "movem.l (%[D]), %%d0-%%d3 \n\t"
350 "move.l (%[V]), %%a5 \n\t"
351 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
352 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
353 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
354 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
355 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
356 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
357 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
358 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
359 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
360 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
361 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
362 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
363 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
364 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
365 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
366 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
367 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
368 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
369 "mac.l %%d3, %%a5, %%acc0 \n\t"
370 "movclr.l %%acc0, %%d0 \n\t"
371 "move.l %%d0, (%[Data])+ \n"
372 : [Data] "+a" (Data)
373 : [V] "a" (V), [D] "a" (D)
374 : "d0", "d1", "d2", "d3", "a5");
375 #elif defined(CPU_ARM)
376 for ( k = 0; k < 32; k++, V++ ) {
377 asm volatile (
378 "ldmia %[D]!, { r0-r3 } \n\t"
379 "ldr r4, [%[V]] \n\t"
380 "smull r5, r6, r0, r4 \n\t"
381 "ldr r4, [%[V], #96*4] \n\t"
382 "smlal r5, r6, r1, r4 \n\t"
383 "ldr r4, [%[V], #128*4] \n\t"
384 "smlal r5, r6, r2, r4 \n\t"
385 "ldr r4, [%[V], #224*4] \n\t"
386 "smlal r5, r6, r3, r4 \n\t"
387
388 "ldmia %[D]!, { r0-r3 } \n\t"
389 "ldr r4, [%[V], #256*4] \n\t"
390 "smlal r5, r6, r0, r4 \n\t"
391 "ldr r4, [%[V], #352*4] \n\t"
392 "smlal r5, r6, r1, r4 \n\t"
393 "ldr r4, [%[V], #384*4] \n\t"
394 "smlal r5, r6, r2, r4 \n\t"
395 "ldr r4, [%[V], #480*4] \n\t"
396 "smlal r5, r6, r3, r4 \n\t"
397
398 "ldmia %[D]!, { r0-r3 } \n\t"
399 "ldr r4, [%[V], #512*4] \n\t"
400 "smlal r5, r6, r0, r4 \n\t"
401 "ldr r4, [%[V], #608*4] \n\t"
402 "smlal r5, r6, r1, r4 \n\t"
403 "ldr r4, [%[V], #640*4] \n\t"
404 "smlal r5, r6, r2, r4 \n\t"
405 "ldr r4, [%[V], #736*4] \n\t"
406 "smlal r5, r6, r3, r4 \n\t"
407
408 "ldmia %[D]!, { r0-r3 } \n\t"
409 "ldr r4, [%[V], #768*4] \n\t"
410 "smlal r5, r6, r0, r4 \n\t"
411 "ldr r4, [%[V], #864*4] \n\t"
412 "smlal r5, r6, r1, r4 \n\t"
413 "ldr r4, [%[V], #896*4] \n\t"
414 "smlal r5, r6, r2, r4 \n\t"
415 "ldr r4, [%[V], #992*4] \n\t"
416 "smlal r5, r6, r3, r4 \n\t"
417 "mov r4, r6, lsl #1 \n\t"
418 "orr r4, r4, r5, lsr #31\n\t"
419 "str r4, [%[Data]], #4 \n"
420 : [Data] "+r" (Data), [D] "+r" (D)
421 : [V] "r" (V)
422 : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
423 #else
424 for ( k = 0; k < 32; k++, D += 16, V++ ) {
425 *Data = MPC_SHL(
426 MPC_MULTIPLY_FRACT(V[ 0],D[ 0]) + MPC_MULTIPLY_FRACT(V[ 96],D[ 1]) + MPC_MULTIPLY_FRACT(V[128],D[ 2]) + MPC_MULTIPLY_FRACT(V[224],D[ 3])
427 + MPC_MULTIPLY_FRACT(V[256],D[ 4]) + MPC_MULTIPLY_FRACT(V[352],D[ 5]) + MPC_MULTIPLY_FRACT(V[384],D[ 6]) + MPC_MULTIPLY_FRACT(V[480],D[ 7])
428 + MPC_MULTIPLY_FRACT(V[512],D[ 8]) + MPC_MULTIPLY_FRACT(V[608],D[ 9]) + MPC_MULTIPLY_FRACT(V[640],D[10]) + MPC_MULTIPLY_FRACT(V[736],D[11])
429 + MPC_MULTIPLY_FRACT(V[768],D[12]) + MPC_MULTIPLY_FRACT(V[864],D[13]) + MPC_MULTIPLY_FRACT(V[896],D[14]) + MPC_MULTIPLY_FRACT(V[992],D[15])
430 , 1);
431
432 Data += 1;
433 #endif
434 }
435 V -= 32;//bleh
436 OutData+=32;
437 } 619 }
438 } 620 }
439} 621}
440 622
441void 623void
442mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT* OutData) 624mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
443{ 625{
444 /********* left channel ********/ 626 /********* left channel ********/
445 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) ); 627 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
446 628
447 Synthese_Filter_float_internal( 629 mpc_full_synthesis_filter(
448 OutData, 630 OutData,
449 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM), 631 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
450 (MPC_SAMPLE_FORMAT *)(d->Y_L [0])); 632 (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));
@@ -452,7 +634,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT* OutData)
452 /******** right channel ********/ 634 /******** right channel ********/
453 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) ); 635 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
454 636
455 Synthese_Filter_float_internal( 637 mpc_full_synthesis_filter(
456 (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH), 638 (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
457 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM), 639 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
458 (MPC_SAMPLE_FORMAT *)(d->Y_R [0])); 640 (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
diff --git a/apps/codecs/mpc.c b/apps/codecs/mpc.c
index 122cb22025..8143e04f71 100644
--- a/apps/codecs/mpc.c
+++ b/apps/codecs/mpc.c
@@ -64,8 +64,8 @@ mpc_bool_t canseek_impl(void *data)
64} 64}
65 65
66MPC_SAMPLE_FORMAT sample_buffer[MPC_DECODER_BUFFER_LENGTH] 66MPC_SAMPLE_FORMAT sample_buffer[MPC_DECODER_BUFFER_LENGTH]
67IBSS_ATTR_MPC_SAMPLE_BUF; 67 IBSS_ATTR_MPC_SAMPLE_BUF;
68mpc_uint32_t seek_table[10000]; 68mpc_uint32_t seek_table[10000];
69 69
70/* this is the codec entry point */ 70/* this is the codec entry point */
71enum codec_status codec_main(void) 71enum codec_status codec_main(void)