summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/codecs/libmusepack/Makefile8
-rw-r--r--apps/codecs/libmusepack/math.h350
-rw-r--r--apps/codecs/libmusepack/requant.c4
-rw-r--r--apps/codecs/libmusepack/synth_filter.c874
-rw-r--r--apps/codecs/mpc.c4
5 files changed, 723 insertions, 517 deletions
diff --git a/apps/codecs/libmusepack/Makefile b/apps/codecs/libmusepack/Makefile
index e50769719b..a70d33644a 100644
--- a/apps/codecs/libmusepack/Makefile
+++ b/apps/codecs/libmusepack/Makefile
@@ -14,7 +14,13 @@ ifdef APPEXTRA
14 INCLUDES += $(patsubst %,-I$(APPSDIR)/%,$(subst :, ,$(APPEXTRA))) 14 INCLUDES += $(patsubst %,-I$(APPSDIR)/%,$(subst :, ,$(APPEXTRA)))
15endif 15endif
16 16
17MUSEPACKOPTS = -O2 17# libmusepack is faster on ARM-targets with -O1 instead of -O2
18ifeq ($(CPU),arm)
19 MUSEPACKOPTS += -O1
20else
21 MUSEPACKOPTS += -O2
22endif
23
18CFLAGS = $(INCLUDES) $(GCCOPTS) $(TARGET_INC) $(MUSEPACKOPTS) $(TARGET) \ 24CFLAGS = $(INCLUDES) $(GCCOPTS) $(TARGET_INC) $(MUSEPACKOPTS) $(TARGET) \
19$(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} $(PROFILE_OPTS) 25$(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} $(PROFILE_OPTS)
20 26
diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h
index f9e9d08b08..a015d45cbb 100644
--- a/apps/codecs/libmusepack/math.h
+++ b/apps/codecs/libmusepack/math.h
@@ -44,174 +44,192 @@
44 44
45#ifdef MPC_FIXED_POINT 45#ifdef MPC_FIXED_POINT
46 46
47 #ifdef _WIN32_WCE
48 #include <cmnintrin.h>
49 #define MPC_HAVE_MULHIGH
50 #endif
51
52 #define MPC_FIXED_POINT_SCALE_SHIFT (MPC_FIXED_POINT_SHIFT + MPC_FIXED_POINT_FRACTPART)
53 #define MPC_FIXED_POINT_SCALE (1 << (MPC_FIXED_POINT_SCALE_SHIFT - 1))
54 //in fixedpoint mode, results in decode output buffer are in -MPC_FIXED_POINT_SCALE ... MPC_FIXED_POINT_SCALE range
55
56 #define MPC_FIXED_POINT_FRACTPART 14
57 typedef mpc_int32_t MPC_SAMPLE_FORMAT;
58 typedef mpc_int64_t MPC_SAMPLE_FORMAT_MULTIPLY;
59
60 #define MAKE_MPC_SAMPLE(X) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<MPC_FIXED_POINT_FRACTPART))
61 #define MAKE_MPC_SAMPLE_EX(X,Y) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<(Y)))
62
63 #define MPC_SHR_RND(X, Y) ((X+(1<<(Y-1)))>>Y)
64
65 #if defined(CPU_COLDFIRE)
66
67 #define MPC_MULTIPLY(X,Y) mpc_multiply((X), (Y))
68 #define MPC_MULTIPLY_EX(X,Y,Z) mpc_multiply_ex((X), (Y), (Z))
69
70 static inline MPC_SAMPLE_FORMAT mpc_multiply(MPC_SAMPLE_FORMAT x,
71 MPC_SAMPLE_FORMAT y)
72 {
73 MPC_SAMPLE_FORMAT t1, t2;
74 asm volatile (
75 "mac.l %[x],%[y],%%acc0\n" /* multiply */
76 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
77 "movclr.l %%acc0,%[t1] \n" /* get higher half */
78 "moveq.l #17,%[t2] \n"
79 "asl.l %[t2],%[t1] \n" /* hi <<= 17, plus one free */
80 "moveq.l #14,%[t2] \n"
81 "lsr.l %[t2],%[x] \n" /* (unsigned)lo >>= 14 */
82 "or.l %[x],%[t1] \n" /* combine result */
83 : /* outputs */
84 [t1]"=&d"(t1),
85 [t2]"=&d"(t2),
86 [x] "+d" (x)
87 : /* inputs */
88 [y] "d" (y)
89 );
90 return t1;
91 }
92
93 static inline MPC_SAMPLE_FORMAT mpc_multiply_ex(MPC_SAMPLE_FORMAT x,
94 MPC_SAMPLE_FORMAT y,
95 unsigned shift)
96 {
97 MPC_SAMPLE_FORMAT t1, t2;
98 asm volatile (
99 "mac.l %[x],%[y],%%acc0\n" /* multiply */
100 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
101 "movclr.l %%acc0,%[t1] \n" /* get higher half */
102 "moveq.l #31,%[t2] \n"
103 "sub.l %[sh],%[t2] \n" /* t2 = 31 - shift */
104 "ble.s 1f \n"
105 "asl.l %[t2],%[t1] \n" /* hi <<= 31 - shift */
106 "lsr.l %[sh],%[x] \n" /* (unsigned)lo >>= shift */
107 "or.l %[x],%[t1] \n" /* combine result */
108 "bra.s 2f \n"
109 "1: \n"
110 "neg.l %[t2] \n" /* t2 = shift - 31 */
111 "asr.l %[t2],%[t1] \n" /* hi >>= t2 */
112 "2: \n"
113 : /* outputs */
114 [t1]"=&d"(t1),
115 [t2]"=&d"(t2),
116 [x] "+d" (x)
117 : /* inputs */
118 [y] "d" (y),
119 [sh]"d" (shift)
120 );
121 return t1;
122 }
123 #elif defined(CPU_ARM)
124 // borrowed and adapted from libMAD
125 #define MPC_MULTIPLY(X,Y) \
126 ({ \
127 MPC_SAMPLE_FORMAT low; \
128 MPC_SAMPLE_FORMAT high; \
129 asm volatile ( /* will calculate: result = (X*Y)>>14 */ \
130 "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
131 "mov %0, %0, lsr #14 \n\t" /* %0 = %0 >> 14 */ \
132 "orr %0, %0, %1, lsl #18 \n\t"/* result = %0 OR (%1 << 18) */ \
133 : "=&r"(low), "=&r" (high) \
134 : "r"(X),"r"(Y)); \
135 low; \
136 })
137
138 // borrowed and adapted from libMAD
139 #define MPC_MULTIPLY_EX(X,Y,Z) \
140 ({ \
141 MPC_SAMPLE_FORMAT low; \
142 MPC_SAMPLE_FORMAT high; \
143 asm volatile ( /* will calculate: result = (X*Y)>>Z */ \
144 "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
145 "mov %0, %0, lsr %4 \n\t" /* %0 = %0 >> Z */ \
146 "orr %0, %0, %1, lsl %5 \n\t" /* result = %0 OR (%1 << (32-Z)) */ \
147 : "=&r"(low), "=&r" (high) \
148 : "r"(X),"r"(Y),"r"(Z),"r"(32-Z)); \
149 low; \
150 })
151 #else /* libmusepack standard */
152
153 #define MPC_MULTIPLY_NOTRUNCATE(X,Y) \
154 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> MPC_FIXED_POINT_FRACTPART)
155
156 #define MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z) \
157 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> (Z))
158
159 #ifdef _DEBUG
160 static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2)
161 {
162 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_NOTRUNCATE(item1,item2);
163 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
164 return (MPC_SAMPLE_FORMAT)temp;
165 }
166
167 static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY_EX(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2,unsigned shift)
168 {
169 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_EX_NOTRUNCATE(item1,item2,shift);
170 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
171 return (MPC_SAMPLE_FORMAT)temp;
172 }
173 #else
174 #define MPC_MULTIPLY(X,Y) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_NOTRUNCATE(X,Y))
175 #define MPC_MULTIPLY_EX(X,Y,Z) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z))
176 #endif
177
178 #endif
179
180 #ifdef MPC_HAVE_MULHIGH
181 #define MPC_MULTIPLY_FRACT(X,Y) _MulHigh(X,Y)
182 #else
183 #if defined(CPU_COLDFIRE)
184 /* loses one bit of accuracy. The rest of the macros won't be as easy as this... */
185 #define MPC_MULTIPLY_FRACT(X,Y) \
186 ({ \
187 MPC_SAMPLE_FORMAT t; \
188 asm volatile ( \
189 "mac.l %[A], %[B], %%acc0\n\t" \
190 "movclr.l %%acc0, %[t]\n\t" \
191 "asr.l #1, %[t]\n\t" \
192 : [t] "=d" (t) \
193 : [A] "r" ((X)), [B] "r" ((Y))); \
194 t; \
195 })
196 #elif defined(CPU_ARM)
197 // borrowed and adapted from libMAD
198 #define MPC_MULTIPLY_FRACT(X,Y) \
199 ({ \
200 MPC_SAMPLE_FORMAT low; \
201 MPC_SAMPLE_FORMAT high; \
202 asm volatile ( /* will calculate: result = (X*Y)>>32 */ \
203 "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
204 : "=&r"(low), "=&r" (high) /* result = %1 [32..63], saves the >>32 */ \
205 : "r"(X),"r"(Y)); \
206 high; \
207 })
208 #else
209 #define MPC_MULTIPLY_FRACT(X,Y) MPC_MULTIPLY_EX(X,Y,32)
210 #endif
211 #endif
212
213 #define MPC_MAKE_FRACT_CONST(X) (MPC_SAMPLE_FORMAT)((X) * (double)(((mpc_int64_t)1)<<32) )
214
215 #define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y))
47 216
48#ifdef _WIN32_WCE
49
50#include <cmnintrin.h>
51
52#define MPC_HAVE_MULHIGH
53
54#endif
55
56
57#define MPC_FIXED_POINT_SCALE_SHIFT (MPC_FIXED_POINT_SHIFT + MPC_FIXED_POINT_FRACTPART)
58#define MPC_FIXED_POINT_SCALE (1 << (MPC_FIXED_POINT_SCALE_SHIFT - 1))
59
60
61//in fixedpoint mode, results in decode output buffer are in -MPC_FIXED_POINT_SCALE ... MPC_FIXED_POINT_SCALE range
62
63#define MPC_FIXED_POINT_FRACTPART 14
64typedef mpc_int32_t MPC_SAMPLE_FORMAT;
65
66typedef mpc_int64_t MPC_SAMPLE_FORMAT_MULTIPLY;
67
68#define MAKE_MPC_SAMPLE(X) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<MPC_FIXED_POINT_FRACTPART))
69#define MAKE_MPC_SAMPLE_EX(X,Y) (MPC_SAMPLE_FORMAT)((double)(X) * (double)(((mpc_int64_t)1)<<(Y)))
70
71#if defined(CPU_COLDFIRE)
72
73#define MPC_MULTIPLY(X,Y) mpc_multiply((X), (Y))
74#define MPC_MULTIPLY_EX(X,Y,Z) mpc_multiply_ex((X), (Y), (Z))
75
76static inline MPC_SAMPLE_FORMAT mpc_multiply(MPC_SAMPLE_FORMAT x,
77 MPC_SAMPLE_FORMAT y)
78{
79 MPC_SAMPLE_FORMAT t1, t2;
80 asm volatile (
81 "mac.l %[x],%[y],%%acc0\n" /* multiply */
82 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
83 "movclr.l %%acc0,%[t1] \n" /* get higher half */
84 "moveq.l #17,%[t2] \n"
85 "asl.l %[t2],%[t1] \n" /* hi <<= 17, plus one free */
86 "moveq.l #14,%[t2] \n"
87 "lsr.l %[t2],%[x] \n" /* (unsigned)lo >>= 14 */
88 "or.l %[x],%[t1] \n" /* combine result */
89 : /* outputs */
90 [t1]"=&d"(t1),
91 [t2]"=&d"(t2),
92 [x] "+d" (x)
93 : /* inputs */
94 [y] "d" (y)
95 );
96 return t1;
97}
98
99static inline MPC_SAMPLE_FORMAT mpc_multiply_ex(MPC_SAMPLE_FORMAT x,
100 MPC_SAMPLE_FORMAT y,
101 unsigned shift)
102{
103 MPC_SAMPLE_FORMAT t1, t2;
104 asm volatile (
105 "mac.l %[x],%[y],%%acc0\n" /* multiply */
106 "mulu.l %[y],%[x] \n" /* get lower half, avoid emac stall */
107 "movclr.l %%acc0,%[t1] \n" /* get higher half */
108 "moveq.l #31,%[t2] \n"
109 "sub.l %[sh],%[t2] \n" /* t2 = 31 - shift */
110 "ble.s 1f \n"
111 "asl.l %[t2],%[t1] \n" /* hi <<= 31 - shift */
112 "lsr.l %[sh],%[x] \n" /* (unsigned)lo >>= shift */
113 "or.l %[x],%[t1] \n" /* combine result */
114 "bra.s 2f \n"
115 "1: \n"
116 "neg.l %[t2] \n" /* t2 = shift - 31 */
117 "asr.l %[t2],%[t1] \n" /* hi >>= t2 */
118 "2: \n"
119 : /* outputs */
120 [t1]"=&d"(t1),
121 [t2]"=&d"(t2),
122 [x] "+d" (x)
123 : /* inputs */
124 [y] "d" (y),
125 [sh]"d" (shift)
126 );
127 return t1;
128}
129#else /* libmusepack standard */
130
131#define MPC_MULTIPLY_NOTRUNCATE(X,Y) \
132 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> MPC_FIXED_POINT_FRACTPART)
133
134#define MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z) \
135 (((MPC_SAMPLE_FORMAT_MULTIPLY)(X) * (MPC_SAMPLE_FORMAT_MULTIPLY)(Y)) >> (Z))
136
137#ifdef _DEBUG
138static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2)
139{
140 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_NOTRUNCATE(item1,item2);
141 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
142 return (MPC_SAMPLE_FORMAT)temp;
143}
144
145static inline MPC_SAMPLE_FORMAT MPC_MULTIPLY_EX(MPC_SAMPLE_FORMAT item1,MPC_SAMPLE_FORMAT item2,unsigned shift)
146{
147 MPC_SAMPLE_FORMAT_MULTIPLY temp = MPC_MULTIPLY_EX_NOTRUNCATE(item1,item2,shift);
148 assert(temp == (MPC_SAMPLE_FORMAT_MULTIPLY)(MPC_SAMPLE_FORMAT)temp);
149 return (MPC_SAMPLE_FORMAT)temp;
150}
151#else
152#define MPC_MULTIPLY(X,Y) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_NOTRUNCATE(X,Y))
153#define MPC_MULTIPLY_EX(X,Y,Z) ((MPC_SAMPLE_FORMAT)MPC_MULTIPLY_EX_NOTRUNCATE(X,Y,Z))
154#endif
155
156#endif
157
158#ifdef MPC_HAVE_MULHIGH
159#define MPC_MULTIPLY_FRACT(X,Y) _MulHigh(X,Y)
160#else
161#if defined(CPU_COLDFIRE)
162/* loses one bit of accuracy.
163 the rest of the macros won't be as easy as this... */
164#define MPC_MULTIPLY_FRACT(X,Y) \
165 ({ \
166 MPC_SAMPLE_FORMAT t; \
167 asm volatile ( \
168 "mac.l %[A], %[B], %%acc0\n\t" \
169 "movclr.l %%acc0, %[t]\n\t" \
170 "asr.l #1, %[t]\n\t" \
171 : [t] "=d" (t) \
172 : [A] "r" ((X)), [B] "r" ((Y))); \
173 t; \
174 })
175#else 217#else
176#define MPC_MULTIPLY_FRACT(X,Y) MPC_MULTIPLY_EX(X,Y,32) 218 //in floating-point mode, decoded samples are in -1...1 range
177#endif 219
178#endif 220 typedef float MPC_SAMPLE_FORMAT;
179 221
180#define MPC_MAKE_FRACT_CONST(X) (MPC_SAMPLE_FORMAT)((X) * (double)(((mpc_int64_t)1)<<32) ) 222 #define MAKE_MPC_SAMPLE(X) ((MPC_SAMPLE_FORMAT)(X))
181#define MPC_MULTIPLY_FRACT_CONST(X,Y) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST(Y)) 223 #define MAKE_MPC_SAMPLE_EX(X,Y) ((MPC_SAMPLE_FORMAT)(X))
182#define MPC_MULTIPLY_FRACT_CONST_FIX(X,Y,Z) ( MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y / (1<<(Z)) )) << (Z) ) 224
183#define MPC_MULTIPLY_FRACT_CONST_SHR(X,Y,Z) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y / (1<<(Z)) )) 225 #define MPC_MULTIPLY_FRACT(X,Y) ((X)*(Y))
184 226 #define MPC_MAKE_FRACT_CONST(X) (X)
185#define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y)) 227
186#define MPC_SCALE_CONST(X,Y,Z) MPC_MULTIPLY_EX(X,MAKE_MPC_SAMPLE_EX(Y,Z),(Z)) 228 #define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y))
187#define MPC_SCALE_CONST_SHL(X,Y,Z,S) MPC_MULTIPLY_EX(X,MAKE_MPC_SAMPLE_EX(Y,Z),(Z)-(S)) 229 #define MPC_MULTIPLY(X,Y) ((X)*(Y))
188#define MPC_SCALE_CONST_SHR(X,Y,Z,S) MPC_MULTIPLY_EX(X,MAKE_MPC_SAMPLE_EX(Y,Z),(Z)+(S)) 230 #define MPC_MULTIPLY_EX(X,Y,Z) ((X)*(Y))
189#define MPC_SHR(X,Y) ((X)>>(Y)) 231
190#define MPC_SHL(X,Y) ((X)<<(Y)) 232 #define MPC_SHR_RND(X, Y) (X)
191
192#else
193
194//in floating-point mode, decoded samples are in -1...1 range
195
196typedef float MPC_SAMPLE_FORMAT;
197
198#define MAKE_MPC_SAMPLE(X) ((MPC_SAMPLE_FORMAT)(X))
199#define MAKE_MPC_SAMPLE_EX(X,Y) ((MPC_SAMPLE_FORMAT)(X))
200
201#define MPC_MULTIPLY_FRACT(X,Y) ((X)*(Y))
202#define MPC_MAKE_FRACT_CONST(X) (X)
203#define MPC_MULTIPLY_FRACT_CONST(X,Y) MPC_MULTPLY_FRACT(X,MPC_MAKE_FRACT_CONST(Y))
204#define MPC_MULTIPLY_FRACT_CONST_SHR(X,Y,Z) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y ))
205#define MPC_MULTIPLY_FRACT_CONST_FIX(X,Y,Z) MPC_MULTIPLY_FRACT(X,MPC_MAKE_FRACT_CONST( Y ))
206
207#define MPC_MULTIPLY_FLOAT_INT(X,Y) ((X)*(Y))
208#define MPC_MULTIPLY(X,Y) ((X)*(Y))
209#define MPC_MULTIPLY_EX(X,Y,Z) ((X)*(Y))
210#define MPC_SCALE_CONST(X,Y,Z) ((X)*(Y))
211#define MPC_SCALE_CONST_SHL(X,Y,Z,S) ((X)*(Y))
212#define MPC_SCALE_CONST_SHR(X,Y,Z,S) ((X)*(Y))
213#define MPC_SHR(X,Y) (X)
214#define MPC_SHL(X,Y) (X)
215 233
216#endif 234#endif
217 235
diff --git a/apps/codecs/libmusepack/requant.c b/apps/codecs/libmusepack/requant.c
index a72a165786..6b77585d32 100644
--- a/apps/codecs/libmusepack/requant.c
+++ b/apps/codecs/libmusepack/requant.c
@@ -53,8 +53,8 @@ const mpc_uint32_t Res_bit [18] = {
53const MPC_SAMPLE_FORMAT __Cc [1 + 18] = { 53const MPC_SAMPLE_FORMAT __Cc [1 + 18] = {
54 _(111.285962475327f), // 32768/2/255*sqrt(3) 54 _(111.285962475327f), // 32768/2/255*sqrt(3)
55 _(65536.000000000000f), _(21845.333333333332f), _(13107.200000000001f), _(9362.285714285713f), 55 _(65536.000000000000f), _(21845.333333333332f), _(13107.200000000001f), _(9362.285714285713f),
56 _(7281.777777777777f), _(4369.066666666666f), _(2114.064516129032f), _(1040.253968253968f), 56 _(7281.777777777777f), _(4369.066666666666f), _(2114.064516129032f), _(1040.253968253968f),
57 _(516.031496062992f), _(257.003921568627f), _(128.250489236790f), _(64.062561094819f), 57 _(516.031496062992f), _(257.003921568627f), _(128.250489236790f), _(64.062561094819f),
58 _(32.015632633121f), _(16.003907203907f), _(8.000976681723f), _(4.000244155527f), 58 _(32.015632633121f), _(16.003907203907f), _(8.000976681723f), _(4.000244155527f),
59 _(2.000061037018f), _(1.000015259021f) 59 _(2.000061037018f), _(1.000015259021f)
60}; 60};
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index 0dabe59150..eca23804c5 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -39,19 +39,51 @@
39#include "musepack.h" 39#include "musepack.h"
40#include "internal.h" 40#include "internal.h"
41 41
42/* S E T T I N G S */
43// choose speed vs. accuracy for MPC_FIXED_POINT
44// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision)
45// to disable the speed optimization -> comment out the OPTIMIZE_FOR_SPEED define(s) below
46#if defined(MPC_FIXED_POINT)
47 #if defined(CPU_COLDFIRE)
48 // do nothing
49 #elif defined(CPU_ARM)
50 #define OPTIMIZE_FOR_SPEED
51 #else
52 #define OPTIMIZE_FOR_SPEED
53 #endif
54#else
55 // do nothing
56#endif
57
42/* C O N S T A N T S */ 58/* C O N S T A N T S */
43#undef _ 59#undef _
44 60
45#define MPC_FIXED_POINT_SYNTH_FIX 2 61#if defined(MPC_FIXED_POINT)
46 62 #if defined(OPTIMIZE_FOR_SPEED)
47#ifdef MPC_FIXED_POINT 63 // round to +/- 2^14 as pre-shift before 32=32x32-multiply
48#define _(value) MPC_MAKE_FRACT_CONST((double)value/(double)(0x40000)) 64 #define _(value) (MPC_SHR_RND(value, 3))
65
66 // round to +/- 2^17 as pre-shift before 32=32x32-multiply
67 #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 14)
68 #else
69 // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
70 #define _(value) (value << (14))
71
72 // do not perform pre-shift
73 #define MPC_V_PRESHIFT(X) (X)
74 #endif
49#else 75#else
50#define _(value) MAKE_MPC_SAMPLE((double)value/(double)(0x10000)) 76 // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
77 // by the correct amount to have proper scaled output
78 #define _(value) MAKE_MPC_SAMPLE((double)value*(double)(0x1000))
79
80 // do not perform pre-shift
81 #define MPC_V_PRESHIFT(X) (X)
51#endif 82#endif
52 83
53 84// Di_opt coefficients are +/- 2^17
54static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = { 85static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = {
86 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
55 { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) }, 87 { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) },
56 { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) }, 88 { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) },
57 { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) }, 89 { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) },
@@ -88,363 +120,513 @@ static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = {
88 120
89#undef _ 121#undef _
90 122
91static void Calculate_New_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V ) 123// V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
124#define V_COEFFICIENT_EXPAND 27
125
126#if defined(MPC_FIXED_POINT)
127 #if defined(OPTIMIZE_FOR_SPEED)
128 // define 32=32x32-multiplication for DCT-coefficients with samples, vcoef will be pre-shifted on creation
129 // samples are rounded to +/- 2^19 as pre-shift before 32=32x32-multiply
130 #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_SHR_RND(sample, 12) * vcoef )
131
132 // round to +/- 2^16 as pre-shift before 32=32x32-multiply
133 #define MPC_MAKE_INVCOS(value) (MPC_SHR_RND(value, 15))
134 #else
135 // define 64=32x32-multiplication for DCT-coefficients with samples. By using MPC_MULTIPLY_FRACT, highly optimized assembler may be used
136 // MPC_MULTIPLY_FRACT does >>32 after the multiplication; as the V-coefficients were expanded by V_COEFFICIENT_EXPAND, we correct for this on the result.
137 // This loses 5 bits of accuracy in the fractional part of the result, without effect on the final audio result
138 #define MPC_MULTIPLY_V(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )
139
140 // directly use accurate 32bit-coefficients
141 #define MPC_MAKE_INVCOS(value) (value)
142 #endif
143#else
144 // for floating point use the standard multiplication macro
145 #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
146
147 // downscale the accurate 32bit-coefficients and convert to float
148 #define MPC_MAKE_INVCOS(value) MAKE_MPC_SAMPLE((double)value/(double)(1<<V_COEFFICIENT_EXPAND))
149#endif
150
151// define constants for DCT-synthesis
152// INVCOSxx = (0.5 / cos(xx*PI/64)) << 27, <<27 to saturate to +/- 2^31
153#define INVCOS01 MPC_MAKE_INVCOS( 67189797)
154#define INVCOS02 MPC_MAKE_INVCOS( 67433575)
155#define INVCOS03 MPC_MAKE_INVCOS( 67843164)
156#define INVCOS04 MPC_MAKE_INVCOS( 68423604)
157#define INVCOS05 MPC_MAKE_INVCOS( 69182167)
158#define INVCOS06 MPC_MAKE_INVCOS( 70128577)
159#define INVCOS07 MPC_MAKE_INVCOS( 71275330)
160#define INVCOS08 MPC_MAKE_INVCOS( 72638111)
161#define INVCOS09 MPC_MAKE_INVCOS( 74236348)
162#define INVCOS10 MPC_MAKE_INVCOS( 76093940)
163#define INVCOS11 MPC_MAKE_INVCOS( 78240207)
164#define INVCOS12 MPC_MAKE_INVCOS( 80711144)
165#define INVCOS13 MPC_MAKE_INVCOS( 83551089)
166#define INVCOS14 MPC_MAKE_INVCOS( 86814950)
167#define INVCOS15 MPC_MAKE_INVCOS( 90571242)
168#define INVCOS16 MPC_MAKE_INVCOS( 94906266)
169#define INVCOS17 MPC_MAKE_INVCOS( 99929967)
170#define INVCOS18 MPC_MAKE_INVCOS( 105784321)
171#define INVCOS19 MPC_MAKE_INVCOS( 112655602)
172#define INVCOS20 MPC_MAKE_INVCOS( 120792764)
173#define INVCOS21 MPC_MAKE_INVCOS( 130535899)
174#define INVCOS22 MPC_MAKE_INVCOS( 142361749)
175#define INVCOS23 MPC_MAKE_INVCOS( 156959571)
176#define INVCOS24 MPC_MAKE_INVCOS( 175363913)
177#define INVCOS25 MPC_MAKE_INVCOS( 199201203)
178#define INVCOS26 MPC_MAKE_INVCOS( 231182936)
179#define INVCOS27 MPC_MAKE_INVCOS( 276190692)
180#define INVCOS28 MPC_MAKE_INVCOS( 343988688)
181#define INVCOS29 MPC_MAKE_INVCOS( 457361460)
182#define INVCOS30 MPC_MAKE_INVCOS( 684664578)
183#define INVCOS31 MPC_MAKE_INVCOS(1367679739)
184
185static inline void
186mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
92{ 187{
93 // Calculating new V-buffer values for left channel 188 // Calculating new V-buffer values for left channel
94 // calculate new V-values (ISO-11172-3, p. 39) 189 // calculate new V-values (ISO-11172-3, p. 39)
95 // based upon fast-MDCT algorithm by Byeong Gi Lee 190 // based upon fast-MDCT algorithm by Byeong Gi Lee
96 /*static*/ MPC_SAMPLE_FORMAT A00, A01, A02, A03, A04, A05, A06, A07, A08, A09, A10, A11, A12, A13, A14, A15; 191 MPC_SAMPLE_FORMAT A[16];
97 /*static*/ MPC_SAMPLE_FORMAT B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15; 192 MPC_SAMPLE_FORMAT B[16];
98 MPC_SAMPLE_FORMAT tmp; 193 MPC_SAMPLE_FORMAT tmp;
99 194
100 A00 = Sample[ 0] + Sample[31]; 195 A[ 0] = Sample[ 0] + Sample[31];
101 A01 = Sample[ 1] + Sample[30]; 196 A[ 1] = Sample[ 1] + Sample[30];
102 A02 = Sample[ 2] + Sample[29]; 197 A[ 2] = Sample[ 2] + Sample[29];
103 A03 = Sample[ 3] + Sample[28]; 198 A[ 3] = Sample[ 3] + Sample[28];
104 A04 = Sample[ 4] + Sample[27]; 199 A[ 4] = Sample[ 4] + Sample[27];
105 A05 = Sample[ 5] + Sample[26]; 200 A[ 5] = Sample[ 5] + Sample[26];
106 A06 = Sample[ 6] + Sample[25]; 201 A[ 6] = Sample[ 6] + Sample[25];
107 A07 = Sample[ 7] + Sample[24]; 202 A[ 7] = Sample[ 7] + Sample[24];
108 A08 = Sample[ 8] + Sample[23]; 203 A[ 8] = Sample[ 8] + Sample[23];
109 A09 = Sample[ 9] + Sample[22]; 204 A[ 9] = Sample[ 9] + Sample[22];
110 A10 = Sample[10] + Sample[21]; 205 A[10] = Sample[10] + Sample[21];
111 A11 = Sample[11] + Sample[20]; 206 A[11] = Sample[11] + Sample[20];
112 A12 = Sample[12] + Sample[19]; 207 A[12] = Sample[12] + Sample[19];
113 A13 = Sample[13] + Sample[18]; 208 A[13] = Sample[13] + Sample[18];
114 A14 = Sample[14] + Sample[17]; 209 A[14] = Sample[14] + Sample[17];
115 A15 = Sample[15] + Sample[16]; 210 A[15] = Sample[15] + Sample[16];
116 211 // 16 adds
117 B00 = A00 + A15; 212
118 B01 = A01 + A14; 213 B[ 0] = A[ 0] + A[15];
119 B02 = A02 + A13; 214 B[ 1] = A[ 1] + A[14];
120 B03 = A03 + A12; 215 B[ 2] = A[ 2] + A[13];
121 B04 = A04 + A11; 216 B[ 3] = A[ 3] + A[12];
122 B05 = A05 + A10; 217 B[ 4] = A[ 4] + A[11];
123 B06 = A06 + A09; 218 B[ 5] = A[ 5] + A[10];
124 B07 = A07 + A08;; 219 B[ 6] = A[ 6] + A[ 9];
125 B08 = MPC_SCALE_CONST((A00 - A15) , 0.5024192929f , 31); 220 B[ 7] = A[ 7] + A[ 8];;
126 B09 = MPC_SCALE_CONST((A01 - A14) , 0.5224986076f , 31); 221 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
127 B10 = MPC_SCALE_CONST((A02 - A13) , 0.5669440627f , 31); 222 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
128 B11 = MPC_SCALE_CONST((A03 - A12) , 0.6468217969f , 31); 223 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
129 B12 = MPC_SCALE_CONST((A04 - A11) , 0.7881546021f , 31); 224 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
130 B13 = MPC_SCALE_CONST((A05 - A10) , 1.0606776476f , 30); 225 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
131 B14 = MPC_SCALE_CONST((A06 - A09) , 1.7224471569f , 30); 226 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
132 B15 = MPC_SCALE_CONST((A07 - A08) , 5.1011486053f , 28); 227 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
133 228 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
134 A00 = B00 + B07; 229 // 8 adds, 8 subs, 8 muls, 8 shifts
135 A01 = B01 + B06; 230
136 A02 = B02 + B05; 231 A[ 0] = B[ 0] + B[ 7];
137 A03 = B03 + B04; 232 A[ 1] = B[ 1] + B[ 6];
138 A04 = MPC_SCALE_CONST((B00 - B07) , 0.5097956061f , 31); 233 A[ 2] = B[ 2] + B[ 5];
139 A05 = MPC_SCALE_CONST((B01 - B06) , 0.6013448834f , 31); 234 A[ 3] = B[ 3] + B[ 4];
140 A06 = MPC_SCALE_CONST((B02 - B05) , 0.8999761939f , 31); 235 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
141 A07 = MPC_SCALE_CONST((B03 - B04) , 2.5629155636f , 29); 236 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
142 A08 = B08 + B15; 237 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
143 A09 = B09 + B14; 238 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
144 A10 = B10 + B13; 239 A[ 8] = B[ 8] + B[15];
145 A11 = B11 + B12; 240 A[ 9] = B[ 9] + B[14];
146 A12 = MPC_SCALE_CONST((B08 - B15) , 0.5097956061f , 31); 241 A[10] = B[10] + B[13];
147 A13 = MPC_SCALE_CONST((B09 - B14) , 0.6013448834f , 31); 242 A[11] = B[11] + B[12];
148 A14 = MPC_SCALE_CONST((B10 - B13) , 0.8999761939f , 31); 243 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
149 A15 = MPC_SCALE_CONST((B11 - B12) , 2.5629155636f , 29); 244 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
150 245 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
151 B00 = A00 + A03; 246 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
152 B01 = A01 + A02; 247 // 8 adds, 8 subs, 8 muls, 8 shifts
153 B02 = MPC_MULTIPLY_FRACT_CONST_FIX((A00 - A03) , 0.5411961079f , 1); 248
154 B03 = MPC_MULTIPLY_FRACT_CONST_FIX((A01 - A02) , 1.3065630198f , 2); 249 B[ 0] = A[ 0] + A[ 3];
155 B04 = A04 + A07; 250 B[ 1] = A[ 1] + A[ 2];
156 B05 = A05 + A06; 251 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
157 B06 = MPC_MULTIPLY_FRACT_CONST_FIX((A04 - A07) , 0.5411961079f , 1); 252 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
158 B07 = MPC_MULTIPLY_FRACT_CONST_FIX((A05 - A06) , 1.3065630198f , 2); 253 B[ 4] = A[ 4] + A[ 7];
159 B08 = A08 + A11; 254 B[ 5] = A[ 5] + A[ 6];
160 B09 = A09 + A10; 255 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
161 B10 = MPC_MULTIPLY_FRACT_CONST_FIX((A08 - A11) , 0.5411961079f , 1); 256 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
162 B11 = MPC_MULTIPLY_FRACT_CONST_FIX((A09 - A10) , 1.3065630198f , 2); 257 B[ 8] = A[ 8] + A[11];
163 B12 = A12 + A15; 258 B[ 9] = A[ 9] + A[10];
164 B13 = A13 + A14; 259 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
165 B14 = MPC_MULTIPLY_FRACT_CONST_FIX((A12 - A15) , 0.5411961079f , 1); 260 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
166 B15 = MPC_MULTIPLY_FRACT_CONST_FIX((A13 - A14) , 1.3065630198f , 2); 261 B[12] = A[12] + A[15];
167 262 B[13] = A[13] + A[14];
168 A00 = B00 + B01; 263 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
169 A01 = MPC_MULTIPLY_FRACT_CONST_FIX((B00 - B01) , 0.7071067691f , 1); 264 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
170 A02 = B02 + B03; 265 // 8 adds, 8 subs, 8 muls, 8 shifts
171 A03 = MPC_MULTIPLY_FRACT_CONST_FIX((B02 - B03) , 0.7071067691f , 1); 266
172 A04 = B04 + B05; 267 A[ 0] = B[ 0] + B[ 1];
173 A05 = MPC_MULTIPLY_FRACT_CONST_FIX((B04 - B05) , 0.7071067691f , 1); 268 A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
174 A06 = B06 + B07; 269 A[ 2] = B[ 2] + B[ 3];
175 A07 = MPC_MULTIPLY_FRACT_CONST_FIX((B06 - B07) , 0.7071067691f , 1); 270 A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
176 A08 = B08 + B09; 271 A[ 4] = B[ 4] + B[ 5];
177 A09 = MPC_MULTIPLY_FRACT_CONST_FIX((B08 - B09) , 0.7071067691f , 1); 272 A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
178 A10 = B10 + B11; 273 A[ 6] = B[ 6] + B[ 7];
179 A11 = MPC_MULTIPLY_FRACT_CONST_FIX((B10 - B11) , 0.7071067691f , 1); 274 A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
180 A12 = B12 + B13; 275 A[ 8] = B[ 8] + B[ 9];
181 A13 = MPC_MULTIPLY_FRACT_CONST_FIX((B12 - B13) , 0.7071067691f , 1); 276 A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
182 A14 = B14 + B15; 277 A[10] = B[10] + B[11];
183 A15 = MPC_MULTIPLY_FRACT_CONST_FIX((B14 - B15) , 0.7071067691f , 1); 278 A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
184 279 A[12] = B[12] + B[13];
185 V[48] = -A00; 280 A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
186 V[ 0] = A01; 281 A[14] = B[14] + B[15];
187 V[40] = -A02 - (V[ 8] = A03); 282 A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
188 V[36] = -((V[ 4] = A05 + (V[12] = A07)) + A06); 283 // 8 adds, 8 subs, 8 muls, 8 shifts
189 V[44] = - A04 - A06 - A07; 284
190 V[ 6] = (V[10] = A11 + (V[14] = A15)) + A13; 285 // multiple used expressions: -(A[12] + A[14] + A[15])
191 V[38] = (V[34] = -(V[ 2] = A09 + A13 + A15) - A14) + A09 - A10 - A11; 286 V[48] = -A[ 0];
192 V[46] = (tmp = -(A12 + A14 + A15)) - A08; 287 V[ 0] = A[ 1];
193 V[42] = tmp - A10 - A11; 288 V[40] = -A[ 2] - (V[ 8] = A[ 3]);
194 289 V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
195 A00 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 0] - Sample[31]) , 0.5006030202f , MPC_FIXED_POINT_SYNTH_FIX); 290 V[44] = - A[ 4] - A[ 6] - A[ 7];
196 A01 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 1] - Sample[30]) , 0.5054709315f , MPC_FIXED_POINT_SYNTH_FIX); 291 V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
197 A02 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 2] - Sample[29]) , 0.5154473186f , MPC_FIXED_POINT_SYNTH_FIX); 292 V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
198 A03 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 3] - Sample[28]) , 0.5310425758f , MPC_FIXED_POINT_SYNTH_FIX); 293 V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
199 A04 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 4] - Sample[27]) , 0.5531039238f , MPC_FIXED_POINT_SYNTH_FIX); 294 V[42] = tmp - A[10] - A[11];
200 A05 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 5] - Sample[26]) , 0.5829349756f , MPC_FIXED_POINT_SYNTH_FIX); 295 // 9 adds, 9 subs
201 A06 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 6] - Sample[25]) , 0.6225041151f , MPC_FIXED_POINT_SYNTH_FIX); 296
202 A07 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 7] - Sample[24]) , 0.6748083234f , MPC_FIXED_POINT_SYNTH_FIX); 297 A[ 0] = MPC_MULTIPLY_V((Sample[ 0] - Sample[31]), INVCOS01);
203 A08 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 8] - Sample[23]) , 0.7445362806f , MPC_FIXED_POINT_SYNTH_FIX); 298 A[ 1] = MPC_MULTIPLY_V((Sample[ 1] - Sample[30]), INVCOS03);
204 A09 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[ 9] - Sample[22]) , 0.8393496275f , MPC_FIXED_POINT_SYNTH_FIX); 299 A[ 2] = MPC_MULTIPLY_V((Sample[ 2] - Sample[29]), INVCOS05);
205 A10 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[10] - Sample[21]) , 0.9725682139f , MPC_FIXED_POINT_SYNTH_FIX); 300 A[ 3] = MPC_MULTIPLY_V((Sample[ 3] - Sample[28]), INVCOS07);
206#if MPC_FIXED_POINT_SYNTH_FIX>=2 301 A[ 4] = MPC_MULTIPLY_V((Sample[ 4] - Sample[27]), INVCOS09);
207 A11 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[11] - Sample[20]) , 1.1694399118f , MPC_FIXED_POINT_SYNTH_FIX); 302 A[ 5] = MPC_MULTIPLY_V((Sample[ 5] - Sample[26]), INVCOS11);
208 A12 = MPC_MULTIPLY_FRACT_CONST_SHR((Sample[12] - Sample[19]) , 1.4841645956f , MPC_FIXED_POINT_SYNTH_FIX); 303 A[ 6] = MPC_MULTIPLY_V((Sample[ 6] - Sample[25]), INVCOS13);
209#else 304 A[ 7] = MPC_MULTIPLY_V((Sample[ 7] - Sample[24]), INVCOS15);
210 A11 = MPC_SCALE_CONST_SHR ((Sample[11] - Sample[20]) , 1.1694399118f , 30, MPC_FIXED_POINT_SYNTH_FIX); 305 A[ 8] = MPC_MULTIPLY_V((Sample[ 8] - Sample[23]), INVCOS17);
211 A12 = MPC_SCALE_CONST_SHR ((Sample[12] - Sample[19]) , 1.4841645956f , 30, MPC_FIXED_POINT_SYNTH_FIX); 306 A[ 9] = MPC_MULTIPLY_V((Sample[ 9] - Sample[22]), INVCOS19);
212#endif 307 A[10] = MPC_MULTIPLY_V((Sample[10] - Sample[21]), INVCOS21);
213 A13 = MPC_SCALE_CONST_SHR ((Sample[13] - Sample[18]) , 2.0577809811f , 29, MPC_FIXED_POINT_SYNTH_FIX); 308 A[11] = MPC_MULTIPLY_V((Sample[11] - Sample[20]), INVCOS23);
214 A14 = MPC_SCALE_CONST_SHR ((Sample[14] - Sample[17]) , 3.4076085091f , 29, MPC_FIXED_POINT_SYNTH_FIX); 309 A[12] = MPC_MULTIPLY_V((Sample[12] - Sample[19]), INVCOS25);
215 A15 = MPC_SCALE_CONST_SHR ((Sample[15] - Sample[16]) , 10.1900081635f, 27 ,MPC_FIXED_POINT_SYNTH_FIX); 310 A[13] = MPC_MULTIPLY_V((Sample[13] - Sample[18]), INVCOS27);
216 311 A[14] = MPC_MULTIPLY_V((Sample[14] - Sample[17]), INVCOS29);
217 B00 = A00 + A15; 312 A[15] = MPC_MULTIPLY_V((Sample[15] - Sample[16]), INVCOS31);
218 B01 = A01 + A14; 313 // 16 subs, 16 muls, 16 shifts
219 B02 = A02 + A13; 314
220 B03 = A03 + A12; 315 B[ 0] = A[ 0] + A[15];
221 B04 = A04 + A11; 316 B[ 1] = A[ 1] + A[14];
222 B05 = A05 + A10; 317 B[ 2] = A[ 2] + A[13];
223 B06 = A06 + A09; 318 B[ 3] = A[ 3] + A[12];
224 B07 = A07 + A08; 319 B[ 4] = A[ 4] + A[11];
225 B08 = MPC_SCALE_CONST((A00 - A15) , 0.5024192929f , 31); 320 B[ 5] = A[ 5] + A[10];
226 B09 = MPC_SCALE_CONST((A01 - A14) , 0.5224986076f , 31); 321 B[ 6] = A[ 6] + A[ 9];
227 B10 = MPC_SCALE_CONST((A02 - A13) , 0.5669440627f , 31); 322 B[ 7] = A[ 7] + A[ 8];
228 B11 = MPC_SCALE_CONST((A03 - A12) , 0.6468217969f , 31); 323 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
229 B12 = MPC_SCALE_CONST((A04 - A11) , 0.7881546021f , 31); 324 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
230 B13 = MPC_SCALE_CONST((A05 - A10) , 1.0606776476f , 30); 325 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
231 B14 = MPC_SCALE_CONST((A06 - A09) , 1.7224471569f , 30); 326 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
232 B15 = MPC_SCALE_CONST((A07 - A08) , 5.1011486053f , 28); 327 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
233 328 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
234 A00 = B00 + B07; 329 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
235 A01 = B01 + B06; 330 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
236 A02 = B02 + B05; 331 // 8 adds, 8 subs, 8 muls, 8 shift
237 A03 = B03 + B04; 332
238 A04 = MPC_SCALE_CONST((B00 - B07) , 0.5097956061f , 31); 333 A[ 0] = B[ 0] + B[ 7];
239 A05 = MPC_SCALE_CONST((B01 - B06) , 0.6013448834f , 31); 334 A[ 1] = B[ 1] + B[ 6];
240 A06 = MPC_SCALE_CONST((B02 - B05) , 0.8999761939f , 31); 335 A[ 2] = B[ 2] + B[ 5];
241 A07 = MPC_SCALE_CONST((B03 - B04) , 2.5629155636f , 29); 336 A[ 3] = B[ 3] + B[ 4];
242 A08 = B08 + B15; 337 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
243 A09 = B09 + B14; 338 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
244 A10 = B10 + B13; 339 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
245 A11 = B11 + B12; 340 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
246 A12 = MPC_SCALE_CONST((B08 - B15) , 0.5097956061f , 31); 341 A[ 8] = B[ 8] + B[15];
247 A13 = MPC_SCALE_CONST((B09 - B14) , 0.6013448834f , 31); 342 A[ 9] = B[ 9] + B[14];
248 A14 = MPC_SCALE_CONST((B10 - B13) , 0.8999761939f , 31); 343 A[10] = B[10] + B[13];
249 A15 = MPC_SCALE_CONST((B11 - B12) , 2.5629155636f , 29); 344 A[11] = B[11] + B[12];
250 345 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
251 B00 = A00 + A03; 346 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
252 B01 = A01 + A02; 347 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
253 B02 = MPC_SCALE_CONST((A00 - A03) , 0.5411961079f , 31); 348 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
254 B03 = MPC_SCALE_CONST((A01 - A02) , 1.3065630198f , 30); 349 // 8 adds, 8 subs, 8 muls, 8 shift
255 B04 = A04 + A07; 350
256 B05 = A05 + A06; 351 B[ 0] = A[ 0] + A[ 3];
257 B06 = MPC_SCALE_CONST((A04 - A07) , 0.5411961079f , 31); 352 B[ 1] = A[ 1] + A[ 2];
258 B07 = MPC_SCALE_CONST((A05 - A06) , 1.3065630198f , 30); 353 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
259 B08 = A08 + A11; 354 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
260 B09 = A09 + A10; 355 B[ 4] = A[ 4] + A[ 7];
261 B10 = MPC_SCALE_CONST((A08 - A11) , 0.5411961079f , 31); 356 B[ 5] = A[ 5] + A[ 6];
262 B11 = MPC_SCALE_CONST((A09 - A10) , 1.3065630198f , 30); 357 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
263 B12 = A12 + A15; 358 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
264 B13 = A13 + A14; 359 B[ 8] = A[ 8] + A[11];
265 B14 = MPC_SCALE_CONST((A12 - A15) , 0.5411961079f , 31); 360 B[ 9] = A[ 9] + A[10];
266 B15 = MPC_SCALE_CONST((A13 - A14) , 1.3065630198f , 30); 361 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
267 362 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
268 A00 = MPC_SHL(B00 + B01, MPC_FIXED_POINT_SYNTH_FIX); 363 B[12] = A[12] + A[15];
269 A01 = MPC_SCALE_CONST_SHL((B00 - B01) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 364 B[13] = A[13] + A[14];
270 A02 = MPC_SHL(B02 + B03, MPC_FIXED_POINT_SYNTH_FIX); 365 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
271 A03 = MPC_SCALE_CONST_SHL((B02 - B03) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 366 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
272 A04 = MPC_SHL(B04 + B05, MPC_FIXED_POINT_SYNTH_FIX); 367 // 8 adds, 8 subs, 8 muls, 8 shift
273 A05 = MPC_SCALE_CONST_SHL((B04 - B05) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 368
274 A06 = MPC_SHL(B06 + B07, MPC_FIXED_POINT_SYNTH_FIX); 369 A[ 0] = B[ 0] + B[ 1];
275 A07 = MPC_SCALE_CONST_SHL((B06 - B07) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 370 A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
276 A08 = MPC_SHL(B08 + B09, MPC_FIXED_POINT_SYNTH_FIX); 371 A[ 2] = B[ 2] + B[ 3];
277 A09 = MPC_SCALE_CONST_SHL((B08 - B09) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 372 A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
278 A10 = MPC_SHL(B10 + B11, MPC_FIXED_POINT_SYNTH_FIX); 373 A[ 4] = B[ 4] + B[ 5];
279 A11 = MPC_SCALE_CONST_SHL((B10 - B11) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 374 A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
280 A12 = MPC_SHL(B12 + B13, MPC_FIXED_POINT_SYNTH_FIX); 375 A[ 6] = B[ 6] + B[ 7];
281 A13 = MPC_SCALE_CONST_SHL((B12 - B13) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 376 A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
282 A14 = MPC_SHL(B14 + B15, MPC_FIXED_POINT_SYNTH_FIX); 377 A[ 8] = B[ 8] + B[ 9];
283 A15 = MPC_SCALE_CONST_SHL((B14 - B15) , 0.7071067691f , 31, MPC_FIXED_POINT_SYNTH_FIX); 378 A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
284 379 A[10] = B[10] + B[11];
285 // mehrfach verwendete Ausdrücke: A04+A06+A07, A09+A13+A15 380 A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
286 V[ 5] = (V[11] = (V[13] = A07 + (V[15] = A15)) + A11) + A05 + A13; 381 A[12] = B[12] + B[13];
287 V[ 7] = (V[ 9] = A03 + A11 + A15) + A13; 382 A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
288 V[33] = -(V[ 1] = A01 + A09 + A13 + A15) - A14; 383 A[14] = B[14] + B[15];
289 V[35] = -(V[ 3] = A05 + A07 + A09 + A13 + A15) - A06 - A14; 384 A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
290 V[37] = (tmp = -(A10 + A11 + A13 + A14 + A15)) - A05 - A06 - A07; 385 // 8 adds, 8 subs, 8 muls, 8 shift
291 V[39] = tmp - A02 - A03; // abhängig vom Befehl drüber 386
292 V[41] = (tmp += A13 - A12) - A02 - A03; // abhängig vom Befehl 2 drüber 387 // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
293 V[43] = tmp - A04 - A06 - A07; // abhängig von Befehlen 1 und 3 drüber 388 V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
294 V[47] = (tmp = -(A08 + A12 + A14 + A15)) - A00; 389 V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
295 V[45] = tmp - A04 - A06 - A07; // abhängig vom Befehl drüber 390 V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
296 391 V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
297 V[32] = -V[ 0]; 392 V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
298 V[31] = -V[ 1]; 393 V[39] = tmp - A[ 2] - A[ 3];
299 V[30] = -V[ 2]; 394 V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
300 V[29] = -V[ 3]; 395 V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
301 V[28] = -V[ 4]; 396 V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
302 V[27] = -V[ 5]; 397 V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
303 V[26] = -V[ 6]; 398 // 22 adds, 18 subs
304 V[25] = -V[ 7]; 399
305 V[24] = -V[ 8]; 400 V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
306 V[23] = -V[ 9]; 401 V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
307 V[22] = -V[10]; 402 V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
308 V[21] = -V[11]; 403 V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
309 V[20] = -V[12]; 404 V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
310 V[19] = -V[13]; 405 V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
311 V[18] = -V[14]; 406 V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
312 V[17] = -V[15]; 407 V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
313 408 V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
314 V[63] = V[33]; 409 V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
315 V[62] = V[34]; 410 V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
316 V[61] = V[35]; 411 V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
317 V[60] = V[36]; 412 V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
318 V[59] = V[37]; 413 V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
319 V[58] = V[38]; 414 V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
320 V[57] = V[39]; 415 V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
321 V[56] = V[40]; 416 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
322 V[55] = V[41]; 417
323 V[54] = V[42]; 418 V[63] = (V[33] = MPC_V_PRESHIFT(V[33]));
324 V[53] = V[43]; 419 V[62] = (V[34] = MPC_V_PRESHIFT(V[34]));
325 V[52] = V[44]; 420 V[61] = (V[35] = MPC_V_PRESHIFT(V[35]));
326 V[51] = V[45]; 421 V[60] = (V[36] = MPC_V_PRESHIFT(V[36]));
327 V[50] = V[46]; 422 V[59] = (V[37] = MPC_V_PRESHIFT(V[37]));
328 V[49] = V[47]; 423 V[58] = (V[38] = MPC_V_PRESHIFT(V[38]));
424 V[57] = (V[39] = MPC_V_PRESHIFT(V[39]));
425 V[56] = (V[40] = MPC_V_PRESHIFT(V[40]));
426 V[55] = (V[41] = MPC_V_PRESHIFT(V[41]));
427 V[54] = (V[42] = MPC_V_PRESHIFT(V[42]));
428 V[53] = (V[43] = MPC_V_PRESHIFT(V[43]));
429 V[52] = (V[44] = MPC_V_PRESHIFT(V[44]));
430 V[51] = (V[45] = MPC_V_PRESHIFT(V[45]));
431 V[50] = (V[46] = MPC_V_PRESHIFT(V[46]));
432 V[49] = (V[47] = MPC_V_PRESHIFT(V[47]));
433 V[48] = (V[48] = MPC_V_PRESHIFT(V[48]));
434 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
435
436 // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
437 // total: 111 adds, 107 subs, 80 muls, 80 shifts
438}
439
440static inline void
441mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
442{
443 const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt;
444 mpc_int32_t k;
445
446 #if defined(OPTIMIZE_FOR_SPEED)
447 #if defined(CPU_ARM)
448 // 32=32x32-multiply assembler for ARM
449 for ( k = 0; k < 32; k++, V++ )
450 {
451 asm volatile (
452 "ldmia %[D]!, { r0-r3 } \n\t"
453 "ldr r4, [%[V]] \n\t"
454 "mul r5, r0, r4 \n\t"
455 "ldr r4, [%[V], #96*4] \n\t"
456 "mla r5, r1, r4, r5 \n\t"
457 "ldr r4, [%[V], #128*4] \n\t"
458 "mla r5, r2, r4, r5 \n\t"
459 "ldr r4, [%[V], #224*4] \n\t"
460 "mla r5, r3, r4, r5 \n\t"
461
462 "ldmia %[D]!, { r0-r3 } \n\t"
463 "ldr r4, [%[V], #256*4] \n\t"
464 "mla r5, r0, r4, r5 \n\t"
465 "ldr r4, [%[V], #352*4] \n\t"
466 "mla r5, r1, r4, r5 \n\t"
467 "ldr r4, [%[V], #384*4] \n\t"
468 "mla r5, r2, r4, r5 \n\t"
469 "ldr r4, [%[V], #480*4] \n\t"
470 "mla r5, r3, r4, r5 \n\t"
471
472 "ldmia %[D]!, { r0-r3 } \n\t"
473 "ldr r4, [%[V], #512*4] \n\t"
474 "mla r5, r0, r4, r5 \n\t"
475 "ldr r4, [%[V], #608*4] \n\t"
476 "mla r5, r1, r4, r5 \n\t"
477 "ldr r4, [%[V], #640*4] \n\t"
478 "mla r5, r2, r4, r5 \n\t"
479 "ldr r4, [%[V], #736*4] \n\t"
480 "mla r5, r3, r4, r5 \n\t"
481
482 "ldmia %[D]!, { r0-r3 } \n\t"
483 "ldr r4, [%[V], #768*4] \n\t"
484 "mla r5, r0, r4, r5 \n\t"
485 "ldr r4, [%[V], #864*4] \n\t"
486 "mla r5, r1, r4, r5 \n\t"
487 "ldr r4, [%[V], #896*4] \n\t"
488 "mla r5, r2, r4, r5 \n\t"
489 "ldr r4, [%[V], #992*4] \n\t"
490 "mla r5, r3, r4, r5 \n\t"
491 "str r5, [%[Data]], #4 \n"
492 : [Data] "+r" (Data), [D] "+r" (D)
493 : [V] "r" (V)
494 : "r0", "r1", "r2", "r3", "r4", "r5");
495 }
496 #else
497 // 32=32x32-multiply (FIXED_POINT)
498 for ( k = 0; k < 32; k++, D += 16, V++ )
499 {
500 *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
501 + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
502 + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
503 + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
504 Data += 1;
505 // total: 16 muls, 15 adds
506 }
507 #endif
508 #else
509 #if defined(CPU_COLDFIRE)
510 // 64=32x32-multiply assembler for Coldfire
511 for ( k = 0; k < 32; k++, D += 16, V++ )
512 {
513 asm volatile (
514 "movem.l (%[D]), %%d0-%%d3 \n\t"
515 "move.l (%[V]), %%a5 \n\t"
516 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
517 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
518 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
519 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
520 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
521 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
522 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
523 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
524 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
525 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
526 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
527 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
528 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
529 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
530 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
531 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
532 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
533 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
534 "mac.l %%d3, %%a5, %%acc0 \n\t"
535 "movclr.l %%acc0, %%d0 \n\t"
536 "move.l %%d0, (%[Data])+ \n"
537 : [Data] "+a" (Data)
538 : [V] "a" (V), [D] "a" (D)
539 : "d0", "d1", "d2", "d3", "a5");
540 }
541 #elif defined(CPU_ARM)
542 // 64=32x32-multiply assembler for ARM
543 for ( k = 0; k < 32; k++, V++ )
544 {
545 asm volatile (
546 "ldmia %[D]!, { r0-r3 } \n\t"
547 "ldr r4, [%[V]] \n\t"
548 "smull r5, r6, r0, r4 \n\t"
549 "ldr r4, [%[V], #96*4] \n\t"
550 "smlal r5, r6, r1, r4 \n\t"
551 "ldr r4, [%[V], #128*4] \n\t"
552 "smlal r5, r6, r2, r4 \n\t"
553 "ldr r4, [%[V], #224*4] \n\t"
554 "smlal r5, r6, r3, r4 \n\t"
555
556 "ldmia %[D]!, { r0-r3 } \n\t"
557 "ldr r4, [%[V], #256*4] \n\t"
558 "smlal r5, r6, r0, r4 \n\t"
559 "ldr r4, [%[V], #352*4] \n\t"
560 "smlal r5, r6, r1, r4 \n\t"
561 "ldr r4, [%[V], #384*4] \n\t"
562 "smlal r5, r6, r2, r4 \n\t"
563 "ldr r4, [%[V], #480*4] \n\t"
564 "smlal r5, r6, r3, r4 \n\t"
565
566 "ldmia %[D]!, { r0-r3 } \n\t"
567 "ldr r4, [%[V], #512*4] \n\t"
568 "smlal r5, r6, r0, r4 \n\t"
569 "ldr r4, [%[V], #608*4] \n\t"
570 "smlal r5, r6, r1, r4 \n\t"
571 "ldr r4, [%[V], #640*4] \n\t"
572 "smlal r5, r6, r2, r4 \n\t"
573 "ldr r4, [%[V], #736*4] \n\t"
574 "smlal r5, r6, r3, r4 \n\t"
575
576 "ldmia %[D]!, { r0-r3 } \n\t"
577 "ldr r4, [%[V], #768*4] \n\t"
578 "smlal r5, r6, r0, r4 \n\t"
579 "ldr r4, [%[V], #864*4] \n\t"
580 "smlal r5, r6, r1, r4 \n\t"
581 "ldr r4, [%[V], #896*4] \n\t"
582 "smlal r5, r6, r2, r4 \n\t"
583 "ldr r4, [%[V], #992*4] \n\t"
584 "smlal r5, r6, r3, r4 \n\t"
585 "mov r4, r6, lsl #1 \n\t"
586 "orr r4, r4, r5, lsr #31\n\t"
587 "str r4, [%[Data]], #4 \n"
588 : [Data] "+r" (Data), [D] "+r" (D)
589 : [V] "r" (V)
590 : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
591 }
592 #else
593 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
594 for ( k = 0; k < 32; k++, D += 16, V++ )
595 {
596 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
597 + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
598 + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
599 + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
600 Data += 1;
601 // total: 16 muls, 15 adds, 16 shifts
602 }
603 #endif
604 #endif
329} 605}
330 606
331static void Synthese_Filter_float_internal(MPC_SAMPLE_FORMAT * OutData,MPC_SAMPLE_FORMAT * V,const MPC_SAMPLE_FORMAT * Y) 607static void
608mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
332{ 609{
333 mpc_uint32_t n; 610 mpc_uint32_t n;
334 for ( n = 0; n < 36; n++, Y += 32 ) { 611
335 V -= 64; 612 if (NULL != OutData)
336 Calculate_New_V ( Y, V ); 613 {
337 if (OutData != NULL) 614 for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
338 { 615 {
339 MPC_SAMPLE_FORMAT * Data = OutData; 616 V -= 64;
340 const MPC_SAMPLE_FORMAT * D = (const MPC_SAMPLE_FORMAT *) &Di_opt; 617 mpc_calculate_new_V ( Y, V );
341 mpc_int32_t k; 618 mpc_decoder_windowing_D( OutData, V);
342 //mpc_int32_t tmp;
343
344
345
346 #if defined(CPU_COLDFIRE)
347 for ( k = 0; k < 32; k++, D += 16, V++ ) {
348 asm volatile (
349 "movem.l (%[D]), %%d0-%%d3 \n\t"
350 "move.l (%[V]), %%a5 \n\t"
351 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
352 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
353 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
354 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
355 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
356 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
357 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
358 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
359 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
360 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
361 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
362 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
363 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
364 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
365 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
366 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
367 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
368 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
369 "mac.l %%d3, %%a5, %%acc0 \n\t"
370 "movclr.l %%acc0, %%d0 \n\t"
371 "move.l %%d0, (%[Data])+ \n"
372 : [Data] "+a" (Data)
373 : [V] "a" (V), [D] "a" (D)
374 : "d0", "d1", "d2", "d3", "a5");
375 #elif defined(CPU_ARM)
376 for ( k = 0; k < 32; k++, V++ ) {
377 asm volatile (
378 "ldmia %[D]!, { r0-r3 } \n\t"
379 "ldr r4, [%[V]] \n\t"
380 "smull r5, r6, r0, r4 \n\t"
381 "ldr r4, [%[V], #96*4] \n\t"
382 "smlal r5, r6, r1, r4 \n\t"
383 "ldr r4, [%[V], #128*4] \n\t"
384 "smlal r5, r6, r2, r4 \n\t"
385 "ldr r4, [%[V], #224*4] \n\t"
386 "smlal r5, r6, r3, r4 \n\t"
387
388 "ldmia %[D]!, { r0-r3 } \n\t"
389 "ldr r4, [%[V], #256*4] \n\t"
390 "smlal r5, r6, r0, r4 \n\t"
391 "ldr r4, [%[V], #352*4] \n\t"
392 "smlal r5, r6, r1, r4 \n\t"
393 "ldr r4, [%[V], #384*4] \n\t"
394 "smlal r5, r6, r2, r4 \n\t"
395 "ldr r4, [%[V], #480*4] \n\t"
396 "smlal r5, r6, r3, r4 \n\t"
397
398 "ldmia %[D]!, { r0-r3 } \n\t"
399 "ldr r4, [%[V], #512*4] \n\t"
400 "smlal r5, r6, r0, r4 \n\t"
401 "ldr r4, [%[V], #608*4] \n\t"
402 "smlal r5, r6, r1, r4 \n\t"
403 "ldr r4, [%[V], #640*4] \n\t"
404 "smlal r5, r6, r2, r4 \n\t"
405 "ldr r4, [%[V], #736*4] \n\t"
406 "smlal r5, r6, r3, r4 \n\t"
407
408 "ldmia %[D]!, { r0-r3 } \n\t"
409 "ldr r4, [%[V], #768*4] \n\t"
410 "smlal r5, r6, r0, r4 \n\t"
411 "ldr r4, [%[V], #864*4] \n\t"
412 "smlal r5, r6, r1, r4 \n\t"
413 "ldr r4, [%[V], #896*4] \n\t"
414 "smlal r5, r6, r2, r4 \n\t"
415 "ldr r4, [%[V], #992*4] \n\t"
416 "smlal r5, r6, r3, r4 \n\t"
417 "mov r4, r6, lsl #1 \n\t"
418 "orr r4, r4, r5, lsr #31\n\t"
419 "str r4, [%[Data]], #4 \n"
420 : [Data] "+r" (Data), [D] "+r" (D)
421 : [V] "r" (V)
422 : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
423 #else
424 for ( k = 0; k < 32; k++, D += 16, V++ ) {
425 *Data = MPC_SHL(
426 MPC_MULTIPLY_FRACT(V[ 0],D[ 0]) + MPC_MULTIPLY_FRACT(V[ 96],D[ 1]) + MPC_MULTIPLY_FRACT(V[128],D[ 2]) + MPC_MULTIPLY_FRACT(V[224],D[ 3])
427 + MPC_MULTIPLY_FRACT(V[256],D[ 4]) + MPC_MULTIPLY_FRACT(V[352],D[ 5]) + MPC_MULTIPLY_FRACT(V[384],D[ 6]) + MPC_MULTIPLY_FRACT(V[480],D[ 7])
428 + MPC_MULTIPLY_FRACT(V[512],D[ 8]) + MPC_MULTIPLY_FRACT(V[608],D[ 9]) + MPC_MULTIPLY_FRACT(V[640],D[10]) + MPC_MULTIPLY_FRACT(V[736],D[11])
429 + MPC_MULTIPLY_FRACT(V[768],D[12]) + MPC_MULTIPLY_FRACT(V[864],D[13]) + MPC_MULTIPLY_FRACT(V[896],D[14]) + MPC_MULTIPLY_FRACT(V[992],D[15])
430 , 1);
431
432 Data += 1;
433 #endif
434 }
435 V -= 32;//bleh
436 OutData+=32;
437 } 619 }
438 } 620 }
439} 621}
440 622
441void 623void
442mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT* OutData) 624mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
443{ 625{
444 /********* left channel ********/ 626 /********* left channel ********/
445 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) ); 627 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
446 628
447 Synthese_Filter_float_internal( 629 mpc_full_synthesis_filter(
448 OutData, 630 OutData,
449 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM), 631 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
450 (MPC_SAMPLE_FORMAT *)(d->Y_L [0])); 632 (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));
@@ -452,7 +634,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT* OutData)
452 /******** right channel ********/ 634 /******** right channel ********/
453 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) ); 635 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
454 636
455 Synthese_Filter_float_internal( 637 mpc_full_synthesis_filter(
456 (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH), 638 (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
457 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM), 639 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
458 (MPC_SAMPLE_FORMAT *)(d->Y_R [0])); 640 (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
diff --git a/apps/codecs/mpc.c b/apps/codecs/mpc.c
index 122cb22025..8143e04f71 100644
--- a/apps/codecs/mpc.c
+++ b/apps/codecs/mpc.c
@@ -64,8 +64,8 @@ mpc_bool_t canseek_impl(void *data)
64} 64}
65 65
66MPC_SAMPLE_FORMAT sample_buffer[MPC_DECODER_BUFFER_LENGTH] 66MPC_SAMPLE_FORMAT sample_buffer[MPC_DECODER_BUFFER_LENGTH]
67IBSS_ATTR_MPC_SAMPLE_BUF; 67 IBSS_ATTR_MPC_SAMPLE_BUF;
68mpc_uint32_t seek_table[10000]; 68mpc_uint32_t seek_table[10000];
69 69
70/* this is the codec entry point */ 70/* this is the codec entry point */
71enum codec_status codec_main(void) 71enum codec_status codec_main(void)