summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/codecs/libmusepack/SOURCES4
-rw-r--r--apps/codecs/libmusepack/math.h2
-rwxr-xr-xapps/codecs/libmusepack/mpc_config.h48
-rw-r--r--apps/codecs/libmusepack/synth_filter.c217
-rwxr-xr-xapps/codecs/libmusepack/synth_filter_arm.S155
5 files changed, 265 insertions, 161 deletions
diff --git a/apps/codecs/libmusepack/SOURCES b/apps/codecs/libmusepack/SOURCES
index 9c588e3c39..0de114336f 100644
--- a/apps/codecs/libmusepack/SOURCES
+++ b/apps/codecs/libmusepack/SOURCES
@@ -5,4 +5,6 @@ mpc_decoder.c
5requant.c 5requant.c
6streaminfo.c 6streaminfo.c
7synth_filter.c 7synth_filter.c
8 8#if defined(CPU_ARM)
9synth_filter_arm.S
10#endif
diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h
index a015d45cbb..e4c2ffce20 100644
--- a/apps/codecs/libmusepack/math.h
+++ b/apps/codecs/libmusepack/math.h
@@ -38,7 +38,7 @@
38#ifndef _mpcdec_math_h_ 38#ifndef _mpcdec_math_h_
39#define _mpcdec_math_h_ 39#define _mpcdec_math_h_
40 40
41#define MPC_FIXED_POINT 41#include "mpc_config.h"
42 42
43#define MPC_FIXED_POINT_SHIFT 16 43#define MPC_FIXED_POINT_SHIFT 16
44 44
diff --git a/apps/codecs/libmusepack/mpc_config.h b/apps/codecs/libmusepack/mpc_config.h
new file mode 100755
index 0000000000..6993775703
--- /dev/null
+++ b/apps/codecs/libmusepack/mpc_config.h
@@ -0,0 +1,48 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Andree Buschmann
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#ifndef _mpc_config_h_
21#define _mpc_config_h_
22
23#include "config.h"
24
25/* choose fixed point or floating point */
26#define MPC_FIXED_POINT
27
28#ifndef MPC_FIXED_POINT
29#error FIXME, mpc will not with floating point now
30#endif
31
32/* Choose speed vs. accuracy for MPC_FIXED_POINT.
33 * The speed setting increases decoding speed on ARM only (+20%); the loss of
34 * accuracy equals about 5 dB SNR (15-bit output precision). To not use the
35 * speed optimization, comment out OPTIMIZE_FOR_SPEED here for the desired target. */
36#if defined(MPC_FIXED_POINT)
37 #if defined(CPU_COLDFIRE)
38 // do nothing
39 #elif defined(CPU_ARM)
40 #define OPTIMIZE_FOR_SPEED
41 #else
42 #define OPTIMIZE_FOR_SPEED
43 #endif
44#else
45 // do nothing
46#endif
47
48#endif
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index faf014c90d..d48b563a0b 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -39,22 +39,6 @@
39#include "musepack.h" 39#include "musepack.h"
40#include "internal.h" 40#include "internal.h"
41 41
42/* S E T T I N G S */
43// choose speed vs. accuracy for MPC_FIXED_POINT
44// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision)
45// to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED
46#if defined(MPC_FIXED_POINT)
47 #if defined(CPU_COLDFIRE)
48 // do nothing
49 #elif defined(CPU_ARM)
50 #define OPTIMIZE_FOR_SPEED
51 #else
52 #define OPTIMIZE_FOR_SPEED
53 #endif
54#else
55 // do nothing
56#endif
57
58/* C O N S T A N T S */ 42/* C O N S T A N T S */
59#undef _ 43#undef _
60 44
@@ -82,40 +66,40 @@
82#endif 66#endif
83 67
84// Di_opt coefficients are +/- 2^17 68// Di_opt coefficients are +/- 2^17
85static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = { 69static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = {
86 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ 70/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
87 { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) }, 71/* 0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29),
88 { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) }, 72/* 1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26),
89 { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) }, 73/* 2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24),
90 { _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) }, 74/* 3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21),
91 { _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) }, 75/* 4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19),
92 { _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) }, 76/* 5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17),
93 { _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16) }, 77/* 6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16),
94 { _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) }, 78/* 7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14),
95 { _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13) }, 79/* 8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13),
96 { _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11) }, 80/* 9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11),
97 { _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10) }, 81/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10),
98 { _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) }, 82/* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9),
99 { _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) }, 83/* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8),
100 { _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7) }, 84/* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7),
101 { _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7) }, 85/* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7),
102 { _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6) }, 86/* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6),
103 { _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5) }, 87/* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5),
104 { _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) }, 88/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5),
105 { _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) }, 89/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4),
106 { _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) }, 90/* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4),
107 { _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) }, 91/* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3),
108 { _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3) }, 92/* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3),
109 { _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) }, 93/* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2),
110 { _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) }, 94/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2),
111 { _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) }, 95/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2),
112 { _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) }, 96/* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2),
113 { _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) }, 97/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1),
114 { _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) }, 98/* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1),
115 { _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) }, 99/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1),
116 { _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) }, 100/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1),
117 { _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) }, 101/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1),
118 { _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) } 102/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1)
119}; 103};
120 104
121#undef _ 105#undef _
@@ -457,69 +441,30 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
457 // total: 111 adds, 107 subs, 80 muls, 80 shifts 441 // total: 111 adds, 107 subs, 80 muls, 80 shifts
458} 442}
459 443
444#if defined(CPU_ARM)
445extern void
446mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
447 const MPC_SAMPLE_FORMAT * V,
448 const MPC_SAMPLE_FORMAT * D);
449#else
460static void 450static void
461mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) 451mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
452 const MPC_SAMPLE_FORMAT * V,
453 const MPC_SAMPLE_FORMAT * D)
462{ 454{
463 const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt;
464 mpc_int32_t k; 455 mpc_int32_t k;
465 456
466 #if defined(OPTIMIZE_FOR_SPEED) 457 #if defined(OPTIMIZE_FOR_SPEED)
467 #if defined(CPU_ARM)
468 // 32=32x32-multiply assembler for ARM
469 for ( k = 0; k < 32; k++, V++ )
470 {
471 asm volatile (
472 "ldmia %[D]!, { r0-r7 } \n\t"
473 "ldr r8, [%[V]] \n\t"
474 "mul r9, r0, r8 \n\t"
475 "ldr r8, [%[V], #96*4] \n\t"
476 "mla r9, r1, r8, r9 \n\t"
477 "ldr r8, [%[V], #128*4] \n\t"
478 "mla r9, r2, r8, r9 \n\t"
479 "ldr r8, [%[V], #224*4] \n\t"
480 "mla r9, r3, r8, r9 \n\t"
481 "ldr r8, [%[V], #256*4] \n\t"
482 "mla r9, r4, r8, r9 \n\t"
483 "ldr r8, [%[V], #352*4] \n\t"
484 "mla r9, r5, r8, r9 \n\t"
485 "ldr r8, [%[V], #384*4] \n\t"
486 "mla r9, r6, r8, r9 \n\t"
487 "ldr r8, [%[V], #480*4] \n\t"
488 "mla r9, r7, r8, r9 \n\t"
489 "ldmia %[D]!, { r0-r7 } \n\t"
490 "ldr r8, [%[V], #512*4] \n\t"
491 "mla r9, r0, r8, r9 \n\t"
492 "ldr r8, [%[V], #608*4] \n\t"
493 "mla r9, r1, r8, r9 \n\t"
494 "ldr r8, [%[V], #640*4] \n\t"
495 "mla r9, r2, r8, r9 \n\t"
496 "ldr r8, [%[V], #736*4] \n\t"
497 "mla r9, r3, r8, r9 \n\t"
498 "ldr r8, [%[V], #768*4] \n\t"
499 "mla r9, r4, r8, r9 \n\t"
500 "ldr r8, [%[V], #864*4] \n\t"
501 "mla r9, r5, r8, r9 \n\t"
502 "ldr r8, [%[V], #896*4] \n\t"
503 "mla r9, r6, r8, r9 \n\t"
504 "ldr r8, [%[V], #992*4] \n\t"
505 "mla r9, r7, r8, r9 \n\t"
506 "str r9, [%[Data]], #4 \n"
507 : [Data] "+r" (Data), [D] "+r" (D)
508 : [V] "r" (V)
509 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9");
510 }
511 #else
512 // 32=32x32-multiply (FIXED_POINT) 458 // 32=32x32-multiply (FIXED_POINT)
513 for ( k = 0; k < 32; k++, D += 16, V++ ) 459 for ( k = 0; k < 32; k++, D += 16, V++ )
514 { 460 {
515 *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3] 461 *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
516 + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] 462 + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
517 + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] 463 + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
518 + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; 464 + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
519 Data += 1; 465 Data += 1;
520 // total: 16 muls, 15 adds 466 // total: 32 * (16 muls, 15 adds)
521 } 467 }
522 #endif
523 #else 468 #else
524 #if defined(CPU_COLDFIRE) 469 #if defined(CPU_COLDFIRE)
525 // 64=32x32-multiply assembler for Coldfire 470 // 64=32x32-multiply assembler for Coldfire
@@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
553 : [V] "a" (V), [D] "a" (D) 498 : [V] "a" (V), [D] "a" (D)
554 : "d0", "d1", "d2", "d3", "a5"); 499 : "d0", "d1", "d2", "d3", "a5");
555 } 500 }
556 #elif defined(CPU_ARM)
557 // 64=32x32-multiply assembler for ARM
558 for ( k = 0; k < 32; k++, V++ )
559 {
560 asm volatile (
561 "ldmia %[D]!, { r0-r3 } \n\t"
562 "ldr r4, [%[V]] \n\t"
563 "smull r5, r6, r0, r4 \n\t"
564 "ldr r4, [%[V], #96*4] \n\t"
565 "smlal r5, r6, r1, r4 \n\t"
566 "ldr r4, [%[V], #128*4] \n\t"
567 "smlal r5, r6, r2, r4 \n\t"
568 "ldr r4, [%[V], #224*4] \n\t"
569 "smlal r5, r6, r3, r4 \n\t"
570
571 "ldmia %[D]!, { r0-r3 } \n\t"
572 "ldr r4, [%[V], #256*4] \n\t"
573 "smlal r5, r6, r0, r4 \n\t"
574 "ldr r4, [%[V], #352*4] \n\t"
575 "smlal r5, r6, r1, r4 \n\t"
576 "ldr r4, [%[V], #384*4] \n\t"
577 "smlal r5, r6, r2, r4 \n\t"
578 "ldr r4, [%[V], #480*4] \n\t"
579 "smlal r5, r6, r3, r4 \n\t"
580
581 "ldmia %[D]!, { r0-r3 } \n\t"
582 "ldr r4, [%[V], #512*4] \n\t"
583 "smlal r5, r6, r0, r4 \n\t"
584 "ldr r4, [%[V], #608*4] \n\t"
585 "smlal r5, r6, r1, r4 \n\t"
586 "ldr r4, [%[V], #640*4] \n\t"
587 "smlal r5, r6, r2, r4 \n\t"
588 "ldr r4, [%[V], #736*4] \n\t"
589 "smlal r5, r6, r3, r4 \n\t"
590
591 "ldmia %[D]!, { r0-r3 } \n\t"
592 "ldr r4, [%[V], #768*4] \n\t"
593 "smlal r5, r6, r0, r4 \n\t"
594 "ldr r4, [%[V], #864*4] \n\t"
595 "smlal r5, r6, r1, r4 \n\t"
596 "ldr r4, [%[V], #896*4] \n\t"
597 "smlal r5, r6, r2, r4 \n\t"
598 "ldr r4, [%[V], #992*4] \n\t"
599 "smlal r5, r6, r3, r4 \n\t"
600 "mov r4, r6, lsl #1 \n\t"
601 "orr r4, r4, r5, lsr #31\n\t"
602 "str r4, [%[Data]], #4 \n"
603 : [Data] "+r" (Data), [D] "+r" (D)
604 : [V] "r" (V)
605 : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
606 }
607 #else 501 #else
608 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C 502 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
609 for ( k = 0; k < 32; k++, D += 16, V++ ) 503 for ( k = 0; k < 32; k++, D += 16, V++ )
610 { 504 {
611 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) 505 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
612 + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) 506 + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
613 + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) 507 + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
614 + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); 508 + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
509 + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
510 + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
511 + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
512 + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
615 Data += 1; 513 Data += 1;
616 // total: 16 muls, 15 adds, 16 shifts 514 // total: 16 muls, 15 adds, 16 shifts
617 } 515 }
618 #endif 516 #endif
619 #endif 517 #endif
620} 518}
519#endif /* CPU_ARM */
621 520
622static void 521static void
623mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y) 522mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
@@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons
630 { 529 {
631 V -= 64; 530 V -= 64;
632 mpc_calculate_new_V ( Y, V ); 531 mpc_calculate_new_V ( Y, V );
633 mpc_decoder_windowing_D( OutData, V); 532 mpc_decoder_windowing_D( OutData, V, Di_opt );
634 } 533 }
635 } 534 }
636} 535}
@@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
661/* */ 560/* */
662/*******************************************/ 561/*******************************************/
663 562
664static const unsigned char Parity [256] = { // parity 563static const unsigned char Parity [256] ICONST_ATTR = { // parity
665 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, 564 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
666 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 565 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
667 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 566 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
new file mode 100755
index 0000000000..ce668e888c
--- /dev/null
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -0,0 +1,155 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Andree Buschmann
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#include "mpc_config.h"
21
22 .section .text, "ax", %progbits
23
24/****************************************************************************
25 * void mpc_decoder_windowing_D(...)
26 *
27 * 2nd step within synthesis filter. Does the dewindowing.
28 * 32=32x32 multiplies (OPTIMIZE_FOR_SPEED)
29 * Uses pre-shifted V[] and D[] values.
30 ****************************************************************************/
31#if defined(OPTIMIZE_FOR_SPEED)
32 .align 2
33 .global mpc_decoder_windowing_D
34 .type mpc_decoder_windowing_D, %function
35mpc_decoder_windowing_D:
36 /* r0 = Data[] */
37 /* r1 = V[] */
38 /* r2 = D[] */
39 /* lr = counter */
40
41 stmfd sp!, {r4-r12, lr}
42
43 mov lr, #32
44.loop32:
45 ldmia r2!, { r3-r10 } /* load first 8 window coefficients */
46 ldr r11, [r1] /* 0 */
47 mul r12, r3, r11
48 ldr r11, [r1, #96*4] /* 1 */
49 mla r12, r4, r11, r12
50 ldr r11, [r1, #128*4] /* 2 */
51 mla r12, r5, r11, r12
52 ldr r11, [r1, #224*4] /* 3 */
53 mla r12, r6, r11, r12
54 ldr r11, [r1, #256*4] /* 4 */
55 mla r12, r7, r11, r12
56 ldr r11, [r1, #352*4] /* 5 */
57 mla r12, r8, r11, r12
58 ldr r11, [r1, #384*4] /* 6 */
59 mla r12, r9, r11, r12
60 ldr r11, [r1, #480*4] /* 7 */
61 mla r12, r10, r11, r12
62 ldmia r2!, { r3-r10 } /* load last 8 window coefficients */
63 ldr r11, [r1, #512*4] /* 8 */
64 mla r12, r3, r11, r12
65 ldr r11, [r1, #608*4] /* 9 */
66 mla r12, r4, r11, r12
67 ldr r11, [r1, #640*4] /* 10 */
68 mla r12, r5, r11, r12
69 ldr r11, [r1, #736*4] /* 11 */
70 mla r12, r6, r11, r12
71 ldr r11, [r1, #768*4] /* 12 */
72 mla r12, r7, r11, r12
73 ldr r11, [r1, #864*4] /* 13 */
74 mla r12, r8, r11, r12
75 ldr r11, [r1, #896*4] /* 14 */
76 mla r12, r9, r11, r12
77 ldr r11, [r1, #992*4] /* 15 */
78 mla r12, r10, r11, r12
79 str r12, [r0], #4 /* store Data */
80 add r1, r1, #4 /* V++ */
81
82 subs lr, lr, #1
83 bgt .loop32
84
85 ldmfd sp!, {r4-r12, pc}
86.mpc_dewindowing_end:
87 .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
88#else
89/****************************************************************************
90 * void mpc_decoder_windowing_D(...)
91 *
92 * 2nd step within synthesis filter. Does the dewindowing.
93 * 64=32x32 multiplies
94 * Drops the lo-part of the 64-bit multiply results and will therefore lose
95 * 1 bit of accuracy. The decoder output is binary identical as this
96 * imprecision is far below the output's 16-bit resolution.
97 ****************************************************************************/
98 .align 2
99 .global mpc_decoder_windowing_D
100 .type mpc_decoder_windowing_D, %function
101mpc_decoder_windowing_D:
102 /* r0 = Data[] */
103 /* r1 = V[] */
104 /* r2 = D[] */
105 /* lr = counter */
106
107 stmfd sp!, {r4-r12, lr}
108
109 mov lr, #32
110.loop32:
111 ldmia r2!, { r3-r10 } /* load first 8 window coefficients */
112 ldr r11, [r1] /* 0 */
113 smull r11, r12, r3, r11
114 ldr r11, [r1, #96*4] /* 1 */
115 smlal r11, r12, r4, r11
116 ldr r11, [r1, #128*4] /* 2 */
117 smlal r11, r12, r5, r11
118 ldr r11, [r1, #224*4] /* 3 */
119 smlal r11, r12, r6, r11
120 ldr r11, [r1, #256*4] /* 4 */
121 smlal r11, r12, r7, r11
122 ldr r11, [r1, #352*4] /* 5 */
123 smlal r11, r12, r8, r11
124 ldr r11, [r1, #384*4] /* 6 */
125 smlal r11, r12, r9, r11
126 ldr r11, [r1, #480*4] /* 7 */
127 smlal r11, r12, r10, r11
128 ldmia r2!, { r3-r10 } /* load last 8 window coefficients */
129 ldr r11, [r1, #512*4] /* 8 */
130 smlal r11, r12, r3, r11
131 ldr r11, [r1, #608*4] /* 9 */
132 smlal r11, r12, r4, r11
133 ldr r11, [r1, #640*4] /* 10 */
134 smlal r11, r12, r5, r11
135 ldr r11, [r1, #736*4] /* 11 */
136 smlal r11, r12, r6, r11
137 ldr r11, [r1, #768*4] /* 12 */
138 smlal r11, r12, r7, r11
139 ldr r11, [r1, #864*4] /* 13 */
140 smlal r11, r12, r8, r11
141 ldr r11, [r1, #896*4] /* 14 */
142 smlal r11, r12, r9, r11
143 ldr r11, [r1, #992*4] /* 15 */
144 smlal r11, r12, r10, r11
145 mov r4, r12, lsl #1 /* get result from hi-part */
146 str r4, [r0], #4 /* store Data */
147 add r1, r1, #4 /* V++ */
148
149 subs lr, lr, #1
150 bgt .loop32
151
152 ldmfd sp!, {r4-r12, pc}
153.mpc_dewindowing_end:
154 .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
155#endif