diff options
-rw-r--r-- | apps/codecs/libmusepack/SOURCES | 4 | ||||
-rw-r--r-- | apps/codecs/libmusepack/math.h | 2 | ||||
-rwxr-xr-x | apps/codecs/libmusepack/mpc_config.h | 48 | ||||
-rw-r--r-- | apps/codecs/libmusepack/synth_filter.c | 217 | ||||
-rwxr-xr-x | apps/codecs/libmusepack/synth_filter_arm.S | 155 |
5 files changed, 265 insertions, 161 deletions
diff --git a/apps/codecs/libmusepack/SOURCES b/apps/codecs/libmusepack/SOURCES index 9c588e3c39..0de114336f 100644 --- a/apps/codecs/libmusepack/SOURCES +++ b/apps/codecs/libmusepack/SOURCES | |||
@@ -5,4 +5,6 @@ mpc_decoder.c | |||
5 | requant.c | 5 | requant.c |
6 | streaminfo.c | 6 | streaminfo.c |
7 | synth_filter.c | 7 | synth_filter.c |
8 | 8 | #if defined(CPU_ARM) | |
9 | synth_filter_arm.S | ||
10 | #endif | ||
diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h index a015d45cbb..e4c2ffce20 100644 --- a/apps/codecs/libmusepack/math.h +++ b/apps/codecs/libmusepack/math.h | |||
@@ -38,7 +38,7 @@ | |||
38 | #ifndef _mpcdec_math_h_ | 38 | #ifndef _mpcdec_math_h_ |
39 | #define _mpcdec_math_h_ | 39 | #define _mpcdec_math_h_ |
40 | 40 | ||
41 | #define MPC_FIXED_POINT | 41 | #include "mpc_config.h" |
42 | 42 | ||
43 | #define MPC_FIXED_POINT_SHIFT 16 | 43 | #define MPC_FIXED_POINT_SHIFT 16 |
44 | 44 | ||
diff --git a/apps/codecs/libmusepack/mpc_config.h b/apps/codecs/libmusepack/mpc_config.h new file mode 100755 index 0000000000..6993775703 --- /dev/null +++ b/apps/codecs/libmusepack/mpc_config.h | |||
@@ -0,0 +1,48 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Andree Buschmann | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | #ifndef _mpc_config_h_ | ||
21 | #define _mpc_config_h_ | ||
22 | |||
23 | #include "config.h" | ||
24 | |||
25 | /* choose fixed point or floating point */ | ||
26 | #define MPC_FIXED_POINT | ||
27 | |||
28 | #ifndef MPC_FIXED_POINT | ||
29 | #error FIXME, mpc will not with floating point now | ||
30 | #endif | ||
31 | |||
32 | /* Choose speed vs. accuracy for MPC_FIXED_POINT. | ||
33 | * The speed setting increases decoding speed on ARM only (+20%); the loss of accuracy | ||
34 | * is about 5 dB SNR (15 bit output precision). To disable the speed optimization, | ||
35 | * comment out OPTIMIZE_FOR_SPEED below for the desired target. */ | ||
36 | #if defined(MPC_FIXED_POINT) | ||
37 | #if defined(CPU_COLDFIRE) | ||
38 | // do nothing | ||
39 | #elif defined(CPU_ARM) | ||
40 | #define OPTIMIZE_FOR_SPEED | ||
41 | #else | ||
42 | #define OPTIMIZE_FOR_SPEED | ||
43 | #endif | ||
44 | #else | ||
45 | // do nothing | ||
46 | #endif | ||
47 | |||
48 | #endif | ||
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c index faf014c90d..d48b563a0b 100644 --- a/apps/codecs/libmusepack/synth_filter.c +++ b/apps/codecs/libmusepack/synth_filter.c | |||
@@ -39,22 +39,6 @@ | |||
39 | #include "musepack.h" | 39 | #include "musepack.h" |
40 | #include "internal.h" | 40 | #include "internal.h" |
41 | 41 | ||
42 | /* S E T T I N G S */ | ||
43 | // choose speed vs. accuracy for MPC_FIXED_POINT | ||
44 | // speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision) | ||
45 | // to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED | ||
46 | #if defined(MPC_FIXED_POINT) | ||
47 | #if defined(CPU_COLDFIRE) | ||
48 | // do nothing | ||
49 | #elif defined(CPU_ARM) | ||
50 | #define OPTIMIZE_FOR_SPEED | ||
51 | #else | ||
52 | #define OPTIMIZE_FOR_SPEED | ||
53 | #endif | ||
54 | #else | ||
55 | // do nothing | ||
56 | #endif | ||
57 | |||
58 | /* C O N S T A N T S */ | 42 | /* C O N S T A N T S */ |
59 | #undef _ | 43 | #undef _ |
60 | 44 | ||
@@ -82,40 +66,40 @@ | |||
82 | #endif | 66 | #endif |
83 | 67 | ||
84 | // Di_opt coefficients are +/- 2^17 | 68 | // Di_opt coefficients are +/- 2^17 |
85 | static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = { | 69 | static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = { |
86 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ | 70 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ |
87 | { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) }, | 71 | /* 0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29), |
88 | { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) }, | 72 | /* 1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26), |
89 | { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) }, | 73 | /* 2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24), |
90 | { _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) }, | 74 | /* 3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21), |
91 | { _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) }, | 75 | /* 4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19), |
92 | { _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) }, | 76 | /* 5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17), |
93 | { _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16) }, | 77 | /* 6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16), |
94 | { _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) }, | 78 | /* 7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14), |
95 | { _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13) }, | 79 | /* 8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13), |
96 | { _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11) }, | 80 | /* 9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11), |
97 | { _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10) }, | 81 | /* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10), |
98 | { _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) }, | 82 | /* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9), |
99 | { _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) }, | 83 | /* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8), |
100 | { _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7) }, | 84 | /* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7), |
101 | { _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7) }, | 85 | /* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7), |
102 | { _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6) }, | 86 | /* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6), |
103 | { _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5) }, | 87 | /* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5), |
104 | { _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) }, | 88 | /* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5), |
105 | { _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) }, | 89 | /* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4), |
106 | { _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) }, | 90 | /* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4), |
107 | { _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) }, | 91 | /* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3), |
108 | { _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3) }, | 92 | /* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3), |
109 | { _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) }, | 93 | /* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2), |
110 | { _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) }, | 94 | /* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2), |
111 | { _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) }, | 95 | /* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2), |
112 | { _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) }, | 96 | /* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2), |
113 | { _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) }, | 97 | /* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1), |
114 | { _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) }, | 98 | /* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1), |
115 | { _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) }, | 99 | /* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1), |
116 | { _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) }, | 100 | /* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1), |
117 | { _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) }, | 101 | /* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1), |
118 | { _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) } | 102 | /* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) |
119 | }; | 103 | }; |
120 | 104 | ||
121 | #undef _ | 105 | #undef _ |
@@ -457,69 +441,30 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V ) | |||
457 | // total: 111 adds, 107 subs, 80 muls, 80 shifts | 441 | // total: 111 adds, 107 subs, 80 muls, 80 shifts |
458 | } | 442 | } |
459 | 443 | ||
444 | #if defined(CPU_ARM) | ||
445 | extern void | ||
446 | mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, | ||
447 | const MPC_SAMPLE_FORMAT * V, | ||
448 | const MPC_SAMPLE_FORMAT * D); | ||
449 | #else | ||
460 | static void | 450 | static void |
461 | mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) | 451 | mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, |
452 | const MPC_SAMPLE_FORMAT * V, | ||
453 | const MPC_SAMPLE_FORMAT * D) | ||
462 | { | 454 | { |
463 | const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt; | ||
464 | mpc_int32_t k; | 455 | mpc_int32_t k; |
465 | 456 | ||
466 | #if defined(OPTIMIZE_FOR_SPEED) | 457 | #if defined(OPTIMIZE_FOR_SPEED) |
467 | #if defined(CPU_ARM) | ||
468 | // 32=32x32-multiply assembler for ARM | ||
469 | for ( k = 0; k < 32; k++, V++ ) | ||
470 | { | ||
471 | asm volatile ( | ||
472 | "ldmia %[D]!, { r0-r7 } \n\t" | ||
473 | "ldr r8, [%[V]] \n\t" | ||
474 | "mul r9, r0, r8 \n\t" | ||
475 | "ldr r8, [%[V], #96*4] \n\t" | ||
476 | "mla r9, r1, r8, r9 \n\t" | ||
477 | "ldr r8, [%[V], #128*4] \n\t" | ||
478 | "mla r9, r2, r8, r9 \n\t" | ||
479 | "ldr r8, [%[V], #224*4] \n\t" | ||
480 | "mla r9, r3, r8, r9 \n\t" | ||
481 | "ldr r8, [%[V], #256*4] \n\t" | ||
482 | "mla r9, r4, r8, r9 \n\t" | ||
483 | "ldr r8, [%[V], #352*4] \n\t" | ||
484 | "mla r9, r5, r8, r9 \n\t" | ||
485 | "ldr r8, [%[V], #384*4] \n\t" | ||
486 | "mla r9, r6, r8, r9 \n\t" | ||
487 | "ldr r8, [%[V], #480*4] \n\t" | ||
488 | "mla r9, r7, r8, r9 \n\t" | ||
489 | "ldmia %[D]!, { r0-r7 } \n\t" | ||
490 | "ldr r8, [%[V], #512*4] \n\t" | ||
491 | "mla r9, r0, r8, r9 \n\t" | ||
492 | "ldr r8, [%[V], #608*4] \n\t" | ||
493 | "mla r9, r1, r8, r9 \n\t" | ||
494 | "ldr r8, [%[V], #640*4] \n\t" | ||
495 | "mla r9, r2, r8, r9 \n\t" | ||
496 | "ldr r8, [%[V], #736*4] \n\t" | ||
497 | "mla r9, r3, r8, r9 \n\t" | ||
498 | "ldr r8, [%[V], #768*4] \n\t" | ||
499 | "mla r9, r4, r8, r9 \n\t" | ||
500 | "ldr r8, [%[V], #864*4] \n\t" | ||
501 | "mla r9, r5, r8, r9 \n\t" | ||
502 | "ldr r8, [%[V], #896*4] \n\t" | ||
503 | "mla r9, r6, r8, r9 \n\t" | ||
504 | "ldr r8, [%[V], #992*4] \n\t" | ||
505 | "mla r9, r7, r8, r9 \n\t" | ||
506 | "str r9, [%[Data]], #4 \n" | ||
507 | : [Data] "+r" (Data), [D] "+r" (D) | ||
508 | : [V] "r" (V) | ||
509 | : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"); | ||
510 | } | ||
511 | #else | ||
512 | // 32=32x32-multiply (FIXED_POINT) | 458 | // 32=32x32-multiply (FIXED_POINT) |
513 | for ( k = 0; k < 32; k++, D += 16, V++ ) | 459 | for ( k = 0; k < 32; k++, D += 16, V++ ) |
514 | { | 460 | { |
515 | *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3] | 461 | *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3] |
516 | + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] | 462 | + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] |
517 | + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] | 463 | + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] |
518 | + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; | 464 | + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; |
519 | Data += 1; | 465 | Data += 1; |
520 | // total: 16 muls, 15 adds | 466 | // total: 32 * (16 muls, 15 adds) |
521 | } | 467 | } |
522 | #endif | ||
523 | #else | 468 | #else |
524 | #if defined(CPU_COLDFIRE) | 469 | #if defined(CPU_COLDFIRE) |
525 | // 64=32x32-multiply assembler for Coldfire | 470 | // 64=32x32-multiply assembler for Coldfire |
@@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) | |||
553 | : [V] "a" (V), [D] "a" (D) | 498 | : [V] "a" (V), [D] "a" (D) |
554 | : "d0", "d1", "d2", "d3", "a5"); | 499 | : "d0", "d1", "d2", "d3", "a5"); |
555 | } | 500 | } |
556 | #elif defined(CPU_ARM) | ||
557 | // 64=32x32-multiply assembler for ARM | ||
558 | for ( k = 0; k < 32; k++, V++ ) | ||
559 | { | ||
560 | asm volatile ( | ||
561 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
562 | "ldr r4, [%[V]] \n\t" | ||
563 | "smull r5, r6, r0, r4 \n\t" | ||
564 | "ldr r4, [%[V], #96*4] \n\t" | ||
565 | "smlal r5, r6, r1, r4 \n\t" | ||
566 | "ldr r4, [%[V], #128*4] \n\t" | ||
567 | "smlal r5, r6, r2, r4 \n\t" | ||
568 | "ldr r4, [%[V], #224*4] \n\t" | ||
569 | "smlal r5, r6, r3, r4 \n\t" | ||
570 | |||
571 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
572 | "ldr r4, [%[V], #256*4] \n\t" | ||
573 | "smlal r5, r6, r0, r4 \n\t" | ||
574 | "ldr r4, [%[V], #352*4] \n\t" | ||
575 | "smlal r5, r6, r1, r4 \n\t" | ||
576 | "ldr r4, [%[V], #384*4] \n\t" | ||
577 | "smlal r5, r6, r2, r4 \n\t" | ||
578 | "ldr r4, [%[V], #480*4] \n\t" | ||
579 | "smlal r5, r6, r3, r4 \n\t" | ||
580 | |||
581 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
582 | "ldr r4, [%[V], #512*4] \n\t" | ||
583 | "smlal r5, r6, r0, r4 \n\t" | ||
584 | "ldr r4, [%[V], #608*4] \n\t" | ||
585 | "smlal r5, r6, r1, r4 \n\t" | ||
586 | "ldr r4, [%[V], #640*4] \n\t" | ||
587 | "smlal r5, r6, r2, r4 \n\t" | ||
588 | "ldr r4, [%[V], #736*4] \n\t" | ||
589 | "smlal r5, r6, r3, r4 \n\t" | ||
590 | |||
591 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
592 | "ldr r4, [%[V], #768*4] \n\t" | ||
593 | "smlal r5, r6, r0, r4 \n\t" | ||
594 | "ldr r4, [%[V], #864*4] \n\t" | ||
595 | "smlal r5, r6, r1, r4 \n\t" | ||
596 | "ldr r4, [%[V], #896*4] \n\t" | ||
597 | "smlal r5, r6, r2, r4 \n\t" | ||
598 | "ldr r4, [%[V], #992*4] \n\t" | ||
599 | "smlal r5, r6, r3, r4 \n\t" | ||
600 | "mov r4, r6, lsl #1 \n\t" | ||
601 | "orr r4, r4, r5, lsr #31\n\t" | ||
602 | "str r4, [%[Data]], #4 \n" | ||
603 | : [Data] "+r" (Data), [D] "+r" (D) | ||
604 | : [V] "r" (V) | ||
605 | : "r0", "r1", "r2", "r3", "r4", "r5", "r6"); | ||
606 | } | ||
607 | #else | 501 | #else |
608 | // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C | 502 | // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C |
609 | for ( k = 0; k < 32; k++, D += 16, V++ ) | 503 | for ( k = 0; k < 32; k++, D += 16, V++ ) |
610 | { | 504 | { |
611 | *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) | 505 | *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) |
612 | + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) | 506 | + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) |
613 | + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) | 507 | + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) |
614 | + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); | 508 | + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) |
509 | + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) | ||
510 | + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) | ||
511 | + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) | ||
512 | + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); | ||
615 | Data += 1; | 513 | Data += 1; |
616 | // total: 16 muls, 15 adds, 16 shifts | 514 | // total: 16 muls, 15 adds, 16 shifts |
617 | } | 515 | } |
618 | #endif | 516 | #endif |
619 | #endif | 517 | #endif |
620 | } | 518 | } |
519 | #endif /* CPU_ARM */ | ||
621 | 520 | ||
622 | static void | 521 | static void |
623 | mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y) | 522 | mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y) |
@@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons | |||
630 | { | 529 | { |
631 | V -= 64; | 530 | V -= 64; |
632 | mpc_calculate_new_V ( Y, V ); | 531 | mpc_calculate_new_V ( Y, V ); |
633 | mpc_decoder_windowing_D( OutData, V); | 532 | mpc_decoder_windowing_D( OutData, V, Di_opt ); |
634 | } | 533 | } |
635 | } | 534 | } |
636 | } | 535 | } |
@@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData) | |||
661 | /* */ | 560 | /* */ |
662 | /*******************************************/ | 561 | /*******************************************/ |
663 | 562 | ||
664 | static const unsigned char Parity [256] = { // parity | 563 | static const unsigned char Parity [256] ICONST_ATTR = { // parity |
665 | 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, | 564 | 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, |
666 | 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, | 565 | 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, |
667 | 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, | 566 | 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, |
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S new file mode 100755 index 0000000000..ce668e888c --- /dev/null +++ b/apps/codecs/libmusepack/synth_filter_arm.S | |||
@@ -0,0 +1,155 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Andree Buschmann | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | #include "mpc_config.h" | ||
21 | |||
22 | .section .text, "ax", %progbits | ||
23 | |||
24 | /**************************************************************************** | ||
25 | * void mpc_decoder_windowing_D(...) | ||
26 | * | ||
27 | * 2nd step within synthesis filter. Does the dewindowing. | ||
28 | * 32=32x32 multiplies (OPTIMIZE_FOR_SPEED) | ||
29 | * Uses pre-shifted V[] and D[] values. | ||
30 | ****************************************************************************/ | ||
31 | #if defined(OPTIMIZE_FOR_SPEED) | ||
32 | .align 2 | ||
33 | .global mpc_decoder_windowing_D | ||
34 | .type mpc_decoder_windowing_D, %function | ||
35 | mpc_decoder_windowing_D: | ||
36 | /* r0 = Data[] */ | ||
37 | /* r1 = V[] */ | ||
38 | /* r2 = D[] */ | ||
39 | /* lr = counter */ | ||
40 | |||
41 | stmfd sp!, {r4-r12, lr} | ||
42 | |||
43 | mov lr, #32 | ||
44 | .loop32: | ||
45 | ldmia r2!, { r3-r10 } /* load first 8 window coefficients */ | ||
46 | ldr r11, [r1] /* 0 */ | ||
47 | mul r12, r3, r11 | ||
48 | ldr r11, [r1, #96*4] /* 1 */ | ||
49 | mla r12, r4, r11, r12 | ||
50 | ldr r11, [r1, #128*4] /* 2 */ | ||
51 | mla r12, r5, r11, r12 | ||
52 | ldr r11, [r1, #224*4] /* 3 */ | ||
53 | mla r12, r6, r11, r12 | ||
54 | ldr r11, [r1, #256*4] /* 4 */ | ||
55 | mla r12, r7, r11, r12 | ||
56 | ldr r11, [r1, #352*4] /* 5 */ | ||
57 | mla r12, r8, r11, r12 | ||
58 | ldr r11, [r1, #384*4] /* 6 */ | ||
59 | mla r12, r9, r11, r12 | ||
60 | ldr r11, [r1, #480*4] /* 7 */ | ||
61 | mla r12, r10, r11, r12 | ||
62 | ldmia r2!, { r3-r10 } /* load last 8 window coefficients */ | ||
63 | ldr r11, [r1, #512*4] /* 8 */ | ||
64 | mla r12, r3, r11, r12 | ||
65 | ldr r11, [r1, #608*4] /* 9 */ | ||
66 | mla r12, r4, r11, r12 | ||
67 | ldr r11, [r1, #640*4] /* 10 */ | ||
68 | mla r12, r5, r11, r12 | ||
69 | ldr r11, [r1, #736*4] /* 11 */ | ||
70 | mla r12, r6, r11, r12 | ||
71 | ldr r11, [r1, #768*4] /* 12 */ | ||
72 | mla r12, r7, r11, r12 | ||
73 | ldr r11, [r1, #864*4] /* 13 */ | ||
74 | mla r12, r8, r11, r12 | ||
75 | ldr r11, [r1, #896*4] /* 14 */ | ||
76 | mla r12, r9, r11, r12 | ||
77 | ldr r11, [r1, #992*4] /* 15 */ | ||
78 | mla r12, r10, r11, r12 | ||
79 | str r12, [r0], #4 /* store Data */ | ||
80 | add r1, r1, #4 /* V++ */ | ||
81 | |||
82 | subs lr, lr, #1 | ||
83 | bgt .loop32 | ||
84 | |||
85 | ldmfd sp!, {r4-r12, pc} | ||
86 | .mpc_dewindowing_end: | ||
87 | .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D | ||
88 | #else | ||
89 | /**************************************************************************** | ||
90 | * void mpc_decoder_windowing_D(...) | ||
91 | * | ||
92 | * 2nd step within synthesis filter. Does the dewindowing. | ||
93 | * 64=32x32 multiplies | ||
94 | * Drops the lo-part of the 64 bit multiply results and will therefore lose | ||
95 | * 1 bit of accuracy. The decoder output is binary identical, as this | ||
96 | * imprecision is far below the output's 16 bit resolution. | ||
97 | ****************************************************************************/ | ||
98 | .align 2 | ||
99 | .global mpc_decoder_windowing_D | ||
100 | .type mpc_decoder_windowing_D, %function | ||
101 | mpc_decoder_windowing_D: | ||
102 | /* r0 = Data[] */ | ||
103 | /* r1 = V[] */ | ||
104 | /* r2 = D[] */ | ||
105 | /* lr = counter */ | ||
106 | |||
107 | stmfd sp!, {r4-r12, lr} | ||
108 | |||
109 | mov lr, #32 | ||
110 | .loop32: | ||
111 | ldmia r2!, { r3-r10 } /* load first 8 window coefficients */ | ||
112 | ldr r11, [r1] /* 0 */ | ||
113 | smull r11, r12, r3, r11 | ||
114 | ldr r11, [r1, #96*4] /* 1 */ | ||
115 | smlal r11, r12, r4, r11 | ||
116 | ldr r11, [r1, #128*4] /* 2 */ | ||
117 | smlal r11, r12, r5, r11 | ||
118 | ldr r11, [r1, #224*4] /* 3 */ | ||
119 | smlal r11, r12, r6, r11 | ||
120 | ldr r11, [r1, #256*4] /* 4 */ | ||
121 | smlal r11, r12, r7, r11 | ||
122 | ldr r11, [r1, #352*4] /* 5 */ | ||
123 | smlal r11, r12, r8, r11 | ||
124 | ldr r11, [r1, #384*4] /* 6 */ | ||
125 | smlal r11, r12, r9, r11 | ||
126 | ldr r11, [r1, #480*4] /* 7 */ | ||
127 | smlal r11, r12, r10, r11 | ||
128 | ldmia r2!, { r3-r10 } /* load last 8 window coefficients */ | ||
129 | ldr r11, [r1, #512*4] /* 8 */ | ||
130 | smlal r11, r12, r3, r11 | ||
131 | ldr r11, [r1, #608*4] /* 9 */ | ||
132 | smlal r11, r12, r4, r11 | ||
133 | ldr r11, [r1, #640*4] /* 10 */ | ||
134 | smlal r11, r12, r5, r11 | ||
135 | ldr r11, [r1, #736*4] /* 11 */ | ||
136 | smlal r11, r12, r6, r11 | ||
137 | ldr r11, [r1, #768*4] /* 12 */ | ||
138 | smlal r11, r12, r7, r11 | ||
139 | ldr r11, [r1, #864*4] /* 13 */ | ||
140 | smlal r11, r12, r8, r11 | ||
141 | ldr r11, [r1, #896*4] /* 14 */ | ||
142 | smlal r11, r12, r9, r11 | ||
143 | ldr r11, [r1, #992*4] /* 15 */ | ||
144 | smlal r11, r12, r10, r11 | ||
145 | mov r4, r12, lsl #1 /* get result from hi-part */ | ||
146 | str r4, [r0], #4 /* store Data */ | ||
147 | add r1, r1, #4 /* V++ */ | ||
148 | |||
149 | subs lr, lr, #1 | ||
150 | bgt .loop32 | ||
151 | |||
152 | ldmfd sp!, {r4-r12, pc} | ||
153 | .mpc_dewindowing_end: | ||
154 | .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D | ||
155 | #endif | ||