From 18f13b149a4ce6d3b16c0b91de4d571d1860b66f Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Sat, 17 May 2008 21:26:34 +0000 Subject: Move ARM assembler of musepack synthesis filter to own file. Additionally add ICONST_ATTR to noise generator data. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17562 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libmusepack/SOURCES | 4 +- apps/codecs/libmusepack/math.h | 2 +- apps/codecs/libmusepack/mpc_config.h | 48 +++++++ apps/codecs/libmusepack/synth_filter.c | 217 ++++++++--------------------- apps/codecs/libmusepack/synth_filter_arm.S | 155 +++++++++++++++++++++ 5 files changed, 265 insertions(+), 161 deletions(-) create mode 100755 apps/codecs/libmusepack/mpc_config.h create mode 100755 apps/codecs/libmusepack/synth_filter_arm.S diff --git a/apps/codecs/libmusepack/SOURCES b/apps/codecs/libmusepack/SOURCES index 9c588e3c39..0de114336f 100644 --- a/apps/codecs/libmusepack/SOURCES +++ b/apps/codecs/libmusepack/SOURCES @@ -5,4 +5,6 @@ mpc_decoder.c requant.c streaminfo.c synth_filter.c - +#if defined(CPU_ARM) +synth_filter_arm.S +#endif diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h index a015d45cbb..e4c2ffce20 100644 --- a/apps/codecs/libmusepack/math.h +++ b/apps/codecs/libmusepack/math.h @@ -38,7 +38,7 @@ #ifndef _mpcdec_math_h_ #define _mpcdec_math_h_ -#define MPC_FIXED_POINT +#include "mpc_config.h" #define MPC_FIXED_POINT_SHIFT 16 diff --git a/apps/codecs/libmusepack/mpc_config.h b/apps/codecs/libmusepack/mpc_config.h new file mode 100755 index 0000000000..6993775703 --- /dev/null +++ b/apps/codecs/libmusepack/mpc_config.h @@ -0,0 +1,48 @@ +/*************************************************************************** + * __________ __ ___. 
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2008 by Andree Buschmann + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#ifndef _mpc_config_h_ +#define _mpc_config_h_ + +#include "config.h" + +/* choose fixed point or floating point */ +#define MPC_FIXED_POINT + +#ifndef MPC_FIXED_POINT +#error FIXME, mpc will not with floating point now +#endif + +/* choose speed vs. accuracy for MPC_FIXED_POINT + * speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy + * equals about 5 dB SNR (15bit output precision) to not use the speed-optimization + * -> comment OPTIMIZE_FOR_SPEED here for desired target */ +#if defined(MPC_FIXED_POINT) + #if defined(CPU_COLDFIRE) + // do nothing + #elif defined(CPU_ARM) + #define OPTIMIZE_FOR_SPEED + #else + #define OPTIMIZE_FOR_SPEED + #endif +#else + // do nothing +#endif + +#endif diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c index faf014c90d..d48b563a0b 100644 --- a/apps/codecs/libmusepack/synth_filter.c +++ b/apps/codecs/libmusepack/synth_filter.c @@ -39,22 +39,6 @@ #include "musepack.h" #include "internal.h" -/* S E T T I N G S */ -// choose speed vs. 
accuracy for MPC_FIXED_POINT -// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision) -// to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED -#if defined(MPC_FIXED_POINT) - #if defined(CPU_COLDFIRE) - // do nothing - #elif defined(CPU_ARM) - #define OPTIMIZE_FOR_SPEED - #else - #define OPTIMIZE_FOR_SPEED - #endif -#else - // do nothing -#endif - /* C O N S T A N T S */ #undef _ @@ -82,40 +66,40 @@ #endif // Di_opt coefficients are +/- 2^17 -static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = { - /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ - { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) }, - { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) }, - { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) }, - { _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) }, - { _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) }, - { _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) }, - { _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16) }, - { _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) }, - { _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), 
_(2001), _( 72), _(161), _(13) }, - { _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11) }, - { _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10) }, - { _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) }, - { _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) }, - { _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7) }, - { _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7) }, - { _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6) }, - { _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5) }, - { _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) }, - { _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) }, - { _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) }, - { _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) }, - { _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), 
_( -970), _(1210), _(-208), _( 73), _( 3) }, - { _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) }, - { _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) }, - { _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) }, - { _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) }, - { _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) }, - { _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) }, - { _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) }, - { _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) }, - { _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) }, - { _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) } +static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = { +/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ +/* 0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29), +/* 1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), 
_( 401), _(208), _(26), +/* 2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24), +/* 3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21), +/* 4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19), +/* 5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17), +/* 6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16), +/* 7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14), +/* 8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13), +/* 9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11), +/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10), +/* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9), +/* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8), +/* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7), +/* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), 
_(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7), +/* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6), +/* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5), +/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5), +/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4), +/* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4), +/* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3), +/* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3), +/* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2), +/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2), +/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2), +/* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2), +/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1), +/* 27 
*/ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1), +/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1), +/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1), +/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1), +/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) }; #undef _ @@ -457,69 +441,30 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V ) // total: 111 adds, 107 subs, 80 muls, 80 shifts } +#if defined(CPU_ARM) +extern void +mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, + const MPC_SAMPLE_FORMAT * V, + const MPC_SAMPLE_FORMAT * D); +#else static void -mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) +mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, + const MPC_SAMPLE_FORMAT * V, + const MPC_SAMPLE_FORMAT * D) { - const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt; mpc_int32_t k; #if defined(OPTIMIZE_FOR_SPEED) - #if defined(CPU_ARM) - // 32=32x32-multiply assembler for ARM - for ( k = 0; k < 32; k++, V++ ) - { - asm volatile ( - "ldmia %[D]!, { r0-r7 } \n\t" - "ldr r8, [%[V]] \n\t" - "mul r9, r0, r8 \n\t" - "ldr r8, [%[V], #96*4] \n\t" - "mla r9, r1, r8, r9 \n\t" - "ldr r8, [%[V], #128*4] \n\t" - "mla r9, r2, r8, r9 \n\t" - "ldr r8, [%[V], #224*4] \n\t" - "mla r9, r3, r8, r9 \n\t" - "ldr r8, [%[V], #256*4] \n\t" - "mla r9, r4, r8, r9 \n\t" - "ldr r8, [%[V], #352*4] \n\t" - "mla r9, r5, r8, r9 \n\t" - "ldr r8, [%[V], #384*4] \n\t" - "mla r9, r6, r8, r9 \n\t" - "ldr 
r8, [%[V], #480*4] \n\t" - "mla r9, r7, r8, r9 \n\t" - "ldmia %[D]!, { r0-r7 } \n\t" - "ldr r8, [%[V], #512*4] \n\t" - "mla r9, r0, r8, r9 \n\t" - "ldr r8, [%[V], #608*4] \n\t" - "mla r9, r1, r8, r9 \n\t" - "ldr r8, [%[V], #640*4] \n\t" - "mla r9, r2, r8, r9 \n\t" - "ldr r8, [%[V], #736*4] \n\t" - "mla r9, r3, r8, r9 \n\t" - "ldr r8, [%[V], #768*4] \n\t" - "mla r9, r4, r8, r9 \n\t" - "ldr r8, [%[V], #864*4] \n\t" - "mla r9, r5, r8, r9 \n\t" - "ldr r8, [%[V], #896*4] \n\t" - "mla r9, r6, r8, r9 \n\t" - "ldr r8, [%[V], #992*4] \n\t" - "mla r9, r7, r8, r9 \n\t" - "str r9, [%[Data]], #4 \n" - : [Data] "+r" (Data), [D] "+r" (D) - : [V] "r" (V) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"); - } - #else // 32=32x32-multiply (FIXED_POINT) for ( k = 0; k < 32; k++, D += 16, V++ ) { *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3] - + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] - + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] - + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; + + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] + + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] + + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; Data += 1; - // total: 16 muls, 15 adds + // total: 32 * (16 muls, 15 adds) } - #endif #else #if defined(CPU_COLDFIRE) // 64=32x32-multiply assembler for Coldfire @@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) : [V] "a" (V), [D] "a" (D) : "d0", "d1", "d2", "d3", "a5"); } - #elif defined(CPU_ARM) - // 64=32x32-multiply assembler for ARM - for ( k = 0; k < 32; k++, V++ ) - { - asm volatile ( - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V]] \n\t" - "smull r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #96*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #128*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #224*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr 
r4, [%[V], #256*4] \n\t" - "smlal r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #352*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #384*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #480*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V], #512*4] \n\t" - "smlal r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #608*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #640*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #736*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V], #768*4] \n\t" - "smlal r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #864*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #896*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #992*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - "mov r4, r6, lsl #1 \n\t" - "orr r4, r4, r5, lsr #31\n\t" - "str r4, [%[Data]], #4 \n" - : [Data] "+r" (Data), [D] "+r" (D) - : [V] "r" (V) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6"); - } #else // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C for ( k = 0; k < 32; k++, D += 16, V++ ) { - *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) - + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) - + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) - + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); + *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) + + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) + + 
MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) + + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); Data += 1; // total: 16 muls, 15 adds, 16 shifts } #endif #endif } +#endif /* CPU_ARM */ static void mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y) @@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons { V -= 64; mpc_calculate_new_V ( Y, V ); - mpc_decoder_windowing_D( OutData, V); + mpc_decoder_windowing_D( OutData, V, Di_opt ); } } } @@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData) /* */ /*******************************************/ -static const unsigned char Parity [256] = { // parity +static const unsigned char Parity [256] ICONST_ATTR = { // parity 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S new file mode 100755 index 0000000000..ce668e888c --- /dev/null +++ b/apps/codecs/libmusepack/synth_filter_arm.S @@ -0,0 +1,155 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2008 by Andree Buschmann + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "mpc_config.h" + + .section .text, "ax", %progbits + +/**************************************************************************** + * void mpc_decoder_windowing_D(...) + * + * 2nd step within synthesis filter. Does the dewindowing. + * 32=32x32 multiplies (OPTIMIZE_FOR_SPEED) + * Uses pre-shifted V[] and D[] values. + ****************************************************************************/ +#if defined(OPTIMIZE_FOR_SPEED) + .align 2 + .global mpc_decoder_windowing_D + .type mpc_decoder_windowing_D, %function +mpc_decoder_windowing_D: + /* r0 = Data[] */ + /* r1 = V[] */ + /* r2 = D[] */ + /* lr = counter */ + + stmfd sp!, {r4-r12, lr} + + mov lr, #32 +.loop32: + ldmia r2!, { r3-r10 } /* load first 8 window coefficients */ + ldr r11, [r1] /* 0 */ + mul r12, r3, r11 + ldr r11, [r1, #96*4] /* 1 */ + mla r12, r4, r11, r12 + ldr r11, [r1, #128*4] /* 2 */ + mla r12, r5, r11, r12 + ldr r11, [r1, #224*4] /* 3 */ + mla r12, r6, r11, r12 + ldr r11, [r1, #256*4] /* 4 */ + mla r12, r7, r11, r12 + ldr r11, [r1, #352*4] /* 5 */ + mla r12, r8, r11, r12 + ldr r11, [r1, #384*4] /* 6 */ + mla r12, r9, r11, r12 + ldr r11, [r1, #480*4] /* 7 */ + mla r12, r10, r11, r12 + ldmia r2!, { r3-r10 } /* load last 8 window coefficients */ + ldr r11, [r1, #512*4] /* 8 */ + mla r12, r3, r11, r12 + ldr r11, [r1, #608*4] /* 9 */ + mla r12, r4, r11, r12 + ldr r11, [r1, #640*4] /* 10 */ + mla r12, r5, r11, r12 + ldr r11, [r1, #736*4] /* 11 */ + mla r12, r6, r11, r12 + ldr r11, [r1, #768*4] /* 12 */ + mla r12, r7, r11, r12 + ldr r11, [r1, #864*4] /* 13 */ + mla r12, r8, r11, r12 + ldr r11, [r1, #896*4] /* 14 */ + mla r12, r9, r11, r12 + ldr r11, [r1, #992*4] /* 15 */ + mla r12, r10, r11, r12 + str r12, [r0], #4 /* store Data */ + add r1, r1, #4 /* V++ */ + + subs lr, lr, #1 + bgt 
.loop32 + + ldmfd sp!, {r4-r12, pc} +.mpc_dewindowing_end: + .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D +#else +/**************************************************************************** + * void mpc_decoder_windowing_D(...) + * + * 2nd step within synthesis filter. Does the dewindowing. + * 64=32x32 multiplies + * Drops lo-part of 64bit multiply results and will therefore lose 1 bit + * accuracy. The decoder output is binary identical as this imprecision is + * far below the output's 16bit resolution. + ****************************************************************************/ + .align 2 + .global mpc_decoder_windowing_D + .type mpc_decoder_windowing_D, %function +mpc_decoder_windowing_D: + /* r0 = Data[] */ + /* r1 = V[] */ + /* r2 = D[] */ + /* lr = counter */ + + stmfd sp!, {r4-r12, lr} + + mov lr, #32 +.loop32: + ldmia r2!, { r3-r10 } /* load first 8 window coefficients */ + ldr r11, [r1] /* 0 */ + smull r11, r12, r3, r11 + ldr r11, [r1, #96*4] /* 1 */ + smlal r11, r12, r4, r11 + ldr r11, [r1, #128*4] /* 2 */ + smlal r11, r12, r5, r11 + ldr r11, [r1, #224*4] /* 3 */ + smlal r11, r12, r6, r11 + ldr r11, [r1, #256*4] /* 4 */ + smlal r11, r12, r7, r11 + ldr r11, [r1, #352*4] /* 5 */ + smlal r11, r12, r8, r11 + ldr r11, [r1, #384*4] /* 6 */ + smlal r11, r12, r9, r11 + ldr r11, [r1, #480*4] /* 7 */ + smlal r11, r12, r10, r11 + ldmia r2!, { r3-r10 } /* load last 8 window coefficients */ + ldr r11, [r1, #512*4] /* 8 */ + smlal r11, r12, r3, r11 + ldr r11, [r1, #608*4] /* 9 */ + smlal r11, r12, r4, r11 + ldr r11, [r1, #640*4] /* 10 */ + smlal r11, r12, r5, r11 + ldr r11, [r1, #736*4] /* 11 */ + smlal r11, r12, r6, r11 + ldr r11, [r1, #768*4] /* 12 */ + smlal r11, r12, r7, r11 + ldr r11, [r1, #864*4] /* 13 */ + smlal r11, r12, r8, r11 + ldr r11, [r1, #896*4] /* 14 */ + smlal r11, r12, r9, r11 + ldr r11, [r1, #992*4] /* 15 */ + smlal r11, r12, r10, r11 + mov r4, r12, lsl #1 /* get result from hi-part */ + str r4, [r0], #4 /* store 
Data */ + add r1, r1, #4 /* V++ */ + + subs lr, lr, #1 + bgt .loop32 + + ldmfd sp!, {r4-r12, pc} +.mpc_dewindowing_end: + .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D +#endif -- cgit v1.2.3