From af5bad46d883731728b9e4c797cae015e17f11f3 Mon Sep 17 00:00:00 2001 From: Andrew Mahone Date: Sun, 13 Dec 2009 03:45:40 +0000 Subject: Libfaad pow2/log2 improvements: Correct pow2_* to calculate correct output for input < 0.0 or between 0.0 and 1.0 instead of returning 0. This will change output, but the float version of this codec uses pow(2.0,x) in place of these functions, so this behavior was probably a bug Replace 64-bit multiply in pow2_* with left or right shift by whole part of input, rounding if shifting right. An ARM-specific optimized version is provided as a C inline function, other targets use a C macro. Increase constant size of table for pow2 to improve accuracy, also avoid early truncation in linear interpolation of result. Move constant tables for pow2 and log2 to iram. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@23967 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libfaad/common.c | 140 +++++++++++++++++++++++++------------------ apps/codecs/libfaad/common.h | 4 +- apps/codecs/libfaad/fixed.h | 25 ++++++-- 3 files changed, 105 insertions(+), 64 deletions(-) diff --git a/apps/codecs/libfaad/common.c b/apps/codecs/libfaad/common.c index e8340d318d..025c8f8c5b 100644 --- a/apps/codecs/libfaad/common.c +++ b/apps/codecs/libfaad/common.c @@ -248,33 +248,78 @@ uint32_t random_int(void) #define TABLE_BITS 6 /* just take the maximum number of bits for interpolation */ #define INTERP_BITS (REAL_BITS-TABLE_BITS) - -static const real_t pow2_tab[] = { - REAL_CONST(1.000000000000000), REAL_CONST(1.010889286051701), REAL_CONST(1.021897148654117), - REAL_CONST(1.033024879021228), REAL_CONST(1.044273782427414), REAL_CONST(1.055645178360557), - REAL_CONST(1.067140400676824), REAL_CONST(1.078760797757120), REAL_CONST(1.090507732665258), - REAL_CONST(1.102382583307841), REAL_CONST(1.114386742595892), REAL_CONST(1.126521618608242), - REAL_CONST(1.138788634756692), REAL_CONST(1.151189229952983), REAL_CONST(1.163724858777578), - REAL_CONST(1.176396991650281), REAL_CONST(1.189207115002721), REAL_CONST(1.202156731452703), - REAL_CONST(1.215247359980469), REAL_CONST(1.228480536106870), REAL_CONST(1.241857812073484), - REAL_CONST(1.255380757024691), REAL_CONST(1.269050957191733), REAL_CONST(1.282870016078778), - REAL_CONST(1.296839554651010), REAL_CONST(1.310961211524764), REAL_CONST(1.325236643159741), - REAL_CONST(1.339667524053303), REAL_CONST(1.354255546936893), REAL_CONST(1.369002422974591), - REAL_CONST(1.383909881963832), REAL_CONST(1.398979672538311), REAL_CONST(1.414213562373095), - REAL_CONST(1.429613338391970), REAL_CONST(1.445180806977047), REAL_CONST(1.460917794180647), - REAL_CONST(1.476826145939499), REAL_CONST(1.492907728291265), REAL_CONST(1.509164427593423), - REAL_CONST(1.525598150744538), REAL_CONST(1.542210825407941), REAL_CONST(1.559004400237837), - REAL_CONST(1.575980845107887), REAL_CONST(1.593142151342267), REAL_CONST(1.610490331949254), - REAL_CONST(1.628027421857348), REAL_CONST(1.645755478153965), REAL_CONST(1.663676580326736), - REAL_CONST(1.681792830507429), REAL_CONST(1.700106353718524), REAL_CONST(1.718619298122478), - REAL_CONST(1.737333835273706), REAL_CONST(1.756252160373300), REAL_CONST(1.775376492526521), - REAL_CONST(1.794709075003107), REAL_CONST(1.814252175500399), REAL_CONST(1.834008086409342), - REAL_CONST(1.853979125083386), REAL_CONST(1.874167634110300), REAL_CONST(1.894575981586966), - REAL_CONST(1.915206561397147), REAL_CONST(1.936061793492294), REAL_CONST(1.957144124175400), - REAL_CONST(1.978456026387951), REAL_CONST(2.000000000000000) +/* precision of values in pow2_tab */ +#define POWTBL_BITS (31-(INTERP_BITS)) +#define POWTBL_PRECIS (1U<<(POWTBL_BITS)) +static const uint32_t pow2_tab[] ICONST_ATTR = { + UFIX_CONST(1.000000000000000,POWTBL_PRECIS), + UFIX_CONST(1.010889286051701,POWTBL_PRECIS), + UFIX_CONST(1.021897148654117,POWTBL_PRECIS), + UFIX_CONST(1.033024879021228,POWTBL_PRECIS), + UFIX_CONST(1.044273782427414,POWTBL_PRECIS), + UFIX_CONST(1.055645178360557,POWTBL_PRECIS), + UFIX_CONST(1.067140400676824,POWTBL_PRECIS), + UFIX_CONST(1.078760797757120,POWTBL_PRECIS), + UFIX_CONST(1.090507732665258,POWTBL_PRECIS), + UFIX_CONST(1.102382583307841,POWTBL_PRECIS), + UFIX_CONST(1.114386742595892,POWTBL_PRECIS), + UFIX_CONST(1.126521618608242,POWTBL_PRECIS), + UFIX_CONST(1.138788634756692,POWTBL_PRECIS), + UFIX_CONST(1.151189229952983,POWTBL_PRECIS), + UFIX_CONST(1.163724858777578,POWTBL_PRECIS), + UFIX_CONST(1.176396991650281,POWTBL_PRECIS), + UFIX_CONST(1.189207115002721,POWTBL_PRECIS), + UFIX_CONST(1.202156731452703,POWTBL_PRECIS), + UFIX_CONST(1.215247359980469,POWTBL_PRECIS), + UFIX_CONST(1.228480536106870,POWTBL_PRECIS), + UFIX_CONST(1.241857812073484,POWTBL_PRECIS), + UFIX_CONST(1.255380757024691,POWTBL_PRECIS), + UFIX_CONST(1.269050957191733,POWTBL_PRECIS), + UFIX_CONST(1.282870016078778,POWTBL_PRECIS), + UFIX_CONST(1.296839554651010,POWTBL_PRECIS), + UFIX_CONST(1.310961211524764,POWTBL_PRECIS), + UFIX_CONST(1.325236643159741,POWTBL_PRECIS), + UFIX_CONST(1.339667524053303,POWTBL_PRECIS), + UFIX_CONST(1.354255546936893,POWTBL_PRECIS), + UFIX_CONST(1.369002422974591,POWTBL_PRECIS), + UFIX_CONST(1.383909881963832,POWTBL_PRECIS), + UFIX_CONST(1.398979672538311,POWTBL_PRECIS), + UFIX_CONST(1.414213562373095,POWTBL_PRECIS), + UFIX_CONST(1.429613338391970,POWTBL_PRECIS), + UFIX_CONST(1.445180806977047,POWTBL_PRECIS), + UFIX_CONST(1.460917794180647,POWTBL_PRECIS), + UFIX_CONST(1.476826145939499,POWTBL_PRECIS), + UFIX_CONST(1.492907728291265,POWTBL_PRECIS), + UFIX_CONST(1.509164427593423,POWTBL_PRECIS), + UFIX_CONST(1.525598150744538,POWTBL_PRECIS), + UFIX_CONST(1.542210825407941,POWTBL_PRECIS), + UFIX_CONST(1.559004400237837,POWTBL_PRECIS), + UFIX_CONST(1.575980845107887,POWTBL_PRECIS), + UFIX_CONST(1.593142151342267,POWTBL_PRECIS), + UFIX_CONST(1.610490331949254,POWTBL_PRECIS), + UFIX_CONST(1.628027421857348,POWTBL_PRECIS), + UFIX_CONST(1.645755478153965,POWTBL_PRECIS), + UFIX_CONST(1.663676580326736,POWTBL_PRECIS), + UFIX_CONST(1.681792830507429,POWTBL_PRECIS), + UFIX_CONST(1.700106353718524,POWTBL_PRECIS), + UFIX_CONST(1.718619298122478,POWTBL_PRECIS), + UFIX_CONST(1.737333835273706,POWTBL_PRECIS), + UFIX_CONST(1.756252160373300,POWTBL_PRECIS), + UFIX_CONST(1.775376492526521,POWTBL_PRECIS), + UFIX_CONST(1.794709075003107,POWTBL_PRECIS), + UFIX_CONST(1.814252175500399,POWTBL_PRECIS), + UFIX_CONST(1.834008086409342,POWTBL_PRECIS), + UFIX_CONST(1.853979125083386,POWTBL_PRECIS), + UFIX_CONST(1.874167634110300,POWTBL_PRECIS), + UFIX_CONST(1.894575981586966,POWTBL_PRECIS), + UFIX_CONST(1.915206561397147,POWTBL_PRECIS), + UFIX_CONST(1.936061793492294,POWTBL_PRECIS), + UFIX_CONST(1.957144124175400,POWTBL_PRECIS), + UFIX_CONST(1.978456026387951,POWTBL_PRECIS), + UFIX_CONST(2.000000000000000,POWTBL_PRECIS) }; -static const real_t log2_tab[] = { +static const real_t log2_tab[] ICONST_ATTR = { REAL_CONST(0.000000000000000), REAL_CONST(0.022367813028455), REAL_CONST(0.044394119358453), REAL_CONST(0.066089190457772), REAL_CONST(0.087462841250339), REAL_CONST(0.108524456778169), REAL_CONST(0.129283016944966), REAL_CONST(0.149747119504682), REAL_CONST(0.169925001442312), @@ -299,12 +344,12 @@ static const real_t log2_tab[] = { REAL_CONST(0.988684686772166), REAL_CONST(1.000000000000000) }; -real_t pow2_fix(real_t val) +uint32_t pow2_fix(real_t val) { uint32_t x1, x2; uint32_t errcorr; uint32_t index_frac; - real_t retval; + uint32_t retval; int32_t whole = (val >> REAL_BITS); /* rest = [0..1] */ @@ -313,41 +358,27 @@ real_t pow2_fix(real_t val) /* index into pow2_tab */ int32_t index = rest >> (REAL_BITS-TABLE_BITS); - - if (val == 0) - return (1<> (REAL_BITS-TABLE_BITS-INTERP_BITS); index_frac = index_frac & ((1< 0) - { - retval = 1 << whole; - } else { - retval = REAL_CONST(1) >> -whole; - } - x1 = pow2_tab[index & ((1<> INTERP_BITS; + errcorr = ( (index_frac*(x2-x1))); - if (whole > 0) - { - retval = retval * (errcorr + x1); - } else { - retval = MUL_R(retval, (errcorr + x1)); - } + retval = errcorr + (x1<> REAL_BITS); /* rest = [0..1] */ @@ -356,24 +387,17 @@ int32_t pow2_int(real_t val) /* index into pow2_tab */ int32_t index = rest >> (REAL_BITS-TABLE_BITS); - - if (val == 0) - return 1; - /* leave INTERP_BITS bits */ index_frac = rest >> (REAL_BITS-TABLE_BITS-INTERP_BITS); index_frac = index_frac & ((1< 0) - retval = 1 << whole; - else - retval = 0; - x1 = pow2_tab[index & ((1<> INTERP_BITS; + errcorr = ( (index_frac*(x2-x1))); + + retval = errcorr + (x1<= 0) ? ((real_t)((A)*(REAL_PRECISION)+0.5)) : ((real_t)((A)*(REAL_PRECISION)-0.5))) -#define COEF_CONST(A) (((A) >= 0) ? ((real_t)((A)*(COEF_PRECISION)+0.5)) : ((real_t)((A)*(COEF_PRECISION)-0.5))) -#define FRAC_CONST(A) (((A) == 1.00) ? ((real_t)FRAC_MAX) : (((A) >= 0) ? ((real_t)((A)*(FRAC_PRECISION)+0.5)) : ((real_t)((A)*(FRAC_PRECISION)-0.5)))) +#define UFIX_CONST(A,PRECISION) ((uint32_t)((A)*(PRECISION)+0.5)) +#define FIX_CONST(A,PRECISION) (((A) >= 0) ? ((real_t)((A)*(PRECISION)+0.5)) : ((real_t)((A)*(PRECISION)-0.5))) +#define REAL_CONST(A) FIX_CONST((A),(REAL_PRECISION)) +#define COEF_CONST(A) FIX_CONST((A),(COEF_PRECISION)) +#define FRAC_CONST(A) (((A) == 1.00) ? ((real_t)FRAC_MAX) : FIX_CONST((A),(FRAC_PRECISION))) //#define FRAC_CONST(A) (((A) >= 0) ? ((real_t)((A)*(FRAC_PRECISION)+0.5)) : ((real_t)((A)*(FRAC_PRECISION)-0.5))) #define Q2_BITS 22 @@ -128,6 +129,8 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2, #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS) #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6) #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23) + #define DESCALE(A,S) ((S)>0?(((A)>>((S)-1))+1)>>1:(A)<<-(S)) + #define DESCALE_SHIFT(A,SH,SC) DESCALE((A),(SC)-(SH) #elif defined(__GNUC__) && defined (__arm__) @@ -205,6 +208,18 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2, *y2 = yt2 << (FRAC_SIZE-FRAC_BITS); } +static inline real_t DESCALE_SHIFT(unsigned val, int shift, int scale) +{ + unsigned out; + if ((out = val >> (scale - shift - 1))) + { + out++; + out >>= 1; + } else + out = val << (shift - scale); + return out; +} + #else /* multiply with real shift */ @@ -225,6 +240,8 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2, #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS) #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6) #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23) + #define DESCALE(A,S) ((S)>0?(((A)>>((S)-1))+1)>>1:(A)<<-(S)) + #define DESCALE_SHIFT(A,SH,SC) DESCALE((A),(SC)-(SH) /* Complex multiplication */ static INLINE void ComplexMult(real_t *y1, real_t *y2, -- cgit v1.2.3