From af5bad46d883731728b9e4c797cae015e17f11f3 Mon Sep 17 00:00:00 2001
From: Andrew Mahone <andrew.mahone@gmail.com>
Date: Sun, 13 Dec 2009 03:45:40 +0000
Subject: Libfaad pow2/log2 improvements: Correct pow2_* to calculate correct
 output for input < 0.0 or between 0.0 and 1.0 instead of returning 0. This
 will change output, but the float version of this codec uses pow(2.0,x) in
 place of these functions, so this behavior was probably a bug. Replace 64-bit
 multiply in pow2_* with left or right shift by whole part of input, rounding
 if shifting right. An ARM-specific optimized version is provided as a C
 inline function, other targets use a C macro. Increase constant size of table
 for pow2 to improve accuracy, also avoid early truncation in linear
 interpolation of result. Move constant tables for pow2 and log2 to iram.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@23967 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/libfaad/common.c | 140 +++++++++++++++++++++++++------------------
 apps/codecs/libfaad/common.h |   4 +-
 apps/codecs/libfaad/fixed.h  |  25 ++++++--
 3 files changed, 105 insertions(+), 64 deletions(-)

diff --git a/apps/codecs/libfaad/common.c b/apps/codecs/libfaad/common.c
index e8340d318d..025c8f8c5b 100644
--- a/apps/codecs/libfaad/common.c
+++ b/apps/codecs/libfaad/common.c
@@ -248,33 +248,78 @@ uint32_t random_int(void)
 #define TABLE_BITS 6
 /* just take the maximum number of bits for interpolation */
 #define INTERP_BITS (REAL_BITS-TABLE_BITS)
-
-static const real_t pow2_tab[] = {
-    REAL_CONST(1.000000000000000), REAL_CONST(1.010889286051701), REAL_CONST(1.021897148654117),
-    REAL_CONST(1.033024879021228), REAL_CONST(1.044273782427414), REAL_CONST(1.055645178360557),
-    REAL_CONST(1.067140400676824), REAL_CONST(1.078760797757120), REAL_CONST(1.090507732665258),
-    REAL_CONST(1.102382583307841), REAL_CONST(1.114386742595892), REAL_CONST(1.126521618608242),
-    REAL_CONST(1.138788634756692), REAL_CONST(1.151189229952983), REAL_CONST(1.163724858777578),
-    REAL_CONST(1.176396991650281), REAL_CONST(1.189207115002721), REAL_CONST(1.202156731452703),
-    REAL_CONST(1.215247359980469), REAL_CONST(1.228480536106870), REAL_CONST(1.241857812073484),
-    REAL_CONST(1.255380757024691), REAL_CONST(1.269050957191733), REAL_CONST(1.282870016078778),
-    REAL_CONST(1.296839554651010), REAL_CONST(1.310961211524764), REAL_CONST(1.325236643159741),
-    REAL_CONST(1.339667524053303), REAL_CONST(1.354255546936893), REAL_CONST(1.369002422974591),
-    REAL_CONST(1.383909881963832), REAL_CONST(1.398979672538311), REAL_CONST(1.414213562373095),
-    REAL_CONST(1.429613338391970), REAL_CONST(1.445180806977047), REAL_CONST(1.460917794180647),
-    REAL_CONST(1.476826145939499), REAL_CONST(1.492907728291265), REAL_CONST(1.509164427593423),
-    REAL_CONST(1.525598150744538), REAL_CONST(1.542210825407941), REAL_CONST(1.559004400237837),
-    REAL_CONST(1.575980845107887), REAL_CONST(1.593142151342267), REAL_CONST(1.610490331949254),
-    REAL_CONST(1.628027421857348), REAL_CONST(1.645755478153965), REAL_CONST(1.663676580326736),
-    REAL_CONST(1.681792830507429), REAL_CONST(1.700106353718524), REAL_CONST(1.718619298122478),
-    REAL_CONST(1.737333835273706), REAL_CONST(1.756252160373300), REAL_CONST(1.775376492526521),
-    REAL_CONST(1.794709075003107), REAL_CONST(1.814252175500399), REAL_CONST(1.834008086409342),
-    REAL_CONST(1.853979125083386), REAL_CONST(1.874167634110300), REAL_CONST(1.894575981586966),
-    REAL_CONST(1.915206561397147), REAL_CONST(1.936061793492294), REAL_CONST(1.957144124175400),
-    REAL_CONST(1.978456026387951), REAL_CONST(2.000000000000000)
+/* pow2_tab entries are unsigned fixed point with POWTBL_BITS fractional bits */
+#define POWTBL_BITS (31-(INTERP_BITS))
+#define POWTBL_PRECIS (1U<<(POWTBL_BITS)) /* scale factor handed to UFIX_CONST for every table entry */
+static const uint32_t pow2_tab[] ICONST_ATTR = { /* entry i holds 2^(i/64), i = 0..64; the final 2.0 entry is only read as the upper interpolation neighbour of entry 63 */
+    UFIX_CONST(1.000000000000000,POWTBL_PRECIS),
+    UFIX_CONST(1.010889286051701,POWTBL_PRECIS),
+    UFIX_CONST(1.021897148654117,POWTBL_PRECIS),
+    UFIX_CONST(1.033024879021228,POWTBL_PRECIS),
+    UFIX_CONST(1.044273782427414,POWTBL_PRECIS),
+    UFIX_CONST(1.055645178360557,POWTBL_PRECIS),
+    UFIX_CONST(1.067140400676824,POWTBL_PRECIS),
+    UFIX_CONST(1.078760797757120,POWTBL_PRECIS),
+    UFIX_CONST(1.090507732665258,POWTBL_PRECIS),
+    UFIX_CONST(1.102382583307841,POWTBL_PRECIS),
+    UFIX_CONST(1.114386742595892,POWTBL_PRECIS),
+    UFIX_CONST(1.126521618608242,POWTBL_PRECIS),
+    UFIX_CONST(1.138788634756692,POWTBL_PRECIS),
+    UFIX_CONST(1.151189229952983,POWTBL_PRECIS),
+    UFIX_CONST(1.163724858777578,POWTBL_PRECIS),
+    UFIX_CONST(1.176396991650281,POWTBL_PRECIS),
+    UFIX_CONST(1.189207115002721,POWTBL_PRECIS),
+    UFIX_CONST(1.202156731452703,POWTBL_PRECIS),
+    UFIX_CONST(1.215247359980469,POWTBL_PRECIS),
+    UFIX_CONST(1.228480536106870,POWTBL_PRECIS),
+    UFIX_CONST(1.241857812073484,POWTBL_PRECIS),
+    UFIX_CONST(1.255380757024691,POWTBL_PRECIS),
+    UFIX_CONST(1.269050957191733,POWTBL_PRECIS),
+    UFIX_CONST(1.282870016078778,POWTBL_PRECIS),
+    UFIX_CONST(1.296839554651010,POWTBL_PRECIS),
+    UFIX_CONST(1.310961211524764,POWTBL_PRECIS),
+    UFIX_CONST(1.325236643159741,POWTBL_PRECIS),
+    UFIX_CONST(1.339667524053303,POWTBL_PRECIS),
+    UFIX_CONST(1.354255546936893,POWTBL_PRECIS),
+    UFIX_CONST(1.369002422974591,POWTBL_PRECIS),
+    UFIX_CONST(1.383909881963832,POWTBL_PRECIS),
+    UFIX_CONST(1.398979672538311,POWTBL_PRECIS),
+    UFIX_CONST(1.414213562373095,POWTBL_PRECIS),
+    UFIX_CONST(1.429613338391970,POWTBL_PRECIS),
+    UFIX_CONST(1.445180806977047,POWTBL_PRECIS),
+    UFIX_CONST(1.460917794180647,POWTBL_PRECIS),
+    UFIX_CONST(1.476826145939499,POWTBL_PRECIS),
+    UFIX_CONST(1.492907728291265,POWTBL_PRECIS),
+    UFIX_CONST(1.509164427593423,POWTBL_PRECIS),
+    UFIX_CONST(1.525598150744538,POWTBL_PRECIS),
+    UFIX_CONST(1.542210825407941,POWTBL_PRECIS),
+    UFIX_CONST(1.559004400237837,POWTBL_PRECIS),
+    UFIX_CONST(1.575980845107887,POWTBL_PRECIS),
+    UFIX_CONST(1.593142151342267,POWTBL_PRECIS),
+    UFIX_CONST(1.610490331949254,POWTBL_PRECIS),
+    UFIX_CONST(1.628027421857348,POWTBL_PRECIS),
+    UFIX_CONST(1.645755478153965,POWTBL_PRECIS),
+    UFIX_CONST(1.663676580326736,POWTBL_PRECIS),
+    UFIX_CONST(1.681792830507429,POWTBL_PRECIS),
+    UFIX_CONST(1.700106353718524,POWTBL_PRECIS),
+    UFIX_CONST(1.718619298122478,POWTBL_PRECIS),
+    UFIX_CONST(1.737333835273706,POWTBL_PRECIS),
+    UFIX_CONST(1.756252160373300,POWTBL_PRECIS),
+    UFIX_CONST(1.775376492526521,POWTBL_PRECIS),
+    UFIX_CONST(1.794709075003107,POWTBL_PRECIS),
+    UFIX_CONST(1.814252175500399,POWTBL_PRECIS),
+    UFIX_CONST(1.834008086409342,POWTBL_PRECIS),
+    UFIX_CONST(1.853979125083386,POWTBL_PRECIS),
+    UFIX_CONST(1.874167634110300,POWTBL_PRECIS),
+    UFIX_CONST(1.894575981586966,POWTBL_PRECIS),
+    UFIX_CONST(1.915206561397147,POWTBL_PRECIS),
+    UFIX_CONST(1.936061793492294,POWTBL_PRECIS),
+    UFIX_CONST(1.957144124175400,POWTBL_PRECIS),
+    UFIX_CONST(1.978456026387951,POWTBL_PRECIS),
+    UFIX_CONST(2.000000000000000,POWTBL_PRECIS)
 };
 
-static const real_t log2_tab[] = {
+static const real_t log2_tab[] ICONST_ATTR = {
     REAL_CONST(0.000000000000000), REAL_CONST(0.022367813028455), REAL_CONST(0.044394119358453),
     REAL_CONST(0.066089190457772), REAL_CONST(0.087462841250339), REAL_CONST(0.108524456778169),
     REAL_CONST(0.129283016944966), REAL_CONST(0.149747119504682), REAL_CONST(0.169925001442312),
@@ -299,12 +344,12 @@ static const real_t log2_tab[] = {
     REAL_CONST(0.988684686772166), REAL_CONST(1.000000000000000)
 };
 
-real_t pow2_fix(real_t val)
+uint32_t pow2_fix(real_t val)
 {
     uint32_t x1, x2;
     uint32_t errcorr;
     uint32_t index_frac;
-    real_t retval;
+    uint32_t retval;
     int32_t whole = (val >> REAL_BITS);
 
     /* rest = [0..1] */
@@ -313,41 +358,27 @@ real_t pow2_fix(real_t val)
     /* index into pow2_tab */
     int32_t index = rest >> (REAL_BITS-TABLE_BITS);
 
-
-    if (val == 0)
-        return (1<<REAL_BITS);
-
     /* leave INTERP_BITS bits */
     index_frac = rest >> (REAL_BITS-TABLE_BITS-INTERP_BITS);
     index_frac = index_frac & ((1<<INTERP_BITS)-1);
 
-    if (whole > 0)
-    {
-        retval = 1 << whole;
-    } else {
-        retval = REAL_CONST(1) >> -whole;
-    }
-
     x1 = pow2_tab[index & ((1<<TABLE_BITS)-1)];
     x2 = pow2_tab[(index & ((1<<TABLE_BITS)-1)) + 1];
-    errcorr = ( (index_frac*(x2-x1))) >> INTERP_BITS;
+    errcorr = ( (index_frac*(x2-x1)));
 
-    if (whole > 0)
-    {
-        retval = retval * (errcorr + x1);
-    } else {
-        retval = MUL_R(retval, (errcorr + x1));
-    }
+    retval = errcorr + (x1<<INTERP_BITS);
+
+    retval = DESCALE_SHIFT(retval, whole, POWTBL_BITS + INTERP_BITS - REAL_BITS);
 
     return retval;
 }
 
-int32_t pow2_int(real_t val)
+uint32_t pow2_int(real_t val)
 {
     uint32_t x1, x2;
     uint32_t errcorr;
     uint32_t index_frac;
-    real_t retval;
+    uint32_t retval;
     int32_t whole = (val >> REAL_BITS);
 
     /* rest = [0..1] */
@@ -356,24 +387,17 @@ int32_t pow2_int(real_t val)
     /* index into pow2_tab */
     int32_t index = rest >> (REAL_BITS-TABLE_BITS);
 
-
-    if (val == 0)
-        return 1;
-
     /* leave INTERP_BITS bits */
     index_frac = rest >> (REAL_BITS-TABLE_BITS-INTERP_BITS);
     index_frac = index_frac & ((1<<INTERP_BITS)-1);
 
-    if (whole > 0)
-        retval = 1 << whole;
-    else
-        retval = 0;
-
     x1 = pow2_tab[index & ((1<<TABLE_BITS)-1)];
     x2 = pow2_tab[(index & ((1<<TABLE_BITS)-1)) + 1];
-    errcorr = ( (index_frac*(x2-x1))) >> INTERP_BITS;
+    errcorr = ( (index_frac*(x2-x1)));
+
+    retval = errcorr + (x1<<INTERP_BITS);
 
-    retval = MUL_R(retval, (errcorr + x1));
+    retval = DESCALE_SHIFT(retval, whole, POWTBL_BITS + INTERP_BITS);
 
     return retval;
 }
diff --git a/apps/codecs/libfaad/common.h b/apps/codecs/libfaad/common.h
index fe0d02b228..658e92f23f 100644
--- a/apps/codecs/libfaad/common.h
+++ b/apps/codecs/libfaad/common.h
@@ -404,8 +404,8 @@ uint32_t random_int(void);
 #define LOG2_MIN_INF REAL_CONST(-10000)
 int32_t log2_int(uint32_t val);
 int32_t log2_fix(uint32_t val);
-int32_t pow2_int(real_t val);
-real_t pow2_fix(real_t val);
+uint32_t pow2_int(real_t val); /* 2^val with every fractional bit descaled away, i.e. an integer result */
+uint32_t pow2_fix(real_t val); /* 2^val descaled so that REAL_BITS fractional bits remain */
 #endif
 uint8_t get_sr_index(const uint32_t samplerate);
 uint8_t max_pred_sfb(const uint8_t sr_index);
diff --git a/apps/codecs/libfaad/fixed.h b/apps/codecs/libfaad/fixed.h
index da895e81bc..9781a846f7 100644
--- a/apps/codecs/libfaad/fixed.h
+++ b/apps/codecs/libfaad/fixed.h
@@ -49,10 +49,11 @@ extern "C" {
 
 typedef int32_t real_t;
 
-
-#define REAL_CONST(A) (((A) >= 0) ? ((real_t)((A)*(REAL_PRECISION)+0.5)) : ((real_t)((A)*(REAL_PRECISION)-0.5)))
-#define COEF_CONST(A) (((A) >= 0) ? ((real_t)((A)*(COEF_PRECISION)+0.5)) : ((real_t)((A)*(COEF_PRECISION)-0.5)))
-#define FRAC_CONST(A) (((A) == 1.00) ? ((real_t)FRAC_MAX) : (((A) >= 0) ? ((real_t)((A)*(FRAC_PRECISION)+0.5)) : ((real_t)((A)*(FRAC_PRECISION)-0.5))))
+#define UFIX_CONST(A,PRECISION) ((uint32_t)((A)*(PRECISION)+0.5)) /* A*PRECISION as uint32_t; the +0.5 only rounds to nearest for A >= 0 */
+#define FIX_CONST(A,PRECISION) (((A) >= 0) ? ((real_t)((A)*(PRECISION)+0.5)) : ((real_t)((A)*(PRECISION)-0.5))) /* A*PRECISION as real_t, rounded half away from zero */
+#define REAL_CONST(A) FIX_CONST((A),(REAL_PRECISION))
+#define COEF_CONST(A) FIX_CONST((A),(COEF_PRECISION))
+#define FRAC_CONST(A) (((A) == 1.00) ? ((real_t)FRAC_MAX) : FIX_CONST((A),(FRAC_PRECISION))) /* exactly 1.0 maps to FRAC_MAX instead of being scaled */
 //#define FRAC_CONST(A) (((A) >= 0) ? ((real_t)((A)*(FRAC_PRECISION)+0.5)) : ((real_t)((A)*(FRAC_PRECISION)-0.5)))
 
 #define Q2_BITS 22
@@ -128,6 +129,8 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2,
   #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
   #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
   #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+  #define DESCALE(A,S) ((S)>0?(((A)>>((S)-1))+1)>>1:(A)<<-(S)) /* S > 0: A >> S rounded to nearest; otherwise A << -S */
+  #define DESCALE_SHIFT(A,SH,SC) DESCALE((A),(SC)-(SH)) /* A * 2^((SH)-(SC)); closing parenthesis restored so the macro expands */
 
 #elif defined(__GNUC__) && defined (__arm__)
 
@@ -205,6 +208,18 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2,
     *y2 = yt2 << (FRAC_SIZE-FRAC_BITS);
 }
 
+static inline real_t DESCALE_SHIFT(unsigned val, int shift, int scale)
+{   /* Scales val by 2^(shift - scale); function form used in the GCC/ARM branch. */
+    unsigned out;
+    if ((out = val >> (scale - shift - 1))) /* shift one bit short so the bit that decides rounding is still present */
+    {
+        out++;      /* add the rounding increment ... */
+        out >>= 1;  /* ... then drop the extra bit: val >> (scale - shift), rounded to nearest */
+    } else /* the pre-shift produced 0 */
+        out = val << (shift - scale); /* NOTE(review): both shift counts can be negative, which ISO C leaves undefined; presumably this relies on ARM register-specified shift behaviour -- confirm */
+    return out; /* unsigned value converted to the declared real_t return type */
+}
+
 #else
 
   /* multiply with real shift */
@@ -225,6 +240,8 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2,
   #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
   #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
   #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+  #define DESCALE(A,S) ((S)>0?(((A)>>((S)-1))+1)>>1:(A)<<-(S)) /* S > 0: A >> S rounded to nearest; otherwise A << -S */
+  #define DESCALE_SHIFT(A,SH,SC) DESCALE((A),(SC)-(SH)) /* A * 2^((SH)-(SC)); closing parenthesis restored so the macro expands */
 
 /* Complex multiplication */
 static INLINE void ComplexMult(real_t *y1, real_t *y2,
-- 
cgit v1.2.3