From 85aad9b3972208b0e34ba0241ebb5314118ae05e Mon Sep 17 00:00:00 2001
From: Andrew Mahone <andrew.mahone@gmail.com>
Date: Wed, 9 Dec 2009 02:24:45 +0000
Subject: Extend av_log2 in codeclib into a generic function for scanning for
 set bits, which can provide either log2 or leading-zero-count output, and can
 force mapping 0 input to 0 output if needed (otherwise 0 input produces an
 undefined result). Replace av_log2 in lib/codeclib.h, floor_log2 and
 wl_min_lzc in libfaad/common.c and common.h, and count_leading_zeros in
 libalac/alac.c with macros using bs_generic.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@23903 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/lib/codeclib.c      |  22 ++++++++-
 apps/codecs/lib/codeclib.h      | 101 +++++++++++++++++++++++++++-------------
 apps/codecs/libalac/alac.c      |  41 +---------------
 apps/codecs/libfaad/common.c    |  53 +--------------------
 apps/codecs/libfaad/common.h    |   4 +-
 apps/codecs/libfaad/sbr_hfgen.c |   8 ----
 6 files changed, 92 insertions(+), 137 deletions(-)

(limited to 'apps/codecs')

diff --git a/apps/codecs/lib/codeclib.c b/apps/codecs/lib/codeclib.c
index 1c624e0f8c..3a137068ba 100644
--- a/apps/codecs/lib/codeclib.c
+++ b/apps/codecs/lib/codeclib.c
@@ -33,6 +33,15 @@ unsigned char* mp3buf;     // The actual MP3 buffer from Rockbox
 unsigned char* mallocbuf;  // 512K from the start of MP3 buffer
 unsigned char* filebuf;    // The rest of the MP3 buffer
 
+unsigned bs_log2(unsigned x)   /* floor(log2(x)); undefined for x == 0 */
+{ return bs_generic(x, BS_LOG2); }
+unsigned bs_log2_0(unsigned x) /* floor(log2(x)), with 0 mapped to 0 */
+{ return bs_generic(x, BS_LOG2|BS_0_0); }
+unsigned bs_clz(unsigned x)    /* leading-zero count; undefined for x == 0 */
+{ return bs_generic(x, BS_CLZ); }
+unsigned bs_clz_0(unsigned x)  /* leading-zero count, with 0 mapped to 0 */
+{ return bs_generic(x, BS_CLZ|BS_0_0); }
+
 int codec_init(void)
 {
     mem_ptr = 0;
@@ -139,7 +148,7 @@ void qsort(void *base, size_t nmemb, size_t size,
 }
 
 /* From ffmpeg - libavutil/common.h */
-const uint8_t ff_log2_tab[256] ICONST_ATTR = {
+const uint8_t bs_log2_tab[256] ICONST_ATTR = {
     0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
     5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
     6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
@@ -150,6 +159,17 @@ const uint8_t ff_log2_tab[256] ICONST_ATTR = {
     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
 };
 
+const uint8_t bs_clz_tab[256] ICONST_ATTR = {
+    8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+}; /* bs_clz_tab[i] = number of leading zero bits in the 8-bit value i */
+
 #ifdef RB_PROFILE
 void __cyg_profile_func_enter(void *this_fn, void *call_site) {
 #ifdef CPU_COLDFIRE
diff --git a/apps/codecs/lib/codeclib.h b/apps/codecs/lib/codeclib.h
index 9c3624b422..aeae5d6369 100644
--- a/apps/codecs/lib/codeclib.h
+++ b/apps/codecs/lib/codeclib.h
@@ -74,45 +74,80 @@ unsigned udiv32_arm(unsigned a, unsigned b);
 #define UDIV32(a, b) (a / b)
 #endif
 
-/* TODO figure out if we really need to care about calculating
-   av_log2(0) */
-#if defined(CPU_ARM) && ARM_ARCH >= 6
-static inline unsigned int av_log2(uint32_t v)
-{
-    unsigned int r;
-    asm volatile("clz %[r], %[v]\n\t" /* count leading zeroes */
-                 "rsb %[r], %[r], #31\n\t" /* r = 31 - leading zeroes */
-                 "usat %[r], #5, %[r]\n\t" /* unsigned saturate r so -1 -> 0 */
-                 :[r] "=r" (r) : [v] "r" (v));
-    return(r);
-}
-#elif defined(CPU_ARM) && ARM_ARCH >= 5
-static inline unsigned int av_log2(uint32_t v)
-{
-    return v ? 31 - __builtin_clz(v) : 0;
-}
-#else /* CPU_ARM */
+#if !defined(CPU_ARM) || ARM_ARCH < 5
 /* From libavutil/common.h */
-extern const uint8_t ff_log2_tab[256] ICONST_ATTR;
+extern const uint8_t bs_log2_tab[256] ICONST_ATTR;
+extern const uint8_t bs_clz_tab[256] ICONST_ATTR;
+#endif
 
-static inline unsigned int av_log2(unsigned int v)
-{
-    int n;
+#define BS_LOG2  0 /* default personality, equivalent to floor(log2(x)) */
+#define BS_CLZ   1 /* alternate personality, Count Leading Zeros */
+#define BS_SHORT 2 /* input guaranteed not to exceed 16 bits */
+#define BS_0_0   4 /* guarantee mapping of 0 input to 0 output */
 
-    n = 0;
-    if (v & 0xffff0000) {
-        v >>= 16;
-        n += 16;
+/* Generic bit-scanning function, used to wrap platform CLZ instruction or
+   scan-and-lookup code, and to provide control over output for 0 inputs.
+   v is the value to scan; mode is an OR of the BS_* flags.  Returns
+   floor(log2(v)), or the leading-zero count when BS_CLZ is set.  v == 0
+   yields 0 if BS_0_0 is set; otherwise the result is undefined. */
+static inline unsigned int bs_generic(unsigned int v, int mode)
+{
+#if defined(CPU_ARM) && ARM_ARCH >= 5
+    unsigned int r;
+    /* __builtin_clz(0) is undefined, so a zero input that must map to 0 is
+       answered before the builtin is reached. */
+    if ((mode & BS_0_0) && v == 0)
+        return 0;
+    r = __builtin_clz(v);
+    if (!(mode & BS_CLZ))
+        r = 31 - r;
+#else
+    const uint8_t *bs_tab;
+    unsigned int r;
+    unsigned int n = v;
+    int inc;
+    /* Set up table, increment, and initial result value based on
+       personality. */
+    if (mode & BS_CLZ)
+    {
+        bs_tab = bs_clz_tab;
+        r = 24;
+        inc = -16;
+    } else {
+        bs_tab = bs_log2_tab;
+        r = 0;
+        inc = 16;
     }
-    if (v & 0xff00) {
-        v >>= 8;
-        n += 8;
+    if (!(mode & BS_SHORT) && n >= 0x10000) {
+        n >>= 16;
+        r += inc;
     }
-    n += ff_log2_tab[v];
-
-    return n;
-}
+    if (n > 0xff) {
+        n >>= 8;
+        r += inc / 2;
+    }
+#ifdef CPU_COLDFIRE
+    /* The high 24 bits of n are guaranteed empty after the above, so a
+       superfluous ext.b instruction can be saved by loading the LUT value over
+       n with asm */
+    asm volatile (
+        "move.b (%1,%0.l),%0"
+        : "+d" (n)
+        : "a" (bs_tab)
+    );
+#else
+    n = bs_tab[n];
 #endif
+    r += n;
+    if ((mode & BS_CLZ) && (mode & BS_0_0) && v == 0)
+        r = 0;
+#endif
+    return r;
+}
+
+/* TODO figure out if we really need to care about calculating
+   av_log2(0).  BS_0_0 alone means log2 mode (BS_LOG2 is 0) with 0 -> 0. */
+#define av_log2(v) bs_generic(v, BS_0_0)
 
 /* Various codec helper functions */
 
diff --git a/apps/codecs/libalac/alac.c b/apps/codecs/libalac/alac.c
index f94ff0fa9d..1f7867b648 100644
--- a/apps/codecs/libalac/alac.c
+++ b/apps/codecs/libalac/alac.c
@@ -166,46 +166,7 @@ static inline void unreadbits(alac_file *alac, int bits)
         alac->input_buffer_bitaccumulator *= -1;
 }
 
-/* ARMv5+ has a clz instruction equivalent to our function.
- */
-#if (defined(CPU_ARM) && (ARM_ARCH > 4))
-static inline int count_leading_zeros(uint32_t v)
-{
-    return __builtin_clz(v);
-}
-#else
-
-static const unsigned char bittab[16] ICONST_ATTR = {
-    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4
-};
-
-static inline int count_leading_zeros(int input)
-{
-    int output = 32;
-
-#if 0
-    /* Experimentation has shown that the following test is always false,
-       so we don't bother to perform it. */
-    if (input & 0xffff0000)
-    {
-        input >>= 16;
-        output -= 16;
-    }
-#endif
-    if (input & 0xff00)
-    {
-        input >>= 8;
-        output -= 8;
-    }
-    if (input & 0xf0)
-    {
-        input >>= 4;
-        output -= 4;
-    }
-    output -= bittab[input];
-    return output;
-}
-#endif
+#define count_leading_zeros(x) bs_generic(x, BS_CLZ|BS_SHORT) /* x <= 0xffff */
 
 void basterdised_rice_decompress(alac_file *alac,
                                  int32_t *output_buffer,
diff --git a/apps/codecs/libfaad/common.c b/apps/codecs/libfaad/common.c
index debc125b3e..e8340d318d 100644
--- a/apps/codecs/libfaad/common.c
+++ b/apps/codecs/libfaad/common.c
@@ -241,58 +241,7 @@ uint32_t random_int(void)
     return (__r1 = (t3 >> 1) | t1 ) ^ (__r2 = (t4 + t4) | t2 );
 }
 
-uint32_t ones32(uint32_t x)
-{
-    x -= ((x >> 1) & 0x55555555);
-    x = (((x >> 2) & 0x33333333) + (x & 0x33333333));
-    x = (((x >> 4) + x) & 0x0f0f0f0f);
-    x += (x >> 8);
-    x += (x >> 16);
-
-    return (x & 0x0000003f);
-}
-
-uint32_t floor_log2(uint32_t x)
-{
-#if 1
-    x |= (x >> 1);
-    x |= (x >> 2);
-    x |= (x >> 4);
-    x |= (x >> 8);
-    x |= (x >> 16);
-
-    return (ones32(x) - 1);
-#else
-    uint32_t count = 0;
-
-    while (x >>= 1)
-        count++;
-
-    return count;
-#endif
-}
-
-/* returns position of first bit that is not 0 from msb,
- * starting count at lsb */
-uint32_t wl_min_lzc(uint32_t x)
-{
-#if 1
-    x |= (x >> 1);
-    x |= (x >> 2);
-    x |= (x >> 4);
-    x |= (x >> 8);
-    x |= (x >> 16);
-
-    return (ones32(x));
-#else
-    uint32_t count = 0;
-
-    while (x >>= 1)
-        count++;
-
-    return (count + 1);
-#endif
-}
+#define floor_log2(x) bs_generic(x, BS_LOG2) /* x == 0: undefined (was ~0u) */
 
 #ifdef FIXED_POINT
 
diff --git a/apps/codecs/libfaad/common.h b/apps/codecs/libfaad/common.h
index ea028b1b8e..fe0d02b228 100644
--- a/apps/codecs/libfaad/common.h
+++ b/apps/codecs/libfaad/common.h
@@ -399,9 +399,7 @@ typedef real_t complex_t[2];
 /* common functions */
 uint8_t cpu_has_sse(void);
 uint32_t random_int(void);
-uint32_t ones32(uint32_t x);
-uint32_t floor_log2(uint32_t x);
-uint32_t wl_min_lzc(uint32_t x);
+#define wl_min_lzc(x) bs_generic(x, BS_LOG2|BS_0_0) /* floor(log2(x)); 0 -> 0 */
 #ifdef FIXED_POINT
 #define LOG2_MIN_INF REAL_CONST(-10000)
 int32_t log2_int(uint32_t val);
diff --git a/apps/codecs/libfaad/sbr_hfgen.c b/apps/codecs/libfaad/sbr_hfgen.c
index 4991839218..f77bbd052f 100644
--- a/apps/codecs/libfaad/sbr_hfgen.c
+++ b/apps/codecs/libfaad/sbr_hfgen.c
@@ -222,10 +222,6 @@ static void auto_correlation(sbr_info *sbr, acorr_coef *ac,
 
     exp = wl_min_lzc(mask);
 
-    /* improves accuracy */
-    if (exp > 0)
-        exp -= 1;
-
     for (j = offset; j < len + offset; j++)
     {
         real_t buf_j = ((QMF_RE(buffer[j][bd])+(1<<(exp-1)))>>exp);
@@ -292,10 +288,6 @@ static void auto_correlation(sbr_info *sbr, acorr_coef *ac, qmf_t buffer[MAX_NTS
     }
 
     exp = wl_min_lzc(mask);
-
-    /* improves accuracy */
-    if (exp > 0)
-        exp -= 1;
    
     pow2_to_exp = 1<<(exp-1);
 
-- 
cgit v1.2.3