From 85aad9b3972208b0e34ba0241ebb5314118ae05e Mon Sep 17 00:00:00 2001 From: Andrew Mahone Date: Wed, 9 Dec 2009 02:24:45 +0000 Subject: Extend av_log2 in codeclib into a generic for scanning for set bits, which can provide either log2 or leading-zero-count output, and can force mapping 0 input to 0 output if needed (otherwise 0 input produces undefined result). Replace av_log2 in lib/codeclib.h, floor_log2 and wl_min_lzc in libfaad/common.c and common.h, and count_leading_zeros in libalac/alac.c with macros using bs_generic. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@23903 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/lib/codeclib.c | 22 ++++++++- apps/codecs/lib/codeclib.h | 101 +++++++++++++++++++++++++++------------- apps/codecs/libalac/alac.c | 41 +--------------- apps/codecs/libfaad/common.c | 53 +-------------------- apps/codecs/libfaad/common.h | 4 +- apps/codecs/libfaad/sbr_hfgen.c | 8 ---- 6 files changed, 92 insertions(+), 137 deletions(-) (limited to 'apps/codecs') diff --git a/apps/codecs/lib/codeclib.c b/apps/codecs/lib/codeclib.c index 1c624e0f8c..3a137068ba 100644 --- a/apps/codecs/lib/codeclib.c +++ b/apps/codecs/lib/codeclib.c @@ -33,6 +33,15 @@ unsigned char* mp3buf; // The actual MP3 buffer from Rockbox unsigned char* mallocbuf; // 512K from the start of MP3 buffer unsigned char* filebuf; // The rest of the MP3 buffer +unsigned bs_log2(unsigned x) +{ return bs_generic(x, BS_LOG2); } +unsigned bs_log2_0(unsigned x) +{ return bs_generic(x, BS_LOG2|BS_0_0); } +unsigned bs_clz(unsigned x) +{ return bs_generic(x, BS_CLZ); } +unsigned bs_clz_0(unsigned x) +{ return bs_generic(x, BS_CLZ|BS_0_0); } + int codec_init(void) { mem_ptr = 0; @@ -139,7 +148,7 @@ void qsort(void *base, size_t nmemb, size_t size, } /* From ffmpeg - libavutil/common.h */ -const uint8_t ff_log2_tab[256] ICONST_ATTR = { +const uint8_t bs_log2_tab[256] ICONST_ATTR = { 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, @@ -150,6 +159,17 @@ const uint8_t ff_log2_tab[256] ICONST_ATTR = { 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; +const uint8_t bs_clz_tab[256] ICONST_ATTR = { + 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + #ifdef RB_PROFILE void __cyg_profile_func_enter(void *this_fn, void *call_site) { #ifdef CPU_COLDFIRE diff --git a/apps/codecs/lib/codeclib.h b/apps/codecs/lib/codeclib.h index 9c3624b422..aeae5d6369 100644 --- a/apps/codecs/lib/codeclib.h +++ b/apps/codecs/lib/codeclib.h @@ -74,45 +74,80 @@ unsigned udiv32_arm(unsigned a, unsigned b); #define UDIV32(a, b) (a / b) #endif -/* TODO figure out if we really need to care about calculating - av_log2(0) */ -#if defined(CPU_ARM) && ARM_ARCH >= 6 -static inline unsigned int av_log2(uint32_t v) -{ - unsigned int r; - asm volatile("clz %[r], %[v]\n\t" /* count leading zeroes */ - "rsb %[r], %[r], #31\n\t" /* r = 31 - leading zeroes */ - "usat %[r], #5, %[r]\n\t" /* unsigned saturate r so -1 -> 0 */ - :[r] "=r" (r) : [v] "r" (v)); - return(r); -} -#elif defined(CPU_ARM) && ARM_ARCH >= 5 -static inline unsigned int av_log2(uint32_t v) -{ - return v ? 31 - __builtin_clz(v) : 0; -} -#else /* CPU_ARM */ +#if !defined(CPU_ARM) || ARM_ARCH < 5 /* From libavutil/common.h */ -extern const uint8_t ff_log2_tab[256] ICONST_ATTR; +extern const uint8_t bs_log2_tab[256] ICONST_ATTR; +extern const uint8_t bs_clz_tab[256] ICONST_ATTR; +#endif -static inline unsigned int av_log2(unsigned int v) -{ - int n; +#define BS_LOG2 0 /* default personality, equivalent floor(log2(x)) */ +#define BS_CLZ 1 /* alternate personality, Count Leading Zeros */ +#define BS_SHORT 2 /* input guaranteed not to exceed 16 bits */ +#define BS_0_0 4 /* guarantee mapping of 0 input to 0 output */ - n = 0; - if (v & 0xffff0000) { - v >>= 16; - n += 16; +/* Generic bit-scanning function, used to wrap platform CLZ instruction or + scan-and-lookup code, and to provide control over output for 0 inputs. */ +static inline unsigned int bs_generic(unsigned int v, int mode) +{ +#if defined(CPU_ARM) && ARM_ARCH >= 5 + unsigned int r = __builtin_clz(v); + if (mode & BS_CLZ) + { + if (mode & BS_0_0) + r &= 31; + } else { + r = 31 - r; + /* If mode is constant, this is a single conditional instruction */ + if (mode & BS_0_0 && (signed)r < 0) + r += 1; + } +#else + const uint8_t *bs_tab; + unsigned int r; + unsigned int n = v; + int inc; + /* Set up table, increment, and initial result value based on + personality. */ + if (mode & BS_CLZ) + { + bs_tab = bs_clz_tab; + r = 24; + inc = -16; + } else { + bs_tab = bs_log2_tab; + r = 0; + inc = 16; } - if (v & 0xff00) { - v >>= 8; - n += 8; + if (!(mode & BS_SHORT) && n >= 0x10000) { + n >>= 16; + r += inc; } - n += ff_log2_tab[v]; - - return n; -} + if (n > 0xff) { + n >>= 8; + r += inc / 2; + } +#ifdef CPU_COLDFIRE + /* The high 24 bits of n are guaranteed empty after the above, so a + superfluous ext.b instruction can be saved by loading the LUT value over + n with asm */ + asm volatile ( + "move.b (%1,%0.l),%0" + : "+d" (n) + : "a" (bs_tab) + ); +#else + n = bs_tab[n]; #endif + r += n; + if (mode & BS_CLZ && mode & BS_0_0 && v == 0) + r = 0; +#endif + return r; +} + +/* TODO figure out if we really need to care about calculating + av_log2(0) */ +#define av_log2(v) bs_generic(v, BS_0_0) /* Various codec helper functions */ diff --git a/apps/codecs/libalac/alac.c b/apps/codecs/libalac/alac.c index f94ff0fa9d..1f7867b648 100644 --- a/apps/codecs/libalac/alac.c +++ b/apps/codecs/libalac/alac.c @@ -166,46 +166,7 @@ static inline void unreadbits(alac_file *alac, int bits) alac->input_buffer_bitaccumulator *= -1; } -/* ARMv5+ has a clz instruction equivalent to our function. - */ -#if (defined(CPU_ARM) && (ARM_ARCH > 4)) -static inline int count_leading_zeros(uint32_t v) -{ - return __builtin_clz(v); -} -#else - -static const unsigned char bittab[16] ICONST_ATTR = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 -}; - -static inline int count_leading_zeros(int input) -{ - int output = 32; - -#if 0 - /* Experimentation has shown that the following test is always false, - so we don't bother to perform it. */ - if (input & 0xffff0000) - { - input >>= 16; - output -= 16; - } -#endif - if (input & 0xff00) - { - input >>= 8; - output -= 8; - } - if (input & 0xf0) - { - input >>= 4; - output -= 4; - } - output -= bittab[input]; - return output; -} -#endif +#define count_leading_zeros(x) bs_generic(x, BS_CLZ|BS_SHORT) void basterdised_rice_decompress(alac_file *alac, int32_t *output_buffer, diff --git a/apps/codecs/libfaad/common.c b/apps/codecs/libfaad/common.c index debc125b3e..e8340d318d 100644 --- a/apps/codecs/libfaad/common.c +++ b/apps/codecs/libfaad/common.c @@ -241,58 +241,7 @@ uint32_t random_int(void) return (__r1 = (t3 >> 1) | t1 ) ^ (__r2 = (t4 + t4) | t2 ); } -uint32_t ones32(uint32_t x) -{ - x -= ((x >> 1) & 0x55555555); - x = (((x >> 2) & 0x33333333) + (x & 0x33333333)); - x = (((x >> 4) + x) & 0x0f0f0f0f); - x += (x >> 8); - x += (x >> 16); - - return (x & 0x0000003f); -} - -uint32_t floor_log2(uint32_t x) -{ -#if 1 - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - - return (ones32(x) - 1); -#else - uint32_t count = 0; - - while (x >>= 1) - count++; - - return count; -#endif -} - -/* returns position of first bit that is not 0 from msb, - * starting count at lsb */ -uint32_t wl_min_lzc(uint32_t x) -{ -#if 1 - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - - return (ones32(x)); -#else - uint32_t count = 0; - - while (x >>= 1) - count++; - - return (count + 1); -#endif -} +#define floor_log2(x) bs_generic(x, BS_LOG2) #ifdef FIXED_POINT diff --git a/apps/codecs/libfaad/common.h b/apps/codecs/libfaad/common.h index ea028b1b8e..fe0d02b228 100644 --- a/apps/codecs/libfaad/common.h +++ b/apps/codecs/libfaad/common.h @@ -399,9 +399,7 @@ typedef real_t complex_t[2]; /* common functions */ uint8_t cpu_has_sse(void); uint32_t random_int(void); -uint32_t ones32(uint32_t x); -uint32_t floor_log2(uint32_t x); -uint32_t wl_min_lzc(uint32_t x); +#define wl_min_lzc(x) bs_generic(x, BS_LOG2|BS_0_0) #ifdef FIXED_POINT #define LOG2_MIN_INF REAL_CONST(-10000) int32_t log2_int(uint32_t val); diff --git a/apps/codecs/libfaad/sbr_hfgen.c b/apps/codecs/libfaad/sbr_hfgen.c index 4991839218..f77bbd052f 100644 --- a/apps/codecs/libfaad/sbr_hfgen.c +++ b/apps/codecs/libfaad/sbr_hfgen.c @@ -222,10 +222,6 @@ static void auto_correlation(sbr_info *sbr, acorr_coef *ac, exp = wl_min_lzc(mask); - /* improves accuracy */ - if (exp > 0) - exp -= 1; - for (j = offset; j < len + offset; j++) { real_t buf_j = ((QMF_RE(buffer[j][bd])+(1<<(exp-1)))>>exp); @@ -292,10 +288,6 @@ static void auto_correlation(sbr_info *sbr, acorr_coef *ac, qmf_t buffer[MAX_NTS } exp = wl_min_lzc(mask); - - /* improves accuracy */ - if (exp > 0) - exp -= 1; pow2_to_exp = 1<<(exp-1); -- cgit v1.2.3