Replace fp_sqrt function with one that only uses shift, or and sub.

Simply extends the current isqrt() to be able to do fractional bits and improves the initial estimate using clz(). iqrt() itself is no more and is equivalent to fp_sqrt(x, 0). The original also had a small bug where the guess comparision should have been >=, not >. Uses no large integer math or division and is very accurate (simply returns a truncated fraction). Change-Id: I2ae26e6505df1770dc01e56220f7385369f90ae9
author: Michael Sevakis <jethead71@rockbox.org> 2017-09-30 22:48:41 -0400
committer: Michael Sevakis <jethead71@rockbox.org> 2017-10-01 20:29:38 -0400
commit: b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2 (patch)
tree: 9b86428b3357334b95d18729f1fb870dc3bb302f
parent: 679ae2d21c17115c998b1ada6412463c3c7a1db9 (diff)
download: rockbox-b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2.tar.gz
rockbox-b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2.zip
3 files changed, 55 insertions, 55 deletions
diff --git a/apps/plugins/fft/fft.c b/apps/plugins/fft/fft.c
index 40c251de15..3e88722b23 100644
--- a/apps/plugins/fft/fft.c
+++ b/apps/plugins/fft/fft.c
@@ -624,16 +624,16 @@ static unsigned calc_magnitudes(enum fft_amp_scale scale)
                }
                else
                {
-                    d = isqrt(d); /* linear scaling, nothing
+                    d = fp_sqrt(d, 0); /* linear scaling, nothing
-                                     bad should happen */
+                                          bad should happen */
                    d = fp16_log(d << 16); /* the log function
                                              expects s15.16 values */
                }
            }
            else
            {
-                d = isqrt(d); /* linear scaling, nothing
+                d = fp_sqrt(d, 0); /* linear scaling, nothing
-                                 bad should happen */
+                                      bad should happen */
            }
        }
diff --git a/lib/fixedpoint/fixedpoint.c b/lib/fixedpoint/fixedpoint.c
index b5bbe68a95..4530857df0 100644
--- a/lib/fixedpoint/fixedpoint.c
+++ b/lib/fixedpoint/fixedpoint.c
@@ -25,9 +25,7 @@
 #include <stdbool.h>
 #include <inttypes.h>
-#ifndef BIT_N
+#define ULONG_BITS (sizeof (unsigned long)*CHAR_BIT)
-#define BIT_N(n) (1U << (n))
-#endif
 /** TAKEN FROM ORIGINAL fixedpoint.h */
 /* Inverse gain of circular cordic rotation in s0.31 format. */
@@ -142,65 +140,72 @@ long fp_sincos(unsigned long phase, long *cos)
    return y;
 }
-/**
+/* Accurate sqrt with only elementary operations.
- * Fixed point square root via Newton-Raphson.
+ * Snagged from:
- * @param x square root argument.
+ *   http://www.devmaster.net/articles/fixed-point-optimizations/
- * @param fracbits specifies number of fractional bits in argument.
- * @return Square root of argument in same fixed point format as input.
 *
- * This routine has been modified to run longer for greater precision,
+ * Extension to fractions and initial estimate improvement by jethead71
- * but cuts calculation short if the answer is reached sooner. 
 */
 long fp_sqrt(long x, unsigned int fracbits)
 {
-    unsigned long xfp, b;
+    if (x <= 0) {
-    int n = 8; /* iteration limit (should terminate earlier) */
-    if (x <= 0)
        return 0; /* no sqrt(neg), or just sqrt(0) = 0 */
+    }
-    /* Increase working precision by one bit */
+    unsigned long g = 0;
-    xfp = x << 1;
+    unsigned long e = x;
-    fracbits++;
-    /* Get the midpoint between fracbits index and the highest bit index */
+    int intwidth = ULONG_BITS - fracbits;
-    b = ((sizeof(xfp)*8-1) - __builtin_clzl(xfp) + fracbits) >> 1;
+    int bshift = __builtin_clzl(e);
-    b = BIT_N(b);
-    do
+    if (bshift >= intwidth) {
-    {
+        bshift = -1;
-        unsigned long c = b;
+    }
-        b = (fp_div(xfp, b, fracbits) + b) >> 1;
+    else {
-        if (c == b) break;
+        bshift = (intwidth - bshift - 1) >> 1;
    }
-    while (n-- > 0);
-    return b >> 1;
+    unsigned long b = 1ul << (bshift + fracbits);
-}
-/* Accurate int sqrt with only elementary operations.
+    /* integer part */
- * Snagged from:
+    while (e && bshift >= 0) {
- *   http://www.devmaster.net/articles/fixed-point-optimizations/ */
+        unsigned long t = ((g << 1) | b) << bshift--;
-unsigned long isqrt(unsigned long x)
-{
-    /* Adding CLZ could optimize this further */
-    unsigned long g = 0;
-    int bshift = 15;
-    unsigned long b = 1ul << bshift;
-    
-    do
-    {
-        unsigned long temp = (g + g + b) << bshift;
-        if (x > temp)
+        if (e >= t) {
-        {
+            g |= b;
-            g += b;
+            e -= t;
-            x -= temp;
        }
        b >>= 1;
    }
-    while (bshift--);
+    /* fractional part */
+    while (e && b) {
+        unsigned long t = (g << 1) | b;
+        unsigned long c = e; /* detect carry */
+        e <<= 1;
+        if (e < c || e >= t) {
+            g |= b;
+            e -= t;
+        }
+        b >>= 1;
+    }
+#if 0
+    /* round up if the next bit would be a '1' */
+    if (e) {
+        unsigned long c = e; /* detect carry */
+        e <<= 1;
+        if (e < c || e >= ((g << 1) | 1)) {
+            g++;
+        }
+    }
+#endif
    return g;
 }
diff --git a/lib/fixedpoint/fixedpoint.h b/lib/fixedpoint/fixedpoint.h
index 31d60eca4b..bc50ff687d 100644
--- a/lib/fixedpoint/fixedpoint.h
+++ b/lib/fixedpoint/fixedpoint.h
@@ -45,9 +45,6 @@
 *  Take square root of a fixed point number:
 *      fp_sqrt(x, fracbits)
 *
- *  Take the square root of an integer:
- *      isqrt(x)
- *
 *  Calculate sin or cos of an angle (very fast, from a table):
 *      fp14_sin(angle)
 *      fp14_cos(angle)
@@ -88,8 +85,6 @@ long fp14_sin(int val);
 long fp16_log(int x);
 long fp16_exp(int x);
-unsigned long isqrt(unsigned long x);
 /* fast unsigned multiplication (16x16bit->32bit or 32x32bit->32bit,
 * whichever is faster for the architecture) */
 #ifdef CPU_ARM
author	Michael Sevakis <jethead71@rockbox.org>	2017-09-30 22:48:41 -0400
committer	Michael Sevakis <jethead71@rockbox.org>	2017-10-01 20:29:38 -0400
commit	b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2 (patch)
tree	9b86428b3357334b95d18729f1fb870dc3bb302f
parent	679ae2d21c17115c998b1ada6412463c3c7a1db9 (diff)
download	rockbox-b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2.tar.gz rockbox-b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2.zip

diff --git a/apps/plugins/fft/fft.c b/apps/plugins/fft/fft.c index 40c251de15..3e88722b23 100644 --- a/apps/plugins/fft/fft.c +++ b/apps/plugins/fft/fft.c
@@ -624,16 +624,16 @@ static unsigned calc_magnitudes(enum fft_amp_scale scale)
624	}	624	}
625	else	625	else
626	{	626	{
627	d = isqrt(d); /* linear scaling, nothing	627	d = fp_sqrt(d, 0); /* linear scaling, nothing
628	bad should happen */	628	bad should happen */
629	d = fp16_log(d << 16); /* the log function	629	d = fp16_log(d << 16); /* the log function
630	expects s15.16 values */	630	expects s15.16 values */
631	}	631	}
632	}	632	}
633	else	633	else
634	{	634	{
635	d = isqrt(d); /* linear scaling, nothing	635	d = fp_sqrt(d, 0); /* linear scaling, nothing
636	bad should happen */	636	bad should happen */
637	}	637	}
638	}	638	}
639		639


diff --git a/lib/fixedpoint/fixedpoint.c b/lib/fixedpoint/fixedpoint.c index b5bbe68a95..4530857df0 100644 --- a/lib/fixedpoint/fixedpoint.c +++ b/lib/fixedpoint/fixedpoint.c
@@ -25,9 +25,7 @@
25	#include <stdbool.h>	25	#include <stdbool.h>
26	#include <inttypes.h>	26	#include <inttypes.h>
27		27
28	#ifndef BIT_N	28	#define ULONG_BITS (sizeof (unsigned long)*CHAR_BIT)
29	#define BIT_N(n) (1U << (n))
30	#endif
31		29
32	/** TAKEN FROM ORIGINAL fixedpoint.h */	30	/** TAKEN FROM ORIGINAL fixedpoint.h */
33	/* Inverse gain of circular cordic rotation in s0.31 format. */	31	/* Inverse gain of circular cordic rotation in s0.31 format. */
@@ -142,65 +140,72 @@ long fp_sincos(unsigned long phase, long *cos)
142	return y;	140	return y;
143	}	141	}
144		142
145	/**	143	/* Accurate sqrt with only elementary operations.
146	* Fixed point square root via Newton-Raphson.	144	* Snagged from:
147	* @param x square root argument.	145	* http://www.devmaster.net/articles/fixed-point-optimizations/
148	* @param fracbits specifies number of fractional bits in argument.
149	* @return Square root of argument in same fixed point format as input.
150	*	146	*
151	* This routine has been modified to run longer for greater precision,	147	* Extension to fractions and initial estimate improvement by jethead71
152	* but cuts calculation short if the answer is reached sooner.
153	*/	148	*/
154	long fp_sqrt(long x, unsigned int fracbits)	149	long fp_sqrt(long x, unsigned int fracbits)
155	{	150	{
156	unsigned long xfp, b;	151	if (x <= 0) {
157	int n = 8; /* iteration limit (should terminate earlier) */
158
159	if (x <= 0)
160	return 0; /* no sqrt(neg), or just sqrt(0) = 0 */	152	return 0; /* no sqrt(neg), or just sqrt(0) = 0 */
		153	}
161		154
162	/* Increase working precision by one bit */	155	unsigned long g = 0;
163	xfp = x << 1;	156	unsigned long e = x;
164	fracbits++;
165		157
166	/* Get the midpoint between fracbits index and the highest bit index */	158	int intwidth = ULONG_BITS - fracbits;
167	b = ((sizeof(xfp)*8-1) - __builtin_clzl(xfp) + fracbits) >> 1;	159	int bshift = __builtin_clzl(e);
168	b = BIT_N(b);
169		160
170	do	161	if (bshift >= intwidth) {
171	{	162	bshift = -1;
172	unsigned long c = b;	163	}
173	b = (fp_div(xfp, b, fracbits) + b) >> 1;	164	else {
174	if (c == b) break;	165	bshift = (intwidth - bshift - 1) >> 1;
175	}	166	}
176	while (n-- > 0);
177		167
178	return b >> 1;	168	unsigned long b = 1ul << (bshift + fracbits);
179	}
180		169
181	/* Accurate int sqrt with only elementary operations.	170	/* integer part */
182	* Snagged from:	171	while (e && bshift >= 0) {
183	* http://www.devmaster.net/articles/fixed-point-optimizations/ */	172	unsigned long t = ((g << 1) \| b) << bshift--;
184	unsigned long isqrt(unsigned long x)
185	{
186	/* Adding CLZ could optimize this further */
187	unsigned long g = 0;
188	int bshift = 15;
189	unsigned long b = 1ul << bshift;
190
191	do
192	{
193	unsigned long temp = (g + g + b) << bshift;
194		173
195	if (x > temp)	174	if (e >= t) {
196	{	175	g \|= b;
197	g += b;	176	e -= t;
198	x -= temp;
199	}	177	}
200		178
201	b >>= 1;	179	b >>= 1;
202	}	180	}
203	while (bshift--);	181
		182	/* fractional part */
		183	while (e && b) {
		184	unsigned long t = (g << 1) \| b;
		185	unsigned long c = e; /* detect carry */
		186
		187	e <<= 1;
		188
		189	if (e < c \|\| e >= t) {
		190	g \|= b;
		191	e -= t;
		192	}
		193
		194	b >>= 1;
		195	}
		196
		197	#if 0
		198	/* round up if the next bit would be a '1' */
		199	if (e) {
		200	unsigned long c = e; /* detect carry */
		201
		202	e <<= 1;
		203
		204	if (e < c \|\| e >= ((g << 1) \| 1)) {
		205	g++;
		206	}
		207	}
		208	#endif
204		209
205	return g;	210	return g;
206	}	211	}


diff --git a/lib/fixedpoint/fixedpoint.h b/lib/fixedpoint/fixedpoint.h index 31d60eca4b..bc50ff687d 100644 --- a/lib/fixedpoint/fixedpoint.h +++ b/lib/fixedpoint/fixedpoint.h
@@ -45,9 +45,6 @@
45	* Take square root of a fixed point number:	45	* Take square root of a fixed point number:
46	* fp_sqrt(x, fracbits)	46	* fp_sqrt(x, fracbits)
47	*	47	*
48	* Take the square root of an integer:
49	* isqrt(x)
50	*
51	* Calculate sin or cos of an angle (very fast, from a table):	48	* Calculate sin or cos of an angle (very fast, from a table):
52	* fp14_sin(angle)	49	* fp14_sin(angle)
53	* fp14_cos(angle)	50	* fp14_cos(angle)
@@ -88,8 +85,6 @@ long fp14_sin(int val);
88	long fp16_log(int x);	85	long fp16_log(int x);
89	long fp16_exp(int x);	86	long fp16_exp(int x);
90		87
91	unsigned long isqrt(unsigned long x);
92
93	/* fast unsigned multiplication (16x16bit->32bit or 32x32bit->32bit,	88	/* fast unsigned multiplication (16x16bit->32bit or 32x32bit->32bit,
94	* whichever is faster for the architecture) */	89	* whichever is faster for the architecture) */
95	#ifdef CPU_ARM	90	#ifdef CPU_ARM