diff options
author | Michael Sevakis <jethead71@rockbox.org> | 2017-09-30 22:48:41 -0400 |
---|---|---|
committer | Michael Sevakis <jethead71@rockbox.org> | 2017-10-01 20:29:38 -0400 |
commit | b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2 (patch) | |
tree | 9b86428b3357334b95d18729f1fb870dc3bb302f | |
parent | 679ae2d21c17115c998b1ada6412463c3c7a1db9 (diff) | |
download | rockbox-b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2.tar.gz rockbox-b2a373eb642077fbf5f1bdbe74d1e82cc534f5f2.zip |
Replace fp_sqrt function with one that only uses shift, or and sub.
Simply extends the current isqrt() to be able to do fractional bits
and improves the initial estimate using clz(). iqrt() itself is
no more and is equivalent to fp_sqrt(x, 0). The original also had
a small bug where the guess comparision should have been >=, not >.
Uses no large integer math or division and is very accurate
(simply returns a truncated fraction).
Change-Id: I2ae26e6505df1770dc01e56220f7385369f90ae9
-rw-r--r-- | apps/plugins/fft/fft.c | 8 | ||||
-rw-r--r-- | lib/fixedpoint/fixedpoint.c | 97 | ||||
-rw-r--r-- | lib/fixedpoint/fixedpoint.h | 5 |
3 files changed, 55 insertions, 55 deletions
diff --git a/apps/plugins/fft/fft.c b/apps/plugins/fft/fft.c index 40c251de15..3e88722b23 100644 --- a/apps/plugins/fft/fft.c +++ b/apps/plugins/fft/fft.c | |||
@@ -624,16 +624,16 @@ static unsigned calc_magnitudes(enum fft_amp_scale scale) | |||
624 | } | 624 | } |
625 | else | 625 | else |
626 | { | 626 | { |
627 | d = isqrt(d); /* linear scaling, nothing | 627 | d = fp_sqrt(d, 0); /* linear scaling, nothing |
628 | bad should happen */ | 628 | bad should happen */ |
629 | d = fp16_log(d << 16); /* the log function | 629 | d = fp16_log(d << 16); /* the log function |
630 | expects s15.16 values */ | 630 | expects s15.16 values */ |
631 | } | 631 | } |
632 | } | 632 | } |
633 | else | 633 | else |
634 | { | 634 | { |
635 | d = isqrt(d); /* linear scaling, nothing | 635 | d = fp_sqrt(d, 0); /* linear scaling, nothing |
636 | bad should happen */ | 636 | bad should happen */ |
637 | } | 637 | } |
638 | } | 638 | } |
639 | 639 | ||
diff --git a/lib/fixedpoint/fixedpoint.c b/lib/fixedpoint/fixedpoint.c index b5bbe68a95..4530857df0 100644 --- a/lib/fixedpoint/fixedpoint.c +++ b/lib/fixedpoint/fixedpoint.c | |||
@@ -25,9 +25,7 @@ | |||
25 | #include <stdbool.h> | 25 | #include <stdbool.h> |
26 | #include <inttypes.h> | 26 | #include <inttypes.h> |
27 | 27 | ||
28 | #ifndef BIT_N | 28 | #define ULONG_BITS (sizeof (unsigned long)*CHAR_BIT) |
29 | #define BIT_N(n) (1U << (n)) | ||
30 | #endif | ||
31 | 29 | ||
32 | /** TAKEN FROM ORIGINAL fixedpoint.h */ | 30 | /** TAKEN FROM ORIGINAL fixedpoint.h */ |
33 | /* Inverse gain of circular cordic rotation in s0.31 format. */ | 31 | /* Inverse gain of circular cordic rotation in s0.31 format. */ |
@@ -142,65 +140,72 @@ long fp_sincos(unsigned long phase, long *cos) | |||
142 | return y; | 140 | return y; |
143 | } | 141 | } |
144 | 142 | ||
145 | /** | 143 | /* Accurate sqrt with only elementary operations. |
146 | * Fixed point square root via Newton-Raphson. | 144 | * Snagged from: |
147 | * @param x square root argument. | 145 | * http://www.devmaster.net/articles/fixed-point-optimizations/ |
148 | * @param fracbits specifies number of fractional bits in argument. | ||
149 | * @return Square root of argument in same fixed point format as input. | ||
150 | * | 146 | * |
151 | * This routine has been modified to run longer for greater precision, | 147 | * Extension to fractions and initial estimate improvement by jethead71 |
152 | * but cuts calculation short if the answer is reached sooner. | ||
153 | */ | 148 | */ |
154 | long fp_sqrt(long x, unsigned int fracbits) | 149 | long fp_sqrt(long x, unsigned int fracbits) |
155 | { | 150 | { |
156 | unsigned long xfp, b; | 151 | if (x <= 0) { |
157 | int n = 8; /* iteration limit (should terminate earlier) */ | ||
158 | |||
159 | if (x <= 0) | ||
160 | return 0; /* no sqrt(neg), or just sqrt(0) = 0 */ | 152 | return 0; /* no sqrt(neg), or just sqrt(0) = 0 */ |
153 | } | ||
161 | 154 | ||
162 | /* Increase working precision by one bit */ | 155 | unsigned long g = 0; |
163 | xfp = x << 1; | 156 | unsigned long e = x; |
164 | fracbits++; | ||
165 | 157 | ||
166 | /* Get the midpoint between fracbits index and the highest bit index */ | 158 | int intwidth = ULONG_BITS - fracbits; |
167 | b = ((sizeof(xfp)*8-1) - __builtin_clzl(xfp) + fracbits) >> 1; | 159 | int bshift = __builtin_clzl(e); |
168 | b = BIT_N(b); | ||
169 | 160 | ||
170 | do | 161 | if (bshift >= intwidth) { |
171 | { | 162 | bshift = -1; |
172 | unsigned long c = b; | 163 | } |
173 | b = (fp_div(xfp, b, fracbits) + b) >> 1; | 164 | else { |
174 | if (c == b) break; | 165 | bshift = (intwidth - bshift - 1) >> 1; |
175 | } | 166 | } |
176 | while (n-- > 0); | ||
177 | 167 | ||
178 | return b >> 1; | 168 | unsigned long b = 1ul << (bshift + fracbits); |
179 | } | ||
180 | 169 | ||
181 | /* Accurate int sqrt with only elementary operations. | 170 | /* integer part */ |
182 | * Snagged from: | 171 | while (e && bshift >= 0) { |
183 | * http://www.devmaster.net/articles/fixed-point-optimizations/ */ | 172 | unsigned long t = ((g << 1) | b) << bshift--; |
184 | unsigned long isqrt(unsigned long x) | ||
185 | { | ||
186 | /* Adding CLZ could optimize this further */ | ||
187 | unsigned long g = 0; | ||
188 | int bshift = 15; | ||
189 | unsigned long b = 1ul << bshift; | ||
190 | |||
191 | do | ||
192 | { | ||
193 | unsigned long temp = (g + g + b) << bshift; | ||
194 | 173 | ||
195 | if (x > temp) | 174 | if (e >= t) { |
196 | { | 175 | g |= b; |
197 | g += b; | 176 | e -= t; |
198 | x -= temp; | ||
199 | } | 177 | } |
200 | 178 | ||
201 | b >>= 1; | 179 | b >>= 1; |
202 | } | 180 | } |
203 | while (bshift--); | 181 | |
182 | /* fractional part */ | ||
183 | while (e && b) { | ||
184 | unsigned long t = (g << 1) | b; | ||
185 | unsigned long c = e; /* detect carry */ | ||
186 | |||
187 | e <<= 1; | ||
188 | |||
189 | if (e < c || e >= t) { | ||
190 | g |= b; | ||
191 | e -= t; | ||
192 | } | ||
193 | |||
194 | b >>= 1; | ||
195 | } | ||
196 | |||
197 | #if 0 | ||
198 | /* round up if the next bit would be a '1' */ | ||
199 | if (e) { | ||
200 | unsigned long c = e; /* detect carry */ | ||
201 | |||
202 | e <<= 1; | ||
203 | |||
204 | if (e < c || e >= ((g << 1) | 1)) { | ||
205 | g++; | ||
206 | } | ||
207 | } | ||
208 | #endif | ||
204 | 209 | ||
205 | return g; | 210 | return g; |
206 | } | 211 | } |
diff --git a/lib/fixedpoint/fixedpoint.h b/lib/fixedpoint/fixedpoint.h index 31d60eca4b..bc50ff687d 100644 --- a/lib/fixedpoint/fixedpoint.h +++ b/lib/fixedpoint/fixedpoint.h | |||
@@ -45,9 +45,6 @@ | |||
45 | * Take square root of a fixed point number: | 45 | * Take square root of a fixed point number: |
46 | * fp_sqrt(x, fracbits) | 46 | * fp_sqrt(x, fracbits) |
47 | * | 47 | * |
48 | * Take the square root of an integer: | ||
49 | * isqrt(x) | ||
50 | * | ||
51 | * Calculate sin or cos of an angle (very fast, from a table): | 48 | * Calculate sin or cos of an angle (very fast, from a table): |
52 | * fp14_sin(angle) | 49 | * fp14_sin(angle) |
53 | * fp14_cos(angle) | 50 | * fp14_cos(angle) |
@@ -88,8 +85,6 @@ long fp14_sin(int val); | |||
88 | long fp16_log(int x); | 85 | long fp16_log(int x); |
89 | long fp16_exp(int x); | 86 | long fp16_exp(int x); |
90 | 87 | ||
91 | unsigned long isqrt(unsigned long x); | ||
92 | |||
93 | /* fast unsigned multiplication (16x16bit->32bit or 32x32bit->32bit, | 88 | /* fast unsigned multiplication (16x16bit->32bit or 32x32bit->32bit, |
94 | * whichever is faster for the architecture) */ | 89 | * whichever is faster for the architecture) */ |
95 | #ifdef CPU_ARM | 90 | #ifdef CPU_ARM |