summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2005-10-16 22:24:00 +0000
committerJens Arnold <amiconn@rockbox.org>2005-10-16 22:24:00 +0000
commit69ffcd5356cde94ec83417455081b701b7a6b633 (patch)
tree39f9c0f8d75784e6ee4aa18301cd9f7e50354b2a
parent0293dba5d28c336d2438660f194bd6b9a2f00a90 (diff)
downloadrockbox-69ffcd5356cde94ec83417455081b701b7a6b633.tar.gz
rockbox-69ffcd5356cde94ec83417455081b701b7a6b633.zip
Mandelbrot plugin: New fixed point routines, loosing a tiny bit of precision (3 bits), but way faster than before. Full precision routine uses the EMAC on coldfire. No more 64 bit arithmetics used except within the asm inlines.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7636 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/plugins/mandelbrot.c336
1 files changed, 182 insertions, 154 deletions
diff --git a/apps/plugins/mandelbrot.c b/apps/plugins/mandelbrot.c
index 8a3c14a194..1f80ff395f 100644
--- a/apps/plugins/mandelbrot.c
+++ b/apps/plugins/mandelbrot.c
@@ -55,15 +55,15 @@
55static struct plugin_api* rb; 55static struct plugin_api* rb;
56static char buff[32]; 56static char buff[32];
57 57
58/* Fixed point format: 6 bits integer part incl. sign, 58 bits fractional part */ 58/* Fixed point format: 6 bits integer part incl. sign, 26 bits fractional part */
59static long long x_min; 59static long x_min;
60static long long x_max; 60static long x_max;
61static long long x_step; 61static long x_step;
62static long long x_delta; 62static long x_delta;
63static long long y_min; 63static long y_min;
64static long long y_max; 64static long y_max;
65static long long y_step; 65static long y_step;
66static long long y_delta; 66static long y_delta;
67 67
68static int px_min = 0; 68static int px_min = 0;
69static int px_max = LCD_WIDTH; 69static int px_max = LCD_WIDTH;
@@ -78,117 +78,148 @@ static unsigned int gbuf_size = 0;
78static unsigned char graybuffer[LCD_HEIGHT]; 78static unsigned char graybuffer[LCD_HEIGHT];
79 79
80#if CONFIG_CPU == SH7034 80#if CONFIG_CPU == SH7034
81long long mul64(long long f1, long long f2); 81
82 82#define MULS16_ASR10(a, b) muls16_asr10(a, b)
83asm ( 83static inline short muls16_asr10(short a, short b)
84 /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */ 84{
85 ".global _mul64 \n" /* Notation: abcd * efgh, where each letter */ 85 short r;
86 ".type _mul64,@function\n" /* represents 16 bits. Called with: */ 86 asm (
87"_mul64: \n" /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */ 87 "muls %[a],%[b] \n"
88 "swap.w r4,r2 \n" /* r2 = ba */ 88 "sts macl,%[r] \n"
89 "mulu r2,r7 \n" /* a * h */ 89 "shlr8 %[r] \n"
90 "swap.w r6,r3 \n" /* r3 = fe */ 90 "shlr2 %[r] \n"
91 "sts macl,r0 \n" /* r0 = a * h */ 91 : /* outputs */
92 "mulu r5,r3 \n" /* d * e */ 92 [r]"=r"(r)
93 "swap.w r7,r3 \n" /* r3 = hg */ 93 : /* inputs */
94 "sts macl,r1 \n" /* r1 = d * e */ 94 [a]"r"(a),
95 "mulu r4,r3 \n" /* b * g */ 95 [b]"r"(b)
96 "add r1,r0 \n" /* r0 += r1 */ 96 );
97 "swap.w r5,r2 \n" /* r2 = dc */ 97 return r;
98 "sts macl,r1 \n" /* r1 = b * g */ 98}
99 "mulu r2,r6 \n" /* c * f */ 99
100 "add r1,r0 \n" /* r0 += r1 */ 100#define MULS32_ASR26(a, b) muls32_asr26(a, b)
101 "sts macl,r1 \n" /* r1 = c * f */ 101static inline long muls32_asr26(long a, long b)
102 "mulu r4,r7 \n" /* b * h */ 102{
103 "add r1,r0 \n" /* r0 += r1 */ 103 long r, t1, t2, t3;
104 "shll16 r0 \n" /* r0 <<= 16 */ 104 asm (
105 "sts macl,r1 \n" /* r1 = b * h */ 105 /* Signed 32bit * 32bit -> 64bit multiplication.
106 "mulu r2,r3 \n" /* c * g */ 106 Notation: xxab * xxcd, where each letter represents 16 bits.
107 "add r1,r0 \n" /* r0 += r1 */ 107 xx is the 64 bit sign extension. */
108 "sts macl,r1 \n" /* r1 = c * g */ 108 "swap.w %[a],%[t1] \n" /* t1 = ba */
109 "mulu r5,r6 \n" /* d * f */ 109 "mulu %[t1],%[b] \n" /* a * d */
110 "add r1,r0 \n" /* r0 += r1 */ 110 "swap.w %[b],%[t3] \n" /* t3 = dc */
111 "sts macl,r1 \n" /* r1 = d * f */ 111 "sts macl,%[t2] \n" /* t2 = a * d */
112 "mulu r2,r7 \n" /* c * h */ 112 "mulu %[t1],%[t3] \n" /* a * c */
113 "add r1,r0 \n" /* r0 += r1 */ 113 "sts macl,%[r] \n" /* hi = a * c */
114 "sts macl,r2 \n" /* r2 = c * h */ 114 "mulu %[a],%[t3] \n" /* b * c */
115 "mulu r5,r3 \n" /* d * g */ 115 "clrt \n"
116 "clrt \n" 116 "sts macl,%[t3] \n" /* t3 = b * c */
117 "sts macl,r3 \n" /* r3 = d * g */ 117 "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */
118 "addc r2,r3 \n" /* r3 += r2, carry->r2 */ 118 "movt %[t2] \n"
119 "movt r2 \n" 119 "mulu %[a],%[b] \n" /* b * d */
120 "mulu r5,r7 \n" /* d * h */ 120 "mov %[t3],%[t1] \n" /* t2t3 <<= 16 */
121 "mov r3,r1 \n" /* r2r3 <<= 16 */ 121 "xtrct %[t2],%[t1] \n"
122 "xtrct r2,r1 \n" 122 "mov %[t1],%[t2] \n"
123 "mov r1,r2 \n" 123 "shll16 %[t3] \n"
124 "shll16 r3 \n" 124 "sts macl,%[t1] \n" /* lo = b * d */
125 "sts macl,r1 \n" /* r1 = d * h */ 125 "clrt \n" /* hi.lo += t2t3 */
126 "clrt \n" /* r0r1 += r2r3 */ 126 "addc %[t3],%[t1] \n"
127 "addc r3,r1 \n" 127 "addc %[t2],%[r] \n"
128 "rts \n" 128 "cmp/pz %[a] \n" /* ab >= 0 ? */
129 "addc r2,r0 \n" 129 "bt 1f \n"
130); 130 "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */
131#define MUL64(a, b) mul64(a, b) 131 "1: \n"
132 "cmp/pz %[b] \n" /* cd >= 0 ? */
133 "bt 2f \n"
134 "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */
135 "2: \n"
136 /* Shift right by 26 and return low 32 bits */
137 "shll2 %[r] \n" /* hi <<= 6 */
138 "shll2 %[r] \n"
139 "shll2 %[r] \n"
140 "shlr16 %[t1] \n" /* (unsigned)lo >>= 26 */
141 "shlr8 %[t1] \n"
142 "shlr2 %[t1] \n"
143 "or %[t1],%[r] \n" /* combine result */
144 : /* outputs */
145 [r] "=&r"(r),
146 [t1]"=&r"(t1),
147 [t2]"=&r"(t2),
148 [t3]"=&r"(t3)
149 : /* inputs */
150 [a] "r" (a),
151 [b] "r" (b)
152 );
153 return r;
154}
132 155
133#elif defined CPU_COLDFIRE 156#elif defined CPU_COLDFIRE
134long long mul64(long long f1, long long f2); 157
135 158#define MULS16_ASR10(a, b) muls16_asr10(a, b)
136asm ( 159static inline short muls16_asr10(short a, short b)
137 /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */ 160{
138 ".section .text,\"ax\",@progbits\n" 161 asm (
139 ".global mul64 \n" /* Notation: abcd * efgh, where each letter */ 162 "muls.w %[a],%[b] \n"
140 ".type mul64,@function\n" /* represents 16 bits. */ 163 "asr.l #8,%[b] \n"
141"mul64: \n" 164 "asr.l #2,%[b] \n"
142 "lea.l (-16,%sp),%sp \n" 165 : /* outputs */
143 "movem.l %d2-%d5,(%sp) \n" 166 [b]"+d"(b)
144 167 : /* inputs */
145 "movem.l (20,%sp),%d0-%d3\n" /* %d0%d1 = abcd, %d2%d3 = efgh */ 168 [a]"d" (a)
146 "mulu.l %d3,%d0 \n" /* %d0 = ab * gh */ 169 );
147 "mulu.l %d1,%d2 \n" /* %d2 = cd * ef */ 170 return b;
148 "add.l %d2,%d0 \n" /* %d0 += %d2 */ 171}
149 "move.l %d1,%d4 \n" 172
150 "swap %d4 \n" /* %d4 = dc */ 173/* Needs the EMAC initialised to fractional mode w/o rounding and saturation */
151 "move.l %d3,%d5 \n" 174#define MULS32_INIT() coldfire_set_macsr(EMAC_FRACTIONAL)
152 "swap %d5 \n" /* %d5 = hg */ 175#define MULS32_ASR26(a, b) muls32_asr26(a, b)
153 "move.l %d4,%d2 \n" 176static inline long muls32_asr26(long a, long b)
154 "mulu.w %d5,%d2 \n" /* %d2 = c * g */ 177{
155 "add.l %d2,%d0 \n" /* %d0 += %d2 */ 178 long r, t1;
156 "mulu.w %d3,%d4 \n" /* %d4 = c * h */ 179 asm (
157 "mulu.w %d1,%d5 \n" /* %d5 = d * h */ 180 "mac.l %[a],%[b],%%acc0\n" /* multiply */
158 "add.l %d4,%d5 \n" /* %d5 += %d4 */ 181 "mulu.l %[a],%[b] \n" /* get lower half */
159 "subx.l %d4,%d4 \n" 182 "movclr.l %%acc0,%[r] \n" /* get higher half */
160 "neg.l %d4 \n" /* carry -> %d4 */ 183 "asl.l #5,%[r] \n" /* hi <<= 5, plus one free */
161 "swap %d4 \n" 184 "moveq.l #26,%[t1] \n"
162 "swap %d5 \n" 185 "lsr.l %[t1],%[b] \n" /* (unsigned)lo >>= 26 */
163 "move.w %d5,%d4 \n" 186 "or.l %[b],%[r] \n" /* combine result */
164 "clr.w %d5 \n" /* %d4%d5 <<= 16 */ 187 : /* outputs */
165 "mulu.w %d3,%d1 \n" /* %d1 = d * h */ 188 [r]"=&d"(r),
166 "add.l %d5,%d1 \n" 189 [t1]"=&d"(t1),
167 "addx.l %d4,%d0 \n" /* %d0%d1 += %d4%d5 */ 190 [b] "+d" (b)
168 191 : /* inputs */
169 "movem.l (%sp),%d2-%d5 \n" 192 [a] "d" (a)
170 "lea.l (16,%sp),%sp\n" 193 );
171 "rts \n" 194 return r;
172); 195}
173#define MUL64(a, b) mul64(a, b) 196
174 197#endif /* CPU */
175#else 198
176#define MUL64(a, b) ((a)*(b)) 199/* default macros */
200#ifndef MULS16_ASR10
201#define MULS16_ASR10(a, b) ((short)(((long)(a) * (long)(b)) >> 10))
202#endif
203#ifndef MULS32_ASR26
204#define MULS32_ASR26(a, b) ((long)(((long long)(a) * (long long)(b)) >> 26))
205#endif
206#ifndef MULS32_INIT
207#define MULS32_INIT()
177#endif 208#endif
178 209
179int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */ 210int ilog2_fp(long value) /* calculate integer log2(value_fp_6.26) */
180{ 211{
181 int i = 0; 212 int i = 0;
182 213
183 if (value <= 0) { 214 if (value <= 0) {
184 return -32767; 215 return -32767;
185 } else if (value > (1LL<<58)) { 216 } else if (value > (1L<<26)) {
186 while (value >= (2LL<<58)) { 217 while (value >= (2L<<26)) {
187 value >>= 1; 218 value >>= 1;
188 i++; 219 i++;
189 } 220 }
190 } else { 221 } else {
191 while (value < (1LL<<58)) { 222 while (value < (1L<<26)) {
192 value <<= 1; 223 value <<= 1;
193 i--; 224 i--;
194 } 225 }
@@ -199,59 +230,57 @@ int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */
199void recalc_parameters(void) 230void recalc_parameters(void)
200{ 231{
201 x_step = (x_max - x_min) / LCD_WIDTH; 232 x_step = (x_max - x_min) / LCD_WIDTH;
202 x_delta = MUL64(x_step, (LCD_WIDTH/8)); 233 x_delta = x_step * (LCD_WIDTH/8);
203 y_step = (y_max - y_min) / LCD_HEIGHT; 234 y_step = (y_max - y_min) / LCD_HEIGHT;
204 y_delta = MUL64(y_step, (LCD_HEIGHT/8)); 235 y_delta = y_step * (LCD_HEIGHT/8);
205 step_log2 = MIN(ilog2_fp(x_step), ilog2_fp(y_step)); 236 step_log2 = ilog2_fp(MIN(x_step, y_step));
206 max_iter = MAX(10, -15 * step_log2 - 50); 237 max_iter = MAX(15, -15 * step_log2 - 45);
207} 238}
208 239
209void init_mandelbrot_set(void) 240void init_mandelbrot_set(void)
210{ 241{
211#if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */ 242#if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */
212 x_min = -38LL<<54; // -2.375<<58 243 x_min = -38L<<22; // -2.375<<26
213 x_max = 15LL<<54; // 0.9375<<58 244 x_max = 15L<<22; // 0.9375<<26
214#else /* Iriver H1x0 */ 245#else /* Iriver H1x0 */
215 x_min = -36LL<<54; // -2.25<<58 246 x_min = -36L<<22; // -2.25<<26
216 x_max = 12LL<<54; // 0.75<<58 247 x_max = 12L<<22; // 0.75<<26
217#endif 248#endif
218 y_min = -19LL<<54; // -1.1875<<58 249 y_min = -19L<<22; // -1.1875<<26
219 y_max = 19LL<<54; // 1.1875<<58 250 y_max = 19L<<22; // 1.1875<<26
220 recalc_parameters(); 251 recalc_parameters();
221} 252}
222 253
223void calc_mandelbrot_32(void) 254void calc_mandelbrot_low_prec(void)
224{ 255{
225 long start_tick, last_yield; 256 long start_tick, last_yield;
226 unsigned n_iter; 257 unsigned n_iter;
227 long long a64, b64; 258 long a32, b32;
228 long x, x2, y, y2, a, b; 259 short x, x2, y, y2, a, b;
229 int p_x, p_y; 260 int p_x, p_y;
230 int brightness; 261 int brightness;
231 262
232 start_tick = last_yield = *rb->current_tick; 263 start_tick = last_yield = *rb->current_tick;
233 264
234 for (p_x = 0, a64 = x_min; p_x <= px_max; p_x++, a64 += x_step) { 265 for (p_x = 0, a32 = x_min; p_x < px_max; p_x++, a32 += x_step) {
235 if (p_x < px_min) 266 if (p_x < px_min)
236 continue; 267 continue;
237 a = a64 >> 32; 268 a = a32 >> 16;
238 for (p_y = LCD_HEIGHT-1, b64 = y_min; p_y >= py_min; p_y--, b64 += y_step) { 269 for (p_y = LCD_HEIGHT-1, b32 = y_min; p_y >= py_min; p_y--, b32 += y_step) {
239 if (p_y >= py_max) 270 if (p_y >= py_max)
240 continue; 271 continue;
241 b = b64 >> 32; 272 b = b32 >> 16;
242 x = 0; 273 x = a;
243 y = 0; 274 y = b;
244 n_iter = 0; 275 n_iter = 0;
245 276
246 while (++n_iter <= max_iter) { 277 while (++n_iter <= max_iter) {
247 x >>= 13; 278 x2 = MULS16_ASR10(x, x);
248 y >>= 13; 279 y2 = MULS16_ASR10(y, y);
249 x2 = x * x; 280
250 y2 = y * y; 281 if (x2 + y2 > (4<<10)) break;
251 282
252 if (x2 + y2 > (4<<26)) break; 283 y = 2 * MULS16_ASR10(x, y) + b;
253
254 y = 2 * x * y + b;
255 x = x2 - y2 + a; 284 x = x2 - y2 + a;
256 } 285 }
257 286
@@ -261,7 +290,7 @@ void calc_mandelbrot_32(void)
261 } else { 290 } else {
262 brightness = 255 - (32 * (n_iter & 7)); 291 brightness = 255 - (32 * (n_iter & 7));
263 } 292 }
264 graybuffer[p_y]=brightness; 293 graybuffer[p_y] = brightness;
265 /* be nice to other threads: 294 /* be nice to other threads:
266 * if at least one tick has passed, yield */ 295 * if at least one tick has passed, yield */
267 if (*rb->current_tick > last_yield) { 296 if (*rb->current_tick > last_yield) {
@@ -274,35 +303,34 @@ void calc_mandelbrot_32(void)
274 } 303 }
275} 304}
276 305
277void calc_mandelbrot_64(void) 306void calc_mandelbrot_high_prec(void)
278{ 307{
279 long start_tick, last_yield; 308 long start_tick, last_yield;
280 unsigned n_iter; 309 unsigned n_iter;
281 long long x, x2, y, y2, a, b; 310 long x, x2, y, y2, a, b;
282 int p_x, p_y; 311 int p_x, p_y;
283 int brightness; 312 int brightness;
284
285 start_tick = last_yield = *rb->current_tick;
286 313
314 MULS32_INIT();
315 start_tick = last_yield = *rb->current_tick;
316
287 for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) { 317 for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) {
288 if (p_x < px_min) 318 if (p_x < px_min)
289 continue; 319 continue;
290 for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) { 320 for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) {
291 if (p_y >= py_max) 321 if (p_y >= py_max)
292 continue; 322 continue;
293 x = 0; 323 x = a;
294 y = 0; 324 y = b;
295 n_iter = 0; 325 n_iter = 0;
296 326
297 while (++n_iter<=max_iter) { 327 while (++n_iter <= max_iter) {
298 x >>= 29; 328 x2 = MULS32_ASR26(x, x);
299 y >>= 29; 329 y2 = MULS32_ASR26(y, y);
300 x2 = MUL64(x, x); 330
301 y2 = MUL64(y, y); 331 if (x2 + y2 > (4L<<26)) break;
302
303 if (x2 + y2 > (4LL<<58)) break;
304 332
305 y = 2 * MUL64(x, y) + b; 333 y = 2 * MULS32_ASR26(x, y) + b;
306 x = x2 - y2 + a; 334 x = x2 - y2 + a;
307 } 335 }
308 336
@@ -312,10 +340,10 @@ void calc_mandelbrot_64(void)
312 } else { 340 } else {
313 brightness = 255 - (32 * (n_iter & 7)); 341 brightness = 255 - (32 * (n_iter & 7));
314 } 342 }
315 graybuffer[p_y]=brightness; 343 graybuffer[p_y] = brightness;
316 /* be nice to other threads: 344 /* be nice to other threads:
317 * if at least one tick has passed, yield */ 345 * if at least one tick has passed, yield */
318 if (*rb->current_tick > last_yield){ 346 if (*rb->current_tick > last_yield) {
319 rb->yield(); 347 rb->yield();
320 last_yield = *rb->current_tick; 348 last_yield = *rb->current_tick;
321 } 349 }
@@ -375,10 +403,10 @@ enum plugin_status plugin_start(struct plugin_api* api, void* parameter)
375 if (redraw == REDRAW_FULL) 403 if (redraw == REDRAW_FULL)
376 gray_ub_clear_display(); 404 gray_ub_clear_display();
377 405
378 if (step_log2 <= -13) /* select precision */ 406 if (step_log2 <= -10) /* select precision */
379 calc_mandelbrot_64(); 407 calc_mandelbrot_high_prec();
380 else 408 else
381 calc_mandelbrot_32(); 409 calc_mandelbrot_low_prec();
382 410
383#if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ) 411#if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ)
384 rb->cpu_boost(false); 412 rb->cpu_boost(false);