summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/plugins/mandelbrot.c336
1 files changed, 182 insertions, 154 deletions
diff --git a/apps/plugins/mandelbrot.c b/apps/plugins/mandelbrot.c
index 8a3c14a194..1f80ff395f 100644
--- a/apps/plugins/mandelbrot.c
+++ b/apps/plugins/mandelbrot.c
@@ -55,15 +55,15 @@
55static struct plugin_api* rb; 55static struct plugin_api* rb;
56static char buff[32]; 56static char buff[32];
57 57
58/* Fixed point format: 6 bits integer part incl. sign, 58 bits fractional part */ 58/* Fixed point format: 6 bits integer part incl. sign, 26 bits fractional part */
59static long long x_min; 59static long x_min;
60static long long x_max; 60static long x_max;
61static long long x_step; 61static long x_step;
62static long long x_delta; 62static long x_delta;
63static long long y_min; 63static long y_min;
64static long long y_max; 64static long y_max;
65static long long y_step; 65static long y_step;
66static long long y_delta; 66static long y_delta;
67 67
68static int px_min = 0; 68static int px_min = 0;
69static int px_max = LCD_WIDTH; 69static int px_max = LCD_WIDTH;
@@ -78,117 +78,148 @@ static unsigned int gbuf_size = 0;
78static unsigned char graybuffer[LCD_HEIGHT]; 78static unsigned char graybuffer[LCD_HEIGHT];
79 79
80#if CONFIG_CPU == SH7034 80#if CONFIG_CPU == SH7034
81long long mul64(long long f1, long long f2); 81
82 82#define MULS16_ASR10(a, b) muls16_asr10(a, b)
83asm ( 83static inline short muls16_asr10(short a, short b)
84 /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */ 84{
85 ".global _mul64 \n" /* Notation: abcd * efgh, where each letter */ 85 short r;
86 ".type _mul64,@function\n" /* represents 16 bits. Called with: */ 86 asm (
87"_mul64: \n" /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */ 87 "muls %[a],%[b] \n"
88 "swap.w r4,r2 \n" /* r2 = ba */ 88 "sts macl,%[r] \n"
89 "mulu r2,r7 \n" /* a * h */ 89 "shlr8 %[r] \n"
90 "swap.w r6,r3 \n" /* r3 = fe */ 90 "shlr2 %[r] \n"
91 "sts macl,r0 \n" /* r0 = a * h */ 91 : /* outputs */
92 "mulu r5,r3 \n" /* d * e */ 92 [r]"=r"(r)
93 "swap.w r7,r3 \n" /* r3 = hg */ 93 : /* inputs */
94 "sts macl,r1 \n" /* r1 = d * e */ 94 [a]"r"(a),
95 "mulu r4,r3 \n" /* b * g */ 95 [b]"r"(b)
96 "add r1,r0 \n" /* r0 += r1 */ 96 );
97 "swap.w r5,r2 \n" /* r2 = dc */ 97 return r;
98 "sts macl,r1 \n" /* r1 = b * g */ 98}
99 "mulu r2,r6 \n" /* c * f */ 99
100 "add r1,r0 \n" /* r0 += r1 */ 100#define MULS32_ASR26(a, b) muls32_asr26(a, b)
101 "sts macl,r1 \n" /* r1 = c * f */ 101static inline long muls32_asr26(long a, long b)
102 "mulu r4,r7 \n" /* b * h */ 102{
103 "add r1,r0 \n" /* r0 += r1 */ 103 long r, t1, t2, t3;
104 "shll16 r0 \n" /* r0 <<= 16 */ 104 asm (
105 "sts macl,r1 \n" /* r1 = b * h */ 105 /* Signed 32bit * 32bit -> 64bit multiplication.
106 "mulu r2,r3 \n" /* c * g */ 106 Notation: xxab * xxcd, where each letter represents 16 bits.
107 "add r1,r0 \n" /* r0 += r1 */ 107 xx is the 64 bit sign extension. */
108 "sts macl,r1 \n" /* r1 = c * g */ 108 "swap.w %[a],%[t1] \n" /* t1 = ba */
109 "mulu r5,r6 \n" /* d * f */ 109 "mulu %[t1],%[b] \n" /* a * d */
110 "add r1,r0 \n" /* r0 += r1 */ 110 "swap.w %[b],%[t3] \n" /* t3 = dc */
111 "sts macl,r1 \n" /* r1 = d * f */ 111 "sts macl,%[t2] \n" /* t2 = a * d */
112 "mulu r2,r7 \n" /* c * h */ 112 "mulu %[t1],%[t3] \n" /* a * c */
113 "add r1,r0 \n" /* r0 += r1 */ 113 "sts macl,%[r] \n" /* hi = a * c */
114 "sts macl,r2 \n" /* r2 = c * h */ 114 "mulu %[a],%[t3] \n" /* b * c */
115 "mulu r5,r3 \n" /* d * g */ 115 "clrt \n"
116 "clrt \n" 116 "sts macl,%[t3] \n" /* t3 = b * c */
117 "sts macl,r3 \n" /* r3 = d * g */ 117 "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */
118 "addc r2,r3 \n" /* r3 += r2, carry->r2 */ 118 "movt %[t2] \n"
119 "movt r2 \n" 119 "mulu %[a],%[b] \n" /* b * d */
120 "mulu r5,r7 \n" /* d * h */ 120 "mov %[t3],%[t1] \n" /* t2t3 <<= 16 */
121 "mov r3,r1 \n" /* r2r3 <<= 16 */ 121 "xtrct %[t2],%[t1] \n"
122 "xtrct r2,r1 \n" 122 "mov %[t1],%[t2] \n"
123 "mov r1,r2 \n" 123 "shll16 %[t3] \n"
124 "shll16 r3 \n" 124 "sts macl,%[t1] \n" /* lo = b * d */
125 "sts macl,r1 \n" /* r1 = d * h */ 125 "clrt \n" /* hi.lo += t2t3 */
126 "clrt \n" /* r0r1 += r2r3 */ 126 "addc %[t3],%[t1] \n"
127 "addc r3,r1 \n" 127 "addc %[t2],%[r] \n"
128 "rts \n" 128 "cmp/pz %[a] \n" /* ab >= 0 ? */
129 "addc r2,r0 \n" 129 "bt 1f \n"
130); 130 "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */
131#define MUL64(a, b) mul64(a, b) 131 "1: \n"
132 "cmp/pz %[b] \n" /* cd >= 0 ? */
133 "bt 2f \n"
134 "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */
135 "2: \n"
136 /* Shift right by 26 and return low 32 bits */
137 "shll2 %[r] \n" /* hi <<= 6 */
138 "shll2 %[r] \n"
139 "shll2 %[r] \n"
140 "shlr16 %[t1] \n" /* (unsigned)lo >>= 26 */
141 "shlr8 %[t1] \n"
142 "shlr2 %[t1] \n"
143 "or %[t1],%[r] \n" /* combine result */
144 : /* outputs */
145 [r] "=&r"(r),
146 [t1]"=&r"(t1),
147 [t2]"=&r"(t2),
148 [t3]"=&r"(t3)
149 : /* inputs */
150 [a] "r" (a),
151 [b] "r" (b)
152 );
153 return r;
154}
132 155
133#elif defined CPU_COLDFIRE 156#elif defined CPU_COLDFIRE
134long long mul64(long long f1, long long f2); 157
135 158#define MULS16_ASR10(a, b) muls16_asr10(a, b)
136asm ( 159static inline short muls16_asr10(short a, short b)
137 /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */ 160{
138 ".section .text,\"ax\",@progbits\n" 161 asm (
139 ".global mul64 \n" /* Notation: abcd * efgh, where each letter */ 162 "muls.w %[a],%[b] \n"
140 ".type mul64,@function\n" /* represents 16 bits. */ 163 "asr.l #8,%[b] \n"
141"mul64: \n" 164 "asr.l #2,%[b] \n"
142 "lea.l (-16,%sp),%sp \n" 165 : /* outputs */
143 "movem.l %d2-%d5,(%sp) \n" 166 [b]"+d"(b)
144 167 : /* inputs */
145 "movem.l (20,%sp),%d0-%d3\n" /* %d0%d1 = abcd, %d2%d3 = efgh */ 168 [a]"d" (a)
146 "mulu.l %d3,%d0 \n" /* %d0 = ab * gh */ 169 );
147 "mulu.l %d1,%d2 \n" /* %d2 = cd * ef */ 170 return b;
148 "add.l %d2,%d0 \n" /* %d0 += %d2 */ 171}
149 "move.l %d1,%d4 \n" 172
150 "swap %d4 \n" /* %d4 = dc */ 173/* Needs the EMAC initialised to fractional mode w/o rounding and saturation */
151 "move.l %d3,%d5 \n" 174#define MULS32_INIT() coldfire_set_macsr(EMAC_FRACTIONAL)
152 "swap %d5 \n" /* %d5 = hg */ 175#define MULS32_ASR26(a, b) muls32_asr26(a, b)
153 "move.l %d4,%d2 \n" 176static inline long muls32_asr26(long a, long b)
154 "mulu.w %d5,%d2 \n" /* %d2 = c * g */ 177{
155 "add.l %d2,%d0 \n" /* %d0 += %d2 */ 178 long r, t1;
156 "mulu.w %d3,%d4 \n" /* %d4 = c * h */ 179 asm (
157 "mulu.w %d1,%d5 \n" /* %d5 = d * h */ 180 "mac.l %[a],%[b],%%acc0\n" /* multiply */
158 "add.l %d4,%d5 \n" /* %d5 += %d4 */ 181 "mulu.l %[a],%[b] \n" /* get lower half */
159 "subx.l %d4,%d4 \n" 182 "movclr.l %%acc0,%[r] \n" /* get higher half */
160 "neg.l %d4 \n" /* carry -> %d4 */ 183 "asl.l #5,%[r] \n" /* hi <<= 5, plus one free */
161 "swap %d4 \n" 184 "moveq.l #26,%[t1] \n"
162 "swap %d5 \n" 185 "lsr.l %[t1],%[b] \n" /* (unsigned)lo >>= 26 */
163 "move.w %d5,%d4 \n" 186 "or.l %[b],%[r] \n" /* combine result */
164 "clr.w %d5 \n" /* %d4%d5 <<= 16 */ 187 : /* outputs */
165 "mulu.w %d3,%d1 \n" /* %d1 = d * h */ 188 [r]"=&d"(r),
166 "add.l %d5,%d1 \n" 189 [t1]"=&d"(t1),
167 "addx.l %d4,%d0 \n" /* %d0%d1 += %d4%d5 */ 190 [b] "+d" (b)
168 191 : /* inputs */
169 "movem.l (%sp),%d2-%d5 \n" 192 [a] "d" (a)
170 "lea.l (16,%sp),%sp\n" 193 );
171 "rts \n" 194 return r;
172); 195}
173#define MUL64(a, b) mul64(a, b) 196
174 197#endif /* CPU */
175#else 198
176#define MUL64(a, b) ((a)*(b)) 199/* default macros */
200#ifndef MULS16_ASR10
201#define MULS16_ASR10(a, b) ((short)(((long)(a) * (long)(b)) >> 10))
202#endif
203#ifndef MULS32_ASR26
204#define MULS32_ASR26(a, b) ((long)(((long long)(a) * (long long)(b)) >> 26))
205#endif
206#ifndef MULS32_INIT
207#define MULS32_INIT()
177#endif 208#endif
178 209
179int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */ 210int ilog2_fp(long value) /* calculate integer log2(value_fp_6.26) */
180{ 211{
181 int i = 0; 212 int i = 0;
182 213
183 if (value <= 0) { 214 if (value <= 0) {
184 return -32767; 215 return -32767;
185 } else if (value > (1LL<<58)) { 216 } else if (value > (1L<<26)) {
186 while (value >= (2LL<<58)) { 217 while (value >= (2L<<26)) {
187 value >>= 1; 218 value >>= 1;
188 i++; 219 i++;
189 } 220 }
190 } else { 221 } else {
191 while (value < (1LL<<58)) { 222 while (value < (1L<<26)) {
192 value <<= 1; 223 value <<= 1;
193 i--; 224 i--;
194 } 225 }
@@ -199,59 +230,57 @@ int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */
199void recalc_parameters(void) 230void recalc_parameters(void)
200{ 231{
201 x_step = (x_max - x_min) / LCD_WIDTH; 232 x_step = (x_max - x_min) / LCD_WIDTH;
202 x_delta = MUL64(x_step, (LCD_WIDTH/8)); 233 x_delta = x_step * (LCD_WIDTH/8);
203 y_step = (y_max - y_min) / LCD_HEIGHT; 234 y_step = (y_max - y_min) / LCD_HEIGHT;
204 y_delta = MUL64(y_step, (LCD_HEIGHT/8)); 235 y_delta = y_step * (LCD_HEIGHT/8);
205 step_log2 = MIN(ilog2_fp(x_step), ilog2_fp(y_step)); 236 step_log2 = ilog2_fp(MIN(x_step, y_step));
206 max_iter = MAX(10, -15 * step_log2 - 50); 237 max_iter = MAX(15, -15 * step_log2 - 45);
207} 238}
208 239
209void init_mandelbrot_set(void) 240void init_mandelbrot_set(void)
210{ 241{
211#if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */ 242#if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */
212 x_min = -38LL<<54; // -2.375<<58 243 x_min = -38L<<22; // -2.375<<26
213 x_max = 15LL<<54; // 0.9375<<58 244 x_max = 15L<<22; // 0.9375<<26
214#else /* Iriver H1x0 */ 245#else /* Iriver H1x0 */
215 x_min = -36LL<<54; // -2.25<<58 246 x_min = -36L<<22; // -2.25<<26
216 x_max = 12LL<<54; // 0.75<<58 247 x_max = 12L<<22; // 0.75<<26
217#endif 248#endif
218 y_min = -19LL<<54; // -1.1875<<58 249 y_min = -19L<<22; // -1.1875<<26
219 y_max = 19LL<<54; // 1.1875<<58 250 y_max = 19L<<22; // 1.1875<<26
220 recalc_parameters(); 251 recalc_parameters();
221} 252}
222 253
223void calc_mandelbrot_32(void) 254void calc_mandelbrot_low_prec(void)
224{ 255{
225 long start_tick, last_yield; 256 long start_tick, last_yield;
226 unsigned n_iter; 257 unsigned n_iter;
227 long long a64, b64; 258 long a32, b32;
228 long x, x2, y, y2, a, b; 259 short x, x2, y, y2, a, b;
229 int p_x, p_y; 260 int p_x, p_y;
230 int brightness; 261 int brightness;
231 262
232 start_tick = last_yield = *rb->current_tick; 263 start_tick = last_yield = *rb->current_tick;
233 264
234 for (p_x = 0, a64 = x_min; p_x <= px_max; p_x++, a64 += x_step) { 265 for (p_x = 0, a32 = x_min; p_x < px_max; p_x++, a32 += x_step) {
235 if (p_x < px_min) 266 if (p_x < px_min)
236 continue; 267 continue;
237 a = a64 >> 32; 268 a = a32 >> 16;
238 for (p_y = LCD_HEIGHT-1, b64 = y_min; p_y >= py_min; p_y--, b64 += y_step) { 269 for (p_y = LCD_HEIGHT-1, b32 = y_min; p_y >= py_min; p_y--, b32 += y_step) {
239 if (p_y >= py_max) 270 if (p_y >= py_max)
240 continue; 271 continue;
241 b = b64 >> 32; 272 b = b32 >> 16;
242 x = 0; 273 x = a;
243 y = 0; 274 y = b;
244 n_iter = 0; 275 n_iter = 0;
245 276
246 while (++n_iter <= max_iter) { 277 while (++n_iter <= max_iter) {
247 x >>= 13; 278 x2 = MULS16_ASR10(x, x);
248 y >>= 13; 279 y2 = MULS16_ASR10(y, y);
249 x2 = x * x; 280
250 y2 = y * y; 281 if (x2 + y2 > (4<<10)) break;
251 282
252 if (x2 + y2 > (4<<26)) break; 283 y = 2 * MULS16_ASR10(x, y) + b;
253
254 y = 2 * x * y + b;
255 x = x2 - y2 + a; 284 x = x2 - y2 + a;
256 } 285 }
257 286
@@ -261,7 +290,7 @@ void calc_mandelbrot_32(void)
261 } else { 290 } else {
262 brightness = 255 - (32 * (n_iter & 7)); 291 brightness = 255 - (32 * (n_iter & 7));
263 } 292 }
264 graybuffer[p_y]=brightness; 293 graybuffer[p_y] = brightness;
265 /* be nice to other threads: 294 /* be nice to other threads:
266 * if at least one tick has passed, yield */ 295 * if at least one tick has passed, yield */
267 if (*rb->current_tick > last_yield) { 296 if (*rb->current_tick > last_yield) {
@@ -274,35 +303,34 @@ void calc_mandelbrot_32(void)
274 } 303 }
275} 304}
276 305
277void calc_mandelbrot_64(void) 306void calc_mandelbrot_high_prec(void)
278{ 307{
279 long start_tick, last_yield; 308 long start_tick, last_yield;
280 unsigned n_iter; 309 unsigned n_iter;
281 long long x, x2, y, y2, a, b; 310 long x, x2, y, y2, a, b;
282 int p_x, p_y; 311 int p_x, p_y;
283 int brightness; 312 int brightness;
284
285 start_tick = last_yield = *rb->current_tick;
286 313
314 MULS32_INIT();
315 start_tick = last_yield = *rb->current_tick;
316
287 for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) { 317 for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) {
288 if (p_x < px_min) 318 if (p_x < px_min)
289 continue; 319 continue;
290 for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) { 320 for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) {
291 if (p_y >= py_max) 321 if (p_y >= py_max)
292 continue; 322 continue;
293 x = 0; 323 x = a;
294 y = 0; 324 y = b;
295 n_iter = 0; 325 n_iter = 0;
296 326
297 while (++n_iter<=max_iter) { 327 while (++n_iter <= max_iter) {
298 x >>= 29; 328 x2 = MULS32_ASR26(x, x);
299 y >>= 29; 329 y2 = MULS32_ASR26(y, y);
300 x2 = MUL64(x, x); 330
301 y2 = MUL64(y, y); 331 if (x2 + y2 > (4L<<26)) break;
302
303 if (x2 + y2 > (4LL<<58)) break;
304 332
305 y = 2 * MUL64(x, y) + b; 333 y = 2 * MULS32_ASR26(x, y) + b;
306 x = x2 - y2 + a; 334 x = x2 - y2 + a;
307 } 335 }
308 336
@@ -312,10 +340,10 @@ void calc_mandelbrot_64(void)
312 } else { 340 } else {
313 brightness = 255 - (32 * (n_iter & 7)); 341 brightness = 255 - (32 * (n_iter & 7));
314 } 342 }
315 graybuffer[p_y]=brightness; 343 graybuffer[p_y] = brightness;
316 /* be nice to other threads: 344 /* be nice to other threads:
317 * if at least one tick has passed, yield */ 345 * if at least one tick has passed, yield */
318 if (*rb->current_tick > last_yield){ 346 if (*rb->current_tick > last_yield) {
319 rb->yield(); 347 rb->yield();
320 last_yield = *rb->current_tick; 348 last_yield = *rb->current_tick;
321 } 349 }
@@ -375,10 +403,10 @@ enum plugin_status plugin_start(struct plugin_api* api, void* parameter)
375 if (redraw == REDRAW_FULL) 403 if (redraw == REDRAW_FULL)
376 gray_ub_clear_display(); 404 gray_ub_clear_display();
377 405
378 if (step_log2 <= -13) /* select precision */ 406 if (step_log2 <= -10) /* select precision */
379 calc_mandelbrot_64(); 407 calc_mandelbrot_high_prec();
380 else 408 else
381 calc_mandelbrot_32(); 409 calc_mandelbrot_low_prec();
382 410
383#if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ) 411#if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ)
384 rb->cpu_boost(false); 412 rb->cpu_boost(false);