1 files changed, 182 insertions, 154 deletions
diff --git a/apps/plugins/mandelbrot.c b/apps/plugins/mandelbrot.c
index 8a3c14a194..1f80ff395f 100644
--- a/apps/plugins/mandelbrot.c
+++ b/apps/plugins/mandelbrot.c
@@ -55,15 +55,15 @@
 static struct plugin_api* rb;
 static char buff[32];
-/* Fixed point format: 6 bits integer part incl. sign, 58 bits fractional part */
+/* Fixed point format: 6 bits integer part incl. sign, 26 bits fractional part */
-static long long x_min;
+static long x_min;
-static long long x_max;
+static long x_max;
-static long long x_step;
+static long x_step;
-static long long x_delta;
+static long x_delta;
-static long long y_min;
+static long y_min;
-static long long y_max;
+static long y_max;
-static long long y_step;
+static long y_step;
-static long long y_delta;
+static long y_delta;
 static int px_min = 0;
 static int px_max = LCD_WIDTH;
@@ -78,117 +78,148 @@ static unsigned int gbuf_size = 0;
 static unsigned char graybuffer[LCD_HEIGHT];   
 #if CONFIG_CPU == SH7034
-long long mul64(long long f1, long long f2);
+#define MULS16_ASR10(a, b) muls16_asr10(a, b)
-asm (
+static inline short muls16_asr10(short a, short b)
-    /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */
+{
-    ".global _mul64      \n"  /* Notation: abcd * efgh, where each letter */
+    short r;
-    ".type   _mul64,@function\n" /* represents 16 bits. Called with: */
+    asm (
-"_mul64:                 \n"  /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */
+        "muls    %[a],%[b]   \n"
-    "swap.w  r4,r2       \n"  /* r2 = ba */
+        "sts     macl,%[r]   \n"
-    "mulu    r2,r7       \n"  /* a * h */
+        "shlr8   %[r]        \n"
-    "swap.w  r6,r3       \n"  /* r3 = fe */
+        "shlr2   %[r]        \n"
-    "sts     macl,r0     \n"  /* r0 = a * h */
+        : /* outputs */
-    "mulu    r5,r3       \n"  /* d * e */
+        [r]"=r"(r)
-    "swap.w  r7,r3       \n"  /* r3 = hg */
+        : /* inputs */
-    "sts     macl,r1     \n"  /* r1 = d * e */
+        [a]"r"(a),
-    "mulu    r4,r3       \n"  /* b * g */
+        [b]"r"(b)
-    "add     r1,r0       \n"  /* r0 += r1 */
+    );
-    "swap.w  r5,r2       \n"  /* r2 = dc */
+    return r;
-    "sts     macl,r1     \n"  /* r1 = b * g */
+}
-    "mulu    r2,r6       \n"  /* c * f */
-    "add     r1,r0       \n"  /* r0 += r1 */
+#define MULS32_ASR26(a, b) muls32_asr26(a, b)
-    "sts     macl,r1     \n"  /* r1 = c * f */
+static inline long muls32_asr26(long a, long b)
-    "mulu    r4,r7       \n"  /* b * h */
+{
-    "add     r1,r0       \n"  /* r0 += r1 */
+    long r, t1, t2, t3;
-    "shll16  r0          \n"  /* r0 <<= 16 */
+    asm (
-    "sts     macl,r1     \n"  /* r1 = b * h */
+        /* Signed 32bit * 32bit -> 64bit multiplication.
-    "mulu    r2,r3       \n"  /* c * g */
+           Notation: xxab * xxcd, where each letter represents 16 bits.
-    "add     r1,r0       \n"  /* r0 += r1 */
+           xx is the 64 bit sign extension.  */
-    "sts     macl,r1     \n"  /* r1 = c * g */
+        "swap.w  %[a],%[t1]  \n" /* t1 = ba */
-    "mulu    r5,r6       \n"  /* d * f */
+        "mulu    %[t1],%[b]  \n" /* a * d */
-    "add     r1,r0       \n"  /* r0 += r1 */
+        "swap.w  %[b],%[t3]  \n" /* t3 = dc */
-    "sts     macl,r1     \n"  /* r1 = d * f */
+        "sts     macl,%[t2]  \n" /* t2 = a * d */
-    "mulu    r2,r7       \n"  /* c * h */
+        "mulu    %[t1],%[t3] \n" /* a * c */
-    "add     r1,r0       \n"  /* r0 += r1 */
+        "sts     macl,%[r]   \n" /* hi = a * c */
-    "sts     macl,r2     \n"  /* r2 = c * h */
+        "mulu    %[a],%[t3]  \n" /* b * c */
-    "mulu    r5,r3       \n"  /* d * g */
+        "clrt                \n"
-    "clrt                \n"
+        "sts     macl,%[t3]  \n" /* t3 = b * c */
-    "sts     macl,r3     \n"  /* r3 = d * g */
+        "addc    %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */
-    "addc    r2,r3       \n"  /* r3 += r2, carry->r2 */
+        "movt    %[t2]       \n"
-    "movt    r2          \n"
+        "mulu    %[a],%[b]   \n" /* b * d */
-    "mulu    r5,r7       \n"  /* d * h */
+        "mov     %[t3],%[t1] \n" /* t2t3 <<= 16 */
-    "mov     r3,r1       \n"  /* r2r3 <<= 16 */
+        "xtrct   %[t2],%[t1] \n"
-    "xtrct   r2,r1       \n"
+        "mov     %[t1],%[t2] \n"
-    "mov     r1,r2       \n"
+        "shll16  %[t3]       \n"
-    "shll16  r3          \n"
+        "sts     macl,%[t1]  \n" /* lo = b * d */
-    "sts     macl,r1     \n"  /* r1 = d * h */
+        "clrt                \n" /* hi.lo += t2t3 */
-    "clrt                \n"  /* r0r1 += r2r3 */
+        "addc    %[t3],%[t1] \n"
-    "addc    r3,r1       \n"
+        "addc    %[t2],%[r]  \n"
-    "rts                 \n"
+        "cmp/pz  %[a]        \n" /* ab >= 0 ? */
-    "addc    r2,r0       \n"
+        "bt      1f          \n"
-);
+        "sub     %[b],%[r]   \n" /* no: hi -= cd (sign extension of ab is -1) */
-#define MUL64(a, b) mul64(a, b)
+    "1:                      \n"
+        "cmp/pz  %[b]        \n" /* cd >= 0 ? */
+        "bt      2f          \n"
+        "sub     %[a],%[r]   \n" /* no: hi -= ab (sign extension of cd is -1) */
+    "2:                      \n"
+        /* Shift right by 26 and return low 32 bits */
+        "shll2   %[r]        \n" /* hi <<= 6 */
+        "shll2   %[r]        \n"
+        "shll2   %[r]        \n"
+        "shlr16  %[t1]       \n" /* (unsigned)lo >>= 26 */
+        "shlr8   %[t1]       \n"
+        "shlr2   %[t1]       \n"
+        "or      %[t1],%[r]  \n" /* combine result */
+        : /* outputs */
+        [r] "=&r"(r),
+        [t1]"=&r"(t1),
+        [t2]"=&r"(t2),
+        [t3]"=&r"(t3)
+        : /* inputs */
+        [a] "r"  (a),
+        [b] "r"  (b)
+    );
+    return r;
+}
 #elif defined CPU_COLDFIRE
-long long mul64(long long f1, long long f2);
+#define MULS16_ASR10(a, b) muls16_asr10(a, b)
-asm (
+static inline short muls16_asr10(short a, short b)
-    /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */
+{
-    ".section .text,\"ax\",@progbits\n"
+    asm (
-    ".global mul64       \n"    /* Notation: abcd * efgh, where each letter */
+        "muls.w  %[a],%[b]   \n"
-    ".type   mul64,@function\n" /* represents 16 bits. */
+        "asr.l   #8,%[b]     \n"
-"mul64:                  \n"
+        "asr.l   #2,%[b]     \n"
-    "lea.l   (-16,%sp),%sp   \n"
+        : /* outputs */
-    "movem.l %d2-%d5,(%sp)   \n"
+        [b]"+d"(b)
+        : /* inputs */
-    "movem.l (20,%sp),%d0-%d3\n" /* %d0%d1 = abcd, %d2%d3 = efgh */
+        [a]"d" (a)
-    "mulu.l  %d3,%d0     \n"  /* %d0 = ab * gh */
+    );
-    "mulu.l  %d1,%d2     \n"  /* %d2 = cd * ef */
+    return b;
-    "add.l   %d2,%d0     \n"  /* %d0 += %d2 */
+}
-    "move.l  %d1,%d4     \n"  
-    "swap    %d4         \n"  /* %d4 = dc */
+/* Needs the EMAC initialised to fractional mode w/o rounding and saturation */
-    "move.l  %d3,%d5     \n"
+#define MULS32_INIT() coldfire_set_macsr(EMAC_FRACTIONAL)
-    "swap    %d5         \n"  /* %d5 = hg */
+#define MULS32_ASR26(a, b) muls32_asr26(a, b)
-    "move.l  %d4,%d2     \n"
+static inline long muls32_asr26(long a, long b)
-    "mulu.w  %d5,%d2     \n"  /* %d2 = c * g */
+{
-    "add.l   %d2,%d0     \n"  /* %d0 += %d2 */
+    long r, t1;
-    "mulu.w  %d3,%d4     \n"  /* %d4 = c * h */
+    asm (
-    "mulu.w  %d1,%d5     \n"  /* %d5 = d * h */
+        "mac.l   %[a],%[b],%%acc0\n" /* multiply */
-    "add.l   %d4,%d5     \n"  /* %d5 += %d4 */
+        "mulu.l  %[a],%[b]       \n" /* get lower half */
-    "subx.l  %d4,%d4     \n"
+        "movclr.l %%acc0,%[r]    \n" /* get higher half */
-    "neg.l   %d4         \n"  /* carry -> %d4 */
+        "asl.l   #5,%[r]         \n" /* hi <<= 5, plus one free */
-    "swap    %d4         \n"
+        "moveq.l #26,%[t1]       \n"
-    "swap    %d5         \n" 
+        "lsr.l   %[t1],%[b]      \n" /* (unsigned)lo >>= 26 */
-    "move.w  %d5,%d4     \n"
+        "or.l    %[b],%[r]       \n" /* combine result */
-    "clr.w   %d5         \n"  /* %d4%d5 <<= 16 */
+        : /* outputs */
-    "mulu.w  %d3,%d1     \n"  /* %d1 = d * h */
+        [r]"=&d"(r),
-    "add.l   %d5,%d1     \n"
+        [t1]"=&d"(t1),
-    "addx.l  %d4,%d0     \n"  /* %d0%d1 += %d4%d5 */
+        [b] "+d" (b)
+        : /* inputs */
-    "movem.l (%sp),%d2-%d5   \n"
+        [a] "d"  (a)
-    "lea.l   (16,%sp),%sp\n"
+    );
-    "rts                 \n"
+    return r;
-);
+}
-#define MUL64(a, b) mul64(a, b)
+#endif /* CPU */
-#else
-#define MUL64(a, b) ((a)*(b))
+/* default macros */
+#ifndef MULS16_ASR10 
+#define MULS16_ASR10(a, b) ((short)(((long)(a) * (long)(b)) >> 10))
+#endif
+#ifndef MULS32_ASR26
+#define MULS32_ASR26(a, b) ((long)(((long long)(a) * (long long)(b)) >> 26))
+#endif
+#ifndef MULS32_INIT
+#define MULS32_INIT()
 #endif
-int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */
+int ilog2_fp(long value) /* calculate integer log2(value_fp_6.26) */
 {
    int i = 0;
    if (value <= 0) {
        return -32767;
-    } else if (value > (1LL<<58)) {
+    } else if (value > (1L<<26)) {
-        while (value >= (2LL<<58)) {
+        while (value >= (2L<<26)) {
            value >>= 1;
            i++;
        }
    } else {
-        while (value < (1LL<<58)) {
+        while (value < (1L<<26)) {
            value <<= 1;
            i--;
        }
@@ -199,59 +230,57 @@ int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */
 void recalc_parameters(void)
 {
    x_step = (x_max - x_min) / LCD_WIDTH;
-    x_delta = MUL64(x_step, (LCD_WIDTH/8));
+    x_delta = x_step * (LCD_WIDTH/8);
    y_step = (y_max - y_min) / LCD_HEIGHT;
-    y_delta = MUL64(y_step, (LCD_HEIGHT/8));
+    y_delta = y_step * (LCD_HEIGHT/8);
-    step_log2 = MIN(ilog2_fp(x_step), ilog2_fp(y_step));
+    step_log2 = ilog2_fp(MIN(x_step, y_step));
-    max_iter = MAX(10, -15 * step_log2 - 50);
+    max_iter = MAX(15, -15 * step_log2 - 45);
 }
 void init_mandelbrot_set(void)
 {
 #if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */
-    x_min = -38LL<<54;  // -2.375<<58
+    x_min = -38L<<22;  // -2.375<<26
-    x_max =  15LL<<54;  //  0.9375<<58
+    x_max =  15L<<22;  //  0.9375<<26
 #else  /* Iriver H1x0 */
-    x_min = -36LL<<54;  // -2.25<<58
+    x_min = -36L<<22;  // -2.25<<26
-    x_max =  12LL<<54;  //  0.75<<58
+    x_max =  12L<<22;  //  0.75<<26
 #endif
-    y_min = -19LL<<54;  // -1.1875<<58
+    y_min = -19L<<22;  // -1.1875<<26
-    y_max =  19LL<<54;  //  1.1875<<58
+    y_max =  19L<<22;  //  1.1875<<26
    recalc_parameters();
 }
-void calc_mandelbrot_32(void)
+void calc_mandelbrot_low_prec(void)
 {
    long start_tick, last_yield;
    unsigned n_iter;
-    long long a64, b64;
+    long a32, b32;
-    long x, x2, y, y2, a, b;
+    short x, x2, y, y2, a, b;
    int p_x, p_y;
    int brightness;
    start_tick = last_yield = *rb->current_tick;
    
-    for (p_x = 0, a64 = x_min; p_x <= px_max; p_x++, a64 += x_step) {
+    for (p_x = 0, a32 = x_min; p_x < px_max; p_x++, a32 += x_step) {
        if (p_x < px_min)
            continue;
-        a = a64 >> 32;
+        a = a32 >> 16;
-        for (p_y = LCD_HEIGHT-1, b64 = y_min; p_y >= py_min; p_y--, b64 += y_step) {
+        for (p_y = LCD_HEIGHT-1, b32 = y_min; p_y >= py_min; p_y--, b32 += y_step) {
            if (p_y >= py_max)
                continue;
-            b = b64 >> 32;
+            b = b32 >> 16;
-            x = 0;
+            x = a;
-            y = 0;
+            y = b;
            n_iter = 0;
            while (++n_iter <= max_iter) {
-                x >>= 13;
+                x2 = MULS16_ASR10(x, x);
-                y >>= 13;
+                y2 = MULS16_ASR10(y, y);
-                x2 = x * x;
-                y2 = y * y;
+                if (x2 + y2 > (4<<10)) break;
-                
-                if (x2 + y2 > (4<<26)) break;
+                y = 2 * MULS16_ASR10(x, y) + b;
-                
-                y = 2 * x * y + b;
                x = x2 - y2 + a;
            }
@@ -261,7 +290,7 @@ void calc_mandelbrot_32(void)
            } else {
                brightness = 255 - (32 * (n_iter & 7));
            }
-            graybuffer[p_y]=brightness;
+            graybuffer[p_y] = brightness;
            /* be nice to other threads:
             * if at least one tick has passed, yield */
            if  (*rb->current_tick > last_yield) {
@@ -274,35 +303,34 @@ void calc_mandelbrot_32(void)
    }
 }
-void calc_mandelbrot_64(void)
+void calc_mandelbrot_high_prec(void)
 {
    long start_tick, last_yield;
    unsigned n_iter;
-    long long x, x2, y, y2, a, b;
+    long x, x2, y, y2, a, b;
    int p_x, p_y;
    int brightness;
-    start_tick = last_yield = *rb->current_tick;
    
+    MULS32_INIT();
+    start_tick = last_yield = *rb->current_tick;
    for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) {
        if (p_x < px_min)
            continue;
        for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) {
            if (p_y >= py_max)
                continue;
-            x = 0;
+            x = a;
-            y = 0;
+            y = b;
            n_iter = 0;
-            while (++n_iter<=max_iter) {
+            while (++n_iter <= max_iter) {
-                x >>= 29;
+                x2 = MULS32_ASR26(x, x);
-                y >>= 29;
+                y2 = MULS32_ASR26(y, y);
-                x2 = MUL64(x, x);
-                y2 = MUL64(y, y);
+                if (x2 + y2 > (4L<<26)) break;
-                
-                if (x2 + y2 > (4LL<<58)) break;
                
-                y = 2 * MUL64(x, y) + b;
+                y = 2 * MULS32_ASR26(x, y) + b;
                x = x2 - y2 + a;
            }
@@ -312,10 +340,10 @@ void calc_mandelbrot_64(void)
            } else {
                brightness = 255 - (32 * (n_iter & 7));
            }
-            graybuffer[p_y]=brightness;
+            graybuffer[p_y] = brightness;
            /* be nice to other threads:
             * if at least one tick has passed, yield */
-            if  (*rb->current_tick > last_yield){
+            if  (*rb->current_tick > last_yield) {
                rb->yield();
                last_yield = *rb->current_tick;
            }
@@ -375,10 +403,10 @@ enum plugin_status plugin_start(struct plugin_api* api, void* parameter)
            if (redraw == REDRAW_FULL)
                gray_ub_clear_display();
-            if (step_log2 <= -13) /* select precision */
+            if (step_log2 <= -10) /* select precision */
-                calc_mandelbrot_64();
+                calc_mandelbrot_high_prec();
            else
-                calc_mandelbrot_32();
+                calc_mandelbrot_low_prec();
 #if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ)
            rb->cpu_boost(false);

diff --git a/apps/plugins/mandelbrot.c b/apps/plugins/mandelbrot.c index 8a3c14a194..1f80ff395f 100644 --- a/apps/plugins/mandelbrot.c +++ b/apps/plugins/mandelbrot.c
@@ -55,15 +55,15 @@
55	static struct plugin_api* rb;	55	static struct plugin_api* rb;
56	static char buff[32];	56	static char buff[32];
57		57
58	/* Fixed point format: 6 bits integer part incl. sign, 58 bits fractional part */	58	/* Fixed point format: 6 bits integer part incl. sign, 26 bits fractional part */
59	static long long x_min;	59	static long x_min;
60	static long long x_max;	60	static long x_max;
61	static long long x_step;	61	static long x_step;
62	static long long x_delta;	62	static long x_delta;
63	static long long y_min;	63	static long y_min;
64	static long long y_max;	64	static long y_max;
65	static long long y_step;	65	static long y_step;
66	static long long y_delta;	66	static long y_delta;
67		67
68	static int px_min = 0;	68	static int px_min = 0;
69	static int px_max = LCD_WIDTH;	69	static int px_max = LCD_WIDTH;
@@ -78,117 +78,148 @@ static unsigned int gbuf_size = 0;
78	static unsigned char graybuffer[LCD_HEIGHT];	78	static unsigned char graybuffer[LCD_HEIGHT];
79		79
80	#if CONFIG_CPU == SH7034	80	#if CONFIG_CPU == SH7034
81	long long mul64(long long f1, long long f2);	81
82		82	#define MULS16_ASR10(a, b) muls16_asr10(a, b)
83	asm (	83	static inline short muls16_asr10(short a, short b)
84	/* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */	84	{
85	".global _mul64 \n" /* Notation: abcd * efgh, where each letter */	85	short r;
86	".type _mul64,@function\n" /* represents 16 bits. Called with: */	86	asm (
87	"_mul64: \n" /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */	87	"muls %[a],%[b] \n"
88	"swap.w r4,r2 \n" /* r2 = ba */	88	"sts macl,%[r] \n"
89	"mulu r2,r7 \n" /* a * h */	89	"shlr8 %[r] \n"
90	"swap.w r6,r3 \n" /* r3 = fe */	90	"shlr2 %[r] \n"
91	"sts macl,r0 \n" /* r0 = a * h */	91	: /* outputs */
92	"mulu r5,r3 \n" /* d * e */	92	[r]"=r"(r)
93	"swap.w r7,r3 \n" /* r3 = hg */	93	: /* inputs */
94	"sts macl,r1 \n" /* r1 = d * e */	94	[a]"r"(a),
95	"mulu r4,r3 \n" /* b * g */	95	[b]"r"(b)
96	"add r1,r0 \n" /* r0 += r1 */	96	);
97	"swap.w r5,r2 \n" /* r2 = dc */	97	return r;
98	"sts macl,r1 \n" /* r1 = b * g */	98	}
99	"mulu r2,r6 \n" /* c * f */	99
100	"add r1,r0 \n" /* r0 += r1 */	100	#define MULS32_ASR26(a, b) muls32_asr26(a, b)
101	"sts macl,r1 \n" /* r1 = c * f */	101	static inline long muls32_asr26(long a, long b)
102	"mulu r4,r7 \n" /* b * h */	102	{
103	"add r1,r0 \n" /* r0 += r1 */	103	long r, t1, t2, t3;
104	"shll16 r0 \n" /* r0 <<= 16 */	104	asm (
105	"sts macl,r1 \n" /* r1 = b * h */	105	/* Signed 32bit * 32bit -> 64bit multiplication.
106	"mulu r2,r3 \n" /* c * g */	106	Notation: xxab * xxcd, where each letter represents 16 bits.
107	"add r1,r0 \n" /* r0 += r1 */	107	xx is the 64 bit sign extension. */
108	"sts macl,r1 \n" /* r1 = c * g */	108	"swap.w %[a],%[t1] \n" /* t1 = ba */
109	"mulu r5,r6 \n" /* d * f */	109	"mulu %[t1],%[b] \n" /* a * d */
110	"add r1,r0 \n" /* r0 += r1 */	110	"swap.w %[b],%[t3] \n" /* t3 = dc */
111	"sts macl,r1 \n" /* r1 = d * f */	111	"sts macl,%[t2] \n" /* t2 = a * d */
112	"mulu r2,r7 \n" /* c * h */	112	"mulu %[t1],%[t3] \n" /* a * c */
113	"add r1,r0 \n" /* r0 += r1 */	113	"sts macl,%[r] \n" /* hi = a * c */
114	"sts macl,r2 \n" /* r2 = c * h */	114	"mulu %[a],%[t3] \n" /* b * c */
115	"mulu r5,r3 \n" /* d * g */	115	"clrt \n"
116	"clrt \n"	116	"sts macl,%[t3] \n" /* t3 = b * c */
117	"sts macl,r3 \n" /* r3 = d * g */	117	"addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */
118	"addc r2,r3 \n" /* r3 += r2, carry->r2 */	118	"movt %[t2] \n"
119	"movt r2 \n"	119	"mulu %[a],%[b] \n" /* b * d */
120	"mulu r5,r7 \n" /* d * h */	120	"mov %[t3],%[t1] \n" /* t2t3 <<= 16 */
121	"mov r3,r1 \n" /* r2r3 <<= 16 */	121	"xtrct %[t2],%[t1] \n"
122	"xtrct r2,r1 \n"	122	"mov %[t1],%[t2] \n"
123	"mov r1,r2 \n"	123	"shll16 %[t3] \n"
124	"shll16 r3 \n"	124	"sts macl,%[t1] \n" /* lo = b * d */
125	"sts macl,r1 \n" /* r1 = d * h */	125	"clrt \n" /* hi.lo += t2t3 */
126	"clrt \n" /* r0r1 += r2r3 */	126	"addc %[t3],%[t1] \n"
127	"addc r3,r1 \n"	127	"addc %[t2],%[r] \n"
128	"rts \n"	128	"cmp/pz %[a] \n" /* ab >= 0 ? */
129	"addc r2,r0 \n"	129	"bt 1f \n"
130	);	130	"sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */
131	#define MUL64(a, b) mul64(a, b)	131	"1: \n"
		132	"cmp/pz %[b] \n" /* cd >= 0 ? */
		133	"bt 2f \n"
		134	"sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */
		135	"2: \n"
		136	/* Shift right by 26 and return low 32 bits */
		137	"shll2 %[r] \n" /* hi <<= 6 */
		138	"shll2 %[r] \n"
		139	"shll2 %[r] \n"
		140	"shlr16 %[t1] \n" /* (unsigned)lo >>= 26 */
		141	"shlr8 %[t1] \n"
		142	"shlr2 %[t1] \n"
		143	"or %[t1],%[r] \n" /* combine result */
		144	: /* outputs */
		145	[r] "=&r"(r),
		146	[t1]"=&r"(t1),
		147	[t2]"=&r"(t2),
		148	[t3]"=&r"(t3)
		149	: /* inputs */
		150	[a] "r" (a),
		151	[b] "r" (b)
		152	);
		153	return r;
		154	}
132		155
133	#elif defined CPU_COLDFIRE	156	#elif defined CPU_COLDFIRE
134	long long mul64(long long f1, long long f2);	157
135		158	#define MULS16_ASR10(a, b) muls16_asr10(a, b)
136	asm (	159	static inline short muls16_asr10(short a, short b)
137	/* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */	160	{
138	".section .text,\"ax\",@progbits\n"	161	asm (
139	".global mul64 \n" /* Notation: abcd * efgh, where each letter */	162	"muls.w %[a],%[b] \n"
140	".type mul64,@function\n" /* represents 16 bits. */	163	"asr.l #8,%[b] \n"
141	"mul64: \n"	164	"asr.l #2,%[b] \n"
142	"lea.l (-16,%sp),%sp \n"	165	: /* outputs */
143	"movem.l %d2-%d5,(%sp) \n"	166	[b]"+d"(b)
144		167	: /* inputs */
145	"movem.l (20,%sp),%d0-%d3\n" /* %d0%d1 = abcd, %d2%d3 = efgh */	168	[a]"d" (a)
146	"mulu.l %d3,%d0 \n" /* %d0 = ab * gh */	169	);
147	"mulu.l %d1,%d2 \n" /* %d2 = cd * ef */	170	return b;
148	"add.l %d2,%d0 \n" /* %d0 += %d2 */	171	}
149	"move.l %d1,%d4 \n"	172
150	"swap %d4 \n" /* %d4 = dc */	173	/* Needs the EMAC initialised to fractional mode w/o rounding and saturation */
151	"move.l %d3,%d5 \n"	174	#define MULS32_INIT() coldfire_set_macsr(EMAC_FRACTIONAL)
152	"swap %d5 \n" /* %d5 = hg */	175	#define MULS32_ASR26(a, b) muls32_asr26(a, b)
153	"move.l %d4,%d2 \n"	176	static inline long muls32_asr26(long a, long b)
154	"mulu.w %d5,%d2 \n" /* %d2 = c * g */	177	{
155	"add.l %d2,%d0 \n" /* %d0 += %d2 */	178	long r, t1;
156	"mulu.w %d3,%d4 \n" /* %d4 = c * h */	179	asm (
157	"mulu.w %d1,%d5 \n" /* %d5 = d * h */	180	"mac.l %[a],%[b],%%acc0\n" /* multiply */
158	"add.l %d4,%d5 \n" /* %d5 += %d4 */	181	"mulu.l %[a],%[b] \n" /* get lower half */
159	"subx.l %d4,%d4 \n"	182	"movclr.l %%acc0,%[r] \n" /* get higher half */
160	"neg.l %d4 \n" /* carry -> %d4 */	183	"asl.l #5,%[r] \n" /* hi <<= 5, plus one free */
161	"swap %d4 \n"	184	"moveq.l #26,%[t1] \n"
162	"swap %d5 \n"	185	"lsr.l %[t1],%[b] \n" /* (unsigned)lo >>= 26 */
163	"move.w %d5,%d4 \n"	186	"or.l %[b],%[r] \n" /* combine result */
164	"clr.w %d5 \n" /* %d4%d5 <<= 16 */	187	: /* outputs */
165	"mulu.w %d3,%d1 \n" /* %d1 = d * h */	188	[r]"=&d"(r),
166	"add.l %d5,%d1 \n"	189	[t1]"=&d"(t1),
167	"addx.l %d4,%d0 \n" /* %d0%d1 += %d4%d5 */	190	[b] "+d" (b)
168		191	: /* inputs */
169	"movem.l (%sp),%d2-%d5 \n"	192	[a] "d" (a)
170	"lea.l (16,%sp),%sp\n"	193	);
171	"rts \n"	194	return r;
172	);	195	}
173	#define MUL64(a, b) mul64(a, b)	196
174		197	#endif /* CPU */
175	#else	198
176	#define MUL64(a, b) ((a)*(b))	199	/* default macros */
		200	#ifndef MULS16_ASR10
		201	#define MULS16_ASR10(a, b) ((short)(((long)(a) * (long)(b)) >> 10))
		202	#endif
		203	#ifndef MULS32_ASR26
		204	#define MULS32_ASR26(a, b) ((long)(((long long)(a) * (long long)(b)) >> 26))
		205	#endif
		206	#ifndef MULS32_INIT
		207	#define MULS32_INIT()
177	#endif	208	#endif
178		209
179	int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */	210	int ilog2_fp(long value) /* calculate integer log2(value_fp_6.26) */
180	{	211	{
181	int i = 0;	212	int i = 0;
182		213
183	if (value <= 0) {	214	if (value <= 0) {
184	return -32767;	215	return -32767;
185	} else if (value > (1LL<<58)) {	216	} else if (value > (1L<<26)) {
186	while (value >= (2LL<<58)) {	217	while (value >= (2L<<26)) {
187	value >>= 1;	218	value >>= 1;
188	i++;	219	i++;
189	}	220	}
190	} else {	221	} else {
191	while (value < (1LL<<58)) {	222	while (value < (1L<<26)) {
192	value <<= 1;	223	value <<= 1;
193	i--;	224	i--;
194	}	225	}
@@ -199,59 +230,57 @@ int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */
199	void recalc_parameters(void)	230	void recalc_parameters(void)
200	{	231	{
201	x_step = (x_max - x_min) / LCD_WIDTH;	232	x_step = (x_max - x_min) / LCD_WIDTH;
202	x_delta = MUL64(x_step, (LCD_WIDTH/8));	233	x_delta = x_step * (LCD_WIDTH/8);
203	y_step = (y_max - y_min) / LCD_HEIGHT;	234	y_step = (y_max - y_min) / LCD_HEIGHT;
204	y_delta = MUL64(y_step, (LCD_HEIGHT/8));	235	y_delta = y_step * (LCD_HEIGHT/8);
205	step_log2 = MIN(ilog2_fp(x_step), ilog2_fp(y_step));	236	step_log2 = ilog2_fp(MIN(x_step, y_step));
206	max_iter = MAX(10, -15 * step_log2 - 50);	237	max_iter = MAX(15, -15 * step_log2 - 45);
207	}	238	}
208		239
209	void init_mandelbrot_set(void)	240	void init_mandelbrot_set(void)
210	{	241	{
211	#if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */	242	#if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */
212	x_min = -38LL<<54; // -2.375<<58	243	x_min = -38L<<22; // -2.375<<26
213	x_max = 15LL<<54; // 0.9375<<58	244	x_max = 15L<<22; // 0.9375<<26
214	#else /* Iriver H1x0 */	245	#else /* Iriver H1x0 */
215	x_min = -36LL<<54; // -2.25<<58	246	x_min = -36L<<22; // -2.25<<26
216	x_max = 12LL<<54; // 0.75<<58	247	x_max = 12L<<22; // 0.75<<26
217	#endif	248	#endif
218	y_min = -19LL<<54; // -1.1875<<58	249	y_min = -19L<<22; // -1.1875<<26
219	y_max = 19LL<<54; // 1.1875<<58	250	y_max = 19L<<22; // 1.1875<<26
220	recalc_parameters();	251	recalc_parameters();
221	}	252	}
222		253
223	void calc_mandelbrot_32(void)	254	void calc_mandelbrot_low_prec(void)
224	{	255	{
225	long start_tick, last_yield;	256	long start_tick, last_yield;
226	unsigned n_iter;	257	unsigned n_iter;
227	long long a64, b64;	258	long a32, b32;
228	long x, x2, y, y2, a, b;	259	short x, x2, y, y2, a, b;
229	int p_x, p_y;	260	int p_x, p_y;
230	int brightness;	261	int brightness;
231		262
232	start_tick = last_yield = *rb->current_tick;	263	start_tick = last_yield = *rb->current_tick;
233		264
234	for (p_x = 0, a64 = x_min; p_x <= px_max; p_x++, a64 += x_step) {	265	for (p_x = 0, a32 = x_min; p_x < px_max; p_x++, a32 += x_step) {
235	if (p_x < px_min)	266	if (p_x < px_min)
236	continue;	267	continue;
237	a = a64 >> 32;	268	a = a32 >> 16;
238	for (p_y = LCD_HEIGHT-1, b64 = y_min; p_y >= py_min; p_y--, b64 += y_step) {	269	for (p_y = LCD_HEIGHT-1, b32 = y_min; p_y >= py_min; p_y--, b32 += y_step) {
239	if (p_y >= py_max)	270	if (p_y >= py_max)
240	continue;	271	continue;
241	b = b64 >> 32;	272	b = b32 >> 16;
242	x = 0;	273	x = a;
243	y = 0;	274	y = b;
244	n_iter = 0;	275	n_iter = 0;
245		276
246	while (++n_iter <= max_iter) {	277	while (++n_iter <= max_iter) {
247	x >>= 13;	278	x2 = MULS16_ASR10(x, x);
248	y >>= 13;	279	y2 = MULS16_ASR10(y, y);
249	x2 = x * x;	280
250	y2 = y * y;	281	if (x2 + y2 > (4<<10)) break;
251		282
252	if (x2 + y2 > (4<<26)) break;	283	y = 2 * MULS16_ASR10(x, y) + b;
253
254	y = 2 * x * y + b;
255	x = x2 - y2 + a;	284	x = x2 - y2 + a;
256	}	285	}
257		286
@@ -261,7 +290,7 @@ void calc_mandelbrot_32(void)
261	} else {	290	} else {
262	brightness = 255 - (32 * (n_iter & 7));	291	brightness = 255 - (32 * (n_iter & 7));
263	}	292	}
264	graybuffer[p_y]=brightness;	293	graybuffer[p_y] = brightness;
265	/* be nice to other threads:	294	/* be nice to other threads:
266	* if at least one tick has passed, yield */	295	* if at least one tick has passed, yield */
267	if (*rb->current_tick > last_yield) {	296	if (*rb->current_tick > last_yield) {
@@ -274,35 +303,34 @@ void calc_mandelbrot_32(void)
274	}	303	}
275	}	304	}
276		305
277	void calc_mandelbrot_64(void)	306	void calc_mandelbrot_high_prec(void)
278	{	307	{
279	long start_tick, last_yield;	308	long start_tick, last_yield;
280	unsigned n_iter;	309	unsigned n_iter;
281	long long x, x2, y, y2, a, b;	310	long x, x2, y, y2, a, b;
282	int p_x, p_y;	311	int p_x, p_y;
283	int brightness;	312	int brightness;
284
285	start_tick = last_yield = *rb->current_tick;
286		313
		314	MULS32_INIT();
		315	start_tick = last_yield = *rb->current_tick;
		316
287	for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) {	317	for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) {
288	if (p_x < px_min)	318	if (p_x < px_min)
289	continue;	319	continue;
290	for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) {	320	for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) {
291	if (p_y >= py_max)	321	if (p_y >= py_max)
292	continue;	322	continue;
293	x = 0;	323	x = a;
294	y = 0;	324	y = b;
295	n_iter = 0;	325	n_iter = 0;
296		326
297	while (++n_iter<=max_iter) {	327	while (++n_iter <= max_iter) {
298	x >>= 29;	328	x2 = MULS32_ASR26(x, x);
299	y >>= 29;	329	y2 = MULS32_ASR26(y, y);
300	x2 = MUL64(x, x);	330
301	y2 = MUL64(y, y);	331	if (x2 + y2 > (4L<<26)) break;
302
303	if (x2 + y2 > (4LL<<58)) break;
304		332
305	y = 2 * MUL64(x, y) + b;	333	y = 2 * MULS32_ASR26(x, y) + b;
306	x = x2 - y2 + a;	334	x = x2 - y2 + a;
307	}	335	}
308		336
@@ -312,10 +340,10 @@ void calc_mandelbrot_64(void)
312	} else {	340	} else {
313	brightness = 255 - (32 * (n_iter & 7));	341	brightness = 255 - (32 * (n_iter & 7));
314	}	342	}
315	graybuffer[p_y]=brightness;	343	graybuffer[p_y] = brightness;
316	/* be nice to other threads:	344	/* be nice to other threads:
317	* if at least one tick has passed, yield */	345	* if at least one tick has passed, yield */
318	if (*rb->current_tick > last_yield){	346	if (*rb->current_tick > last_yield) {
319	rb->yield();	347	rb->yield();
320	last_yield = *rb->current_tick;	348	last_yield = *rb->current_tick;
321	}	349	}
@@ -375,10 +403,10 @@ enum plugin_status plugin_start(struct plugin_api* api, void* parameter)
375	if (redraw == REDRAW_FULL)	403	if (redraw == REDRAW_FULL)
376	gray_ub_clear_display();	404	gray_ub_clear_display();
377		405
378	if (step_log2 <= -13) /* select precision */	406	if (step_log2 <= -10) /* select precision */
379	calc_mandelbrot_64();	407	calc_mandelbrot_high_prec();
380	else	408	else
381	calc_mandelbrot_32();	409	calc_mandelbrot_low_prec();
382		410
383	#if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ)	411	#if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ)
384	rb->cpu_boost(false);	412	rb->cpu_boost(false);