diff options
Diffstat (limited to 'apps')
-rw-r--r-- | apps/plugins/mandelbrot.c | 336 |
1 files changed, 182 insertions, 154 deletions
diff --git a/apps/plugins/mandelbrot.c b/apps/plugins/mandelbrot.c index 8a3c14a194..1f80ff395f 100644 --- a/apps/plugins/mandelbrot.c +++ b/apps/plugins/mandelbrot.c | |||
@@ -55,15 +55,15 @@ | |||
55 | static struct plugin_api* rb; | 55 | static struct plugin_api* rb; |
56 | static char buff[32]; | 56 | static char buff[32]; |
57 | 57 | ||
58 | /* Fixed point format: 6 bits integer part incl. sign, 58 bits fractional part */ | 58 | /* Fixed point format: 6 bits integer part incl. sign, 26 bits fractional part */ |
59 | static long long x_min; | 59 | static long x_min; |
60 | static long long x_max; | 60 | static long x_max; |
61 | static long long x_step; | 61 | static long x_step; |
62 | static long long x_delta; | 62 | static long x_delta; |
63 | static long long y_min; | 63 | static long y_min; |
64 | static long long y_max; | 64 | static long y_max; |
65 | static long long y_step; | 65 | static long y_step; |
66 | static long long y_delta; | 66 | static long y_delta; |
67 | 67 | ||
68 | static int px_min = 0; | 68 | static int px_min = 0; |
69 | static int px_max = LCD_WIDTH; | 69 | static int px_max = LCD_WIDTH; |
@@ -78,117 +78,148 @@ static unsigned int gbuf_size = 0; | |||
78 | static unsigned char graybuffer[LCD_HEIGHT]; | 78 | static unsigned char graybuffer[LCD_HEIGHT]; |
79 | 79 | ||
80 | #if CONFIG_CPU == SH7034 | 80 | #if CONFIG_CPU == SH7034 |
81 | long long mul64(long long f1, long long f2); | 81 | |
82 | 82 | #define MULS16_ASR10(a, b) muls16_asr10(a, b) | |
83 | asm ( | 83 | static inline short muls16_asr10(short a, short b) |
84 | /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */ | 84 | { |
85 | ".global _mul64 \n" /* Notation: abcd * efgh, where each letter */ | 85 | short r; |
86 | ".type _mul64,@function\n" /* represents 16 bits. Called with: */ | 86 | asm ( |
87 | "_mul64: \n" /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */ | 87 | "muls %[a],%[b] \n" |
88 | "swap.w r4,r2 \n" /* r2 = ba */ | 88 | "sts macl,%[r] \n" |
89 | "mulu r2,r7 \n" /* a * h */ | 89 | "shlr8 %[r] \n" |
90 | "swap.w r6,r3 \n" /* r3 = fe */ | 90 | "shlr2 %[r] \n" |
91 | "sts macl,r0 \n" /* r0 = a * h */ | 91 | : /* outputs */ |
92 | "mulu r5,r3 \n" /* d * e */ | 92 | [r]"=r"(r) |
93 | "swap.w r7,r3 \n" /* r3 = hg */ | 93 | : /* inputs */ |
94 | "sts macl,r1 \n" /* r1 = d * e */ | 94 | [a]"r"(a), |
95 | "mulu r4,r3 \n" /* b * g */ | 95 | [b]"r"(b) |
96 | "add r1,r0 \n" /* r0 += r1 */ | 96 | ); |
97 | "swap.w r5,r2 \n" /* r2 = dc */ | 97 | return r; |
98 | "sts macl,r1 \n" /* r1 = b * g */ | 98 | } |
99 | "mulu r2,r6 \n" /* c * f */ | 99 | |
100 | "add r1,r0 \n" /* r0 += r1 */ | 100 | #define MULS32_ASR26(a, b) muls32_asr26(a, b) |
101 | "sts macl,r1 \n" /* r1 = c * f */ | 101 | static inline long muls32_asr26(long a, long b) |
102 | "mulu r4,r7 \n" /* b * h */ | 102 | { |
103 | "add r1,r0 \n" /* r0 += r1 */ | 103 | long r, t1, t2, t3; |
104 | "shll16 r0 \n" /* r0 <<= 16 */ | 104 | asm ( |
105 | "sts macl,r1 \n" /* r1 = b * h */ | 105 | /* Signed 32bit * 32bit -> 64bit multiplication. |
106 | "mulu r2,r3 \n" /* c * g */ | 106 | Notation: xxab * xxcd, where each letter represents 16 bits. |
107 | "add r1,r0 \n" /* r0 += r1 */ | 107 | xx is the 64 bit sign extension. */ |
108 | "sts macl,r1 \n" /* r1 = c * g */ | 108 | "swap.w %[a],%[t1] \n" /* t1 = ba */ |
109 | "mulu r5,r6 \n" /* d * f */ | 109 | "mulu %[t1],%[b] \n" /* a * d */ |
110 | "add r1,r0 \n" /* r0 += r1 */ | 110 | "swap.w %[b],%[t3] \n" /* t3 = dc */ |
111 | "sts macl,r1 \n" /* r1 = d * f */ | 111 | "sts macl,%[t2] \n" /* t2 = a * d */ |
112 | "mulu r2,r7 \n" /* c * h */ | 112 | "mulu %[t1],%[t3] \n" /* a * c */ |
113 | "add r1,r0 \n" /* r0 += r1 */ | 113 | "sts macl,%[r] \n" /* hi = a * c */ |
114 | "sts macl,r2 \n" /* r2 = c * h */ | 114 | "mulu %[a],%[t3] \n" /* b * c */ |
115 | "mulu r5,r3 \n" /* d * g */ | 115 | "clrt \n" |
116 | "clrt \n" | 116 | "sts macl,%[t3] \n" /* t3 = b * c */ |
117 | "sts macl,r3 \n" /* r3 = d * g */ | 117 | "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */ |
118 | "addc r2,r3 \n" /* r3 += r2, carry->r2 */ | 118 | "movt %[t2] \n" |
119 | "movt r2 \n" | 119 | "mulu %[a],%[b] \n" /* b * d */ |
120 | "mulu r5,r7 \n" /* d * h */ | 120 | "mov %[t3],%[t1] \n" /* t2t3 <<= 16 */ |
121 | "mov r3,r1 \n" /* r2r3 <<= 16 */ | 121 | "xtrct %[t2],%[t1] \n" |
122 | "xtrct r2,r1 \n" | 122 | "mov %[t1],%[t2] \n" |
123 | "mov r1,r2 \n" | 123 | "shll16 %[t3] \n" |
124 | "shll16 r3 \n" | 124 | "sts macl,%[t1] \n" /* lo = b * d */ |
125 | "sts macl,r1 \n" /* r1 = d * h */ | 125 | "clrt \n" /* hi.lo += t2t3 */ |
126 | "clrt \n" /* r0r1 += r2r3 */ | 126 | "addc %[t3],%[t1] \n" |
127 | "addc r3,r1 \n" | 127 | "addc %[t2],%[r] \n" |
128 | "rts \n" | 128 | "cmp/pz %[a] \n" /* ab >= 0 ? */ |
129 | "addc r2,r0 \n" | 129 | "bt 1f \n" |
130 | ); | 130 | "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */ |
131 | #define MUL64(a, b) mul64(a, b) | 131 | "1: \n" |
132 | "cmp/pz %[b] \n" /* cd >= 0 ? */ | ||
133 | "bt 2f \n" | ||
134 | "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */ | ||
135 | "2: \n" | ||
136 | /* Shift right by 26 and return low 32 bits */ | ||
137 | "shll2 %[r] \n" /* hi <<= 6 */ | ||
138 | "shll2 %[r] \n" | ||
139 | "shll2 %[r] \n" | ||
140 | "shlr16 %[t1] \n" /* (unsigned)lo >>= 26 */ | ||
141 | "shlr8 %[t1] \n" | ||
142 | "shlr2 %[t1] \n" | ||
143 | "or %[t1],%[r] \n" /* combine result */ | ||
144 | : /* outputs */ | ||
145 | [r] "=&r"(r), | ||
146 | [t1]"=&r"(t1), | ||
147 | [t2]"=&r"(t2), | ||
148 | [t3]"=&r"(t3) | ||
149 | : /* inputs */ | ||
150 | [a] "r" (a), | ||
151 | [b] "r" (b) | ||
152 | ); | ||
153 | return r; | ||
154 | } | ||
132 | 155 | ||
133 | #elif defined CPU_COLDFIRE | 156 | #elif defined CPU_COLDFIRE |
134 | long long mul64(long long f1, long long f2); | 157 | |
135 | 158 | #define MULS16_ASR10(a, b) muls16_asr10(a, b) | |
136 | asm ( | 159 | static inline short muls16_asr10(short a, short b) |
137 | /* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */ | 160 | { |
138 | ".section .text,\"ax\",@progbits\n" | 161 | asm ( |
139 | ".global mul64 \n" /* Notation: abcd * efgh, where each letter */ | 162 | "muls.w %[a],%[b] \n" |
140 | ".type mul64,@function\n" /* represents 16 bits. */ | 163 | "asr.l #8,%[b] \n" |
141 | "mul64: \n" | 164 | "asr.l #2,%[b] \n" |
142 | "lea.l (-16,%sp),%sp \n" | 165 | : /* outputs */ |
143 | "movem.l %d2-%d5,(%sp) \n" | 166 | [b]"+d"(b) |
144 | 167 | : /* inputs */ | |
145 | "movem.l (20,%sp),%d0-%d3\n" /* %d0%d1 = abcd, %d2%d3 = efgh */ | 168 | [a]"d" (a) |
146 | "mulu.l %d3,%d0 \n" /* %d0 = ab * gh */ | 169 | ); |
147 | "mulu.l %d1,%d2 \n" /* %d2 = cd * ef */ | 170 | return b; |
148 | "add.l %d2,%d0 \n" /* %d0 += %d2 */ | 171 | } |
149 | "move.l %d1,%d4 \n" | 172 | |
150 | "swap %d4 \n" /* %d4 = dc */ | 173 | /* Needs the EMAC initialised to fractional mode w/o rounding and saturation */ |
151 | "move.l %d3,%d5 \n" | 174 | #define MULS32_INIT() coldfire_set_macsr(EMAC_FRACTIONAL) |
152 | "swap %d5 \n" /* %d5 = hg */ | 175 | #define MULS32_ASR26(a, b) muls32_asr26(a, b) |
153 | "move.l %d4,%d2 \n" | 176 | static inline long muls32_asr26(long a, long b) |
154 | "mulu.w %d5,%d2 \n" /* %d2 = c * g */ | 177 | { |
155 | "add.l %d2,%d0 \n" /* %d0 += %d2 */ | 178 | long r, t1; |
156 | "mulu.w %d3,%d4 \n" /* %d4 = c * h */ | 179 | asm ( |
157 | "mulu.w %d1,%d5 \n" /* %d5 = d * h */ | 180 | "mac.l %[a],%[b],%%acc0\n" /* multiply */ |
158 | "add.l %d4,%d5 \n" /* %d5 += %d4 */ | 181 | "mulu.l %[a],%[b] \n" /* get lower half */ |
159 | "subx.l %d4,%d4 \n" | 182 | "movclr.l %%acc0,%[r] \n" /* get higher half */ |
160 | "neg.l %d4 \n" /* carry -> %d4 */ | 183 | "asl.l #5,%[r] \n" /* hi <<= 5, plus one free */ |
161 | "swap %d4 \n" | 184 | "moveq.l #26,%[t1] \n" |
162 | "swap %d5 \n" | 185 | "lsr.l %[t1],%[b] \n" /* (unsigned)lo >>= 26 */ |
163 | "move.w %d5,%d4 \n" | 186 | "or.l %[b],%[r] \n" /* combine result */ |
164 | "clr.w %d5 \n" /* %d4%d5 <<= 16 */ | 187 | : /* outputs */ |
165 | "mulu.w %d3,%d1 \n" /* %d1 = d * h */ | 188 | [r]"=&d"(r), |
166 | "add.l %d5,%d1 \n" | 189 | [t1]"=&d"(t1), |
167 | "addx.l %d4,%d0 \n" /* %d0%d1 += %d4%d5 */ | 190 | [b] "+d" (b) |
168 | 191 | : /* inputs */ | |
169 | "movem.l (%sp),%d2-%d5 \n" | 192 | [a] "d" (a) |
170 | "lea.l (16,%sp),%sp\n" | 193 | ); |
171 | "rts \n" | 194 | return r; |
172 | ); | 195 | } |
173 | #define MUL64(a, b) mul64(a, b) | 196 | |
174 | 197 | #endif /* CPU */ | |
175 | #else | 198 | |
176 | #define MUL64(a, b) ((a)*(b)) | 199 | /* default macros */ |
200 | #ifndef MULS16_ASR10 | ||
201 | #define MULS16_ASR10(a, b) ((short)(((long)(a) * (long)(b)) >> 10)) | ||
202 | #endif | ||
203 | #ifndef MULS32_ASR26 | ||
204 | #define MULS32_ASR26(a, b) ((long)(((long long)(a) * (long long)(b)) >> 26)) | ||
205 | #endif | ||
206 | #ifndef MULS32_INIT | ||
207 | #define MULS32_INIT() | ||
177 | #endif | 208 | #endif |
178 | 209 | ||
179 | int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */ | 210 | int ilog2_fp(long value) /* calculate integer log2(value_fp_6.26) */ |
180 | { | 211 | { |
181 | int i = 0; | 212 | int i = 0; |
182 | 213 | ||
183 | if (value <= 0) { | 214 | if (value <= 0) { |
184 | return -32767; | 215 | return -32767; |
185 | } else if (value > (1LL<<58)) { | 216 | } else if (value > (1L<<26)) { |
186 | while (value >= (2LL<<58)) { | 217 | while (value >= (2L<<26)) { |
187 | value >>= 1; | 218 | value >>= 1; |
188 | i++; | 219 | i++; |
189 | } | 220 | } |
190 | } else { | 221 | } else { |
191 | while (value < (1LL<<58)) { | 222 | while (value < (1L<<26)) { |
192 | value <<= 1; | 223 | value <<= 1; |
193 | i--; | 224 | i--; |
194 | } | 225 | } |
@@ -199,59 +230,57 @@ int ilog2_fp(long long value) /* calculate integer log2(value_fp_6.58) */ | |||
199 | void recalc_parameters(void) | 230 | void recalc_parameters(void) |
200 | { | 231 | { |
201 | x_step = (x_max - x_min) / LCD_WIDTH; | 232 | x_step = (x_max - x_min) / LCD_WIDTH; |
202 | x_delta = MUL64(x_step, (LCD_WIDTH/8)); | 233 | x_delta = x_step * (LCD_WIDTH/8); |
203 | y_step = (y_max - y_min) / LCD_HEIGHT; | 234 | y_step = (y_max - y_min) / LCD_HEIGHT; |
204 | y_delta = MUL64(y_step, (LCD_HEIGHT/8)); | 235 | y_delta = y_step * (LCD_HEIGHT/8); |
205 | step_log2 = MIN(ilog2_fp(x_step), ilog2_fp(y_step)); | 236 | step_log2 = ilog2_fp(MIN(x_step, y_step)); |
206 | max_iter = MAX(10, -15 * step_log2 - 50); | 237 | max_iter = MAX(15, -15 * step_log2 - 45); |
207 | } | 238 | } |
208 | 239 | ||
209 | void init_mandelbrot_set(void) | 240 | void init_mandelbrot_set(void) |
210 | { | 241 | { |
211 | #if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */ | 242 | #if CONFIG_LCD == LCD_SSD1815 /* Recorder, Ondio. */ |
212 | x_min = -38LL<<54; // -2.375<<58 | 243 | x_min = -38L<<22; // -2.375<<26 |
213 | x_max = 15LL<<54; // 0.9375<<58 | 244 | x_max = 15L<<22; // 0.9375<<26 |
214 | #else /* Iriver H1x0 */ | 245 | #else /* Iriver H1x0 */ |
215 | x_min = -36LL<<54; // -2.25<<58 | 246 | x_min = -36L<<22; // -2.25<<26 |
216 | x_max = 12LL<<54; // 0.75<<58 | 247 | x_max = 12L<<22; // 0.75<<26 |
217 | #endif | 248 | #endif |
218 | y_min = -19LL<<54; // -1.1875<<58 | 249 | y_min = -19L<<22; // -1.1875<<26 |
219 | y_max = 19LL<<54; // 1.1875<<58 | 250 | y_max = 19L<<22; // 1.1875<<26 |
220 | recalc_parameters(); | 251 | recalc_parameters(); |
221 | } | 252 | } |
222 | 253 | ||
223 | void calc_mandelbrot_32(void) | 254 | void calc_mandelbrot_low_prec(void) |
224 | { | 255 | { |
225 | long start_tick, last_yield; | 256 | long start_tick, last_yield; |
226 | unsigned n_iter; | 257 | unsigned n_iter; |
227 | long long a64, b64; | 258 | long a32, b32; |
228 | long x, x2, y, y2, a, b; | 259 | short x, x2, y, y2, a, b; |
229 | int p_x, p_y; | 260 | int p_x, p_y; |
230 | int brightness; | 261 | int brightness; |
231 | 262 | ||
232 | start_tick = last_yield = *rb->current_tick; | 263 | start_tick = last_yield = *rb->current_tick; |
233 | 264 | ||
234 | for (p_x = 0, a64 = x_min; p_x <= px_max; p_x++, a64 += x_step) { | 265 | for (p_x = 0, a32 = x_min; p_x < px_max; p_x++, a32 += x_step) { |
235 | if (p_x < px_min) | 266 | if (p_x < px_min) |
236 | continue; | 267 | continue; |
237 | a = a64 >> 32; | 268 | a = a32 >> 16; |
238 | for (p_y = LCD_HEIGHT-1, b64 = y_min; p_y >= py_min; p_y--, b64 += y_step) { | 269 | for (p_y = LCD_HEIGHT-1, b32 = y_min; p_y >= py_min; p_y--, b32 += y_step) { |
239 | if (p_y >= py_max) | 270 | if (p_y >= py_max) |
240 | continue; | 271 | continue; |
241 | b = b64 >> 32; | 272 | b = b32 >> 16; |
242 | x = 0; | 273 | x = a; |
243 | y = 0; | 274 | y = b; |
244 | n_iter = 0; | 275 | n_iter = 0; |
245 | 276 | ||
246 | while (++n_iter <= max_iter) { | 277 | while (++n_iter <= max_iter) { |
247 | x >>= 13; | 278 | x2 = MULS16_ASR10(x, x); |
248 | y >>= 13; | 279 | y2 = MULS16_ASR10(y, y); |
249 | x2 = x * x; | 280 | |
250 | y2 = y * y; | 281 | if (x2 + y2 > (4<<10)) break; |
251 | 282 | ||
252 | if (x2 + y2 > (4<<26)) break; | 283 | y = 2 * MULS16_ASR10(x, y) + b; |
253 | |||
254 | y = 2 * x * y + b; | ||
255 | x = x2 - y2 + a; | 284 | x = x2 - y2 + a; |
256 | } | 285 | } |
257 | 286 | ||
@@ -261,7 +290,7 @@ void calc_mandelbrot_32(void) | |||
261 | } else { | 290 | } else { |
262 | brightness = 255 - (32 * (n_iter & 7)); | 291 | brightness = 255 - (32 * (n_iter & 7)); |
263 | } | 292 | } |
264 | graybuffer[p_y]=brightness; | 293 | graybuffer[p_y] = brightness; |
265 | /* be nice to other threads: | 294 | /* be nice to other threads: |
266 | * if at least one tick has passed, yield */ | 295 | * if at least one tick has passed, yield */ |
267 | if (*rb->current_tick > last_yield) { | 296 | if (*rb->current_tick > last_yield) { |
@@ -274,35 +303,34 @@ void calc_mandelbrot_32(void) | |||
274 | } | 303 | } |
275 | } | 304 | } |
276 | 305 | ||
277 | void calc_mandelbrot_64(void) | 306 | void calc_mandelbrot_high_prec(void) |
278 | { | 307 | { |
279 | long start_tick, last_yield; | 308 | long start_tick, last_yield; |
280 | unsigned n_iter; | 309 | unsigned n_iter; |
281 | long long x, x2, y, y2, a, b; | 310 | long x, x2, y, y2, a, b; |
282 | int p_x, p_y; | 311 | int p_x, p_y; |
283 | int brightness; | 312 | int brightness; |
284 | |||
285 | start_tick = last_yield = *rb->current_tick; | ||
286 | 313 | ||
314 | MULS32_INIT(); | ||
315 | start_tick = last_yield = *rb->current_tick; | ||
316 | |||
287 | for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) { | 317 | for (p_x = 0, a = x_min; p_x < px_max; p_x++, a += x_step) { |
288 | if (p_x < px_min) | 318 | if (p_x < px_min) |
289 | continue; | 319 | continue; |
290 | for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) { | 320 | for (p_y = LCD_HEIGHT-1, b = y_min; p_y >= py_min; p_y--, b += y_step) { |
291 | if (p_y >= py_max) | 321 | if (p_y >= py_max) |
292 | continue; | 322 | continue; |
293 | x = 0; | 323 | x = a; |
294 | y = 0; | 324 | y = b; |
295 | n_iter = 0; | 325 | n_iter = 0; |
296 | 326 | ||
297 | while (++n_iter<=max_iter) { | 327 | while (++n_iter <= max_iter) { |
298 | x >>= 29; | 328 | x2 = MULS32_ASR26(x, x); |
299 | y >>= 29; | 329 | y2 = MULS32_ASR26(y, y); |
300 | x2 = MUL64(x, x); | 330 | |
301 | y2 = MUL64(y, y); | 331 | if (x2 + y2 > (4L<<26)) break; |
302 | |||
303 | if (x2 + y2 > (4LL<<58)) break; | ||
304 | 332 | ||
305 | y = 2 * MUL64(x, y) + b; | 333 | y = 2 * MULS32_ASR26(x, y) + b; |
306 | x = x2 - y2 + a; | 334 | x = x2 - y2 + a; |
307 | } | 335 | } |
308 | 336 | ||
@@ -312,10 +340,10 @@ void calc_mandelbrot_64(void) | |||
312 | } else { | 340 | } else { |
313 | brightness = 255 - (32 * (n_iter & 7)); | 341 | brightness = 255 - (32 * (n_iter & 7)); |
314 | } | 342 | } |
315 | graybuffer[p_y]=brightness; | 343 | graybuffer[p_y] = brightness; |
316 | /* be nice to other threads: | 344 | /* be nice to other threads: |
317 | * if at least one tick has passed, yield */ | 345 | * if at least one tick has passed, yield */ |
318 | if (*rb->current_tick > last_yield){ | 346 | if (*rb->current_tick > last_yield) { |
319 | rb->yield(); | 347 | rb->yield(); |
320 | last_yield = *rb->current_tick; | 348 | last_yield = *rb->current_tick; |
321 | } | 349 | } |
@@ -375,10 +403,10 @@ enum plugin_status plugin_start(struct plugin_api* api, void* parameter) | |||
375 | if (redraw == REDRAW_FULL) | 403 | if (redraw == REDRAW_FULL) |
376 | gray_ub_clear_display(); | 404 | gray_ub_clear_display(); |
377 | 405 | ||
378 | if (step_log2 <= -13) /* select precision */ | 406 | if (step_log2 <= -10) /* select precision */ |
379 | calc_mandelbrot_64(); | 407 | calc_mandelbrot_high_prec(); |
380 | else | 408 | else |
381 | calc_mandelbrot_32(); | 409 | calc_mandelbrot_low_prec(); |
382 | 410 | ||
383 | #if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ) | 411 | #if !defined(SIMULATOR) && defined(HAVE_ADJUSTABLE_CPU_FREQ) |
384 | rb->cpu_boost(false); | 412 | rb->cpu_boost(false); |