diff options
author | Tomer Shalev <shalev.tomer@gmail.com> | 2010-01-17 22:03:36 +0000 |
---|---|---|
committer | Tomer Shalev <shalev.tomer@gmail.com> | 2010-01-17 22:03:36 +0000 |
commit | 563f2602f471208cb8544a36539a79dcceaad643 (patch) | |
tree | 0d78bda3fcd3ee085b2be88b9eba9255e22f4e6c /apps/plugins | |
parent | d8123629058c534835058f2db94ba1c2636408e2 (diff) | |
download | rockbox-563f2602f471208cb8544a36539a79dcceaad643.tar.gz rockbox-563f2602f471208cb8544a36539a79dcceaad643.zip |
Fractals: Have helper functions in header file to keep them inlined
- Should fix the performance degradation caused by the split
- Thanks to all who noticed (amiconn et al.)
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24266 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins')
-rw-r--r-- | apps/plugins/fractals/SOURCES | 7 | ||||
-rw-r--r-- | apps/plugins/fractals/cpu_arm.c | 40 | ||||
-rw-r--r-- | apps/plugins/fractals/cpu_arm.h | 17 | ||||
-rw-r--r-- | apps/plugins/fractals/cpu_coldfire.c | 58 | ||||
-rw-r--r-- | apps/plugins/fractals/cpu_coldfire.h | 35 | ||||
-rw-r--r-- | apps/plugins/fractals/cpu_sh7043.c | 94 | ||||
-rw-r--r-- | apps/plugins/fractals/cpu_sh7043.h | 71 |
7 files changed, 118 insertions, 204 deletions
diff --git a/apps/plugins/fractals/SOURCES b/apps/plugins/fractals/SOURCES index 72a22467c2..a6ef819266 100644 --- a/apps/plugins/fractals/SOURCES +++ b/apps/plugins/fractals/SOURCES | |||
@@ -1,10 +1,3 @@ | |||
1 | fractal.c | 1 | fractal.c |
2 | fractal_rect.c | 2 | fractal_rect.c |
3 | mandelbrot_set.c | 3 | mandelbrot_set.c |
4 | #if CONFIG_CPU == SH7034 | ||
5 | cpu_sh7043.c | ||
6 | #elif defined CPU_COLDFIRE | ||
7 | cpu_coldfire.c | ||
8 | #elif defined CPU_ARM | ||
9 | cpu_arm.c | ||
10 | #endif | ||
diff --git a/apps/plugins/fractals/cpu_arm.c b/apps/plugins/fractals/cpu_arm.c deleted file mode 100644 index 1d8b1a76f6..0000000000 --- a/apps/plugins/fractals/cpu_arm.c +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2009 Tomer Shalev | ||
11 | * | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version 2 | ||
16 | * of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
19 | * KIND, either express or implied. | ||
20 | * | ||
21 | ****************************************************************************/ | ||
22 | #include "cpu_arm.h" | ||
23 | |||
24 | inline long muls32_asr26(long a, long b) | ||
25 | { | ||
26 | long r, t1; | ||
27 | asm ( | ||
28 | "smull %[r], %[t1], %[a], %[b] \n" | ||
29 | "mov %[r], %[r], lsr #26 \n" | ||
30 | "orr %[r], %[r], %[t1], lsl #6 \n" | ||
31 | : /* outputs */ | ||
32 | [r] "=&r,&r,&r"(r), | ||
33 | [t1]"=&r,&r,&r"(t1) | ||
34 | : /* inputs */ | ||
35 | [a] "%r,%r,%r" (a), | ||
36 | [b] "r,0,1" (b) | ||
37 | ); | ||
38 | return r; | ||
39 | } | ||
40 | |||
diff --git a/apps/plugins/fractals/cpu_arm.h b/apps/plugins/fractals/cpu_arm.h index 51a3718f66..ea4595b7d2 100644 --- a/apps/plugins/fractals/cpu_arm.h +++ b/apps/plugins/fractals/cpu_arm.h | |||
@@ -22,6 +22,21 @@ | |||
22 | #ifndef _CPU_ARM_H | 22 | #ifndef _CPU_ARM_H |
23 | #define _CPU_ARM_H | 23 | #define _CPU_ARM_H |
24 | 24 | ||
25 | inline long muls32_asr26(long a, long b); | 25 | inline static long muls32_asr26(long a, long b) |
26 | { | ||
27 | long r, t1; | ||
28 | asm ( | ||
29 | "smull %[r], %[t1], %[a], %[b] \n" | ||
30 | "mov %[r], %[r], lsr #26 \n" | ||
31 | "orr %[r], %[r], %[t1], lsl #6 \n" | ||
32 | : /* outputs */ | ||
33 | [r] "=&r,&r,&r"(r), | ||
34 | [t1]"=&r,&r,&r"(t1) | ||
35 | : /* inputs */ | ||
36 | [a] "%r,%r,%r" (a), | ||
37 | [b] "r,0,1" (b) | ||
38 | ); | ||
39 | return r; | ||
40 | } | ||
26 | 41 | ||
27 | #endif | 42 | #endif |
diff --git a/apps/plugins/fractals/cpu_coldfire.c b/apps/plugins/fractals/cpu_coldfire.c deleted file mode 100644 index a005a3141e..0000000000 --- a/apps/plugins/fractals/cpu_coldfire.c +++ /dev/null | |||
@@ -1,58 +0,0 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2009 Tomer Shalev | ||
11 | * | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version 2 | ||
16 | * of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
19 | * KIND, either express or implied. | ||
20 | * | ||
21 | ****************************************************************************/ | ||
22 | #include "cpu_coldfire.h" | ||
23 | |||
24 | inline short muls16_asr10(short a, short b) | ||
25 | { | ||
26 | asm ( | ||
27 | "muls.w %[a],%[b] \n" | ||
28 | "asr.l #8,%[b] \n" | ||
29 | "asr.l #2,%[b] \n" | ||
30 | : /* outputs */ | ||
31 | [b]"+d"(b) | ||
32 | : /* inputs */ | ||
33 | [a]"d" (a) | ||
34 | ); | ||
35 | return b; | ||
36 | } | ||
37 | |||
38 | inline long muls32_asr26(long a, long b) | ||
39 | { | ||
40 | long r, t1; | ||
41 | asm ( | ||
42 | "mac.l %[a], %[b], %%acc0 \n" /* multiply */ | ||
43 | "move.l %%accext01, %[t1] \n" /* get low part */ | ||
44 | "movclr.l %%acc0, %[r] \n" /* get high part */ | ||
45 | "asl.l #5, %[r] \n" /* hi <<= 5, plus one free */ | ||
46 | "lsr.l #3, %[t1] \n" /* lo >>= 3 */ | ||
47 | "and.l #0x1f, %[t1] \n" /* mask out unrelated bits */ | ||
48 | "or.l %[t1], %[r] \n" /* combine result */ | ||
49 | : /* outputs */ | ||
50 | [r] "=d"(r), | ||
51 | [t1]"=d"(t1) | ||
52 | : /* inputs */ | ||
53 | [a] "d" (a), | ||
54 | [b] "d" (b) | ||
55 | ); | ||
56 | return r; | ||
57 | } | ||
58 | |||
diff --git a/apps/plugins/fractals/cpu_coldfire.h b/apps/plugins/fractals/cpu_coldfire.h index 452f18e2c5..9be207ee2f 100644 --- a/apps/plugins/fractals/cpu_coldfire.h +++ b/apps/plugins/fractals/cpu_coldfire.h | |||
@@ -22,8 +22,39 @@ | |||
22 | #ifndef _CPU_COLDFIRE_H | 22 | #ifndef _CPU_COLDFIRE_H |
23 | #define _CPU_COLDFIRE_H | 23 | #define _CPU_COLDFIRE_H |
24 | 24 | ||
25 | inline short muls16_asr10(short a, short b); | 25 | inline static short muls16_asr10(short a, short b) |
26 | { | ||
27 | asm ( | ||
28 | "muls.w %[a],%[b] \n" | ||
29 | "asr.l #8,%[b] \n" | ||
30 | "asr.l #2,%[b] \n" | ||
31 | : /* outputs */ | ||
32 | [b]"+d"(b) | ||
33 | : /* inputs */ | ||
34 | [a]"d" (a) | ||
35 | ); | ||
36 | return b; | ||
37 | } | ||
26 | 38 | ||
27 | inline long muls32_asr26(long a, long b); | 39 | inline static long muls32_asr26(long a, long b) |
40 | { | ||
41 | long r, t1; | ||
42 | asm ( | ||
43 | "mac.l %[a], %[b], %%acc0 \n" /* multiply */ | ||
44 | "move.l %%accext01, %[t1] \n" /* get low part */ | ||
45 | "movclr.l %%acc0, %[r] \n" /* get high part */ | ||
46 | "asl.l #5, %[r] \n" /* hi <<= 5, plus one free */ | ||
47 | "lsr.l #3, %[t1] \n" /* lo >>= 3 */ | ||
48 | "and.l #0x1f, %[t1] \n" /* mask out unrelated bits */ | ||
49 | "or.l %[t1], %[r] \n" /* combine result */ | ||
50 | : /* outputs */ | ||
51 | [r] "=d"(r), | ||
52 | [t1]"=d"(t1) | ||
53 | : /* inputs */ | ||
54 | [a] "d" (a), | ||
55 | [b] "d" (b) | ||
56 | ); | ||
57 | return r; | ||
58 | } | ||
28 | 59 | ||
29 | #endif | 60 | #endif |
diff --git a/apps/plugins/fractals/cpu_sh7043.c b/apps/plugins/fractals/cpu_sh7043.c deleted file mode 100644 index 1bce37d654..0000000000 --- a/apps/plugins/fractals/cpu_sh7043.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2009 Tomer Shalev | ||
11 | * | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version 2 | ||
16 | * of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
19 | * KIND, either express or implied. | ||
20 | * | ||
21 | ****************************************************************************/ | ||
22 | #include "cpu_sh7043.h" | ||
23 | |||
24 | inline short muls16_asr10(short a, short b) | ||
25 | { | ||
26 | short r; | ||
27 | asm ( | ||
28 | "muls %[a],%[b] \n" | ||
29 | "sts macl,%[r] \n" | ||
30 | "shlr8 %[r] \n" | ||
31 | "shlr2 %[r] \n" | ||
32 | : /* outputs */ | ||
33 | [r]"=r"(r) | ||
34 | : /* inputs */ | ||
35 | [a]"r"(a), | ||
36 | [b]"r"(b) | ||
37 | ); | ||
38 | return r; | ||
39 | } | ||
40 | |||
41 | inline long muls32_asr26(long a, long b) | ||
42 | { | ||
43 | long r, t1, t2, t3; | ||
44 | asm ( | ||
45 | /* Signed 32bit * 32bit -> 64bit multiplication. | ||
46 | Notation: xxab * xxcd, where each letter represents 16 bits. | ||
47 | xx is the 64 bit sign extension. */ | ||
48 | "swap.w %[a],%[t1] \n" /* t1 = ba */ | ||
49 | "mulu %[t1],%[b] \n" /* a * d */ | ||
50 | "swap.w %[b],%[t3] \n" /* t3 = dc */ | ||
51 | "sts macl,%[t2] \n" /* t2 = a * d */ | ||
52 | "mulu %[t1],%[t3] \n" /* a * c */ | ||
53 | "sts macl,%[r] \n" /* hi = a * c */ | ||
54 | "mulu %[a],%[t3] \n" /* b * c */ | ||
55 | "clrt \n" | ||
56 | "sts macl,%[t3] \n" /* t3 = b * c */ | ||
57 | "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */ | ||
58 | "movt %[t2] \n" | ||
59 | "mulu %[a],%[b] \n" /* b * d */ | ||
60 | "mov %[t3],%[t1] \n" /* t1t3 = t2t3 << 16 */ | ||
61 | "xtrct %[t2],%[t1] \n" | ||
62 | "shll16 %[t3] \n" | ||
63 | "sts macl,%[t2] \n" /* lo = b * d */ | ||
64 | "clrt \n" /* hi.lo += t1t3 */ | ||
65 | "addc %[t3],%[t2] \n" | ||
66 | "addc %[t1],%[r] \n" | ||
67 | "cmp/pz %[a] \n" /* ab >= 0 ? */ | ||
68 | "bt 1f \n" | ||
69 | "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */ | ||
70 | "1: \n" | ||
71 | "cmp/pz %[b] \n" /* cd >= 0 ? */ | ||
72 | "bt 2f \n" | ||
73 | "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */ | ||
74 | "2: \n" | ||
75 | /* Shift right by 26 and return low 32 bits */ | ||
76 | "shll2 %[r] \n" /* hi <<= 6 */ | ||
77 | "shll2 %[r] \n" | ||
78 | "shll2 %[r] \n" | ||
79 | "shlr16 %[t2] \n" /* (unsigned)lo >>= 26 */ | ||
80 | "shlr8 %[t2] \n" | ||
81 | "shlr2 %[t2] \n" | ||
82 | "or %[t2],%[r] \n" /* combine result */ | ||
83 | : /* outputs */ | ||
84 | [r] "=&r"(r), | ||
85 | [t1]"=&r"(t1), | ||
86 | [t2]"=&r"(t2), | ||
87 | [t3]"=&r"(t3) | ||
88 | : /* inputs */ | ||
89 | [a] "r" (a), | ||
90 | [b] "r" (b) | ||
91 | ); | ||
92 | return r; | ||
93 | } | ||
94 | |||
diff --git a/apps/plugins/fractals/cpu_sh7043.h b/apps/plugins/fractals/cpu_sh7043.h index 4805092377..0d773432a8 100644 --- a/apps/plugins/fractals/cpu_sh7043.h +++ b/apps/plugins/fractals/cpu_sh7043.h | |||
@@ -22,8 +22,75 @@ | |||
22 | #ifndef _CPU_SH7043_H | 22 | #ifndef _CPU_SH7043_H |
23 | #define _CPU_SH7043_H | 23 | #define _CPU_SH7043_H |
24 | 24 | ||
25 | inline short muls16_asr10(short a, short b); | 25 | inline static short muls16_asr10(short a, short b) |
26 | { | ||
27 | short r; | ||
28 | asm ( | ||
29 | "muls %[a],%[b] \n" | ||
30 | "sts macl,%[r] \n" | ||
31 | "shlr8 %[r] \n" | ||
32 | "shlr2 %[r] \n" | ||
33 | : /* outputs */ | ||
34 | [r]"=r"(r) | ||
35 | : /* inputs */ | ||
36 | [a]"r"(a), | ||
37 | [b]"r"(b) | ||
38 | ); | ||
39 | return r; | ||
40 | } | ||
26 | 41 | ||
27 | inline long muls32_asr26(long a, long b); | 42 | inline static long muls32_asr26(long a, long b) |
43 | { | ||
44 | long r, t1, t2, t3; | ||
45 | asm ( | ||
46 | /* Signed 32bit * 32bit -> 64bit multiplication. | ||
47 | Notation: xxab * xxcd, where each letter represents 16 bits. | ||
48 | xx is the 64 bit sign extension. */ | ||
49 | "swap.w %[a],%[t1] \n" /* t1 = ba */ | ||
50 | "mulu %[t1],%[b] \n" /* a * d */ | ||
51 | "swap.w %[b],%[t3] \n" /* t3 = dc */ | ||
52 | "sts macl,%[t2] \n" /* t2 = a * d */ | ||
53 | "mulu %[t1],%[t3] \n" /* a * c */ | ||
54 | "sts macl,%[r] \n" /* hi = a * c */ | ||
55 | "mulu %[a],%[t3] \n" /* b * c */ | ||
56 | "clrt \n" | ||
57 | "sts macl,%[t3] \n" /* t3 = b * c */ | ||
58 | "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */ | ||
59 | "movt %[t2] \n" | ||
60 | "mulu %[a],%[b] \n" /* b * d */ | ||
61 | "mov %[t3],%[t1] \n" /* t1t3 = t2t3 << 16 */ | ||
62 | "xtrct %[t2],%[t1] \n" | ||
63 | "shll16 %[t3] \n" | ||
64 | "sts macl,%[t2] \n" /* lo = b * d */ | ||
65 | "clrt \n" /* hi.lo += t1t3 */ | ||
66 | "addc %[t3],%[t2] \n" | ||
67 | "addc %[t1],%[r] \n" | ||
68 | "cmp/pz %[a] \n" /* ab >= 0 ? */ | ||
69 | "bt 1f \n" | ||
70 | "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */ | ||
71 | "1: \n" | ||
72 | "cmp/pz %[b] \n" /* cd >= 0 ? */ | ||
73 | "bt 2f \n" | ||
74 | "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */ | ||
75 | "2: \n" | ||
76 | /* Shift right by 26 and return low 32 bits */ | ||
77 | "shll2 %[r] \n" /* hi <<= 6 */ | ||
78 | "shll2 %[r] \n" | ||
79 | "shll2 %[r] \n" | ||
80 | "shlr16 %[t2] \n" /* (unsigned)lo >>= 26 */ | ||
81 | "shlr8 %[t2] \n" | ||
82 | "shlr2 %[t2] \n" | ||
83 | "or %[t2],%[r] \n" /* combine result */ | ||
84 | : /* outputs */ | ||
85 | [r] "=&r"(r), | ||
86 | [t1]"=&r"(t1), | ||
87 | [t2]"=&r"(t2), | ||
88 | [t3]"=&r"(t3) | ||
89 | : /* inputs */ | ||
90 | [a] "r" (a), | ||
91 | [b] "r" (b) | ||
92 | ); | ||
93 | return r; | ||
94 | } | ||
28 | 95 | ||
29 | #endif | 96 | #endif |