From 563f2602f471208cb8544a36539a79dcceaad643 Mon Sep 17 00:00:00 2001 From: Tomer Shalev Date: Sun, 17 Jan 2010 22:03:36 +0000 Subject: Fractals: Have helper functions in header file to keep them inlined - Should fix performance degradation caused because of the split - Thanks for all who noticed (amiconn et al.) git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24266 a1c6a512-1295-4272-9138-f99709370657 --- apps/plugins/fractals/SOURCES | 7 --- apps/plugins/fractals/cpu_arm.c | 40 --------------- apps/plugins/fractals/cpu_arm.h | 17 ++++++- apps/plugins/fractals/cpu_coldfire.c | 58 ---------------------- apps/plugins/fractals/cpu_coldfire.h | 35 +++++++++++++- apps/plugins/fractals/cpu_sh7043.c | 94 ------------------------------------ apps/plugins/fractals/cpu_sh7043.h | 71 ++++++++++++++++++++++++++- 7 files changed, 118 insertions(+), 204 deletions(-) delete mode 100644 apps/plugins/fractals/cpu_arm.c delete mode 100644 apps/plugins/fractals/cpu_coldfire.c delete mode 100644 apps/plugins/fractals/cpu_sh7043.c diff --git a/apps/plugins/fractals/SOURCES b/apps/plugins/fractals/SOURCES index 72a22467c2..a6ef819266 100644 --- a/apps/plugins/fractals/SOURCES +++ b/apps/plugins/fractals/SOURCES @@ -1,10 +1,3 @@ fractal.c fractal_rect.c mandelbrot_set.c -#if CONFIG_CPU == SH7034 -cpu_sh7043.c -#elif defined CPU_COLDFIRE -cpu_coldfire.c -#elif defined CPU_ARM -cpu_arm.c -#endif diff --git a/apps/plugins/fractals/cpu_arm.c b/apps/plugins/fractals/cpu_arm.c deleted file mode 100644 index 1d8b1a76f6..0000000000 --- a/apps/plugins/fractals/cpu_arm.c +++ /dev/null @@ -1,40 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2009 Tomer Shalev - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ****************************************************************************/ -#include "cpu_arm.h" - -inline long muls32_asr26(long a, long b) -{ - long r, t1; - asm ( - "smull %[r], %[t1], %[a], %[b] \n" - "mov %[r], %[r], lsr #26 \n" - "orr %[r], %[r], %[t1], lsl #6 \n" - : /* outputs */ - [r] "=&r,&r,&r"(r), - [t1]"=&r,&r,&r"(t1) - : /* inputs */ - [a] "%r,%r,%r" (a), - [b] "r,0,1" (b) - ); - return r; -} - diff --git a/apps/plugins/fractals/cpu_arm.h b/apps/plugins/fractals/cpu_arm.h index 51a3718f66..ea4595b7d2 100644 --- a/apps/plugins/fractals/cpu_arm.h +++ b/apps/plugins/fractals/cpu_arm.h @@ -22,6 +22,21 @@ #ifndef _CPU_ARM_H #define _CPU_ARM_H -inline long muls32_asr26(long a, long b); +inline static long muls32_asr26(long a, long b) +{ + long r, t1; + asm ( + "smull %[r], %[t1], %[a], %[b] \n" + "mov %[r], %[r], lsr #26 \n" + "orr %[r], %[r], %[t1], lsl #6 \n" + : /* outputs */ + [r] "=&r,&r,&r"(r), + [t1]"=&r,&r,&r"(t1) + : /* inputs */ + [a] "%r,%r,%r" (a), + [b] "r,0,1" (b) + ); + return r; +} #endif diff --git a/apps/plugins/fractals/cpu_coldfire.c b/apps/plugins/fractals/cpu_coldfire.c deleted file mode 100644 index a005a3141e..0000000000 --- a/apps/plugins/fractals/cpu_coldfire.c +++ /dev/null @@ -1,58 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2009 Tomer Shalev - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ****************************************************************************/ -#include "cpu_coldfire.h" - -inline short muls16_asr10(short a, short b) -{ - asm ( - "muls.w %[a],%[b] \n" - "asr.l #8,%[b] \n" - "asr.l #2,%[b] \n" - : /* outputs */ - [b]"+d"(b) - : /* inputs */ - [a]"d" (a) - ); - return b; -} - -inline long muls32_asr26(long a, long b) -{ - long r, t1; - asm ( - "mac.l %[a], %[b], %%acc0 \n" /* multiply */ - "move.l %%accext01, %[t1] \n" /* get low part */ - "movclr.l %%acc0, %[r] \n" /* get high part */ - "asl.l #5, %[r] \n" /* hi <<= 5, plus one free */ - "lsr.l #3, %[t1] \n" /* lo >>= 3 */ - "and.l #0x1f, %[t1] \n" /* mask out unrelated bits */ - "or.l %[t1], %[r] \n" /* combine result */ - : /* outputs */ - [r] "=d"(r), - [t1]"=d"(t1) - : /* inputs */ - [a] "d" (a), - [b] "d" (b) - ); - return r; -} - diff --git a/apps/plugins/fractals/cpu_coldfire.h b/apps/plugins/fractals/cpu_coldfire.h index 452f18e2c5..9be207ee2f 100644 --- a/apps/plugins/fractals/cpu_coldfire.h +++ b/apps/plugins/fractals/cpu_coldfire.h @@ -22,8 +22,39 @@ #ifndef _CPU_COLDFIRE_H #define _CPU_COLDFIRE_H -inline short muls16_asr10(short a, short b); +inline static short muls16_asr10(short a, short b) +{ + asm ( + "muls.w %[a],%[b] \n" + "asr.l #8,%[b] \n" + "asr.l #2,%[b] \n" + : /* outputs */ + [b]"+d"(b) + : /* inputs */ + [a]"d" (a) + ); + return b; +} -inline long muls32_asr26(long a, long b); +inline static long muls32_asr26(long a, long b) +{ + long r, t1; + asm ( + "mac.l %[a], %[b], %%acc0 \n" /* multiply */ + "move.l %%accext01, %[t1] \n" /* get low part */ + "movclr.l %%acc0, %[r] \n" /* get high part */ + "asl.l #5, %[r] \n" /* hi <<= 5, plus one free */ + "lsr.l #3, %[t1] \n" /* lo >>= 3 */ + "and.l #0x1f, %[t1] \n" /* mask out unrelated bits */ + "or.l %[t1], %[r] \n" /* combine result */ + : /* outputs */ + [r] "=d"(r), + [t1]"=d"(t1) + : /* inputs */ + [a] "d" (a), + [b] "d" (b) + ); + return r; +} #endif diff --git a/apps/plugins/fractals/cpu_sh7043.c b/apps/plugins/fractals/cpu_sh7043.c deleted file mode 100644 index 1bce37d654..0000000000 --- a/apps/plugins/fractals/cpu_sh7043.c +++ /dev/null @@ -1,94 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2009 Tomer Shalev - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ****************************************************************************/ -#include "cpu_sh7043.h" - -inline short muls16_asr10(short a, short b) -{ - short r; - asm ( - "muls %[a],%[b] \n" - "sts macl,%[r] \n" - "shlr8 %[r] \n" - "shlr2 %[r] \n" - : /* outputs */ - [r]"=r"(r) - : /* inputs */ - [a]"r"(a), - [b]"r"(b) - ); - return r; -} - -inline long muls32_asr26(long a, long b) -{ - long r, t1, t2, t3; - asm ( - /* Signed 32bit * 32bit -> 64bit multiplication. - Notation: xxab * xxcd, where each letter represents 16 bits. - xx is the 64 bit sign extension. */ - "swap.w %[a],%[t1] \n" /* t1 = ba */ - "mulu %[t1],%[b] \n" /* a * d */ - "swap.w %[b],%[t3] \n" /* t3 = dc */ - "sts macl,%[t2] \n" /* t2 = a * d */ - "mulu %[t1],%[t3] \n" /* a * c */ - "sts macl,%[r] \n" /* hi = a * c */ - "mulu %[a],%[t3] \n" /* b * c */ - "clrt \n" - "sts macl,%[t3] \n" /* t3 = b * c */ - "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */ - "movt %[t2] \n" - "mulu %[a],%[b] \n" /* b * d */ - "mov %[t3],%[t1] \n" /* t1t3 = t2t3 << 16 */ - "xtrct %[t2],%[t1] \n" - "shll16 %[t3] \n" - "sts macl,%[t2] \n" /* lo = b * d */ - "clrt \n" /* hi.lo += t1t3 */ - "addc %[t3],%[t2] \n" - "addc %[t1],%[r] \n" - "cmp/pz %[a] \n" /* ab >= 0 ? */ - "bt 1f \n" - "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */ - "1: \n" - "cmp/pz %[b] \n" /* cd >= 0 ? */ - "bt 2f \n" - "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */ - "2: \n" - /* Shift right by 26 and return low 32 bits */ - "shll2 %[r] \n" /* hi <<= 6 */ - "shll2 %[r] \n" - "shll2 %[r] \n" - "shlr16 %[t2] \n" /* (unsigned)lo >>= 26 */ - "shlr8 %[t2] \n" - "shlr2 %[t2] \n" - "or %[t2],%[r] \n" /* combine result */ - : /* outputs */ - [r] "=&r"(r), - [t1]"=&r"(t1), - [t2]"=&r"(t2), - [t3]"=&r"(t3) - : /* inputs */ - [a] "r" (a), - [b] "r" (b) - ); - return r; -} - diff --git a/apps/plugins/fractals/cpu_sh7043.h b/apps/plugins/fractals/cpu_sh7043.h index 4805092377..0d773432a8 100644 --- a/apps/plugins/fractals/cpu_sh7043.h +++ b/apps/plugins/fractals/cpu_sh7043.h @@ -22,8 +22,75 @@ #ifndef _CPU_SH7043_H #define _CPU_SH7043_H -inline short muls16_asr10(short a, short b); +inline static short muls16_asr10(short a, short b) +{ + short r; + asm ( + "muls %[a],%[b] \n" + "sts macl,%[r] \n" + "shlr8 %[r] \n" + "shlr2 %[r] \n" + : /* outputs */ + [r]"=r"(r) + : /* inputs */ + [a]"r"(a), + [b]"r"(b) + ); + return r; +} -inline long muls32_asr26(long a, long b); +inline static long muls32_asr26(long a, long b) +{ + long r, t1, t2, t3; + asm ( + /* Signed 32bit * 32bit -> 64bit multiplication. + Notation: xxab * xxcd, where each letter represents 16 bits. + xx is the 64 bit sign extension. */ + "swap.w %[a],%[t1] \n" /* t1 = ba */ + "mulu %[t1],%[b] \n" /* a * d */ + "swap.w %[b],%[t3] \n" /* t3 = dc */ + "sts macl,%[t2] \n" /* t2 = a * d */ + "mulu %[t1],%[t3] \n" /* a * c */ + "sts macl,%[r] \n" /* hi = a * c */ + "mulu %[a],%[t3] \n" /* b * c */ + "clrt \n" + "sts macl,%[t3] \n" /* t3 = b * c */ + "addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */ + "movt %[t2] \n" + "mulu %[a],%[b] \n" /* b * d */ + "mov %[t3],%[t1] \n" /* t1t3 = t2t3 << 16 */ + "xtrct %[t2],%[t1] \n" + "shll16 %[t3] \n" + "sts macl,%[t2] \n" /* lo = b * d */ + "clrt \n" /* hi.lo += t1t3 */ + "addc %[t3],%[t2] \n" + "addc %[t1],%[r] \n" + "cmp/pz %[a] \n" /* ab >= 0 ? */ + "bt 1f \n" + "sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */ + "1: \n" + "cmp/pz %[b] \n" /* cd >= 0 ? */ + "bt 2f \n" + "sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */ + "2: \n" + /* Shift right by 26 and return low 32 bits */ + "shll2 %[r] \n" /* hi <<= 6 */ + "shll2 %[r] \n" + "shll2 %[r] \n" + "shlr16 %[t2] \n" /* (unsigned)lo >>= 26 */ + "shlr8 %[t2] \n" + "shlr2 %[t2] \n" + "or %[t2],%[r] \n" /* combine result */ + : /* outputs */ + [r] "=&r"(r), + [t1]"=&r"(t1), + [t2]"=&r"(t2), + [t3]"=&r"(t3) + : /* inputs */ + [a] "r" (a), + [b] "r" (b) + ); + return r; +} #endif -- cgit v1.2.3