From 0030ae28b551df94defe25c04c1859ba508e632e Mon Sep 17 00:00:00 2001
From: Jens Arnold
Date: Wed, 3 Mar 2010 20:52:02 +0000
Subject: Get rid of .rept in inline asm() blocks where possible.

Using .rept causes gcc to wrongly estimate the size of the asm(),
leading to (potential) compilation problems. This is necessary for
the upcoming restructuring, and should fix ARMv6+ sim builds as well.
No functional change.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25004 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/demac/libdemac/vector_math16_armv5te.h | 29 +++++++++---------
 apps/codecs/demac/libdemac/vector_math16_armv6.h   | 34 +++++++++++-----------
 apps/codecs/demac/libdemac/vector_math16_cf.h      | 20 ++++++++-----
 apps/codecs/demac/libdemac/vector_math32_armv4.h   | 18 ++++++------
 4 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "smlabt %[res], r0, r3, %[res] \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlatb %[res], r0, r2, %[res] \n"
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlabb %[res], r0, r2, %[res] \n"
         "smlatt %[res], r0, r2, %[res] \n"
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index 0ace6c5811..2ce62728cb 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,14 +22,14 @@
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
-*/ 
+*/
 
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 16
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #else
-#define BLOCK_REPEAT "1"
+#define REPEAT_BLOCK(x) x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
@@ -77,7 +77,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -104,7 +104,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -148,7 +148,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -165,7 +165,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -246,7 +246,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -273,7 +273,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -317,7 +317,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -334,7 +334,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -400,7 +400,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuadx %[res], r0, r3 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -413,8 +413,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "pkhtb r3, r5, r6 \n"
         "ldrd r4, [%[v2]], #8 \n"
         "smladx %[res], r0, r3, %[res] \n"
-        ".endr \n"
-        
+        )
+
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -434,7 +434,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #endif
 
         "b 99f \n"
-        
+
     "20: \n"
         "ldrd r0, [%[v1]], #8 \n"
         "ldmia %[v2]!, {r5-r7} \n"
@@ -446,7 +446,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuad %[res], r0, r5 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldrd r4, [%[v2]], #8 \n"
         "smlad %[res], r1, r6, %[res] \n"
         "ldrd r0, [%[v1]], #8 \n"
@@ -455,7 +455,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlad %[res], r3, r4, %[res] \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smlad %[res], r0, r5, %[res] \n"
-        ".endr \n"
+        )
 
 #if ORDER > 32
         "ldrd r4, [%[v2]], #8 \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 6e8216c9cc..4d77d3be31 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
 
+#define REPEAT_2(x) x x
+#define REPEAT_3(x) x x x
+#define REPEAT_7(x) x x x x x x x
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *recommends* v1 to be
  * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
@@ -64,7 +68,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
     "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -82,7 +86,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         ADDHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -193,7 +197,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
    "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -211,7 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         SUBHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -305,10 +309,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.w (%[v2])+, %%d1 \n"
     "1: \n"
-        ".rept 7 \n"
+        REPEAT_7(
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 #if ORDER > 16
@@ -324,12 +328,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.l (%[v2])+, %%d1 \n"
     "1: \n"
-        ".rept 3 \n"
+        REPEAT_3(
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
         "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
index 207fca3038..cb5fe9e0ee 100644
--- a/apps/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -27,11 +27,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 32
-#define BLOCK_REPEAT "8"
+#define REPEAT_BLOCK(x) x x x x x x x x
 #elif ORDER > 16
-#define BLOCK_REPEAT "7"
+#define REPEAT_BLOCK(x) x x x x x x x
 #else
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
@@ -60,7 +60,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -73,7 +73,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r2, r2, r6 \n"
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -120,7 +120,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -133,7 +133,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r2, r2, r6 \n"
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -173,14 +173,14 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1]!, {r0-r3} \n"
         "ldmia %[v2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
         "mla %[res], r5, r1, %[res] \n"
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
-- 
cgit v1.2.3
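
Why the macro form fixes the size estimate: gcc does not assemble the
template of an asm() block; it estimates the block's size from the template
text itself, essentially by counting statement separators (newlines) and
assuming a maximum instruction length for each. A ".rept N ... .endr" pair
makes the assembler emit N copies of the enclosed instructions while gcc
still counts them once, so the emitted code is larger than gcc's estimate
and length-based decisions (branch reach, section layout) can go wrong.
Repeating the text with a C preprocessor macro instead, as this patch does,
puts every copy (and every "\n") into the template gcc sees. A minimal
sketch of the pattern follows; the REPEAT_3 macro mirrors the ones added
above, while add3() and its asm body are a toy ARM example for
illustration, not code from libdemac:

    /* Repetition done by the C preprocessor: the template gcc sees
     * really contains three "add" instructions, so its size estimate
     * matches what the assembler emits. */
    #define REPEAT_3(x) x x x

    static inline int add3(int x)
    {
        asm (
            REPEAT_3(
            "add %0, %0, #1 \n"
            )
            : "+r" (x));
        return x;
    }

    /* The .rept form this replaces emits the same three instructions,
     * but gcc sees only one of them in the template:
     *
     *     ".rept 3        \n"
     *     "add %0, %0, #1 \n"
     *     ".endr          \n"
     */

Since the preprocessor can only duplicate text, each repeat count needs its
own macro, which is why the patch defines REPEAT_2/REPEAT_3/REPEAT_7 and
per-ORDER REPEAT_MLA/REPEAT_BLOCK variants rather than a single
parameterized loop.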