From 0030ae28b551df94defe25c04c1859ba508e632e Mon Sep 17 00:00:00 2001
From: Jens Arnold
Date: Wed, 3 Mar 2010 20:52:02 +0000
Subject: Get rid of .rept in inline asm() blocks where possible.

Using .rept causes gcc to wrongly estimate the size of the asm(),
leading to (potential) compilation problems. This is necessary for
the upcoming restructuring, and should fix ARMv6+ sim builds as well.
No functional change.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25004 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/demac/libdemac/vector_math16_armv5te.h | 29 +++++++++---------
 apps/codecs/demac/libdemac/vector_math16_armv6.h   | 34 +++++++++++-----------
 apps/codecs/demac/libdemac/vector_math16_cf.h      | 20 ++++++++-----
 apps/codecs/demac/libdemac/vector_math32_armv4.h   | 18 ++++++------
 4 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "smlabt %[res], r0, r3, %[res] \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlatb %[res], r0, r2, %[res] \n"
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlabb %[res], r0, r2, %[res] \n"
         "smlatt %[res], r0, r2, %[res] \n"
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index 0ace6c5811..2ce62728cb 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,14 +22,14 @@
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
-*/ 
+*/
 
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 16
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #else
-#define BLOCK_REPEAT "1"
+#define REPEAT_BLOCK(x) x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
@@ -77,7 +77,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -104,7 +104,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -148,7 +148,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -165,7 +165,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -246,7 +246,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -273,7 +273,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -317,7 +317,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -334,7 +334,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -400,7 +400,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuadx %[res], r0, r3 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -413,8 +413,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "pkhtb r3, r5, r6 \n"
         "ldrd r4, [%[v2]], #8 \n"
         "smladx %[res], r0, r3, %[res] \n"
-        ".endr \n"
-        
+        )
+
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -434,7 +434,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #endif
 
         "b 99f \n"
-        
+
     "20: \n"
         "ldrd r0, [%[v1]], #8 \n"
         "ldmia %[v2]!, {r5-r7} \n"
@@ -446,7 +446,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuad %[res], r0, r5 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldrd r4, [%[v2]], #8 \n"
         "smlad %[res], r1, r6, %[res] \n"
         "ldrd r0, [%[v1]], #8 \n"
@@ -455,7 +455,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlad %[res], r3, r4, %[res] \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smlad %[res], r0, r5, %[res] \n"
-        ".endr \n"
+        )
 
 #if ORDER > 32
         "ldrd r4, [%[v2]], #8 \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 6e8216c9cc..4d77d3be31 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
 
+#define REPEAT_2(x) x x
+#define REPEAT_3(x) x x x
+#define REPEAT_7(x) x x x x x x x
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *recommends* v1 to be
  * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
@@ -64,7 +68,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
     "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -82,7 +86,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         ADDHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -193,7 +197,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
    "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -211,7 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         SUBHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -305,10 +309,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.w (%[v2])+, %%d1 \n"
     "1: \n"
-        ".rept 7 \n"
+        REPEAT_7(
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 #if ORDER > 16
@@ -324,12 +328,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.l (%[v2])+, %%d1 \n"
     "1: \n"
-        ".rept 3 \n"
+        REPEAT_3(
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
         "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
index 207fca3038..cb5fe9e0ee 100644
--- a/apps/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -27,11 +27,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 32
-#define BLOCK_REPEAT "8"
+#define REPEAT_BLOCK(x) x x x x x x x x
 #elif ORDER > 16
-#define BLOCK_REPEAT "7"
+#define REPEAT_BLOCK(x) x x x x x x x
 #else
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
@@ -60,7 +60,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -73,7 +73,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r2, r2, r6 \n"
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -120,7 +120,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -133,7 +133,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r2, r2, r6 \n"
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -173,14 +173,14 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1]!, {r0-r3} \n"
         "ldmia %[v2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
         "mla %[res], r5, r1, %[res] \n"
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
-- 
cgit v1.2.3
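
Why the macro form fixes the size estimate: gcc does not assemble the
template of an asm() block; it estimates the block's size from the template
text itself, essentially by counting statement separators (newlines) and
assuming a maximum instruction length for each. A ".rept N ... .endr" pair
makes the assembler emit N copies of the enclosed instructions while gcc
still counts them once, so the emitted code is larger than gcc's estimate
and length-based decisions (branch reach, section layout) can go wrong.
Repeating the text with a C preprocessor macro instead, as this patch does,
puts every copy (and every "\n") into the template gcc sees. A minimal
sketch of the pattern follows; the REPEAT_3 macro mirrors the ones added
above, while add3() and its asm body are a toy ARM example for
illustration, not code from libdemac:

    /* Repetition done by the C preprocessor: the template gcc sees
     * really contains three "add" instructions, so its size estimate
     * matches what the assembler emits. */
    #define REPEAT_3(x) x x x

    static inline int add3(int x)
    {
        asm (
            REPEAT_3(
            "add %0, %0, #1 \n"
            )
            : "+r" (x));
        return x;
    }

    /* The .rept form this replaces emits the same three instructions,
     * but gcc sees only one of them in the template:
     *
     *     ".rept 3        \n"
     *     "add %0, %0, #1 \n"
     *     ".endr          \n"
     */

Since the preprocessor can only duplicate text, each repeat count needs its
own macro, which is why the patch defines REPEAT_2/REPEAT_3/REPEAT_7 and
per-ORDER REPEAT_MLA/REPEAT_BLOCK variants rather than a single
parameterized loop.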