summary | refs | log | tree | commit | diff
path: root/apps/codecs/demac/libdemac/vector_math32_armv4.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math32_armv4.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math32_armv4.h 194
1 file changed, 95 insertions, 99 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
index 89b24f2b06..207fca3038 100644
--- a/apps/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -24,78 +24,134 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24 24
25*/ 25*/
26 26
27static inline void vector_add(int32_t* v1, int32_t* v2) 27#define FUSED_VECTOR_MATH
28
29#if ORDER > 32
30#define BLOCK_REPEAT "8"
31#elif ORDER > 16
32#define BLOCK_REPEAT "7"
33#else
34#define BLOCK_REPEAT "3"
35#endif
36
37/* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
38static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
28{ 39{
40 int res;
29#if ORDER > 32 41#if ORDER > 32
30 int cnt = ORDER>>5; 42 int cnt = ORDER>>5;
31#endif 43#endif
32 44
33#if ORDER > 16 45 asm volatile (
34#define ADD_SUB_BLOCKS "8" 46#if ORDER > 32
47 "mov %[res], #0 \n"
48 "1: \n"
35#else 49#else
36#define ADD_SUB_BLOCKS "4" 50 "ldmia %[v1], {r0-r3} \n"
51 "ldmia %[f2]!, {r4-r7} \n"
52 "mul %[res], r4, r0 \n"
53 "mla %[res], r5, r1, %[res] \n"
54 "mla %[res], r6, r2, %[res] \n"
55 "mla %[res], r7, r3, %[res] \n"
56 "ldmia %[s2]!, {r4-r7} \n"
57 "add r0, r0, r4 \n"
58 "add r1, r1, r5 \n"
59 "add r2, r2, r6 \n"
60 "add r3, r3, r7 \n"
61 "stmia %[v1]!, {r0-r3} \n"
37#endif 62#endif
38 63 ".rept " BLOCK_REPEAT "\n"
39 asm volatile ( 64 "ldmia %[v1], {r0-r3} \n"
40 "1: \n" 65 "ldmia %[f2]!, {r4-r7} \n"
41 ".rept " ADD_SUB_BLOCKS "\n" 66 "mla %[res], r4, r0, %[res] \n"
42 "ldmia %[v1], {r0-r3} \n" 67 "mla %[res], r5, r1, %[res] \n"
43 "ldmia %[v2]!, {r4-r7} \n" 68 "mla %[res], r6, r2, %[res] \n"
44 "add r0, r0, r4 \n" 69 "mla %[res], r7, r3, %[res] \n"
45 "add r1, r1, r5 \n" 70 "ldmia %[s2]!, {r4-r7} \n"
46 "add r2, r2, r6 \n" 71 "add r0, r0, r4 \n"
47 "add r3, r3, r7 \n" 72 "add r1, r1, r5 \n"
48 "stmia %[v1]!, {r0-r3} \n" 73 "add r2, r2, r6 \n"
49 ".endr \n" 74 "add r3, r3, r7 \n"
75 "stmia %[v1]!, {r0-r3} \n"
76 ".endr \n"
50#if ORDER > 32 77#if ORDER > 32
51 "subs %[cnt], %[cnt], #1 \n" 78 "subs %[cnt], %[cnt], #1 \n"
52 "bne 1b \n" 79 "bne 1b \n"
53#endif 80#endif
54 : /* outputs */ 81 : /* outputs */
55#if ORDER > 32 82#if ORDER > 32
56 [cnt]"+r"(cnt), 83 [cnt]"+r"(cnt),
57#endif 84#endif
58 [v1] "+r"(v1), 85 [v1] "+r"(v1),
59 [v2] "+r"(v2) 86 [f2] "+r"(f2),
87 [s2] "+r"(s2),
88 [res]"=r"(res)
60 : /* inputs */ 89 : /* inputs */
61 : /* clobbers */ 90 : /* clobbers */
62 "r0", "r1", "r2", "r3", "r4", 91 "r0", "r1", "r2", "r3", "r4",
63 "r5", "r6", "r7", "memory" 92 "r5", "r6", "r7", "memory"
64 ); 93 );
94 return res;
65} 95}
66 96
67static inline void vector_sub(int32_t* v1, int32_t* v2) 97/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance) */
98static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
68{ 99{
100 int res;
69#if ORDER > 32 101#if ORDER > 32
70 int cnt = ORDER>>5; 102 int cnt = ORDER>>5;
71#endif 103#endif
72 104
73 asm volatile ( 105 asm volatile (
74 "1: \n"
75 ".rept " ADD_SUB_BLOCKS "\n"
76 "ldmia %[v1], {r0-r3} \n"
77 "ldmia %[v2]!, {r4-r7} \n"
78 "sub r0, r0, r4 \n"
79 "sub r1, r1, r5 \n"
80 "sub r2, r2, r6 \n"
81 "sub r3, r3, r7 \n"
82 "stmia %[v1]!, {r0-r3} \n"
83 ".endr \n"
84#if ORDER > 32 106#if ORDER > 32
85 "subs %[cnt], %[cnt], #1 \n" 107 "mov %[res], #0 \n"
86 "bne 1b \n" 108 "1: \n"
109#else
110 "ldmia %[v1], {r0-r3} \n"
111 "ldmia %[f2]!, {r4-r7} \n"
112 "mul %[res], r4, r0 \n"
113 "mla %[res], r5, r1, %[res] \n"
114 "mla %[res], r6, r2, %[res] \n"
115 "mla %[res], r7, r3, %[res] \n"
116 "ldmia %[s2]!, {r4-r7} \n"
117 "sub r0, r0, r4 \n"
118 "sub r1, r1, r5 \n"
119 "sub r2, r2, r6 \n"
120 "sub r3, r3, r7 \n"
121 "stmia %[v1]!, {r0-r3} \n"
122#endif
123 ".rept " BLOCK_REPEAT "\n"
124 "ldmia %[v1], {r0-r3} \n"
125 "ldmia %[f2]!, {r4-r7} \n"
126 "mla %[res], r4, r0, %[res] \n"
127 "mla %[res], r5, r1, %[res] \n"
128 "mla %[res], r6, r2, %[res] \n"
129 "mla %[res], r7, r3, %[res] \n"
130 "ldmia %[s2]!, {r4-r7} \n"
131 "sub r0, r0, r4 \n"
132 "sub r1, r1, r5 \n"
133 "sub r2, r2, r6 \n"
134 "sub r3, r3, r7 \n"
135 "stmia %[v1]!, {r0-r3} \n"
136 ".endr \n"
137#if ORDER > 32
138 "subs %[cnt], %[cnt], #1 \n"
139 "bne 1b \n"
87#endif 140#endif
88 : /* outputs */ 141 : /* outputs */
89#if ORDER > 32 142#if ORDER > 32
90 [cnt]"+r"(cnt), 143 [cnt]"+r"(cnt),
91#endif 144#endif
92 [v1] "+r"(v1), 145 [v1] "+r"(v1),
93 [v2] "+r"(v2) 146 [f2] "+r"(f2),
147 [s2] "+r"(s2),
148 [res]"=r"(res)
94 : /* inputs */ 149 : /* inputs */
95 : /* clobbers */ 150 : /* clobbers */
96 "r0", "r1", "r2", "r3", "r4", 151 "r0", "r1", "r2", "r3", "r4",
97 "r5", "r6", "r7", "memory" 152 "r5", "r6", "r7", "memory"
98 ); 153 );
154 return res;
99} 155}
100 156
101static inline int32_t scalarproduct(int32_t* v1, int32_t* v2) 157static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
@@ -106,78 +162,18 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
106#endif 162#endif
107 163
108 asm volatile ( 164 asm volatile (
109#if ORDER > 16
110#if ORDER > 32 165#if ORDER > 32
111 "mov %[res], #0 \n" 166 "mov %[res], #0 \n"
112#endif
113 "ldmia %[v2]!, {r6-r7} \n"
114 "1: \n" 167 "1: \n"
115 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
116#if ORDER > 32
117 "mla %[res], r6, r0, %[res] \n"
118#else 168#else
119 "mul %[res], r6, r0 \n"
120#endif
121 "mla %[res], r7, r1, %[res] \n"
122 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
123 "mla %[res], r0, r3, %[res] \n"
124 "mla %[res], r1, r4, %[res] \n"
125 "mla %[res], r2, r5, %[res] \n"
126 "ldmia %[v1]!, {r0-r4} \n"
127 "mla %[res], r6, r0, %[res] \n"
128 "mla %[res], r7, r1, %[res] \n"
129 "mla %[res], r8, r2, %[res] \n"
130 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
131 "mla %[res], r0, r3, %[res] \n"
132 "mla %[res], r1, r4, %[res] \n"
133 "ldmia %[v1]!, {r0-r5} \n"
134 "mla %[res], r6, r0, %[res] \n"
135 "mla %[res], r7, r1, %[res] \n"
136 "mla %[res], r8, r2, %[res] \n"
137 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
138 "mla %[res], r0, r3, %[res] \n"
139 "mla %[res], r1, r4, %[res] \n"
140 "mla %[res], r2, r5, %[res] \n"
141 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
142 "mla %[res], r6, r0, %[res] \n"
143 "mla %[res], r7, r1, %[res] \n"
144 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
145 "mla %[res], r0, r3, %[res] \n"
146 "mla %[res], r1, r4, %[res] \n"
147 "mla %[res], r2, r5, %[res] \n"
148 "ldmia %[v1]!, {r0-r4} \n"
149 "mla %[res], r6, r0, %[res] \n"
150 "mla %[res], r7, r1, %[res] \n"
151 "mla %[res], r8, r2, %[res] \n"
152 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
153 "mla %[res], r0, r3, %[res] \n"
154 "mla %[res], r1, r4, %[res] \n"
155 "ldmia %[v1]!, {r0-r5} \n"
156 "mla %[res], r6, r0, %[res] \n"
157 "mla %[res], r7, r1, %[res] \n"
158 "mla %[res], r8, r2, %[res] \n"
159#if ORDER > 32
160 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
161#else
162 "ldmia %[v2]!, {r0-r2} \n"
163#endif
164 "mla %[res], r0, r3, %[res] \n"
165 "mla %[res], r1, r4, %[res] \n"
166 "mla %[res], r2, r5, %[res] \n"
167#if ORDER > 32
168 "subs %[cnt], %[cnt], #1 \n"
169 "bne 1b \n"
170#endif
171
172#else /* ORDER <= 16 */
173 "ldmia %[v1]!, {r0-r3} \n" 169 "ldmia %[v1]!, {r0-r3} \n"
174 "ldmia %[v2]!, {r4-r7} \n" 170 "ldmia %[v2]!, {r4-r7} \n"
175 "mul %[res], r4, r0 \n" 171 "mul %[res], r4, r0 \n"
176 "mla %[res], r5, r1, %[res] \n" 172 "mla %[res], r5, r1, %[res] \n"
177 "mla %[res], r6, r2, %[res] \n" 173 "mla %[res], r6, r2, %[res] \n"
178 "mla %[res], r7, r3, %[res] \n" 174 "mla %[res], r7, r3, %[res] \n"
179 175#endif
180 ".rept 3 \n" 176 ".rept " BLOCK_REPEAT "\n"
181 "ldmia %[v1]!, {r0-r3} \n" 177 "ldmia %[v1]!, {r0-r3} \n"
182 "ldmia %[v2]!, {r4-r7} \n" 178 "ldmia %[v2]!, {r4-r7} \n"
183 "mla %[res], r4, r0, %[res] \n" 179 "mla %[res], r4, r0, %[res] \n"
@@ -185,7 +181,10 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
185 "mla %[res], r6, r2, %[res] \n" 181 "mla %[res], r6, r2, %[res] \n"
186 "mla %[res], r7, r3, %[res] \n" 182 "mla %[res], r7, r3, %[res] \n"
187 ".endr \n" 183 ".endr \n"
188#endif /* ORDER <= 16 */ 184#if ORDER > 32
185 "subs %[cnt], %[cnt], #1 \n"
186 "bne 1b \n"
187#endif
189 : /* outputs */ 188 : /* outputs */
190#if ORDER > 32 189#if ORDER > 32
191 [cnt]"+r"(cnt), 190 [cnt]"+r"(cnt),
@@ -197,9 +196,6 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
197 : /* clobbers */ 196 : /* clobbers */
198 "r0", "r1", "r2", "r3", 197 "r0", "r1", "r2", "r3",
199 "r4", "r5", "r6", "r7" 198 "r4", "r5", "r6", "r7"
200#if ORDER > 16
201 ,"r8"
202#endif
203 ); 199 );
204 return res; 200 return res;
205} 201}