path: root/apps/codecs/libwmapro/wmapro_math.h
author     Andree Buschmann <AndreeBuschmann@t-online.de>    2010-07-28 20:46:51 +0000
committer  Andree Buschmann <AndreeBuschmann@t-online.de>    2010-07-28 20:46:51 +0000
commit     17069799a935fc96367b7cfd7d5e3ec689bcef92 (patch)
tree       c738101524645e41b8b0bcd6d5f94e5c62538970 /apps/codecs/libwmapro/wmapro_math.h
parent     3bb8020f787514cd853d17e6d5ee9df29b156e28 (diff)
download   rockbox-17069799a935fc96367b7cfd7d5e3ec689bcef92.tar.gz
           rockbox-17069799a935fc96367b7cfd7d5e3ec689bcef92.zip
Refactor asm macros in libwmapro's vector_fixmul_() functions. No change to output samples.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27604 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwmapro/wmapro_math.h')
-rw-r--r--    apps/codecs/libwmapro/wmapro_math.h    108
1 file changed, 46 insertions, 62 deletions
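
What the refactor does, in outline: the CPU-specific inner-loop bodies of vector_fixmul_window() and vector_fixmul_scalar() move into per-CPU kernel macros (VECT_MUL_WIN_KERNEL, VECT_MUL_SCALAR_KERNEL), so each function body exists only once and picks up the matching kernel at compile time. The sketch below shows only the generic (non-asm) path and is simplified from the diff that follows; the fixmul31()/fixmul24() definitions here are plain-C stand-ins for the helpers defined elsewhere in wmapro_math.h (assumed to compute (a*b)>>31 and (a*b)>>24), not the actual Rockbox implementations.

    #include <stdint.h>

    /* Plain-C stand-ins for the fixed-point multiply helpers (assumed semantics). */
    #define fixmul31(a, b) ((int32_t)(((int64_t)(a) * (int64_t)(b)) >> 31))
    #define fixmul24(a, b) ((int32_t)(((int64_t)(a) * (int64_t)(b)) >> 24))

    /* Generic kernels as introduced by this commit; the CPU_COLDFIRE and
     * CPU_ARM builds define the same macros with inline asm instead. */
    #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
        dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi); \
        dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj);

    #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
        dst[i  ] = fixmul24(src[i  ], mul); \
        dst[i+1] = fixmul24(src[i+1], mul); \
        dst[i+2] = fixmul24(src[i+2], mul); \
        dst[i+3] = fixmul24(src[i+3], mul);

    /* One shared loop body per function, independent of the CPU. */
    static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
                                            const int32_t *src1, const int32_t *win,
                                            int len)
    {
        int i, j;
        dst  += len;
        win  += len;
        src0 += len;
        for (i = -len, j = len - 1; i < 0; i++, j--) {
            int32_t s0 = src0[i];
            int32_t s1 = src1[j];
            int32_t wi = -win[i];
            int32_t wj = -win[j];
            VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj);
        }
    }

    static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                            int32_t mul, int len)
    {
        int i;  /* len is always a multiple of 4, so the 4-way unroll is safe */
        for (i = 0; i < len; i += 4) {
            VECT_MUL_SCALAR_KERNEL(dst, src, mul);
        }
    }
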
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index c94fa80271..3672c0103b 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -181,35 +181,26 @@
 #endif /* CPU_COLDFIRE, CPU_ARM */
 
 #if defined(CPU_COLDFIRE)
-static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
-                                        const int32_t *src1, const int32_t *win,
-                                        int len)
-{
-    int i, j;
-    dst += len;
-    win += len;
-    src0+= len;
-    for(i=-len, j=len-1; i<0; i++, j--) {
-        int32_t s0 = src0[i];
-        int32_t s1 = src1[j];
-        int32_t wi = -win[i];
-        int32_t wj = -win[j];
-        asm volatile (
-            "mac.l %[s0], %[wj], %%acc0\n\t"
-            "msac.l %[s1], %[wi], %%acc0\n\t"
-            "mac.l %[s0], %[wi], %%acc1\n\t"
-            "mac.l %[s1], %[wj], %%acc1\n\t"
-            "movclr.l %%acc0, %[s0]\n\t"
-            "move.l %[s0], (%[dst_i])\n\t"
-            "movclr.l %%acc1, %[s0]\n\t"
-            "move.l %[s0], (%[dst_j])\n\t"
-            : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */
-            : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
-              [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
-            : "cc", "memory");
-    }
-}
+    #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
+        asm volatile ( \
+            "mac.l %[s0], %[wj], %%acc0 \n\t" \
+            "msac.l %[s1], %[wi], %%acc0 \n\t" \
+            "mac.l %[s0], %[wi], %%acc1 \n\t" \
+            "mac.l %[s1], %[wj], %%acc1 \n\t" \
+            "movclr.l %%acc0, %[s0] \n\t" \
+            "move.l %[s0], (%[dst_i]) \n\t" \
+            "movclr.l %%acc1, %[s0] \n\t" \
+            "move.l %[s0], (%[dst_j]) \n\t" \
+            : [s0] "+r" (s0) /* register is clobbered so specify it as an input */ \
+            : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), \
+              [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) \
+            : "cc", "memory");
 #else
+    #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
+        dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi); \
+        dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj);
+#endif /* CPU_COLDFIRE */
+
 static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
                                         const int32_t *src1, const int32_t *win,
                                         int len)
@@ -223,41 +214,38 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
         int32_t s1 = src1[j]; /* s1 = src1[2*len-1 ... len] */
         int32_t wi = -win[i]; /* wi = -win[ 0 ... len-1] */
         int32_t wj = -win[j]; /* wj = -win[2*len-1 ... len] */
-        dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi); /* dst[ 0 ... len-1] */
-        dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj); /* dst[2*len-1 ... len] */
+        VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj);
     }
 }
-#endif
 
 #if defined(CPU_ARM)
-static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
-                                        int32_t mul, int len)
-{
-    /* len is _always_ a multiple of 4, because len is the difference of sfb's
-     * which themselves are always a multiple of 4. */
-    int i;
-    for (i=0; i<len; i+=4) {
-        asm volatile (
-            "ldmia %[src]!, {r1-r4} \n\t"
-            "smull r0, r5, r1, %[mul] \n\t"
-            "mov r0, r0, lsr #24 \n\t"
-            "orr r0, r0, r5, lsl #8 \n\t"
-            "smull r1, r5, r2, %[mul] \n\t"
-            "mov r1, r1, lsr #24 \n\t"
-            "orr r1, r1, r5, lsl #8 \n\t"
-            "smull r2, r5, r3, %[mul] \n\t"
-            "mov r2, r2, lsr #24 \n\t"
-            "orr r2, r2, r5, lsl #8 \n\t"
-            "smull r3, r5, r4, %[mul] \n\t"
-            "mov r3, r3, lsr #24 \n\t"
-            "orr r3, r3, r5, lsl #8 \n\t"
-            "stmia %[dst]!, {r0-r3} \n"
-            : [dst]"+r"(dst), [src]"+r"(src)
-            : [mul]"r"(mul)
+    #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
+        asm volatile ( \
+            "ldmia %[src]!, {r1-r4} \n\t" \
+            "smull r0, r5, r1, %[mul] \n\t" \
+            "mov r0, r0, lsr #24 \n\t" \
+            "orr r0, r0, r5, lsl #8 \n\t" \
+            "smull r1, r5, r2, %[mul] \n\t" \
+            "mov r1, r1, lsr #24 \n\t" \
+            "orr r1, r1, r5, lsl #8 \n\t" \
+            "smull r2, r5, r3, %[mul] \n\t" \
+            "mov r2, r2, lsr #24 \n\t" \
+            "orr r2, r2, r5, lsl #8 \n\t" \
+            "smull r3, r5, r4, %[mul] \n\t" \
+            "mov r3, r3, lsr #24 \n\t" \
+            "orr r3, r3, r5, lsl #8 \n\t" \
+            "stmia %[dst]!, {r0-r3} \n" \
+            : [dst]"+r"(dst), [src]"+r"(src) \
+            : [mul]"r"(mul) \
             : "r0", "r1", "r2", "r3", "r4", "r5", "memory");
-    }
-}
 #else
+    #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
+        dst[i  ] = fixmul24(src[i  ], mul); \
+        dst[i+1] = fixmul24(src[i+1], mul); \
+        dst[i+2] = fixmul24(src[i+2], mul); \
+        dst[i+3] = fixmul24(src[i+3], mul);
+#endif /* CPU_ARM */
+
 static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                         int32_t mul, int len)
 {
@@ -265,13 +253,9 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
      * which themselves are always a multiple of 4. */
     int i;
     for (i=0; i<len; i+=4) {
-        dst[i  ] = fixmul24(src[i  ], mul);
-        dst[i+1] = fixmul24(src[i+1], mul);
-        dst[i+2] = fixmul24(src[i+2], mul);
-        dst[i+3] = fixmul24(src[i+3], mul);
+        VECT_MUL_SCALAR_KERNEL(dst, src, mul);
     }
 }
-#endif /* CPU_ARM */
 
 static inline int av_clip(int a, int amin, int amax)
 {
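
As a sanity check on the "No change to output samples" claim for the ARM path: each smull/lsr/orr triple in VECT_MUL_SCALAR_KERNEL computes a 32x32->64-bit product and keeps bits [55:24] of it, which matches the (a*b)>>24 behaviour assumed for fixmul24() in the sketch above. A small standalone illustration of that equivalence (the function name fixmul24_arm_style and the test values are illustrative, not from the source):

    #include <assert.h>
    #include <stdint.h>

    /* C rendering of one smull/lsr/orr step from the ARM kernel:
     * smull yields the low and high words of the 64-bit product;
     * (lo >> 24) | (hi << 8) reassembles bits [55:24], i.e. product >> 24. */
    static int32_t fixmul24_arm_style(int32_t x, int32_t mul)
    {
        int64_t  prod = (int64_t)x * mul;
        uint32_t lo   = (uint32_t)prod;          /* smull: low word  */
        uint32_t hi   = (uint32_t)(prod >> 32);  /* smull: high word */
        return (int32_t)((lo >> 24) | (hi << 8));
    }

    int main(void)
    {
        /* Compare against the plain 64-bit shift on a few sample values. */
        const int32_t samples[] = { 0, 1, -1, 123456789, -123456789,
                                    INT32_MIN, INT32_MAX };
        const int32_t mul = 0x02000000; /* arbitrary example multiplier */
        for (unsigned n = 0; n < sizeof(samples)/sizeof(samples[0]); n++) {
            int32_t ref = (int32_t)(((int64_t)samples[n] * mul) >> 24);
            assert(fixmul24_arm_style(samples[n], mul) == ref);
        }
        return 0;
    }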