diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-07-28 20:46:51 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-07-28 20:46:51 +0000 |
commit | 17069799a935fc96367b7cfd7d5e3ec689bcef92 (patch) | |
tree | c738101524645e41b8b0bcd6d5f94e5c62538970 /apps/codecs | |
parent | 3bb8020f787514cd853d17e6d5ee9df29b156e28 (diff) | |
download | rockbox-17069799a935fc96367b7cfd7d5e3ec689bcef92.tar.gz rockbox-17069799a935fc96367b7cfd7d5e3ec689bcef92.zip |
Refactor asm macros in libwmapro's vector_fixmul_() functions. No change to output samples.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27604 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libwmapro/wmapro_math.h | 108 |
1 file changed, 46 insertions(+), 62 deletions(-)
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index c94fa80271..3672c0103b 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h | |||
@@ -181,35 +181,26 @@ | |||
181 | #endif /* CPU_COLDFIRE, CPU_ARM */ | 181 | #endif /* CPU_COLDFIRE, CPU_ARM */ |
182 | 182 | ||
183 | #if defined(CPU_COLDFIRE) | 183 | #if defined(CPU_COLDFIRE) |
184 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | 184 | #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \ |
185 | const int32_t *src1, const int32_t *win, | 185 | asm volatile ( \ |
186 | int len) | 186 | "mac.l %[s0], %[wj], %%acc0 \n\t" \ |
187 | { | 187 | "msac.l %[s1], %[wi], %%acc0 \n\t" \ |
188 | int i, j; | 188 | "mac.l %[s0], %[wi], %%acc1 \n\t" \ |
189 | dst += len; | 189 | "mac.l %[s1], %[wj], %%acc1 \n\t" \ |
190 | win += len; | 190 | "movclr.l %%acc0, %[s0] \n\t" \ |
191 | src0+= len; | 191 | "move.l %[s0], (%[dst_i]) \n\t" \ |
192 | for(i=-len, j=len-1; i<0; i++, j--) { | 192 | "movclr.l %%acc1, %[s0] \n\t" \ |
193 | int32_t s0 = src0[i]; | 193 | "move.l %[s0], (%[dst_j]) \n\t" \ |
194 | int32_t s1 = src1[j]; | 194 | : [s0] "+r" (s0) /* register is clobbered so specify it as an input */ \ |
195 | int32_t wi = -win[i]; | 195 | : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), \ |
196 | int32_t wj = -win[j]; | 196 | [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) \ |
197 | asm volatile ( | 197 | : "cc", "memory"); |
198 | "mac.l %[s0], %[wj], %%acc0\n\t" | ||
199 | "msac.l %[s1], %[wi], %%acc0\n\t" | ||
200 | "mac.l %[s0], %[wi], %%acc1\n\t" | ||
201 | "mac.l %[s1], %[wj], %%acc1\n\t" | ||
202 | "movclr.l %%acc0, %[s0]\n\t" | ||
203 | "move.l %[s0], (%[dst_i])\n\t" | ||
204 | "movclr.l %%acc1, %[s0]\n\t" | ||
205 | "move.l %[s0], (%[dst_j])\n\t" | ||
206 | : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ | ||
207 | : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), | ||
208 | [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) | ||
209 | : "cc", "memory"); | ||
210 | } | ||
211 | } | ||
212 | #else | 198 | #else |
199 | #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \ | ||
200 | dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi); \ | ||
201 | dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj); | ||
202 | #endif /* CPU_COLDFIRE */ | ||
203 | |||
213 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | 204 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, |
214 | const int32_t *src1, const int32_t *win, | 205 | const int32_t *src1, const int32_t *win, |
215 | int len) | 206 | int len) |
@@ -223,41 +214,38 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | |||
223 | int32_t s1 = src1[j]; /* s1 = src1[2*len-1 ... len] */ | 214 | int32_t s1 = src1[j]; /* s1 = src1[2*len-1 ... len] */ |
224 | int32_t wi = -win[i]; /* wi = -win[ 0 ... len-1] */ | 215 | int32_t wi = -win[i]; /* wi = -win[ 0 ... len-1] */ |
225 | int32_t wj = -win[j]; /* wj = -win[2*len-1 ... len] */ | 216 | int32_t wj = -win[j]; /* wj = -win[2*len-1 ... len] */ |
226 | dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi); /* dst[ 0 ... len-1] */ | 217 | VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj); |
227 | dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj); /* dst[2*len-1 ... len] */ | ||
228 | } | 218 | } |
229 | } | 219 | } |
230 | #endif | ||
231 | 220 | ||
232 | #if defined(CPU_ARM) | 221 | #if defined(CPU_ARM) |
233 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, | 222 | #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \ |
234 | int32_t mul, int len) | 223 | asm volatile ( \ |
235 | { | 224 | "ldmia %[src]!, {r1-r4} \n\t" \ |
236 | /* len is _always_ a multiple of 4, because len is the difference of sfb's | 225 | "smull r0, r5, r1, %[mul] \n\t" \ |
237 | * which themselves are always a multiple of 4. */ | 226 | "mov r0, r0, lsr #24 \n\t" \ |
238 | int i; | 227 | "orr r0, r0, r5, lsl #8 \n\t" \ |
239 | for (i=0; i<len; i+=4) { | 228 | "smull r1, r5, r2, %[mul] \n\t" \ |
240 | asm volatile ( | 229 | "mov r1, r1, lsr #24 \n\t" \ |
241 | "ldmia %[src]!, {r1-r4} \n\t" | 230 | "orr r1, r1, r5, lsl #8 \n\t" \ |
242 | "smull r0, r5, r1, %[mul] \n\t" | 231 | "smull r2, r5, r3, %[mul] \n\t" \ |
243 | "mov r0, r0, lsr #24 \n\t" | 232 | "mov r2, r2, lsr #24 \n\t" \ |
244 | "orr r0, r0, r5, lsl #8 \n\t" | 233 | "orr r2, r2, r5, lsl #8 \n\t" \ |
245 | "smull r1, r5, r2, %[mul] \n\t" | 234 | "smull r3, r5, r4, %[mul] \n\t" \ |
246 | "mov r1, r1, lsr #24 \n\t" | 235 | "mov r3, r3, lsr #24 \n\t" \ |
247 | "orr r1, r1, r5, lsl #8 \n\t" | 236 | "orr r3, r3, r5, lsl #8 \n\t" \ |
248 | "smull r2, r5, r3, %[mul] \n\t" | 237 | "stmia %[dst]!, {r0-r3} \n" \ |
249 | "mov r2, r2, lsr #24 \n\t" | 238 | : [dst]"+r"(dst), [src]"+r"(src) \ |
250 | "orr r2, r2, r5, lsl #8 \n\t" | 239 | : [mul]"r"(mul) \ |
251 | "smull r3, r5, r4, %[mul] \n\t" | ||
252 | "mov r3, r3, lsr #24 \n\t" | ||
253 | "orr r3, r3, r5, lsl #8 \n\t" | ||
254 | "stmia %[dst]!, {r0-r3} \n" | ||
255 | : [dst]"+r"(dst), [src]"+r"(src) | ||
256 | : [mul]"r"(mul) | ||
257 | : "r0", "r1", "r2", "r3", "r4", "r5", "memory"); | 240 | : "r0", "r1", "r2", "r3", "r4", "r5", "memory"); |
258 | } | ||
259 | } | ||
260 | #else | 241 | #else |
242 | #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \ | ||
243 | dst[i ] = fixmul24(src[i ], mul); \ | ||
244 | dst[i+1] = fixmul24(src[i+1], mul); \ | ||
245 | dst[i+2] = fixmul24(src[i+2], mul); \ | ||
246 | dst[i+3] = fixmul24(src[i+3], mul); | ||
247 | #endif /* CPU_ARM */ | ||
248 | |||
261 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, | 249 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, |
262 | int32_t mul, int len) | 250 | int32_t mul, int len) |
263 | { | 251 | { |
@@ -265,13 +253,9 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, | |||
265 | * which themselves are always a multiple of 4. */ | 253 | * which themselves are always a multiple of 4. */ |
266 | int i; | 254 | int i; |
267 | for (i=0; i<len; i+=4) { | 255 | for (i=0; i<len; i+=4) { |
268 | dst[i ] = fixmul24(src[i ], mul); | 256 | VECT_MUL_SCALAR_KERNEL(dst, src, mul); |
269 | dst[i+1] = fixmul24(src[i+1], mul); | ||
270 | dst[i+2] = fixmul24(src[i+2], mul); | ||
271 | dst[i+3] = fixmul24(src[i+3], mul); | ||
272 | } | 257 | } |
273 | } | 258 | } |
274 | #endif /* CPU_ARM */ | ||
275 | 259 | ||
276 | static inline int av_clip(int a, int amin, int amax) | 260 | static inline int av_clip(int a, int amin, int amax) |
277 | { | 261 | { |