diff options
Diffstat (limited to 'apps/codecs/libwmapro/wmapro_math.h')
-rw-r--r-- | apps/codecs/libwmapro/wmapro_math.h | 66 |
1 files changed, 48 insertions, 18 deletions
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index 6f8d6dbe0f..c94fa80271 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h | |||
@@ -180,7 +180,7 @@ | |||
180 | } | 180 | } |
181 | #endif /* CPU_COLDFIRE, CPU_ARM */ | 181 | #endif /* CPU_COLDFIRE, CPU_ARM */ |
182 | 182 | ||
183 | #ifdef CPU_COLDFIRE | 183 | #if defined(CPU_COLDFIRE) |
184 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | 184 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, |
185 | const int32_t *src1, const int32_t *win, | 185 | const int32_t *src1, const int32_t *win, |
186 | int len) | 186 | int len) |
@@ -190,23 +190,23 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | |||
190 | win += len; | 190 | win += len; |
191 | src0+= len; | 191 | src0+= len; |
192 | for(i=-len, j=len-1; i<0; i++, j--) { | 192 | for(i=-len, j=len-1; i<0; i++, j--) { |
193 | int32_t s0 = src0[i]; | 193 | int32_t s0 = src0[i]; |
194 | int32_t s1 = src1[j]; | 194 | int32_t s1 = src1[j]; |
195 | int32_t wi = -win[i]; | 195 | int32_t wi = -win[i]; |
196 | int32_t wj = -win[j]; | 196 | int32_t wj = -win[j]; |
197 | 197 | asm volatile ( | |
198 | asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t" | 198 | "mac.l %[s0], %[wj], %%acc0\n\t" |
199 | "msac.l %[s1], %[wi], %%acc0\n\t" | 199 | "msac.l %[s1], %[wi], %%acc0\n\t" |
200 | "mac.l %[s0], %[wi], %%acc1\n\t" | 200 | "mac.l %[s0], %[wi], %%acc1\n\t" |
201 | "mac.l %[s1], %[wj], %%acc1\n\t" | 201 | "mac.l %[s1], %[wj], %%acc1\n\t" |
202 | "movclr.l %%acc0, %[s0]\n\t" | 202 | "movclr.l %%acc0, %[s0]\n\t" |
203 | "move.l %[s0], (%[dst_i])\n\t" | 203 | "move.l %[s0], (%[dst_i])\n\t" |
204 | "movclr.l %%acc1, %[s0]\n\t" | 204 | "movclr.l %%acc1, %[s0]\n\t" |
205 | "move.l %[s0], (%[dst_j])\n\t" | 205 | "move.l %[s0], (%[dst_j])\n\t" |
206 | : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ | 206 | : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ |
207 | : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), | 207 | : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), |
208 | [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) | 208 | [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) |
209 | : "cc", "memory"); | 209 | : "cc", "memory"); |
210 | } | 210 | } |
211 | } | 211 | } |
212 | #else | 212 | #else |
@@ -229,6 +229,35 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | |||
229 | } | 229 | } |
230 | #endif | 230 | #endif |
231 | 231 | ||
232 | #if defined(CPU_ARM) | ||
233 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, | ||
234 | int32_t mul, int len) | ||
235 | { /* ARM version: stores the low 32 bits of ((int64_t)src[k] * mul) >> 24 into dst[k], four elements per asm block. NOTE(review): presumably equivalent to the fixmul24() calls in the non-ARM version - confirm. */ | ||
236 | /* len is _always_ a multiple of 4, because len is the difference of sfb's | ||
237 | * which themselves are always a multiple of 4. The loop below relies on this: every pass loads and stores exactly four words, so a len that is not a multiple of 4 would touch elements past len. */ | ||
238 | int i; | ||
239 | for (i=0; i<len; i+=4) { /* i only counts elements; the asm advances src and dst itself */ | ||
240 | asm volatile ( | ||
241 | "ldmia %[src]!, {r1-r4} \n\t" /* r1-r4 = next four src words; src is advanced past them */ | ||
242 | "smull r0, r5, r1, %[mul] \n\t" /* r5:r0 = r1 * mul as a signed 64-bit product (r0 low, r5 high) */ | ||
243 | "mov r0, r0, lsr #24 \n\t" /* keep the top 8 bits of the low word */ | ||
244 | "orr r0, r0, r5, lsl #8 \n\t" /* merge the high word above them: r0 = low 32 bits of product >> 24 */ | ||
245 | "smull r1, r5, r2, %[mul] \n\t" /* second element: r5:r1 = r2 * mul (r1 is free, its input was consumed) */ | ||
246 | "mov r1, r1, lsr #24 \n\t" | ||
247 | "orr r1, r1, r5, lsl #8 \n\t" /* r1 = second result */ | ||
248 | "smull r2, r5, r3, %[mul] \n\t" /* third element: r5:r2 = r3 * mul */ | ||
249 | "mov r2, r2, lsr #24 \n\t" | ||
250 | "orr r2, r2, r5, lsl #8 \n\t" /* r2 = third result */ | ||
251 | "smull r3, r5, r4, %[mul] \n\t" /* fourth element: r5:r3 = r4 * mul */ | ||
252 | "mov r3, r3, lsr #24 \n\t" | ||
253 | "orr r3, r3, r5, lsl #8 \n\t" /* r3 = fourth result */ | ||
254 | "stmia %[dst]!, {r0-r3} \n" /* store the four results; dst is advanced past them */ | ||
255 | : [dst]"+r"(dst), [src]"+r"(src) /* read/write operands: both pointers are updated by the writeback ("!") forms */ | ||
256 | : [mul]"r"(mul) /* multiplier is only read */ | ||
257 | : "r0", "r1", "r2", "r3", "r4", "r5", "memory"); /* fixed scratch registers used above; the asm also reads and writes memory */ | ||
258 | } | ||
259 | } | ||
260 | #else | ||
232 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, | 261 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, |
233 | int32_t mul, int len) | 262 | int32_t mul, int len) |
234 | { | 263 | { |
@@ -242,6 +271,7 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, | |||
242 | dst[i+3] = fixmul24(src[i+3], mul); | 271 | dst[i+3] = fixmul24(src[i+3], mul); |
243 | } | 272 | } |
244 | } | 273 | } |
274 | #endif /* CPU_ARM */ | ||
245 | 275 | ||
246 | static inline int av_clip(int a, int amin, int amax) | 276 | static inline int av_clip(int a, int amin, int amax) |
247 | { | 277 | { |