Diffstat (limited to 'lib/rbcodec/codecs/libwmapro/wmapro_math.h')
-rw-r--r--  lib/rbcodec/codecs/libwmapro/wmapro_math.h  250
1 file changed, 250 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwmapro/wmapro_math.h b/lib/rbcodec/codecs/libwmapro/wmapro_math.h
new file mode 100644
index 0000000000..c78d6b627f
--- /dev/null
+++ b/lib/rbcodec/codecs/libwmapro/wmapro_math.h
@@ -0,0 +1,250 @@
#ifndef _WMAPRO_MATH_H_
#define _WMAPRO_MATH_H_

#include <inttypes.h>

/* rockbox: not used
#define fixtof16(x)  (float)((float)(x) / (float)(1 << 16))
#define fixtof31(x)  (float)((float)(x) / (float)(1 << 31))
#define ftofix16(x)  ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
#define ftofix31(x)  ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
*/
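
/* Illustrative note: the fixed-point formats used below are Q16.16
 * (16 fractional bits) and Q1.31 (31 fractional bits). For example,
 * 0x00008000 represents 0.5 in Q16.16 and 0x40000000 represents 0.5
 * in Q1.31. */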

#if defined(CPU_ARM)
/* Calculates: result = (X*Y)>>Z */
#define fixmulshift(X,Y,Z) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[lo], %[lo], lsr %[shr]   \n\t"  /* lo >>= Z */ \
        "orr %[lo], %[lo], %[hi], lsl %[shl]"  /* lo |= (hi << (32-Z)) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-(Z))); \
    lo; \
})

/* Calculates: result = (X*Y)>>16 */
#define fixmul16(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[lo], %[lo], lsr #16      \n\t"  /* lo >>= 16 */ \
        "orr %[lo], %[lo], %[hi], lsl #16"     /* lo |= (hi << 16) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y)); \
    lo; \
})

/* Calculates: result = (X*Y)>>24 */
#define fixmul24(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[lo], %[lo], lsr #24      \n\t"  /* lo >>= 24 */ \
        "orr %[lo], %[lo], %[hi], lsl #8"      /* lo |= (hi << 8) */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y)); \
    lo; \
})

/* Calculates: result = (X*Y)>>31 (loses 1 bit of precision) */
#define fixmul31(X,Y) \
({ \
    int32_t lo; \
    int32_t hi; \
    asm volatile ( \
        "smull %[lo], %[hi], %[x], %[y] \n\t"  /* multiply */ \
        "mov %[hi], %[hi], lsl #1"             /* hi <<= 1 */ \
        : [lo]"=&r"(lo), [hi]"=&r"(hi) \
        : [x]"r"(X), [y]"r"(Y)); \
    hi; \
})
#elif defined(CPU_COLDFIRE)
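/* The EMAC unit runs in fractional mode here, so each 32x32-bit product is
 * shifted left by one bit as it enters the accumulator: acc0 holds (X*Y)<<1,
 * and its high half is (X*Y)>>31. fixmul31 therefore reads its result
 * straight from the accumulator, while fixmulshift and fixmul16 compensate
 * for the extra bit (shifting by 31-Z instead of 32-Z, and the "lsr.l #1"). */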
/* Calculates: result = (X*Y)>>Z */
#define fixmulshift(X,Y,Z) \
({ \
    int32_t t1; \
    int32_t t2; \
    int32_t x = (X); /* local copy: the asm below overwrites this register */ \
    asm volatile ( \
        "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
        "mulu.l %[y],%[x] \n\t"      /* get lower half, avoid emac stall */ \
        "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
        "moveq.l #31,%[t2] \n\t" \
        "sub.l %[sh],%[t2] \n\t"     /* t2 = 31 - shift */ \
        "ble.s 1f \n\t" \
        "asl.l %[t2],%[t1] \n\t"     /* hi <<= 31 - shift */ \
        "lsr.l %[sh],%[x] \n\t"      /* (unsigned)lo >>= shift */ \
        "or.l %[x],%[t1] \n\t"       /* combine result */ \
        "bra.s 2f \n\t" \
        "1: \n\t" \
        "neg.l %[t2] \n\t"           /* t2 = shift - 31 */ \
        "asr.l %[t2],%[t1] \n\t"     /* hi >>= t2 */ \
        "2: \n" \
        : [t1]"=&d"(t1), [t2]"=&d"(t2), [x] "+d" (x) \
        : [y] "d" ((Y)), [sh]"d"((Z))); \
    t1; \
})

/* Calculates: result = (X*Y)>>16 */
#define fixmul16(X,Y) \
({ \
    int32_t t, x = (X); \
    asm volatile ( \
        "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
        "mulu.l %[y],%[x] \n\t"      /* get lower half, avoid emac stall */ \
        "movclr.l %%acc0,%[t] \n\t"  /* get higher half */ \
        "lsr.l #1,%[t] \n\t"         /* hi >>= 1 to compensate emac shift */ \
        "move.w %[t],%[x] \n\t"      /* combine halfwords */ \
        "swap %[x] \n\t" \
        : [t]"=&d"(t), [x] "+d" (x) \
        : [y] "d" ((Y))); \
    x; \
})

/* Calculates: result = (X*Y)>>31 (may lose msb to overflow) */
#define fixmul31(X,Y) \
({ \
    int32_t t; \
    asm volatile ( \
        "mac.l %[x], %[y], %%acc0\n\t" /* multiply */ \
        "movclr.l %%acc0, %[t]\n\t"    /* get higher half as result */ \
        : [t] "=d" (t) \
        : [x] "r" ((X)), [y] "r" ((Y))); \
    t; \
})
#else
static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= shamt;

    return (int32_t)temp;
}

static inline int32_t fixmul31(int32_t x, int32_t y)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= 31;

    return (int32_t)temp;
}

static inline int32_t fixmul24(int32_t x, int32_t y)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= 24;

    return (int32_t)temp;
}

static inline int32_t fixmul16(int32_t x, int32_t y)
{
    int64_t temp;
    temp = x;
    temp *= y;

    temp >>= 16;

    return (int32_t)temp;
}
#endif /* CPU_COLDFIRE, CPU_ARM */
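
/* Worked example (illustrative): in Q16.16, 2.0 is 0x00020000 and 1.5 is
 * 0x00018000, so fixmul16(0x00020000, 0x00018000) == 0x00030000, i.e. 3.0.
 * Likewise in Q1.31, fixmul31(0x40000000, 0x40000000) == 0x20000000,
 * i.e. 0.5 * 0.5 == 0.25. */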

#if defined(CPU_COLDFIRE)
#define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
    asm volatile ( \
        "mac.l %[s0], %[wj], %%acc0 \n\t" \
        "msac.l %[s1], %[wi], %%acc0 \n\t" \
        "mac.l %[s0], %[wi], %%acc1 \n\t" \
        "mac.l %[s1], %[wj], %%acc1 \n\t" \
        "movclr.l %%acc0, %[s0] \n\t" \
        "move.l %[s0], (%[dst_i]) \n\t" \
        "movclr.l %%acc1, %[s0] \n\t" \
        "move.l %[s0], (%[dst_j]) \n\t" \
        : [s0] "+r" (s0) /* s0 is used as scratch, so declare it read/write */ \
        : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), \
          [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) \
        : "cc", "memory");
#else
#define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
    dst[i] = fixmul31(wj, s0) - fixmul31(wi, s1); \
    dst[j] = fixmul31(wi, s0) + fixmul31(wj, s1);
#endif /* CPU_COLDFIRE */

static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
                                        const int32_t *src1, const int32_t *win,
                                        int len)
{
    int i, j;
    dst  += len;
    win  += len;
    src0 += len;
    for (i = -len, j = len - 1; i < 0; i++, j--) {
        int32_t s0 =  src0[i]; /* s0 = src0[ 0 ... len-1] */
        int32_t s1 =  src1[j]; /* s1 = src1[2*len-1 ... len] */
        int32_t wi = -win[i];  /* wi = -win[ 0 ... len-1] */
        int32_t wj = -win[j];  /* wj = -win[2*len-1 ... len] */
        VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj);
    }
}
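
/* Usage sketch (illustrative; the buffer names are hypothetical): a caller
 * typically overlaps two adjacent half-blocks under a Q1.31 window,
 *
 *   vector_fixmul_window(out, saved, current, window, block_len / 2);
 *
 * which writes 2*len windowed samples to dst, reading src0 forward, src1
 * backward, and win from both ends. */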

#if defined(CPU_ARM)
#define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
    asm volatile ( \
        "ldmia %[src]!, {r1-r4} \n\t" \
        "smull r0, r5, r1, %[mul] \n\t" \
        "mov   r0, r0, lsr #16 \n\t" \
        "orr   r0, r0, r5, lsl #16 \n\t" \
        "smull r1, r5, r2, %[mul] \n\t" \
        "mov   r1, r1, lsr #16 \n\t" \
        "orr   r1, r1, r5, lsl #16 \n\t" \
        "smull r2, r5, r3, %[mul] \n\t" \
        "mov   r2, r2, lsr #16 \n\t" \
        "orr   r2, r2, r5, lsl #16 \n\t" \
        "smull r3, r5, r4, %[mul] \n\t" \
        "mov   r3, r3, lsr #16 \n\t" \
        "orr   r3, r3, r5, lsl #16 \n\t" \
        "stmia %[dst]!, {r0-r3} \n" \
        : [dst]"+r"(dst), [src]"+r"(src) \
        : [mul]"r"(mul) \
        : "r0", "r1", "r2", "r3", "r4", "r5", "memory");
#else
#define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
    dst[i  ] = fixmul16(src[i  ], mul); \
    dst[i+1] = fixmul16(src[i+1], mul); \
    dst[i+2] = fixmul16(src[i+2], mul); \
    dst[i+3] = fixmul16(src[i+3], mul);
#endif /* CPU_ARM */

static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                        int32_t mul, int len)
{
    /* len is _always_ a multiple of 4, because len is the difference of sfb's
     * which themselves are always a multiple of 4. */
    int i;
    for (i = 0; i < len; i += 4) {
        VECT_MUL_SCALAR_KERNEL(dst, src, mul);
    }
}
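
/* Usage sketch (illustrative; the names are hypothetical): scale a block of
 * coefficients by a Q16.16 gain, e.g. 0.5 == 0x00008000,
 *
 *   vector_fixmul_scalar(coeffs, coeffs, 0x00008000, num_coeffs);
 *
 * with num_coeffs a multiple of 4 as required above. In-place use
 * (dst == src) is safe: each element is read before it is written. */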

static inline int av_clip(int a, int amin, int amax)
{
    if      (a < amin) return amin;
    else if (a > amax) return amax;
    else               return a;
}
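
/* Illustrative: av_clip(300, -128, 127) == 127,
 * av_clip(-200, -128, 127) == -128. */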
#endif /* _WMAPRO_MATH_H_ */