diff options
Diffstat (limited to 'apps/codecs/libwma/wmafixed.h')
-rw-r--r-- | apps/codecs/libwma/wmafixed.h | 129 |
1 file changed, 129 insertions, 0 deletions
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h index 7f04a955ef..4225f165c9 100644 --- a/apps/codecs/libwma/wmafixed.h +++ b/apps/codecs/libwma/wmafixed.h | |||
@@ -106,3 +106,132 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y) | |||
106 | } | 106 | } |
107 | 107 | ||
108 | #endif | 108 | #endif |
109 | |||
110 | |||
111 | /* | ||
112 | * Helper functions for wma_window. | ||
113 | * | ||
114 | * | ||
115 | */ | ||
116 | |||
117 | #ifdef CPU_ARM | ||
/*
 * dst[i] += data[i] * window[i] for i in [0, n), fixed-point multiply.
 * ARM inline-asm version; processes two elements per loop iteration, so
 * n must be a positive multiple of 2 (block sizes are powers of two).
 *
 * NOTE(review): the product is taken as high-word-of-smull << 1, i.e.
 * (a*b) >> 31 — presumably the same Q31 scaling as the fixmul32b used by
 * the generic C fallback below; confirm against the fixed-point format
 * defined earlier in this file.
 */
static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
                                       const fixed32 *window, int n)
{
    /* Block sizes are always power of two */
    asm volatile (
        "0:"
        "ldmia %[d]!, {r0, r1};"    /* r0 = data[0], r1 = data[1]; advance d */
        "ldmia %[w]!, {r4, r5};"    /* r4 = window[0], r5 = window[1]; advance w */
        /* consume the first data and window value so we can use those
         * registers again */
        "smull r8, r9, r0, r4;"     /* 64-bit product data0*window0; high word in r9 */
        "ldmia %[dst], {r0, r4};"   /* r0 = dst[0], r4 = dst[1] (dst not advanced yet) */
        "add r0, r0, r9, lsl #1;"   /* *dst=*dst+(r9<<1)*/
        "smull r8, r9, r1, r5;"     /* second product data1*window1 */
        "add r1, r4, r9, lsl #1;"   /* r1 = dst[1] + (high word << 1) */
        "stmia %[dst]!, {r0, r1};"  /* store both sums; advance dst */
        "subs %[n], %[n], #2;"      /* two elements consumed per pass */
        "bne 0b;"
        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
139 | |||
/*
 * dst[i] = src0[i] * src1[len-1-i] for i in [0, len): fixed-point multiply
 * with src1 read in reverse order. ARM inline-asm version; two elements per
 * iteration, so len must be a positive multiple of 2.
 *
 * NOTE(review): product scaling is high-word-of-smull << 1 ((a*b) >> 31),
 * presumably matching the Q31 fixmul32b of the generic path — confirm.
 */
static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
                                       int len)
{
    /* Block sizes are always power of two */
    asm volatile (
        "add %[s1], %[s1], %[n], lsl #2;"  /* s1 = one past the end of src1 */
        "0:"
        "ldmia %[s0]!, {r0, r1};"   /* forward load: r0 = src0[i], r1 = src0[i+1] */
        "ldmdb %[s1]!, {r4, r5};"   /* backward load: r5/r4 = next two src1 from the end */
        "smull r8, r9, r0, r5;"     /* src0[i] * src1[len-1-i]; high word in r9 */
        "mov r0, r9, lsl #1;"
        "smull r8, r9, r1, r4;"     /* src0[i+1] * src1[len-2-i] */
        "mov r1, r9, lsl #1;"
        "stmia %[dst]!, {r0, r1};"  /* store both products; advance dst */
        "subs %[n], %[n], #2;"
        "bne 0b;"
        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
159 | |||
160 | #elif defined(CPU_COLDFIRE) | ||
161 | |||
/*
 * dst[i] += data[i] * window[i] for i in [0, n), ColdFire EMAC version.
 * Processes four elements per iteration, so n must be a positive multiple
 * of 4 (per the comment below, real block sizes are always much larger).
 *
 * NOTE(review): the value read back with movclr.l depends on the EMAC
 * accumulator mode configured elsewhere (fractional mode would make mac.l
 * match the Q31 scaling of the generic fixmul32b path) — confirm the codec
 * sets the EMAC mode before calling this.
 */
static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
                                       const fixed32 *window, int n)
{
    /* Block sizes are always power of two. Smallest block is always way bigger
     * than four too.*/
    asm volatile (
        "0:"
        "movem.l (%[d]), %%d0-%%d3;"             /* d0..d3 = data[0..3] */
        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"   /* d4,d5,a0,a1 = window[0..3] */
        "mac.l %%d0, %%d4, %%acc0;"              /* acc0 = data[0]*window[0] */
        "mac.l %%d1, %%d5, %%acc1;"
        "mac.l %%d2, %%a0, %%acc2;"
        "mac.l %%d3, %%a1, %%acc3;"
        "lea.l (16, %[d]), %[d];"                /* advance four 32-bit elements */
        "lea.l (16, %[w]), %[w];"
        "movclr.l %%acc0, %%d0;"                 /* read products, clear accumulators */
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;" /* load dst[0..3] */
        "add.l %%d4, %%d0;"                      /* dst[i] + product[i] */
        "add.l %%d5, %%d1;"
        "add.l %%a0, %%d2;"
        "add.l %%a1, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"           /* store the four sums */
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"                       /* four elements per pass */
        "jne 0b;"
        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
193 | |||
/*
 * dst[i] = src0[i] * src1[len-1-i] for i in [0, len): fixed-point multiply
 * with src1 read in reverse. ColdFire EMAC version; four elements per
 * iteration, so len must be a positive multiple of 4.
 *
 * NOTE(review): as with vector_fmul_add_add above, the movclr.l result
 * scaling depends on the EMAC mode configured elsewhere — confirm.
 */
static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
                                       int len)
{
    /* Block sizes are always power of two. Smallest block is always way bigger
     * than four too.*/
    asm volatile (
        "lea.l (-16, %[s1], %[n]*4), %[s1];"     /* s1 -> last four elements of src1 */
        "0:"
        "movem.l (%[s0]), %%d0-%%d3;"            /* d0..d3 = src0[i..i+3] (ascending) */
        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"  /* d4,d5,a0,a1 = tail quad of src1, ascending */
        /* pair ascending src0 with descending src1: a1 is the highest address */
        "mac.l %%d0, %%a1, %%acc0;"              /* src0[i]   * src1[len-1-i] */
        "mac.l %%d1, %%a0, %%acc1;"              /* src0[i+1] * src1[len-2-i] */
        "mac.l %%d2, %%d5, %%acc2;"
        "mac.l %%d3, %%d4, %%acc3;"
        "lea.l (16, %[s0]), %[s0];"              /* src0 walks forward ... */
        "lea.l (-16, %[s1]), %[s1];"             /* ... src1 walks backward */
        "movclr.l %%acc0, %%d0;"                 /* read products, clear accumulators */
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"           /* store four results */
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
221 | |||
222 | #else | ||
223 | |||
224 | static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){ | ||
225 | int i; | ||
226 | for(i=0; i<len; i++) | ||
227 | dst[i] = fixmul32b(src0[i], src1[i]) + dst[i]; | ||
228 | } | ||
229 | |||
230 | static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){ | ||
231 | int i; | ||
232 | src1 += len-1; | ||
233 | for(i=0; i<len; i++) | ||
234 | dst[i] = fixmul32b(src0[i], src1[-i]); | ||
235 | } | ||
236 | |||
237 | #endif | ||