Diffstat (limited to 'apps/codecs/libwma/asm_arm.h')
 -rw-r--r--  apps/codecs/libwma/asm_arm.h  343
 1 file changed, 343 insertions, 0 deletions
diff --git a/apps/codecs/libwma/asm_arm.h b/apps/codecs/libwma/asm_arm.h
new file mode 100644
index 0000000000..e0ab3e7e20
--- /dev/null
+++ b/apps/codecs/libwma/asm_arm.h
@@ -0,0 +1,343 @@
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/
#ifdef CPU_ARM

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH

static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}

static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return MULT32(x,y)<<1;
}

static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs  %0, %0, lsr #15\n\t"
               "adc   %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
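
/* For reference only (not in the original file): portable equivalents of the
 * three multipliers above, assuming ogg_int64_t is available; the *_ref names
 * are illustrative. MULT32 returns the high word of the 64-bit product,
 * MULT31 is the matching Q31*Q31->Q31 multiply, and MULT31_SHIFT15 keeps 17
 * extra result bits, rounding on bit 14 (the adc picks up the carry that the
 * lsr #15 shifts out). */
static inline ogg_int32_t MULT32_ref(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 32);
}
static inline ogg_int32_t MULT31_ref(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)((((ogg_int64_t)x * y) >> 32) << 1);
}
static inline ogg_int32_t MULT31_SHIFT15_ref(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)((((ogg_int64_t)x * y) + (1 << 14)) >> 15);
}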

/* compiler memory barrier: emits no instruction, but keeps the compiler from
   reordering or caching memory accesses across it */
#define MB() asm volatile ("" : : : "memory")

#define XPROD32(a, b, t, v, x, y) \
{ \
  long l; \
  asm( "smull %0, %1, %4, %6\n\t" \
       "smlal %0, %1, %5, %7\n\t" \
       "rsb   %3, %4, #0\n\t" \
       "smull %0, %2, %5, %6\n\t" \
       "smlal %0, %2, %3, %7" \
       : "=&r" (l), "=&r" (x), "=&r" (y), "=r" ((a)) \
       : "3" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) \
       : "cc" ); \
}

static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
                           ogg_int32_t t, ogg_int32_t v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"
       "smlal %0, %1, %5, %7\n\t"
       "rsb   %3, %4, #0\n\t"
       "smull %0, %2, %5, %6\n\t"
       "smlal %0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}

static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
                            ogg_int32_t t, ogg_int32_t v,
                            ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "rsb   %2, %4, #0\n\t"
       "smull %0, %1, %3, %5\n\t"
       "smlal %0, %1, %2, %6\n\t"
       "smull %0, %2, %4, %5\n\t"
       "smlal %0, %2, %3, %6"
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
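
/* For reference only (not in the original file): a portable sketch of the
 * cross products above, assuming ogg_int64_t; the _ref name is illustrative.
 * XPROD31 is a complex rotation of (a,b) by the unit vector (t,v):
 *   x = a*t + b*v,  y = b*t - a*v   (all Q31)
 * XNPROD31 flips the signs (x = a*t - b*v, y = b*t + a*v), and XPROD32 is
 * the same rotation but stores the plain high words without the final <<1. */
static inline void XPROD31_ref(ogg_int32_t a, ogg_int32_t b,
                               ogg_int32_t t, ogg_int32_t v,
                               ogg_int32_t *x, ogg_int32_t *y)
{
  *x = (ogg_int32_t)((((ogg_int64_t)a * t + (ogg_int64_t)b * v) >> 32) << 1);
  *y = (ogg_int32_t)((((ogg_int64_t)b * t - (ogg_int64_t)a * v) >> 32) << 1);
}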

#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
static inline
void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
                  "ldmia %[y]!, {r4, r5, r6, r7};"
                  "add r0, r0, r4;"
                  "add r1, r1, r5;"
                  "add r2, r2, r6;"
                  "add r3, r3, r7;"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7",
                      "memory");
    n -= 4;
  }
  /* add final elements */
  while (n>0) {
    *x++ += *y++;
    n--;
  }
}

static inline
void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "memory");
    n -= 4;
  }
  /* copy final elements */
  while (n>0) {
    *x++ = *y++;
    n--;
  }
}
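
/* Editorial note (not in the original file): vect_copy is a plain word-for-
 * word copy, so a portable build could equivalently use memcpy from
 * <string.h>:
 *   memcpy(x, y, n * sizeof(ogg_int32_t));
 */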

static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmia %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r4;"
                  "mov   r0, r9, lsl #1;"
                  "smull r8, r9, r1, r5;"
                  "mov   r1, r9, lsl #1;"
                  "smull r8, r9, r2, r6;"
                  "mov   r2, r9, lsl #1;"
                  "smull r8, r9, r3, r7;"
                  "mov   r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory", "cc");
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window++;
    n--;
  }
}

static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmda %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r7;"
                  "mov   r0, r9, lsl #1;"
                  "smull r8, r9, r1, r6;"
                  "mov   r1, r9, lsl #1;"
                  "smull r8, r9, r2, r5;"
                  "mov   r2, r9, lsl #1;"
                  "smull r8, r9, r3, r4;"
                  "mov   r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory", "cc");
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
}
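
/* Plain-C view of the two windowing loops above (editorial note, not in the
 * original file):
 *   vect_mult_fw: data[i] = MULT31(data[i], window[i])   for i = 0..n-1
 *   vect_mult_bw: data[i] = MULT31(data[i], window[-i])  for i = 0..n-1
 * _fw walks the window forwards; _bw walks it backwards from the initial
 * pointer, which is why the ldmda load is paired with the registers in
 * reverse order. */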

#endif

#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs  %1, %0, #32768\n\t"
               "movpl %0, #0x7f00\n\t"
               "orrpl %0, %0, #0xff\n"
               "adds  %1, %0, #32768\n\t"
               "movmi %0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}
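
/* Editorial note (not in the original file): the asm clamps x to the signed
 * 16-bit range, building 0x7fff in two instructions for the overflow case;
 * the underflow case loads 0x8000, which reads as -32768 once the result is
 * truncated to 16 bits. A portable sketch of the intended behaviour:
 *
 *   if (x >  32767) x =  32767;
 *   if (x < -32768) x = -32768;
 *   return x;
 */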

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov   r0,%3;"
      "mov   r1,%5,asr#1;"
      "add   r0,r0,r1,lsl#3;"
      "1:"

      "ldmdb r0!,{r1,r3};"
      "subs  r1,r1,%4;"          //ilsp[j]-wi
      "rsbmi r1,r1,#0;"          //labs(ilsp[j]-wi)
      "umull %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)

      "subs  r1,r3,%4;"          //ilsp[j+1]-wi
      "rsbmi r1,r1,#0;"          //labs(ilsp[j+1]-wi)
      "umull %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)

      "cmn   r2,r3;"             // shift down 16?
      "beq   0f;"
      "add   %2,%2,#16;"
      "mov   %0,%0,lsr #16;"
      "orr   %0,%0,r2,lsl #16;"
      "mov   %1,%1,lsr #16;"
      "orr   %1,%1,r3,lsl #16;"
      "0:"
      "cmp   r0,%3;\n"
      "bhi   1b;\n"

      // odd filter asymmetry
      "ands  r0,%5,#1;\n"
      "beq   2f;\n"
      "add   r0,%3,%5,lsl#2;\n"

      "ldr   r1,[r0,#-4];\n"
      "mov   r0,#0x4000;\n"

      "subs  r1,r1,%4;\n"        //ilsp[j]-wi
      "rsbmi r1,r1,#0;\n"        //labs(ilsp[j]-wi)
      "umull %0,r2,r1,%0;\n"     //qi*=labs(ilsp[j]-wi)
      "umull %1,r3,r0,%1;\n"     //pi*=0x4000 (constant loaded into r0 above)

      "cmn   r2,r3;\n"           // shift down 16?
      "beq   2f;\n"
      "add   %2,%2,#16;\n"
      "mov   %0,%0,lsr #16;\n"
      "orr   %0,%0,r2,lsl #16;\n"
      "mov   %1,%1,lsr #16;\n"
      "orr   %1,%1,r3,lsl #16;\n"

      //qi=(pi>>shift)*labs(ilsp[j]-wi);
      //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}

      /* normalize to max 16 sig figs */
      "2:"
      "mov   r2,#0;"
      "orr   r1,%0,%1;"
      "tst   r1,#0xff000000;"
      "addne r2,r2,#8;"
      "movne r1,r1,lsr #8;"
      "tst   r1,#0x00f00000;"
      "addne r2,r2,#4;"
      "movne r1,r1,lsr #4;"
      "tst   r1,#0x000c0000;"
      "addne r2,r2,#2;"
      "movne r1,r1,lsr #2;"
      "tst   r1,#0x00020000;"
      "addne r2,r2,#1;"
      "movne r1,r1,lsr #1;"
      "tst   r1,#0x00010000;"
      "addne r2,r2,#1;"
      "mov   %0,%0,lsr r2;"
      "mov   %1,%1,lsr r2;"
      "add   %2,%2,r2;"

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}
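
/* For reference only (not in the original file): a plain-C restatement of
 * lsp_loop_asm, assuming labs() from <stdlib.h> and a 64-bit unsigned type.
 * It follows the algorithm documented in the comments above; the _ref name
 * is illustrative. */
static inline void lsp_loop_ref(ogg_uint32_t *qip, ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp, ogg_int32_t wi,
                                ogg_int32_t m)
{
  ogg_uint32_t qi = *qip, pi = *pip;
  ogg_int32_t qexp = *qexpp;
  ogg_int32_t j;

  for (j = 0; j + 1 < m; j += 2) {
    unsigned long long q = (unsigned long long)qi * labs(ilsp[j]   - wi);
    unsigned long long p = (unsigned long long)pi * labs(ilsp[j+1] - wi);
    if ((q >> 32) | (p >> 32)) {       /* either product overflowed 32 bits: */
      q >>= 16; p >>= 16; qexp += 16;  /* shift both down 16 */
    }
    qi = (ogg_uint32_t)q; pi = (ogg_uint32_t)p;
  }
  if (m & 1) {                         /* odd filter order: one leftover tap */
    unsigned long long q = (unsigned long long)qi * labs(ilsp[m-1] - wi);
    unsigned long long p = (unsigned long long)pi * 0x4000;
    if ((q >> 32) | (p >> 32)) { q >>= 16; p >>= 16; qexp += 16; }
    qi = (ogg_uint32_t)q; pi = (ogg_uint32_t)p;
  }
  while ((qi | pi) & 0xffff0000u) {    /* normalize to max 16 sig figs */
    qi >>= 1; pi >>= 1; qexp++;
  }
  *qip = qi; *pip = pi; *qexpp = qexp;
}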

static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst   %0,#0x0000ff00;"
      "moveq %0,%0,lsl #8;"
      "subeq %1,%1,#8;"
      "tst   %0,#0x0000f000;"
      "moveq %0,%0,lsl #4;"
      "subeq %1,%1,#4;"
      "tst   %0,#0x0000c000;"
      "moveq %0,%0,lsl #2;"
      "subeq %1,%1,#2;"
      "tst   %0,#0x00008000;"
      "moveq %0,%0,lsl #1;"
      "subeq %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}
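
/* For reference only (not in the original file): a plain-C equivalent of
 * lsp_norm_asm, assuming qi is nonzero and fits in 16 bits on entry, which
 * is how lsp_loop_asm leaves it; the _ref name is illustrative. */
static inline void lsp_norm_ref(ogg_uint32_t *qip, ogg_int32_t *qexpp)
{
  ogg_uint32_t qi = *qip;
  ogg_int32_t qexp = *qexpp;

  while (!(qi & 0x8000)) {  /* shift up until bit 15 is set */
    qi <<= 1;
    qexp--;
  }
  *qip = qi;
  *qexpp = qexp;
}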

#endif
#endif