path: root/apps/codecs/lib/asm_arm.h
Diffstat (limited to 'apps/codecs/lib/asm_arm.h')
-rw-r--r--  apps/codecs/lib/asm_arm.h  292
1 file changed, 0 insertions, 292 deletions
diff --git a/apps/codecs/lib/asm_arm.h b/apps/codecs/lib/asm_arm.h
deleted file mode 100644
index 8e5d0e68df..0000000000
--- a/apps/codecs/lib/asm_arm.h
+++ /dev/null
@@ -1,292 +0,0 @@
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/
#ifdef CPU_ARM

#define INCL_OPTIMIZED_MULT32
#if ARM_ARCH >= 6
static inline int32_t MULT32(int32_t x, int32_t y) {
  int32_t hi;
  asm volatile("smmul %[hi], %[x], %[y] \n\t"
               : [hi] "=&r" (hi)
               : [x] "r" (x), [y] "r" (y) );
  return(hi);
}
#else
static inline int32_t MULT32(int32_t x, int32_t y) {
  int32_t lo, hi;
  asm volatile("smull\t%0, %1, %2, %3 \n\t"
               : "=&r"(lo),"=&r"(hi)
               : "r"(x),"r"(y) );
  return(hi);
}
#endif
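
/* Illustrative sketch only, not part of the original file: smmul/smull above
   return the high 32 bits of the 64-bit signed product, so MULT32 behaves
   roughly like the portable version below (hypothetical name MULT32_ref). */
#if 0
static inline int32_t MULT32_ref(int32_t x, int32_t y) {
  /* high word of the full 64-bit product */
  return (int32_t)(((int64_t)x * (int64_t)y) >> 32);
}
#endif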

#define INCL_OPTIMIZED_MULT31
static inline int32_t MULT31(int32_t x, int32_t y) {
  return MULT32(x,y)<<1;
}
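
/* Illustrative sketch only, not part of the original file: MULT31 is the Q31
   fixed-point multiply.  A portable approximation (hypothetical name) is
   below; it can differ from the asm path by one LSB, since MULT32 truncates
   at bit 32 before the left shift. */
#if 0
static inline int32_t MULT31_ref(int32_t x, int32_t y) {
  return (int32_t)(((int64_t)x * (int64_t)y) >> 31);
}
#endif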

#define INCL_OPTIMIZED_MULT31_SHIFT15
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
  int32_t lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs %0, %0, lsr #15\n\t"
               "adc %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "r"(x),"r"(y)
               : "cc" );
  return(hi);
}

#define INCL_OPTIMIZED_MULT31_SHIFT16
static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
  int32_t lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs %0, %0, lsr #16\n\t"
               "adc %1, %0, %1, lsl #16\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "r"(x),"r"(y)
               : "cc" );
  return(hi);
}
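
/* Illustrative sketch only, not part of the original file: in both macros the
   movs/adc pair folds the shifted-out bit back in as a rounding carry, so the
   results roughly match the portable versions below (hypothetical names,
   results truncated to 32 bits). */
#if 0
static inline int32_t MULT31_SHIFT15_ref(int32_t x, int32_t y) {
  return (int32_t)((((int64_t)x * y) + (1 << 14)) >> 15);
}
static inline int32_t MULT31_SHIFT16_ref(int32_t x, int32_t y) {
  return (int32_t)((((int64_t)x * y) + (1 << 15)) >> 16);
}
#endif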

#define INCL_OPTIMIZED_XPROD32
#define XPROD32(a, b, t, v, x, y) \
{ \
  int32_t l; \
  asm("smull %0, %1, %3, %5\n\t" \
      "rsb %2, %6, #0\n\t" \
      "smlal %0, %1, %4, %6\n\t" \
      "smull %0, %2, %3, %2\n\t" \
      "smlal %0, %2, %4, %5" \
      : "=&r" (l), "=&r" (x), "=&r" (y) \
      : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
}
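
/* Illustrative sketch only, not part of the original file: XPROD32 keeps the
   high words of two 64-bit cross products; the rsb merely negates v so the
   subtraction can be expressed with smull/smlal.  Portable approximation with
   a hypothetical name: */
#if 0
#define XPROD32_ref(a, b, t, v, x, y) \
{ \
  (x) = (int32_t)(((int64_t)(a)*(t) + (int64_t)(b)*(v)) >> 32); \
  (y) = (int32_t)(((int64_t)(b)*(t) - (int64_t)(a)*(v)) >> 32); \
}
#endif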

#define INCL_OPTIMIZED_XPROD31_R
#define INCL_OPTIMIZED_XNPROD31_R
#if ARM_ARCH >= 6
/* These may yield slightly different results from the macros below
   because only the high 32 bits of the multiplications are accumulated, while
   the below macros use a 64-bit accumulator that is truncated to 32 bits. */
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1;\
  asm("smmul %[x1], %[t], %[a] \n\t"\
      "smmul %[y1], %[t], %[b] \n\t"\
      "smmla %[x1], %[v], %[b], %[x1] \n\t"\
      "smmls %[y1], %[v], %[a], %[y1] \n\t"\
      : [x1] "=&r" (x1), [y1] "=&r" (y1)\
      : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}

#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1;\
  asm("smmul %[x1], %[t], %[a] \n\t"\
      "smmul %[y1], %[t], %[b] \n\t"\
      "smmls %[x1], %[v], %[b], %[x1] \n\t"\
      "smmla %[y1], %[v], %[a], %[y1] \n\t"\
      : [x1] "=&r" (x1), [y1] "=&r" (y1)\
      : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}
#else
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1, l;\
  asm("smull %0, %1, %5, %3\n\t"\
      "rsb %2, %3, #0\n\t"\
      "smlal %0, %1, %6, %4\n\t"\
      "smull %0, %2, %6, %2\n\t"\
      "smlal %0, %2, %5, %4"\
      : "=&r" (l), "=&r" (x1), "=&r" (y1)\
      : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}

#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
  int32_t x1, y1, l;\
  asm("smull %0, %1, %5, %3\n\t"\
      "rsb %2, %4, #0\n\t"\
      "smlal %0, %1, %6, %2\n\t"\
      "smull %0, %2, %5, %4\n\t"\
      "smlal %0, %2, %6, %3"\
      : "=&r" (l), "=&r" (x1), "=&r" (y1)\
      : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
  _x = x1 << 1;\
  _y = y1 << 1;\
}
#endif
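
/* Illustrative sketch only, not part of the original file: both variants of
   XPROD31_R/XNPROD31_R approximate the Q31 cross products below (the
   ARM_ARCH >= 6 versions may differ by a few LSBs, as the comment above
   notes).  Hypothetical names: */
#if 0
#define XPROD31_R_ref(_a, _b, _t, _v, _x, _y) \
{ \
  (_x) = (int32_t)(((int64_t)(_a)*(_t) + (int64_t)(_b)*(_v)) >> 31); \
  (_y) = (int32_t)(((int64_t)(_b)*(_t) - (int64_t)(_a)*(_v)) >> 31); \
}
#define XNPROD31_R_ref(_a, _b, _t, _v, _x, _y) \
{ \
  (_x) = (int32_t)(((int64_t)(_a)*(_t) - (int64_t)(_b)*(_v)) >> 31); \
  (_y) = (int32_t)(((int64_t)(_b)*(_t) + (int64_t)(_a)*(_v)) >> 31); \
}
#endif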

#define INCL_OPTIMIZED_XPROD31
static inline void XPROD31(int32_t a, int32_t b,
                           int32_t t, int32_t v,
                           int32_t *x, int32_t *y)
{
  int32_t _x1, _y1;
  XPROD31_R(a, b, t, v, _x1, _y1);
  *x = _x1;
  *y = _y1;
}

#define INCL_OPTIMIZED_XNPROD31
static inline void XNPROD31(int32_t a, int32_t b,
                            int32_t t, int32_t v,
                            int32_t *x, int32_t *y)
{
  int32_t _x1, _y1;
  XNPROD31_R(a, b, t, v, _x1, _y1);
  *x = _x1;
  *y = _y1;
}
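
/* Illustrative usage sketch only, not part of the original file: viewing
   (a, b) as a complex value a + j*b and (t, v) as a Q31 twiddle t + j*v,
   XPROD31 computes the product with the conjugate (t - j*v) and XNPROD31 the
   product with (t + j*v).  Hypothetical helper: */
#if 0
static inline void rotate_q31(int32_t re, int32_t im,
                              int32_t cos_q31, int32_t sin_q31,
                              int32_t *out_re, int32_t *out_im)
{
  /* (re + j*im) * (cos - j*sin), all operands in Q31 */
  XPROD31(re, im, cos_q31, sin_q31, out_re, out_im);
}
#endif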


#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
                  "ldmia %[y]!, {r4, r5, r6, r7};"
                  "add r0, r0, r4;"
                  "add r1, r1, r5;"
                  "add r2, r2, r6;"
                  "add r3, r3, r7;"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7",
                      "memory");
    n -= 4;
  }
  /* add final elements */
  while (n>0) {
    *x++ += *y++;
    n--;
  }
}
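
/* Illustrative sketch only, not part of the original file: the block above
   adds four elements per iteration (plain ldmia reads x, the writeback forms
   advance x and y), and the scalar tail handles the remainder, so the net
   effect is simply the loop below (hypothetical name). */
#if 0
static inline void vect_add_ref(int32_t *x, const int32_t *y, int n)
{
  while (n > 0) {
    *x++ += *y++;
    n--;
  }
}
#endif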

static inline
void vect_copy(int32_t *x, const int32_t *y, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
                  "stmia %[x]!, {r0, r1, r2, r3};"
                  : [x] "+r" (x), [y] "+r" (y)
                  : : "r0", "r1", "r2", "r3",
                      "memory");
    n -= 4;
  }
  /* copy final elements */
  while (n>0) {
    *x++ = *y++;
    n--;
  }
}

static inline
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmia %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r4;"
                  "mov r0, r9, lsl #1;"
                  "smull r8, r9, r1, r5;"
                  "mov r1, r9, lsl #1;"
                  "smull r8, r9, r2, r6;"
                  "mov r2, r9, lsl #1;"
                  "smull r8, r9, r3, r7;"
                  "mov r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory" );
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window++;
    n--;
  }
}

static inline
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
  while (n>=4) {
    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                  "ldmda %[w]!, {r4, r5, r6, r7};"
                  "smull r8, r9, r0, r7;"
                  "mov r0, r9, lsl #1;"
                  "smull r8, r9, r1, r6;"
                  "mov r1, r9, lsl #1;"
                  "smull r8, r9, r2, r5;"
                  "mov r2, r9, lsl #1;"
                  "smull r8, r9, r3, r4;"
                  "mov r3, r9, lsl #1;"
                  "stmia %[d]!, {r0, r1, r2, r3};"
                  : [d] "+r" (data), [w] "+r" (window)
                  : : "r0", "r1", "r2", "r3",
                      "r4", "r5", "r6", "r7", "r8", "r9",
                      "memory" );
    n -= 4;
  }
  while(n>0) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
}
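
/* Illustrative sketch only, not part of the original file: as in vect_mult_fw,
   each smull/mov pair above is an inlined MULT31 (keep the high word, shift
   left by one).  The ldmda loads window[-3]..window[0] into r4..r7, so pairing
   r0..r3 with r7..r4 walks the window backwards; the net effect is roughly the
   loop below (hypothetical name). */
#if 0
static inline void vect_mult_bw_ref(int32_t *data, const int32_t *window, int n)
{
  while (n > 0) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
}
#endif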

#endif

/* not used anymore */
/*
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

static inline int32_t CLIP_TO_15(int32_t x) {
  int tmp;
  asm volatile("subs %1, %0, #32768\n\t"
               "movpl %0, #0x7f00\n\t"
               "orrpl %0, %0, #0xff\n"
               "adds %1, %0, #32768\n\t"
               "movmi %0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}

#endif
*/

#endif