diff options
Diffstat (limited to 'apps/codecs/Tremor/asm_arm.h')
-rw-r--r-- | apps/codecs/Tremor/asm_arm.h | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/apps/codecs/Tremor/asm_arm.h b/apps/codecs/Tremor/asm_arm.h new file mode 100644 index 0000000000..3a3716df1f --- /dev/null +++ b/apps/codecs/Tremor/asm_arm.h | |||
@@ -0,0 +1,243 @@ | |||
1 | /******************************************************************** | ||
2 | * * | ||
3 | * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * | ||
4 | * * | ||
5 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * | ||
6 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * | ||
7 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * | ||
8 | * * | ||
9 | * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * | ||
10 | * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * | ||
11 | * * | ||
12 | ******************************************************************** | ||
13 | |||
14 | function: arm7 and later wide math functions | ||
15 | |||
16 | ********************************************************************/ | ||
17 | |||
18 | #ifdef _ARM_ASSEM_ | ||
19 | |||
20 | #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_) | ||
21 | #define _V_WIDE_MATH | ||
22 | |||
/* Return the high 32 bits of the full 64-bit signed product x*y,
 * i.e. (ogg_int64_t)x * y >> 32, using a single SMULL. */
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)  /* early-clobber: outputs may not share
                                         registers with the inputs */
               : "%r"(x),"r"(y)       /* '%' marks x/y as commutable */
               : "cc");
  return(hi);                         /* low word is discarded */
}
31 | |||
32 | static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { | ||
33 | return MULT32(x,y)<<1; | ||
34 | } | ||
35 | |||
/* Return the 64-bit signed product x*y shifted right by 15, rounded:
 * the MOVS shifts the low word down 15 and leaves bit 14 (the last bit
 * shifted out) in the carry flag; the ADC recombines the high word
 * (shifted up 17) with the shifted low word and adds the carry back in,
 * rounding the result to nearest. */
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs %0, %0, lsr #15\n\t"
               "adc %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
46 | |||
/* Compiler-level memory barrier: emits no code, but the "memory" clobber
 * keeps the compiler from reordering memory accesses across this point.
 * Used between the *x / *y stores in the XPROD routines below,
 * presumably because the output pointers may alias the caller's input
 * buffers — confirm against callers. */
#define MB() asm volatile ("" : : : "memory")
48 | |||
/* Complex cross-product, high words only:
 *   *x = (a*t + b*v) >> 32
 *   *y = (b*t - a*v) >> 32
 * Operand map: %0=l (scratch low word), %1=x1, %2=y1, %3/%4=a
 * (read-write pair; the RSB negates a in place), %5=b, %6=t, %7=v.
 * MB() orders the two stores (see the macro's comment). */
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"   /* x1 = hi(a*t) */
       "smlal %0, %1, %5, %7\n\t"   /* x1 = hi(a*t + b*v) */
       "rsb   %3, %4, #0\n\t"       /* a = -a */
       "smull %0, %2, %5, %6\n\t"   /* y1 = hi(b*t) */
       "smlal %0, %2, %3, %7"       /* y1 = hi(b*t - a*v) */
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1;
  MB();
  *y = y1;
}
66 | |||
/* Q31 complex cross-product — same multiplies as XPROD32 but with the
 * results doubled:
 *   *x = (a*t + b*v) >> 31
 *   *y = (b*t - a*v) >> 31
 * (low bit of each result is always 0, since the shift-by-31 is built
 * from hi-word >> 32 followed by << 1). */
static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"   /* x1 = hi(a*t) */
       "smlal %0, %1, %5, %7\n\t"   /* x1 = hi(a*t + b*v) */
       "rsb   %3, %4, #0\n\t"       /* a = -a */
       "smull %0, %2, %5, %6\n\t"   /* y1 = hi(b*t) */
       "smlal %0, %2, %3, %7"       /* y1 = hi(b*t - a*v) */
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();                             /* order the two stores */
  *y = y1 << 1;
}
84 | |||
/* Q31 complex cross-product, negated variant:
 *   *x = (a*t - b*v) >> 31
 *   *y = (b*t + a*v) >> 31
 * Here y1 (%2) doubles as scratch: it first holds -b for the x
 * computation, then is overwritten with the y result. Unlike XPROD31,
 * no input needs to be clobbered, so a is a plain "r" input. */
static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
                            ogg_int32_t  t, ogg_int32_t  v,
                            ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "rsb   %2, %4, #0\n\t"       /* y1 = -b (temporary) */
       "smull %0, %1, %3, %5\n\t"   /* x1 = hi(a*t) */
       "smlal %0, %1, %2, %6\n\t"   /* x1 = hi(a*t - b*v) */
       "smull %0, %2, %4, %5\n\t"   /* y1 = hi(b*t) */
       "smlal %0, %2, %3, %6"       /* y1 = hi(b*t + a*v) */
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();                             /* order the two stores */
  *y = y1 << 1;
}
102 | |||
103 | #endif | ||
104 | |||
105 | #ifndef _V_CLIP_MATH | ||
106 | #define _V_CLIP_MATH | ||
107 | |||
/* Saturate x to the signed 16-bit range [-32768, 32767].
 * SUBS computes x - 32768: a non-negative result (pl) means x > 32767,
 * so x is replaced by 0x7fff (assembled as 0x7f00 | 0xff because 0x7fff
 * is not encodable as a single ARM immediate).
 * ADDS computes x + 32768: a negative result (mi) means x < -32768,
 * so x is replaced by 0x8000.
 * NOTE(review): MOV #0x8000 yields +32768 as a 32-bit value; only its
 * low 16 bits equal -32768 — presumably callers truncate the result to
 * 16-bit PCM. Confirm against callers. */
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs %1, %0, #32768\n\t"
               "movpl %0, #0x7f00\n\t"
               "orrpl %0, %0, #0xff\n"
               "adds %1, %0, #32768\n\t"
               "movmi %0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}
120 | |||
121 | #endif | ||
122 | |||
123 | #ifndef _V_LSP_MATH_ASM | ||
124 | #define _V_LSP_MATH_ASM | ||
125 | |||
/* Inner product loop of the LSP curve computation.
 * Walks the m/2 pairs of ilsp[] from the top down, accumulating
 *   qi *= labs(ilsp[j]   - wi)   (even entries)
 *   pi *= labs(ilsp[j+1] - wi)   (odd entries)
 * with 64-bit UMULLs. Whenever the high words of the products are
 * non-zero (detected with the CMN on r2+r3), both accumulators are
 * shifted down 16 bits and qexp is bumped by 16 so qi*2^qexp and
 * pi*2^qexp are preserved. If m is odd, one extra factor is folded in:
 * qi *= labs(ilsp[m-1]-wi) and pi *= 0x4000. Finally both accumulators
 * are normalized down to at most 16 significant bits, the shift count
 * again added to qexp. Results are written back through qip/pip/qexpp.
 * Clobbers r0-r3 as scratch (declared in the clobber list). */
static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov r0,%3;"                /* r0 = ilsp */
      "mov r1,%5,asr#1;"          /* r1 = m/2 */
      "add r0,r0,r1,lsl#3;"       /* r0 = &ilsp[2*(m/2)] — loop from the top */
      "1:"

      "ldmdb r0!,{r1,r3};"        /* load a pair, walking downward */
      "subs r1,r1,%4;"            //ilsp[j]-wi
      "rsbmi r1,r1,#0;"           //labs(ilsp[j]-wi)
      "umull %0,r2,r1,%0;"        //qi*=labs(ilsp[j]-wi)

      "subs r1,r3,%4;"            //ilsp[j+1]-wi
      "rsbmi r1,r1,#0;"           //labs(ilsp[j+1]-wi)
      "umull %1,r3,r1,%1;"        //pi*=labs(ilsp[j+1]-wi)

      "cmn r2,r3;"                // shift down 16?
      "beq 0f;"
      "add %2,%2,#16;"            /* qexp += 16 */
      "mov %0,%0,lsr #16;"        /* qi = (r2:qi) >> 16 */
      "orr %0,%0,r2,lsl #16;"
      "mov %1,%1,lsr #16;"        /* pi = (r3:pi) >> 16 */
      "orr %1,%1,r3,lsl #16;"
      "0:"
      "cmp r0,%3;\n"
      "bhi 1b;\n"                 /* loop while r0 > ilsp */

      // odd filter asymmetry
      "ands r0,%5,#1;\n"          /* m odd? */
      "beq 2f;\n"
      "add r0,%3,%5,lsl#2;\n"     /* r0 = &ilsp[m] */

      "ldr r1,[r0,#-4];\n"        /* r1 = ilsp[m-1] */
      "mov r0,#0x4000;\n"

      "subs r1,r1,%4;\n"          //ilsp[j]-wi
      "rsbmi r1,r1,#0;\n"         //labs(ilsp[j]-wi)
      "umull %0,r2,r1,%0;\n"      //qi*=labs(ilsp[j]-wi)
      "umull %1,r3,r0,%1;\n"      //pi*=0x4000

      "cmn r2,r3;\n"              // shift down 16?
      "beq 2f;\n"
      "add %2,%2,#16;\n"
      "mov %0,%0,lsr #16;\n"
      "orr %0,%0,r2,lsl #16;\n"
      "mov %1,%1,lsr #16;\n"
      "orr %1,%1,r3,lsl #16;\n"

      //qi=(pi>>shift)*labs(ilsp[j]-wi);
      //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}

      /* normalize to max 16 sig figs */
      "2:"
      "mov r2,#0;"                /* r2 = shift count */
      "orr r1,%0,%1;"             /* binary search on the OR of qi|pi */
      "tst r1,#0xff000000;"
      "addne r2,r2,#8;"
      "movne r1,r1,lsr #8;"
      "tst r1,#0x00f00000;"
      "addne r2,r2,#4;"
      "movne r1,r1,lsr #4;"
      "tst r1,#0x000c0000;"
      "addne r2,r2,#2;"
      "movne r1,r1,lsr #2;"
      "tst r1,#0x00020000;"
      "addne r2,r2,#1;"
      "movne r1,r1,lsr #1;"
      "tst r1,#0x00010000;"
      "addne r2,r2,#1;"
      "mov %0,%0,lsr r2;"         /* apply the shift to both accumulators */
      "mov %1,%1,lsr r2;"
      "add %2,%2,r2;"             /* ... and record it in qexp */

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}
216 | |||
/* Left-justify qi within its low 16 bits: shift qi left until bit
 * 0x8000 is set, decrementing qexp by the same number of bits so that
 * qi * 2^qexp is unchanged.
 * NOTE(review): only the low 16 bits are tested, and qi == 0 would be
 * shifted by the full 15 without ever setting the bit — presumably qi
 * is non-zero and already normalized to <= 16 significant bits by
 * lsp_loop_asm above; confirm against callers. */
static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst %0,#0x0000ff00;"       /* top byte of the halfword empty? */
      "moveq %0,%0,lsl #8;"
      "subeq %1,%1,#8;"
      "tst %0,#0x0000f000;"       /* then nibble, 2 bits, 1 bit... */
      "moveq %0,%0,lsl #4;"
      "subeq %1,%1,#4;"
      "tst %0,#0x0000c000;"
      "moveq %0,%0,lsl #2;"
      "subeq %1,%1,#2;"
      "tst %0,#0x00008000;"
      "moveq %0,%0,lsl #1;"
      "subeq %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}
240 | |||
241 | #endif | ||
242 | #endif | ||
243 | |||