diff options
Diffstat (limited to 'lib/rbcodec/codecs/libtremor/asm_arm.h')
-rw-r--r-- | lib/rbcodec/codecs/libtremor/asm_arm.h | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libtremor/asm_arm.h b/lib/rbcodec/codecs/libtremor/asm_arm.h new file mode 100644 index 0000000000..42834a2581 --- /dev/null +++ b/lib/rbcodec/codecs/libtremor/asm_arm.h | |||
@@ -0,0 +1,190 @@ | |||
1 | /******************************************************************** | ||
2 | * * | ||
3 | * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * | ||
4 | * * | ||
5 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * | ||
6 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * | ||
7 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * | ||
8 | * * | ||
9 | * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * | ||
10 | * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * | ||
11 | * * | ||
12 | ******************************************************************** | ||
13 | |||
14 | function: arm7 and later wide math functions | ||
15 | |||
16 | ********************************************************************/ | ||
17 | |||
18 | #ifdef _ARM_ASSEM_ | ||
19 | #if ARM_ARCH < 6 | ||
20 | #define INCL_OPTIMIZED_VECTOR_FMUL_WINDOW | ||
/* Windowed combination of two input blocks, written as a single ARM
 * inline-asm loop.
 *
 * Pointer set-up done in C before the asm:
 *   dst0 / win0 start at dst / win and are post-incremented;
 *   dst1 / win1 start at dst + 2*len / win + 2*len and are pre-decremented;
 *   src0 is post-incremented, src1 starts at src1 + len and is
 *   pre-decremented.
 *
 * One step loads s0 = *src0++, s1 = *--src1, w0 = *win0++, w1 = *--win1
 * and stores
 *   *--dst1 = hi32(s0*w0 + s1*w1) << 1
 *   *dst0++ = hi32(s0*w1 - s1*w0) << 1
 * where hi32() is the upper word of the 64-bit smull/smlal accumulator
 * (r5 of the r5:r4 pair). Each loop iteration performs two such steps and
 * then subtracts 2 from len; the loop repeats while len > 0, and is skipped
 * entirely when len <= 0 on entry.
 *
 * NOTE(review): steps are always executed in pairs, so len is expected to
 * be even - confirm against callers.
 *
 * r0-r5 are used as scratch and are listed as clobbers together with the
 * condition flags and memory.
 */
21 | static inline void ff_vector_fmul_window_c(ogg_int32_t *dst, const ogg_int32_t *src0, | ||
22 | const ogg_int32_t *src1, const ogg_int32_t *win, int len) | ||
23 | { | ||
24 | /* len is always a power of 2 and always >= 16 so this is unrolled 2 times*/ | ||
25 | ogg_int32_t *dst0 = dst, *dst1 = dst + 2*len; | ||
26 | const ogg_int32_t *win0 = win, *win1 = win + 2*len; | ||
27 | src1 += len; | ||
28 | asm volatile ("cmp %[len], #0\n\t" /* set flags for the first loop test at 1: */ | ||
29 | "b 1f\n\t" /* enter at the bottom so len <= 0 runs nothing */ | ||
30 | "0:\n\t" | ||
31 | "ldr r0, [%[src0]], #4\n\t" /* r0 = s0 = *src0++ */ | ||
32 | "ldr r1, [%[src1], #-4]!\n\t" /* r1 = s1 = *--src1 */ | ||
33 | "ldr r2, [%[win0]], #4\n\t" /* r2 = w0 = *win0++ */ | ||
34 | "ldr r3, [%[win1], #-4]!\n\t" /* r3 = w1 = *--win1 */ | ||
35 | "smull r4, r5, r0, r2\n\t" /* r5:r4 = s0*w0 */ | ||
36 | "smlal r4, r5, r1, r3\n\t" /* r5:r4 += s1*w1 */ | ||
37 | "rsb r2, r2, #0\n\t" /* r2 = -w0, used by the smlal below */ | ||
38 | "lsl r5, r5, #1\n\t" | ||
39 | "str r5, [%[dst1], #-4]!\n\t" /* *--dst1 = high word << 1 */ | ||
40 | "smull r4, r5, r0, r3\n\t" /* r5:r4 = s0*w1 */ | ||
41 | "smlal r4, r5, r1, r2\n\t" /* r5:r4 += s1*(-w0) */ | ||
42 | "ldr r0, [%[src0]], #4\n\t" /* inputs of the second step are loaded before the pending store */ | ||
43 | "ldr r1, [%[src1], #-4]!\n\t" | ||
44 | "lsl r5, r5, #1\n\t" | ||
45 | "str r5, [%[dst0]], #4\n\t" /* *dst0++ = high word << 1 */ | ||
46 | |||
47 | "ldr r2, [%[win0]], #4\n\t" /* second step: same arithmetic as above */ | ||
48 | "ldr r3, [%[win1], #-4]!\n\t" | ||
49 | "smull r4, r5, r0, r2\n\t" | ||
50 | "smlal r4, r5, r1, r3\n\t" | ||
51 | "rsb r2, r2, #0\n\t" | ||
52 | "lsl r5, r5, #1\n\t" | ||
53 | "str r5, [%[dst1], #-4]!\n\t" | ||
54 | "smull r4, r5, r0, r3\n\t" | ||
55 | "smlal r4, r5, r1, r2\n\t" | ||
56 | "subs %[len], %[len], #2\n\t" /* two steps done; flags are tested by the bgt at 1: */ | ||
57 | "lsl r5, r5, #1\n\t" | ||
58 | "str r5, [%[dst0]], #4\n\t" | ||
59 | "1:\n\t" | ||
60 | "bgt 0b\n\t" /* loop while len > 0 */ | ||
61 | : [dst0] "+r" (dst0), [dst1] "+r" (dst1), | ||
62 | [src0] "+r" (src0), [src1] "+r" (src1), | ||
63 | [win0] "+r" (win0), [win1] "+r" (win1), | ||
64 | [len] "+r" (len) | ||
65 | :: "r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory"); | ||
66 | } | ||
67 | #endif | ||
68 | #ifndef _V_LSP_MATH_ASM | ||
69 | #define _V_LSP_MATH_ASM | ||
70 | |||
/* Accumulate the two running products used by the LSP curve code.
 *
 * In/out (through pointers):
 *   *qip, *pip - running unsigned products qi and pi
 *   *qexpp     - shift count accumulated while the products are scaled down
 * Inputs:
 *   ilsp       - array of m values, read but never written here
 *   wi         - value subtracted from every ilsp[] entry
 *   m          - number of entries in ilsp[]
 *
 * Operation, as implemented by the asm below:
 *   1. For the m>>1 pairs (ilsp[j], ilsp[j+1]), walked from the highest pair
 *      down to ilsp[0]:  qi *= |ilsp[j]-wi|,  pi *= |ilsp[j+1]-wi|  as
 *      32x32->64 bit multiplies. If the sum of the two high words is
 *      non-zero, both 64-bit products are shifted right by 16 and qexp is
 *      increased by 16; otherwise the low words are kept unchanged.
 *   2. If m is odd: qi *= |ilsp[m-1]-wi| and pi *= 0x4000, followed by the
 *      same shift-down check.
 *   3. qi|pi is examined and both values are shifted right by a common
 *      amount r2 (added to qexp), so that the larger fits the
 *      "max 16 sig figs" target stated in the original comment.
 *
 * Constraint notes:
 *   - qi/pi/qexp are early-clobber ("+&r") because they are written while
 *     ilsp, wi and m are still read by later instructions, so they must
 *     never share a register with those inputs.
 *   - "memory" is clobbered because ilsp[] is loaded from memory by
 *     ldmdb/ldr without the array being an asm operand.
 *   - r0-r3 are scratch registers; the condition flags are modified.
 */
71 | static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, | ||
72 | ogg_int32_t *qexpp, | ||
73 | ogg_int32_t *ilsp,ogg_int32_t wi, | ||
74 | ogg_int32_t m){ | ||
75 | |||
76 | ogg_uint32_t qi=*qip,pi=*pip; | ||
77 | ogg_int32_t qexp=*qexpp; | ||
78 | |||
79 | asm("mov r0,%3;" /* r0 = ilsp */ | ||
80 | "movs r1,%5,asr#1;" /* r1 = m>>1 (number of pairs), sets Z */ | ||
81 | "add r0,r0,r1,lsl#3;" /* r0 = one past the last full pair */ | ||
82 | "beq 2f;\n" /* no pairs at all */ | ||
83 | "1:" | ||
84 | |||
85 | "ldmdb r0!,{r1,r3};" /* r1 = ilsp[j], r3 = ilsp[j+1]; r0 steps back one pair */ | ||
86 | "subs r1,r1,%4;" //ilsp[j]-wi | ||
87 | "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi) | ||
88 | "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi) | ||
89 | |||
90 | "subs r1,r3,%4;" //ilsp[j+1]-wi | ||
91 | "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi) | ||
92 | "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi) | ||
93 | |||
94 | "cmn r2,r3;" // shift down 16? | ||
95 | "beq 0f;" /* sum of high words is zero: keep the low words */ | ||
96 | "add %2,%2,#16;" | ||
97 | "mov %0,%0,lsr #16;" | ||
98 | "orr %0,%0,r2,lsl #16;" /* qi = bits 16..47 of the 64-bit product */ | ||
99 | "mov %1,%1,lsr #16;" | ||
100 | "orr %1,%1,r3,lsl #16;" /* pi = bits 16..47 of the 64-bit product */ | ||
101 | "0:" | ||
102 | "cmp r0,%3;\n" | ||
103 | "bhi 1b;\n" /* continue until r0 is back at ilsp */ | ||
104 | |||
105 | "2:" | ||
106 | // odd filter asymmetry | ||
107 | "ands r0,%5,#1;\n" | ||
108 | "beq 3f;\n" /* m even: nothing left to multiply */ | ||
109 | "add r0,%3,%5,lsl#2;\n" /* r0 = &ilsp[m] */ | ||
110 | |||
111 | "ldr r1,[r0,#-4];\n" /* r1 = ilsp[m-1] */ | ||
112 | "mov r0,#0x4000;\n" /* constant factor applied to pi */ | ||
113 | |||
114 | "subs r1,r1,%4;\n" //ilsp[j]-wi | ||
115 | "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi) | ||
116 | "umull %0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi) | ||
117 | "umull %1,r3,r0,%1;\n" //pi*=0x4000 | ||
118 | |||
119 | "cmn r2,r3;\n" // shift down 16? | ||
120 | "beq 3f;\n" | ||
121 | "add %2,%2,#16;\n" | ||
122 | "mov %0,%0,lsr #16;\n" | ||
123 | "orr %0,%0,r2,lsl #16;\n" | ||
124 | "mov %1,%1,lsr #16;\n" | ||
125 | "orr %1,%1,r3,lsl #16;\n" | ||
126 | |||
127 | //qi=(pi>>shift)*labs(ilsp[j]-wi); | ||
128 | //pi=(qi>>shift)*labs(ilsp[j+1]-wi); | ||
129 | //qexp+=shift; | ||
130 | |||
131 | //} | ||
132 | |||
133 | /* normalize to max 16 sig figs */ | ||
134 | "3:" | ||
135 | "mov r2,#0;" /* r2 = total right shift */ | ||
136 | "orr r1,%0,%1;" /* one shift is chosen for both values */ | ||
137 | "tst r1,#0xff000000;" | ||
138 | "addne r2,r2,#8;" | ||
139 | "movne r1,r1,lsr #8;" | ||
140 | "tst r1,#0x00f00000;" | ||
141 | "addne r2,r2,#4;" | ||
142 | "movne r1,r1,lsr #4;" | ||
143 | "tst r1,#0x000c0000;" | ||
144 | "addne r2,r2,#2;" | ||
145 | "movne r1,r1,lsr #2;" | ||
146 | "tst r1,#0x00020000;" | ||
147 | "addne r2,r2,#1;" | ||
148 | "movne r1,r1,lsr #1;" | ||
149 | "tst r1,#0x00010000;" | ||
150 | "addne r2,r2,#1;" | ||
151 | "mov %0,%0,lsr r2;" | ||
152 | "mov %1,%1,lsr r2;" | ||
153 | "add %2,%2,r2;" | ||
154 | |||
155 | : "+&r"(qi),"+&r"(pi),"+&r"(qexp) | ||
156 | : "r"(ilsp),"r"(wi),"r"(m) | ||
157 | : "r0","r1","r2","r3","cc","memory"); | ||
158 | |||
159 | *qip=qi; | ||
160 | *pip=pi; | ||
161 | *qexpp=qexp; | ||
162 | } | ||
163 | |||
/* Left-normalise *qip within its low 16 bits and adjust *qexpp to match.
 *
 * Four tests on bits 8..15 (masks 0xff00, 0xf000, 0xc000, 0x8000) form a
 * binary search: whenever the tested field is all zero, qi is shifted left
 * by 8, 4, 2 or 1 and the same amount is subtracted from qexp. Each test
 * sees the value produced by the previous conditional shift.
 *
 * Bits above bit 15 are never examined here.
 * NOTE(review): presumably the caller has already reduced qi to at most
 * 16 significant bits - confirm against the call site.
 *
 * Only the two operand registers and the condition flags are modified.
 */
164 | static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){ | ||
165 | |||
166 | ogg_uint32_t qi=*qip; | ||
167 | ogg_int32_t qexp=*qexpp; | ||
168 | |||
169 | asm("tst %0,#0x0000ff00;" /* bits 8..15 all clear? */ | ||
170 | "moveq %0,%0,lsl #8;" | ||
171 | "subeq %1,%1,#8;" | ||
172 | "tst %0,#0x0000f000;" /* bits 12..15 all clear? */ | ||
173 | "moveq %0,%0,lsl #4;" | ||
174 | "subeq %1,%1,#4;" | ||
175 | "tst %0,#0x0000c000;" /* bits 14..15 all clear? */ | ||
176 | "moveq %0,%0,lsl #2;" | ||
177 | "subeq %1,%1,#2;" | ||
178 | "tst %0,#0x00008000;" /* bit 15 clear? */ | ||
179 | "moveq %0,%0,lsl #1;" | ||
180 | "subeq %1,%1,#1;" | ||
181 | : "+r"(qi),"+r"(qexp) | ||
182 | : | ||
183 | : "cc"); | ||
184 | *qip=qi; | ||
185 | *qexpp=qexp; | ||
186 | } | ||
187 | |||
188 | #endif | ||
189 | #endif | ||
190 | |||