diff options
Diffstat (limited to 'apps/codecs/Tremor/asm_arm.h')
-rw-r--r-- | apps/codecs/Tremor/asm_arm.h | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/apps/codecs/Tremor/asm_arm.h b/apps/codecs/Tremor/asm_arm.h index e623ce9b3d..bc09ac5170 100644 --- a/apps/codecs/Tremor/asm_arm.h +++ b/apps/codecs/Tremor/asm_arm.h | |||
@@ -95,6 +95,112 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
95 | *y = y1 << 1; | 95 | *y = y1 << 1; |
96 | } | 96 | } |
97 | 97 | ||
98 | #ifndef _V_VECT_OPS | ||
99 | #define _V_VECT_OPS | ||
100 | |||
101 | /* asm versions of vector operations for block.c, window.c */ | ||
102 | static inline | ||
103 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
104 | { | ||
105 | while (n>=4) { | ||
106 | asm volatile ("ldmia %[x], {r0, r1, r2, r3};" | ||
107 | "ldmia %[y]!, {r4, r5, r6, r7};" | ||
108 | "add r0, r0, r4;" | ||
109 | "add r1, r1, r5;" | ||
110 | "add r2, r2, r6;" | ||
111 | "add r3, r3, r7;" | ||
112 | "stmia %[x]!, {r0, r1, r2, r3};" | ||
113 | : [x] "+r" (x), [y] "+r" (y) | ||
114 | : : "r0", "r1", "r2", "r3", | ||
115 | "r4", "r5", "r6", "r7", | ||
116 | "memory"); | ||
117 | n -= 4; | ||
118 | } | ||
119 | /* add final elements */ | ||
120 | while (n>0) { | ||
121 | *x++ += *y++; | ||
122 | n--; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | static inline | ||
127 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
128 | { | ||
129 | while (n>=4) { | ||
130 | asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};" | ||
131 | "stmia %[x]!, {r0, r1, r2, r3};" | ||
132 | : [x] "+r" (x), [y] "+r" (y) | ||
133 | : : "r0", "r1", "r2", "r3", | ||
134 | "memory"); | ||
135 | n -= 4; | ||
136 | } | ||
137 | /* copy final elements */ | ||
138 | while (n>0) { | ||
139 | *x++ = *y++; | ||
140 | n--; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | static inline | ||
145 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
146 | { | ||
147 | while (n>=4) { | ||
148 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" | ||
149 | "ldmia %[w]!, {r4, r5, r6, r7};" | ||
150 | "smull r8, r9, r0, r4;" | ||
151 | "mov r0, r9, lsl #1;" | ||
152 | "smull r8, r9, r1, r5;" | ||
153 | "mov r1, r9, lsl #1;" | ||
154 | "smull r8, r9, r2, r6;" | ||
155 | "mov r2, r9, lsl #1;" | ||
156 | "smull r8, r9, r3, r7;" | ||
157 | "mov r3, r9, lsl #1;" | ||
158 | "stmia %[d]!, {r0, r1, r2, r3};" | ||
159 | : [d] "+r" (data), [w] "+r" (window) | ||
160 | : : "r0", "r1", "r2", "r3", | ||
161 | "r4", "r5", "r6", "r7", "r8", "r9", | ||
162 | "memory", "cc"); | ||
163 | n -= 4; | ||
164 | } | ||
165 | while(n>0) { | ||
166 | *data = MULT31(*data, *window); | ||
167 | data++; | ||
168 | window++; | ||
169 | n--; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | static inline | ||
174 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
175 | { | ||
176 | while (n>=4) { | ||
177 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" | ||
178 | "ldmda %[w]!, {r4, r5, r6, r7};" | ||
179 | "smull r8, r9, r0, r7;" | ||
180 | "mov r0, r9, lsl #1;" | ||
181 | "smull r8, r9, r1, r6;" | ||
182 | "mov r1, r9, lsl #1;" | ||
183 | "smull r8, r9, r2, r5;" | ||
184 | "mov r2, r9, lsl #1;" | ||
185 | "smull r8, r9, r3, r4;" | ||
186 | "mov r3, r9, lsl #1;" | ||
187 | "stmia %[d]!, {r0, r1, r2, r3};" | ||
188 | : [d] "+r" (data), [w] "+r" (window) | ||
189 | : : "r0", "r1", "r2", "r3", | ||
190 | "r4", "r5", "r6", "r7", "r8", "r9", | ||
191 | "memory", "cc"); | ||
192 | n -= 4; | ||
193 | } | ||
194 | while(n>0) { | ||
195 | *data = MULT31(*data, *window); | ||
196 | data++; | ||
197 | window--; | ||
198 | n--; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | #endif | ||
203 | |||
98 | #endif | 204 | #endif |
99 | 205 | ||
100 | #ifndef _V_CLIP_MATH | 206 | #ifndef _V_CLIP_MATH |