Diffstat (limited to 'apps/codecs')
-rw-r--r--   apps/codecs/libwma/wmadeci.c  |   6
-rw-r--r--   apps/codecs/libwma/wmafixed.c | 129
-rw-r--r--   apps/codecs/libwma/wmafixed.h | 129
3 files changed, 130 insertions, 134 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 9e448f4b93..a3edea0e97 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -30,11 +30,7 @@
 #include "wmadata.h"
 
 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len);
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
-                                const fixed32 *window, int n);
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
-                                const fixed32 *src1, int len);
 
 /*declarations of statically allocated variables used to remove malloc calls*/
 
 fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
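The hunk above only drops the forward declarations; after this change wmadeci.c is expected to pick up vector_fmul_add_add() and vector_fmul_reverse() as static inline definitions from wmafixed.h (added below). A minimal sketch of why the definitions have to live in the header for that to work, using hypothetical file and function names rather than the Rockbox sources:

/* helpers.h -- hypothetical header, for illustration only */
#ifndef HELPERS_H
#define HELPERS_H

/* The body is visible in every .c file that includes this header, so the
 * compiler can expand calls inline; each translation unit gets its own
 * private copy and no external symbol is needed at link time. */
static inline int scale_half(int x)
{
    return x >> 1;
}

#endif /* HELPERS_H */

/* user.c -- hypothetical caller */
#include "helpers.h"

int halve_sum(int a, int b)
{
    return scale_half(a) + scale_half(b);   /* candidate for inlining here */
}

With only a bare `inline ...;` prototype, as removed above, the caller never sees the body and must emit an ordinary out-of-line call.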
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c
index 205bab4dd9..bdf87a548b 100644
--- a/apps/codecs/libwma/wmafixed.c
+++ b/apps/codecs/libwma/wmafixed.c
@@ -63,135 +63,6 @@ fixed64 Fixed32To64(fixed32 x)
     return (fixed64)x;
 }
 
-
-/*
- * Helper functions for wma_window.
- *
- *
- */
-
-#ifdef CPU_ARM
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
-                                const fixed32 *window, int n)
-{
-    /* Block sizes are always power of two */
-    asm volatile (
-        "0:"
-        "ldmia %[d]!, {r0, r1};"
-        "ldmia %[w]!, {r4, r5};"
-        /* consume the first data and window value so we can use those
-         * registers again */
-        "smull r8, r9, r0, r4;"
-        "ldmia %[dst], {r0, r4};"
-        "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
-        "smull r8, r9, r1, r5;"
-        "add r1, r4, r9, lsl #1;"
-        "stmia %[dst]!, {r0, r1};"
-        "subs %[n], %[n], #2;"
-        "bne 0b;"
-        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
-        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
-}
-
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
-                                int len)
-{
-    /* Block sizes are always power of two */
-    asm volatile (
-        "add %[s1], %[s1], %[n], lsl #2;"
-        "0:"
-        "ldmia %[s0]!, {r0, r1};"
-        "ldmdb %[s1]!, {r4, r5};"
-        "smull r8, r9, r0, r5;"
-        "mov r0, r9, lsl #1;"
-        "smull r8, r9, r1, r4;"
-        "mov r1, r9, lsl #1;"
-        "stmia %[dst]!, {r0, r1};"
-        "subs %[n], %[n], #2;"
-        "bne 0b;"
-        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
-        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
-}
-
-#elif defined(CPU_COLDFIRE)
-
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
-                                const fixed32 *window, int n)
-{
-    /* Block sizes are always power of two. Smallest block is always way bigger
-     * than four too.*/
-    asm volatile (
-        "0:"
-        "movem.l (%[d]), %%d0-%%d3;"
-        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
-        "mac.l %%d0, %%d4, %%acc0;"
-        "mac.l %%d1, %%d5, %%acc1;"
-        "mac.l %%d2, %%a0, %%acc2;"
-        "mac.l %%d3, %%a1, %%acc3;"
-        "lea.l (16, %[d]), %[d];"
-        "lea.l (16, %[w]), %[w];"
-        "movclr.l %%acc0, %%d0;"
-        "movclr.l %%acc1, %%d1;"
-        "movclr.l %%acc2, %%d2;"
-        "movclr.l %%acc3, %%d3;"
-        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
-        "add.l %%d4, %%d0;"
-        "add.l %%d5, %%d1;"
-        "add.l %%a0, %%d2;"
-        "add.l %%a1, %%d3;"
-        "movem.l %%d0-%%d3, (%[dst]);"
-        "lea.l (16, %[dst]), %[dst];"
-        "subq.l #4, %[n];"
-        "jne 0b;"
-        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
-        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
-}
-
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
-                                int len)
-{
-    /* Block sizes are always power of two. Smallest block is always way bigger
-     * than four too.*/
-    asm volatile (
-        "lea.l (-16, %[s1], %[n]*4), %[s1];"
-        "0:"
-        "movem.l (%[s0]), %%d0-%%d3;"
-        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
-        "mac.l %%d0, %%a1, %%acc0;"
-        "mac.l %%d1, %%a0, %%acc1;"
-        "mac.l %%d2, %%d5, %%acc2;"
-        "mac.l %%d3, %%d4, %%acc3;"
-        "lea.l (16, %[s0]), %[s0];"
-        "lea.l (-16, %[s1]), %[s1];"
-        "movclr.l %%acc0, %%d0;"
-        "movclr.l %%acc1, %%d1;"
-        "movclr.l %%acc2, %%d2;"
-        "movclr.l %%acc3, %%d3;"
-        "movem.l %%d0-%%d3, (%[dst]);"
-        "lea.l (16, %[dst]), %[dst];"
-        "subq.l #4, %[n];"
-        "jne 0b;"
-        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
-        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
-}
-
-#else
-
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
-    int i;
-    for(i=0; i<len; i++)
-        dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
-}
-
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
-    int i;
-    src1 += len-1;
-    for(i=0; i<len; i++)
-        dst[i] = fixmul32b(src0[i], src1[-i]);
-}
-
-#endif
-
 /*
  Not performance senstitive code here
 
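Both assembly variants removed above implement the same per-element operation as the generic C branch: a 32x32->64-bit multiply renormalised by roughly 31 bits. On ARM that is the smull followed by `r9, lsl #1` (the upper product word shifted left by one); the ColdFire path gets the corresponding scaling from the EMAC accumulators (assuming they are configured for fractional mode elsewhere in the codec). A rough C restatement of the arithmetic, offered as a sketch under the assumption that fixed32 is a signed 32-bit type and fixmul32b is the shift-right-by-31 multiply used elsewhere in this codec:

#include <stdint.h>

typedef int32_t fixed32;   /* assumption: matches the typedef in wmafixed.h */

/* Multiply two fixed-point values and drop 31 fractional bits.  The ARM
 * code computes (high_word_of_product << 1), which is this value with the
 * product's bit 31 discarded. */
static inline fixed32 mul31(fixed32 x, fixed32 y)
{
    return (fixed32)(((int64_t)x * y) >> 31);
}

/* Plain-C restatement of what vector_fmul_add_add() does per element. */
static void fmul_add_add_ref(fixed32 *dst, const fixed32 *data,
                             const fixed32 *window, int n)
{
    int i;
    for (i = 0; i < n; i++)
        dst[i] += mul31(data[i], window[i]);
}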
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index 7f04a955ef..4225f165c9 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -106,3 +106,132 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y)
 }
 
 #endif
+
+
+/*
+ * Helper functions for wma_window.
+ *
+ *
+ */
+
+#ifdef CPU_ARM
+static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
+                                       const fixed32 *window, int n)
+{
+    /* Block sizes are always power of two */
+    asm volatile (
+        "0:"
+        "ldmia %[d]!, {r0, r1};"
+        "ldmia %[w]!, {r4, r5};"
+        /* consume the first data and window value so we can use those
+         * registers again */
+        "smull r8, r9, r0, r4;"
+        "ldmia %[dst], {r0, r4};"
+        "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
+        "smull r8, r9, r1, r5;"
+        "add r1, r4, r9, lsl #1;"
+        "stmia %[dst]!, {r0, r1};"
+        "subs %[n], %[n], #2;"
+        "bne 0b;"
+        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
+        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
+}
+
+static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
+                                       int len)
+{
+    /* Block sizes are always power of two */
+    asm volatile (
+        "add %[s1], %[s1], %[n], lsl #2;"
+        "0:"
+        "ldmia %[s0]!, {r0, r1};"
+        "ldmdb %[s1]!, {r4, r5};"
+        "smull r8, r9, r0, r5;"
+        "mov r0, r9, lsl #1;"
+        "smull r8, r9, r1, r4;"
+        "mov r1, r9, lsl #1;"
+        "stmia %[dst]!, {r0, r1};"
+        "subs %[n], %[n], #2;"
+        "bne 0b;"
+        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
+        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
+}
+
+#elif defined(CPU_COLDFIRE)
+
+static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
+                                       const fixed32 *window, int n)
+{
+    /* Block sizes are always power of two. Smallest block is always way bigger
+     * than four too.*/
+    asm volatile (
+        "0:"
+        "movem.l (%[d]), %%d0-%%d3;"
+        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
+        "mac.l %%d0, %%d4, %%acc0;"
+        "mac.l %%d1, %%d5, %%acc1;"
+        "mac.l %%d2, %%a0, %%acc2;"
+        "mac.l %%d3, %%a1, %%acc3;"
+        "lea.l (16, %[d]), %[d];"
+        "lea.l (16, %[w]), %[w];"
+        "movclr.l %%acc0, %%d0;"
+        "movclr.l %%acc1, %%d1;"
+        "movclr.l %%acc2, %%d2;"
+        "movclr.l %%acc3, %%d3;"
+        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
+        "add.l %%d4, %%d0;"
+        "add.l %%d5, %%d1;"
+        "add.l %%a0, %%d2;"
+        "add.l %%a1, %%d3;"
+        "movem.l %%d0-%%d3, (%[dst]);"
+        "lea.l (16, %[dst]), %[dst];"
+        "subq.l #4, %[n];"
+        "jne 0b;"
+        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
+        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
+}
+
+static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
+                                       int len)
+{
+    /* Block sizes are always power of two. Smallest block is always way bigger
+     * than four too.*/
+    asm volatile (
+        "lea.l (-16, %[s1], %[n]*4), %[s1];"
+        "0:"
+        "movem.l (%[s0]), %%d0-%%d3;"
+        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
+        "mac.l %%d0, %%a1, %%acc0;"
+        "mac.l %%d1, %%a0, %%acc1;"
+        "mac.l %%d2, %%d5, %%acc2;"
+        "mac.l %%d3, %%d4, %%acc3;"
+        "lea.l (16, %[s0]), %[s0];"
+        "lea.l (-16, %[s1]), %[s1];"
+        "movclr.l %%acc0, %%d0;"
+        "movclr.l %%acc1, %%d1;"
+        "movclr.l %%acc2, %%d2;"
+        "movclr.l %%acc3, %%d3;"
+        "movem.l %%d0-%%d3, (%[dst]);"
+        "lea.l (16, %[dst]), %[dst];"
+        "subq.l #4, %[n];"
+        "jne 0b;"
+        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
+        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
+}
+
+#else
+
+static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
+}
+
+static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = fixmul32b(src0[i], src1[-i]);
+}
+
+#endif
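The header comment only says these are helpers for wma_window, so for orientation here is a hypothetical use of the pair in an overlap-add windowing step; it is a sketch with assumed names and block layout, not the actual wma_window code:

/* Assumes wmafixed.h (above) is included for fixed32, vector_fmul_add_add()
 * and vector_fmul_reverse().  All other names are made up for illustration. */
static void window_and_overlap(fixed32 *out, const fixed32 *in,
                               const fixed32 *window, int block_len)
{
    int half = block_len / 2;

    /* Rising edge: out[i] += in[i] * window[i], overlap-adding into the
     * tail of the previous block's output. */
    vector_fmul_add_add(out, in, window, half);

    /* Falling edge: out[half + i] = in[half + i] * window[half - 1 - i],
     * i.e. the same window read back to front. */
    vector_fmul_reverse(out + half, in + half, window, half);
}

Note that the assembly loops above step by two (ARM) or four (ColdFire) elements per iteration, which is safe only for the power-of-two block sizes mentioned in their comments.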