diff options
Diffstat (limited to 'apps/codecs/libwma/wmafixed.c')
-rw-r--r-- | apps/codecs/libwma/wmafixed.c | 131 |
1 files changed, 125 insertions, 6 deletions
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c index 3c96700076..5569309145 100644 --- a/apps/codecs/libwma/wmafixed.c +++ b/apps/codecs/libwma/wmafixed.c | |||
@@ -63,20 +63,139 @@ fixed64 Fixed32To64(fixed32 x) | |||
63 | return (fixed64)x; | 63 | return (fixed64)x; |
64 | } | 64 | } |
65 | 65 | ||
66 | |||
66 | /* | 67 | /* |
67 | Not performance sensitive code here | 68 | * Helper functions for wma_window. |
69 | * | ||
70 | * | ||
71 | */ | ||
68 | 72 | ||
69 | */ | 73 | #ifdef CPU_ARM |
74 | inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data, | ||
75 | const fixed32 *window, int n) | ||
76 | { | ||
77 | /* Block sizes are always power of two */ | ||
78 | asm volatile ( | ||
79 | "0:" | ||
80 | "ldmia %[d]!, {r0, r1};" | ||
81 | "ldmia %[w]!, {r4, r5};" | ||
82 | /* consume the first data and window value so we can use those | ||
83 | * registers again */ | ||
84 | "smull r8, r9, r0, r4;" | ||
85 | "ldmia %[dst], {r0, r4};" | ||
86 | "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/ | ||
87 | "smull r8, r9, r1, r5;" | ||
88 | "add r1, r4, r9, lsl #1;" | ||
89 | "stmia %[dst]!, {r0, r1};" | ||
90 | "subs %[n], %[n], #2;" | ||
91 | "bne 0b;" | ||
92 | : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n) | ||
93 | : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc"); | ||
94 | } | ||
95 | |||
96 | inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, | ||
97 | int len) | ||
98 | { | ||
99 | /* Block sizes are always power of two */ | ||
100 | asm volatile ( | ||
101 | "add %[s1], %[s1], %[n], lsl #2;" | ||
102 | "0:" | ||
103 | "ldmia %[s0]!, {r0, r1};" | ||
104 | "ldmdb %[s1]!, {r4, r5};" | ||
105 | "smull r8, r9, r0, r5;" | ||
106 | "mov r0, r9, lsl #1;" | ||
107 | "smull r8, r9, r1, r4;" | ||
108 | "mov r1, r9, lsl #1;" | ||
109 | "stmia %[dst]!, {r0, r1};" | ||
110 | "subs %[n], %[n], #2;" | ||
111 | "bne 0b;" | ||
112 | : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len) | ||
113 | : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc"); | ||
114 | } | ||
70 | 115 | ||
116 | #elif defined(CPU_COLDFIRE) | ||
71 | 117 | ||
72 | fixed64 fixmul64byfixed(fixed64 x, fixed32 y) | 118 | inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data, |
119 | const fixed32 *window, int n) | ||
73 | { | 120 | { |
121 | /* Block sizes are always power of two. Smallest block is always way bigger | ||
122 | * than four too.*/ | ||
123 | asm volatile ( | ||
124 | "0:" | ||
125 | "movem.l (%[d]), %%d0-%%d3;" | ||
126 | "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;" | ||
127 | "mac.l %%d0, %%d4, %%acc0;" | ||
128 | "mac.l %%d1, %%d5, %%acc1;" | ||
129 | "mac.l %%d2, %%a0, %%acc2;" | ||
130 | "mac.l %%d3, %%a1, %%acc3;" | ||
131 | "lea.l (16, %[d]), %[d];" | ||
132 | "lea.l (16, %[w]), %[w];" | ||
133 | "movclr.l %%acc0, %%d0;" | ||
134 | "movclr.l %%acc1, %%d1;" | ||
135 | "movclr.l %%acc2, %%d2;" | ||
136 | "movclr.l %%acc3, %%d3;" | ||
137 | "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;" | ||
138 | "add.l %%d4, %%d0;" | ||
139 | "add.l %%d5, %%d1;" | ||
140 | "add.l %%a0, %%d2;" | ||
141 | "add.l %%a1, %%d3;" | ||
142 | "movem.l %%d0-%%d3, (%[dst]);" | ||
143 | "lea.l (16, %[dst]), %[dst];" | ||
144 | "subq.l #4, %[n];" | ||
145 | "jne 0b;" | ||
146 | : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n) | ||
147 | : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc"); | ||
148 | } | ||
74 | 149 | ||
75 | //return x * y; | 150 | inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, |
76 | return (x * y); | 151 | int len) |
77 | // return (fixed64) fixmul32(Fixed32From64(x),y); | 152 | { |
153 | /* Block sizes are always power of two. Smallest block is always way bigger | ||
154 | * than four too.*/ | ||
155 | asm volatile ( | ||
156 | "lea.l (-16, %[s1], %[n]*4), %[s1];" | ||
157 | "0:" | ||
158 | "movem.l (%[s0]), %%d0-%%d3;" | ||
159 | "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;" | ||
160 | "mac.l %%d0, %%a1, %%acc0;" | ||
161 | "mac.l %%d1, %%a0, %%acc1;" | ||
162 | "mac.l %%d2, %%d5, %%acc2;" | ||
163 | "mac.l %%d3, %%d4, %%acc3;" | ||
164 | "lea.l (16, %[s0]), %[s0];" | ||
165 | "lea.l (-16, %[s1]), %[s1];" | ||
166 | "movclr.l %%acc0, %%d0;" | ||
167 | "movclr.l %%acc1, %%d1;" | ||
168 | "movclr.l %%acc2, %%d2;" | ||
169 | "movclr.l %%acc3, %%d3;" | ||
170 | "movem.l %%d0-%%d3, (%[dst]);" | ||
171 | "lea.l (16, %[dst]), %[dst];" | ||
172 | "subq.l #4, %[n];" | ||
173 | "jne 0b;" | ||
174 | : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len) | ||
175 | : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc"); | ||
78 | } | 176 | } |
79 | 177 | ||
178 | #else | ||
179 | |||
180 | inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){ | ||
181 | int i; | ||
182 | for(i=0; i<len; i++) | ||
183 | dst[i] = fixmul32b(src0[i], src1[i]) + dst[i]; | ||
184 | } | ||
185 | |||
186 | inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){ | ||
187 | int i; | ||
188 | src1 += len-1; | ||
189 | for(i=0; i<len; i++) | ||
190 | dst[i] = fixmul32b(src0[i], src1[-i]); | ||
191 | } | ||
192 | |||
193 | #endif | ||
194 | |||
195 | /* | ||
196 | Not performance sensitive code here | ||
197 | |||
198 | */ | ||
80 | 199 | ||
81 | fixed32 fixdiv32(fixed32 x, fixed32 y) | 200 | fixed32 fixdiv32(fixed32 x, fixed32 y) |
82 | { | 201 | { |