diff options
-rw-r--r-- | apps/codecs/lib/mdct.c | 116 |
1 file changed, 115 insertions, 1 deletion
diff --git a/apps/codecs/lib/mdct.c b/apps/codecs/lib/mdct.c index 5524afbbc4..777aec4a55 100644 --- a/apps/codecs/lib/mdct.c +++ b/apps/codecs/lib/mdct.c | |||
@@ -41,7 +41,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) | |||
41 | { | 41 | { |
42 | int n8, n4, n2, n, j; | 42 | int n8, n4, n2, n, j; |
43 | const fixed32 *in1, *in2; | 43 | const fixed32 *in1, *in2; |
44 | 44 | (void)j; | |
45 | n = 1 << nbits; | 45 | n = 1 << nbits; |
46 | 46 | ||
47 | n2 = n >> 1; | 47 | n2 = n >> 1; |
@@ -79,6 +79,62 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) | |||
79 | const uint16_t * p_revtab=revtab; | 79 | const uint16_t * p_revtab=revtab; |
80 | { | 80 | { |
81 | const uint16_t * const p_revtab_end = p_revtab + n8; | 81 | const uint16_t * const p_revtab_end = p_revtab + n8; |
82 | #ifdef CPU_COLDFIRE | ||
83 | asm volatile ("move.l (%[in2]), %%d0\n\t" | ||
84 | "move.l (%[in1]), %%d1\n\t" | ||
85 | "bra.s 1f\n\t" | ||
86 | "0:\n\t" | ||
87 | "movem.l (%[T]), %%d2-%%d3\n\t" | ||
88 | |||
89 | "addq.l #8, %[in1]\n\t" | ||
90 | "subq.l #8, %[in2]\n\t" | ||
91 | |||
92 | "lea (%[step]*4, %[T]), %[T]\n\t" | ||
93 | |||
94 | "mac.l %%d0, %%d3, (%[T]), %%d4, %%acc0;" | ||
95 | "msac.l %%d1, %%d2, (4, %[T]), %%d5, %%acc0;" | ||
96 | "mac.l %%d1, %%d3, (%[in1]), %%d1, %%acc1;" | ||
97 | "mac.l %%d0, %%d2, (%[in2]), %%d0, %%acc1;" | ||
98 | |||
99 | "addq.l #8, %[in1]\n\t" | ||
100 | "subq.l #8, %[in2]\n\t" | ||
101 | |||
102 | "mac.l %%d0, %%d5, %%acc2;" | ||
103 | "msac.l %%d1, %%d4, (%[p_revtab])+, %%d2, %%acc2;" | ||
104 | "mac.l %%d1, %%d5, (%[in1]), %%d1, %%acc3;" | ||
105 | "mac.l %%d0, %%d4, (%[in2]), %%d0, %%acc3;" | ||
106 | |||
107 | "clr.l %%d3\n\t" | ||
108 | "move.w %%d2, %%d3\n\t" | ||
109 | "eor.l %%d3, %%d2\n\t" | ||
110 | "swap %%d2\n\t" | ||
111 | "lsr.l %[revtab_shift], %%d2\n\t" | ||
112 | |||
113 | "movclr.l %%acc0, %%d4;" | ||
114 | "movclr.l %%acc1, %%d5;" | ||
115 | "lsl.l #3, %%d2\n\t" | ||
116 | "lea (%%d2, %[z]), %%a1\n\t" | ||
117 | "movem.l %%d4-%%d5, (%%a1)\n\t" | ||
118 | |||
119 | "lsr.l %[revtab_shift], %%d3\n\t" | ||
120 | |||
121 | "movclr.l %%acc2, %%d4;" | ||
122 | "movclr.l %%acc3, %%d5;" | ||
123 | "lsl.l #3, %%d3\n\t" | ||
124 | "lea (%%d3, %[z]), %%a1\n\t" | ||
125 | "movem.l %%d4-%%d5, (%%a1)\n\t" | ||
126 | |||
127 | "lea (%[step]*4, %[T]), %[T]\n\t" | ||
128 | |||
129 | "1:\n\t" | ||
130 | "cmp.l %[p_revtab_end], %[p_revtab]\n\t" | ||
131 | "bcs.s 0b\n\t" | ||
132 | : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T), | ||
133 | [p_revtab] "+a" (p_revtab) | ||
134 | : [z] "a" (z), [step] "d" (step), [revtab_shift] "d" (revtab_shift), | ||
135 | [p_revtab_end] "r" (p_revtab_end) | ||
136 | : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory"); | ||
137 | #else | ||
82 | while(LIKELY(p_revtab < p_revtab_end)) | 138 | while(LIKELY(p_revtab < p_revtab_end)) |
83 | { | 139 | { |
84 | j = (*p_revtab)>>revtab_shift; | 140 | j = (*p_revtab)>>revtab_shift; |
@@ -94,9 +150,66 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) | |||
94 | in2 -= 2; | 150 | in2 -= 2; |
95 | p_revtab++; | 151 | p_revtab++; |
96 | } | 152 | } |
153 | #endif | ||
97 | } | 154 | } |
98 | { | 155 | { |
99 | const uint16_t * const p_revtab_end = p_revtab + n8; | 156 | const uint16_t * const p_revtab_end = p_revtab + n8; |
157 | #ifdef CPU_COLDFIRE | ||
158 | asm volatile ("move.l (%[in2]), %%d0\n\t" | ||
159 | "move.l (%[in1]), %%d1\n\t" | ||
160 | "bra.s 1f\n\t" | ||
161 | "0:\n\t" | ||
162 | "movem.l (%[T]), %%d2-%%d3\n\t" | ||
163 | |||
164 | "addq.l #8, %[in1]\n\t" | ||
165 | "subq.l #8, %[in2]\n\t" | ||
166 | |||
167 | "lea (%[step]*4, %[T]), %[T]\n\t" | ||
168 | |||
169 | "mac.l %%d0, %%d2, (%[T]), %%d4, %%acc0;" | ||
170 | "msac.l %%d1, %%d3, (4, %[T]), %%d5, %%acc0;" | ||
171 | "mac.l %%d1, %%d2, (%[in1]), %%d1, %%acc1;" | ||
172 | "mac.l %%d0, %%d3, (%[in2]), %%d0, %%acc1;" | ||
173 | |||
174 | "addq.l #8, %[in1]\n\t" | ||
175 | "subq.l #8, %[in2]\n\t" | ||
176 | |||
177 | "mac.l %%d0, %%d4, %%acc2;" | ||
178 | "msac.l %%d1, %%d5, (%[p_revtab])+, %%d2, %%acc2;" | ||
179 | "mac.l %%d1, %%d4, (%[in1]), %%d1, %%acc3;" | ||
180 | "mac.l %%d0, %%d5, (%[in2]), %%d0, %%acc3;" | ||
181 | |||
182 | "clr.l %%d3\n\t" | ||
183 | "move.w %%d2, %%d3\n\t" | ||
184 | "eor.l %%d3, %%d2\n\t" | ||
185 | "swap %%d2\n\t" | ||
186 | "lsr.l %[revtab_shift], %%d2\n\t" | ||
187 | |||
188 | "movclr.l %%acc0, %%d4;" | ||
189 | "movclr.l %%acc1, %%d5;" | ||
190 | "lsl.l #3, %%d2\n\t" | ||
191 | "lea (%%d2, %[z]), %%a1\n\t" | ||
192 | "movem.l %%d4-%%d5, (%%a1)\n\t" | ||
193 | |||
194 | "lsr.l %[revtab_shift], %%d3\n\t" | ||
195 | |||
196 | "movclr.l %%acc2, %%d4;" | ||
197 | "movclr.l %%acc3, %%d5;" | ||
198 | "lsl.l #3, %%d3\n\t" | ||
199 | "lea (%%d3, %[z]), %%a1\n\t" | ||
200 | "movem.l %%d4-%%d5, (%%a1)\n\t" | ||
201 | |||
202 | "lea (%[step]*4, %[T]), %[T]\n\t" | ||
203 | |||
204 | "1:\n\t" | ||
205 | "cmp.l %[p_revtab_end], %[p_revtab]\n\t" | ||
206 | "bcs.s 0b\n\t" | ||
207 | : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T), | ||
208 | [p_revtab] "+a" (p_revtab) | ||
209 | : [z] "a" (z), [step] "d" (-step), [revtab_shift] "d" (revtab_shift), | ||
210 | [p_revtab_end] "r" (p_revtab_end) | ||
211 | : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory"); | ||
212 | #else | ||
100 | while(LIKELY(p_revtab < p_revtab_end)) | 213 | while(LIKELY(p_revtab < p_revtab_end)) |
101 | { | 214 | { |
102 | j = (*p_revtab)>>revtab_shift; | 215 | j = (*p_revtab)>>revtab_shift; |
@@ -112,6 +225,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) | |||
112 | in2 -= 2; | 225 | in2 -= 2; |
113 | p_revtab++; | 226 | p_revtab++; |
114 | } | 227 | } |
228 | #endif | ||
115 | } | 229 | } |
116 | 230 | ||
117 | 231 | ||