diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/kiss_fft.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/kiss_fft.c | 482 |
1 files changed, 173 insertions, 309 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c index e2b8f3b3da..833ef5a71f 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c | |||
@@ -45,73 +45,62 @@ | |||
45 | complex numbers. It also delares the kf_ internal functions. | 45 | complex numbers. It also delares the kf_ internal functions. |
46 | */ | 46 | */ |
47 | 47 | ||
48 | #if 0 | ||
49 | static void kf_bfly2( | 48 | static void kf_bfly2( |
50 | kiss_fft_cpx * Fout, | 49 | kiss_fft_cpx * Fout, |
51 | const size_t fstride, | ||
52 | const kiss_fft_state *st, | ||
53 | int m, | 50 | int m, |
54 | int N, | 51 | int N |
55 | int mm | ||
56 | ) | 52 | ) |
57 | { | 53 | { |
58 | kiss_fft_cpx * Fout2; | 54 | kiss_fft_cpx * Fout2; |
59 | const kiss_twiddle_cpx * tw1; | 55 | int i; |
60 | int i,j; | 56 | (void)m; |
61 | kiss_fft_cpx * Fout_beg = Fout; | 57 | #ifdef CUSTOM_MODES |
62 | for (i=0;i<N;i++) | 58 | if (m==1) |
63 | { | 59 | { |
64 | Fout = Fout_beg + i*mm; | 60 | celt_assert(m==1); |
65 | Fout2 = Fout + m; | 61 | for (i=0;i<N;i++) |
66 | tw1 = st->twiddles; | ||
67 | for(j=0;j<m;j++) | ||
68 | { | 62 | { |
69 | kiss_fft_cpx t; | 63 | kiss_fft_cpx t; |
70 | Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); | 64 | Fout2 = Fout + 1; |
71 | Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); | 65 | t = *Fout2; |
72 | C_MUL (t, *Fout2 , *tw1); | ||
73 | tw1 += fstride; | ||
74 | C_SUB( *Fout2 , *Fout , t ); | 66 | C_SUB( *Fout2 , *Fout , t ); |
75 | C_ADDTO( *Fout , t ); | 67 | C_ADDTO( *Fout , t ); |
76 | ++Fout2; | 68 | Fout += 2; |
77 | ++Fout; | ||
78 | } | 69 | } |
79 | } | 70 | } else |
80 | } | ||
81 | #endif | 71 | #endif |
82 | |||
83 | static void ki_bfly2( | ||
84 | kiss_fft_cpx * Fout, | ||
85 | const size_t fstride, | ||
86 | const kiss_fft_state *st, | ||
87 | int m, | ||
88 | int N, | ||
89 | int mm | ||
90 | ) | ||
91 | { | ||
92 | kiss_fft_cpx * Fout2; | ||
93 | const kiss_twiddle_cpx * tw1; | ||
94 | kiss_fft_cpx t; | ||
95 | int i,j; | ||
96 | kiss_fft_cpx * Fout_beg = Fout; | ||
97 | for (i=0;i<N;i++) | ||
98 | { | 72 | { |
99 | Fout = Fout_beg + i*mm; | 73 | opus_val16 tw; |
100 | Fout2 = Fout + m; | 74 | tw = QCONST16(0.7071067812f, 15); |
101 | tw1 = st->twiddles; | 75 | /* We know that m==4 here because the radix-2 is just after a radix-4 */ |
102 | for(j=0;j<m;j++) | 76 | celt_assert(m==4); |
77 | for (i=0;i<N;i++) | ||
103 | { | 78 | { |
104 | C_MULC (t, *Fout2 , *tw1); | 79 | kiss_fft_cpx t; |
105 | tw1 += fstride; | 80 | Fout2 = Fout + 4; |
106 | C_SUB( *Fout2 , *Fout , t ); | 81 | t = Fout2[0]; |
107 | C_ADDTO( *Fout , t ); | 82 | C_SUB( Fout2[0] , Fout[0] , t ); |
108 | ++Fout2; | 83 | C_ADDTO( Fout[0] , t ); |
109 | ++Fout; | 84 | |
85 | t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); | ||
86 | t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); | ||
87 | C_SUB( Fout2[1] , Fout[1] , t ); | ||
88 | C_ADDTO( Fout[1] , t ); | ||
89 | |||
90 | t.r = Fout2[2].i; | ||
91 | t.i = -Fout2[2].r; | ||
92 | C_SUB( Fout2[2] , Fout[2] , t ); | ||
93 | C_ADDTO( Fout[2] , t ); | ||
94 | |||
95 | t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); | ||
96 | t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); | ||
97 | C_SUB( Fout2[3] , Fout[3] , t ); | ||
98 | C_ADDTO( Fout[3] , t ); | ||
99 | Fout += 8; | ||
110 | } | 100 | } |
111 | } | 101 | } |
112 | } | 102 | } |
113 | 103 | ||
114 | #if 0 | ||
115 | static void kf_bfly4( | 104 | static void kf_bfly4( |
116 | kiss_fft_cpx * Fout, | 105 | kiss_fft_cpx * Fout, |
117 | const size_t fstride, | 106 | const size_t fstride, |
@@ -121,93 +110,69 @@ static void kf_bfly4( | |||
121 | int mm | 110 | int mm |
122 | ) | 111 | ) |
123 | { | 112 | { |
124 | const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 113 | int i; |
125 | kiss_fft_cpx scratch[6]; | ||
126 | const size_t m2=2*m; | ||
127 | const size_t m3=3*m; | ||
128 | int i, j; | ||
129 | 114 | ||
130 | kiss_fft_cpx * Fout_beg = Fout; | 115 | if (m==1) |
131 | for (i=0;i<N;i++) | ||
132 | { | 116 | { |
133 | Fout = Fout_beg + i*mm; | 117 | /* Degenerate case where all the twiddles are 1. */ |
134 | tw3 = tw2 = tw1 = st->twiddles; | 118 | for (i=0;i<N;i++) |
135 | for (j=0;j<m;j++) | ||
136 | { | 119 | { |
137 | C_MUL4(scratch[0],Fout[m] , *tw1 ); | 120 | kiss_fft_cpx scratch0, scratch1; |
138 | C_MUL4(scratch[1],Fout[m2] , *tw2 ); | 121 | |
139 | C_MUL4(scratch[2],Fout[m3] , *tw3 ); | 122 | C_SUB( scratch0 , *Fout, Fout[2] ); |
140 | 123 | C_ADDTO(*Fout, Fout[2]); | |
141 | Fout->r = PSHR32(Fout->r, 2); | 124 | C_ADD( scratch1 , Fout[1] , Fout[3] ); |
142 | Fout->i = PSHR32(Fout->i, 2); | 125 | C_SUB( Fout[2], *Fout, scratch1 ); |
143 | C_SUB( scratch[5] , *Fout, scratch[1] ); | 126 | C_ADDTO( *Fout , scratch1 ); |
144 | C_ADDTO(*Fout, scratch[1]); | 127 | C_SUB( scratch1 , Fout[1] , Fout[3] ); |
145 | C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 128 | |
146 | C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 129 | Fout[1].r = scratch0.r + scratch1.i; |
147 | C_SUB( Fout[m2], *Fout, scratch[3] ); | 130 | Fout[1].i = scratch0.i - scratch1.r; |
148 | tw1 += fstride; | 131 | Fout[3].r = scratch0.r - scratch1.i; |
149 | tw2 += fstride*2; | 132 | Fout[3].i = scratch0.i + scratch1.r; |
150 | tw3 += fstride*3; | 133 | Fout+=4; |
151 | C_ADDTO( *Fout , scratch[3] ); | ||
152 | |||
153 | Fout[m].r = scratch[5].r + scratch[4].i; | ||
154 | Fout[m].i = scratch[5].i - scratch[4].r; | ||
155 | Fout[m3].r = scratch[5].r - scratch[4].i; | ||
156 | Fout[m3].i = scratch[5].i + scratch[4].r; | ||
157 | ++Fout; | ||
158 | } | 134 | } |
159 | } | 135 | } else { |
160 | } | 136 | int j; |
161 | #endif | 137 | kiss_fft_cpx scratch[6]; |
162 | 138 | const kiss_twiddle_cpx *tw1,*tw2,*tw3; | |
163 | static void ki_bfly4( | 139 | const int m2=2*m; |
164 | kiss_fft_cpx * Fout, | 140 | const int m3=3*m; |
165 | const size_t fstride, | 141 | kiss_fft_cpx * Fout_beg = Fout; |
166 | const kiss_fft_state *st, | 142 | for (i=0;i<N;i++) |
167 | int m, | ||
168 | int N, | ||
169 | int mm | ||
170 | ) | ||
171 | { | ||
172 | const kiss_twiddle_cpx *tw1,*tw2,*tw3; | ||
173 | kiss_fft_cpx scratch[6]; | ||
174 | const size_t m2=2*m; | ||
175 | const size_t m3=3*m; | ||
176 | int i, j; | ||
177 | |||
178 | kiss_fft_cpx * Fout_beg = Fout; | ||
179 | for (i=0;i<N;i++) | ||
180 | { | ||
181 | Fout = Fout_beg + i*mm; | ||
182 | tw3 = tw2 = tw1 = st->twiddles; | ||
183 | for (j=0;j<m;j++) | ||
184 | { | 143 | { |
185 | C_MULC(scratch[0],Fout[m] , *tw1 ); | 144 | Fout = Fout_beg + i*mm; |
186 | C_MULC(scratch[1],Fout[m2] , *tw2 ); | 145 | tw3 = tw2 = tw1 = st->twiddles; |
187 | C_MULC(scratch[2],Fout[m3] , *tw3 ); | 146 | /* m is guaranteed to be a multiple of 4. */ |
188 | 147 | for (j=0;j<m;j++) | |
189 | C_SUB( scratch[5] , *Fout, scratch[1] ); | 148 | { |
190 | C_ADDTO(*Fout, scratch[1]); | 149 | C_MUL(scratch[0],Fout[m] , *tw1 ); |
191 | C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 150 | C_MUL(scratch[1],Fout[m2] , *tw2 ); |
192 | C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 151 | C_MUL(scratch[2],Fout[m3] , *tw3 ); |
193 | C_SUB( Fout[m2], *Fout, scratch[3] ); | 152 | |
194 | tw1 += fstride; | 153 | C_SUB( scratch[5] , *Fout, scratch[1] ); |
195 | tw2 += fstride*2; | 154 | C_ADDTO(*Fout, scratch[1]); |
196 | tw3 += fstride*3; | 155 | C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
197 | C_ADDTO( *Fout , scratch[3] ); | 156 | C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
198 | 157 | C_SUB( Fout[m2], *Fout, scratch[3] ); | |
199 | Fout[m].r = scratch[5].r - scratch[4].i; | 158 | tw1 += fstride; |
200 | Fout[m].i = scratch[5].i + scratch[4].r; | 159 | tw2 += fstride*2; |
201 | Fout[m3].r = scratch[5].r + scratch[4].i; | 160 | tw3 += fstride*3; |
202 | Fout[m3].i = scratch[5].i - scratch[4].r; | 161 | C_ADDTO( *Fout , scratch[3] ); |
203 | ++Fout; | 162 | |
163 | Fout[m].r = scratch[5].r + scratch[4].i; | ||
164 | Fout[m].i = scratch[5].i - scratch[4].r; | ||
165 | Fout[m3].r = scratch[5].r - scratch[4].i; | ||
166 | Fout[m3].i = scratch[5].i + scratch[4].r; | ||
167 | ++Fout; | ||
168 | } | ||
204 | } | 169 | } |
205 | } | 170 | } |
206 | } | 171 | } |
207 | 172 | ||
173 | |||
208 | #ifndef RADIX_TWO_ONLY | 174 | #ifndef RADIX_TWO_ONLY |
209 | 175 | ||
210 | #if 0 | ||
211 | static void kf_bfly3( | 176 | static void kf_bfly3( |
212 | kiss_fft_cpx * Fout, | 177 | kiss_fft_cpx * Fout, |
213 | const size_t fstride, | 178 | const size_t fstride, |
@@ -225,14 +190,19 @@ static void kf_bfly3( | |||
225 | kiss_twiddle_cpx epi3; | 190 | kiss_twiddle_cpx epi3; |
226 | 191 | ||
227 | kiss_fft_cpx * Fout_beg = Fout; | 192 | kiss_fft_cpx * Fout_beg = Fout; |
193 | #ifdef FIXED_POINT | ||
194 | epi3.r = -16384; | ||
195 | epi3.i = -28378; | ||
196 | #else | ||
228 | epi3 = st->twiddles[fstride*m]; | 197 | epi3 = st->twiddles[fstride*m]; |
198 | #endif | ||
229 | for (i=0;i<N;i++) | 199 | for (i=0;i<N;i++) |
230 | { | 200 | { |
231 | Fout = Fout_beg + i*mm; | 201 | Fout = Fout_beg + i*mm; |
232 | tw1=tw2=st->twiddles; | 202 | tw1=tw2=st->twiddles; |
203 | /* For non-custom modes, m is guaranteed to be a multiple of 4. */ | ||
233 | k=m; | 204 | k=m; |
234 | do { | 205 | do { |
235 | C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); | ||
236 | 206 | ||
237 | C_MUL(scratch[1],Fout[m] , *tw1); | 207 | C_MUL(scratch[1],Fout[m] , *tw1); |
238 | C_MUL(scratch[2],Fout[m2] , *tw2); | 208 | C_MUL(scratch[2],Fout[m2] , *tw2); |
@@ -259,59 +229,9 @@ static void kf_bfly3( | |||
259 | } while(--k); | 229 | } while(--k); |
260 | } | 230 | } |
261 | } | 231 | } |
262 | #endif | ||
263 | |||
264 | static void ki_bfly3( | ||
265 | kiss_fft_cpx * Fout, | ||
266 | const size_t fstride, | ||
267 | const kiss_fft_state *st, | ||
268 | int m, | ||
269 | int N, | ||
270 | int mm | ||
271 | ) | ||
272 | { | ||
273 | int i, k; | ||
274 | const size_t m2 = 2*m; | ||
275 | const kiss_twiddle_cpx *tw1,*tw2; | ||
276 | kiss_fft_cpx scratch[5]; | ||
277 | kiss_twiddle_cpx epi3; | ||
278 | |||
279 | kiss_fft_cpx * Fout_beg = Fout; | ||
280 | epi3 = st->twiddles[fstride*m]; | ||
281 | for (i=0;i<N;i++) | ||
282 | { | ||
283 | Fout = Fout_beg + i*mm; | ||
284 | tw1=tw2=st->twiddles; | ||
285 | k=m; | ||
286 | do{ | ||
287 | |||
288 | C_MULC(scratch[1],Fout[m] , *tw1); | ||
289 | C_MULC(scratch[2],Fout[m2] , *tw2); | ||
290 | |||
291 | C_ADD(scratch[3],scratch[1],scratch[2]); | ||
292 | C_SUB(scratch[0],scratch[1],scratch[2]); | ||
293 | tw1 += fstride; | ||
294 | tw2 += fstride*2; | ||
295 | |||
296 | Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | ||
297 | Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | ||
298 | |||
299 | C_MULBYSCALAR( scratch[0] , -epi3.i ); | ||
300 | |||
301 | C_ADDTO(*Fout,scratch[3]); | ||
302 | |||
303 | Fout[m2].r = Fout[m].r + scratch[0].i; | ||
304 | Fout[m2].i = Fout[m].i - scratch[0].r; | ||
305 | |||
306 | Fout[m].r -= scratch[0].i; | ||
307 | Fout[m].i += scratch[0].r; | ||
308 | 232 | ||
309 | ++Fout; | ||
310 | }while(--k); | ||
311 | } | ||
312 | } | ||
313 | 233 | ||
314 | #if 0 | 234 | #ifndef OVERRIDE_kf_bfly5 |
315 | static void kf_bfly5( | 235 | static void kf_bfly5( |
316 | kiss_fft_cpx * Fout, | 236 | kiss_fft_cpx * Fout, |
317 | const size_t fstride, | 237 | const size_t fstride, |
@@ -324,13 +244,19 @@ static void kf_bfly5( | |||
324 | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 244 | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
325 | int i, u; | 245 | int i, u; |
326 | kiss_fft_cpx scratch[13]; | 246 | kiss_fft_cpx scratch[13]; |
327 | const kiss_twiddle_cpx * twiddles = st->twiddles; | ||
328 | const kiss_twiddle_cpx *tw; | 247 | const kiss_twiddle_cpx *tw; |
329 | kiss_twiddle_cpx ya,yb; | 248 | kiss_twiddle_cpx ya,yb; |
330 | kiss_fft_cpx * Fout_beg = Fout; | 249 | kiss_fft_cpx * Fout_beg = Fout; |
331 | 250 | ||
332 | ya = twiddles[fstride*m]; | 251 | #ifdef FIXED_POINT |
333 | yb = twiddles[fstride*2*m]; | 252 | ya.r = 10126; |
253 | ya.i = -31164; | ||
254 | yb.r = -26510; | ||
255 | yb.i = -19261; | ||
256 | #else | ||
257 | ya = st->twiddles[fstride*m]; | ||
258 | yb = st->twiddles[fstride*2*m]; | ||
259 | #endif | ||
334 | tw=st->twiddles; | 260 | tw=st->twiddles; |
335 | 261 | ||
336 | for (i=0;i<N;i++) | 262 | for (i=0;i<N;i++) |
@@ -342,8 +268,8 @@ static void kf_bfly5( | |||
342 | Fout3=Fout0+3*m; | 268 | Fout3=Fout0+3*m; |
343 | Fout4=Fout0+4*m; | 269 | Fout4=Fout0+4*m; |
344 | 270 | ||
271 | /* For non-custom modes, m is guaranteed to be a multiple of 4. */ | ||
345 | for ( u=0; u<m; ++u ) { | 272 | for ( u=0; u<m; ++u ) { |
346 | C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); | ||
347 | scratch[0] = *Fout0; | 273 | scratch[0] = *Fout0; |
348 | 274 | ||
349 | C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 275 | C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
@@ -380,75 +306,8 @@ static void kf_bfly5( | |||
380 | } | 306 | } |
381 | } | 307 | } |
382 | } | 308 | } |
383 | #endif | 309 | #endif /* OVERRIDE_kf_bfly5 */ |
384 | |||
385 | static void ki_bfly5( | ||
386 | kiss_fft_cpx * Fout, | ||
387 | const size_t fstride, | ||
388 | const kiss_fft_state *st, | ||
389 | int m, | ||
390 | int N, | ||
391 | int mm | ||
392 | ) | ||
393 | { | ||
394 | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | ||
395 | int i, u; | ||
396 | kiss_fft_cpx scratch[13]; | ||
397 | const kiss_twiddle_cpx * twiddles = st->twiddles; | ||
398 | const kiss_twiddle_cpx *tw; | ||
399 | kiss_twiddle_cpx ya,yb; | ||
400 | kiss_fft_cpx * Fout_beg = Fout; | ||
401 | 310 | ||
402 | ya = twiddles[fstride*m]; | ||
403 | yb = twiddles[fstride*2*m]; | ||
404 | tw=st->twiddles; | ||
405 | |||
406 | for (i=0;i<N;i++) | ||
407 | { | ||
408 | Fout = Fout_beg + i*mm; | ||
409 | Fout0=Fout; | ||
410 | Fout1=Fout0+m; | ||
411 | Fout2=Fout0+2*m; | ||
412 | Fout3=Fout0+3*m; | ||
413 | Fout4=Fout0+4*m; | ||
414 | |||
415 | for ( u=0; u<m; ++u ) { | ||
416 | scratch[0] = *Fout0; | ||
417 | |||
418 | C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); | ||
419 | C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); | ||
420 | C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); | ||
421 | C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); | ||
422 | |||
423 | C_ADD( scratch[7],scratch[1],scratch[4]); | ||
424 | C_SUB( scratch[10],scratch[1],scratch[4]); | ||
425 | C_ADD( scratch[8],scratch[2],scratch[3]); | ||
426 | C_SUB( scratch[9],scratch[2],scratch[3]); | ||
427 | |||
428 | Fout0->r += scratch[7].r + scratch[8].r; | ||
429 | Fout0->i += scratch[7].i + scratch[8].i; | ||
430 | |||
431 | scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); | ||
432 | scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); | ||
433 | |||
434 | scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); | ||
435 | scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); | ||
436 | |||
437 | C_SUB(*Fout1,scratch[5],scratch[6]); | ||
438 | C_ADD(*Fout4,scratch[5],scratch[6]); | ||
439 | |||
440 | scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); | ||
441 | scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); | ||
442 | scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); | ||
443 | scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); | ||
444 | |||
445 | C_ADD(*Fout2,scratch[11],scratch[12]); | ||
446 | C_SUB(*Fout3,scratch[11],scratch[12]); | ||
447 | |||
448 | ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | ||
449 | } | ||
450 | } | ||
451 | } | ||
452 | 311 | ||
453 | #endif | 312 | #endif |
454 | 313 | ||
@@ -496,6 +355,9 @@ static | |||
496 | int kf_factor(int n,opus_int16 * facbuf) | 355 | int kf_factor(int n,opus_int16 * facbuf) |
497 | { | 356 | { |
498 | int p=4; | 357 | int p=4; |
358 | int i; | ||
359 | int stages=0; | ||
360 | int nbak = n; | ||
499 | 361 | ||
500 | /*factor out powers of 4, powers of 2, then any remaining primes */ | 362 | /*factor out powers of 4, powers of 2, then any remaining primes */ |
501 | do { | 363 | do { |
@@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf) | |||
517 | { | 379 | { |
518 | return 0; | 380 | return 0; |
519 | } | 381 | } |
520 | *facbuf++ = p; | 382 | facbuf[2*stages] = p; |
521 | *facbuf++ = n; | 383 | if (p==2 && stages > 1) |
384 | { | ||
385 | facbuf[2*stages] = 4; | ||
386 | facbuf[2] = 2; | ||
387 | } | ||
388 | stages++; | ||
522 | } while (n > 1); | 389 | } while (n > 1); |
390 | n = nbak; | ||
391 | /* Reverse the order to get the radix 4 at the end, so we can use the | ||
392 | fast degenerate case. It turns out that reversing the order also | ||
393 | improves the noise behaviour. */ | ||
394 | for (i=0;i<stages/2;i++) | ||
395 | { | ||
396 | int tmp; | ||
397 | tmp = facbuf[2*i]; | ||
398 | facbuf[2*i] = facbuf[2*(stages-i-1)]; | ||
399 | facbuf[2*(stages-i-1)] = tmp; | ||
400 | } | ||
401 | for (i=0;i<stages;i++) | ||
402 | { | ||
403 | n /= facbuf[2*i]; | ||
404 | facbuf[2*i+1] = n; | ||
405 | } | ||
523 | return 1; | 406 | return 1; |
524 | } | 407 | } |
525 | 408 | ||
@@ -563,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co | |||
563 | kiss_twiddle_cpx *twiddles; | 446 | kiss_twiddle_cpx *twiddles; |
564 | 447 | ||
565 | st->nfft=nfft; | 448 | st->nfft=nfft; |
566 | #ifndef FIXED_POINT | 449 | #ifdef FIXED_POINT |
450 | st->scale_shift = celt_ilog2(st->nfft); | ||
451 | if (st->nfft == 1<<st->scale_shift) | ||
452 | st->scale = Q15ONE; | ||
453 | else | ||
454 | st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); | ||
455 | #else | ||
567 | st->scale = 1.f/nfft; | 456 | st->scale = 1.f/nfft; |
568 | #endif | 457 | #endif |
569 | if (base != NULL) | 458 | if (base != NULL) |
570 | { | 459 | { |
571 | st->twiddles = base->twiddles; | 460 | st->twiddles = base->twiddles; |
572 | st->shift = 0; | 461 | st->shift = 0; |
573 | while (nfft<<st->shift != base->nfft && st->shift < 32) | 462 | while (st->shift < 32 && nfft<<st->shift != base->nfft) |
574 | st->shift++; | 463 | st->shift++; |
575 | if (st->shift>=32) | 464 | if (st->shift>=32) |
576 | goto fail; | 465 | goto fail; |
@@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg) | |||
614 | 503 | ||
615 | #endif /* CUSTOM_MODES */ | 504 | #endif /* CUSTOM_MODES */ |
616 | 505 | ||
617 | #if 0 | 506 | void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) |
618 | void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | ||
619 | { | 507 | { |
620 | int m2, m; | 508 | int m2, m; |
621 | int p; | 509 | int p; |
@@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou | |||
627 | /* st->shift can be -1 */ | 515 | /* st->shift can be -1 */ |
628 | shift = st->shift>0 ? st->shift : 0; | 516 | shift = st->shift>0 ? st->shift : 0; |
629 | 517 | ||
630 | celt_assert2 (fin != fout, "In-place FFT not supported"); | ||
631 | /* Bit-reverse the input */ | ||
632 | for (i=0;i<st->nfft;i++) | ||
633 | { | ||
634 | fout[st->bitrev[i]] = fin[i]; | ||
635 | #ifndef FIXED_POINT | ||
636 | fout[st->bitrev[i]].r *= st->scale; | ||
637 | fout[st->bitrev[i]].i *= st->scale; | ||
638 | #endif | ||
639 | } | ||
640 | |||
641 | fstride[0] = 1; | 518 | fstride[0] = 1; |
642 | L=0; | 519 | L=0; |
643 | do { | 520 | do { |
@@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou | |||
656 | switch (st->factors[2*i]) | 533 | switch (st->factors[2*i]) |
657 | { | 534 | { |
658 | case 2: | 535 | case 2: |
659 | kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 536 | kf_bfly2(fout, m, fstride[i]); |
660 | break; | 537 | break; |
661 | case 4: | 538 | case 4: |
662 | kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 539 | kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
@@ -673,57 +550,44 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou | |||
673 | m = m2; | 550 | m = m2; |
674 | } | 551 | } |
675 | } | 552 | } |
676 | #endif | ||
677 | 553 | ||
678 | void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | 554 | #if 0 |
555 | void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | ||
679 | { | 556 | { |
680 | int m2, m; | ||
681 | int p; | ||
682 | int L; | ||
683 | int fstride[MAXFACTORS]; | ||
684 | int i; | 557 | int i; |
685 | int shift; | 558 | opus_val16 scale; |
559 | #ifdef FIXED_POINT | ||
560 | /* Allows us to scale with MULT16_32_Q16(), which is faster than | ||
561 | MULT16_32_Q15() on ARM. */ | ||
562 | int scale_shift = st->scale_shift-1; | ||
563 | #endif | ||
564 | scale = st->scale; | ||
686 | 565 | ||
687 | /* st->shift can be -1 */ | ||
688 | shift = st->shift>0 ? st->shift : 0; | ||
689 | celt_assert2 (fin != fout, "In-place FFT not supported"); | 566 | celt_assert2 (fin != fout, "In-place FFT not supported"); |
690 | /* Bit-reverse the input */ | 567 | /* Bit-reverse the input */ |
691 | for (i=0;i<st->nfft;i++) | 568 | for (i=0;i<st->nfft;i++) |
692 | fout[st->bitrev[i]] = fin[i]; | ||
693 | |||
694 | fstride[0] = 1; | ||
695 | L=0; | ||
696 | do { | ||
697 | p = st->factors[2*L]; | ||
698 | m = st->factors[2*L+1]; | ||
699 | fstride[L+1] = fstride[L]*p; | ||
700 | L++; | ||
701 | } while(m!=1); | ||
702 | m = st->factors[2*L-1]; | ||
703 | for (i=L-1;i>=0;i--) | ||
704 | { | 569 | { |
705 | if (i!=0) | 570 | kiss_fft_cpx x = fin[i]; |
706 | m2 = st->factors[2*i-1]; | 571 | fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); |
707 | else | 572 | fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); |
708 | m2 = 1; | ||
709 | switch (st->factors[2*i]) | ||
710 | { | ||
711 | case 2: | ||
712 | ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); | ||
713 | break; | ||
714 | case 4: | ||
715 | ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | ||
716 | break; | ||
717 | #ifndef RADIX_TWO_ONLY | ||
718 | case 3: | ||
719 | ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); | ||
720 | break; | ||
721 | case 5: | ||
722 | ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); | ||
723 | break; | ||
724 | #endif | ||
725 | } | ||
726 | m = m2; | ||
727 | } | 573 | } |
574 | opus_fft_impl(st, fout); | ||
728 | } | 575 | } |
576 | #endif | ||
577 | |||
729 | 578 | ||
579 | #ifdef TEST_UNIT_DFT_C | ||
580 | void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | ||
581 | { | ||
582 | int i; | ||
583 | celt_assert2 (fin != fout, "In-place FFT not supported"); | ||
584 | /* Bit-reverse the input */ | ||
585 | for (i=0;i<st->nfft;i++) | ||
586 | fout[st->bitrev[i]] = fin[i]; | ||
587 | for (i=0;i<st->nfft;i++) | ||
588 | fout[i].i = -fout[i].i; | ||
589 | opus_fft_impl(st, fout); | ||
590 | for (i=0;i<st->nfft;i++) | ||
591 | fout[i].i = -fout[i].i; | ||
592 | } | ||
593 | #endif | ||