summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2014-01-19 16:31:59 +0100
committerNils Wallménius <nils@rockbox.org>2014-07-13 11:12:40 +0200
commit9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/kiss_fft.c
parente557951c94c1efa769900257e466900f0ffeb53b (diff)
downloadrockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer. Speeds up decoding of the following test files: H300 (cf) C200 (arm7tdmi) ipod classic (arm9e) 16 kbps (silk) 14.28 MHz 4.00 MHz 2.61 MHz 64 kbps (celt) 4.09 MHz 8.08 MHz 6.24 MHz 128 kbps (celt) 1.93 MHz 8.83 MHz 6.53 MHz Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/kiss_fft.c')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/kiss_fft.c482
1 files changed, 173 insertions, 309 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index e2b8f3b3da..833ef5a71f 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -45,73 +45,62 @@
45 complex numbers. It also delares the kf_ internal functions. 45 complex numbers. It also delares the kf_ internal functions.
46*/ 46*/
47 47
48#if 0
49static void kf_bfly2( 48static void kf_bfly2(
50 kiss_fft_cpx * Fout, 49 kiss_fft_cpx * Fout,
51 const size_t fstride,
52 const kiss_fft_state *st,
53 int m, 50 int m,
54 int N, 51 int N
55 int mm
56 ) 52 )
57{ 53{
58 kiss_fft_cpx * Fout2; 54 kiss_fft_cpx * Fout2;
59 const kiss_twiddle_cpx * tw1; 55 int i;
60 int i,j; 56 (void)m;
61 kiss_fft_cpx * Fout_beg = Fout; 57#ifdef CUSTOM_MODES
62 for (i=0;i<N;i++) 58 if (m==1)
63 { 59 {
64 Fout = Fout_beg + i*mm; 60 celt_assert(m==1);
65 Fout2 = Fout + m; 61 for (i=0;i<N;i++)
66 tw1 = st->twiddles;
67 for(j=0;j<m;j++)
68 { 62 {
69 kiss_fft_cpx t; 63 kiss_fft_cpx t;
70 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); 64 Fout2 = Fout + 1;
71 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); 65 t = *Fout2;
72 C_MUL (t, *Fout2 , *tw1);
73 tw1 += fstride;
74 C_SUB( *Fout2 , *Fout , t ); 66 C_SUB( *Fout2 , *Fout , t );
75 C_ADDTO( *Fout , t ); 67 C_ADDTO( *Fout , t );
76 ++Fout2; 68 Fout += 2;
77 ++Fout;
78 } 69 }
79 } 70 } else
80}
81#endif 71#endif
82
83static void ki_bfly2(
84 kiss_fft_cpx * Fout,
85 const size_t fstride,
86 const kiss_fft_state *st,
87 int m,
88 int N,
89 int mm
90 )
91{
92 kiss_fft_cpx * Fout2;
93 const kiss_twiddle_cpx * tw1;
94 kiss_fft_cpx t;
95 int i,j;
96 kiss_fft_cpx * Fout_beg = Fout;
97 for (i=0;i<N;i++)
98 { 72 {
99 Fout = Fout_beg + i*mm; 73 opus_val16 tw;
100 Fout2 = Fout + m; 74 tw = QCONST16(0.7071067812f, 15);
101 tw1 = st->twiddles; 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */
102 for(j=0;j<m;j++) 76 celt_assert(m==4);
77 for (i=0;i<N;i++)
103 { 78 {
104 C_MULC (t, *Fout2 , *tw1); 79 kiss_fft_cpx t;
105 tw1 += fstride; 80 Fout2 = Fout + 4;
106 C_SUB( *Fout2 , *Fout , t ); 81 t = Fout2[0];
107 C_ADDTO( *Fout , t ); 82 C_SUB( Fout2[0] , Fout[0] , t );
108 ++Fout2; 83 C_ADDTO( Fout[0] , t );
109 ++Fout; 84
85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
87 C_SUB( Fout2[1] , Fout[1] , t );
88 C_ADDTO( Fout[1] , t );
89
90 t.r = Fout2[2].i;
91 t.i = -Fout2[2].r;
92 C_SUB( Fout2[2] , Fout[2] , t );
93 C_ADDTO( Fout[2] , t );
94
95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
97 C_SUB( Fout2[3] , Fout[3] , t );
98 C_ADDTO( Fout[3] , t );
99 Fout += 8;
110 } 100 }
111 } 101 }
112} 102}
113 103
114#if 0
115static void kf_bfly4( 104static void kf_bfly4(
116 kiss_fft_cpx * Fout, 105 kiss_fft_cpx * Fout,
117 const size_t fstride, 106 const size_t fstride,
@@ -121,93 +110,69 @@ static void kf_bfly4(
121 int mm 110 int mm
122 ) 111 )
123{ 112{
124 const kiss_twiddle_cpx *tw1,*tw2,*tw3; 113 int i;
125 kiss_fft_cpx scratch[6];
126 const size_t m2=2*m;
127 const size_t m3=3*m;
128 int i, j;
129 114
130 kiss_fft_cpx * Fout_beg = Fout; 115 if (m==1)
131 for (i=0;i<N;i++)
132 { 116 {
133 Fout = Fout_beg + i*mm; 117 /* Degenerate case where all the twiddles are 1. */
134 tw3 = tw2 = tw1 = st->twiddles; 118 for (i=0;i<N;i++)
135 for (j=0;j<m;j++)
136 { 119 {
137 C_MUL4(scratch[0],Fout[m] , *tw1 ); 120 kiss_fft_cpx scratch0, scratch1;
138 C_MUL4(scratch[1],Fout[m2] , *tw2 ); 121
139 C_MUL4(scratch[2],Fout[m3] , *tw3 ); 122 C_SUB( scratch0 , *Fout, Fout[2] );
140 123 C_ADDTO(*Fout, Fout[2]);
141 Fout->r = PSHR32(Fout->r, 2); 124 C_ADD( scratch1 , Fout[1] , Fout[3] );
142 Fout->i = PSHR32(Fout->i, 2); 125 C_SUB( Fout[2], *Fout, scratch1 );
143 C_SUB( scratch[5] , *Fout, scratch[1] ); 126 C_ADDTO( *Fout , scratch1 );
144 C_ADDTO(*Fout, scratch[1]); 127 C_SUB( scratch1 , Fout[1] , Fout[3] );
145 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 128
146 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 129 Fout[1].r = scratch0.r + scratch1.i;
147 C_SUB( Fout[m2], *Fout, scratch[3] ); 130 Fout[1].i = scratch0.i - scratch1.r;
148 tw1 += fstride; 131 Fout[3].r = scratch0.r - scratch1.i;
149 tw2 += fstride*2; 132 Fout[3].i = scratch0.i + scratch1.r;
150 tw3 += fstride*3; 133 Fout+=4;
151 C_ADDTO( *Fout , scratch[3] );
152
153 Fout[m].r = scratch[5].r + scratch[4].i;
154 Fout[m].i = scratch[5].i - scratch[4].r;
155 Fout[m3].r = scratch[5].r - scratch[4].i;
156 Fout[m3].i = scratch[5].i + scratch[4].r;
157 ++Fout;
158 } 134 }
159 } 135 } else {
160} 136 int j;
161#endif 137 kiss_fft_cpx scratch[6];
162 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
163static void ki_bfly4( 139 const int m2=2*m;
164 kiss_fft_cpx * Fout, 140 const int m3=3*m;
165 const size_t fstride, 141 kiss_fft_cpx * Fout_beg = Fout;
166 const kiss_fft_state *st, 142 for (i=0;i<N;i++)
167 int m,
168 int N,
169 int mm
170 )
171{
172 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
173 kiss_fft_cpx scratch[6];
174 const size_t m2=2*m;
175 const size_t m3=3*m;
176 int i, j;
177
178 kiss_fft_cpx * Fout_beg = Fout;
179 for (i=0;i<N;i++)
180 {
181 Fout = Fout_beg + i*mm;
182 tw3 = tw2 = tw1 = st->twiddles;
183 for (j=0;j<m;j++)
184 { 143 {
185 C_MULC(scratch[0],Fout[m] , *tw1 ); 144 Fout = Fout_beg + i*mm;
186 C_MULC(scratch[1],Fout[m2] , *tw2 ); 145 tw3 = tw2 = tw1 = st->twiddles;
187 C_MULC(scratch[2],Fout[m3] , *tw3 ); 146 /* m is guaranteed to be a multiple of 4. */
188 147 for (j=0;j<m;j++)
189 C_SUB( scratch[5] , *Fout, scratch[1] ); 148 {
190 C_ADDTO(*Fout, scratch[1]); 149 C_MUL(scratch[0],Fout[m] , *tw1 );
191 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 150 C_MUL(scratch[1],Fout[m2] , *tw2 );
192 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 151 C_MUL(scratch[2],Fout[m3] , *tw3 );
193 C_SUB( Fout[m2], *Fout, scratch[3] ); 152
194 tw1 += fstride; 153 C_SUB( scratch[5] , *Fout, scratch[1] );
195 tw2 += fstride*2; 154 C_ADDTO(*Fout, scratch[1]);
196 tw3 += fstride*3; 155 C_ADD( scratch[3] , scratch[0] , scratch[2] );
197 C_ADDTO( *Fout , scratch[3] ); 156 C_SUB( scratch[4] , scratch[0] , scratch[2] );
198 157 C_SUB( Fout[m2], *Fout, scratch[3] );
199 Fout[m].r = scratch[5].r - scratch[4].i; 158 tw1 += fstride;
200 Fout[m].i = scratch[5].i + scratch[4].r; 159 tw2 += fstride*2;
201 Fout[m3].r = scratch[5].r + scratch[4].i; 160 tw3 += fstride*3;
202 Fout[m3].i = scratch[5].i - scratch[4].r; 161 C_ADDTO( *Fout , scratch[3] );
203 ++Fout; 162
163 Fout[m].r = scratch[5].r + scratch[4].i;
164 Fout[m].i = scratch[5].i - scratch[4].r;
165 Fout[m3].r = scratch[5].r - scratch[4].i;
166 Fout[m3].i = scratch[5].i + scratch[4].r;
167 ++Fout;
168 }
204 } 169 }
205 } 170 }
206} 171}
207 172
173
208#ifndef RADIX_TWO_ONLY 174#ifndef RADIX_TWO_ONLY
209 175
210#if 0
211static void kf_bfly3( 176static void kf_bfly3(
212 kiss_fft_cpx * Fout, 177 kiss_fft_cpx * Fout,
213 const size_t fstride, 178 const size_t fstride,
@@ -225,14 +190,19 @@ static void kf_bfly3(
225 kiss_twiddle_cpx epi3; 190 kiss_twiddle_cpx epi3;
226 191
227 kiss_fft_cpx * Fout_beg = Fout; 192 kiss_fft_cpx * Fout_beg = Fout;
193#ifdef FIXED_POINT
194 epi3.r = -16384;
195 epi3.i = -28378;
196#else
228 epi3 = st->twiddles[fstride*m]; 197 epi3 = st->twiddles[fstride*m];
198#endif
229 for (i=0;i<N;i++) 199 for (i=0;i<N;i++)
230 { 200 {
231 Fout = Fout_beg + i*mm; 201 Fout = Fout_beg + i*mm;
232 tw1=tw2=st->twiddles; 202 tw1=tw2=st->twiddles;
203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
233 k=m; 204 k=m;
234 do { 205 do {
235 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
236 206
237 C_MUL(scratch[1],Fout[m] , *tw1); 207 C_MUL(scratch[1],Fout[m] , *tw1);
238 C_MUL(scratch[2],Fout[m2] , *tw2); 208 C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -259,59 +229,9 @@ static void kf_bfly3(
259 } while(--k); 229 } while(--k);
260 } 230 }
261} 231}
262#endif
263
264static void ki_bfly3(
265 kiss_fft_cpx * Fout,
266 const size_t fstride,
267 const kiss_fft_state *st,
268 int m,
269 int N,
270 int mm
271 )
272{
273 int i, k;
274 const size_t m2 = 2*m;
275 const kiss_twiddle_cpx *tw1,*tw2;
276 kiss_fft_cpx scratch[5];
277 kiss_twiddle_cpx epi3;
278
279 kiss_fft_cpx * Fout_beg = Fout;
280 epi3 = st->twiddles[fstride*m];
281 for (i=0;i<N;i++)
282 {
283 Fout = Fout_beg + i*mm;
284 tw1=tw2=st->twiddles;
285 k=m;
286 do{
287
288 C_MULC(scratch[1],Fout[m] , *tw1);
289 C_MULC(scratch[2],Fout[m2] , *tw2);
290
291 C_ADD(scratch[3],scratch[1],scratch[2]);
292 C_SUB(scratch[0],scratch[1],scratch[2]);
293 tw1 += fstride;
294 tw2 += fstride*2;
295
296 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
297 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
298
299 C_MULBYSCALAR( scratch[0] , -epi3.i );
300
301 C_ADDTO(*Fout,scratch[3]);
302
303 Fout[m2].r = Fout[m].r + scratch[0].i;
304 Fout[m2].i = Fout[m].i - scratch[0].r;
305
306 Fout[m].r -= scratch[0].i;
307 Fout[m].i += scratch[0].r;
308 232
309 ++Fout;
310 }while(--k);
311 }
312}
313 233
314#if 0 234#ifndef OVERRIDE_kf_bfly5
315static void kf_bfly5( 235static void kf_bfly5(
316 kiss_fft_cpx * Fout, 236 kiss_fft_cpx * Fout,
317 const size_t fstride, 237 const size_t fstride,
@@ -324,13 +244,19 @@ static void kf_bfly5(
324 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
325 int i, u; 245 int i, u;
326 kiss_fft_cpx scratch[13]; 246 kiss_fft_cpx scratch[13];
327 const kiss_twiddle_cpx * twiddles = st->twiddles;
328 const kiss_twiddle_cpx *tw; 247 const kiss_twiddle_cpx *tw;
329 kiss_twiddle_cpx ya,yb; 248 kiss_twiddle_cpx ya,yb;
330 kiss_fft_cpx * Fout_beg = Fout; 249 kiss_fft_cpx * Fout_beg = Fout;
331 250
332 ya = twiddles[fstride*m]; 251#ifdef FIXED_POINT
333 yb = twiddles[fstride*2*m]; 252 ya.r = 10126;
253 ya.i = -31164;
254 yb.r = -26510;
255 yb.i = -19261;
256#else
257 ya = st->twiddles[fstride*m];
258 yb = st->twiddles[fstride*2*m];
259#endif
334 tw=st->twiddles; 260 tw=st->twiddles;
335 261
336 for (i=0;i<N;i++) 262 for (i=0;i<N;i++)
@@ -342,8 +268,8 @@ static void kf_bfly5(
342 Fout3=Fout0+3*m; 268 Fout3=Fout0+3*m;
343 Fout4=Fout0+4*m; 269 Fout4=Fout0+4*m;
344 270
271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
345 for ( u=0; u<m; ++u ) { 272 for ( u=0; u<m; ++u ) {
346 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
347 scratch[0] = *Fout0; 273 scratch[0] = *Fout0;
348 274
349 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@@ -380,75 +306,8 @@ static void kf_bfly5(
380 } 306 }
381 } 307 }
382} 308}
383#endif 309#endif /* OVERRIDE_kf_bfly5 */
384
385static void ki_bfly5(
386 kiss_fft_cpx * Fout,
387 const size_t fstride,
388 const kiss_fft_state *st,
389 int m,
390 int N,
391 int mm
392 )
393{
394 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
395 int i, u;
396 kiss_fft_cpx scratch[13];
397 const kiss_twiddle_cpx * twiddles = st->twiddles;
398 const kiss_twiddle_cpx *tw;
399 kiss_twiddle_cpx ya,yb;
400 kiss_fft_cpx * Fout_beg = Fout;
401 310
402 ya = twiddles[fstride*m];
403 yb = twiddles[fstride*2*m];
404 tw=st->twiddles;
405
406 for (i=0;i<N;i++)
407 {
408 Fout = Fout_beg + i*mm;
409 Fout0=Fout;
410 Fout1=Fout0+m;
411 Fout2=Fout0+2*m;
412 Fout3=Fout0+3*m;
413 Fout4=Fout0+4*m;
414
415 for ( u=0; u<m; ++u ) {
416 scratch[0] = *Fout0;
417
418 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
419 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
420 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
421 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
422
423 C_ADD( scratch[7],scratch[1],scratch[4]);
424 C_SUB( scratch[10],scratch[1],scratch[4]);
425 C_ADD( scratch[8],scratch[2],scratch[3]);
426 C_SUB( scratch[9],scratch[2],scratch[3]);
427
428 Fout0->r += scratch[7].r + scratch[8].r;
429 Fout0->i += scratch[7].i + scratch[8].i;
430
431 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
432 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
433
434 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
435 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
436
437 C_SUB(*Fout1,scratch[5],scratch[6]);
438 C_ADD(*Fout4,scratch[5],scratch[6]);
439
440 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
441 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
442 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
443 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
444
445 C_ADD(*Fout2,scratch[11],scratch[12]);
446 C_SUB(*Fout3,scratch[11],scratch[12]);
447
448 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
449 }
450 }
451}
452 311
453#endif 312#endif
454 313
@@ -496,6 +355,9 @@ static
496int kf_factor(int n,opus_int16 * facbuf) 355int kf_factor(int n,opus_int16 * facbuf)
497{ 356{
498 int p=4; 357 int p=4;
358 int i;
359 int stages=0;
360 int nbak = n;
499 361
500 /*factor out powers of 4, powers of 2, then any remaining primes */ 362 /*factor out powers of 4, powers of 2, then any remaining primes */
501 do { 363 do {
@@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
517 { 379 {
518 return 0; 380 return 0;
519 } 381 }
520 *facbuf++ = p; 382 facbuf[2*stages] = p;
521 *facbuf++ = n; 383 if (p==2 && stages > 1)
384 {
385 facbuf[2*stages] = 4;
386 facbuf[2] = 2;
387 }
388 stages++;
522 } while (n > 1); 389 } while (n > 1);
390 n = nbak;
391 /* Reverse the order to get the radix 4 at the end, so we can use the
392 fast degenerate case. It turns out that reversing the order also
393 improves the noise behaviour. */
394 for (i=0;i<stages/2;i++)
395 {
396 int tmp;
397 tmp = facbuf[2*i];
398 facbuf[2*i] = facbuf[2*(stages-i-1)];
399 facbuf[2*(stages-i-1)] = tmp;
400 }
401 for (i=0;i<stages;i++)
402 {
403 n /= facbuf[2*i];
404 facbuf[2*i+1] = n;
405 }
523 return 1; 406 return 1;
524} 407}
525 408
@@ -563,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
563 kiss_twiddle_cpx *twiddles; 446 kiss_twiddle_cpx *twiddles;
564 447
565 st->nfft=nfft; 448 st->nfft=nfft;
566#ifndef FIXED_POINT 449#ifdef FIXED_POINT
450 st->scale_shift = celt_ilog2(st->nfft);
451 if (st->nfft == 1<<st->scale_shift)
452 st->scale = Q15ONE;
453 else
454 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
455#else
567 st->scale = 1.f/nfft; 456 st->scale = 1.f/nfft;
568#endif 457#endif
569 if (base != NULL) 458 if (base != NULL)
570 { 459 {
571 st->twiddles = base->twiddles; 460 st->twiddles = base->twiddles;
572 st->shift = 0; 461 st->shift = 0;
573 while (nfft<<st->shift != base->nfft && st->shift < 32) 462 while (st->shift < 32 && nfft<<st->shift != base->nfft)
574 st->shift++; 463 st->shift++;
575 if (st->shift>=32) 464 if (st->shift>=32)
576 goto fail; 465 goto fail;
@@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
614 503
615#endif /* CUSTOM_MODES */ 504#endif /* CUSTOM_MODES */
616 505
617#if 0 506void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
618void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
619{ 507{
620 int m2, m; 508 int m2, m;
621 int p; 509 int p;
@@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
627 /* st->shift can be -1 */ 515 /* st->shift can be -1 */
628 shift = st->shift>0 ? st->shift : 0; 516 shift = st->shift>0 ? st->shift : 0;
629 517
630 celt_assert2 (fin != fout, "In-place FFT not supported");
631 /* Bit-reverse the input */
632 for (i=0;i<st->nfft;i++)
633 {
634 fout[st->bitrev[i]] = fin[i];
635#ifndef FIXED_POINT
636 fout[st->bitrev[i]].r *= st->scale;
637 fout[st->bitrev[i]].i *= st->scale;
638#endif
639 }
640
641 fstride[0] = 1; 518 fstride[0] = 1;
642 L=0; 519 L=0;
643 do { 520 do {
@@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
656 switch (st->factors[2*i]) 533 switch (st->factors[2*i])
657 { 534 {
658 case 2: 535 case 2:
659 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); 536 kf_bfly2(fout, m, fstride[i]);
660 break; 537 break;
661 case 4: 538 case 4:
662 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); 539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@@ -673,57 +550,44 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
673 m = m2; 550 m = m2;
674 } 551 }
675} 552}
676#endif
677 553
678void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 554#if 0
555void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
679{ 556{
680 int m2, m;
681 int p;
682 int L;
683 int fstride[MAXFACTORS];
684 int i; 557 int i;
685 int shift; 558 opus_val16 scale;
559#ifdef FIXED_POINT
560 /* Allows us to scale with MULT16_32_Q16(), which is faster than
561 MULT16_32_Q15() on ARM. */
562 int scale_shift = st->scale_shift-1;
563#endif
564 scale = st->scale;
686 565
687 /* st->shift can be -1 */
688 shift = st->shift>0 ? st->shift : 0;
689 celt_assert2 (fin != fout, "In-place FFT not supported"); 566 celt_assert2 (fin != fout, "In-place FFT not supported");
690 /* Bit-reverse the input */ 567 /* Bit-reverse the input */
691 for (i=0;i<st->nfft;i++) 568 for (i=0;i<st->nfft;i++)
692 fout[st->bitrev[i]] = fin[i];
693
694 fstride[0] = 1;
695 L=0;
696 do {
697 p = st->factors[2*L];
698 m = st->factors[2*L+1];
699 fstride[L+1] = fstride[L]*p;
700 L++;
701 } while(m!=1);
702 m = st->factors[2*L-1];
703 for (i=L-1;i>=0;i--)
704 { 569 {
705 if (i!=0) 570 kiss_fft_cpx x = fin[i];
706 m2 = st->factors[2*i-1]; 571 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
707 else 572 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
708 m2 = 1;
709 switch (st->factors[2*i])
710 {
711 case 2:
712 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
713 break;
714 case 4:
715 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
716 break;
717#ifndef RADIX_TWO_ONLY
718 case 3:
719 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
720 break;
721 case 5:
722 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
723 break;
724#endif
725 }
726 m = m2;
727 } 573 }
574 opus_fft_impl(st, fout);
728} 575}
576#endif
577
729 578
579#ifdef TEST_UNIT_DFT_C
580void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
581{
582 int i;
583 celt_assert2 (fin != fout, "In-place FFT not supported");
584 /* Bit-reverse the input */
585 for (i=0;i<st->nfft;i++)
586 fout[st->bitrev[i]] = fin[i];
587 for (i=0;i<st->nfft;i++)
588 fout[i].i = -fout[i].i;
589 opus_fft_impl(st, fout);
590 for (i=0;i<st->nfft;i++)
591 fout[i].i = -fout[i].i;
592}
593#endif