summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libspeex/mdf.c
diff options
context:
space:
mode:
authorSolomon Peachy <pizza@shaftnet.org>2024-05-08 10:36:38 -0400
committerSolomon Peachy <pizza@shaftnet.org>2024-06-20 07:08:35 -0400
commit547b6a570dbad844e79b4ba5eb934f043bab6318 (patch)
tree0cbdb670d73a2544d33985166c5abfa69e20a590 /lib/rbcodec/codecs/libspeex/mdf.c
parent8ef20383b1e5025f7724e750832de6e28e50680d (diff)
downloadrockbox-547b6a570dbad844e79b4ba5eb934f043bab6318.tar.gz
rockbox-547b6a570dbad844e79b4ba5eb934f043bab6318.zip
codecs: Update libspeex from 1.2beta3 to 1.2rc1
This is a relatively minor bump, but it's the first step towards bringing this current. Change-Id: Iab6c9b0c77f0ba705280434ea74b513364719499
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/mdf.c')
-rw-r--r--lib/rbcodec/codecs/libspeex/mdf.c576
1 files changed, 342 insertions, 234 deletions
diff --git a/lib/rbcodec/codecs/libspeex/mdf.c b/lib/rbcodec/codecs/libspeex/mdf.c
index 1994f2a886..cfbe4d1284 100644
--- a/lib/rbcodec/codecs/libspeex/mdf.c
+++ b/lib/rbcodec/codecs/libspeex/mdf.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2003-2006 Jean-Marc Valin 1/* Copyright (C) 2003-2008 Jean-Marc Valin
2 2
3 File: mdf.c 3 File: mdf.c
4 Echo canceller based on the MDF algorithm (see below) 4 Echo canceller based on the MDF algorithm (see below)
@@ -33,36 +33,36 @@
33/* 33/*
34 The echo canceller is based on the MDF algorithm described in: 34 The echo canceller is based on the MDF algorithm described in:
35 35
36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38 February 1990. 38 February 1990.
39 39
40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41 double-talk is achieved using a variable learning rate as described in: 41 double-talk is achieved using a variable learning rate as described in:
42 42
43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44 Cancellation With Double-Talk. IEEE Transactions on Audio, 44 Cancellation With Double-Talk. IEEE Transactions on Audio,
45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. 45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf 46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47 47
48 There is no explicit double-talk detection, but a continuous variation 48 There is no explicit double-talk detection, but a continuous variation
49 in the learning rate based on residual echo, double-talk and background 49 in the learning rate based on residual echo, double-talk and background
50 noise. 50 noise.
51 51
52 About the fixed-point version: 52 About the fixed-point version:
53 All the signals are represented with 16-bit words. The filter weights 53 All the signals are represented with 16-bit words. The filter weights
54 are represented with 32-bit words, but only the top 16 bits are used 54 are represented with 32-bit words, but only the top 16 bits are used
55 in most cases. The lower 16 bits are completely unreliable (due to the 55 in most cases. The lower 16 bits are completely unreliable (due to the
56 fact that the update is done only on the top bits), but help in the 56 fact that the update is done only on the top bits), but help in the
57 adaptation -- probably by removing a "threshold effect" due to 57 adaptation -- probably by removing a "threshold effect" due to
58 quantization (rounding going to zero) when the gradient is small. 58 quantization (rounding going to zero) when the gradient is small.
59 59
60 Another kludge that seems to work good: when performing the weight 60 Another kludge that seems to work good: when performing the weight
61 update, we only move half the way toward the "goal" this seems to 61 update, we only move half the way toward the "goal" this seems to
62 reduce the effect of quantization noise in the update phase. This 62 reduce the effect of quantization noise in the update phase. This
63 can be seen as applying a gradient descent on a "soft constraint" 63 can be seen as applying a gradient descent on a "soft constraint"
64 instead of having a hard constraint. 64 instead of having a hard constraint.
65 65
66*/ 66*/
67 67
68#ifdef HAVE_CONFIG_H 68#ifdef HAVE_CONFIG_H
@@ -88,6 +88,12 @@
88#define WEIGHT_SHIFT 0 88#define WEIGHT_SHIFT 0
89#endif 89#endif
90 90
91#ifdef FIXED_POINT
92#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
93#else
94#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
95#endif
96
91/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk 97/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
92 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ 98 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
93#define TWO_PATH 99#define TWO_PATH
@@ -131,13 +137,15 @@ struct SpeexEchoState_ {
131 int adapted; 137 int adapted;
132 int saturated; 138 int saturated;
133 int screwed_up; 139 int screwed_up;
140 int C; /** Number of input channels (microphones) */
141 int K; /** Number of output channels (loudspeakers) */
134 spx_int32_t sampling_rate; 142 spx_int32_t sampling_rate;
135 spx_word16_t spec_average; 143 spx_word16_t spec_average;
136 spx_word16_t beta0; 144 spx_word16_t beta0;
137 spx_word16_t beta_max; 145 spx_word16_t beta_max;
138 spx_word32_t sum_adapt; 146 spx_word32_t sum_adapt;
139 spx_word16_t leak_estimate; 147 spx_word16_t leak_estimate;
140 148
141 spx_word16_t *e; /* scratch */ 149 spx_word16_t *e; /* scratch */
142 spx_word16_t *x; /* Far-end input buffer (2N) */ 150 spx_word16_t *x; /* Far-end input buffer (2N) */
143 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ 151 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */
@@ -171,10 +179,10 @@ struct SpeexEchoState_ {
171 spx_word16_t *window; 179 spx_word16_t *window;
172 spx_word16_t *prop; 180 spx_word16_t *prop;
173 void *fft_table; 181 void *fft_table;
174 spx_word16_t memX, memD, memE; 182 spx_word16_t *memX, *memD, *memE;
175 spx_word16_t preemph; 183 spx_word16_t preemph;
176 spx_word16_t notch_radius; 184 spx_word16_t notch_radius;
177 spx_mem_t notch_mem[2]; 185 spx_mem_t *notch_mem;
178 186
179 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ 187 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
180 spx_int16_t *play_buf; 188 spx_int16_t *play_buf;
@@ -182,7 +190,7 @@ struct SpeexEchoState_ {
182 int play_buf_started; 190 int play_buf_started;
183}; 191};
184 192
185static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) 193static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
186{ 194{
187 int i; 195 int i;
188 spx_word16_t den2; 196 spx_word16_t den2;
@@ -190,11 +198,11 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius,
190 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); 198 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
191#else 199#else
192 den2 = radius*radius + .7*(1-radius)*(1-radius); 200 den2 = radius*radius + .7*(1-radius)*(1-radius);
193#endif 201#endif
194 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ 202 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
195 for (i=0;i<len;i++) 203 for (i=0;i<len;i++)
196 { 204 {
197 spx_word16_t vin = in[i]; 205 spx_word16_t vin = in[i*stride];
198 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); 206 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
199#ifdef FIXED_POINT 207#ifdef FIXED_POINT
200 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); 208 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
@@ -234,6 +242,18 @@ static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N
234 ps[j]=MULT16_16(X[i],X[i]); 242 ps[j]=MULT16_16(X[i],X[i]);
235} 243}
236 244
245/** Compute power spectrum of a half-complex (packed) vector and accumulate */
246static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
247{
248 int i, j;
249 ps[0]+=MULT16_16(X[0],X[0]);
250 for (i=1,j=1;i<N-1;i+=2,j++)
251 {
252 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
253 }
254 ps[j]+=MULT16_16(X[i],X[i]);
255}
256
237/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ 257/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
238#ifdef FIXED_POINT 258#ifdef FIXED_POINT
239static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 259static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
@@ -330,16 +350,17 @@ static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_fl
330 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); 350 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
331} 351}
332 352
333static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) 353static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
334{ 354{
335 int i, j; 355 int i, j, p;
336 spx_word16_t max_sum = 1; 356 spx_word16_t max_sum = 1;
337 spx_word32_t prop_sum = 1; 357 spx_word32_t prop_sum = 1;
338 for (i=0;i<M;i++) 358 for (i=0;i<M;i++)
339 { 359 {
340 spx_word32_t tmp = 1; 360 spx_word32_t tmp = 1;
341 for (j=0;j<N;j++) 361 for (p=0;p<P;p++)
342 tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); 362 for (j=0;j<N;j++)
363 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
343#ifdef FIXED_POINT 364#ifdef FIXED_POINT
344 /* Just a security in case an overflow were to occur */ 365 /* Just a security in case an overflow were to occur */
345 tmp = MIN32(ABS32(tmp), 536870912); 366 tmp = MIN32(ABS32(tmp), 536870912);
@@ -378,11 +399,20 @@ static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const sp
378#endif 399#endif
379 400
380/** Creates a new echo canceller state */ 401/** Creates a new echo canceller state */
381SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 402EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
382{ 403{
383 int i,N,M; 404 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
405}
406
407EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
408{
409 int i,N,M, C, K;
384 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); 410 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
385 411
412 st->K = nb_speakers;
413 st->C = nb_mic;
414 C=st->C;
415 K=st->K;
386#ifdef DUMP_ECHO_CANCEL_DATA 416#ifdef DUMP_ECHO_CANCEL_DATA
387 if (rFile || pFile || oFile) 417 if (rFile || pFile || oFile)
388 speex_fatal("Opening dump files twice"); 418 speex_fatal("Opening dump files twice");
@@ -390,7 +420,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
390 pFile = fopen("aec_play.sw", "wb"); 420 pFile = fopen("aec_play.sw", "wb");
391 oFile = fopen("aec_out.sw", "wb"); 421 oFile = fopen("aec_out.sw", "wb");
392#endif 422#endif
393 423
394 st->frame_size = frame_size; 424 st->frame_size = frame_size;
395 st->window_size = 2*frame_size; 425 st->window_size = 2*frame_size;
396 N = st->window_size; 426 N = st->window_size;
@@ -412,24 +442,24 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
412 st->leak_estimate = 0; 442 st->leak_estimate = 0;
413 443
414 st->fft_table = spx_fft_init(N); 444 st->fft_table = spx_fft_init(N);
415 445
416 st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 446 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
417 st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 447 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
418 st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); 448 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
419 st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 449 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
420 st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 450 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
421 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 451 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
422 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 452 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
423 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 453 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
424 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 454 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
425 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 455 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
426 456
427 st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); 457 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
428 st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 458 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
429 st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 459 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
430 st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); 460 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
431#ifdef TWO_PATH 461#ifdef TWO_PATH
432 st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); 462 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
433#endif 463#endif
434 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 464 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
435 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); 465 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
@@ -450,7 +480,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
450#endif 480#endif
451 for (i=0;i<=st->frame_size;i++) 481 for (i=0;i<=st->frame_size;i++)
452 st->power_1[i] = FLOAT_ONE; 482 st->power_1[i] = FLOAT_ONE;
453 for (i=0;i<N*M;i++) 483 for (i=0;i<N*M*K*C;i++)
454 st->W[i] = 0; 484 st->W[i] = 0;
455 { 485 {
456 spx_word32_t sum = 0; 486 spx_word32_t sum = 0;
@@ -465,11 +495,13 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
465 } 495 }
466 for (i=M-1;i>=0;i--) 496 for (i=M-1;i>=0;i--)
467 { 497 {
468 st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); 498 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
469 } 499 }
470 } 500 }
471 501
472 st->memX=st->memD=st->memE=0; 502 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
503 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
504 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
473 st->preemph = QCONST16(.9,15); 505 st->preemph = QCONST16(.9,15);
474 if (st->sampling_rate<12000) 506 if (st->sampling_rate<12000)
475 st->notch_radius = QCONST16(.9, 15); 507 st->notch_radius = QCONST16(.9, 15);
@@ -478,30 +510,32 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
478 else 510 else
479 st->notch_radius = QCONST16(.992, 15); 511 st->notch_radius = QCONST16(.992, 15);
480 512
481 st->notch_mem[0] = st->notch_mem[1] = 0; 513 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
482 st->adapted = 0; 514 st->adapted = 0;
483 st->Pey = st->Pyy = FLOAT_ONE; 515 st->Pey = st->Pyy = FLOAT_ONE;
484 516
485#ifdef TWO_PATH 517#ifdef TWO_PATH
486 st->Davg1 = st->Davg2 = 0; 518 st->Davg1 = st->Davg2 = 0;
487 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 519 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
488#endif 520#endif
489 521
490 st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 522 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
491 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 523 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
492 st->play_buf_started = 0; 524 st->play_buf_started = 0;
493 525
494 return st; 526 return st;
495} 527}
496 528
497/** Resets echo canceller state */ 529/** Resets echo canceller state */
498void speex_echo_state_reset(SpeexEchoState *st) 530EXPORT void speex_echo_state_reset(SpeexEchoState *st)
499{ 531{
500 int i, M, N; 532 int i, M, N, C, K;
501 st->cancel_count=0; 533 st->cancel_count=0;
502 st->screwed_up = 0; 534 st->screwed_up = 0;
503 N = st->window_size; 535 N = st->window_size;
504 M = st->M; 536 M = st->M;
537 C=st->C;
538 K=st->K;
505 for (i=0;i<N*M;i++) 539 for (i=0;i<N*M;i++)
506 st->W[i] = 0; 540 st->W[i] = 0;
507#ifdef TWO_PATH 541#ifdef TWO_PATH
@@ -521,13 +555,20 @@ void speex_echo_state_reset(SpeexEchoState *st)
521 { 555 {
522 st->last_y[i] = 0; 556 st->last_y[i] = 0;
523 } 557 }
524 for (i=0;i<N;i++) 558 for (i=0;i<N*C;i++)
525 { 559 {
526 st->E[i] = 0; 560 st->E[i] = 0;
561 }
562 for (i=0;i<N*K;i++)
563 {
527 st->x[i] = 0; 564 st->x[i] = 0;
528 } 565 }
529 st->notch_mem[0] = st->notch_mem[1] = 0; 566 for (i=0;i<2*C;i++)
530 st->memX=st->memD=st->memE=0; 567 st->notch_mem[i] = 0;
568 for (i=0;i<C;i++)
569 st->memD[i]=st->memE[i]=0;
570 for (i=0;i<K;i++)
571 st->memX[i]=0;
531 572
532 st->saturated = 0; 573 st->saturated = 0;
533 st->adapted = 0; 574 st->adapted = 0;
@@ -545,7 +586,7 @@ void speex_echo_state_reset(SpeexEchoState *st)
545} 586}
546 587
547/** Destroys an echo canceller state */ 588/** Destroys an echo canceller state */
548void speex_echo_state_destroy(SpeexEchoState *st) 589EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
549{ 590{
550 spx_fft_destroy(st->fft_table); 591 spx_fft_destroy(st->fft_table);
551 592
@@ -576,9 +617,14 @@ void speex_echo_state_destroy(SpeexEchoState *st)
576#ifdef FIXED_POINT 617#ifdef FIXED_POINT
577 speex_free(st->wtmp2); 618 speex_free(st->wtmp2);
578#endif 619#endif
620 speex_free(st->memX);
621 speex_free(st->memD);
622 speex_free(st->memE);
623 speex_free(st->notch_mem);
624
579 speex_free(st->play_buf); 625 speex_free(st->play_buf);
580 speex_free(st); 626 speex_free(st);
581 627
582#ifdef DUMP_ECHO_CANCEL_DATA 628#ifdef DUMP_ECHO_CANCEL_DATA
583 fclose(rFile); 629 fclose(rFile);
584 fclose(pFile); 630 fclose(pFile);
@@ -587,7 +633,7 @@ void speex_echo_state_destroy(SpeexEchoState *st)
587#endif 633#endif
588} 634}
589 635
590void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 636EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
591{ 637{
592 int i; 638 int i;
593 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ 639 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
@@ -610,7 +656,7 @@ void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t
610 } 656 }
611} 657}
612 658
613void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 659EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
614{ 660{
615 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ 661 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
616 if (!st->play_buf_started) 662 if (!st->play_buf_started)
@@ -637,16 +683,16 @@ void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
637} 683}
638 684
639/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 685/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
640void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 686EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
641{ 687{
642 speex_echo_cancellation(st, in, far_end, out); 688 speex_echo_cancellation(st, in, far_end, out);
643} 689}
644 690
645/** Performs echo cancellation on a frame */ 691/** Performs echo cancellation on a frame */
646void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 692EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
647{ 693{
648 int i,j; 694 int i,j, chan, speak;
649 int N,M; 695 int N,M, C, K;
650 spx_word32_t Syy,See,Sxx,Sdd, Sff; 696 spx_word32_t Syy,See,Sxx,Sdd, Sff;
651#ifdef TWO_PATH 697#ifdef TWO_PATH
652 spx_word32_t Dbf; 698 spx_word32_t Dbf;
@@ -658,9 +704,12 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
658 spx_float_t alpha, alpha_1; 704 spx_float_t alpha, alpha_1;
659 spx_word16_t RER; 705 spx_word16_t RER;
660 spx_word32_t tmp32; 706 spx_word32_t tmp32;
661 707
662 N = st->window_size; 708 N = st->window_size;
663 M = st->M; 709 M = st->M;
710 C = st->C;
711 K = st->K;
712
664 st->cancel_count++; 713 st->cancel_count++;
665#ifdef FIXED_POINT 714#ifdef FIXED_POINT
666 ss=DIV32_16(11469,M); 715 ss=DIV32_16(11469,M);
@@ -670,157 +719,198 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
670 ss_1 = 1-ss; 719 ss_1 = 1-ss;
671#endif 720#endif
672 721
673 /* Apply a notch filter to make sure DC doesn't end up causing problems */ 722 for (chan = 0; chan < C; chan++)
674 filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem);
675 /* Copy input data to buffer and apply pre-emphasis */
676 for (i=0;i<st->frame_size;i++)
677 { 723 {
678 spx_word32_t tmp32; 724 /* Apply a notch filter to make sure DC doesn't end up causing problems */
679 tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); 725 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
680#ifdef FIXED_POINT 726 /* Copy input data to buffer and apply pre-emphasis */
681 /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ 727 /* Copy input data to buffer */
682 if (tmp32 > 32767) 728 for (i=0;i<st->frame_size;i++)
683 { 729 {
684 tmp32 = 32767; 730 spx_word32_t tmp32;
685 st->saturated = M+1; 731 /* FIXME: This core has changed a bit, need to merge properly */
732 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
733#ifdef FIXED_POINT
734 if (tmp32 > 32767)
735 {
736 tmp32 = 32767;
737 if (st->saturated == 0)
738 st->saturated = 1;
739 }
740 if (tmp32 < -32767)
741 {
742 tmp32 = -32767;
743 if (st->saturated == 0)
744 st->saturated = 1;
745 }
746#endif
747 st->memD[chan] = st->input[chan*st->frame_size+i];
748 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
686 } 749 }
687 if (tmp32 < -32767) 750 }
751
752 for (speak = 0; speak < K; speak++)
753 {
754 for (i=0;i<st->frame_size;i++)
688 { 755 {
689 tmp32 = -32767; 756 spx_word32_t tmp32;
690 st->saturated = M+1; 757 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
691 } 758 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
692#endif
693 st->x[i+st->frame_size] = EXTRACT16(tmp32);
694 st->memX = far_end[i];
695
696 tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
697#ifdef FIXED_POINT 759#ifdef FIXED_POINT
698 if (tmp32 > 32767) 760 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
699 { 761 if (tmp32 > 32767)
700 tmp32 = 32767; 762 {
701 if (st->saturated == 0) 763 tmp32 = 32767;
702 st->saturated = 1; 764 st->saturated = M+1;
703 } 765 }
704 if (tmp32 < -32767) 766 if (tmp32 < -32767)
767 {
768 tmp32 = -32767;
769 st->saturated = M+1;
770 }
771#endif
772 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
773 st->memX[speak] = far_end[i*K+speak];
774 }
775 }
776
777 for (speak = 0; speak < K; speak++)
778 {
779 /* Shift memory: this could be optimized eventually*/
780 for (j=M-1;j>=0;j--)
705 { 781 {
706 tmp32 = -32767; 782 for (i=0;i<N;i++)
707 if (st->saturated == 0) 783 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
708 st->saturated = 1;
709 } 784 }
710#endif 785 /* Convert x (echo input) to frequency domain */
711 st->memD = st->input[i]; 786 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
712 st->input[i] = tmp32;
713 } 787 }
714 788
715 /* Shift memory: this could be optimized eventually*/ 789 Sxx = 0;
716 for (j=M-1;j>=0;j--) 790 for (speak = 0; speak < K; speak++)
717 { 791 {
718 for (i=0;i<N;i++) 792 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
719 st->X[(j+1)*N+i] = st->X[j*N+i]; 793 power_spectrum_accum(st->X+speak*N, st->Xf, N);
720 } 794 }
721 795
722 /* Convert x (far end) to frequency domain */ 796 Sff = 0;
723 spx_fft(st->fft_table, st->x, &st->X[0]); 797 for (chan = 0; chan < C; chan++)
724 for (i=0;i<N;i++) 798 {
725 st->last_y[i] = st->x[i];
726 Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
727 for (i=0;i<st->frame_size;i++)
728 st->x[i] = st->x[i+st->frame_size];
729 /* From here on, the top part of x is used as scratch space */
730
731#ifdef TWO_PATH 799#ifdef TWO_PATH
732 /* Compute foreground filter */ 800 /* Compute foreground filter */
733 spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); 801 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
734 spx_ifft(st->fft_table, st->Y, st->e); 802 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
735 for (i=0;i<st->frame_size;i++) 803 for (i=0;i<st->frame_size;i++)
736 st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); 804 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
737 Sff = mdf_inner_prod(st->e, st->e, st->frame_size); 805 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
738#endif 806#endif
739 807 }
808
740 /* Adjust proportional adaption rate */ 809 /* Adjust proportional adaption rate */
741 mdf_adjust_prop (st->W, N, M, st->prop); 810 /* FIXME: Adjust that for C, K*/
811 if (st->adapted)
812 mdf_adjust_prop (st->W, N, M, C*K, st->prop);
742 /* Compute weight gradient */ 813 /* Compute weight gradient */
743 if (st->saturated == 0) 814 if (st->saturated == 0)
744 { 815 {
745 for (j=M-1;j>=0;j--) 816 for (chan = 0; chan < C; chan++)
746 { 817 {
747 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); 818 for (speak = 0; speak < K; speak++)
748 for (i=0;i<N;i++) 819 {
749 st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); 820 for (j=M-1;j>=0;j--)
750 821 {
822 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
823 for (i=0;i<N;i++)
824 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
825 }
826 }
751 } 827 }
752 } else { 828 } else {
753 st->saturated--; 829 st->saturated--;
754 } 830 }
755 831
832 /* FIXME: MC conversion required */
756 /* Update weight to prevent circular convolution (MDF / AUMDF) */ 833 /* Update weight to prevent circular convolution (MDF / AUMDF) */
757 for (j=0;j<M;j++) 834 for (chan = 0; chan < C; chan++)
758 { 835 {
759 /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 836 for (speak = 0; speak < K; speak++)
760 /* Remove the "if" to make this an MDF filter */
761 if (j==0 || st->cancel_count%(M-1) == j-1)
762 { 837 {
763#ifdef FIXED_POINT 838 for (j=0;j<M;j++)
764 for (i=0;i<N;i++)
765 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
766 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
767 for (i=0;i<st->frame_size;i++)
768 { 839 {
769 st->wtmp[i]=0; 840 /* This is a variant of the Alternatively Updated MDF (AUMDF) */
770 } 841 /* Remove the "if" to make this an MDF filter */
771 for (i=st->frame_size;i<N;i++) 842 if (j==0 || st->cancel_count%(M-1) == j-1)
772 { 843 {
773 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 844#ifdef FIXED_POINT
774 } 845 for (i=0;i<N;i++)
775 spx_fft(st->fft_table, st->wtmp, st->wtmp2); 846 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
776 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 847 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
777 for (i=0;i<N;i++) 848 for (i=0;i<st->frame_size;i++)
778 st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 849 {
850 st->wtmp[i]=0;
851 }
852 for (i=st->frame_size;i<N;i++)
853 {
854 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
855 }
856 spx_fft(st->fft_table, st->wtmp, st->wtmp2);
857 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
858 for (i=0;i<N;i++)
859 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
779#else 860#else
780 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 861 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
781 for (i=st->frame_size;i<N;i++) 862 for (i=st->frame_size;i<N;i++)
782 { 863 {
783 st->wtmp[i]=0; 864 st->wtmp[i]=0;
784 } 865 }
785 spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); 866 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
786#endif 867#endif
868 }
869 }
787 } 870 }
788 } 871 }
789 872
790 /* Compute filter response Y */ 873 /* So we can use power_spectrum_accum */
791 spectral_mul_accum(st->X, st->W, st->Y, N, M); 874 for (i=0;i<=st->frame_size;i++)
792 spx_ifft(st->fft_table, st->Y, st->y); 875 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
793 876
877 Dbf = 0;
878 See = 0;
794#ifdef TWO_PATH 879#ifdef TWO_PATH
795 /* Difference in response, this is used to estimate the variance of our residual power estimate */ 880 /* Difference in response, this is used to estimate the variance of our residual power estimate */
796 for (i=0;i<st->frame_size;i++) 881 for (chan = 0; chan < C; chan++)
797 st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); 882 {
798 Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); 883 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
884 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
885 for (i=0;i<st->frame_size;i++)
886 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
887 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
888 for (i=0;i<st->frame_size;i++)
889 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
890 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
891 }
799#endif 892#endif
800 893
801 for (i=0;i<st->frame_size;i++)
802 st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
803 See = mdf_inner_prod(st->e, st->e, st->frame_size);
804#ifndef TWO_PATH 894#ifndef TWO_PATH
805 Sff = See; 895 Sff = See;
806#endif 896#endif
807 897
808#ifdef TWO_PATH 898#ifdef TWO_PATH
809 /* Logic for updating the foreground filter */ 899 /* Logic for updating the foreground filter */
810 900
811 /* For two time windows, compute the mean of the energy difference, as well as the variance */ 901 /* For two time windows, compute the mean of the energy difference, as well as the variance */
812 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); 902 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
813 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); 903 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
814 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); 904 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
815 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); 905 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
816 906
817 /* Equivalent float code: 907 /* Equivalent float code:
818 st->Davg1 = .6*st->Davg1 + .4*(Sff-See); 908 st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
819 st->Davg2 = .85*st->Davg2 + .15*(Sff-See); 909 st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
820 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; 910 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
821 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; 911 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
822 */ 912 */
823 913
824 update_foreground = 0; 914 update_foreground = 0;
825 /* Check if we have a statistically significant reduction in the residual echo */ 915 /* Check if we have a statistically significant reduction in the residual echo */
826 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ 916 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
@@ -830,18 +920,19 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
830 update_foreground = 1; 920 update_foreground = 1;
831 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) 921 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
832 update_foreground = 1; 922 update_foreground = 1;
833 923
834 /* Do we update? */ 924 /* Do we update? */
835 if (update_foreground) 925 if (update_foreground)
836 { 926 {
837 st->Davg1 = st->Davg2 = 0; 927 st->Davg1 = st->Davg2 = 0;
838 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 928 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
839 /* Copy background filter to foreground filter */ 929 /* Copy background filter to foreground filter */
840 for (i=0;i<N*M;i++) 930 for (i=0;i<N*M*C*K;i++)
841 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); 931 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
842 /* Apply a smooth transition so as to not introduce blocking artifacts */ 932 /* Apply a smooth transition so as to not introduce blocking artifacts */
843 for (i=0;i<st->frame_size;i++) 933 for (chan = 0; chan < C; chan++)
844 st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); 934 for (i=0;i<st->frame_size;i++)
935 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
845 } else { 936 } else {
846 int reset_background=0; 937 int reset_background=0;
847 /* Otherwise, check if the background filter is significantly worse */ 938 /* Otherwise, check if the background filter is significantly worse */
@@ -854,13 +945,16 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
854 if (reset_background) 945 if (reset_background)
855 { 946 {
856 /* Copy foreground filter to background filter */ 947 /* Copy foreground filter to background filter */
857 for (i=0;i<N*M;i++) 948 for (i=0;i<N*M*C*K;i++)
858 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); 949 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
859 /* We also need to copy the output so as to get correct adaptation */ 950 /* We also need to copy the output so as to get correct adaptation */
860 for (i=0;i<st->frame_size;i++) 951 for (chan = 0; chan < C; chan++)
861 st->y[i+st->frame_size] = st->e[i+st->frame_size]; 952 {
862 for (i=0;i<st->frame_size;i++) 953 for (i=0;i<st->frame_size;i++)
863 st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 954 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
955 for (i=0;i<st->frame_size;i++)
956 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
957 }
864 See = Sff; 958 See = Sff;
865 st->Davg1 = st->Davg2 = 0; 959 st->Davg1 = st->Davg2 = 0;
866 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 960 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
@@ -868,50 +962,60 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
868 } 962 }
869#endif 963#endif
870 964
871 /* Compute error signal (for the output with de-emphasis) */ 965 Sey = Syy = Sdd = 0;
872 for (i=0;i<st->frame_size;i++) 966 for (chan = 0; chan < C; chan++)
873 { 967 {
874 spx_word32_t tmp_out; 968 /* Compute error signal (for the output with de-emphasis) */
969 for (i=0;i<st->frame_size;i++)
970 {
971 spx_word32_t tmp_out;
875#ifdef TWO_PATH 972#ifdef TWO_PATH
876 tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); 973 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
877#else 974#else
878 tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); 975 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
879#endif 976#endif
880 /* Saturation */ 977 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
881 if (tmp_out>32767)
882 tmp_out = 32767;
883 else if (tmp_out<-32768)
884 tmp_out = -32768;
885 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
886 /* This is an arbitrary test for saturation in the microphone signal */ 978 /* This is an arbitrary test for saturation in the microphone signal */
887 if (in[i] <= -32000 || in[i] >= 32000) 979 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
888 { 980 {
889 tmp_out = 0;
890 if (st->saturated == 0) 981 if (st->saturated == 0)
891 st->saturated = 1; 982 st->saturated = 1;
983 }
984 out[i*C+chan] = WORD2INT(tmp_out);
985 st->memE[chan] = tmp_out;
892 } 986 }
893 out[i] = (spx_int16_t)tmp_out; 987
894 st->memE = tmp_out;
895 }
896
897#ifdef DUMP_ECHO_CANCEL_DATA 988#ifdef DUMP_ECHO_CANCEL_DATA
898 dump_audio(in, far_end, out, st->frame_size); 989 dump_audio(in, far_end, out, st->frame_size);
899#endif 990#endif
900 991
901 /* Compute error signal (filter update version) */ 992 /* Compute error signal (filter update version) */
902 for (i=0;i<st->frame_size;i++) 993 for (i=0;i<st->frame_size;i++)
903 { 994 {
904 st->e[i+st->frame_size] = st->e[i]; 995 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
905 st->e[i] = 0; 996 st->e[chan*N+i] = 0;
997 }
998
999 /* Compute a bunch of correlations */
1000 /* FIXME: bad merge */
1001 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1002 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1003 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
1004
1005 /* Convert error to frequency domain */
1006 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1007 for (i=0;i<st->frame_size;i++)
1008 st->y[i+chan*N] = 0;
1009 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1010
1011 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1012 power_spectrum_accum(st->E+chan*N, st->Rf, N);
1013 power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1014
906 } 1015 }
907 1016
908 /* Compute a bunch of correlations */
909 Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size);
910 Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
911 Sdd = mdf_inner_prod(st->input, st->input, st->frame_size);
912
913 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ 1017 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
914 1018
915 /* Do some sanity check */ 1019 /* Do some sanity check */
916 if (!(Syy>=0 && Sxx>=0 && See >= 0) 1020 if (!(Syy>=0 && Sxx>=0 && See >= 0)
917#ifndef FIXED_POINT 1021#ifndef FIXED_POINT
@@ -921,7 +1025,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
921 { 1025 {
922 /* Things have gone really bad */ 1026 /* Things have gone really bad */
923 st->screwed_up += 50; 1027 st->screwed_up += 50;
924 for (i=0;i<st->frame_size;i++) 1028 for (i=0;i<st->frame_size*C;i++)
925 out[i] = 0; 1029 out[i] = 0;
926 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) 1030 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
927 { 1031 {
@@ -941,35 +1045,16 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
941 /* Add a small noise floor to make sure not to have problems when dividing */ 1045 /* Add a small noise floor to make sure not to have problems when dividing */
942 See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 1046 See = MAX32(See, SHR32(MULT16_16(N, 100),6));
943 1047
944 /* Convert error to frequency domain */ 1048 for (speak = 0; speak < K; speak++)
945 spx_fft(st->fft_table, st->e, st->E); 1049 {
946 for (i=0;i<st->frame_size;i++) 1050 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
947 st->y[i] = 0; 1051 power_spectrum_accum(st->X+speak*N, st->Xf, N);
948 spx_fft(st->fft_table, st->y, st->Y); 1052 }
1053
949 1054
950 /* Compute power spectrum of far end (X), error (E) and filter response (Y) */
951 power_spectrum(st->E, st->Rf, N);
952 power_spectrum(st->Y, st->Yf, N);
953 power_spectrum(st->X, st->Xf, N);
954
955 /* Smooth far end energy estimate over time */ 1055 /* Smooth far end energy estimate over time */
956 for (j=0;j<=st->frame_size;j++) 1056 for (j=0;j<=st->frame_size;j++)
957 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 1057 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
958
959 /* Enable this to compute the power based only on the tail (would need to compute more
960 efficiently to make this really useful */
961 if (0)
962 {
963 float scale2 = .5f/M;
964 for (j=0;j<=st->frame_size;j++)
965 st->power[j] = 100;
966 for (i=0;i<M;i++)
967 {
968 power_spectrum(&st->X[i*N], st->Xf, N);
969 for (j=0;j<=st->frame_size;j++)
970 st->power[j] += scale2*st->Xf[j];
971 }
972 }
973 1058
974 /* Compute filtered spectra and (cross-)correlations */ 1059 /* Compute filtered spectra and (cross-)correlations */
975 for (j=st->frame_size;j>=0;j--) 1060 for (j=st->frame_size;j>=0;j--)
@@ -987,7 +1072,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
987 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j]; 1072 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
988#endif 1073#endif
989 } 1074 }
990 1075
991 Pyy = FLOAT_SQRT(Pyy); 1076 Pyy = FLOAT_SQRT(Pyy);
992 Pey = FLOAT_DIVU(Pey,Pyy); 1077 Pey = FLOAT_DIVU(Pey,Pyy);
993 1078
@@ -1015,7 +1100,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
1015 else 1100 else
1016 st->leak_estimate = SHL16(st->leak_estimate,1); 1101 st->leak_estimate = SHL16(st->leak_estimate,1);
1017 /*printf ("%f\n", st->leak_estimate);*/ 1102 /*printf ("%f\n", st->leak_estimate);*/
1018 1103
1019 /* Compute Residual to Error Ratio */ 1104 /* Compute Residual to Error Ratio */
1020#ifdef FIXED_POINT 1105#ifdef FIXED_POINT
1021 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); 1106 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
@@ -1071,7 +1156,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
1071 /* Temporary adaption rate if filter is not yet adapted enough */ 1156 /* Temporary adaption rate if filter is not yet adapted enough */
1072 spx_word16_t adapt_rate=0; 1157 spx_word16_t adapt_rate=0;
1073 1158
1074 if (Sxx > SHR32(MULT16_16(N, 1000),6)) 1159 if (Sxx > SHR32(MULT16_16(N, 1000),6))
1075 { 1160 {
1076 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); 1161 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1077#ifdef FIXED_POINT 1162#ifdef FIXED_POINT
@@ -1091,13 +1176,13 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
1091 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); 1176 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1092 } 1177 }
1093 1178
1094 /* Save residual echo so it can be used by the nonlinear processor */ 1179 /* FIXME: MC conversion required */
1180 for (i=0;i<st->frame_size;i++)
1181 st->last_y[i] = st->last_y[st->frame_size+i];
1095 if (st->adapted) 1182 if (st->adapted)
1096 { 1183 {
1097 /* If the filter is adapted, take the filtered echo */ 1184 /* If the filter is adapted, take the filtered echo */
1098 for (i=0;i<st->frame_size;i++) 1185 for (i=0;i<st->frame_size;i++)
1099 st->last_y[i] = st->last_y[st->frame_size+i];
1100 for (i=0;i<st->frame_size;i++)
1101 st->last_y[st->frame_size+i] = in[i]-out[i]; 1186 st->last_y[st->frame_size+i] = in[i]-out[i];
1102 } else { 1187 } else {
1103 /* If filter isn't adapted yet, all we can do is take the far end signal directly */ 1188 /* If filter isn't adapted yet, all we can do is take the far end signal directly */
@@ -1113,17 +1198,17 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in
1113 int i; 1198 int i;
1114 spx_word16_t leak2; 1199 spx_word16_t leak2;
1115 int N; 1200 int N;
1116 1201
1117 N = st->window_size; 1202 N = st->window_size;
1118 1203
1119 /* Apply hanning window (should pre-compute it)*/ 1204 /* Apply hanning window (should pre-compute it)*/
1120 for (i=0;i<N;i++) 1205 for (i=0;i<N;i++)
1121 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 1206 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1122 1207
1123 /* Compute power spectrum of the echo */ 1208 /* Compute power spectrum of the echo */
1124 spx_fft(st->fft_table, st->y, st->Y); 1209 spx_fft(st->fft_table, st->y, st->Y);
1125 power_spectrum(st->Y, residual_echo, N); 1210 power_spectrum(st->Y, residual_echo, N);
1126 1211
1127#ifdef FIXED_POINT 1212#ifdef FIXED_POINT
1128 if (st->leak_estimate > 16383) 1213 if (st->leak_estimate > 16383)
1129 leak2 = 32767; 1214 leak2 = 32767;
@@ -1138,14 +1223,14 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in
1138 /* Estimate residual echo */ 1223 /* Estimate residual echo */
1139 for (i=0;i<=st->frame_size;i++) 1224 for (i=0;i<=st->frame_size;i++)
1140 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); 1225 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1141 1226
1142} 1227}
1143 1228
1144int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 1229EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1145{ 1230{
1146 switch(request) 1231 switch(request)
1147 { 1232 {
1148 1233
1149 case SPEEX_ECHO_GET_FRAME_SIZE: 1234 case SPEEX_ECHO_GET_FRAME_SIZE:
1150 (*(int*)ptr) = st->frame_size; 1235 (*(int*)ptr) = st->frame_size;
1151 break; 1236 break;
@@ -1169,6 +1254,29 @@ int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1169 case SPEEX_ECHO_GET_SAMPLING_RATE: 1254 case SPEEX_ECHO_GET_SAMPLING_RATE:
1170 (*(int*)ptr) = st->sampling_rate; 1255 (*(int*)ptr) = st->sampling_rate;
1171 break; 1256 break;
1257 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1258 /*FIXME: Implement this for multiple channels */
1259 *((spx_int32_t *)ptr) = st->M * st->frame_size;
1260 break;
1261 case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1262 {
1263 int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1264 spx_int32_t *filt = (spx_int32_t *) ptr;
1265 for(j=0;j<M;j++)
1266 {
1267 /*FIXME: Implement this for multiple channels */
1268#ifdef FIXED_POINT
1269 for (i=0;i<N;i++)
1270 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1271 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1272#else
1273 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1274#endif
1275 for(i=0;i<n;i++)
1276 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1277 }
1278 }
1279 break;
1172 default: 1280 default:
1173 speex_warning_int("Unknown speex_echo_ctl request: ", request); 1281 speex_warning_int("Unknown speex_echo_ctl request: ", request);
1174 return -1; 1282 return -1;