diff options
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/mdf.c')
-rw-r--r-- | lib/rbcodec/codecs/libspeex/mdf.c | 1177 |
1 files changed, 1177 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/mdf.c b/lib/rbcodec/codecs/libspeex/mdf.c new file mode 100644 index 0000000000..1994f2a886 --- /dev/null +++ b/lib/rbcodec/codecs/libspeex/mdf.c | |||
@@ -0,0 +1,1177 @@ | |||
1 | /* Copyright (C) 2003-2006 Jean-Marc Valin | ||
2 | |||
3 | File: mdf.c | ||
4 | Echo canceller based on the MDF algorithm (see below) | ||
5 | |||
6 | Redistribution and use in source and binary forms, with or without | ||
7 | modification, are permitted provided that the following conditions are | ||
8 | met: | ||
9 | |||
10 | 1. Redistributions of source code must retain the above copyright notice, | ||
11 | this list of conditions and the following disclaimer. | ||
12 | |||
13 | 2. Redistributions in binary form must reproduce the above copyright | ||
14 | notice, this list of conditions and the following disclaimer in the | ||
15 | documentation and/or other materials provided with the distribution. | ||
16 | |||
17 | 3. The name of the author may not be used to endorse or promote products | ||
18 | derived from this software without specific prior written permission. | ||
19 | |||
20 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | ||
21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
22 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN | ||
29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
30 | POSSIBILITY OF SUCH DAMAGE. | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | The echo canceller is based on the MDF algorithm described in: | ||
35 | |||
36 | J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, | ||
37 | IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, | ||
38 | February 1990. | ||
39 | |||
40 | We use the Alternatively Updated MDF (AUMDF) variant. Robustness to | ||
41 | double-talk is achieved using a variable learning rate as described in: | ||
42 | |||
43 | Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo | ||
44 | Cancellation With Double-Talk. IEEE Transactions on Audio, | ||
45 | Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. | ||
46 | http://people.xiph.org/~jm/papers/valin_taslp2006.pdf | ||
47 | |||
48 | There is no explicit double-talk detection, but a continuous variation | ||
49 | in the learning rate based on residual echo, double-talk and background | ||
50 | noise. | ||
51 | |||
52 | About the fixed-point version: | ||
53 | All the signals are represented with 16-bit words. The filter weights | ||
54 | are represented with 32-bit words, but only the top 16 bits are used | ||
55 | in most cases. The lower 16 bits are completely unreliable (due to the | ||
56 | fact that the update is done only on the top bits), but help in the | ||
57 | adaptation -- probably by removing a "threshold effect" due to | ||
58 | quantization (rounding going to zero) when the gradient is small. | ||
59 | |||
60 | Another kludge that seems to work good: when performing the weight | ||
61 | update, we only move half the way toward the "goal" this seems to | ||
62 | reduce the effect of quantization noise in the update phase. This | ||
63 | can be seen as applying a gradient descent on a "soft constraint" | ||
64 | instead of having a hard constraint. | ||
65 | |||
66 | */ | ||
67 | |||
68 | #ifdef HAVE_CONFIG_H | ||
69 | #include "config-speex.h" | ||
70 | #endif | ||
71 | |||
72 | #include "arch.h" | ||
73 | #include "speex/speex_echo.h" | ||
74 | #include "fftwrap.h" | ||
75 | #include "pseudofloat.h" | ||
76 | #include "math_approx.h" | ||
77 | #include "os_support.h" | ||
78 | |||
79 | #ifndef M_PI | ||
80 | #define M_PI 3.14159265358979323846 | ||
81 | #endif | ||
82 | |||
83 | #ifdef FIXED_POINT | ||
84 | #define WEIGHT_SHIFT 11 | ||
85 | #define NORMALIZE_SCALEDOWN 5 | ||
86 | #define NORMALIZE_SCALEUP 3 | ||
87 | #else | ||
88 | #define WEIGHT_SHIFT 0 | ||
89 | #endif | ||
90 | |||
91 | /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk | ||
92 | and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ | ||
93 | #define TWO_PATH | ||
94 | |||
95 | #ifdef FIXED_POINT | ||
96 | static const spx_float_t MIN_LEAK = {20972, -22}; | ||
97 | |||
98 | /* Constants for the two-path filter */ | ||
99 | static const spx_float_t VAR1_SMOOTH = {23593, -16}; | ||
100 | static const spx_float_t VAR2_SMOOTH = {23675, -15}; | ||
101 | static const spx_float_t VAR1_UPDATE = {16384, -15}; | ||
102 | static const spx_float_t VAR2_UPDATE = {16384, -16}; | ||
103 | static const spx_float_t VAR_BACKTRACK = {16384, -12}; | ||
104 | #define TOP16(x) ((x)>>16) | ||
105 | |||
106 | #else | ||
107 | |||
108 | static const spx_float_t MIN_LEAK = .005f; | ||
109 | |||
110 | /* Constants for the two-path filter */ | ||
111 | static const spx_float_t VAR1_SMOOTH = .36f; | ||
112 | static const spx_float_t VAR2_SMOOTH = .7225f; | ||
113 | static const spx_float_t VAR1_UPDATE = .5f; | ||
114 | static const spx_float_t VAR2_UPDATE = .25f; | ||
115 | static const spx_float_t VAR_BACKTRACK = 4.f; | ||
116 | #define TOP16(x) (x) | ||
117 | #endif | ||
118 | |||
119 | |||
120 | #define PLAYBACK_DELAY 2 | ||
121 | |||
122 | void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); | ||
123 | |||
124 | |||
125 | /** Speex echo cancellation state. */ | ||
126 | struct SpeexEchoState_ { | ||
127 | int frame_size; /**< Number of samples processed each time */ | ||
128 | int window_size; | ||
129 | int M; | ||
130 | int cancel_count; | ||
131 | int adapted; | ||
132 | int saturated; | ||
133 | int screwed_up; | ||
134 | spx_int32_t sampling_rate; | ||
135 | spx_word16_t spec_average; | ||
136 | spx_word16_t beta0; | ||
137 | spx_word16_t beta_max; | ||
138 | spx_word32_t sum_adapt; | ||
139 | spx_word16_t leak_estimate; | ||
140 | |||
141 | spx_word16_t *e; /* scratch */ | ||
142 | spx_word16_t *x; /* Far-end input buffer (2N) */ | ||
143 | spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ | ||
144 | spx_word16_t *input; /* scratch */ | ||
145 | spx_word16_t *y; /* scratch */ | ||
146 | spx_word16_t *last_y; | ||
147 | spx_word16_t *Y; /* scratch */ | ||
148 | spx_word16_t *E; | ||
149 | spx_word32_t *PHI; /* scratch */ | ||
150 | spx_word32_t *W; /* (Background) filter weights */ | ||
151 | #ifdef TWO_PATH | ||
152 | spx_word16_t *foreground; /* Foreground filter weights */ | ||
153 | spx_word32_t Davg1; /* 1st recursive average of the residual power difference */ | ||
154 | spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */ | ||
155 | spx_float_t Dvar1; /* Estimated variance of 1st estimator */ | ||
156 | spx_float_t Dvar2; /* Estimated variance of 2nd estimator */ | ||
157 | #endif | ||
158 | spx_word32_t *power; /* Power of the far-end signal */ | ||
159 | spx_float_t *power_1;/* Inverse power of far-end */ | ||
160 | spx_word16_t *wtmp; /* scratch */ | ||
161 | #ifdef FIXED_POINT | ||
162 | spx_word16_t *wtmp2; /* scratch */ | ||
163 | #endif | ||
164 | spx_word32_t *Rf; /* scratch */ | ||
165 | spx_word32_t *Yf; /* scratch */ | ||
166 | spx_word32_t *Xf; /* scratch */ | ||
167 | spx_word32_t *Eh; | ||
168 | spx_word32_t *Yh; | ||
169 | spx_float_t Pey; | ||
170 | spx_float_t Pyy; | ||
171 | spx_word16_t *window; | ||
172 | spx_word16_t *prop; | ||
173 | void *fft_table; | ||
174 | spx_word16_t memX, memD, memE; | ||
175 | spx_word16_t preemph; | ||
176 | spx_word16_t notch_radius; | ||
177 | spx_mem_t notch_mem[2]; | ||
178 | |||
179 | /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ | ||
180 | spx_int16_t *play_buf; | ||
181 | int play_buf_pos; | ||
182 | int play_buf_started; | ||
183 | }; | ||
184 | |||
185 | static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) | ||
186 | { | ||
187 | int i; | ||
188 | spx_word16_t den2; | ||
189 | #ifdef FIXED_POINT | ||
190 | den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); | ||
191 | #else | ||
192 | den2 = radius*radius + .7*(1-radius)*(1-radius); | ||
193 | #endif | ||
194 | /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ | ||
195 | for (i=0;i<len;i++) | ||
196 | { | ||
197 | spx_word16_t vin = in[i]; | ||
198 | spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); | ||
199 | #ifdef FIXED_POINT | ||
200 | mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); | ||
201 | #else | ||
202 | mem[0] = mem[1] + 2*(-vin + radius*vout); | ||
203 | #endif | ||
204 | mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout); | ||
205 | out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767); | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* This inner product is slightly different from the codec version because of fixed-point */ | ||
210 | static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) | ||
211 | { | ||
212 | spx_word32_t sum=0; | ||
213 | len >>= 1; | ||
214 | while(len--) | ||
215 | { | ||
216 | spx_word32_t part=0; | ||
217 | part = MAC16_16(part,*x++,*y++); | ||
218 | part = MAC16_16(part,*x++,*y++); | ||
219 | /* HINT: If you had a 40-bit accumulator, you could shift only at the end */ | ||
220 | sum = ADD32(sum,SHR32(part,6)); | ||
221 | } | ||
222 | return sum; | ||
223 | } | ||
224 | |||
225 | /** Compute power spectrum of a half-complex (packed) vector */ | ||
226 | static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) | ||
227 | { | ||
228 | int i, j; | ||
229 | ps[0]=MULT16_16(X[0],X[0]); | ||
230 | for (i=1,j=1;i<N-1;i+=2,j++) | ||
231 | { | ||
232 | ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); | ||
233 | } | ||
234 | ps[j]=MULT16_16(X[i],X[i]); | ||
235 | } | ||
236 | |||
237 | /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ | ||
238 | #ifdef FIXED_POINT | ||
239 | static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) | ||
240 | { | ||
241 | int i,j; | ||
242 | spx_word32_t tmp1=0,tmp2=0; | ||
243 | for (j=0;j<M;j++) | ||
244 | { | ||
245 | tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N])); | ||
246 | } | ||
247 | acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); | ||
248 | for (i=1;i<N-1;i+=2) | ||
249 | { | ||
250 | tmp1 = tmp2 = 0; | ||
251 | for (j=0;j<M;j++) | ||
252 | { | ||
253 | tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1]))); | ||
254 | tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1])); | ||
255 | } | ||
256 | acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); | ||
257 | acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); | ||
258 | } | ||
259 | tmp1 = tmp2 = 0; | ||
260 | for (j=0;j<M;j++) | ||
261 | { | ||
262 | tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1])); | ||
263 | } | ||
264 | acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); | ||
265 | } | ||
266 | static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M) | ||
267 | { | ||
268 | int i,j; | ||
269 | spx_word32_t tmp1=0,tmp2=0; | ||
270 | for (j=0;j<M;j++) | ||
271 | { | ||
272 | tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]); | ||
273 | } | ||
274 | acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); | ||
275 | for (i=1;i<N-1;i+=2) | ||
276 | { | ||
277 | tmp1 = tmp2 = 0; | ||
278 | for (j=0;j<M;j++) | ||
279 | { | ||
280 | tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1])); | ||
281 | tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]); | ||
282 | } | ||
283 | acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); | ||
284 | acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); | ||
285 | } | ||
286 | tmp1 = tmp2 = 0; | ||
287 | for (j=0;j<M;j++) | ||
288 | { | ||
289 | tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]); | ||
290 | } | ||
291 | acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); | ||
292 | } | ||
293 | |||
294 | #else | ||
295 | static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) | ||
296 | { | ||
297 | int i,j; | ||
298 | for (i=0;i<N;i++) | ||
299 | acc[i] = 0; | ||
300 | for (j=0;j<M;j++) | ||
301 | { | ||
302 | acc[0] += X[0]*Y[0]; | ||
303 | for (i=1;i<N-1;i+=2) | ||
304 | { | ||
305 | acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]); | ||
306 | acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]); | ||
307 | } | ||
308 | acc[i] += X[i]*Y[i]; | ||
309 | X += N; | ||
310 | Y += N; | ||
311 | } | ||
312 | } | ||
313 | #define spectral_mul_accum16 spectral_mul_accum | ||
314 | #endif | ||
315 | |||
316 | /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ | ||
317 | static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) | ||
318 | { | ||
319 | int i, j; | ||
320 | spx_float_t W; | ||
321 | W = FLOAT_AMULT(p, w[0]); | ||
322 | prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); | ||
323 | for (i=1,j=1;i<N-1;i+=2,j++) | ||
324 | { | ||
325 | W = FLOAT_AMULT(p, w[j]); | ||
326 | prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); | ||
327 | prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); | ||
328 | } | ||
329 | W = FLOAT_AMULT(p, w[j]); | ||
330 | prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); | ||
331 | } | ||
332 | |||
333 | static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) | ||
334 | { | ||
335 | int i, j; | ||
336 | spx_word16_t max_sum = 1; | ||
337 | spx_word32_t prop_sum = 1; | ||
338 | for (i=0;i<M;i++) | ||
339 | { | ||
340 | spx_word32_t tmp = 1; | ||
341 | for (j=0;j<N;j++) | ||
342 | tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); | ||
343 | #ifdef FIXED_POINT | ||
344 | /* Just a security in case an overflow were to occur */ | ||
345 | tmp = MIN32(ABS32(tmp), 536870912); | ||
346 | #endif | ||
347 | prop[i] = spx_sqrt(tmp); | ||
348 | if (prop[i] > max_sum) | ||
349 | max_sum = prop[i]; | ||
350 | } | ||
351 | for (i=0;i<M;i++) | ||
352 | { | ||
353 | prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum); | ||
354 | prop_sum += EXTEND32(prop[i]); | ||
355 | } | ||
356 | for (i=0;i<M;i++) | ||
357 | { | ||
358 | prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); | ||
359 | /*printf ("%f ", prop[i]);*/ | ||
360 | } | ||
361 | /*printf ("\n");*/ | ||
362 | } | ||
363 | |||
364 | #ifdef DUMP_ECHO_CANCEL_DATA | ||
365 | #include <stdio.h> | ||
366 | static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL; | ||
367 | |||
368 | static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len) | ||
369 | { | ||
370 | if (!(rFile && pFile && oFile)) | ||
371 | { | ||
372 | speex_fatal("Dump files not open"); | ||
373 | } | ||
374 | fwrite(rec, sizeof(spx_int16_t), len, rFile); | ||
375 | fwrite(play, sizeof(spx_int16_t), len, pFile); | ||
376 | fwrite(out, sizeof(spx_int16_t), len, oFile); | ||
377 | } | ||
378 | #endif | ||
379 | |||
380 | /** Creates a new echo canceller state */ | ||
381 | SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) | ||
382 | { | ||
383 | int i,N,M; | ||
384 | SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); | ||
385 | |||
386 | #ifdef DUMP_ECHO_CANCEL_DATA | ||
387 | if (rFile || pFile || oFile) | ||
388 | speex_fatal("Opening dump files twice"); | ||
389 | rFile = fopen("aec_rec.sw", "wb"); | ||
390 | pFile = fopen("aec_play.sw", "wb"); | ||
391 | oFile = fopen("aec_out.sw", "wb"); | ||
392 | #endif | ||
393 | |||
394 | st->frame_size = frame_size; | ||
395 | st->window_size = 2*frame_size; | ||
396 | N = st->window_size; | ||
397 | M = st->M = (filter_length+st->frame_size-1)/frame_size; | ||
398 | st->cancel_count=0; | ||
399 | st->sum_adapt = 0; | ||
400 | st->saturated = 0; | ||
401 | st->screwed_up = 0; | ||
402 | /* This is the default sampling rate */ | ||
403 | st->sampling_rate = 8000; | ||
404 | st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); | ||
405 | #ifdef FIXED_POINT | ||
406 | st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); | ||
407 | st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); | ||
408 | #else | ||
409 | st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; | ||
410 | st->beta_max = (.5f*st->frame_size)/st->sampling_rate; | ||
411 | #endif | ||
412 | st->leak_estimate = 0; | ||
413 | |||
414 | st->fft_table = spx_fft_init(N); | ||
415 | |||
416 | st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
417 | st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
418 | st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); | ||
419 | st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
420 | st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
421 | st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); | ||
422 | st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); | ||
423 | st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); | ||
424 | st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); | ||
425 | st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); | ||
426 | |||
427 | st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); | ||
428 | st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
429 | st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
430 | st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); | ||
431 | #ifdef TWO_PATH | ||
432 | st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); | ||
433 | #endif | ||
434 | st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); | ||
435 | st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); | ||
436 | st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); | ||
437 | st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
438 | st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); | ||
439 | st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
440 | #ifdef FIXED_POINT | ||
441 | st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); | ||
442 | for (i=0;i<N>>1;i++) | ||
443 | { | ||
444 | st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1)); | ||
445 | st->window[N-i-1] = st->window[i]; | ||
446 | } | ||
447 | #else | ||
448 | for (i=0;i<N;i++) | ||
449 | st->window[i] = .5-.5*cos(2*M_PI*i/N); | ||
450 | #endif | ||
451 | for (i=0;i<=st->frame_size;i++) | ||
452 | st->power_1[i] = FLOAT_ONE; | ||
453 | for (i=0;i<N*M;i++) | ||
454 | st->W[i] = 0; | ||
455 | { | ||
456 | spx_word32_t sum = 0; | ||
457 | /* Ratio of ~10 between adaptation rate of first and last block */ | ||
458 | spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); | ||
459 | st->prop[0] = QCONST16(.7, 15); | ||
460 | sum = EXTEND32(st->prop[0]); | ||
461 | for (i=1;i<M;i++) | ||
462 | { | ||
463 | st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay); | ||
464 | sum = ADD32(sum, EXTEND32(st->prop[i])); | ||
465 | } | ||
466 | for (i=M-1;i>=0;i--) | ||
467 | { | ||
468 | st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); | ||
469 | } | ||
470 | } | ||
471 | |||
472 | st->memX=st->memD=st->memE=0; | ||
473 | st->preemph = QCONST16(.9,15); | ||
474 | if (st->sampling_rate<12000) | ||
475 | st->notch_radius = QCONST16(.9, 15); | ||
476 | else if (st->sampling_rate<24000) | ||
477 | st->notch_radius = QCONST16(.982, 15); | ||
478 | else | ||
479 | st->notch_radius = QCONST16(.992, 15); | ||
480 | |||
481 | st->notch_mem[0] = st->notch_mem[1] = 0; | ||
482 | st->adapted = 0; | ||
483 | st->Pey = st->Pyy = FLOAT_ONE; | ||
484 | |||
485 | #ifdef TWO_PATH | ||
486 | st->Davg1 = st->Davg2 = 0; | ||
487 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; | ||
488 | #endif | ||
489 | |||
490 | st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); | ||
491 | st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; | ||
492 | st->play_buf_started = 0; | ||
493 | |||
494 | return st; | ||
495 | } | ||
496 | |||
497 | /** Resets echo canceller state */ | ||
498 | void speex_echo_state_reset(SpeexEchoState *st) | ||
499 | { | ||
500 | int i, M, N; | ||
501 | st->cancel_count=0; | ||
502 | st->screwed_up = 0; | ||
503 | N = st->window_size; | ||
504 | M = st->M; | ||
505 | for (i=0;i<N*M;i++) | ||
506 | st->W[i] = 0; | ||
507 | #ifdef TWO_PATH | ||
508 | for (i=0;i<N*M;i++) | ||
509 | st->foreground[i] = 0; | ||
510 | #endif | ||
511 | for (i=0;i<N*(M+1);i++) | ||
512 | st->X[i] = 0; | ||
513 | for (i=0;i<=st->frame_size;i++) | ||
514 | { | ||
515 | st->power[i] = 0; | ||
516 | st->power_1[i] = FLOAT_ONE; | ||
517 | st->Eh[i] = 0; | ||
518 | st->Yh[i] = 0; | ||
519 | } | ||
520 | for (i=0;i<st->frame_size;i++) | ||
521 | { | ||
522 | st->last_y[i] = 0; | ||
523 | } | ||
524 | for (i=0;i<N;i++) | ||
525 | { | ||
526 | st->E[i] = 0; | ||
527 | st->x[i] = 0; | ||
528 | } | ||
529 | st->notch_mem[0] = st->notch_mem[1] = 0; | ||
530 | st->memX=st->memD=st->memE=0; | ||
531 | |||
532 | st->saturated = 0; | ||
533 | st->adapted = 0; | ||
534 | st->sum_adapt = 0; | ||
535 | st->Pey = st->Pyy = FLOAT_ONE; | ||
536 | #ifdef TWO_PATH | ||
537 | st->Davg1 = st->Davg2 = 0; | ||
538 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; | ||
539 | #endif | ||
540 | for (i=0;i<3*st->frame_size;i++) | ||
541 | st->play_buf[i] = 0; | ||
542 | st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; | ||
543 | st->play_buf_started = 0; | ||
544 | |||
545 | } | ||
546 | |||
547 | /** Destroys an echo canceller state */ | ||
548 | void speex_echo_state_destroy(SpeexEchoState *st) | ||
549 | { | ||
550 | spx_fft_destroy(st->fft_table); | ||
551 | |||
552 | speex_free(st->e); | ||
553 | speex_free(st->x); | ||
554 | speex_free(st->input); | ||
555 | speex_free(st->y); | ||
556 | speex_free(st->last_y); | ||
557 | speex_free(st->Yf); | ||
558 | speex_free(st->Rf); | ||
559 | speex_free(st->Xf); | ||
560 | speex_free(st->Yh); | ||
561 | speex_free(st->Eh); | ||
562 | |||
563 | speex_free(st->X); | ||
564 | speex_free(st->Y); | ||
565 | speex_free(st->E); | ||
566 | speex_free(st->W); | ||
567 | #ifdef TWO_PATH | ||
568 | speex_free(st->foreground); | ||
569 | #endif | ||
570 | speex_free(st->PHI); | ||
571 | speex_free(st->power); | ||
572 | speex_free(st->power_1); | ||
573 | speex_free(st->window); | ||
574 | speex_free(st->prop); | ||
575 | speex_free(st->wtmp); | ||
576 | #ifdef FIXED_POINT | ||
577 | speex_free(st->wtmp2); | ||
578 | #endif | ||
579 | speex_free(st->play_buf); | ||
580 | speex_free(st); | ||
581 | |||
582 | #ifdef DUMP_ECHO_CANCEL_DATA | ||
583 | fclose(rFile); | ||
584 | fclose(pFile); | ||
585 | fclose(oFile); | ||
586 | rFile = pFile = oFile = NULL; | ||
587 | #endif | ||
588 | } | ||
589 | |||
590 | void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) | ||
591 | { | ||
592 | int i; | ||
593 | /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ | ||
594 | st->play_buf_started = 1; | ||
595 | if (st->play_buf_pos>=st->frame_size) | ||
596 | { | ||
597 | speex_echo_cancellation(st, rec, st->play_buf, out); | ||
598 | st->play_buf_pos -= st->frame_size; | ||
599 | for (i=0;i<st->play_buf_pos;i++) | ||
600 | st->play_buf[i] = st->play_buf[i+st->frame_size]; | ||
601 | } else { | ||
602 | speex_warning("No playback frame available (your application is buggy and/or got xruns)"); | ||
603 | if (st->play_buf_pos!=0) | ||
604 | { | ||
605 | speex_warning("internal playback buffer corruption?"); | ||
606 | st->play_buf_pos = 0; | ||
607 | } | ||
608 | for (i=0;i<st->frame_size;i++) | ||
609 | out[i] = rec[i]; | ||
610 | } | ||
611 | } | ||
612 | |||
613 | void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) | ||
614 | { | ||
615 | /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ | ||
616 | if (!st->play_buf_started) | ||
617 | { | ||
618 | speex_warning("discarded first playback frame"); | ||
619 | return; | ||
620 | } | ||
621 | if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) | ||
622 | { | ||
623 | int i; | ||
624 | for (i=0;i<st->frame_size;i++) | ||
625 | st->play_buf[st->play_buf_pos+i] = play[i]; | ||
626 | st->play_buf_pos += st->frame_size; | ||
627 | if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) | ||
628 | { | ||
629 | speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); | ||
630 | for (i=0;i<st->frame_size;i++) | ||
631 | st->play_buf[st->play_buf_pos+i] = play[i]; | ||
632 | st->play_buf_pos += st->frame_size; | ||
633 | } | ||
634 | } else { | ||
635 | speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); | ||
636 | } | ||
637 | } | ||
638 | |||
639 | /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ | ||
640 | void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) | ||
641 | { | ||
642 | speex_echo_cancellation(st, in, far_end, out); | ||
643 | } | ||
644 | |||
645 | /** Performs echo cancellation on a frame */ | ||
646 | void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) | ||
647 | { | ||
648 | int i,j; | ||
649 | int N,M; | ||
650 | spx_word32_t Syy,See,Sxx,Sdd, Sff; | ||
651 | #ifdef TWO_PATH | ||
652 | spx_word32_t Dbf; | ||
653 | int update_foreground; | ||
654 | #endif | ||
655 | spx_word32_t Sey; | ||
656 | spx_word16_t ss, ss_1; | ||
657 | spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; | ||
658 | spx_float_t alpha, alpha_1; | ||
659 | spx_word16_t RER; | ||
660 | spx_word32_t tmp32; | ||
661 | |||
662 | N = st->window_size; | ||
663 | M = st->M; | ||
664 | st->cancel_count++; | ||
665 | #ifdef FIXED_POINT | ||
666 | ss=DIV32_16(11469,M); | ||
667 | ss_1 = SUB16(32767,ss); | ||
668 | #else | ||
669 | ss=.35/M; | ||
670 | ss_1 = 1-ss; | ||
671 | #endif | ||
672 | |||
673 | /* Apply a notch filter to make sure DC doesn't end up causing problems */ | ||
674 | filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); | ||
675 | /* Copy input data to buffer and apply pre-emphasis */ | ||
676 | for (i=0;i<st->frame_size;i++) | ||
677 | { | ||
678 | spx_word32_t tmp32; | ||
679 | tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); | ||
680 | #ifdef FIXED_POINT | ||
681 | /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ | ||
682 | if (tmp32 > 32767) | ||
683 | { | ||
684 | tmp32 = 32767; | ||
685 | st->saturated = M+1; | ||
686 | } | ||
687 | if (tmp32 < -32767) | ||
688 | { | ||
689 | tmp32 = -32767; | ||
690 | st->saturated = M+1; | ||
691 | } | ||
692 | #endif | ||
693 | st->x[i+st->frame_size] = EXTRACT16(tmp32); | ||
694 | st->memX = far_end[i]; | ||
695 | |||
696 | tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); | ||
697 | #ifdef FIXED_POINT | ||
698 | if (tmp32 > 32767) | ||
699 | { | ||
700 | tmp32 = 32767; | ||
701 | if (st->saturated == 0) | ||
702 | st->saturated = 1; | ||
703 | } | ||
704 | if (tmp32 < -32767) | ||
705 | { | ||
706 | tmp32 = -32767; | ||
707 | if (st->saturated == 0) | ||
708 | st->saturated = 1; | ||
709 | } | ||
710 | #endif | ||
711 | st->memD = st->input[i]; | ||
712 | st->input[i] = tmp32; | ||
713 | } | ||
714 | |||
715 | /* Shift memory: this could be optimized eventually*/ | ||
716 | for (j=M-1;j>=0;j--) | ||
717 | { | ||
718 | for (i=0;i<N;i++) | ||
719 | st->X[(j+1)*N+i] = st->X[j*N+i]; | ||
720 | } | ||
721 | |||
722 | /* Convert x (far end) to frequency domain */ | ||
723 | spx_fft(st->fft_table, st->x, &st->X[0]); | ||
724 | for (i=0;i<N;i++) | ||
725 | st->last_y[i] = st->x[i]; | ||
726 | Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); | ||
727 | for (i=0;i<st->frame_size;i++) | ||
728 | st->x[i] = st->x[i+st->frame_size]; | ||
729 | /* From here on, the top part of x is used as scratch space */ | ||
730 | |||
731 | #ifdef TWO_PATH | ||
732 | /* Compute foreground filter */ | ||
733 | spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); | ||
734 | spx_ifft(st->fft_table, st->Y, st->e); | ||
735 | for (i=0;i<st->frame_size;i++) | ||
736 | st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); | ||
737 | Sff = mdf_inner_prod(st->e, st->e, st->frame_size); | ||
738 | #endif | ||
739 | |||
740 | /* Adjust proportional adaption rate */ | ||
741 | mdf_adjust_prop (st->W, N, M, st->prop); | ||
742 | /* Compute weight gradient */ | ||
743 | if (st->saturated == 0) | ||
744 | { | ||
745 | for (j=M-1;j>=0;j--) | ||
746 | { | ||
747 | weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); | ||
748 | for (i=0;i<N;i++) | ||
749 | st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); | ||
750 | |||
751 | } | ||
752 | } else { | ||
753 | st->saturated--; | ||
754 | } | ||
755 | |||
756 | /* Update weight to prevent circular convolution (MDF / AUMDF) */ | ||
757 | for (j=0;j<M;j++) | ||
758 | { | ||
759 | /* This is a variant of the Alternatively Updated MDF (AUMDF) */ | ||
760 | /* Remove the "if" to make this an MDF filter */ | ||
761 | if (j==0 || st->cancel_count%(M-1) == j-1) | ||
762 | { | ||
763 | #ifdef FIXED_POINT | ||
764 | for (i=0;i<N;i++) | ||
765 | st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); | ||
766 | spx_ifft(st->fft_table, st->wtmp2, st->wtmp); | ||
767 | for (i=0;i<st->frame_size;i++) | ||
768 | { | ||
769 | st->wtmp[i]=0; | ||
770 | } | ||
771 | for (i=st->frame_size;i<N;i++) | ||
772 | { | ||
773 | st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); | ||
774 | } | ||
775 | spx_fft(st->fft_table, st->wtmp, st->wtmp2); | ||
776 | /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ | ||
777 | for (i=0;i<N;i++) | ||
778 | st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); | ||
779 | #else | ||
780 | spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); | ||
781 | for (i=st->frame_size;i<N;i++) | ||
782 | { | ||
783 | st->wtmp[i]=0; | ||
784 | } | ||
785 | spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); | ||
786 | #endif | ||
787 | } | ||
788 | } | ||
789 | |||
790 | /* Compute filter response Y */ | ||
791 | spectral_mul_accum(st->X, st->W, st->Y, N, M); | ||
792 | spx_ifft(st->fft_table, st->Y, st->y); | ||
793 | |||
794 | #ifdef TWO_PATH | ||
795 | /* Difference in response, this is used to estimate the variance of our residual power estimate */ | ||
796 | for (i=0;i<st->frame_size;i++) | ||
797 | st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); | ||
798 | Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); | ||
799 | #endif | ||
800 | |||
801 | for (i=0;i<st->frame_size;i++) | ||
802 | st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); | ||
803 | See = mdf_inner_prod(st->e, st->e, st->frame_size); | ||
804 | #ifndef TWO_PATH | ||
805 | Sff = See; | ||
806 | #endif | ||
807 | |||
808 | #ifdef TWO_PATH | ||
809 | /* Logic for updating the foreground filter */ | ||
810 | |||
811 | /* For two time windows, compute the mean of the energy difference, as well as the variance */ | ||
812 | st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); | ||
813 | st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); | ||
814 | st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); | ||
815 | st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); | ||
816 | |||
817 | /* Equivalent float code: | ||
818 | st->Davg1 = .6*st->Davg1 + .4*(Sff-See); | ||
819 | st->Davg2 = .85*st->Davg2 + .15*(Sff-See); | ||
820 | st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; | ||
821 | st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; | ||
822 | */ | ||
823 | |||
824 | update_foreground = 0; | ||
825 | /* Check if we have a statistically significant reduction in the residual echo */ | ||
826 | /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ | ||
827 | if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) | ||
828 | update_foreground = 1; | ||
829 | else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) | ||
830 | update_foreground = 1; | ||
831 | else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) | ||
832 | update_foreground = 1; | ||
833 | |||
834 | /* Do we update? */ | ||
835 | if (update_foreground) | ||
836 | { | ||
837 | st->Davg1 = st->Davg2 = 0; | ||
838 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; | ||
839 | /* Copy background filter to foreground filter */ | ||
840 | for (i=0;i<N*M;i++) | ||
841 | st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); | ||
842 | /* Apply a smooth transition so as to not introduce blocking artifacts */ | ||
843 | for (i=0;i<st->frame_size;i++) | ||
844 | st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); | ||
845 | } else { | ||
846 | int reset_background=0; | ||
847 | /* Otherwise, check if the background filter is significantly worse */ | ||
848 | if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) | ||
849 | reset_background = 1; | ||
850 | if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) | ||
851 | reset_background = 1; | ||
852 | if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) | ||
853 | reset_background = 1; | ||
854 | if (reset_background) | ||
855 | { | ||
856 | /* Copy foreground filter to background filter */ | ||
857 | for (i=0;i<N*M;i++) | ||
858 | st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); | ||
859 | /* We also need to copy the output so as to get correct adaptation */ | ||
860 | for (i=0;i<st->frame_size;i++) | ||
861 | st->y[i+st->frame_size] = st->e[i+st->frame_size]; | ||
862 | for (i=0;i<st->frame_size;i++) | ||
863 | st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); | ||
864 | See = Sff; | ||
865 | st->Davg1 = st->Davg2 = 0; | ||
866 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; | ||
867 | } | ||
868 | } | ||
869 | #endif | ||
870 | |||
871 | /* Compute error signal (for the output with de-emphasis) */ | ||
872 | for (i=0;i<st->frame_size;i++) | ||
873 | { | ||
874 | spx_word32_t tmp_out; | ||
875 | #ifdef TWO_PATH | ||
876 | tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); | ||
877 | #else | ||
878 | tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); | ||
879 | #endif | ||
880 | /* Saturation */ | ||
881 | if (tmp_out>32767) | ||
882 | tmp_out = 32767; | ||
883 | else if (tmp_out<-32768) | ||
884 | tmp_out = -32768; | ||
885 | tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); | ||
886 | /* This is an arbitrary test for saturation in the microphone signal */ | ||
887 | if (in[i] <= -32000 || in[i] >= 32000) | ||
888 | { | ||
889 | tmp_out = 0; | ||
890 | if (st->saturated == 0) | ||
891 | st->saturated = 1; | ||
892 | } | ||
893 | out[i] = (spx_int16_t)tmp_out; | ||
894 | st->memE = tmp_out; | ||
895 | } | ||
896 | |||
897 | #ifdef DUMP_ECHO_CANCEL_DATA | ||
898 | dump_audio(in, far_end, out, st->frame_size); | ||
899 | #endif | ||
900 | |||
901 | /* Compute error signal (filter update version) */ | ||
902 | for (i=0;i<st->frame_size;i++) | ||
903 | { | ||
904 | st->e[i+st->frame_size] = st->e[i]; | ||
905 | st->e[i] = 0; | ||
906 | } | ||
907 | |||
908 | /* Compute a bunch of correlations */ | ||
909 | Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); | ||
910 | Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); | ||
911 | Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); | ||
912 | |||
913 | /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ | ||
914 | |||
915 | /* Do some sanity check */ | ||
916 | if (!(Syy>=0 && Sxx>=0 && See >= 0) | ||
917 | #ifndef FIXED_POINT | ||
918 | || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) | ||
919 | #endif | ||
920 | ) | ||
921 | { | ||
922 | /* Things have gone really bad */ | ||
923 | st->screwed_up += 50; | ||
924 | for (i=0;i<st->frame_size;i++) | ||
925 | out[i] = 0; | ||
926 | } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) | ||
927 | { | ||
928 | /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ | ||
929 | st->screwed_up++; | ||
930 | } else { | ||
931 | /* Everything's fine */ | ||
932 | st->screwed_up=0; | ||
933 | } | ||
934 | if (st->screwed_up>=50) | ||
935 | { | ||
936 | speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); | ||
937 | speex_echo_state_reset(st); | ||
938 | return; | ||
939 | } | ||
940 | |||
941 | /* Add a small noise floor to make sure not to have problems when dividing */ | ||
942 | See = MAX32(See, SHR32(MULT16_16(N, 100),6)); | ||
943 | |||
944 | /* Convert error to frequency domain */ | ||
945 | spx_fft(st->fft_table, st->e, st->E); | ||
946 | for (i=0;i<st->frame_size;i++) | ||
947 | st->y[i] = 0; | ||
948 | spx_fft(st->fft_table, st->y, st->Y); | ||
949 | |||
950 | /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ | ||
951 | power_spectrum(st->E, st->Rf, N); | ||
952 | power_spectrum(st->Y, st->Yf, N); | ||
953 | power_spectrum(st->X, st->Xf, N); | ||
954 | |||
955 | /* Smooth far end energy estimate over time */ | ||
956 | for (j=0;j<=st->frame_size;j++) | ||
957 | st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); | ||
958 | |||
959 | /* Enable this to compute the power based only on the tail (would need to compute more | ||
960 | efficiently to make this really useful */ | ||
961 | if (0) | ||
962 | { | ||
963 | float scale2 = .5f/M; | ||
964 | for (j=0;j<=st->frame_size;j++) | ||
965 | st->power[j] = 100; | ||
966 | for (i=0;i<M;i++) | ||
967 | { | ||
968 | power_spectrum(&st->X[i*N], st->Xf, N); | ||
969 | for (j=0;j<=st->frame_size;j++) | ||
970 | st->power[j] += scale2*st->Xf[j]; | ||
971 | } | ||
972 | } | ||
973 | |||
974 | /* Compute filtered spectra and (cross-)correlations */ | ||
975 | for (j=st->frame_size;j>=0;j--) | ||
976 | { | ||
977 | spx_float_t Eh, Yh; | ||
978 | Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]); | ||
979 | Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]); | ||
980 | Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); | ||
981 | Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); | ||
982 | #ifdef FIXED_POINT | ||
983 | st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]); | ||
984 | st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]); | ||
985 | #else | ||
986 | st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j]; | ||
987 | st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j]; | ||
988 | #endif | ||
989 | } | ||
990 | |||
991 | Pyy = FLOAT_SQRT(Pyy); | ||
992 | Pey = FLOAT_DIVU(Pey,Pyy); | ||
993 | |||
994 | /* Compute correlation updatete rate */ | ||
995 | tmp32 = MULT16_32_Q15(st->beta0,Syy); | ||
996 | if (tmp32 > MULT16_32_Q15(st->beta_max,See)) | ||
997 | tmp32 = MULT16_32_Q15(st->beta_max,See); | ||
998 | alpha = FLOAT_DIV32(tmp32, See); | ||
999 | alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha); | ||
1000 | /* Update correlations (recursive average) */ | ||
1001 | st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey)); | ||
1002 | st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy)); | ||
1003 | if (FLOAT_LT(st->Pyy, FLOAT_ONE)) | ||
1004 | st->Pyy = FLOAT_ONE; | ||
1005 | /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */ | ||
1006 | if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy))) | ||
1007 | st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy); | ||
1008 | if (FLOAT_GT(st->Pey, st->Pyy)) | ||
1009 | st->Pey = st->Pyy; | ||
1010 | /* leak_estimate is the linear regression result */ | ||
1011 | st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); | ||
1012 | /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ | ||
1013 | if (st->leak_estimate > 16383) | ||
1014 | st->leak_estimate = 32767; | ||
1015 | else | ||
1016 | st->leak_estimate = SHL16(st->leak_estimate,1); | ||
1017 | /*printf ("%f\n", st->leak_estimate);*/ | ||
1018 | |||
1019 | /* Compute Residual to Error Ratio */ | ||
1020 | #ifdef FIXED_POINT | ||
1021 | tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); | ||
1022 | tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); | ||
1023 | /* Check for y in e (lower bound on RER) */ | ||
1024 | { | ||
1025 | spx_float_t bound = PSEUDOFLOAT(Sey); | ||
1026 | bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); | ||
1027 | if (FLOAT_GT(bound, PSEUDOFLOAT(See))) | ||
1028 | tmp32 = See; | ||
1029 | else if (tmp32 < FLOAT_EXTRACT32(bound)) | ||
1030 | tmp32 = FLOAT_EXTRACT32(bound); | ||
1031 | } | ||
1032 | if (tmp32 > SHR32(See,1)) | ||
1033 | tmp32 = SHR32(See,1); | ||
1034 | RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); | ||
1035 | #else | ||
1036 | RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; | ||
1037 | /* Check for y in e (lower bound on RER) */ | ||
1038 | if (RER < Sey*Sey/(1+See*Syy)) | ||
1039 | RER = Sey*Sey/(1+See*Syy); | ||
1040 | if (RER > .5) | ||
1041 | RER = .5; | ||
1042 | #endif | ||
1043 | |||
1044 | /* We consider that the filter has had minimal adaptation if the following is true*/ | ||
1045 | if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) | ||
1046 | { | ||
1047 | st->adapted = 1; | ||
1048 | } | ||
1049 | |||
1050 | if (st->adapted) | ||
1051 | { | ||
1052 | /* Normal learning rate calculation once we're past the minimal adaptation phase */ | ||
1053 | for (i=0;i<=st->frame_size;i++) | ||
1054 | { | ||
1055 | spx_word32_t r, e; | ||
1056 | /* Compute frequency-domain adaptation mask */ | ||
1057 | r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); | ||
1058 | e = SHL32(st->Rf[i],3)+1; | ||
1059 | #ifdef FIXED_POINT | ||
1060 | if (r>SHR32(e,1)) | ||
1061 | r = SHR32(e,1); | ||
1062 | #else | ||
1063 | if (r>.5*e) | ||
1064 | r = .5*e; | ||
1065 | #endif | ||
1066 | r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); | ||
1067 | /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ | ||
1068 | st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); | ||
1069 | } | ||
1070 | } else { | ||
1071 | /* Temporary adaption rate if filter is not yet adapted enough */ | ||
1072 | spx_word16_t adapt_rate=0; | ||
1073 | |||
1074 | if (Sxx > SHR32(MULT16_16(N, 1000),6)) | ||
1075 | { | ||
1076 | tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); | ||
1077 | #ifdef FIXED_POINT | ||
1078 | if (tmp32 > SHR32(See,2)) | ||
1079 | tmp32 = SHR32(See,2); | ||
1080 | #else | ||
1081 | if (tmp32 > .25*See) | ||
1082 | tmp32 = .25*See; | ||
1083 | #endif | ||
1084 | adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); | ||
1085 | } | ||
1086 | for (i=0;i<=st->frame_size;i++) | ||
1087 | st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); | ||
1088 | |||
1089 | |||
1090 | /* How much have we adapted so far? */ | ||
1091 | st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); | ||
1092 | } | ||
1093 | |||
1094 | /* Save residual echo so it can be used by the nonlinear processor */ | ||
1095 | if (st->adapted) | ||
1096 | { | ||
1097 | /* If the filter is adapted, take the filtered echo */ | ||
1098 | for (i=0;i<st->frame_size;i++) | ||
1099 | st->last_y[i] = st->last_y[st->frame_size+i]; | ||
1100 | for (i=0;i<st->frame_size;i++) | ||
1101 | st->last_y[st->frame_size+i] = in[i]-out[i]; | ||
1102 | } else { | ||
1103 | /* If filter isn't adapted yet, all we can do is take the far end signal directly */ | ||
1104 | /* moved earlier: for (i=0;i<N;i++) | ||
1105 | st->last_y[i] = st->x[i];*/ | ||
1106 | } | ||
1107 | |||
1108 | } | ||
1109 | |||
1110 | /* Compute spectrum of estimated echo for use in an echo post-filter */ | ||
1111 | void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) | ||
1112 | { | ||
1113 | int i; | ||
1114 | spx_word16_t leak2; | ||
1115 | int N; | ||
1116 | |||
1117 | N = st->window_size; | ||
1118 | |||
1119 | /* Apply hanning window (should pre-compute it)*/ | ||
1120 | for (i=0;i<N;i++) | ||
1121 | st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); | ||
1122 | |||
1123 | /* Compute power spectrum of the echo */ | ||
1124 | spx_fft(st->fft_table, st->y, st->Y); | ||
1125 | power_spectrum(st->Y, residual_echo, N); | ||
1126 | |||
1127 | #ifdef FIXED_POINT | ||
1128 | if (st->leak_estimate > 16383) | ||
1129 | leak2 = 32767; | ||
1130 | else | ||
1131 | leak2 = SHL16(st->leak_estimate, 1); | ||
1132 | #else | ||
1133 | if (st->leak_estimate>.5) | ||
1134 | leak2 = 1; | ||
1135 | else | ||
1136 | leak2 = 2*st->leak_estimate; | ||
1137 | #endif | ||
1138 | /* Estimate residual echo */ | ||
1139 | for (i=0;i<=st->frame_size;i++) | ||
1140 | residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); | ||
1141 | |||
1142 | } | ||
1143 | |||
1144 | int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) | ||
1145 | { | ||
1146 | switch(request) | ||
1147 | { | ||
1148 | |||
1149 | case SPEEX_ECHO_GET_FRAME_SIZE: | ||
1150 | (*(int*)ptr) = st->frame_size; | ||
1151 | break; | ||
1152 | case SPEEX_ECHO_SET_SAMPLING_RATE: | ||
1153 | st->sampling_rate = (*(int*)ptr); | ||
1154 | st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); | ||
1155 | #ifdef FIXED_POINT | ||
1156 | st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); | ||
1157 | st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); | ||
1158 | #else | ||
1159 | st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; | ||
1160 | st->beta_max = (.5f*st->frame_size)/st->sampling_rate; | ||
1161 | #endif | ||
1162 | if (st->sampling_rate<12000) | ||
1163 | st->notch_radius = QCONST16(.9, 15); | ||
1164 | else if (st->sampling_rate<24000) | ||
1165 | st->notch_radius = QCONST16(.982, 15); | ||
1166 | else | ||
1167 | st->notch_radius = QCONST16(.992, 15); | ||
1168 | break; | ||
1169 | case SPEEX_ECHO_GET_SAMPLING_RATE: | ||
1170 | (*(int*)ptr) = st->sampling_rate; | ||
1171 | break; | ||
1172 | default: | ||
1173 | speex_warning_int("Unknown speex_echo_ctl request: ", request); | ||
1174 | return -1; | ||
1175 | } | ||
1176 | return 0; | ||
1177 | } | ||