summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libspeex/preprocess.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/preprocess.c')
-rw-r--r--lib/rbcodec/codecs/libspeex/preprocess.c1185
1 files changed, 1185 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/preprocess.c b/lib/rbcodec/codecs/libspeex/preprocess.c
new file mode 100644
index 0000000000..07a2ad3479
--- /dev/null
+++ b/lib/rbcodec/codecs/libspeex/preprocess.c
@@ -0,0 +1,1185 @@
1/* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
2 Copyright (C) 2004-2006 Epic Games
3
4 File: preprocess.c
5 Preprocessor with denoising based on the algorithm by Ephraim and Malah
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. The name of the author may not be used to endorse or promote products
19 derived from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
32*/
33
34
35/*
36 Recommended papers:
37
38 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
39 short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
40 Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
41
42 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
43 log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
44 Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
45
46 I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
47 Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
48
49 Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
50 approach to combined acoustic echo cancellation and noise reduction". IEEE
51 Transactions on Speech and Audio Processing, 2002.
52
53 J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
54 of simultaneous non-stationary sources". In Proceedings IEEE International
55 Conference on Acoustics, Speech, and Signal Processing, 2004.
56*/
57
58#ifdef HAVE_CONFIG_H
59#include "config-speex.h"
60#endif
61
62#include <math.h>
63#include "speex/speex_preprocess.h"
64#include "speex/speex_echo.h"
65#include "arch.h"
66#include "fftwrap.h"
67#include "filterbank.h"
68#include "math_approx.h"
69#include "os_support.h"
70
71#ifndef M_PI
72#define M_PI 3.14159263
73#endif
74
75#define LOUDNESS_EXP 5.f
76#define AMP_SCALE .001f
77#define AMP_SCALE_1 1000.f
78
79#define NB_BANDS 24
80
81#define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15)
82#define SPEECH_PROB_CONTINUE_DEFAULT QCONST16(0.20f,15)
83#define NOISE_SUPPRESS_DEFAULT -15
84#define ECHO_SUPPRESS_DEFAULT -40
85#define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
86
87#ifndef NULL
88#define NULL 0
89#endif
90
91#define SQR(x) ((x)*(x))
92#define SQR16(x) (MULT16_16((x),(x)))
93#define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
94
95#ifdef FIXED_POINT
96static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
97{
98 if (SHR32(a,7) >= b)
99 {
100 return 32767;
101 } else {
102 if (b>=QCONST32(1,23))
103 {
104 a = SHR32(a,8);
105 b = SHR32(b,8);
106 }
107 if (b>=QCONST32(1,19))
108 {
109 a = SHR32(a,4);
110 b = SHR32(b,4);
111 }
112 if (b>=QCONST32(1,15))
113 {
114 a = SHR32(a,4);
115 b = SHR32(b,4);
116 }
117 a = SHL32(a,8);
118 return PDIV32_16(a,b);
119 }
120
121}
122static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
123{
124 if (SHR32(a,15) >= b)
125 {
126 return 32767;
127 } else {
128 if (b>=QCONST32(1,23))
129 {
130 a = SHR32(a,8);
131 b = SHR32(b,8);
132 }
133 if (b>=QCONST32(1,19))
134 {
135 a = SHR32(a,4);
136 b = SHR32(b,4);
137 }
138 if (b>=QCONST32(1,15))
139 {
140 a = SHR32(a,4);
141 b = SHR32(b,4);
142 }
143 a = SHL32(a,15)-a;
144 return DIV32_16(a,b);
145 }
146}
147#define SNR_SCALING 256.f
148#define SNR_SCALING_1 0.0039062f
149#define SNR_SHIFT 8
150
151#define FRAC_SCALING 32767.f
152#define FRAC_SCALING_1 3.0518e-05
153#define FRAC_SHIFT 1
154
155#define EXPIN_SCALING 2048.f
156#define EXPIN_SCALING_1 0.00048828f
157#define EXPIN_SHIFT 11
158#define EXPOUT_SCALING_1 1.5259e-05
159
160#define NOISE_SHIFT 7
161
162#else
163
164#define DIV32_16_Q8(a,b) ((a)/(b))
165#define DIV32_16_Q15(a,b) ((a)/(b))
166#define SNR_SCALING 1.f
167#define SNR_SCALING_1 1.f
168#define SNR_SHIFT 0
169#define FRAC_SCALING 1.f
170#define FRAC_SCALING_1 1.f
171#define FRAC_SHIFT 0
172#define NOISE_SHIFT 0
173
174#define EXPIN_SCALING 1.f
175#define EXPIN_SCALING_1 1.f
176#define EXPOUT_SCALING_1 1.f
177
178#endif
179
180/** Speex pre-processor state. */
181struct SpeexPreprocessState_ {
182 /* Basic info */
183 int frame_size; /**< Number of samples processed each time */
184 int ps_size; /**< Number of points in the power spectrum */
185 int sampling_rate; /**< Sampling rate of the input/output */
186 int nbands;
187 FilterBank *bank;
188
189 /* Parameters */
190 int denoise_enabled;
191 int vad_enabled;
192 int dereverb_enabled;
193 spx_word16_t reverb_decay;
194 spx_word16_t reverb_level;
195 spx_word16_t speech_prob_start;
196 spx_word16_t speech_prob_continue;
197 int noise_suppress;
198 int echo_suppress;
199 int echo_suppress_active;
200 SpeexEchoState *echo_state;
201
202 /* DSP-related arrays */
203 spx_word16_t *frame; /**< Processing frame (2*ps_size) */
204 spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */
205 spx_word32_t *ps; /**< Current power spectrum */
206 spx_word16_t *gain2; /**< Adjusted gains */
207 spx_word16_t *gain_floor; /**< Minimum gain allowed */
208 spx_word16_t *window; /**< Analysis/Synthesis window */
209 spx_word32_t *noise; /**< Noise estimate */
210 spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
211 spx_word32_t *old_ps; /**< Power spectrum for last frame */
212 spx_word16_t *gain; /**< Ephraim Malah gain */
213 spx_word16_t *prior; /**< A-priori SNR */
214 spx_word16_t *post; /**< A-posteriori SNR */
215
216 spx_word32_t *S; /**< Smoothed power spectrum */
217 spx_word32_t *Smin; /**< See Cohen paper */
218 spx_word32_t *Stmp; /**< See Cohen paper */
219 int *update_prob; /**< Probability of speech presence for noise update */
220
221 spx_word16_t *zeta; /**< Smoothed a priori SNR */
222 spx_word32_t *echo_noise;
223 spx_word32_t *residual_echo;
224
225 /* Misc */
226 spx_word16_t *inbuf; /**< Input buffer (overlapped analysis) */
227 spx_word16_t *outbuf; /**< Output buffer (for overlap and add) */
228
229 /* AGC stuff, only for floating point for now */
230#ifndef FIXED_POINT
231 int agc_enabled;
232 float agc_level;
233 float loudness_accum;
234 float *loudness_weight; /**< Perceptual loudness curve */
235 float loudness; /**< Loudness estimate */
236 float agc_gain; /**< Current AGC gain */
237 int nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */
238 float max_gain; /**< Maximum gain allowed */
239 float max_increase_step; /**< Maximum increase in gain from one frame to another */
240 float max_decrease_step; /**< Maximum decrease in gain from one frame to another */
241 float prev_loudness; /**< Loudness of previous frame */
242 float init_max; /**< Current gain limit during initialisation */
243#endif
244 int nb_adapt; /**< Number of frames used for adaptation so far */
245 int was_speech;
246 int min_count; /**< Number of frames processed so far */
247 void *fft_lookup; /**< Lookup table for the FFT */
248#ifdef FIXED_POINT
249 int frame_shift;
250#endif
251};
252
253
254static void conj_window(spx_word16_t *w, int len)
255{
256 int i;
257 for (i=0;i<len;i++)
258 {
259 spx_word16_t tmp;
260#ifdef FIXED_POINT
261 spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
262#else
263 spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
264#endif
265 int inv=0;
266 if (x<QCONST16(1.f,13))
267 {
268 } else if (x<QCONST16(2.f,13))
269 {
270 x=QCONST16(2.f,13)-x;
271 inv=1;
272 } else if (x<QCONST16(3.f,13))
273 {
274 x=x-QCONST16(2.f,13);
275 inv=1;
276 } else {
277 x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
278 }
279 x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
280 tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
281 if (inv)
282 tmp=SUB16(Q15_ONE,tmp);
283 w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
284 }
285}
286
287
288#ifdef FIXED_POINT
289/* This function approximates the gain function
290 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
291 which multiplied by xi/(1+xi) is the optimal gain
292 in the loudness domain ( sqrt[amplitude] )
293 Input in Q11 format, output in Q15
294*/
295static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
296{
297 int ind;
298 spx_word16_t frac;
299 /* Q13 table */
300 static const spx_word16_t table[21] = {
301 6730, 8357, 9868, 11267, 12563, 13770, 14898,
302 15959, 16961, 17911, 18816, 19682, 20512, 21311,
303 22082, 22827, 23549, 24250, 24931, 25594, 26241};
304 ind = SHR32(xx,10);
305 if (ind<0)
306 return Q15_ONE;
307 if (ind>19)
308 return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
309 frac = SHL32(xx-SHL32(ind,10),5);
310 return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
311}
312
313static inline spx_word16_t qcurve(spx_word16_t x)
314{
315 x = MAX16(x, 1);
316 return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
317}
318
319/* Compute the gain floor based on different floors for the background noise and residual echo */
320static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
321{
322 int i;
323
324 if (noise_suppress > effective_echo_suppress)
325 {
326 spx_word16_t noise_gain, gain_ratio;
327 noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
328 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
329
330 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
331 for (i=0;i<len;i++)
332 gain_floor[i] = MULT16_16_Q15(noise_gain,
333 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
334 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
335 } else {
336 spx_word16_t echo_gain, gain_ratio;
337 echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
338 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
339
340 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
341 for (i=0;i<len;i++)
342 gain_floor[i] = MULT16_16_Q15(echo_gain,
343 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
344 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
345 }
346}
347
348#else
349/* This function approximates the gain function
350 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
351 which multiplied by xi/(1+xi) is the optimal gain
352 in the loudness domain ( sqrt[amplitude] )
353*/
354static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
355{
356 int ind;
357 float integer, frac;
358 float x;
359 static const float table[21] = {
360 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
361 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
362 2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
363 x = EXPIN_SCALING_1*xx;
364 integer = floor(2*x);
365 ind = (int)integer;
366 if (ind<0)
367 return FRAC_SCALING;
368 if (ind>19)
369 return FRAC_SCALING*(1+.1296/x);
370 frac = 2*x-integer;
371 return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
372}
373
374static inline spx_word16_t qcurve(spx_word16_t x)
375{
376 return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
377}
378
379static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
380{
381 int i;
382 float echo_floor;
383 float noise_floor;
384
385 noise_floor = exp(.2302585f*noise_suppress);
386 echo_floor = exp(.2302585f*effective_echo_suppress);
387
388 /* Compute the gain floor based on different floors for the background noise and residual echo */
389 for (i=0;i<len;i++)
390 gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
391}
392
393#endif
394SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
395{
396 int i;
397 int N, N3, N4, M;
398
399 SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
400 st->frame_size = frame_size;
401
402 /* Round ps_size down to the nearest power of two */
403#if 0
404 i=1;
405 st->ps_size = st->frame_size;
406 while(1)
407 {
408 if (st->ps_size & ~i)
409 {
410 st->ps_size &= ~i;
411 i<<=1;
412 } else {
413 break;
414 }
415 }
416
417
418 if (st->ps_size < 3*st->frame_size/4)
419 st->ps_size = st->ps_size * 3 / 2;
420#else
421 st->ps_size = st->frame_size;
422#endif
423
424 N = st->ps_size;
425 N3 = 2*N - st->frame_size;
426 N4 = st->frame_size - N3;
427
428 st->sampling_rate = sampling_rate;
429 st->denoise_enabled = 1;
430 st->vad_enabled = 0;
431 st->dereverb_enabled = 0;
432 st->reverb_decay = 0;
433 st->reverb_level = 0;
434 st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
435 st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
436 st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
437
438 st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
439 st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
440
441 st->echo_state = NULL;
442
443 st->nbands = NB_BANDS;
444 M = st->nbands;
445 st->bank = filterbank_new(M, sampling_rate, N, 1);
446
447 st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
448 st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
449 st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
450
451 st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
452 st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
453 st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
454 st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
455 st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
456 st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
457 st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
458 st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
459 st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
460 st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
461 st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
462 st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
463
464 st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
465 st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
466 st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
467 st->update_prob = (int*)speex_alloc(N*sizeof(int));
468
469 st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
470 st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
471
472 conj_window(st->window, 2*N3);
473 for (i=2*N3;i<2*st->ps_size;i++)
474 st->window[i]=Q15_ONE;
475
476 if (N4>0)
477 {
478 for (i=N3-1;i>=0;i--)
479 {
480 st->window[i+N3+N4]=st->window[i+N3];
481 st->window[i+N3]=1;
482 }
483 }
484 for (i=0;i<N+M;i++)
485 {
486 st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
487 st->reverb_estimate[i]=0;
488 st->old_ps[i]=1;
489 st->gain[i]=Q15_ONE;
490 st->post[i]=SHL16(1, SNR_SHIFT);
491 st->prior[i]=SHL16(1, SNR_SHIFT);
492 }
493
494 for (i=0;i<N;i++)
495 st->update_prob[i] = 1;
496 for (i=0;i<N3;i++)
497 {
498 st->inbuf[i]=0;
499 st->outbuf[i]=0;
500 }
501#ifndef FIXED_POINT
502 st->agc_enabled = 0;
503 st->agc_level = 8000;
504 st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
505 for (i=0;i<N;i++)
506 {
507 float ff=((float)i)*.5*sampling_rate/((float)N);
508 /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
509 st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
510 if (st->loudness_weight[i]<.01f)
511 st->loudness_weight[i]=.01f;
512 st->loudness_weight[i] *= st->loudness_weight[i];
513 }
514 /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
515 st->loudness = 1e-15;
516 st->agc_gain = 1;
517 st->nb_loudness_adapt = 0;
518 st->max_gain = 30;
519 st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
520 st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
521 st->prev_loudness = 1;
522 st->init_max = 1;
523#endif
524 st->was_speech = 0;
525
526 st->fft_lookup = spx_fft_init(2*N);
527
528 st->nb_adapt=0;
529 st->min_count=0;
530 return st;
531}
532
533void speex_preprocess_state_destroy(SpeexPreprocessState *st)
534{
535 speex_free(st->frame);
536 speex_free(st->ft);
537 speex_free(st->ps);
538 speex_free(st->gain2);
539 speex_free(st->gain_floor);
540 speex_free(st->window);
541 speex_free(st->noise);
542 speex_free(st->reverb_estimate);
543 speex_free(st->old_ps);
544 speex_free(st->gain);
545 speex_free(st->prior);
546 speex_free(st->post);
547#ifndef FIXED_POINT
548 speex_free(st->loudness_weight);
549#endif
550 speex_free(st->echo_noise);
551 speex_free(st->residual_echo);
552
553 speex_free(st->S);
554 speex_free(st->Smin);
555 speex_free(st->Stmp);
556 speex_free(st->update_prob);
557 speex_free(st->zeta);
558
559 speex_free(st->inbuf);
560 speex_free(st->outbuf);
561
562 spx_fft_destroy(st->fft_lookup);
563 filterbank_destroy(st->bank);
564 speex_free(st);
565}
566
567/* FIXME: The AGC doesn't work yet with fixed-point*/
568#ifndef FIXED_POINT
569static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
570{
571 int i;
572 int N = st->ps_size;
573 float target_gain;
574 float loudness=1.f;
575 float rate;
576
577 for (i=2;i<N;i++)
578 {
579 loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
580 }
581 loudness=sqrt(loudness);
582 /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
583 loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
584 if (Pframe>.3f)
585 {
586 st->nb_loudness_adapt++;
587 /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
588 rate = .03*Pframe*Pframe;
589 st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
590 st->loudness_accum = (1-rate)*st->loudness_accum + rate;
591 if (st->init_max < st->max_gain && st->nb_adapt > 20)
592 st->init_max *= 1.f + .1f*Pframe*Pframe;
593 }
594 /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
595
596 target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
597
598 if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain)
599 {
600 if (target_gain > st->max_increase_step*st->agc_gain)
601 target_gain = st->max_increase_step*st->agc_gain;
602 if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
603 target_gain = st->max_decrease_step*st->agc_gain;
604 if (target_gain > st->max_gain)
605 target_gain = st->max_gain;
606 if (target_gain > st->init_max)
607 target_gain = st->init_max;
608
609 st->agc_gain = target_gain;
610 }
611 /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
612
613 for (i=0;i<2*N;i++)
614 ft[i] *= st->agc_gain;
615 st->prev_loudness = loudness;
616}
617#endif
618
619static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
620{
621 int i;
622 int N = st->ps_size;
623 int N3 = 2*N - st->frame_size;
624 int N4 = st->frame_size - N3;
625 spx_word32_t *ps=st->ps;
626
627 /* 'Build' input frame */
628 for (i=0;i<N3;i++)
629 st->frame[i]=st->inbuf[i];
630 for (i=0;i<st->frame_size;i++)
631 st->frame[N3+i]=x[i];
632
633 /* Update inbuf */
634 for (i=0;i<N3;i++)
635 st->inbuf[i]=x[N4+i];
636
637 /* Windowing */
638 for (i=0;i<2*N;i++)
639 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
640
641#ifdef FIXED_POINT
642 {
643 spx_word16_t max_val=0;
644 for (i=0;i<2*N;i++)
645 max_val = MAX16(max_val, ABS16(st->frame[i]));
646 st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
647 for (i=0;i<2*N;i++)
648 st->frame[i] = SHL16(st->frame[i], st->frame_shift);
649 }
650#endif
651
652 /* Perform FFT */
653 spx_fft(st->fft_lookup, st->frame, st->ft);
654
655 /* Power spectrum */
656 ps[0]=MULT16_16(st->ft[0],st->ft[0]);
657 for (i=1;i<N;i++)
658 ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
659 for (i=0;i<N;i++)
660 st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
661
662 filterbank_compute_bank32(st->bank, ps, ps+N);
663}
664
665static void update_noise_prob(SpeexPreprocessState *st)
666{
667 int i;
668 int min_range;
669 int N = st->ps_size;
670
671 for (i=1;i<N-1;i++)
672 st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
673 + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
674 st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
675 st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
676
677 if (st->nb_adapt==1)
678 {
679 for (i=0;i<N;i++)
680 st->Smin[i] = st->Stmp[i] = 0;
681 }
682
683 if (st->nb_adapt < 100)
684 min_range = 15;
685 else if (st->nb_adapt < 1000)
686 min_range = 50;
687 else if (st->nb_adapt < 10000)
688 min_range = 150;
689 else
690 min_range = 300;
691 if (st->min_count > min_range)
692 {
693 st->min_count = 0;
694 for (i=0;i<N;i++)
695 {
696 st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
697 st->Stmp[i] = st->S[i];
698 }
699 } else {
700 for (i=0;i<N;i++)
701 {
702 st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
703 st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
704 }
705 }
706 for (i=0;i<N;i++)
707 {
708 if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > ADD32(st->Smin[i],EXTEND32(20)))
709 st->update_prob[i] = 1;
710 else
711 st->update_prob[i] = 0;
712 /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
713 /*fprintf (stderr, "%f ", st->update_prob[i]);*/
714 }
715
716}
717
718#define NOISE_OVERCOMPENS 1.
719
720void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
721
722int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
723{
724 return speex_preprocess_run(st, x);
725}
726
727int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
728{
729 int i;
730 int M;
731 int N = st->ps_size;
732 int N3 = 2*N - st->frame_size;
733 int N4 = st->frame_size - N3;
734 spx_word32_t *ps=st->ps;
735 spx_word32_t Zframe;
736 spx_word16_t Pframe;
737 spx_word16_t beta, beta_1;
738 spx_word16_t effective_echo_suppress;
739
740 st->nb_adapt++;
741 if (st->nb_adapt>20000)
742 st->nb_adapt = 20000;
743 st->min_count++;
744
745 beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
746 beta_1 = Q15_ONE-beta;
747 M = st->nbands;
748 /* Deal with residual echo if provided */
749 if (st->echo_state)
750 {
751 speex_echo_get_residual(st->echo_state, st->residual_echo, N);
752#ifndef FIXED_POINT
753 /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
754 if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
755 {
756 for (i=0;i<N;i++)
757 st->residual_echo[i] = 0;
758 }
759#endif
760 for (i=0;i<N;i++)
761 st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
762 filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
763 } else {
764 for (i=0;i<N+M;i++)
765 st->echo_noise[i] = 0;
766 }
767 preprocess_analysis(st, x);
768
769 update_noise_prob(st);
770
771 /* Noise estimation always updated for the 10 first frames */
772 /*if (st->nb_adapt<10)
773 {
774 for (i=1;i<N-1;i++)
775 st->update_prob[i] = 0;
776 }
777 */
778
779 /* Update the noise estimate for the frequencies where it can be */
780 for (i=0;i<N;i++)
781 {
782 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
783 st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
784 }
785 filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
786
787 /* Special case for first frame */
788 if (st->nb_adapt==1)
789 for (i=0;i<N+M;i++)
790 st->old_ps[i] = ps[i];
791
792 /* Compute a posteriori SNR */
793 for (i=0;i<N+M;i++)
794 {
795 spx_word16_t gamma;
796
797 /* Total noise estimate including residual echo and reverberation */
798 spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
799
800 /* A posteriori SNR = ps/noise - 1*/
801 st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
802 st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
803
804 /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
805 gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
806
807 /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
808 st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
809 st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
810 }
811
812 /*print_vec(st->post, N+M, "");*/
813
814 /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
815 st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
816 for (i=1;i<N-1;i++)
817 st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
818 MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
819 for (i=N-1;i<N+M;i++)
820 st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
821
822 /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
823 Zframe = 0;
824 for (i=N;i<N+M;i++)
825 Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
826 Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
827
828 effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
829
830 compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
831
832 /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
833 Technically this is actually wrong because the EM gaim assumes a slightly different probability
834 distribution */
835 for (i=N;i<N+M;i++)
836 {
837 /* See EM and Cohen papers*/
838 spx_word32_t theta;
839 /* Gain from hypergeometric function */
840 spx_word32_t MM;
841 /* Weiner filter gain */
842 spx_word16_t prior_ratio;
843 /* a priority probability of speech presence based on Bark sub-band alone */
844 spx_word16_t P1;
845 /* Speech absence a priori probability (considering sub-band and frame) */
846 spx_word16_t q;
847#ifdef FIXED_POINT
848 spx_word16_t tmp;
849#endif
850
851 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
852 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
853
854 MM = hypergeom_gain(theta);
855 /* Gain with bound */
856 st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
857 /* Save old Bark power spectrum */
858 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
859
860 P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
861 q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
862#ifdef FIXED_POINT
863 theta = MIN32(theta, EXTEND32(32767));
864/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
865 tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
866/*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
867 st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
868#else
869 st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
870#endif
871 }
872 /* Convert the EM gains and speech prob to linear frequency */
873 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
874 filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
875
876 /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
877 if (1)
878 {
879 filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
880
881 /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
882 for (i=0;i<N;i++)
883 {
884 spx_word32_t MM;
885 spx_word32_t theta;
886 spx_word16_t prior_ratio;
887 spx_word16_t tmp;
888 spx_word16_t p;
889 spx_word16_t g;
890
891 /* Wiener filter gain */
892 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
893 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
894
895 /* Optimal estimator for loudness domain */
896 MM = hypergeom_gain(theta);
897 /* EM gain with bound */
898 g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
899 /* Interpolated speech probability of presence */
900 p = st->gain2[i];
901
902 /* Constrain the gain to be close to the Bark scale gain */
903 if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
904 g = MULT16_16(3,st->gain[i]);
905 st->gain[i] = g;
906
907 /* Save old power spectrum */
908 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
909
910 /* Apply gain floor */
911 if (st->gain[i] < st->gain_floor[i])
912 st->gain[i] = st->gain_floor[i];
913
914 /* Exponential decay model for reverberation (unused) */
915 /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
916
917 /* Take into account speech probability of presence (loudness domain MMSE estimator) */
918 /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
919 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
920 st->gain2[i]=SQR16_Q15(tmp);
921
922 /* Use this if you want a log-domain MMSE estimator instead */
923 /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
924 }
925 } else {
926 for (i=N;i<N+M;i++)
927 {
928 spx_word16_t tmp;
929 spx_word16_t p = st->gain2[i];
930 st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
931 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
932 st->gain2[i]=SQR16_Q15(tmp);
933 }
934 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
935 }
936
937 /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
938 if (!st->denoise_enabled)
939 {
940 for (i=0;i<N+M;i++)
941 st->gain2[i]=Q15_ONE;
942 }
943
944 /* Apply computed gain */
945 for (i=1;i<N;i++)
946 {
947 st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
948 st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
949 }
950 st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
951 st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
952
953 /*FIXME: This *will* not work for fixed-point */
954#ifndef FIXED_POINT
955 if (st->agc_enabled)
956 speex_compute_agc(st, Pframe, st->ft);
957#endif
958
959 /* Inverse FFT with 1/N scaling */
960 spx_ifft(st->fft_lookup, st->ft, st->frame);
961 /* Scale back to original (lower) amplitude */
962 for (i=0;i<2*N;i++)
963 st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
964
965 /*FIXME: This *will* not work for fixed-point */
966#ifndef FIXED_POINT
967 if (st->agc_enabled)
968 {
969 float max_sample=0;
970 for (i=0;i<2*N;i++)
971 if (fabs(st->frame[i])>max_sample)
972 max_sample = fabs(st->frame[i]);
973 if (max_sample>28000.f)
974 {
975 float damp = 28000.f/max_sample;
976 for (i=0;i<2*N;i++)
977 st->frame[i] *= damp;
978 }
979 }
980#endif
981
982 /* Synthesis window (for WOLA) */
983 for (i=0;i<2*N;i++)
984 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
985
986 /* Perform overlap and add */
987 for (i=0;i<N3;i++)
988 x[i] = st->outbuf[i] + st->frame[i];
989 for (i=0;i<N4;i++)
990 x[N3+i] = st->frame[N3+i];
991
992 /* Update outbuf */
993 for (i=0;i<N3;i++)
994 st->outbuf[i] = st->frame[st->frame_size+i];
995
996 /* FIXME: This VAD is a kludge */
997 if (st->vad_enabled)
998 {
999 if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))
1000 {
1001 st->was_speech=1;
1002 return 1;
1003 } else
1004 {
1005 st->was_speech=0;
1006 return 0;
1007 }
1008 } else {
1009 return 1;
1010 }
1011}
1012
1013void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
1014{
1015 int i;
1016 int N = st->ps_size;
1017 int N3 = 2*N - st->frame_size;
1018 int M;
1019 spx_word32_t *ps=st->ps;
1020
1021 M = st->nbands;
1022 st->min_count++;
1023
1024 preprocess_analysis(st, x);
1025
1026 update_noise_prob(st);
1027
1028 for (i=1;i<N-1;i++)
1029 {
1030 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
1031 {
1032 st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
1033 }
1034 }
1035
1036 for (i=0;i<N3;i++)
1037 st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
1038
1039 /* Save old power spectrum */
1040 for (i=0;i<N+M;i++)
1041 st->old_ps[i] = ps[i];
1042
1043 for (i=0;i<N;i++)
1044 st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
1045}
1046
1047
1048int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
1049{
1050 int i;
1051 SpeexPreprocessState *st;
1052 st=(SpeexPreprocessState*)state;
1053 switch(request)
1054 {
1055 case SPEEX_PREPROCESS_SET_DENOISE:
1056 st->denoise_enabled = (*(spx_int32_t*)ptr);
1057 break;
1058 case SPEEX_PREPROCESS_GET_DENOISE:
1059 (*(spx_int32_t*)ptr) = st->denoise_enabled;
1060 break;
1061#ifndef FIXED_POINT
1062 case SPEEX_PREPROCESS_SET_AGC:
1063 st->agc_enabled = (*(spx_int32_t*)ptr);
1064 break;
1065 case SPEEX_PREPROCESS_GET_AGC:
1066 (*(spx_int32_t*)ptr) = st->agc_enabled;
1067 break;
1068#ifndef DISABLE_FLOAT_API
1069 case SPEEX_PREPROCESS_SET_AGC_LEVEL:
1070 st->agc_level = (*(float*)ptr);
1071 if (st->agc_level<1)
1072 st->agc_level=1;
1073 if (st->agc_level>32768)
1074 st->agc_level=32768;
1075 break;
1076 case SPEEX_PREPROCESS_GET_AGC_LEVEL:
1077 (*(float*)ptr) = st->agc_level;
1078 break;
1079#endif /* #ifndef DISABLE_FLOAT_API */
1080 case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
1081 st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1082 break;
1083 case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
1084 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
1085 break;
1086 case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
1087 st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1088 break;
1089 case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
1090 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
1091 break;
1092 case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
1093 st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
1094 break;
1095 case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
1096 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
1097 break;
1098#endif
1099 case SPEEX_PREPROCESS_SET_VAD:
1100 speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
1101 st->vad_enabled = (*(spx_int32_t*)ptr);
1102 break;
1103 case SPEEX_PREPROCESS_GET_VAD:
1104 (*(spx_int32_t*)ptr) = st->vad_enabled;
1105 break;
1106
1107 case SPEEX_PREPROCESS_SET_DEREVERB:
1108 st->dereverb_enabled = (*(spx_int32_t*)ptr);
1109 for (i=0;i<st->ps_size;i++)
1110 st->reverb_estimate[i]=0;
1111 break;
1112 case SPEEX_PREPROCESS_GET_DEREVERB:
1113 (*(spx_int32_t*)ptr) = st->dereverb_enabled;
1114 break;
1115
1116 case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
1117 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1118 /*st->reverb_level = (*(float*)ptr);*/
1119 break;
1120 case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
1121 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1122 /*(*(float*)ptr) = st->reverb_level;*/
1123 break;
1124
1125 case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
1126 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1127 /*st->reverb_decay = (*(float*)ptr);*/
1128 break;
1129 case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
1130 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1131 /*(*(float*)ptr) = st->reverb_decay;*/
1132 break;
1133
1134 case SPEEX_PREPROCESS_SET_PROB_START:
1135 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1136 st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1137 break;
1138 case SPEEX_PREPROCESS_GET_PROB_START:
1139 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
1140 break;
1141
1142 case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
1143 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1144 st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1145 break;
1146 case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
1147 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
1148 break;
1149
1150 case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
1151 st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
1152 break;
1153 case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
1154 (*(spx_int32_t*)ptr) = st->noise_suppress;
1155 break;
1156 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
1157 st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
1158 break;
1159 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
1160 (*(spx_int32_t*)ptr) = st->echo_suppress;
1161 break;
1162 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
1163 st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
1164 break;
1165 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
1166 (*(spx_int32_t*)ptr) = st->echo_suppress_active;
1167 break;
1168 case SPEEX_PREPROCESS_SET_ECHO_STATE:
1169 st->echo_state = (SpeexEchoState*)ptr;
1170 break;
1171 case SPEEX_PREPROCESS_GET_ECHO_STATE:
1172 ptr = (void*)st->echo_state;
1173 break;
1174#ifndef FIXED_POINT
1175 case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
1176 (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
1177 break;
1178#endif
1179
1180 default:
1181 speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
1182 return -1;
1183 }
1184 return 0;
1185}