diff options
author | Solomon Peachy <pizza@shaftnet.org> | 2024-05-08 10:36:38 -0400 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2024-06-20 07:08:35 -0400 |
commit | 547b6a570dbad844e79b4ba5eb934f043bab6318 (patch) | |
tree | 0cbdb670d73a2544d33985166c5abfa69e20a590 /lib/rbcodec/codecs/libspeex/preprocess.c | |
parent | 8ef20383b1e5025f7724e750832de6e28e50680d (diff) | |
download | rockbox-547b6a570dbad844e79b4ba5eb934f043bab6318.tar.gz rockbox-547b6a570dbad844e79b4ba5eb934f043bab6318.zip |
codecs: Update libspeex from 1.2beta3 to 1.2rc1
This is a relatively minor bump, but it's the first step towards
bringing this current.
Change-Id: Iab6c9b0c77f0ba705280434ea74b513364719499
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/preprocess.c')
-rw-r--r-- | lib/rbcodec/codecs/libspeex/preprocess.c | 202 |
1 files changed, 118 insertions, 84 deletions
diff --git a/lib/rbcodec/codecs/libspeex/preprocess.c b/lib/rbcodec/codecs/libspeex/preprocess.c index 07a2ad3479..58d213a0a9 100644 --- a/lib/rbcodec/codecs/libspeex/preprocess.c +++ b/lib/rbcodec/codecs/libspeex/preprocess.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin) | 1 | /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin) |
2 | Copyright (C) 2004-2006 Epic Games | 2 | Copyright (C) 2004-2006 Epic Games |
3 | 3 | ||
4 | File: preprocess.c | 4 | File: preprocess.c |
5 | Preprocessor with denoising based on the algorithm by Ephraim and Malah | 5 | Preprocessor with denoising based on the algorithm by Ephraim and Malah |
6 | 6 | ||
@@ -34,24 +34,24 @@ | |||
34 | 34 | ||
35 | /* | 35 | /* |
36 | Recommended papers: | 36 | Recommended papers: |
37 | 37 | ||
38 | Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error | 38 | Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error |
39 | short-time spectral amplitude estimator". IEEE Transactions on Acoustics, | 39 | short-time spectral amplitude estimator". IEEE Transactions on Acoustics, |
40 | Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984. | 40 | Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984. |
41 | 41 | ||
42 | Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error | 42 | Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error |
43 | log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and | 43 | log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and |
44 | Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985. | 44 | Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985. |
45 | 45 | ||
46 | I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments". | 46 | I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments". |
47 | Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001. | 47 | Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001. |
48 | 48 | ||
49 | Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic | 49 | Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic |
50 | approach to combined acoustic echo cancellation and noise reduction". IEEE | 50 | approach to combined acoustic echo cancellation and noise reduction". IEEE |
51 | Transactions on Speech and Audio Processing, 2002. | 51 | Transactions on Speech and Audio Processing, 2002. |
52 | 52 | ||
53 | J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation | 53 | J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation |
54 | of simultaneous non-stationary sources". In Proceedings IEEE International | 54 | of simultaneous non-stationary sources". In Proceedings IEEE International |
55 | Conference on Acoustics, Speech, and Signal Processing, 2004. | 55 | Conference on Acoustics, Speech, and Signal Processing, 2004. |
56 | */ | 56 | */ |
57 | 57 | ||
@@ -75,7 +75,7 @@ | |||
75 | #define LOUDNESS_EXP 5.f | 75 | #define LOUDNESS_EXP 5.f |
76 | #define AMP_SCALE .001f | 76 | #define AMP_SCALE .001f |
77 | #define AMP_SCALE_1 1000.f | 77 | #define AMP_SCALE_1 1000.f |
78 | 78 | ||
79 | #define NB_BANDS 24 | 79 | #define NB_BANDS 24 |
80 | 80 | ||
81 | #define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15) | 81 | #define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15) |
@@ -117,7 +117,7 @@ static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b) | |||
117 | a = SHL32(a,8); | 117 | a = SHL32(a,8); |
118 | return PDIV32_16(a,b); | 118 | return PDIV32_16(a,b); |
119 | } | 119 | } |
120 | 120 | ||
121 | } | 121 | } |
122 | static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b) | 122 | static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b) |
123 | { | 123 | { |
@@ -185,7 +185,7 @@ struct SpeexPreprocessState_ { | |||
185 | int sampling_rate; /**< Sampling rate of the input/output */ | 185 | int sampling_rate; /**< Sampling rate of the input/output */ |
186 | int nbands; | 186 | int nbands; |
187 | FilterBank *bank; | 187 | FilterBank *bank; |
188 | 188 | ||
189 | /* Parameters */ | 189 | /* Parameters */ |
190 | int denoise_enabled; | 190 | int denoise_enabled; |
191 | int vad_enabled; | 191 | int vad_enabled; |
@@ -198,7 +198,9 @@ struct SpeexPreprocessState_ { | |||
198 | int echo_suppress; | 198 | int echo_suppress; |
199 | int echo_suppress_active; | 199 | int echo_suppress_active; |
200 | SpeexEchoState *echo_state; | 200 | SpeexEchoState *echo_state; |
201 | 201 | ||
202 | spx_word16_t speech_prob; /**< Probability last frame was speech */ | ||
203 | |||
202 | /* DSP-related arrays */ | 204 | /* DSP-related arrays */ |
203 | spx_word16_t *frame; /**< Processing frame (2*ps_size) */ | 205 | spx_word16_t *frame; /**< Processing frame (2*ps_size) */ |
204 | spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */ | 206 | spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */ |
@@ -234,7 +236,6 @@ struct SpeexPreprocessState_ { | |||
234 | float *loudness_weight; /**< Perceptual loudness curve */ | 236 | float *loudness_weight; /**< Perceptual loudness curve */ |
235 | float loudness; /**< Loudness estimate */ | 237 | float loudness; /**< Loudness estimate */ |
236 | float agc_gain; /**< Current AGC gain */ | 238 | float agc_gain; /**< Current AGC gain */ |
237 | int nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */ | ||
238 | float max_gain; /**< Maximum gain allowed */ | 239 | float max_gain; /**< Maximum gain allowed */ |
239 | float max_increase_step; /**< Maximum increase in gain from one frame to another */ | 240 | float max_increase_step; /**< Maximum increase in gain from one frame to another */ |
240 | float max_decrease_step; /**< Maximum decrease in gain from one frame to another */ | 241 | float max_decrease_step; /**< Maximum decrease in gain from one frame to another */ |
@@ -259,7 +260,7 @@ static void conj_window(spx_word16_t *w, int len) | |||
259 | spx_word16_t tmp; | 260 | spx_word16_t tmp; |
260 | #ifdef FIXED_POINT | 261 | #ifdef FIXED_POINT |
261 | spx_word16_t x = DIV32_16(MULT16_16(32767,i),len); | 262 | spx_word16_t x = DIV32_16(MULT16_16(32767,i),len); |
262 | #else | 263 | #else |
263 | spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len); | 264 | spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len); |
264 | #endif | 265 | #endif |
265 | int inv=0; | 266 | int inv=0; |
@@ -284,10 +285,10 @@ static void conj_window(spx_word16_t *w, int len) | |||
284 | } | 285 | } |
285 | } | 286 | } |
286 | 287 | ||
287 | 288 | ||
288 | #ifdef FIXED_POINT | 289 | #ifdef FIXED_POINT |
289 | /* This function approximates the gain function | 290 | /* This function approximates the gain function |
290 | y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) | 291 | y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) |
291 | which multiplied by xi/(1+xi) is the optimal gain | 292 | which multiplied by xi/(1+xi) is the optimal gain |
292 | in the loudness domain ( sqrt[amplitude] ) | 293 | in the loudness domain ( sqrt[amplitude] ) |
293 | Input in Q11 format, output in Q15 | 294 | Input in Q11 format, output in Q15 |
@@ -320,7 +321,7 @@ static inline spx_word16_t qcurve(spx_word16_t x) | |||
320 | static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) | 321 | static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) |
321 | { | 322 | { |
322 | int i; | 323 | int i; |
323 | 324 | ||
324 | if (noise_suppress > effective_echo_suppress) | 325 | if (noise_suppress > effective_echo_suppress) |
325 | { | 326 | { |
326 | spx_word16_t noise_gain, gain_ratio; | 327 | spx_word16_t noise_gain, gain_ratio; |
@@ -346,8 +347,8 @@ static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, | |||
346 | } | 347 | } |
347 | 348 | ||
348 | #else | 349 | #else |
349 | /* This function approximates the gain function | 350 | /* This function approximates the gain function |
350 | y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) | 351 | y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) |
351 | which multiplied by xi/(1+xi) is the optimal gain | 352 | which multiplied by xi/(1+xi) is the optimal gain |
352 | in the loudness domain ( sqrt[amplitude] ) | 353 | in the loudness domain ( sqrt[amplitude] ) |
353 | */ | 354 | */ |
@@ -391,7 +392,7 @@ static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, | |||
391 | } | 392 | } |
392 | 393 | ||
393 | #endif | 394 | #endif |
394 | SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate) | 395 | EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate) |
395 | { | 396 | { |
396 | int i; | 397 | int i; |
397 | int N, N3, N4, M; | 398 | int N, N3, N4, M; |
@@ -413,8 +414,8 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r | |||
413 | break; | 414 | break; |
414 | } | 415 | } |
415 | } | 416 | } |
416 | 417 | ||
417 | 418 | ||
418 | if (st->ps_size < 3*st->frame_size/4) | 419 | if (st->ps_size < 3*st->frame_size/4) |
419 | st->ps_size = st->ps_size * 3 / 2; | 420 | st->ps_size = st->ps_size * 3 / 2; |
420 | #else | 421 | #else |
@@ -424,7 +425,7 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r | |||
424 | N = st->ps_size; | 425 | N = st->ps_size; |
425 | N3 = 2*N - st->frame_size; | 426 | N3 = 2*N - st->frame_size; |
426 | N4 = st->frame_size - N3; | 427 | N4 = st->frame_size - N3; |
427 | 428 | ||
428 | st->sampling_rate = sampling_rate; | 429 | st->sampling_rate = sampling_rate; |
429 | st->denoise_enabled = 1; | 430 | st->denoise_enabled = 1; |
430 | st->vad_enabled = 0; | 431 | st->vad_enabled = 0; |
@@ -439,15 +440,15 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r | |||
439 | st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; | 440 | st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; |
440 | 441 | ||
441 | st->echo_state = NULL; | 442 | st->echo_state = NULL; |
442 | 443 | ||
443 | st->nbands = NB_BANDS; | 444 | st->nbands = NB_BANDS; |
444 | M = st->nbands; | 445 | M = st->nbands; |
445 | st->bank = filterbank_new(M, sampling_rate, N, 1); | 446 | st->bank = filterbank_new(M, sampling_rate, N, 1); |
446 | 447 | ||
447 | st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); | 448 | st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); |
448 | st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); | 449 | st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); |
449 | st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); | 450 | st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); |
450 | 451 | ||
451 | st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); | 452 | st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); |
452 | st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); | 453 | st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); |
453 | st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); | 454 | st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); |
@@ -460,19 +461,19 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r | |||
460 | st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); | 461 | st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); |
461 | st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); | 462 | st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); |
462 | st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); | 463 | st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); |
463 | 464 | ||
464 | st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); | 465 | st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); |
465 | st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); | 466 | st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); |
466 | st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); | 467 | st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); |
467 | st->update_prob = (int*)speex_alloc(N*sizeof(int)); | 468 | st->update_prob = (int*)speex_alloc(N*sizeof(int)); |
468 | 469 | ||
469 | st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); | 470 | st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); |
470 | st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); | 471 | st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); |
471 | 472 | ||
472 | conj_window(st->window, 2*N3); | 473 | conj_window(st->window, 2*N3); |
473 | for (i=2*N3;i<2*st->ps_size;i++) | 474 | for (i=2*N3;i<2*st->ps_size;i++) |
474 | st->window[i]=Q15_ONE; | 475 | st->window[i]=Q15_ONE; |
475 | 476 | ||
476 | if (N4>0) | 477 | if (N4>0) |
477 | { | 478 | { |
478 | for (i=N3-1;i>=0;i--) | 479 | for (i=N3-1;i>=0;i--) |
@@ -514,7 +515,6 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r | |||
514 | /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/ | 515 | /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/ |
515 | st->loudness = 1e-15; | 516 | st->loudness = 1e-15; |
516 | st->agc_gain = 1; | 517 | st->agc_gain = 1; |
517 | st->nb_loudness_adapt = 0; | ||
518 | st->max_gain = 30; | 518 | st->max_gain = 30; |
519 | st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate); | 519 | st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate); |
520 | st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate); | 520 | st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate); |
@@ -530,7 +530,7 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r | |||
530 | return st; | 530 | return st; |
531 | } | 531 | } |
532 | 532 | ||
533 | void speex_preprocess_state_destroy(SpeexPreprocessState *st) | 533 | EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st) |
534 | { | 534 | { |
535 | speex_free(st->frame); | 535 | speex_free(st->frame); |
536 | speex_free(st->ft); | 536 | speex_free(st->ft); |
@@ -573,7 +573,7 @@ static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx | |||
573 | float target_gain; | 573 | float target_gain; |
574 | float loudness=1.f; | 574 | float loudness=1.f; |
575 | float rate; | 575 | float rate; |
576 | 576 | ||
577 | for (i=2;i<N;i++) | 577 | for (i=2;i<N;i++) |
578 | { | 578 | { |
579 | loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; | 579 | loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; |
@@ -583,7 +583,6 @@ static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx | |||
583 | loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ | 583 | loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ |
584 | if (Pframe>.3f) | 584 | if (Pframe>.3f) |
585 | { | 585 | { |
586 | st->nb_loudness_adapt++; | ||
587 | /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/ | 586 | /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/ |
588 | rate = .03*Pframe*Pframe; | 587 | rate = .03*Pframe*Pframe; |
589 | st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); | 588 | st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); |
@@ -592,7 +591,7 @@ static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx | |||
592 | st->init_max *= 1.f + .1f*Pframe*Pframe; | 591 | st->init_max *= 1.f + .1f*Pframe*Pframe; |
593 | } | 592 | } |
594 | /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ | 593 | /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ |
595 | 594 | ||
596 | target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP); | 595 | target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP); |
597 | 596 | ||
598 | if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) | 597 | if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) |
@@ -605,11 +604,11 @@ static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx | |||
605 | target_gain = st->max_gain; | 604 | target_gain = st->max_gain; |
606 | if (target_gain > st->init_max) | 605 | if (target_gain > st->init_max) |
607 | target_gain = st->init_max; | 606 | target_gain = st->init_max; |
608 | 607 | ||
609 | st->agc_gain = target_gain; | 608 | st->agc_gain = target_gain; |
610 | } | 609 | } |
611 | /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ | 610 | /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ |
612 | 611 | ||
613 | for (i=0;i<2*N;i++) | 612 | for (i=0;i<2*N;i++) |
614 | ft[i] *= st->agc_gain; | 613 | ft[i] *= st->agc_gain; |
615 | st->prev_loudness = loudness; | 614 | st->prev_loudness = loudness; |
@@ -629,7 +628,7 @@ static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) | |||
629 | st->frame[i]=st->inbuf[i]; | 628 | st->frame[i]=st->inbuf[i]; |
630 | for (i=0;i<st->frame_size;i++) | 629 | for (i=0;i<st->frame_size;i++) |
631 | st->frame[N3+i]=x[i]; | 630 | st->frame[N3+i]=x[i]; |
632 | 631 | ||
633 | /* Update inbuf */ | 632 | /* Update inbuf */ |
634 | for (i=0;i<N3;i++) | 633 | for (i=0;i<N3;i++) |
635 | st->inbuf[i]=x[N4+i]; | 634 | st->inbuf[i]=x[N4+i]; |
@@ -648,10 +647,10 @@ static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) | |||
648 | st->frame[i] = SHL16(st->frame[i], st->frame_shift); | 647 | st->frame[i] = SHL16(st->frame[i], st->frame_shift); |
649 | } | 648 | } |
650 | #endif | 649 | #endif |
651 | 650 | ||
652 | /* Perform FFT */ | 651 | /* Perform FFT */ |
653 | spx_fft(st->fft_lookup, st->frame, st->ft); | 652 | spx_fft(st->fft_lookup, st->frame, st->ft); |
654 | 653 | ||
655 | /* Power spectrum */ | 654 | /* Power spectrum */ |
656 | ps[0]=MULT16_16(st->ft[0],st->ft[0]); | 655 | ps[0]=MULT16_16(st->ft[0],st->ft[0]); |
657 | for (i=1;i<N;i++) | 656 | for (i=1;i<N;i++) |
@@ -669,11 +668,11 @@ static void update_noise_prob(SpeexPreprocessState *st) | |||
669 | int N = st->ps_size; | 668 | int N = st->ps_size; |
670 | 669 | ||
671 | for (i=1;i<N-1;i++) | 670 | for (i=1;i<N-1;i++) |
672 | st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) | 671 | st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) |
673 | + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); | 672 | + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); |
674 | st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); | 673 | st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); |
675 | st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); | 674 | st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); |
676 | 675 | ||
677 | if (st->nb_adapt==1) | 676 | if (st->nb_adapt==1) |
678 | { | 677 | { |
679 | for (i=0;i<N;i++) | 678 | for (i=0;i<N;i++) |
@@ -700,12 +699,12 @@ static void update_noise_prob(SpeexPreprocessState *st) | |||
700 | for (i=0;i<N;i++) | 699 | for (i=0;i<N;i++) |
701 | { | 700 | { |
702 | st->Smin[i] = MIN32(st->Smin[i], st->S[i]); | 701 | st->Smin[i] = MIN32(st->Smin[i], st->S[i]); |
703 | st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); | 702 | st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); |
704 | } | 703 | } |
705 | } | 704 | } |
706 | for (i=0;i<N;i++) | 705 | for (i=0;i<N;i++) |
707 | { | 706 | { |
708 | if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > ADD32(st->Smin[i],EXTEND32(20))) | 707 | if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i]) |
709 | st->update_prob[i] = 1; | 708 | st->update_prob[i] = 1; |
710 | else | 709 | else |
711 | st->update_prob[i] = 0; | 710 | st->update_prob[i] = 0; |
@@ -719,12 +718,12 @@ static void update_noise_prob(SpeexPreprocessState *st) | |||
719 | 718 | ||
720 | void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); | 719 | void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); |
721 | 720 | ||
722 | int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) | 721 | EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) |
723 | { | 722 | { |
724 | return speex_preprocess_run(st, x); | 723 | return speex_preprocess_run(st, x); |
725 | } | 724 | } |
726 | 725 | ||
727 | int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | 726 | EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) |
728 | { | 727 | { |
729 | int i; | 728 | int i; |
730 | int M; | 729 | int M; |
@@ -736,12 +735,12 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
736 | spx_word16_t Pframe; | 735 | spx_word16_t Pframe; |
737 | spx_word16_t beta, beta_1; | 736 | spx_word16_t beta, beta_1; |
738 | spx_word16_t effective_echo_suppress; | 737 | spx_word16_t effective_echo_suppress; |
739 | 738 | ||
740 | st->nb_adapt++; | 739 | st->nb_adapt++; |
741 | if (st->nb_adapt>20000) | 740 | if (st->nb_adapt>20000) |
742 | st->nb_adapt = 20000; | 741 | st->nb_adapt = 20000; |
743 | st->min_count++; | 742 | st->min_count++; |
744 | 743 | ||
745 | beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); | 744 | beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); |
746 | beta_1 = Q15_ONE-beta; | 745 | beta_1 = Q15_ONE-beta; |
747 | M = st->nbands; | 746 | M = st->nbands; |
@@ -775,7 +774,7 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
775 | st->update_prob[i] = 0; | 774 | st->update_prob[i] = 0; |
776 | } | 775 | } |
777 | */ | 776 | */ |
778 | 777 | ||
779 | /* Update the noise estimate for the frequencies where it can be */ | 778 | /* Update the noise estimate for the frequencies where it can be */ |
780 | for (i=0;i<N;i++) | 779 | for (i=0;i<N;i++) |
781 | { | 780 | { |
@@ -793,17 +792,17 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
793 | for (i=0;i<N+M;i++) | 792 | for (i=0;i<N+M;i++) |
794 | { | 793 | { |
795 | spx_word16_t gamma; | 794 | spx_word16_t gamma; |
796 | 795 | ||
797 | /* Total noise estimate including residual echo and reverberation */ | 796 | /* Total noise estimate including residual echo and reverberation */ |
798 | spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); | 797 | spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); |
799 | 798 | ||
800 | /* A posteriori SNR = ps/noise - 1*/ | 799 | /* A posteriori SNR = ps/noise - 1*/ |
801 | st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); | 800 | st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); |
802 | st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); | 801 | st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); |
803 | 802 | ||
804 | /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ | 803 | /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ |
805 | gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); | 804 | gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); |
806 | 805 | ||
807 | /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ | 806 | /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ |
808 | st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); | 807 | st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); |
809 | st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); | 808 | st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); |
@@ -824,13 +823,13 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
824 | for (i=N;i<N+M;i++) | 823 | for (i=N;i<N+M;i++) |
825 | Zframe = ADD32(Zframe, EXTEND32(st->zeta[i])); | 824 | Zframe = ADD32(Zframe, EXTEND32(st->zeta[i])); |
826 | Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); | 825 | Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); |
827 | 826 | ||
828 | effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); | 827 | effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); |
829 | 828 | ||
830 | compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); | 829 | compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); |
831 | 830 | ||
832 | /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) | 831 | /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) |
833 | Technically this is actually wrong because the EM gain assumes a slightly different probability | 832 | Technically this is actually wrong because the EM gain assumes a slightly different probability |
834 | distribution */ | 833 | distribution */ |
835 | for (i=N;i<N+M;i++) | 834 | for (i=N;i<N+M;i++) |
836 | { | 835 | { |
@@ -847,7 +846,7 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
847 | #ifdef FIXED_POINT | 846 | #ifdef FIXED_POINT |
848 | spx_word16_t tmp; | 847 | spx_word16_t tmp; |
849 | #endif | 848 | #endif |
850 | 849 | ||
851 | prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); | 850 | prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); |
852 | theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); | 851 | theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); |
853 | 852 | ||
@@ -872,12 +871,12 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
872 | /* Convert the EM gains and speech prob to linear frequency */ | 871 | /* Convert the EM gains and speech prob to linear frequency */ |
873 | filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); | 872 | filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); |
874 | filterbank_compute_psd16(st->bank,st->gain+N, st->gain); | 873 | filterbank_compute_psd16(st->bank,st->gain+N, st->gain); |
875 | 874 | ||
876 | /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ | 875 | /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ |
877 | if (1) | 876 | if (1) |
878 | { | 877 | { |
879 | filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); | 878 | filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); |
880 | 879 | ||
881 | /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ | 880 | /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ |
882 | for (i=0;i<N;i++) | 881 | for (i=0;i<N;i++) |
883 | { | 882 | { |
@@ -887,7 +886,7 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
887 | spx_word16_t tmp; | 886 | spx_word16_t tmp; |
888 | spx_word16_t p; | 887 | spx_word16_t p; |
889 | spx_word16_t g; | 888 | spx_word16_t g; |
890 | 889 | ||
891 | /* Wiener filter gain */ | 890 | /* Wiener filter gain */ |
892 | prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); | 891 | prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); |
893 | theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); | 892 | theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); |
@@ -898,22 +897,22 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
898 | g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); | 897 | g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); |
899 | /* Interpolated speech probability of presence */ | 898 | /* Interpolated speech probability of presence */ |
900 | p = st->gain2[i]; | 899 | p = st->gain2[i]; |
901 | 900 | ||
902 | /* Constrain the gain to be close to the Bark scale gain */ | 901 | /* Constrain the gain to be close to the Bark scale gain */ |
903 | if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) | 902 | if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) |
904 | g = MULT16_16(3,st->gain[i]); | 903 | g = MULT16_16(3,st->gain[i]); |
905 | st->gain[i] = g; | 904 | st->gain[i] = g; |
906 | 905 | ||
907 | /* Save old power spectrum */ | 906 | /* Save old power spectrum */ |
908 | st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); | 907 | st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); |
909 | 908 | ||
910 | /* Apply gain floor */ | 909 | /* Apply gain floor */ |
911 | if (st->gain[i] < st->gain_floor[i]) | 910 | if (st->gain[i] < st->gain_floor[i]) |
912 | st->gain[i] = st->gain_floor[i]; | 911 | st->gain[i] = st->gain_floor[i]; |
913 | 912 | ||
914 | /* Exponential decay model for reverberation (unused) */ | 913 | /* Exponential decay model for reverberation (unused) */ |
915 | /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ | 914 | /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ |
916 | 915 | ||
917 | /* Take into account speech probability of presence (loudness domain MMSE estimator) */ | 916 | /* Take into account speech probability of presence (loudness domain MMSE estimator) */ |
918 | /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain_floor)]^2 */ | 917 | /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain_floor)]^2 */ |
919 | tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); | 918 | tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); |
@@ -927,20 +926,20 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
927 | { | 926 | { |
928 | spx_word16_t tmp; | 927 | spx_word16_t tmp; |
929 | spx_word16_t p = st->gain2[i]; | 928 | spx_word16_t p = st->gain2[i]; |
930 | st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); | 929 | st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); |
931 | tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); | 930 | tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); |
932 | st->gain2[i]=SQR16_Q15(tmp); | 931 | st->gain2[i]=SQR16_Q15(tmp); |
933 | } | 932 | } |
934 | filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); | 933 | filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); |
935 | } | 934 | } |
936 | 935 | ||
937 | /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */ | 936 | /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */ |
938 | if (!st->denoise_enabled) | 937 | if (!st->denoise_enabled) |
939 | { | 938 | { |
940 | for (i=0;i<N+M;i++) | 939 | for (i=0;i<N+M;i++) |
941 | st->gain2[i]=Q15_ONE; | 940 | st->gain2[i]=Q15_ONE; |
942 | } | 941 | } |
943 | 942 | ||
944 | /* Apply computed gain */ | 943 | /* Apply computed gain */ |
945 | for (i=1;i<N;i++) | 944 | for (i=1;i<N;i++) |
946 | { | 945 | { |
@@ -949,7 +948,7 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
949 | } | 948 | } |
950 | st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); | 949 | st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); |
951 | st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); | 950 | st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); |
952 | 951 | ||
953 | /*FIXME: This *will* not work for fixed-point */ | 952 | /*FIXME: This *will* not work for fixed-point */ |
954 | #ifndef FIXED_POINT | 953 | #ifndef FIXED_POINT |
955 | if (st->agc_enabled) | 954 | if (st->agc_enabled) |
@@ -978,7 +977,7 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
978 | } | 977 | } |
979 | } | 978 | } |
980 | #endif | 979 | #endif |
981 | 980 | ||
982 | /* Synthesis window (for WOLA) */ | 981 | /* Synthesis window (for WOLA) */ |
983 | for (i=0;i<2*N;i++) | 982 | for (i=0;i<2*N;i++) |
984 | st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); | 983 | st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); |
@@ -988,15 +987,16 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
988 | x[i] = st->outbuf[i] + st->frame[i]; | 987 | x[i] = st->outbuf[i] + st->frame[i]; |
989 | for (i=0;i<N4;i++) | 988 | for (i=0;i<N4;i++) |
990 | x[N3+i] = st->frame[N3+i]; | 989 | x[N3+i] = st->frame[N3+i]; |
991 | 990 | ||
992 | /* Update outbuf */ | 991 | /* Update outbuf */ |
993 | for (i=0;i<N3;i++) | 992 | for (i=0;i<N3;i++) |
994 | st->outbuf[i] = st->frame[st->frame_size+i]; | 993 | st->outbuf[i] = st->frame[st->frame_size+i]; |
995 | 994 | ||
996 | /* FIXME: This VAD is a kludge */ | 995 | /* FIXME: This VAD is a kludge */ |
996 | st->speech_prob = Pframe; | ||
997 | if (st->vad_enabled) | 997 | if (st->vad_enabled) |
998 | { | 998 | { |
999 | if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue)) | 999 | if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue)) |
1000 | { | 1000 | { |
1001 | st->was_speech=1; | 1001 | st->was_speech=1; |
1002 | return 1; | 1002 | return 1; |
@@ -1010,7 +1010,7 @@ int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) | |||
1010 | } | 1010 | } |
1011 | } | 1011 | } |
1012 | 1012 | ||
1013 | void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) | 1013 | EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) |
1014 | { | 1014 | { |
1015 | int i; | 1015 | int i; |
1016 | int N = st->ps_size; | 1016 | int N = st->ps_size; |
@@ -1020,11 +1020,11 @@ void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) | |||
1020 | 1020 | ||
1021 | M = st->nbands; | 1021 | M = st->nbands; |
1022 | st->min_count++; | 1022 | st->min_count++; |
1023 | 1023 | ||
1024 | preprocess_analysis(st, x); | 1024 | preprocess_analysis(st, x); |
1025 | 1025 | ||
1026 | update_noise_prob(st); | 1026 | update_noise_prob(st); |
1027 | 1027 | ||
1028 | for (i=1;i<N-1;i++) | 1028 | for (i=1;i<N-1;i++) |
1029 | { | 1029 | { |
1030 | if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) | 1030 | if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) |
@@ -1045,7 +1045,7 @@ void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) | |||
1045 | } | 1045 | } |
1046 | 1046 | ||
1047 | 1047 | ||
1048 | int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) | 1048 | EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) |
1049 | { | 1049 | { |
1050 | int i; | 1050 | int i; |
1051 | SpeexPreprocessState *st; | 1051 | SpeexPreprocessState *st; |
@@ -1103,7 +1103,7 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) | |||
1103 | case SPEEX_PREPROCESS_GET_VAD: | 1103 | case SPEEX_PREPROCESS_GET_VAD: |
1104 | (*(spx_int32_t*)ptr) = st->vad_enabled; | 1104 | (*(spx_int32_t*)ptr) = st->vad_enabled; |
1105 | break; | 1105 | break; |
1106 | 1106 | ||
1107 | case SPEEX_PREPROCESS_SET_DEREVERB: | 1107 | case SPEEX_PREPROCESS_SET_DEREVERB: |
1108 | st->dereverb_enabled = (*(spx_int32_t*)ptr); | 1108 | st->dereverb_enabled = (*(spx_int32_t*)ptr); |
1109 | for (i=0;i<st->ps_size;i++) | 1109 | for (i=0;i<st->ps_size;i++) |
@@ -1121,7 +1121,7 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) | |||
1121 | /* FIXME: Re-enable when de-reverberation is actually enabled again */ | 1121 | /* FIXME: Re-enable when de-reverberation is actually enabled again */ |
1122 | /*(*(float*)ptr) = st->reverb_level;*/ | 1122 | /*(*(float*)ptr) = st->reverb_level;*/ |
1123 | break; | 1123 | break; |
1124 | 1124 | ||
1125 | case SPEEX_PREPROCESS_SET_DEREVERB_DECAY: | 1125 | case SPEEX_PREPROCESS_SET_DEREVERB_DECAY: |
1126 | /* FIXME: Re-enable when de-reverberation is actually enabled again */ | 1126 | /* FIXME: Re-enable when de-reverberation is actually enabled again */ |
1127 | /*st->reverb_decay = (*(float*)ptr);*/ | 1127 | /*st->reverb_decay = (*(float*)ptr);*/ |
@@ -1169,17 +1169,51 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) | |||
1169 | st->echo_state = (SpeexEchoState*)ptr; | 1169 | st->echo_state = (SpeexEchoState*)ptr; |
1170 | break; | 1170 | break; |
1171 | case SPEEX_PREPROCESS_GET_ECHO_STATE: | 1171 | case SPEEX_PREPROCESS_GET_ECHO_STATE: |
1172 | ptr = (void*)st->echo_state; | 1172 | (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state; |
1173 | break; | 1173 | break; |
1174 | #ifndef FIXED_POINT | 1174 | #ifndef FIXED_POINT |
1175 | case SPEEX_PREPROCESS_GET_AGC_LOUDNESS: | 1175 | case SPEEX_PREPROCESS_GET_AGC_LOUDNESS: |
1176 | (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP); | 1176 | (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP); |
1177 | break; | 1177 | break; |
1178 | case SPEEX_PREPROCESS_GET_AGC_GAIN: | ||
1179 | (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain)); | ||
1180 | break; | ||
1181 | #endif | ||
1182 | case SPEEX_PREPROCESS_GET_PSD_SIZE: | ||
1183 | case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE: | ||
1184 | (*(spx_int32_t*)ptr) = st->ps_size; | ||
1185 | break; | ||
1186 | case SPEEX_PREPROCESS_GET_PSD: | ||
1187 | for(i=0;i<st->ps_size;i++) | ||
1188 | ((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i]; | ||
1189 | break; | ||
1190 | case SPEEX_PREPROCESS_GET_NOISE_PSD: | ||
1191 | for(i=0;i<st->ps_size;i++) | ||
1192 | ((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT); | ||
1193 | break; | ||
1194 | case SPEEX_PREPROCESS_GET_PROB: | ||
1195 | (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100); | ||
1196 | break; | ||
1197 | #ifndef FIXED_POINT | ||
1198 | case SPEEX_PREPROCESS_SET_AGC_TARGET: | ||
1199 | st->agc_level = (*(spx_int32_t*)ptr); | ||
1200 | if (st->agc_level<1) | ||
1201 | st->agc_level=1; | ||
1202 | if (st->agc_level>32768) | ||
1203 | st->agc_level=32768; | ||
1204 | break; | ||
1205 | case SPEEX_PREPROCESS_GET_AGC_TARGET: | ||
1206 | (*(spx_int32_t*)ptr) = st->agc_level; | ||
1207 | break; | ||
1178 | #endif | 1208 | #endif |
1179 | |||
1180 | default: | 1209 | default: |
1181 | speex_warning_int("Unknown speex_preprocess_ctl request: ", request); | 1210 | speex_warning_int("Unknown speex_preprocess_ctl request: ", request); |
1182 | return -1; | 1211 | return -1; |
1183 | } | 1212 | } |
1184 | return 0; | 1213 | return 0; |
1185 | } | 1214 | } |
1215 | |||
1216 | #ifdef FIXED_DEBUG | ||
1217 | long long spx_mips=0; | ||
1218 | #endif | ||
1219 | |||