diff options
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/vbr.c')
-rw-r--r-- | lib/rbcodec/codecs/libspeex/vbr.c | 275 |
1 files changed, 275 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/vbr.c b/lib/rbcodec/codecs/libspeex/vbr.c new file mode 100644 index 0000000000..32b33bc10e --- /dev/null +++ b/lib/rbcodec/codecs/libspeex/vbr.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* Copyright (C) 2002 Jean-Marc Valin | ||
2 | File: vbr.c | ||
3 | |||
4 | VBR-related routines | ||
5 | |||
6 | Redistribution and use in source and binary forms, with or without | ||
7 | modification, are permitted provided that the following conditions | ||
8 | are met: | ||
9 | |||
10 | - Redistributions of source code must retain the above copyright | ||
11 | notice, this list of conditions and the following disclaimer. | ||
12 | |||
13 | - Redistributions in binary form must reproduce the above copyright | ||
14 | notice, this list of conditions and the following disclaimer in the | ||
15 | documentation and/or other materials provided with the distribution. | ||
16 | |||
17 | - Neither the name of the Xiph.org Foundation nor the names of its | ||
18 | contributors may be used to endorse or promote products derived from | ||
19 | this software without specific prior written permission. | ||
20 | |||
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR | ||
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
32 | |||
33 | */ | ||
34 | |||
35 | #ifdef HAVE_CONFIG_H | ||
36 | #include "config-speex.h" | ||
37 | #endif | ||
38 | |||
39 | #include "vbr.h" | ||
40 | #include <math.h> | ||
41 | |||
42 | |||
/* Square of x. The argument is expanded twice, so it must not have side effects. */
#define sqr(x) ((x) * (x))

/* Energy floor: added to the frame energy before taking its logarithm, used to
   seed the log-energy history and the noise estimate in vbr_init(), and used as
   the lower energy limit for the noise-floor updates in vbr_analysis(). */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy (pow(ener, NOISE_POW)) before it is
   compared with, and accumulated into, the noise level estimate. */
#define NOISE_POW 0.3
47 | |||
48 | #ifndef DISABLE_VBR | ||
49 | |||
/*
 * VBR threshold table with nine rows of eleven values each.
 *
 * Each row is labelled with the mode / bit-rate it belongs to. Note that the
 * rows are not sorted by bit-rate: the 4 kbps row comes last.
 *
 * NOTE(review): "nb" presumably stands for narrowband and the eleven columns
 * presumably correspond to quality settings 0..10 -- confirm against the
 * encoder code that indexes this table.
 */
const float vbr_nb_thresh[9][11] = {
   /* CNG     */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps  */ { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f},
   /* 6 kbps  */ {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f},
   /* 8 kbps  */ {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f},
   /* 11 kbps */ {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f},
   /* 15 kbps */ {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f},
   /* 18 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f},
   /* 24 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f},
   /* 4 kbps  */ { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}
};
61 | |||
62 | |||
/*
 * VBR threshold table with five rows of eleven values each, one row per
 * mode / bit-rate as labelled.
 *
 * NOTE(review): "hb" presumably stands for high-band (wideband extension) and
 * the eleven columns presumably correspond to quality settings 0..10 --
 * confirm against the encoder code that indexes this table.
 */
const float vbr_hb_thresh[5][11] = {
   /* silence */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps  */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 6 kbps  */ {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f},
   /* 10 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f},
   /* 18 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}
};
70 | |||
/*
 * VBR threshold table with two rows of eleven values each, one row per
 * mode / bit-rate as labelled.
 *
 * NOTE(review): "uhb" presumably stands for ultra-high-band and the eleven
 * columns presumably correspond to quality settings 0..10 -- confirm against
 * the encoder code that indexes this table.
 */
const float vbr_uhb_thresh[2][11] = {
   /* silence */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps  */ { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}
};
75 | |||
76 | void vbr_init(VBRState *vbr) | ||
77 | { | ||
78 | int i; | ||
79 | |||
80 | vbr->average_energy=0; | ||
81 | vbr->last_energy=1; | ||
82 | vbr->accum_sum=0; | ||
83 | vbr->energy_alpha=.1; | ||
84 | vbr->soft_pitch=0; | ||
85 | vbr->last_pitch_coef=0; | ||
86 | vbr->last_quality=0; | ||
87 | |||
88 | vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); | ||
89 | vbr->noise_accum_count=.05; | ||
90 | vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; | ||
91 | vbr->consec_noise=0; | ||
92 | |||
93 | |||
94 | for (i=0;i<VBR_MEMORY_SIZE;i++) | ||
95 | vbr->last_log_energy[i] = log(MIN_ENERGY); | ||
96 | } | ||
97 | |||
98 | |||
99 | /* | ||
100 | This function should analyse the signal and decide how critical the | ||
101 | coding error will be perceptually. The following factors should be | ||
102 | taken into account: | ||
103 | |||
104 | -Attacks (positive energy derivative) should be coded with more bits | ||
105 | |||
106 | -Stationary voiced segments should receive more bits | ||
107 | |||
108 | -Segments with (very) low absolute energy should receive less bits (maybe | ||
109 | only shaped noise?) | ||
110 | |||
111 | -DTX for near-zero energy? | ||
112 | |||
113 | -Stationary fricative segments should have less bits | ||
114 | |||
115 | -Temporal masking: when energy slope is decreasing, decrease the bit-rate | ||
116 | |||
117 | -Decrease bit-rate for males (low pitch)? | ||
118 | |||
119 | -(wideband only) less bits in the high-band when signal is very | ||
120 | non-stationary (harder to notice high-frequency noise)??? | ||
121 | |||
122 | */ | ||
123 | |||
124 | float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef) | ||
125 | { | ||
126 | int i; | ||
127 | float ener=0, ener1=0, ener2=0; | ||
128 | float qual=7; | ||
129 | int va; | ||
130 | float log_energy; | ||
131 | float non_st=0; | ||
132 | float voicing; | ||
133 | float pow_ener; | ||
134 | |||
135 | for (i=0;i<len>>1;i++) | ||
136 | ener1 += ((float)sig[i])*sig[i]; | ||
137 | |||
138 | for (i=len>>1;i<len;i++) | ||
139 | ener2 += ((float)sig[i])*sig[i]; | ||
140 | ener=ener1+ener2; | ||
141 | |||
142 | log_energy = log(ener+MIN_ENERGY); | ||
143 | for (i=0;i<VBR_MEMORY_SIZE;i++) | ||
144 | non_st += sqr(log_energy-vbr->last_log_energy[i]); | ||
145 | non_st = non_st/(30*VBR_MEMORY_SIZE); | ||
146 | if (non_st>1) | ||
147 | non_st=1; | ||
148 | |||
149 | voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); | ||
150 | vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener; | ||
151 | vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; | ||
152 | pow_ener = pow(ener,NOISE_POW); | ||
153 | if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) | ||
154 | vbr->noise_accum = .05*pow_ener; | ||
155 | |||
156 | if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) | ||
157 | || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) | ||
158 | || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) | ||
159 | || (voicing<0 && non_st < .05)) | ||
160 | { | ||
161 | float tmp; | ||
162 | va = 0; | ||
163 | vbr->consec_noise++; | ||
164 | if (pow_ener > 3*vbr->noise_level) | ||
165 | tmp = 3*vbr->noise_level; | ||
166 | else | ||
167 | tmp = pow_ener; | ||
168 | if (vbr->consec_noise>=4) | ||
169 | { | ||
170 | vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; | ||
171 | vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; | ||
172 | } | ||
173 | } else { | ||
174 | va = 1; | ||
175 | vbr->consec_noise=0; | ||
176 | } | ||
177 | |||
178 | if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) | ||
179 | { | ||
180 | vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; | ||
181 | vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; | ||
182 | } | ||
183 | |||
184 | /* Checking for very low absolute energy */ | ||
185 | if (ener < 30000) | ||
186 | { | ||
187 | qual -= .7; | ||
188 | if (ener < 10000) | ||
189 | qual-=.7; | ||
190 | if (ener < 3000) | ||
191 | qual-=.7; | ||
192 | } else { | ||
193 | float short_diff, long_diff; | ||
194 | short_diff = log((ener+1)/(1+vbr->last_energy)); | ||
195 | long_diff = log((ener+1)/(1+vbr->average_energy)); | ||
196 | /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ | ||
197 | |||
198 | if (long_diff<-5) | ||
199 | long_diff=-5; | ||
200 | if (long_diff>2) | ||
201 | long_diff=2; | ||
202 | |||
203 | if (long_diff>0) | ||
204 | qual += .6*long_diff; | ||
205 | if (long_diff<0) | ||
206 | qual += .5*long_diff; | ||
207 | if (short_diff>0) | ||
208 | { | ||
209 | if (short_diff>5) | ||
210 | short_diff=5; | ||
211 | qual += .5*short_diff; | ||
212 | } | ||
213 | /* Checking for energy increases */ | ||
214 | if (ener2 > 1.6*ener1) | ||
215 | qual += .5; | ||
216 | } | ||
217 | vbr->last_energy = ener; | ||
218 | vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef; | ||
219 | qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); | ||
220 | |||
221 | if (qual < vbr->last_quality) | ||
222 | qual = .5*qual + .5*vbr->last_quality; | ||
223 | if (qual<4) | ||
224 | qual=4; | ||
225 | if (qual>10) | ||
226 | qual=10; | ||
227 | |||
228 | /* | ||
229 | if (vbr->consec_noise>=2) | ||
230 | qual-=1.3; | ||
231 | if (vbr->consec_noise>=5) | ||
232 | qual-=1.3; | ||
233 | if (vbr->consec_noise>=12) | ||
234 | qual-=1.3; | ||
235 | */ | ||
236 | if (vbr->consec_noise>=3) | ||
237 | qual=4; | ||
238 | |||
239 | if (vbr->consec_noise) | ||
240 | qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); | ||
241 | if (qual<0) | ||
242 | qual=0; | ||
243 | |||
244 | if (ener<60000) | ||
245 | { | ||
246 | if (vbr->consec_noise>2) | ||
247 | qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); | ||
248 | if (ener<10000&&vbr->consec_noise>2) | ||
249 | qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); | ||
250 | if (qual<0) | ||
251 | qual=0; | ||
252 | qual += .3*log(.0001+ener/60000.0); | ||
253 | } | ||
254 | if (qual<-1) | ||
255 | qual=-1; | ||
256 | |||
257 | /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/ | ||
258 | |||
259 | vbr->last_pitch_coef = pitch_coef; | ||
260 | vbr->last_quality = qual; | ||
261 | |||
262 | for (i=VBR_MEMORY_SIZE-1;i>0;i--) | ||
263 | vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; | ||
264 | vbr->last_log_energy[0] = log_energy; | ||
265 | |||
266 | /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/ | ||
267 | |||
268 | return qual; | ||
269 | } | ||
270 | |||
271 | void vbr_destroy(VBRState *vbr) | ||
272 | { | ||
273 | } | ||
274 | |||
275 | #endif /* #ifndef DISABLE_VBR */ | ||