summary refs log tree commit diff
diff options
context:
space:
mode:
author Thom Johansen <thomj@rockbox.org> 2007-11-21 17:26:37 +0000
committer Thom Johansen <thomj@rockbox.org> 2007-11-21 17:26:37 +0000
commit a2ad8537af659972b2e859c99c0ff75e374b73f9 (patch)
tree 25405f36d452629298267d981b5538f3c24b713b
parent 5076723120ce67a2f7d5e09b352ca7490377f01a (diff)
download rockbox-a2ad8537af659972b2e859c99c0ff75e374b73f9.tar.gz
rockbox-a2ad8537af659972b2e859c99c0ff75e374b73f9.zip
Compensate for encoder lookahead to make more click-free voice clips. Add narrowband option.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15741 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- tools/rbspeex/rbspeexenc.c 107
1 files changed, 63 insertions, 44 deletions
diff --git a/tools/rbspeex/rbspeexenc.c b/tools/rbspeex/rbspeexenc.c
index f0e0262b6c..649f3b5e1c 100644
--- a/tools/rbspeex/rbspeexenc.c
+++ b/tools/rbspeex/rbspeexenc.c
@@ -23,6 +23,17 @@
23#include <string.h> 23#include <string.h>
24#include <stdbool.h> 24#include <stdbool.h>
25 25
26#define USAGE_TEXT \
27"Usage: rbspeexenc [options] infile outfile\n"\
28"Options:\n"\
29" -q x Quality, floating point number in the range [0-10], default 8.0\n"\
30" -c x Complexity, increases quality for a given bitrate, but encodes\n"\
31" slower, range [0-10], default 3\n"\
32" -n Enable narrowband mode, will resample input to 8 kHz\n\n"\
33"rbspeexenc expects a mono 16 bit WAV file as input. Files will be resampled\n"\
34"to either 16 kHz by default, or 8 kHz if narrowband mode is enabled.\n"\
35"WARNING: This tool will create files that are only usable by Rockbox!\n"
36
26/* Read an unaligned 32-bit little endian long from buffer. */ 37/* Read an unaligned 32-bit little endian long from buffer. */
27unsigned int get_long_le(unsigned char *p) 38unsigned int get_long_le(unsigned char *p)
28{ 39{
@@ -110,81 +121,83 @@ int main(int argc, char **argv)
110 SpeexResamplerState *resampler = NULL; 121 SpeexResamplerState *resampler = NULL;
111 SpeexBits bits; 122 SpeexBits bits;
112 int i, tmp; 123 int i, tmp;
113 float ftmp; 124 int complexity = 3;
125 float quality = 8.f;
126 bool narrowband = false;
127 int target_sr;
114 int numchan, bps, sr, numsamples; 128 int numchan, bps, sr, numsamples;
115 int frame_size; 129 int frame_size;
130 int lookahead;
116 131
117 if (argc < 3) { 132 if (argc < 3) {
118 printf("Usage: rbspeexenc [options] infile outfile\n" 133 printf(USAGE_TEXT);
119 "Options:\n"
120 " -q x Quality, floating point number in the range [0-10]\n"
121 " -c x Complexity, affects quality and encoding time, where\n"
122 " both increase with increasing values, range [0-10]\n"
123 " Defaults are as in speexenc.\n"
124 "\nWARNING: This tool will create files that are only usable by Rockbox!\n"
125 );
126 return 1; 134 return 1;
127 } 135 }
128 136
137 i = 1;
138 while (i < argc - 2) {
139 if (strncmp(argv[i], "-q", 2) == 0)
140 quality = atof(argv[++i]);
141 else if (strncmp(argv[i], "-c", 2) == 0)
142 complexity = atoi(argv[++i]);
143 else if (strncmp(argv[i], "-n", 2) == 0)
144 narrowband = true;
145 ++i;
146 }
147
148 /* Allocate an encoder of specified type, defaults to wideband */
149 st = speex_encoder_init(narrowband ? &speex_nb_mode : &speex_wb_mode);
150 if (narrowband)
151 target_sr = 8000;
152 else
153 target_sr = 16000;
154
129 /* We'll eat an entire WAV file here, and encode it with Speex, packing the 155 /* We'll eat an entire WAV file here, and encode it with Speex, packing the
130 * bits as tightly as we can. Output is completely raw, with absolutely 156 * bits as tightly as we can. Output is completely raw, with absolutely
131 * nothing to identify the contents. 157 * nothing to identify the contents.
132 */ 158 */
133
134 /* Wideband encoding */
135 st = speex_encoder_init(&speex_wb_mode);
136 159
137 /* VBR */ 160 /* VBR */
138 tmp = 1; 161 tmp = 1;
139 speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp); 162 speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp);
140 /* Quality, 0-10 */ 163 /* Quality, 0-10 */
141 ftmp = 8.f; 164 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &quality);
142 for (i = 1; i < argc - 2; ++i) {
143 if (strncmp(argv[i], "-q", 2) == 0) {
144 ftmp = atof(argv[i + 1]);
145 break;
146 }
147 }
148 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &ftmp);
149 /* Complexity, 0-10 */ 165 /* Complexity, 0-10 */
150 tmp = 3; 166 speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &complexity);
151 for (i = 1; i < argc - 2; ++i) {
152 if (strncmp(argv[i], "-c", 2) == 0) {
153 tmp = atoi(argv[i + 1]);
154 break;
155 }
156 }
157 speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &tmp);
158 speex_encoder_ctl(st, SPEEX_GET_FRAME_SIZE, &frame_size); 167 speex_encoder_ctl(st, SPEEX_GET_FRAME_SIZE, &frame_size);
168 speex_encoder_ctl(st, SPEEX_GET_LOOKAHEAD, &lookahead);
159 169
160 fin = fopen(argv[argc - 2], "rb"); 170 fin = fopen(argv[argc - 2], "rb");
161 if (!get_wave_metadata(fin, &numchan, &bps, &sr, &numsamples)) { 171 if (!get_wave_metadata(fin, &numchan, &bps, &sr, &numsamples)) {
162 printf("invalid wave file!\n"); 172 printf("Error: invalid WAV file\n");
163 return 1; 173 return 1;
164 } 174 }
165 if (sr != 16000) { 175 if (sr != target_sr) {
166 resampler = speex_resampler_init(1, sr, 16000, 10, NULL); 176 resampler = speex_resampler_init(1, sr, target_sr, 10, NULL);
167 speex_resampler_skip_zeros(resampler); 177 speex_resampler_skip_zeros(resampler);
168 printf("Resampling from %i Hz to 16000 Hz\n", sr);
169 } 178 }
170 if (numchan != 1) { 179 if (numchan != 1) {
171 printf("Error: input file must be mono\n"); 180 printf("Error: input file must be mono\n");
172 return 1; 181 return 1;
173 } 182 }
174 if (bps != 16) { 183 if (bps != 16) {
175 printf("samples must be 16 bit!\n"); 184 printf("Error: samples must be 16 bit\n");
176 return 1; 185 return 1;
177 } 186 }
178 187
179 /* Read input samples into a buffer */ 188 /* Read input samples into a buffer */
180 in = malloc(numsamples*2); 189 in = calloc(numsamples + lookahead, sizeof(spx_int16_t));
181 if (malloc == NULL) { 190 if (in == NULL) {
182 printf("error on malloc\n"); 191 printf("Error: could not allocate clip memory\n");
183 return 1; 192 return 1;
184 } 193 }
185 fread(in, 2, numsamples, fin); 194 fread(in, 2, numsamples, fin);
186 fclose(fin); 195 fclose(fin);
187 196 /* There will be 'lookahead' samples of zero at the end of the array, to
197 * make sure the Speex encoder is allowed to spit out all its data at clip
198 * end */
199 numsamples += lookahead;
200
188 speex_bits_init(&bits); 201 speex_bits_init(&bits);
189 inpos = in; 202 inpos = in;
190 fout = fopen(argv[argc - 1], "wb"); 203 fout = fopen(argv[argc - 1], "wb");
@@ -193,12 +206,16 @@ int main(int argc, char **argv)
193 int samples = frame_size; 206 int samples = frame_size;
194 207
195 /* Check if we need to resample */ 208 /* Check if we need to resample */
196 if (sr != 16000) { 209 if (sr != target_sr) {
197 spx_uint32_t in_len = numsamples, out_len = frame_size; 210 spx_uint32_t in_len = numsamples, out_len = frame_size;
211 double resample_factor = (double)sr/(double)target_sr;
212 /* Calculate how many input samples are needed for one full frame
213 * out, and add some, just in case. */
214 spx_uint32_t samples_in = frame_size*resample_factor + 50;
198 215
199 /* Limit this or resampler will try to allocate it all on stack */ 216 /* Limit this or resampler will try to allocate it all on stack */
200 if (in_len > 2000) 217 if (in_len > samples_in)
201 in_len = 2000; 218 in_len = samples_in;
202 speex_resampler_process_int(resampler, 0, inpos, &in_len, 219 speex_resampler_process_int(resampler, 0, inpos, &in_len,
203 enc_buf, &out_len); 220 enc_buf, &out_len);
204 inpos += in_len; 221 inpos += in_len;
@@ -214,7 +231,10 @@ int main(int argc, char **argv)
214 /* Pad out with zeros if we didn't fill all input */ 231 /* Pad out with zeros if we didn't fill all input */
215 memset(enc_buf + samples, 0, (frame_size - samples)*2); 232 memset(enc_buf + samples, 0, (frame_size - samples)*2);
216 233
217 speex_encode_int(st, enc_buf, &bits); 234 if (speex_encode_int(st, enc_buf, &bits) < 0) {
235 printf("Error: encoder error\n");
236 return 1;
237 }
218 238
219 /* Copy the bits to an array of char that can be written */ 239 /* Copy the bits to an array of char that can be written */
220 nbytes = speex_bits_write_whole_bytes(&bits, cbits, 200); 240 nbytes = speex_bits_write_whole_bytes(&bits, cbits, 200);
@@ -222,8 +242,7 @@ int main(int argc, char **argv)
222 /* Write the compressed data */ 242 /* Write the compressed data */
223 fwrite(cbits, 1, nbytes, fout); 243 fwrite(cbits, 1, nbytes, fout);
224 } 244 }
225 245 /* Squeeze out the last bits */
226 /* Squeeze out the last bits */
227 nbytes = speex_bits_write(&bits, cbits, 200); 246 nbytes = speex_bits_write(&bits, cbits, 200);
228 fwrite(cbits, 1, nbytes, fout); 247 fwrite(cbits, 1, nbytes, fout);
229 248