diff options
Diffstat (limited to 'tools/voice.pl')
-rwxr-xr-x | tools/voice.pl | 360 |
1 files changed, 360 insertions, 0 deletions
diff --git a/tools/voice.pl b/tools/voice.pl new file mode 100755 index 0000000000..1635b701f1 --- /dev/null +++ b/tools/voice.pl | |||
@@ -0,0 +1,360 @@ | |||
1 | #!/usr/bin/perl -s | ||
2 | # __________ __ ___. | ||
3 | # Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | # \/ \/ \/ \/ \/ | ||
8 | # $Id: | ||
9 | # | ||
10 | # Copyright (C) 2007 Jonas Häggqvist | ||
11 | # | ||
12 | # All files in this archive are subject to the GNU General Public License. | ||
13 | # See the file COPYING in the source tree root for full license agreement. | ||
14 | # | ||
15 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | # KIND, either express or implied. | ||
17 | |||
18 | use strict; | ||
19 | use warnings; | ||
20 | use File::Basename; | ||
21 | use File::Copy; | ||
22 | use Switch; | ||
23 | use vars qw($V $C $t $l $e $E $s $S $i $v); | ||
24 | use IPC::Open3; | ||
25 | use Digest::MD5 qw(md5_hex); | ||
26 | |||
# Print the command-line help text to STDOUT.
# Takes no arguments; returns whatever print returns.
sub printusage {
    print <<USAGE;

Usage: voice.pl [options] [path to dir]
 -V
    Create voice file. You must also specify -t, -l and -i.

 -C
    Create .talk clips.

 -t=<target>
    Specify which target you want to build voicefile for. Must include
    any features that target supports.

 -i=<target_id>
    Numeric target id. Needed for voice building.

 -l=<language>
    Specify which language you want to build. Without .lang extension.

 -e=<encoder>
    Which encoder to use for voice strings

 -E=<encoder options>
    Which encoder options to use when compressing voice strings. Enclose
    in double quotes if the options include spaces.

 -s=<TTS engine>
    Which TTS engine to use.

 -S=<TTS engine options>
    Options to pass to the TTS engine. Enclose in double quotes if the
    options include spaces.

 -v
    Be verbose
USAGE
}
66 | |||
# Initialize TTS engine. May return an object or value which will be passed
# to voicestring and shutdown_tts.
#
# Arguments:
#   $tts_engine      - engine name; only "festival" and "sapi5" need start-up
#   $tts_engine_opts - option string appended verbatim to the engine command
#   $language        - language name, handed to the sapi5 helper script
#
# Returns the pid reported by the pipe open for "festival", the pipe handle
# for "sapi5", and undef for every other engine.
# Dies if the engine process cannot be started.
sub init_tts {
    our $verbose;
    my ($tts_engine, $tts_engine_opts, $language) = @_;
    my $ret = undef;
    if ($tts_engine eq "festival") {
        print("> festival $tts_engine_opts --server\n") if $verbose;
        # Deliberately a global (bareword) handle: a lexical handle would be
        # closed when this sub returns, and closing a pipe waits for the
        # child process to exit.
        my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1")
            or die("Unable to start festival server: $!\n");
        $ret = $pid;
        # SIGKILL cannot be trapped, so handle INT and TERM instead.
        my $abort = sub { kill TERM => $pid; panic_cleanup(); };
        $SIG{INT} = $abort;
        $SIG{TERM} = $abort;
    }
    elsif ($tts_engine eq "sapi5") {
        my $toolsdir = dirname($0);
        my $path = `cygpath $toolsdir -a -w`;
        chomp($path);
        $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts";
        # Double every backslash so the path survives the shell
        $path =~ s/\\/\\\\/g;
        print("> cscript /B $path\n") if $verbose;
        open(my $sapi, "| cscript /B $path")
            or die("Unable to start cscript: $!\n");
        $ret = $sapi;
        # Two empty lines are the same terminator shutdown_tts sends
        my $abort = sub { print {$sapi} "\r\n\r\n"; panic_cleanup(); };
        $SIG{INT} = $abort;
        $SIG{TERM} = $abort;
    }
    return $ret;
}
97 | |||
# Shutdown TTS engine if necessary.
#
# Arguments:
#   $tts_engine - engine name as given to init_tts
#   $tts_object - value returned by init_tts (pid for "festival",
#                 pipe handle for "sapi5"); may be undef
#
# Does nothing for other engines or when $tts_object is undefined.
sub shutdown_tts {
    my ($tts_engine, $tts_object) = @_;
    # Without this guard "kill TERM => undef" would signal pid 0, i.e. the
    # whole process group of this script.
    return unless defined($tts_object);

    if ($tts_engine eq "festival") {
        # Send SIGTERM to festival server
        kill TERM => $tts_object;
    }
    elsif ($tts_engine eq "sapi5") {
        # Two empty lines tell the helper script to quit
        print {$tts_object} "\r\n\r\n";
        close($tts_object);
    }
    return;
}
112 | |||
# Apply corrections to a voice-string to make it sound better.
#
# Arguments:
#   $string     - the text to be spoken
#   $language   - language name (optional); "deutsch" enables the German
#                 pronunciation fixes
#   $tts_engine - TTS engine name (optional); accepted for engine specific
#                 corrections, none are defined yet
#
# Returns the corrected string. In verbose mode every changed string is
# reported as "original -> corrected".
sub correct_string {
    our $verbose;
    my ($string, $language, $tts_engine) = @_;
    # Callers may pass the string only; avoid comparing against undef
    $language = '' unless defined($language);
    my $orig = $string;

    # General for all engines and languages (perhaps - just an example)
    $string =~ s/USB/U S B/;

    if ($language eq "deutsch") {
        $string =~ s/alphabet/alfabet/;
        $string =~ s/alkaline/alkalein/;
        $string =~ s/ampere/amper/;
        $string =~ s/byte(s?)\b/beit$1/;
        $string =~ s/\bdezibel\b/de-zibell/;
        $string =~ s/energie\b/ener-gie/;
        $string =~ s/\bflash\b/fläsh/g;
        $string =~ s/\bfirmware(s?)\b/firmwer$1/;
        $string =~ s/\bid3 tag\b/id3 täg/g; # can't just use "tag" here
        $string =~ s/\bloudness\b/laudness/;
        $string =~ s/\bnumerisch\b/numehrisch/;
        $string =~ s/\brücklauf\b/rück-lauf/;
        $string =~ s/\bsuchlauf\b/such-lauf/;
    }

    if ($orig ne $string) {
        printf("%s -> %s\n", $orig, $string) if $verbose;
    }
    return $string;
}
145 | |||
# Produce a wav file of the text given.
#
# Arguments:
#   $string          - text to speak
#   $output          - name of the wav file to create
#   $tts_engine      - "festival", "flite", "espeak" or "sapi5"; any other
#                      value does nothing
#   $tts_engine_opts - option string passed verbatim to flite and espeak
#   $tts_object      - value returned by init_tts (the pipe handle for sapi5)
#
# Returns nothing.
sub voicestring {
    our $verbose;
    my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_;
    my $cmd;
    printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose;
    if ($tts_engine eq "festival") {
        # festival_client lies to us, so we have to do awful soul-eating
        # work with IPC::open3()
        $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
        print("> $cmd\n") if $verbose;
        # Open command, and filehandles for STDIN, STDOUT, STDERR
        my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
        # Put the string to speak into STDIN and close it
        print(CMD_IN $string);
        close(CMD_IN);
        # Read all output from festival_client (because it LIES TO US)
        while (<CMD_ERR>) {
        }
        close(CMD_OUT);
        close(CMD_ERR);
        # open3 does not reap its child; without this every clip leaves a
        # zombie process behind
        waitpid($pid, 0);
    }
    elsif ($tts_engine eq "flite") {
        # The text ends up inside double quotes on a shell command line, so
        # escape the characters the shell still interprets there
        (my $escaped = $string) =~ s/([\\"\$\`])/\\$1/g;
        $cmd = "flite $tts_engine_opts -t \"$escaped\" \"$output\"";
        print("> $cmd\n") if $verbose;
        `$cmd`;
    }
    elsif ($tts_engine eq "espeak") {
        $cmd = "espeak $tts_engine_opts -w \"$output\"";
        print("> $cmd\n") if $verbose;
        my $espeak;
        unless (open($espeak, "| $cmd")) {
            warn("Unable to run espeak: $!\n");
            return;
        }
        # espeak reads the text to speak from its STDIN
        print {$espeak} $string . "\n";
        close($espeak);
    }
    elsif ($tts_engine eq "sapi5") {
        # The helper script reads the text and the output file name as
        # two CRLF terminated lines
        printf {$tts_object} ("%s\r\n%s\r\n", $string, $output);
    }
    return;
}
187 | |||
# Encode a wav file into the given destination file.
#
# Arguments:
#   $input        - wav file to read
#   $output       - file to create
#   $encoder      - 'lame', 'vorbis' (runs oggenc) or 'speexenc'
#   $encoder_opts - option string passed verbatim to the encoder
#
# The encoder is run exactly once. An unknown encoder or a failing
# encoder command produces a warning; nothing is returned.
sub encodewav {
    our $verbose;
    my ($input, $output, $encoder, $encoder_opts) = @_;
    printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;

    my $cmd;
    if ($encoder eq 'lame') {
        $cmd = "lame $encoder_opts \"$input\" \"$output\"";
    }
    elsif ($encoder eq 'vorbis') {
        $cmd = "oggenc $encoder_opts \"$input\" -o \"$output\"";
    }
    elsif ($encoder eq 'speexenc') {
        $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
    }
    else {
        warn("Unknown encoder \"$encoder\", \"$input\" not encoded\n");
        return;
    }

    print("> $cmd\n") if $verbose;
    # Backticks keep the encoder's STDOUT off the terminal
    `$cmd`;
    warn("Encoder command failed: $cmd\n") if $? != 0;
    return;
}
208 | |||
# Run the wavtrim tool (expected next to this script) on a wav file.
#
# Arguments:
#   $file      - wav file to trim in place
#   $threshold - optional numeric threshold; appended to the command line
#                when given. NOTE(review): assumes the wavtrim tool accepts
#                the threshold as an optional second argument - confirm.
sub wavtrim {
    our $verbose;
    my ($file, $threshold) = @_;
    my $cmd = dirname($0) . "/wavtrim \"$file\"";
    # Only pass plain numbers through to the shell
    if (defined($threshold) and $threshold =~ /^\d+$/) {
        $cmd .= " $threshold";
    }
    print("> $cmd\n") if $verbose;
    `$cmd`;
}
216 | |||
# Run genlang and create voice clips for each string.
#
# Arguments:
#   $language        - language name (without .lang extension)
#   $target          - target string handed to genlang via -t
#   $encoder         - encoder name for encodewav
#   $encoder_opts    - encoder option string
#   $tts_engine      - TTS engine name
#   $tts_engine_opts - TTS engine option string
#
# Writes genlang's output to the file "voicefontids" and leaves one
# <id>.mp3 per voiced string in the current directory. When the POOL
# environment variable is set, clips are looked up in and added to that
# directory, keyed by the md5 of the corrected string, the language and
# the TTS engine.
# Dies if "voicefontids" cannot be created.
sub generateclips {
    our $verbose;
    my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
    my $genlang = dirname($0) . '/genlang';
    my $english = dirname($0) . '/../apps/lang/english.lang';
    my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
    my $id = '';
    my $voice = '';
    my $cmd = "$genlang -o -t=$target -e=$english $langfile 2>/dev/null";
    my $pool_file;
    my $clips = 0;

    # Fail before the TTS engine is started, so nothing is left running
    open(my $ids_fh, '>', 'voicefontids')
        or die("Unable to create voicefontids: $!\n");

    my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
    print("Generating voice clips");
    print("\n") if $verbose;
    for my $line (`$cmd`) {
        print {$ids_fh} $line;
        if ($line =~ /^id: (.*)$/) {
            $id = $1;
        }
        elsif ($line =~ /^voice: "(.*)"$/) {
            $voice = $1;
            if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
                my $wav = $id . '.wav';
                my $mp3 = $id . '.mp3';

                # Print some progress information
                if (++$clips % 10 == 0 and !$verbose) {
                    print(".");
                }

                # Apply corrections to the string; language and engine are
                # needed for the language specific rules to take effect
                $voice = correct_string($voice, $language, $tts_engine);

                # If we have a pool of snippets, see if the string exists there first
                if (defined($ENV{'POOL'})) {
                    $pool_file = sprintf("%s/%s-%s-%s.mp3", $ENV{'POOL'}, md5_hex($voice), $language, $tts_engine);
                    if (-f $pool_file) {
                        printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
                        copy($pool_file, $mp3);
                    }
                }

                # Don't generate MP3 if it already exists (probably from the POOL)
                if (! -f $mp3) {
                    if ($id eq "VOICE_PAUSE") {
                        print("Use distributed $wav\n") if $verbose;
                        copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
                    }
                    else {
                        voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object);
                        wavtrim($wav, 500); # 500 seems to be a reasonable default for now
                    }

                    encodewav($wav, $mp3, $encoder, $encoder_opts);
                    if (defined($ENV{'POOL'})) {
                        copy($mp3, $pool_file);
                    }
                    unlink($wav);
                }
                $voice = "";
                $id = "";
            }
        }
    }
    print("\n");
    close($ids_fh);
    shutdown_tts($tts_engine, $tts_object);
}
289 | |||
# Assemble the voicefile by running the voicefont tool that sits next to
# this script.
#
# Arguments:
#   $language  - language name, used as base name of the output file
#   $target_id - target id handed to voicefont
#
# The output is "<language>.voice"; if that file already exists the first
# free name of "<language>-2.voice", "<language>-3.voice", ... is used.
sub createvoice {
    our $verbose;
    my ($language, $target_id) = @_;
    my $tool = dirname($0) . '/voicefont';

    # Probe candidate names until one is not taken yet
    my $attempt = 1;
    my $outfile = $language . '.voice';
    while (-f $outfile) {
        $attempt++;
        $outfile = $language . '-' . $attempt . '.voice';
    }

    if ($verbose) {
        printf("Saving voice file to %s\n", $outfile);
    }
    my $cmd = "$tool 'voicefontids' $target_id ./ $outfile";
    if ($verbose) {
        print("> $cmd\n");
    }
    my $tool_output = `$cmd`;
    print($tool_output) if $verbose;
}
306 | |||
# Remove every *.mp3 and *.wav file from the current directory.
# Takes no arguments.
sub deletemp3s() {
    my @leftovers = (glob('*.mp3'), glob('*.wav'));
    unlink($_) foreach @leftovers;
}
315 | |||
# Emergency exit, used as signal handler: remove the generated clips and
# terminate the script with an error.
sub panic_cleanup {
    deletemp3s();
    die("Interrupted, temporary files removed\n");
}
320 | |||
# Check parameters. The option variables ($V, $C, $t, ...) are filled in
# by perl's -s switch from the command line.
my $printusage = 0;
unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
if (defined($V)) {
    unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
    unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
    unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
}
elsif (defined($C)) {
    unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
}
unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
if ($printusage == 1) { printusage(); exit 1; }

# Clean up when interrupted. SIGKILL cannot be trapped, so INT and TERM
# are the signals worth handling.
$SIG{INT} = \&panic_cleanup;
$SIG{TERM} = \&panic_cleanup;

# Verbose mode can be requested with -v or through the V environment variable
if (defined($v) or defined($ENV{'V'})) {
    our $verbose = 1;
}


# Do what we're told
# $V is undefined when only -C was given, so test for that before comparing
if (defined($V) and $V eq '1') {
    printf("Generating voice\n  Target: %s\n  Language: %s\n  Encoder (options): %s (%s)\n  TTS Engine (options): %s (%s)\n",
        $t, $l, $e, $E, $s, $S);
    generateclips($l, $t, $e, $E, $s, $S);
    createvoice($l, $i);
    deletemp3s();
}
elsif ($C) {
    # xxx: Implement .talk clip generation
}
else {
    printusage();
    exit 1;
}