diff options
-rwxr-xr-x | tools/sapi5_voice_new.vbs | 69 | ||||
-rwxr-xr-x | tools/voice.pl | 97 |
2 files changed, 110 insertions, 56 deletions
diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs
index 96c6e2a720..f6abcf7d0b 100755
--- a/tools/sapi5_voice_new.vbs
+++ b/tools/sapi5_voice_new.vbs
@@ -20,11 +20,13 @@ | |||
20 | 20 | ||
21 | 'To be done: | 21 | 'To be done: |
22 | ' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) | 22 | ' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) |
23 | ' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really) | 23 | |
24 | Option Explicit | ||
24 | 25 | ||
25 | Const SSFMCreateForWrite = 3 | 26 | Const SSFMCreateForWrite = 3 |
26 | 27 | ||
27 | Const SPSF_8kHz16BitMono = 6 | 28 | ' Audio formats for SAPI5 filestream object |
29 | Const SPSF_8kHz16BitMono = 6 | ||
28 | Const SPSF_11kHz16BitMono = 10 | 30 | Const SPSF_11kHz16BitMono = 10 |
29 | Const SPSF_12kHz16BitMono = 14 | 31 | Const SPSF_12kHz16BitMono = 14 |
30 | Const SPSF_16kHz16BitMono = 18 | 32 | Const SPSF_16kHz16BitMono = 18 |
@@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30 | |||
34 | Const SPSF_44kHz16BitMono = 34 | 36 | Const SPSF_44kHz16BitMono = 34 |
35 | Const SPSF_48kHz16BitMono = 38 | 37 | Const SPSF_48kHz16BitMono = 38 |
36 | 38 | ||
37 | Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile | 39 | Dim oShell, oEnv |
40 | Dim oSpVoice, oSpFS ' SAPI5 voice and filestream | ||
41 | Dim aLine, aData ' used in command reading | ||
42 | Dim nAudioFormat | ||
43 | Dim bVerbose | ||
44 | |||
45 | |||
46 | On Error Resume Next | ||
38 | 47 | ||
39 | nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: | 48 | nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: |
40 | '- for AT&T natural voices, use SPSF_32kHz16BitMono | 49 | '- for AT&T natural voices, use SPSF_32kHz16BitMono |
41 | '- for MS voices, use SPSF_22kHz16BitMono | 50 | '- for MS voices, use SPSF_22kHz16BitMono |
42 | 51 | ||
52 | Set oShell = CreateObject("WScript.Shell") | ||
53 | Set oEnv = oShell.Environment("Process") | ||
54 | bVerbose = (oEnv("V") <> "") | ||
55 | |||
43 | Set oSpVoice = CreateObject("SAPI.SpVoice") | 56 | Set oSpVoice = CreateObject("SAPI.SpVoice") |
44 | If Err.Number <> 0 Then | 57 | If Err.Number <> 0 Then |
45 | WScript.Echo "Error - could not get SpVoice object. " & _ | 58 | WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _ |
46 | "SAPI 5 not installed?" | 59 | "SAPI 5 not installed?" |
47 | Err.Clear | 60 | Err.Clear |
48 | WScript.Quit 1 | 61 | WScript.Quit 1 |
49 | End If | 62 | End If |
50 | 63 | ||
51 | While 1 > 0 | 64 | Set oSpFS = CreateObject("SAPI.SpFileStream") |
52 | sText = WScript.StdIn.ReadLine | 65 | oSpFS.Format.Type = nAudioFormat |
53 | sOutputFile = WScript.StdIn.ReadLine | 66 | |
54 | If sOutputFile = "" Then | 67 | On Error Goto 0 |
55 | Set oSpFS = Nothing | 68 | |
56 | Set oSpVoice = Nothing | 69 | Do |
57 | Set oArgs = Nothing | 70 | aLine = Split(WScript.StdIn.ReadLine, vbTab, 2) |
58 | WScript.Quit 0 | 71 | If Err.Number <> 0 Then |
72 | WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description | ||
73 | WScript.Quit 1 | ||
59 | End If | 74 | End If |
60 | ' WScript.Echo "Saying " + sText + " in " + sOutputFile | 75 | Select Case aLine(0) ' command |
61 | Set oSpFS = CreateObject("SAPI.SpFileStream") | 76 | Case "SPEAK" |
62 | oSpFS.Format.Type = nAudioFormat | 77 | aData = Split(aLine(1), vbTab, 2) |
63 | oSpFS.Open sOutputFile, SSFMCreateForWrite, False | 78 | If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _ |
64 | Set oSpVoice.AudioOutputStream = oSpFS | 79 | & " in " & aData(0) |
65 | oSpVoice.Speak sText | 80 | oSpFS.Open aData(0), SSFMCreateForWrite, false |
66 | oSpFS.Close | 81 | Set oSpVoice.AudioOutputStream = oSpFS |
67 | Wend | 82 | oSpVoice.Speak aData(1) |
83 | oSpFS.Close | ||
84 | Case "EXEC" | ||
85 | If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1) | ||
86 | oShell.Run aLine(1), 0, true | ||
87 | Case "SYNC" | ||
88 | If bVerbose Then WScript.StdErr.WriteLine "Syncing" | ||
89 | WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed | ||
90 | Case "QUIT" | ||
91 | If bVerbose Then WScript.StdErr.WriteLine "Quitting" | ||
92 | WScript.Quit 0 | ||
93 | End Select | ||
94 | Loop | ||
diff --git a/tools/voice.pl b/tools/voice.pl
index 109451f82e..88f3ba4744 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -21,6 +21,7 @@ use File::Basename; | |||
21 | use File::Copy; | 21 | use File::Copy; |
22 | use Switch; | 22 | use Switch; |
23 | use vars qw($V $C $t $l $e $E $s $S $i $v); | 23 | use vars qw($V $C $t $l $e $E $s $S $i $v); |
24 | use IPC::Open2; | ||
24 | use IPC::Open3; | 25 | use IPC::Open3; |
25 | use Digest::MD5 qw(md5_hex); | 26 | use Digest::MD5 qw(md5_hex); |
26 | 27 | ||
@@ -69,43 +70,44 @@ USAGE | |||
69 | sub init_tts { | 70 | sub init_tts { |
70 | our $verbose; | 71 | our $verbose; |
71 | my ($tts_engine, $tts_engine_opts, $language) = @_; | 72 | my ($tts_engine, $tts_engine_opts, $language) = @_; |
72 | my $ret = undef; | 73 | my %ret = ("name" => $tts_engine); |
73 | switch($tts_engine) { | 74 | switch($tts_engine) { |
74 | case "festival" { | 75 | case "festival" { |
75 | print("> festival $tts_engine_opts --server\n") if $verbose; | 76 | print("> festival $tts_engine_opts --server\n") if $verbose; |
76 | my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); | 77 | my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); |
77 | $ret = *FESTIVAL_SERVER; | 78 | my $dummy = *FESTIVAL_SERVER; #suppress warning |
78 | $ret = $pid; | ||
79 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; | 79 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; |
80 | $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; | 80 | $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; |
81 | $ret{"pid"} = $pid; | ||
81 | } | 82 | } |
82 | case "sapi5" { | 83 | case "sapi5" { |
83 | my $toolsdir = dirname($0); | 84 | my $toolsdir = dirname($0); |
84 | my $path = `cygpath $toolsdir -a -w`; | 85 | my $path = `cygpath $toolsdir -a -w`; |
85 | chomp($path); | 86 | chomp($path); |
86 | $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; | 87 | $path = $path . '\\'; |
87 | $path =~ s/\\/\\\\/g; | 88 | my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts"; |
88 | print("> cscript /B $path\n") if $verbose; | 89 | $cmd =~ s/\\/\\\\/g; |
89 | my $pid = open(F, "| cscript /B $path"); | 90 | print("> cscript //nologo $cmd\n") if $verbose; |
90 | $ret = *F; | 91 | my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd"); |
91 | $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; | 92 | $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); }; |
92 | $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; | 93 | $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); }; |
94 | %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path); | ||
93 | } | 95 | } |
94 | } | 96 | } |
95 | return $ret; | 97 | return \%ret; |
96 | } | 98 | } |
97 | 99 | ||
98 | # Shutdown TTS engine if necessary. | 100 | # Shutdown TTS engine if necessary. |
99 | sub shutdown_tts { | 101 | sub shutdown_tts { |
100 | my ($tts_engine, $tts_object) = @_; | 102 | my ($tts_object) = @_; |
101 | switch($tts_engine) { | 103 | switch($$tts_object{"name"}) { |
102 | case "festival" { | 104 | case "festival" { |
103 | # Send SIGTERM to festival server | 105 | # Send SIGTERM to festival server |
104 | kill TERM => $tts_object; | 106 | kill TERM => $$tts_object{"pid"}; |
105 | } | 107 | } |
106 | case "sapi5" { | 108 | case "sapi5" { |
107 | print($tts_object "\r\n\r\n"); | 109 | print({$$tts_object{"stdin"}} "QUIT\r\n"); |
108 | close($tts_object); | 110 | close($$tts_object{"stdin"}); |
109 | } | 111 | } |
110 | } | 112 | } |
111 | } | 113 | } |
@@ -113,14 +115,14 @@ sub shutdown_tts { | |||
113 | # Apply corrections to a voice-string to make it sound better | 115 | # Apply corrections to a voice-string to make it sound better |
114 | sub correct_string { | 116 | sub correct_string { |
115 | our $verbose; | 117 | our $verbose; |
116 | my ($string, $language, $tts_engine) = @_; | 118 | my ($string, $language, $tts_object) = @_; |
117 | my $orig = $string; | 119 | my $orig = $string; |
118 | switch($language) { | 120 | switch($language) { |
119 | # General for all engines and languages (perhaps - just an example) | 121 | # General for all engines and languages (perhaps - just an example) |
120 | $string =~ s/USB/U S B/; | 122 | $string =~ s/USB/U S B/; |
121 | 123 | ||
122 | case ("deutsch") { | 124 | case ("deutsch") { |
123 | switch($tts_engine) { | 125 | switch($$tts_object{"name"}) { |
124 | $string =~ s/alphabet/alfabet/; | 126 | $string =~ s/alphabet/alfabet/; |
125 | $string =~ s/alkaline/alkalein/; | 127 | $string =~ s/alkaline/alkalein/; |
126 | $string =~ s/ampere/amper/; | 128 | $string =~ s/ampere/amper/; |
@@ -146,10 +148,10 @@ sub correct_string { | |||
146 | # Produce a wav file of the text given | 148 | # Produce a wav file of the text given |
147 | sub voicestring { | 149 | sub voicestring { |
148 | our $verbose; | 150 | our $verbose; |
149 | my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; | 151 | my ($string, $output, $tts_engine_opts, $tts_object) = @_; |
150 | my $cmd; | 152 | my $cmd; |
151 | printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; | 153 | printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose; |
152 | switch($tts_engine) { | 154 | switch($$tts_object{"name"}) { |
153 | case "festival" { | 155 | case "festival" { |
154 | # festival_client lies to us, so we have to do awful soul-eating | 156 | # festival_client lies to us, so we have to do awful soul-eating |
155 | # work with IPC::open3() | 157 | # work with IPC::open3() |
@@ -180,15 +182,31 @@ sub voicestring { | |||
180 | close(ESPEAK); | 182 | close(ESPEAK); |
181 | } | 183 | } |
182 | case "sapi5" { | 184 | case "sapi5" { |
183 | print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); | 185 | print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string)); |
184 | } | 186 | } |
185 | } | 187 | } |
186 | } | 188 | } |
187 | 189 | ||
190 | # trim leading / trailing silence from the clip | ||
191 | sub wavtrim { | ||
192 | our $verbose; | ||
193 | my ($file, $threshold, $tts_object) = @_; | ||
194 | printf("Trim \"%s\"\n", $file) if $verbose; | ||
195 | if ($$tts_object{"name"} eq "sapi5") { | ||
196 | my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold"; | ||
197 | print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd)); | ||
198 | } | ||
199 | else { | ||
200 | my $cmd = dirname($0) . "/wavtrim $file $threshold"; | ||
201 | print("> $cmd\n") if $verbose; | ||
202 | `$cmd`; | ||
203 | } | ||
204 | } | ||
205 | |||
188 | # Encode a wav file into the given destination file | 206 | # Encode a wav file into the given destination file |
189 | sub encodewav { | 207 | sub encodewav { |
190 | our $verbose; | 208 | our $verbose; |
191 | my ($input, $output, $encoder, $encoder_opts) = @_; | 209 | my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_; |
192 | my $cmd = ''; | 210 | my $cmd = ''; |
193 | printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; | 211 | printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; |
194 | switch ($encoder) { | 212 | switch ($encoder) { |
@@ -202,16 +220,23 @@ sub encodewav { | |||
202 | $cmd = "speexenc $encoder_opts \"$input\" \"$output\""; | 220 | $cmd = "speexenc $encoder_opts \"$input\" \"$output\""; |
203 | } | 221 | } |
204 | } | 222 | } |
205 | print("> $cmd\n") if $verbose; | 223 | if ($$tts_object{"name"} eq "sapi5") { |
206 | `$cmd`; | 224 | print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd)); |
225 | } | ||
226 | else { | ||
227 | print("> $cmd\n") if $verbose; | ||
228 | `$cmd`; | ||
229 | } | ||
207 | } | 230 | } |
208 | 231 | ||
209 | sub wavtrim { | 232 | # synchronize the clip generation / processing if it's running in another process |
210 | our $verbose; | 233 | sub synchronize { |
211 | my ($file) = @_; | 234 | my ($tts_object) = @_; |
212 | my $cmd = dirname($0) . "/wavtrim \"$file\""; | 235 | if ($$tts_object{"name"} eq "sapi5") { |
213 | print("> $cmd\n") if $verbose; | 236 | print({$$tts_object{"stdin"}} "SYNC\t42\r\n"); |
214 | `$cmd`; | 237 | my $wait = readline($$tts_object{"stdout"}); |
238 | #ignore what's actually returned | ||
239 | } | ||
215 | } | 240 | } |
216 | 241 | ||
217 | # Run genlang and create voice clips for each string | 242 | # Run genlang and create voice clips for each string |
@@ -267,11 +292,13 @@ sub generateclips { | |||
267 | copy(dirname($0)."/VOICE_PAUSE.wav", $wav); | 292 | copy(dirname($0)."/VOICE_PAUSE.wav", $wav); |
268 | } | 293 | } |
269 | else { | 294 | else { |
270 | voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); | 295 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); |
271 | wavtrim($wav, 500); # 500 seems to be a reasonable default for now | 296 | wavtrim($wav, 500, $tts_object); |
297 | # 500 seems to be a reasonable default for now | ||
272 | } | 298 | } |
273 | 299 | ||
274 | encodewav($wav, $mp3, $encoder, $encoder_opts); | 300 | encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); |
301 | synchronize($tts_object); | ||
275 | if (defined($ENV{'POOL'})) { | 302 | if (defined($ENV{'POOL'})) { |
276 | copy($mp3, $pool_file); | 303 | copy($mp3, $pool_file); |
277 | } | 304 | } |
@@ -284,7 +311,7 @@ sub generateclips { | |||
284 | } | 311 | } |
285 | print("\n"); | 312 | print("\n"); |
286 | close(VOICEFONTIDS); | 313 | close(VOICEFONTIDS); |
287 | shutdown_tts($tts_engine, $tts_object); | 314 | shutdown_tts($tts_object); |
288 | } | 315 | } |
289 | 316 | ||
290 | # Assemble the voicefile | 317 | # Assemble the voicefile |