summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xtools/sapi5_voice_new.vbs69
-rwxr-xr-xtools/voice.pl97
2 files changed, 110 insertions, 56 deletions
diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs
index 96c6e2a720..f6abcf7d0b 100755
--- a/tools/sapi5_voice_new.vbs
+++ b/tools/sapi5_voice_new.vbs
@@ -20,11 +20,13 @@
20 20
21'To be done: 21'To be done:
22' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) 22' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed)
23' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really) 23
24Option Explicit
24 25
25Const SSFMCreateForWrite = 3 26Const SSFMCreateForWrite = 3
26 27
27Const SPSF_8kHz16BitMono = 6 28' Audio formats for SAPI5 filestream object
29Const SPSF_8kHz16BitMono = 6
28Const SPSF_11kHz16BitMono = 10 30Const SPSF_11kHz16BitMono = 10
29Const SPSF_12kHz16BitMono = 14 31Const SPSF_12kHz16BitMono = 14
30Const SPSF_16kHz16BitMono = 18 32Const SPSF_16kHz16BitMono = 18
@@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30
34Const SPSF_44kHz16BitMono = 34 36Const SPSF_44kHz16BitMono = 34
35Const SPSF_48kHz16BitMono = 38 37Const SPSF_48kHz16BitMono = 38
36 38
37Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile 39Dim oShell, oEnv
40Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
41Dim aLine, aData ' used in command reading
42Dim nAudioFormat
43Dim bVerbose
44
45
46On Error Resume Next
38 47
39nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: 48nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings:
40'- for AT&T natural voices, use SPSF_32kHz16BitMono 49'- for AT&T natural voices, use SPSF_32kHz16BitMono
41'- for MS voices, use SPSF_22kHz16BitMono 50'- for MS voices, use SPSF_22kHz16BitMono
42 51
52Set oShell = CreateObject("WScript.Shell")
53Set oEnv = oShell.Environment("Process")
54bVerbose = (oEnv("V") <> "")
55
43Set oSpVoice = CreateObject("SAPI.SpVoice") 56Set oSpVoice = CreateObject("SAPI.SpVoice")
44If Err.Number <> 0 Then 57If Err.Number <> 0 Then
45 WScript.Echo "Error - could not get SpVoice object. " & _ 58 WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _
46 "SAPI 5 not installed?" 59 "SAPI 5 not installed?"
47 Err.Clear 60 Err.Clear
48 WScript.Quit 1 61 WScript.Quit 1
49End If 62End If
50 63
51While 1 > 0 64Set oSpFS = CreateObject("SAPI.SpFileStream")
52 sText = WScript.StdIn.ReadLine 65oSpFS.Format.Type = nAudioFormat
53 sOutputFile = WScript.StdIn.ReadLine 66
54 If sOutputFile = "" Then 67On Error Goto 0
55 Set oSpFS = Nothing 68
56 Set oSpVoice = Nothing 69Do
57 Set oArgs = Nothing 70 aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
58 WScript.Quit 0 71 If Err.Number <> 0 Then
72 WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
73 WScript.Quit 1
59 End If 74 End If
60 ' WScript.Echo "Saying " + sText + " in " + sOutputFile 75 Select Case aLine(0) ' command
61 Set oSpFS = CreateObject("SAPI.SpFileStream") 76 Case "SPEAK"
62 oSpFS.Format.Type = nAudioFormat 77 aData = Split(aLine(1), vbTab, 2)
63 oSpFS.Open sOutputFile, SSFMCreateForWrite, False 78 If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
64 Set oSpVoice.AudioOutputStream = oSpFS 79 & " in " & aData(0)
65 oSpVoice.Speak sText 80 oSpFS.Open aData(0), SSFMCreateForWrite, false
66 oSpFS.Close 81 Set oSpVoice.AudioOutputStream = oSpFS
67Wend 82 oSpVoice.Speak aData(1)
83 oSpFS.Close
84 Case "EXEC"
85 If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
86 oShell.Run aLine(1), 0, true
87 Case "SYNC"
88 If bVerbose Then WScript.StdErr.WriteLine "Syncing"
89 WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
90 Case "QUIT"
91 If bVerbose Then WScript.StdErr.WriteLine "Quitting"
92 WScript.Quit 0
93 End Select
94Loop
diff --git a/tools/voice.pl b/tools/voice.pl
index 109451f82e..88f3ba4744 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -21,6 +21,7 @@ use File::Basename;
21use File::Copy; 21use File::Copy;
22use Switch; 22use Switch;
23use vars qw($V $C $t $l $e $E $s $S $i $v); 23use vars qw($V $C $t $l $e $E $s $S $i $v);
24use IPC::Open2;
24use IPC::Open3; 25use IPC::Open3;
25use Digest::MD5 qw(md5_hex); 26use Digest::MD5 qw(md5_hex);
26 27
@@ -69,43 +70,44 @@ USAGE
69sub init_tts { 70sub init_tts {
70 our $verbose; 71 our $verbose;
71 my ($tts_engine, $tts_engine_opts, $language) = @_; 72 my ($tts_engine, $tts_engine_opts, $language) = @_;
72 my $ret = undef; 73 my %ret = ("name" => $tts_engine);
73 switch($tts_engine) { 74 switch($tts_engine) {
74 case "festival" { 75 case "festival" {
75 print("> festival $tts_engine_opts --server\n") if $verbose; 76 print("> festival $tts_engine_opts --server\n") if $verbose;
76 my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); 77 my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
77 $ret = *FESTIVAL_SERVER; 78 my $dummy = *FESTIVAL_SERVER; #suppress warning
78 $ret = $pid;
79 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; 79 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
80 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; 80 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
81 $ret{"pid"} = $pid;
81 } 82 }
82 case "sapi5" { 83 case "sapi5" {
83 my $toolsdir = dirname($0); 84 my $toolsdir = dirname($0);
84 my $path = `cygpath $toolsdir -a -w`; 85 my $path = `cygpath $toolsdir -a -w`;
85 chomp($path); 86 chomp($path);
86 $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; 87 $path = $path . '\\';
87 $path =~ s/\\/\\\\/g; 88 my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts";
88 print("> cscript /B $path\n") if $verbose; 89 $cmd =~ s/\\/\\\\/g;
89 my $pid = open(F, "| cscript /B $path"); 90 print("> cscript //nologo $cmd\n") if $verbose;
90 $ret = *F; 91 my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
91 $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; 92 $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
92 $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; 93 $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
94 %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path);
93 } 95 }
94 } 96 }
95 return $ret; 97 return \%ret;
96} 98}
97 99
98# Shutdown TTS engine if necessary. 100# Shutdown TTS engine if necessary.
99sub shutdown_tts { 101sub shutdown_tts {
100 my ($tts_engine, $tts_object) = @_; 102 my ($tts_object) = @_;
101 switch($tts_engine) { 103 switch($$tts_object{"name"}) {
102 case "festival" { 104 case "festival" {
103 # Send SIGTERM to festival server 105 # Send SIGTERM to festival server
104 kill TERM => $tts_object; 106 kill TERM => $$tts_object{"pid"};
105 } 107 }
106 case "sapi5" { 108 case "sapi5" {
107 print($tts_object "\r\n\r\n"); 109 print({$$tts_object{"stdin"}} "QUIT\r\n");
108 close($tts_object); 110 close($$tts_object{"stdin"});
109 } 111 }
110 } 112 }
111} 113}
@@ -113,14 +115,14 @@ sub shutdown_tts {
113# Apply corrections to a voice-string to make it sound better 115# Apply corrections to a voice-string to make it sound better
114sub correct_string { 116sub correct_string {
115 our $verbose; 117 our $verbose;
116 my ($string, $language, $tts_engine) = @_; 118 my ($string, $language, $tts_object) = @_;
117 my $orig = $string; 119 my $orig = $string;
118 switch($language) { 120 switch($language) {
119 # General for all engines and languages (perhaps - just an example) 121 # General for all engines and languages (perhaps - just an example)
120 $string =~ s/USB/U S B/; 122 $string =~ s/USB/U S B/;
121 123
122 case ("deutsch") { 124 case ("deutsch") {
123 switch($tts_engine) { 125 switch($$tts_object{"name"}) {
124 $string =~ s/alphabet/alfabet/; 126 $string =~ s/alphabet/alfabet/;
125 $string =~ s/alkaline/alkalein/; 127 $string =~ s/alkaline/alkalein/;
126 $string =~ s/ampere/amper/; 128 $string =~ s/ampere/amper/;
@@ -146,10 +148,10 @@ sub correct_string {
146# Produce a wav file of the text given 148# Produce a wav file of the text given
147sub voicestring { 149sub voicestring {
148 our $verbose; 150 our $verbose;
149 my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; 151 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
150 my $cmd; 152 my $cmd;
151 printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; 153 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
152 switch($tts_engine) { 154 switch($$tts_object{"name"}) {
153 case "festival" { 155 case "festival" {
154 # festival_client lies to us, so we have to do awful soul-eating 156 # festival_client lies to us, so we have to do awful soul-eating
155 # work with IPC::open3() 157 # work with IPC::open3()
@@ -180,15 +182,31 @@ sub voicestring {
180 close(ESPEAK); 182 close(ESPEAK);
181 } 183 }
182 case "sapi5" { 184 case "sapi5" {
183 print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); 185 print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string));
184 } 186 }
185 } 187 }
186} 188}
187 189
190# trim leading / trailing silence from the clip
191sub wavtrim {
192 our $verbose;
193 my ($file, $threshold, $tts_object) = @_;
194 printf("Trim \"%s\"\n", $file) if $verbose;
195 if ($$tts_object{"name"} eq "sapi5") {
196 my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold";
197 print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
198 }
199 else {
200 my $cmd = dirname($0) . "/wavtrim $file $threshold";
201 print("> $cmd\n") if $verbose;
202 `$cmd`;
203 }
204}
205
188# Encode a wav file into the given destination file 206# Encode a wav file into the given destination file
189sub encodewav { 207sub encodewav {
190 our $verbose; 208 our $verbose;
191 my ($input, $output, $encoder, $encoder_opts) = @_; 209 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
192 my $cmd = ''; 210 my $cmd = '';
193 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; 211 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
194 switch ($encoder) { 212 switch ($encoder) {
@@ -202,16 +220,23 @@ sub encodewav {
202 $cmd = "speexenc $encoder_opts \"$input\" \"$output\""; 220 $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
203 } 221 }
204 } 222 }
205 print("> $cmd\n") if $verbose; 223 if ($$tts_object{"name"} eq "sapi5") {
206 `$cmd`; 224 print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
225 }
226 else {
227 print("> $cmd\n") if $verbose;
228 `$cmd`;
229 }
207} 230}
208 231
209sub wavtrim { 232# synchronize the clip generation / processing if it's running in another process
210 our $verbose; 233sub synchronize {
211 my ($file) = @_; 234 my ($tts_object) = @_;
212 my $cmd = dirname($0) . "/wavtrim \"$file\""; 235 if ($$tts_object{"name"} eq "sapi5") {
213 print("> $cmd\n") if $verbose; 236 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
214 `$cmd`; 237 my $wait = readline($$tts_object{"stdout"});
238 #ignore what's actually returned
239 }
215} 240}
216 241
217# Run genlang and create voice clips for each string 242# Run genlang and create voice clips for each string
@@ -267,11 +292,13 @@ sub generateclips {
267 copy(dirname($0)."/VOICE_PAUSE.wav", $wav); 292 copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
268 } 293 }
269 else { 294 else {
270 voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); 295 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
271 wavtrim($wav, 500); # 500 seems to be a reasonable default for now 296 wavtrim($wav, 500, $tts_object);
297 # 500 seems to be a reasonable default for now
272 } 298 }
273 299
274 encodewav($wav, $mp3, $encoder, $encoder_opts); 300 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
301 synchronize($tts_object);
275 if (defined($ENV{'POOL'})) { 302 if (defined($ENV{'POOL'})) {
276 copy($mp3, $pool_file); 303 copy($mp3, $pool_file);
277 } 304 }
@@ -284,7 +311,7 @@ sub generateclips {
284 } 311 }
285 print("\n"); 312 print("\n");
286 close(VOICEFONTIDS); 313 close(VOICEFONTIDS);
287 shutdown_tts($tts_engine, $tts_object); 314 shutdown_tts($tts_object);
288} 315}
289 316
290# Assemble the voicefile 317# Assemble the voicefile