summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2007-09-01 08:38:10 +0000
committerJens Arnold <amiconn@rockbox.org>2007-09-01 08:38:10 +0000
commit080522f9173526c9ffca06bf12782ddf73cb577a (patch)
tree56c39cb41ef1bfe1f933e35cc11b7aa926d3e13f
parent80e91c1af392c72cd72095c1cbfa21957462381b (diff)
downloadrockbox-080522f9173526c9ffca06bf12782ddf73cb577a.tar.gz
rockbox-080522f9173526c9ffca06bf12782ddf73cb577a.zip
Voice file generation: * Significant speedup of SAPI5 voice generation by running lame and wavtrim from inside the VB script instead of the perl script, avoiding the large overhead of process generation within cygwin. Added proper synchronisation between perl script and VB script as the pipes are buffered. * Make wavtrim work as intended (threshold wasn't passed). * Set correct SVN properties for the VB script.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14562 a1c6a512-1295-4272-9138-f99709370657
-rwxr-xr-xtools/sapi5_voice_new.vbs69
-rwxr-xr-xtools/voice.pl97
2 files changed, 110 insertions, 56 deletions
diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs
index 96c6e2a720..f6abcf7d0b 100755
--- a/tools/sapi5_voice_new.vbs
+++ b/tools/sapi5_voice_new.vbs
@@ -20,11 +20,13 @@
20 20
21'To be done: 21'To be done:
22' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) 22' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed)
23' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really) 23
24Option Explicit
24 25
25Const SSFMCreateForWrite = 3 26Const SSFMCreateForWrite = 3
26 27
27Const SPSF_8kHz16BitMono = 6 28' Audio formats for SAPI5 filestream object
29Const SPSF_8kHz16BitMono = 6
28Const SPSF_11kHz16BitMono = 10 30Const SPSF_11kHz16BitMono = 10
29Const SPSF_12kHz16BitMono = 14 31Const SPSF_12kHz16BitMono = 14
30Const SPSF_16kHz16BitMono = 18 32Const SPSF_16kHz16BitMono = 18
@@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30
34Const SPSF_44kHz16BitMono = 34 36Const SPSF_44kHz16BitMono = 34
35Const SPSF_48kHz16BitMono = 38 37Const SPSF_48kHz16BitMono = 38
36 38
37Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile 39Dim oShell, oEnv
40Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
41Dim aLine, aData ' used in command reading
42Dim nAudioFormat
43Dim bVerbose
44
45
46On Error Resume Next
38 47
39nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: 48nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings:
40'- for AT&T natural voices, use SPSF_32kHz16BitMono 49'- for AT&T natural voices, use SPSF_32kHz16BitMono
41'- for MS voices, use SPSF_22kHz16BitMono 50'- for MS voices, use SPSF_22kHz16BitMono
42 51
52Set oShell = CreateObject("WScript.Shell")
53Set oEnv = oShell.Environment("Process")
54bVerbose = (oEnv("V") <> "")
55
43Set oSpVoice = CreateObject("SAPI.SpVoice") 56Set oSpVoice = CreateObject("SAPI.SpVoice")
44If Err.Number <> 0 Then 57If Err.Number <> 0 Then
45 WScript.Echo "Error - could not get SpVoice object. " & _ 58 WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _
46 "SAPI 5 not installed?" 59 "SAPI 5 not installed?"
47 Err.Clear 60 Err.Clear
48 WScript.Quit 1 61 WScript.Quit 1
49End If 62End If
50 63
51While 1 > 0 64Set oSpFS = CreateObject("SAPI.SpFileStream")
52 sText = WScript.StdIn.ReadLine 65oSpFS.Format.Type = nAudioFormat
53 sOutputFile = WScript.StdIn.ReadLine 66
54 If sOutputFile = "" Then 67On Error Goto 0
55 Set oSpFS = Nothing 68
56 Set oSpVoice = Nothing 69Do
57 Set oArgs = Nothing 70 aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
58 WScript.Quit 0 71 If Err.Number <> 0 Then
72 WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
73 WScript.Quit 1
59 End If 74 End If
60 ' WScript.Echo "Saying " + sText + " in " + sOutputFile 75 Select Case aLine(0) ' command
61 Set oSpFS = CreateObject("SAPI.SpFileStream") 76 Case "SPEAK"
62 oSpFS.Format.Type = nAudioFormat 77 aData = Split(aLine(1), vbTab, 2)
63 oSpFS.Open sOutputFile, SSFMCreateForWrite, False 78 If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
64 Set oSpVoice.AudioOutputStream = oSpFS 79 & " in " & aData(0)
65 oSpVoice.Speak sText 80 oSpFS.Open aData(0), SSFMCreateForWrite, false
66 oSpFS.Close 81 Set oSpVoice.AudioOutputStream = oSpFS
67Wend 82 oSpVoice.Speak aData(1)
83 oSpFS.Close
84 Case "EXEC"
85 If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
86 oShell.Run aLine(1), 0, true
87 Case "SYNC"
88 If bVerbose Then WScript.StdErr.WriteLine "Syncing"
89 WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
90 Case "QUIT"
91 If bVerbose Then WScript.StdErr.WriteLine "Quitting"
92 WScript.Quit 0
93 End Select
94Loop
diff --git a/tools/voice.pl b/tools/voice.pl
index 109451f82e..88f3ba4744 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -21,6 +21,7 @@ use File::Basename;
21use File::Copy; 21use File::Copy;
22use Switch; 22use Switch;
23use vars qw($V $C $t $l $e $E $s $S $i $v); 23use vars qw($V $C $t $l $e $E $s $S $i $v);
24use IPC::Open2;
24use IPC::Open3; 25use IPC::Open3;
25use Digest::MD5 qw(md5_hex); 26use Digest::MD5 qw(md5_hex);
26 27
@@ -69,43 +70,44 @@ USAGE
69sub init_tts { 70sub init_tts {
70 our $verbose; 71 our $verbose;
71 my ($tts_engine, $tts_engine_opts, $language) = @_; 72 my ($tts_engine, $tts_engine_opts, $language) = @_;
72 my $ret = undef; 73 my %ret = ("name" => $tts_engine);
73 switch($tts_engine) { 74 switch($tts_engine) {
74 case "festival" { 75 case "festival" {
75 print("> festival $tts_engine_opts --server\n") if $verbose; 76 print("> festival $tts_engine_opts --server\n") if $verbose;
76 my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); 77 my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
77 $ret = *FESTIVAL_SERVER; 78 my $dummy = *FESTIVAL_SERVER; #suppress warning
78 $ret = $pid;
79 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; 79 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
80 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; 80 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
81 $ret{"pid"} = $pid;
81 } 82 }
82 case "sapi5" { 83 case "sapi5" {
83 my $toolsdir = dirname($0); 84 my $toolsdir = dirname($0);
84 my $path = `cygpath $toolsdir -a -w`; 85 my $path = `cygpath $toolsdir -a -w`;
85 chomp($path); 86 chomp($path);
86 $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; 87 $path = $path . '\\';
87 $path =~ s/\\/\\\\/g; 88 my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts";
88 print("> cscript /B $path\n") if $verbose; 89 $cmd =~ s/\\/\\\\/g;
89 my $pid = open(F, "| cscript /B $path"); 90 print("> cscript //nologo $cmd\n") if $verbose;
90 $ret = *F; 91 my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
91 $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; 92 $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
92 $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; 93 $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
94 %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path);
93 } 95 }
94 } 96 }
95 return $ret; 97 return \%ret;
96} 98}
97 99
98# Shutdown TTS engine if necessary. 100# Shutdown TTS engine if necessary.
99sub shutdown_tts { 101sub shutdown_tts {
100 my ($tts_engine, $tts_object) = @_; 102 my ($tts_object) = @_;
101 switch($tts_engine) { 103 switch($$tts_object{"name"}) {
102 case "festival" { 104 case "festival" {
103 # Send SIGTERM to festival server 105 # Send SIGTERM to festival server
104 kill TERM => $tts_object; 106 kill TERM => $$tts_object{"pid"};
105 } 107 }
106 case "sapi5" { 108 case "sapi5" {
107 print($tts_object "\r\n\r\n"); 109 print({$$tts_object{"stdin"}} "QUIT\r\n");
108 close($tts_object); 110 close($$tts_object{"stdin"});
109 } 111 }
110 } 112 }
111} 113}
@@ -113,14 +115,14 @@ sub shutdown_tts {
113# Apply corrections to a voice-string to make it sound better 115# Apply corrections to a voice-string to make it sound better
114sub correct_string { 116sub correct_string {
115 our $verbose; 117 our $verbose;
116 my ($string, $language, $tts_engine) = @_; 118 my ($string, $language, $tts_object) = @_;
117 my $orig = $string; 119 my $orig = $string;
118 switch($language) { 120 switch($language) {
119 # General for all engines and languages (perhaps - just an example) 121 # General for all engines and languages (perhaps - just an example)
120 $string =~ s/USB/U S B/; 122 $string =~ s/USB/U S B/;
121 123
122 case ("deutsch") { 124 case ("deutsch") {
123 switch($tts_engine) { 125 switch($$tts_object{"name"}) {
124 $string =~ s/alphabet/alfabet/; 126 $string =~ s/alphabet/alfabet/;
125 $string =~ s/alkaline/alkalein/; 127 $string =~ s/alkaline/alkalein/;
126 $string =~ s/ampere/amper/; 128 $string =~ s/ampere/amper/;
@@ -146,10 +148,10 @@ sub correct_string {
146# Produce a wav file of the text given 148# Produce a wav file of the text given
147sub voicestring { 149sub voicestring {
148 our $verbose; 150 our $verbose;
149 my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; 151 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
150 my $cmd; 152 my $cmd;
151 printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; 153 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
152 switch($tts_engine) { 154 switch($$tts_object{"name"}) {
153 case "festival" { 155 case "festival" {
154 # festival_client lies to us, so we have to do awful soul-eating 156 # festival_client lies to us, so we have to do awful soul-eating
155 # work with IPC::open3() 157 # work with IPC::open3()
@@ -180,15 +182,31 @@ sub voicestring {
180 close(ESPEAK); 182 close(ESPEAK);
181 } 183 }
182 case "sapi5" { 184 case "sapi5" {
183 print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); 185 print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string));
184 } 186 }
185 } 187 }
186} 188}
187 189
190# trim leading / trailing silence from the clip
191sub wavtrim {
192 our $verbose;
193 my ($file, $threshold, $tts_object) = @_;
194 printf("Trim \"%s\"\n", $file) if $verbose;
195 if ($$tts_object{"name"} eq "sapi5") {
196 my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold";
197 print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
198 }
199 else {
200 my $cmd = dirname($0) . "/wavtrim $file $threshold";
201 print("> $cmd\n") if $verbose;
202 `$cmd`;
203 }
204}
205
188# Encode a wav file into the given destination file 206# Encode a wav file into the given destination file
189sub encodewav { 207sub encodewav {
190 our $verbose; 208 our $verbose;
191 my ($input, $output, $encoder, $encoder_opts) = @_; 209 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
192 my $cmd = ''; 210 my $cmd = '';
193 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; 211 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
194 switch ($encoder) { 212 switch ($encoder) {
@@ -202,16 +220,23 @@ sub encodewav {
202 $cmd = "speexenc $encoder_opts \"$input\" \"$output\""; 220 $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
203 } 221 }
204 } 222 }
205 print("> $cmd\n") if $verbose; 223 if ($$tts_object{"name"} eq "sapi5") {
206 `$cmd`; 224 print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
225 }
226 else {
227 print("> $cmd\n") if $verbose;
228 `$cmd`;
229 }
207} 230}
208 231
209sub wavtrim { 232# synchronize the clip generation / processing if it's running in another process
210 our $verbose; 233sub synchronize {
211 my ($file) = @_; 234 my ($tts_object) = @_;
212 my $cmd = dirname($0) . "/wavtrim \"$file\""; 235 if ($$tts_object{"name"} eq "sapi5") {
213 print("> $cmd\n") if $verbose; 236 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
214 `$cmd`; 237 my $wait = readline($$tts_object{"stdout"});
238 #ignore what's actually returned
239 }
215} 240}
216 241
217# Run genlang and create voice clips for each string 242# Run genlang and create voice clips for each string
@@ -267,11 +292,13 @@ sub generateclips {
267 copy(dirname($0)."/VOICE_PAUSE.wav", $wav); 292 copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
268 } 293 }
269 else { 294 else {
270 voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); 295 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
271 wavtrim($wav, 500); # 500 seems to be a reasonable default for now 296 wavtrim($wav, 500, $tts_object);
297 # 500 seems to be a reasonable default for now
272 } 298 }
273 299
274 encodewav($wav, $mp3, $encoder, $encoder_opts); 300 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
301 synchronize($tts_object);
275 if (defined($ENV{'POOL'})) { 302 if (defined($ENV{'POOL'})) {
276 copy($mp3, $pool_file); 303 copy($mp3, $pool_file);
277 } 304 }
@@ -284,7 +311,7 @@ sub generateclips {
284 } 311 }
285 print("\n"); 312 print("\n");
286 close(VOICEFONTIDS); 313 close(VOICEFONTIDS);
287 shutdown_tts($tts_engine, $tts_object); 314 shutdown_tts($tts_object);
288} 315}
289 316
290# Assemble the voicefile 317# Assemble the voicefile