From 080522f9173526c9ffca06bf12782ddf73cb577a Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Sat, 1 Sep 2007 08:38:10 +0000 Subject: Voice file generation: * Significant speedup of SAPI5 voice generation by running lame and wavtrim from inside the VB script instead of the perl script, avoiding the large overhead of process generation within cygwin. Added proper synchronisation between perl script and VB script as the pipes are buffered. * Make wavtrim work as intended (threashold wasn't passed). * Set correct SVN properties for the VB script. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14562 a1c6a512-1295-4272-9138-f99709370657 --- tools/sapi5_voice_new.vbs | 69 +++++++++++++++++++++++---------- tools/voice.pl | 97 ++++++++++++++++++++++++++++++----------------- 2 files changed, 110 insertions(+), 56 deletions(-) diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs index 96c6e2a720..f6abcf7d0b 100755 --- a/tools/sapi5_voice_new.vbs +++ b/tools/sapi5_voice_new.vbs @@ -20,11 +20,13 @@ 'To be done: ' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) -' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really) + +Option Explicit Const SSFMCreateForWrite = 3 -Const SPSF_8kHz16BitMono = 6 +' Audio formats for SAPI5 filestream object +Const SPSF_8kHz16BitMono = 6 Const SPSF_11kHz16BitMono = 10 Const SPSF_12kHz16BitMono = 14 Const SPSF_16kHz16BitMono = 18 @@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30 Const SPSF_44kHz16BitMono = 34 Const SPSF_48kHz16BitMono = 38 -Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile +Dim oShell, oEnv +Dim oSpVoice, oSpFS ' SAPI5 voice and filestream +Dim aLine, aData ' used in command reading +Dim nAudioFormat +Dim bVerbose + + +On Error Resume Next nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: '- for AT&T natural voices, use SPSF_32kHz16BitMono '- for MS voices, use SPSF_22kHz16BitMono +Set oShell = CreateObject("WScript.Shell") +Set oEnv = oShell.Environment("Process") +bVerbose = (oEnv("V") <> "") + Set oSpVoice = CreateObject("SAPI.SpVoice") If Err.Number <> 0 Then - WScript.Echo "Error - could not get SpVoice object. " & _ - "SAPI 5 not installed?" + WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _ + "SAPI 5 not installed?" Err.Clear WScript.Quit 1 End If -While 1 > 0 - sText = WScript.StdIn.ReadLine - sOutputFile = WScript.StdIn.ReadLine - If sOutputFile = "" Then - Set oSpFS = Nothing - Set oSpVoice = Nothing - Set oArgs = Nothing - WScript.Quit 0 +Set oSpFS = CreateObject("SAPI.SpFileStream") +oSpFS.Format.Type = nAudioFormat + +On Error Goto 0 + +Do + aLine = Split(WScript.StdIn.ReadLine, vbTab, 2) + If Err.Number <> 0 Then + WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description + WScript.Quit 1 End If - ' WScript.Echo "Saying " + sText + " in " + sOutputFile - Set oSpFS = CreateObject("SAPI.SpFileStream") - oSpFS.Format.Type = nAudioFormat - oSpFS.Open sOutputFile, SSFMCreateForWrite, False - Set oSpVoice.AudioOutputStream = oSpFS - oSpVoice.Speak sText - oSpFS.Close -Wend + Select Case aLine(0) ' command + Case "SPEAK" + aData = Split(aLine(1), vbTab, 2) + If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _ + & " in " & aData(0) + oSpFS.Open aData(0), SSFMCreateForWrite, false + Set oSpVoice.AudioOutputStream = oSpFS + oSpVoice.Speak aData(1) + oSpFS.Close + Case "EXEC" + If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1) + oShell.Run aLine(1), 0, true + Case "SYNC" + If bVerbose Then WScript.StdErr.WriteLine "Syncing" + WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed + Case "QUIT" + If bVerbose Then WScript.StdErr.WriteLine "Quitting" + WScript.Quit 0 + End Select +Loop diff --git a/tools/voice.pl b/tools/voice.pl index 109451f82e..88f3ba4744 100755 --- a/tools/voice.pl +++ b/tools/voice.pl @@ -21,6 +21,7 @@ use File::Basename; use File::Copy; use Switch; use vars qw($V $C $t $l $e $E $s $S $i $v); +use IPC::Open2; use IPC::Open3; use Digest::MD5 qw(md5_hex); @@ -69,43 +70,44 @@ USAGE sub init_tts { our $verbose; my ($tts_engine, $tts_engine_opts, $language) = @_; - my $ret = undef; + my %ret = ("name" => $tts_engine); switch($tts_engine) { case "festival" { print("> festival $tts_engine_opts --server\n") if $verbose; my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); - $ret = *FESTIVAL_SERVER; - $ret = $pid; + my $dummy = *FESTIVAL_SERVER; #suppress warning $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; + $ret{"pid"} = $pid; } case "sapi5" { my $toolsdir = dirname($0); my $path = `cygpath $toolsdir -a -w`; chomp($path); - $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; - $path =~ s/\\/\\\\/g; - print("> cscript /B $path\n") if $verbose; - my $pid = open(F, "| cscript /B $path"); - $ret = *F; - $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; - $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; + $path = $path . '\\'; + my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts"; + $cmd =~ s/\\/\\\\/g; + print("> cscript //nologo $cmd\n") if $verbose; + my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd"); + $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); }; + $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); }; + %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path); } } - return $ret; + return \%ret; } # Shutdown TTS engine if necessary. sub shutdown_tts { - my ($tts_engine, $tts_object) = @_; - switch($tts_engine) { + my ($tts_object) = @_; + switch($$tts_object{"name"}) { case "festival" { # Send SIGTERM to festival server - kill TERM => $tts_object; + kill TERM => $$tts_object{"pid"}; } case "sapi5" { - print($tts_object "\r\n\r\n"); - close($tts_object); + print({$$tts_object{"stdin"}} "QUIT\r\n"); + close($$tts_object{"stdin"}); } } } @@ -113,14 +115,14 @@ sub shutdown_tts { # Apply corrections to a voice-string to make it sound better sub correct_string { our $verbose; - my ($string, $language, $tts_engine) = @_; + my ($string, $language, $tts_object) = @_; my $orig = $string; switch($language) { # General for all engines and languages (perhaps - just an example) $string =~ s/USB/U S B/; case ("deutsch") { - switch($tts_engine) { + switch($$tts_object{"name"}) { $string =~ s/alphabet/alfabet/; $string =~ s/alkaline/alkalein/; $string =~ s/ampere/amper/; @@ -146,10 +148,10 @@ sub correct_string { # Produce a wav file of the text given sub voicestring { our $verbose; - my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; + my ($string, $output, $tts_engine_opts, $tts_object) = @_; my $cmd; - printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; - switch($tts_engine) { + printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose; + switch($$tts_object{"name"}) { case "festival" { # festival_client lies to us, so we have to do awful soul-eating # work with IPC::open3() @@ -180,15 +182,31 @@ sub voicestring { close(ESPEAK); } case "sapi5" { - print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); + print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string)); } } } +# trim leading / trailing silence from the clip +sub wavtrim { + our $verbose; + my ($file, $threshold, $tts_object) = @_; + printf("Trim \"%s\"\n", $file) if $verbose; + if ($$tts_object{"name"} eq "sapi5") { + my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold"; + print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd)); + } + else { + my $cmd = dirname($0) . "/wavtrim $file $threshold"; + print("> $cmd\n") if $verbose; + `$cmd`; + } +} + # Encode a wav file into the given destination file sub encodewav { our $verbose; - my ($input, $output, $encoder, $encoder_opts) = @_; + my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_; my $cmd = ''; printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; switch ($encoder) { @@ -202,16 +220,23 @@ sub encodewav { $cmd = "speexenc $encoder_opts \"$input\" \"$output\""; } } - print("> $cmd\n") if $verbose; - `$cmd`; + if ($$tts_object{"name"} eq "sapi5") { + print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd)); + } + else { + print("> $cmd\n") if $verbose; + `$cmd`; + } } -sub wavtrim { - our $verbose; - my ($file) = @_; - my $cmd = dirname($0) . "/wavtrim \"$file\""; - print("> $cmd\n") if $verbose; - `$cmd`; +# synchronize the clip generation / processing if it's running in another process +sub synchronize { + my ($tts_object) = @_; + if ($$tts_object{"name"} eq "sapi5") { + print({$$tts_object{"stdin"}} "SYNC\t42\r\n"); + my $wait = readline($$tts_object{"stdout"}); + #ignore what's actually returned + } } # Run genlang and create voice clips for each string @@ -267,11 +292,13 @@ sub generateclips { copy(dirname($0)."/VOICE_PAUSE.wav", $wav); } else { - voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); - wavtrim($wav, 500); # 500 seems to be a reasonable default for now + voicestring($voice, $wav, $tts_engine_opts, $tts_object); + wavtrim($wav, 500, $tts_object); + # 500 seems to be a reasonable default for now } - encodewav($wav, $mp3, $encoder, $encoder_opts); + encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); + synchronize($tts_object); if (defined($ENV{'POOL'})) { copy($mp3, $pool_file); } @@ -284,7 +311,7 @@ sub generateclips { } print("\n"); close(VOICEFONTIDS); - shutdown_tts($tts_engine, $tts_object); + shutdown_tts($tts_object); } # Assemble the voicefile -- cgit v1.2.3