From 17e03e75a43ff75b2cc20ba794b0f4d3131f4a36 Mon Sep 17 00:00:00 2001 From: Jonas Häggqvist Date: Sat, 25 Aug 2007 22:00:13 +0000 Subject: Replace the voicebuilding with a perl-based approach. Should greatly speed up building on Cygwin. See more in FS#7646. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14457 a1c6a512-1295-4272-9138-f99709370657 --- docs/MAINTAINERS | 2 +- tools/VOICE_PAUSE.wav | Bin 0 -> 26612 bytes tools/configure | 58 +++++--- tools/sapi5_voice_new.vbs | 67 +++++++++ tools/voice.pl | 360 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 465 insertions(+), 22 deletions(-) create mode 100644 tools/VOICE_PAUSE.wav create mode 100755 tools/sapi5_voice_new.vbs create mode 100755 tools/voice.pl diff --git a/docs/MAINTAINERS b/docs/MAINTAINERS index 023e1aa6cc..fcf03f74bc 100644 --- a/docs/MAINTAINERS +++ b/docs/MAINTAINERS @@ -290,7 +290,7 @@ Build Tools :scramble: Linus Nielsen Feltzing :descramble: Linus Nielsen Feltzing :mkinfo: Daniel Stenberg -:voice shell scripts: Jonas Häggqvist +:voice perl script: Jonas Häggqvist Install Tools ------------- diff --git a/tools/VOICE_PAUSE.wav b/tools/VOICE_PAUSE.wav new file mode 100644 index 0000000000..90d07f0ff6 Binary files /dev/null and b/tools/VOICE_PAUSE.wav differ diff --git a/tools/configure b/tools/configure index 03182cfde4..6be536e109 100755 --- a/tools/configure +++ b/tools/configure @@ -288,7 +288,8 @@ whichadvanced () { # Ask about languages to build echo "Select a number for the language to use (default is english)" - echo "You may enter a comma-separated list of languages to build" + # The multiple-language feature is currently broken + # echo "You may enter a comma-separated list of languages to build" picklang voicelanguage=`whichlang` @@ -329,7 +330,7 @@ voiceconfig () { if [ -f "`which flite`" ]; then FLITE="F(l)ite " - FLITE_OPTS="FLITE_OPTS=\"\"" + FLITE_OPTS="" DEFAULT_TTS="flite" DEFAULT_TTS_OPTS=$FLITE_OPTS DEFAULT_NOISEFLOOR="500" @@ -337,7 +338,7 @@ voiceconfig () { fi if [ -f "`which espeak`" ]; then ESPEAK="(e)Speak " - ESPEAK_OPTS="ESPEAK_OPTS=\"\"" + ESPEAK_OPTS="" DEFAULT_TTS="espeak" DEFAULT_TTS_OPTS=$ESPEAK_OPTS DEFAULT_NOISEFLOOR="500" @@ -345,7 +346,23 @@ voiceconfig () { fi if [ -f "`which festival`" ]; then FESTIVAL="(F)estival " - FESTIVAL_OPTS="FESTIVAL_OPTS=\"\"" + case "$thislang" in + "italiano") + FESTIVAL_OPTS="--language italian" + ;; + "espanol") + FESTIVAL_OPTS="--language spanish" + ;; + "finnish") + FESTIVAL_OPTS="--language finnish" + ;; + "czech") + FESTIVAL_OPTS="--language czech" + ;; + *) + FESTIVAL_OPTS="" + ;; + esac DEFAULT_TTS="festival" DEFAULT_TTS_OPTS=$FESTIVAL_OPTS DEFAULT_NOISEFLOOR="500" @@ -354,7 +371,7 @@ voiceconfig () { # Allow SAPI if Windows is in use if [ -f "`which winver`" ]; then SAPI5="(S)API5 " - SAPI5_OPTS="SAPI5_OPTS=\"\"" + SAPI5_OPTS="" DEFAULT_TTS="sapi5" DEFAULT_TTS_OPTS=$SAPI5_OPTS DEFAULT_NOISEFLOOR="500" @@ -397,10 +414,10 @@ voiceconfig () { echo "Using $TTS_ENGINE for TTS" # Allow the user to input manual commandline options - printf "Enter $TTS_ENGINE options (enter for defaults `echo $TTS_OPTS |sed 's/.*=//'`): " + printf "Enter $TTS_ENGINE options (enter for defaults \"$TTS_OPTS\"): " USER_TTS_OPTS=`input` if [ -n "$USER_TTS_OPTS" ]; then - TTS_OPTS="`echo $TTS_OPTS | sed 's/=.*//'`=\"$USER_TTS_OPTS\"" + TTS_OPTS="$USER_TTS_OPTS" fi echo "" @@ -408,7 +425,7 @@ voiceconfig () { if [ -f "`which oggenc`" ]; then OGGENC="(O)ggenc " DEFAULT_ENC="oggenc" - VORBIS_OPTS="VORBIS_OPTS=\"-q0 --downmix\"" + VORBIS_OPTS="-q0 --downmix" DEFAULT_ENC_OPTS=$VORBIS_OPTS DEFAULT_CHOICE="O" fi @@ -422,7 +439,7 @@ voiceconfig () { if [ -f "`which lame`" ]; then LAME="(L)ame " DEFAULT_ENC="lame" - LAME_OPTS="LAME_OPTS=\"--resample 12 -t -m m -h -V 9 -S\"" + LAME_OPTS="--resample 12 -t -m m -h -V 9 -S -B 64 --vbr-new" DEFAULT_ENC_OPTS=$LAME_OPTS DEFAULT_CHOICE="L" fi @@ -456,25 +473,16 @@ voiceconfig () { echo "Using $ENCODER for encoding voice clips" # Allow the user to input manual commandline options - printf "Enter $ENCODER options (enter for defaults `echo $ENC_OPTS |sed 's/.*=//'`): " + printf "Enter $ENCODER options (enter for defaults \"$ENC_OPTS\"): " USER_ENC_OPTS=`input` if [ -n "$USER_ENC_OPTS" ]; then - ENC_OPTS="`echo $ENC_OPTS | sed 's/=.*//'`=\"$USER_ENC_OPTS\"" + ENC_OPTS=$USER_ENC_OPTS fi TEMPDIR="${pwd}" if [ -f "`which cygpath`" ]; then TEMPDIR=`cygpath . -a -w` fi - - cat > voicesettings-$thislang.sh < Makefile \ -e "${simmagic1}" \ -e "${simmagic2}" \ -e "s,@MANUALDEV@,${manualdev},g" \ + -e "s,@ENCODER@,${ENCODER},g" \ + -e "s,@ENC_OPTS@,${ENC_OPTS},g" \ + -e "s,@TTS_ENGINE@,${TTS_ENGINE},g" \ + -e "s,@TTS_OPTS@,${TTS_OPTS},g" \ < ) \___| < | \_\ ( <_> > < < +' Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ +' \/ \/ \/ \/ \/ +' $Id: sapi5_voice.vbs$ +' +' Copyright (C) 2007 Steve Bavin, Jens Arnold, Mesar Hameed +' +' All files in this archive are subject to the GNU General Public License. +' See the file COPYING in the source tree root for full license agreement. +' +' This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +' KIND, either express or implied. +' +'*************************************************************************** +' Purpose: Make a voice clip file for the given text on stdin + +'To be done: +' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed) +' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really) + +Const SSFMCreateForWrite = 3 + +Const SPSF_8kHz16BitMono = 6 +Const SPSF_11kHz16BitMono = 10 +Const SPSF_12kHz16BitMono = 14 +Const SPSF_16kHz16BitMono = 18 +Const SPSF_22kHz16BitMono = 22 +Const SPSF_24kHz16BitMono = 26 +Const SPSF_32kHz16BitMono = 30 +Const SPSF_44kHz16BitMono = 34 +Const SPSF_48kHz16BitMono = 38 + +Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile + +nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings: +'- for AT&T natural voices, use SPSF_32kHz16BitMono +'- for MS voices, use SPSF_22kHz16BitMono + +Set oSpVoice = CreateObject("SAPI.SpVoice") +If Err.Number <> 0 Then + WScript.Echo "Error - could not get SpVoice object. " & _ + "SAPI 5 not installed?" + Err.Clear + WScript.Quit 1 +End If + +While 1 > 0 + sText = WScript.StdIn.ReadLine + sOutputFile = WScript.StdIn.ReadLine + If sOutputFile = "" Then + Set oSpFS = Nothing + Set oSpVoice = Nothing + Set oArgs = Nothing + WScript.Quit 0 + End If + ' WScript.Echo "Saying " + sText + " in " + sOutputFile + Set oSpFS = CreateObject("SAPI.SpFileStream") + oSpFS.Format.Type = nAudioFormat + oSpFS.Open sOutputFile, SSFMCreateForWrite, False + Set oSpVoice.AudioOutputStream = oSpFS + oSpVoice.Speak sText + oSpFS.Close +Wend diff --git a/tools/voice.pl b/tools/voice.pl new file mode 100755 index 0000000000..1635b701f1 --- /dev/null +++ b/tools/voice.pl @@ -0,0 +1,360 @@ +#!/usr/bin/perl -s +# __________ __ ___. +# Open \______ \ ____ ____ | | _\_ |__ _______ ___ +# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / +# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < +# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ +# \/ \/ \/ \/ \/ +# $Id: +# +# Copyright (C) 2007 Jonas Häggqvist +# +# All files in this archive are subject to the GNU General Public License. +# See the file COPYING in the source tree root for full license agreement. +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY +# KIND, either express or implied. + +use strict; +use warnings; +use File::Basename; +use File::Copy; +use Switch; +use vars qw($V $C $t $l $e $E $s $S $i $v); +use IPC::Open3; +use Digest::MD5 qw(md5_hex); + +sub printusage { + print < + Specify which target you want to build voicefile for. Must include + any features that target supports. + + -i= + Numeric target id. Needed for voice building. + + -l= + Specify which language you want to build. Without .lang extension. + + -e= + Which encoder to use for voice strings + + -E= + Which encoder options to use when compressing voice strings. Enclose + in double quotes if the options include spaces. + + -s= + Which TTS engine to use. + + -S= + Options to pass to the TTS engine. Enclose in double quotes if the + options include spaces. + + -v + Be verbose +USAGE +; +} + +# Initialize TTS engine. May return an object or value which will be passed +# to voicestring and shutdown_tts +sub init_tts { + our $verbose; + my ($tts_engine, $tts_engine_opts, $language) = @_; + my $ret = undef; + switch($tts_engine) { + case "festival" { + print("> festival $tts_engine_opts --server\n") if $verbose; + my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); + $ret = *FESTIVAL_SERVER; + $ret = $pid; + $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; + $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; + } + case "sapi5" { + my $toolsdir = dirname($0); + my $path = `cygpath $toolsdir -a -w`; + chomp($path); + $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts"; + $path =~ s/\\/\\\\/g; + print("> cscript /B $path\n") if $verbose; + my $pid = open(F, "| cscript /B $path"); + $ret = *F; + $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; + $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); }; + } + } + return $ret; +} + +# Shutdown TTS engine if necessary. +sub shutdown_tts { + my ($tts_engine, $tts_object) = @_; + switch($tts_engine) { + case "festival" { + # Send SIGTERM to festival server + kill TERM => $tts_object; + } + case "sapi5" { + print($tts_object "\r\n\r\n"); + close($tts_object); + } + } +} + +# Apply corrections to a voice-string to make it sound better +sub correct_string { + our $verbose; + my ($string, $language, $tts_engine) = @_; + my $orig = $string; + switch($language) { + # General for all engines and languages (perhaps - just an example) + $string =~ s/USB/U S B/; + + case ("deutsch") { + switch($tts_engine) { + $string =~ s/alphabet/alfabet/; + $string =~ s/alkaline/alkalein/; + $string =~ s/ampere/amper/; + $string =~ s/byte(s?)\b/beit$1/; + $string =~ s/\bdezibel\b/de-zibell/; + $string =~ s/energie\b/ener-gie/; + $string =~ s/\bflash\b/fläsh/g; + $string =~ s/\bfirmware(s?)\b/firmwer$1/; + $string =~ s/\bid3 tag\b/id3 täg/g; # can't just use "tag" here + $string =~ s/\bloudness\b/laudness/; + $string =~ s/\bnumerisch\b/numehrisch/; + $string =~ s/\brücklauf\b/rück-lauf/; + $string =~ s/\bsuchlauf\b/such-lauf/; + } + } + } + if ($orig ne $string) { + printf("%s -> %s\n", $orig, $string) if $verbose; + } + return $string; +} + +# Produce a wav file of the text given +sub voicestring { + our $verbose; + my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_; + my $cmd; + printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose; + switch($tts_engine) { + case "festival" { + # festival_client lies to us, so we have to do awful soul-eating + # work with IPC::open3() + $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\""; + print("> $cmd\n") if $verbose; + # Open command, and filehandles for STDIN, STDOUT, STDERR + my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd); + # Put the string to speak into STDIN and close it + print(CMD_IN $string); + close(CMD_IN); + # Read all output from festival_client (because it LIES TO US) + while () { + } + close(CMD_OUT); + close(CMD_ERR); + } + case "flite" { + $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; + print("> $cmd\n") if $verbose; + `$cmd`; + } + case "espeak" { + # xxx: $tts_engine_opts isn't used + $cmd = "espeak $tts_engine_opts -w $output"; + print("> $cmd\n") if $verbose; + open(ESPEAK, "| $cmd"); + print ESPEAK $string . "\n"; + close(ESPEAK); + } + case "sapi5" { + print($tts_object sprintf("%s\r\n%s\r\n", $string, $output)); + } + } +} + +# Encode a wav file into the given destination file +sub encodewav { + our $verbose; + my ($input, $output, $encoder, $encoder_opts) = @_; + printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose; + switch ($encoder) { + case 'lame' { + my $cmd = "lame $encoder_opts \"$input\" \"$output\""; + print("> $cmd\n") if $verbose; + `lame $encoder_opts "$input" "$output"`; + `$cmd`; + } + case 'vorbis' { + `oggenc $encoder_opts "$input" -o "$output"`; + } + case 'speexenc' { + `speexenc $encoder_opts "$input" "$output"`; + } + } +} + +sub wavtrim { + our $verbose; + my ($file) = @_; + my $cmd = dirname($0) . "/wavtrim \"$file\""; + print("> $cmd\n") if $verbose; + `$cmd`; +} + +# Run genlang and create voice clips for each string +sub generateclips { + our $verbose; + my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; + my $genlang = dirname($0) . '/genlang'; + my $english = dirname($0) . '/../apps/lang/english.lang'; + my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; + my $id = ''; + my $voice = ''; + my $cmd = "$genlang -o -t=$target -e=$english $langfile 2>/dev/null"; + my $pool_file; + open(VOICEFONTIDS, "> voicefontids"); + my $i = 0; + + my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); + print("Generating voice clips"); + print("\n") if $verbose; + for (`$cmd`) { + my $line = $_; + print(VOICEFONTIDS $line); + if ($line =~ /^id: (.*)$/) { + $id = $1; + } + elsif ($line =~ /^voice: "(.*)"$/) { + $voice = $1; + if ($id !~ /^NOT_USED_.*$/ && $voice ne "") { + my $wav = $id . '.wav'; + my $mp3 = $id . '.mp3'; + + # Print some progress information + if (++$i % 10 == 0 and !$verbose) { + print("."); + } + + # Apply corrections to the string + $voice = correct_string($voice); + + # If we have a pool of snippes, see if the string exists there first + if (defined($ENV{'POOL'})) { + $pool_file = sprintf("%s/%s-%s-%s.mp3", $ENV{'POOL'}, md5_hex($voice), $language, $tts_engine); + if (-f $pool_file) { + printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose; + copy($pool_file, $mp3); + } + } + + # Don't generate MP3 if it already exists (probably from the POOL) + if (! -f $mp3) { + if ($id eq "VOICE_PAUSE") { + print("Use distributed $wav\n") if $verbose; + copy(dirname($0)."/VOICE_PAUSE.wav", $wav); + } + else { + voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object); + wavtrim($wav, 500); # 500 seems to be a reasonable default for now + } + + encodewav($wav, $mp3, $encoder, $encoder_opts); + if (defined($ENV{'POOL'})) { + copy($mp3, $pool_file); + } + unlink($wav); + } + $voice = ""; + $id = ""; + } + } + } + print("\n"); + close(VOICEFONTIDS); + shutdown_tts($tts_engine, $tts_object); +} + +# Assemble the voicefile +sub createvoice { + our $verbose; + my ($language, $target_id) = @_; + my $voicefont = dirname($0) . '/voicefont'; + my $outfile = ""; + my $i = 0; + do { + $outfile = sprintf("%s%s.voice", $language, ($i++ == 0 ? '' : '-'.$i)); + } while (-f $outfile); + printf("Saving voice file to %s\n", $outfile) if $verbose; + my $cmd = "$voicefont 'voicefontids' $target_id ./ $outfile"; + print("> $cmd\n") if $verbose; + my $output = `$cmd`; + print($output) if $verbose; +} + +sub deletemp3s() { + for (glob('*.mp3')) { + unlink($_); + } + for (glob('*.wav')) { + unlink($_); + } +} + +sub panic_cleanup { + deletemp3s(); + die "moo"; +} + +# Check parameters +my $printusage = 0; +unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; } +if (defined($V)) { + unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; } + unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; } + unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; } +} +elsif (defined($C)) { + unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; } +} +unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; } +unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; } +unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; } +unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; } +if ($printusage == 1) { printusage(); exit 1; } + +$SIG{INT} = \&panic_cleanup; +$SIG{KILL} = \&panic_cleanup; + +if (defined($v) or defined($ENV{'V'})) { + our $verbose = 1; +} + + +# Do what we're told +if ($V == 1) { + printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", + $t, $l, $e, $E, $s, $S); + generateclips($l, $t, $e, $E, $s, $S); + createvoice($l, $i); + deletemp3s(); +} +elsif ($C) { + # xxx: Implement .talk clip generation +} +else { + printusage(); + exit 1; +} -- cgit v1.2.3