From 2c3399537cfb9d481a1e31a4b625d2abb97664e2 Mon Sep 17 00:00:00 2001 From: Solomon Peachy Date: Wed, 8 Jul 2020 19:05:09 -0400 Subject: voice: Add support for Google Translate's speech synthesizer Uses the 'gtts-cli' command line client. Supports a wide variety of languages, including all "Complete" and "Good" Rockbox translations. Additional changes: * voice synth script can accept pre-encoded mp3 files * Move language->synth options mapping into the voice script * Additional cleanups Change-Id: I9523e2bca87cbcee2d8c4111f9892e8e458c7419 --- tools/configure | 53 ++++++++++++++++----------------- tools/voice.pl | 91 ++++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 93 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/configure b/tools/configure index 585eadd357..aa8b190250 100755 --- a/tools/configure +++ b/tools/configure @@ -1111,23 +1111,7 @@ voiceconfig () { fi if [ -n "`findtool festival`" ]; then FESTIVAL="(F)estival " - case "$thislang" in - "italiano") - FESTIVAL_OPTS="--language italian" - ;; - "espanol") - FESTIVAL_OPTS="--language spanish" - ;; - "finnish") - FESTIVAL_OPTS="--language finnish" - ;; - "czech") - FESTIVAL_OPTS="--language czech" - ;; - *) - FESTIVAL_OPTS="" - ;; - esac + FESTIVAL_OPTS="" DEFAULT_TTS="festival" DEFAULT_TTS_OPTS=$FESTIVAL_OPTS DEFAULT_NOISEFLOOR="500" @@ -1149,14 +1133,6 @@ voiceconfig () { DEFAULT_NOISEFLOOR="500" DEFAULT_CHOICE="w" fi - if [ -n "`findtool rbspeak`" ]; then - RBSPEAK="(O)ther " - RBSPEAK_OPTS="" - DEFAULT_TTS="rbspeak" - DEFAULT_TTS_OPTS=$RBSPEAK_OPTS - DEFAULT_NOISEFLOOR="500" - DEFAULT_CHOICE="O" - fi # Allow SAPI if Windows is in use if [ -n "`findtool winver`" ]; then SAPI="(S)API " @@ -1164,10 +1140,26 @@ voiceconfig () { DEFAULT_TTS="sapi" DEFAULT_TTS_OPTS=$SAPI_OPTS DEFAULT_NOISEFLOOR="500" - DEFAULT_CHOICE="s" + DEFAULT_CHOICE="S" + fi + if [ -n "`findtool gtts-cli`" ]; then + GTTS="(g)tts " + GTTS_OPTS="" + DEFAULT_TTS="gtts" + 
DEFAULT_TTS_OPTS=$GTTS_OPTS + DEFAULT_NOISEFLOOR="500" + DEFAULT_CHOICE="g" + fi + if [ -n "`findtool rbspeak`" ]; then + RBSPEAK="(O)ther " + RBSPEAK_OPTS="" + DEFAULT_TTS="rbspeak" + DEFAULT_TTS_OPTS=$RBSPEAK_OPTS + DEFAULT_NOISEFLOOR="500" + DEFAULT_CHOICE="O" fi - if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ]; then + if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC" ] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ] && [ "$RBSPEAK" = "$GTTS" ] ; then echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files" exit 3 fi @@ -1175,7 +1167,7 @@ voiceconfig () { if [ "$ARG_TTS" ]; then option=$ARG_TTS else - echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${RBSPEAK}(${DEFAULT_CHOICE})?" + echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?" option=`input` if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi advopts="$advopts --tts=$option" @@ -1211,6 +1203,11 @@ voiceconfig () { NOISEFLOOR="500" TTS_OPTS=$SWIFT_OPTS ;; + [Gg]) # Google Translate TTS, driven through gtts-cli + TTS_ENGINE="gtts" + NOISEFLOOR="500" + TTS_OPTS=$GTTS_OPTS + ;; [Oo]) TTS_ENGINE="rbspeak" NOISEFLOOR="500" diff --git a/tools/voice.pl b/tools/voice.pl index 8198501777..56195d9e12 100755 --- a/tools/voice.pl +++ b/tools/voice.pl @@ -5,7 +5,7 @@ # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ # \/ \/ \/ \/ \/ -# $Id$ +# $Id$ # # Copyright (C) 2007 Jonas Häggqvist # @@ -33,46 +33,73 @@ sub printusage { Usage: voice.pl [options] [path to dir] -V Create voice file. You must also specify -t and -l. - + -C Create .talk clips. -t= Specify which target you want to build voicefile for. Must include any features that target supports. - + -i= Numeric target id. 
Needed for voice building. - + -l= Specify which language you want to build. Without .lang extension. - + -e= Which encoder to use for voice strings -E= Which encoder options to use when compressing voice strings. Enclose in double quotes if the options include spaces. - + -s= Which TTS engine to use. - + -S= Options to pass to the TTS engine. Enclose in double quotes if the options include spaces. - + -v Be verbose USAGE ; } +my %festival_lang_map = ( # Rockbox language name => festival language name (list initializer, not a hash ref) + 'english' => 'english', + 'english-us' => 'english', + 'espanol' => 'spanish', + #'finnish' => 'finnish' + #'italiano' => 'italian', + #'czech' => 'czech', + #'welsh' => 'welsh' +); + +my %gtts_lang_map = ( # Rockbox language name => gtts-cli -l language code + 'english' => 'en-gb', # Always first, it's the golden master + 'deutsch' => 'de', + 'english-us' => 'en-us', + 'francais' => 'fr-fr', + 'greek' => 'el', # ISO 639-1 code for Greek; 'gr' is the country code + 'italiano' => 'it', + 'norsk' => 'no', + 'polski' => 'pl', + 'russian' => 'ru', + 'slovak' => 'sk', + 'srpski' => 'sr', +); + # Initialize TTS engine. May return an object or value which will be passed # to voicestring and shutdown_tts sub init_tts { our $verbose; my ($tts_engine, $tts_engine_opts, $language) = @_; my %ret = ("name" => $tts_engine); + $ret{"format"} = 'wav'; + $ret{"ttsoptions"} = ""; + # Don't use given/when here - it's not compatible with old perl versions if ($tts_engine eq 'festival') { print("> festival $tts_engine_opts --server\n") if $verbose; @@ -81,8 +108,10 @@ sub init_tts { $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; $ret{"pid"} = $pid; - } - elsif ($tts_engine eq 'sapi') { + if (defined($festival_lang_map{$language})) { + $ret{"ttsoptions"} = "-l $festival_lang_map{$language} "; + } + } elsif ($tts_engine eq 'sapi') { my $toolsdir = dirname($0); my $path = `cygpath $toolsdir -a -w`; chomp($path); @@ -102,6 +131,11 @@ sub init_tts { "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "vendor" => $vendor); + } elsif ($tts_engine eq 'gtts') { + 
$ret{"format"} = 'mp3'; + if (defined($gtts_lang_map{$language})) { + $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} "; + } } return \%ret; } @@ -143,6 +177,9 @@ sub voicestring { my ($string, $output, $tts_engine_opts, $tts_object) = @_; my $cmd; my $name = $$tts_object{'name'}; + + $tts_engine_opts .= " " . $$tts_object{"ttsoptions"}; # keep user-supplied options separated from the per-language options + printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose; if ($name eq 'festival') { # festival_client lies to us, so we have to do awful soul-eating @@ -167,7 +204,7 @@ sub voicestring { elsif ($name eq 'flite') { $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; print("> $cmd\n") if $verbose; - `$cmd`; + system($cmd); } elsif ($name eq 'espeak') { $cmd = "espeak $tts_engine_opts -w \"$output\""; @@ -193,11 +230,14 @@ sub voicestring { close(RBSPEAK); } elsif ($name eq 'mimic') { - $cmd = "mimic $tts_engine_opts -o $output"; - print("> $cmd\n") if $verbose; - open (MIMIC, "| $cmd"); - print MIMIC $string . "\n"; - close(MIMIC); + $cmd = "mimic $tts_engine_opts -o \"$output\" -t \"$string\" "; + print("> $cmd\n") if $verbose; + system($cmd); + } + elsif ($name eq 'gtts') { + $cmd = "gtts-cli $tts_engine_opts -o \"$output\" \"$string\""; # gtts-cli writes mp3 data to the -o file + print("> $cmd\n") if $verbose; + system($cmd); } } @@ -326,17 +366,22 @@ sub generateclips { if ($id eq "VOICE_PAUSE") { print("Use distributed $wav\n") if $verbose; copy(dirname($0)."/VOICE_PAUSE.wav", $wav); + } else { + voicestring($voice, $wav, $tts_engine_opts, $tts_object); + if ($tts_object->{'format'} eq "wav") { + wavtrim($wav, 500, $tts_object); + # 500 seems to be a reasonable default for now + } } - else { - voicestring($voice, $wav, $tts_engine_opts, $tts_object); - wavtrim($wav, 500, $tts_object); - # 500 seems to be a reasonable default for now - } + if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") { + encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); + } else { + copy($wav, $mp3); + } - encodewav($wav, $mp3, $encoder, 
$encoder_opts, $tts_object); synchronize($tts_object); if (defined($ENV{'POOL'})) { - copy($mp3, $pool_file); + copy($mp3, $pool_file); } unlink($wav); } -- cgit v1.2.3