From c8dd31aab79a5a470c95b6253f147e919f8422bc Mon Sep 17 00:00:00 2001 From: Solomon Peachy Date: Tue, 16 Apr 2024 18:35:09 -0400 Subject: voice: Fix the 'gtts' voice generation backend. * Language and dialect need to be specified separately * Convert the mp3 files generated by gtts into wav so that rbspeex can be used (uses ffmpeg currently) Change-Id: I6d7b9494e70a61537519221522202ea28469cc70 --- tools/builds.pm | 4 ++-- tools/voice.pl | 55 +++++++++++++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/tools/builds.pm b/tools/builds.pm index e11b527a1f..0c83548cf2 100644 --- a/tools/builds.pm +++ b/tools/builds.pm @@ -536,7 +536,7 @@ sub allbuilds { 'engines' => { 'festival' => '--language english', 'espeak' => '-ven-gb -k 5', - 'gtts' => '-l en-gb', + 'gtts' => '-l en -t co.uk', }, 'enabled' => 1, }, @@ -571,7 +571,7 @@ sub allbuilds { 'engines' => { 'festival' => '--language english', 'espeak' => '-ven-us -k 5', - 'gtts' => '-l en-us', + 'gtts' => '-l en -t us', }, 'enabled' => 1, }, diff --git a/tools/voice.pl b/tools/voice.pl index 86a018e096..0718e517ff 100755 --- a/tools/voice.pl +++ b/tools/voice.pl @@ -81,24 +81,24 @@ my %festival_lang_map = ( ); my %gtts_lang_map = ( - 'english' => 'en-gb', # Always first, it's the golden master - 'czech' => 'cs', # not supported - 'dansk' => 'da', - 'deutsch' => 'de', - 'english-us' => 'en-us', - 'espanol' => 'es-es', - 'francais' => 'fr-fr', - 'greek' => 'el', - 'magyar' => 'hu', - 'italiano' => 'it', - 'nederlands' => 'nl', - 'norsk' => 'no', - 'polski' => 'pl', - 'russian' => 'ru', - 'slovak' => 'sk', - 'srpski' => 'sr', - 'svenska' => 'sv', - 'turkce' => 'tr', + 'english' => '-l en -t co.uk', # Always first, it's the golden master + 'czech' => '-l cs', # not supported + 'dansk' => '-l da', + 'deutsch' => '-l de', + 'english-us' => '-l en -t us', + 'espanol' => '-l es', + 'francais' => '-l fr', + 'greek' => '-l el', + 'magyar' => '-l hu', + 'italiano' => '-l it', + 
'nederlands' => '-l nl', + 'norsk' => '-l no', + 'polski' => '-l pl', + 'russian' => '-l ru', + 'slovak' => '-l sk', + 'srpski' => '-l sr', + 'svenska' => '-l sv', + 'turkce' => '-l tr', ); my %espeak_lang_map = ( @@ -167,7 +167,7 @@ sub init_tts { } elsif ($tts_engine eq 'gtts') { $ret{"format"} = 'mp3'; if (defined($gtts_lang_map{$language}) && $tts_engine_opts !~ /-l/) { - $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} "; + $ret{"ttsoptions"} = " $gtts_lang_map{$language} "; } } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') { if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) { @@ -403,7 +403,8 @@ sub generateclips { $voice = $1; if ($id !~ /^NOT_USED_.*$/ && $voice ne "") { my $wav = $id . '.wav'; - my $enc = $id . '.mp3'; + my $enc = $id . '.enc'; + my $format = $tts_object->{'format'}; # Print some progress information if (++$i % 10 == 0 and !$verbose) { @@ -415,7 +416,7 @@ sub generateclips { # If we have a pool of snippets, see if the string exists there first if (defined($ENV{'POOL'})) { - $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'}, + $pool_file = sprintf("%s/%s-%s.enc", $ENV{'POOL'}, md5_hex(Encode::encode_utf8("$voice $tts_engine $tts_engine_opts $encoder_opts")), $language); if (-f $pool_file) { @@ -431,12 +432,18 @@ sub generateclips { copy(dirname($0)."/VOICE_PAUSE.wav", $wav); } else { voicestring($voice, $wav, $tts_engine_opts, $tts_object); - if ($tts_object->{'format'} eq "wav") { + if ($format eq "wav") { wavtrim($wav, 500, $tts_object); # 500 seems to be a reasonable default for now } } - if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") { + # Convert from mp3 to wav so we can use rbspeex + if ($format eq "mp3") { + system("ffmpeg -loglevel 0 -i $wav $id$wav"); + rename("$id$wav","$wav"); + $format = "wav"; + } + if ($format eq "wav" || $id eq "VOICE_PAUSE") { encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); } else { copy($wav, $enc); @@ -483,7 +490,7 @@ sub createvoice { } 
sub deleteencs() { - for (glob('*.mp3')) { + for (glob('*.enc')) { unlink($_); } for (glob('*.wav')) { -- cgit v1.2.3