summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Solomon Peachy <pizza@shaftnet.org> 2024-04-16 18:35:09 -0400
committer Solomon Peachy <pizza@shaftnet.org> 2024-04-16 18:37:43 -0400
commit c8dd31aab79a5a470c95b6253f147e919f8422bc (patch)
tree 0f554f1f86743d8eacb976e12682816eca3c89e5
parent 9af812c320ca7273cf4a8748d5d945a435e23d90 (diff)
download rockbox-c8dd31aab79a5a470c95b6253f147e919f8422bc.tar.gz
rockbox-c8dd31aab79a5a470c95b6253f147e919f8422bc.zip
voice: Fix the 'gtts' voice generation backend.
* Language and dialect need to be specified separately
* Convert the mp3 files generated by gtts into wav, then into rbspeex (uses ffmpeg currently)

Change-Id: I6d7b9494e70a61537519221522202ea28469cc70
-rw-r--r-- tools/builds.pm 4
-rwxr-xr-x tools/voice.pl 55
2 files changed, 33 insertions, 26 deletions
diff --git a/tools/builds.pm b/tools/builds.pm
index e11b527a1f..0c83548cf2 100644
--- a/tools/builds.pm
+++ b/tools/builds.pm
@@ -536,7 +536,7 @@ sub allbuilds {
536 'engines' => { 536 'engines' => {
537 'festival' => '--language english', 537 'festival' => '--language english',
538 'espeak' => '-ven-gb -k 5', 538 'espeak' => '-ven-gb -k 5',
539 'gtts' => '-l en-gb', 539 'gtts' => '-l en -t co.uk',
540 }, 540 },
541 'enabled' => 1, 541 'enabled' => 1,
542 }, 542 },
@@ -571,7 +571,7 @@ sub allbuilds {
571 'engines' => { 571 'engines' => {
572 'festival' => '--language english', 572 'festival' => '--language english',
573 'espeak' => '-ven-us -k 5', 573 'espeak' => '-ven-us -k 5',
574 'gtts' => '-l en-us', 574 'gtts' => '-l en -t us',
575 }, 575 },
576 'enabled' => 1, 576 'enabled' => 1,
577 }, 577 },
diff --git a/tools/voice.pl b/tools/voice.pl
index 86a018e096..0718e517ff 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -81,24 +81,24 @@ my %festival_lang_map = (
81); 81);
82 82
83my %gtts_lang_map = ( 83my %gtts_lang_map = (
84 'english' => 'en-gb', # Always first, it's the golden master 84 'english' => '-l en -t co.uk', # Always first, it's the golden master
85 'czech' => 'cs', # not supported 85 'czech' => '-l cs', # not supported
86 'dansk' => 'da', 86 'dansk' => '-l da',
87 'deutsch' => 'de', 87 'deutsch' => '-l de',
88 'english-us' => 'en-us', 88 'english-us' => '-l en -t us',
89 'espanol' => 'es-es', 89 'espanol' => '-l es',
90 'francais' => 'fr-fr', 90 'francais' => '-l fr',
91 'greek' => 'el', 91 'greek' => '-l el',
92 'magyar' => 'hu', 92 'magyar' => '-l hu',
93 'italiano' => 'it', 93 'italiano' => '-l it',
94 'nederlands' => 'nl', 94 'nederlands' => '-l nl',
95 'norsk' => 'no', 95 'norsk' => '-l no',
96 'polski' => 'pl', 96 'polski' => '-l pl',
97 'russian' => 'ru', 97 'russian' => '-l ru',
98 'slovak' => 'sk', 98 'slovak' => '-l sk',
99 'srpski' => 'sr', 99 'srpski' => '-l sr',
100 'svenska' => 'sv', 100 'svenska' => '-l sv',
101 'turkce' => 'tr', 101 'turkce' => '-l tr',
102); 102);
103 103
104my %espeak_lang_map = ( 104my %espeak_lang_map = (
@@ -167,7 +167,7 @@ sub init_tts {
167 } elsif ($tts_engine eq 'gtts') { 167 } elsif ($tts_engine eq 'gtts') {
168 $ret{"format"} = 'mp3'; 168 $ret{"format"} = 'mp3';
169 if (defined($gtts_lang_map{$language}) && $tts_engine_opts !~ /-l/) { 169 if (defined($gtts_lang_map{$language}) && $tts_engine_opts !~ /-l/) {
170 $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} "; 170 $ret{"ttsoptions"} = " $gtts_lang_map{$language} ";
171 } 171 }
172 } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') { 172 } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') {
173 if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) { 173 if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) {
@@ -403,7 +403,8 @@ sub generateclips {
403 $voice = $1; 403 $voice = $1;
404 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") { 404 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
405 my $wav = $id . '.wav'; 405 my $wav = $id . '.wav';
406 my $enc = $id . '.mp3'; 406 my $enc = $id . '.enc';
407 my $format = $tts_object->{'format'};
407 408
408 # Print some progress information 409 # Print some progress information
409 if (++$i % 10 == 0 and !$verbose) { 410 if (++$i % 10 == 0 and !$verbose) {
@@ -415,7 +416,7 @@ sub generateclips {
415 416
416 # If we have a pool of snippets, see if the string exists there first 417 # If we have a pool of snippets, see if the string exists there first
417 if (defined($ENV{'POOL'})) { 418 if (defined($ENV{'POOL'})) {
418 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'}, 419 $pool_file = sprintf("%s/%s-%s.enc", $ENV{'POOL'},
419 md5_hex(Encode::encode_utf8("$voice $tts_engine $tts_engine_opts $encoder_opts")), 420 md5_hex(Encode::encode_utf8("$voice $tts_engine $tts_engine_opts $encoder_opts")),
420 $language); 421 $language);
421 if (-f $pool_file) { 422 if (-f $pool_file) {
@@ -431,12 +432,18 @@ sub generateclips {
431 copy(dirname($0)."/VOICE_PAUSE.wav", $wav); 432 copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
432 } else { 433 } else {
433 voicestring($voice, $wav, $tts_engine_opts, $tts_object); 434 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
434 if ($tts_object->{'format'} eq "wav") { 435 if ($format eq "wav") {
435 wavtrim($wav, 500, $tts_object); 436 wavtrim($wav, 500, $tts_object);
436 # 500 seems to be a reasonable default for now 437 # 500 seems to be a reasonable default for now
437 } 438 }
438 } 439 }
439 if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") { 440 # Convert from mp3 to wav so we can use rbspeex
441 if ($format eq "mp3") {
442 system("ffmpeg -loglevel 0 -i $wav $id$wav");
443 rename("$id$wav","$wav");
444 $format = "wav";
445 }
446 if ($format eq "wav" || $id eq "VOICE_PAUSE") {
440 encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); 447 encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
441 } else { 448 } else {
442 copy($wav, $enc); 449 copy($wav, $enc);
@@ -483,7 +490,7 @@ sub createvoice {
483} 490}
484 491
485sub deleteencs() { 492sub deleteencs() {
486 for (glob('*.mp3')) { 493 for (glob('*.enc')) {
487 unlink($_); 494 unlink($_);
488 } 495 }
489 for (glob('*.wav')) { 496 for (glob('*.wav')) {