diff options
author | Solomon Peachy <pizza@shaftnet.org> | 2024-04-16 18:35:09 -0400 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2024-04-16 18:37:43 -0400 |
commit | c8dd31aab79a5a470c95b6253f147e919f8422bc (patch) | |
tree | 0f554f1f86743d8eacb976e12682816eca3c89e5 /tools/voice.pl | |
parent | 9af812c320ca7273cf4a8748d5d945a435e23d90 (diff) | |
download | rockbox-c8dd31aab79a5a470c95b6253f147e919f8422bc.tar.gz rockbox-c8dd31aab79a5a470c95b6253f147e919f8422bc.zip |
voice: Fix the 'gtts' voice generation backend.
* Language and dialect need to be specified separately
* Convert the mp3 files generated by gtts into wav so they can be encoded with rbspeex
(Uses ffmpeg currently)
Change-Id: I6d7b9494e70a61537519221522202ea28469cc70
Diffstat (limited to 'tools/voice.pl')
-rwxr-xr-x | tools/voice.pl | 55 |
1 files changed, 31 insertions, 24 deletions
diff --git a/tools/voice.pl b/tools/voice.pl index 86a018e096..0718e517ff 100755 --- a/tools/voice.pl +++ b/tools/voice.pl | |||
@@ -81,24 +81,24 @@ my %festival_lang_map = ( | |||
81 | ); | 81 | ); |
82 | 82 | ||
83 | my %gtts_lang_map = ( | 83 | my %gtts_lang_map = ( |
84 | 'english' => 'en-gb', # Always first, it's the golden master | 84 | 'english' => '-l en -t co.uk', # Always first, it's the golden master |
85 | 'czech' => 'cs', # not supported | 85 | 'czech' => '-l cs', # not supported |
86 | 'dansk' => 'da', | 86 | 'dansk' => '-l da', |
87 | 'deutsch' => 'de', | 87 | 'deutsch' => '-l de', |
88 | 'english-us' => 'en-us', | 88 | 'english-us' => '-l en -t us', |
89 | 'espanol' => 'es-es', | 89 | 'espanol' => '-l es', |
90 | 'francais' => 'fr-fr', | 90 | 'francais' => '-l fr', |
91 | 'greek' => 'el', | 91 | 'greek' => '-l el', |
92 | 'magyar' => 'hu', | 92 | 'magyar' => '-l hu', |
93 | 'italiano' => 'it', | 93 | 'italiano' => '-l it', |
94 | 'nederlands' => 'nl', | 94 | 'nederlands' => '-l nl', |
95 | 'norsk' => 'no', | 95 | 'norsk' => '-l no', |
96 | 'polski' => 'pl', | 96 | 'polski' => '-l pl', |
97 | 'russian' => 'ru', | 97 | 'russian' => '-l ru', |
98 | 'slovak' => 'sk', | 98 | 'slovak' => '-l sk', |
99 | 'srpski' => 'sr', | 99 | 'srpski' => '-l sr', |
100 | 'svenska' => 'sv', | 100 | 'svenska' => '-l sv', |
101 | 'turkce' => 'tr', | 101 | 'turkce' => '-l tr', |
102 | ); | 102 | ); |
103 | 103 | ||
104 | my %espeak_lang_map = ( | 104 | my %espeak_lang_map = ( |
@@ -167,7 +167,7 @@ sub init_tts { | |||
167 | } elsif ($tts_engine eq 'gtts') { | 167 | } elsif ($tts_engine eq 'gtts') { |
168 | $ret{"format"} = 'mp3'; | 168 | $ret{"format"} = 'mp3'; |
169 | if (defined($gtts_lang_map{$language}) && $tts_engine_opts !~ /-l/) { | 169 | if (defined($gtts_lang_map{$language}) && $tts_engine_opts !~ /-l/) { |
170 | $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} "; | 170 | $ret{"ttsoptions"} = " $gtts_lang_map{$language} "; |
171 | } | 171 | } |
172 | } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') { | 172 | } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') { |
173 | if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) { | 173 | if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) { |
@@ -403,7 +403,8 @@ sub generateclips { | |||
403 | $voice = $1; | 403 | $voice = $1; |
404 | if ($id !~ /^NOT_USED_.*$/ && $voice ne "") { | 404 | if ($id !~ /^NOT_USED_.*$/ && $voice ne "") { |
405 | my $wav = $id . '.wav'; | 405 | my $wav = $id . '.wav'; |
406 | my $enc = $id . '.mp3'; | 406 | my $enc = $id . '.enc'; |
407 | my $format = $tts_object->{'format'}; | ||
407 | 408 | ||
408 | # Print some progress information | 409 | # Print some progress information |
409 | if (++$i % 10 == 0 and !$verbose) { | 410 | if (++$i % 10 == 0 and !$verbose) { |
@@ -415,7 +416,7 @@ sub generateclips { | |||
415 | 416 | ||
416 | # If we have a pool of snippets, see if the string exists there first | 417 | # If we have a pool of snippets, see if the string exists there first |
417 | if (defined($ENV{'POOL'})) { | 418 | if (defined($ENV{'POOL'})) { |
418 | $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'}, | 419 | $pool_file = sprintf("%s/%s-%s.enc", $ENV{'POOL'}, |
419 | md5_hex(Encode::encode_utf8("$voice $tts_engine $tts_engine_opts $encoder_opts")), | 420 | md5_hex(Encode::encode_utf8("$voice $tts_engine $tts_engine_opts $encoder_opts")), |
420 | $language); | 421 | $language); |
421 | if (-f $pool_file) { | 422 | if (-f $pool_file) { |
@@ -431,12 +432,18 @@ sub generateclips { | |||
431 | copy(dirname($0)."/VOICE_PAUSE.wav", $wav); | 432 | copy(dirname($0)."/VOICE_PAUSE.wav", $wav); |
432 | } else { | 433 | } else { |
433 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); | 434 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); |
434 | if ($tts_object->{'format'} eq "wav") { | 435 | if ($format eq "wav") { |
435 | wavtrim($wav, 500, $tts_object); | 436 | wavtrim($wav, 500, $tts_object); |
436 | # 500 seems to be a reasonable default for now | 437 | # 500 seems to be a reasonable default for now |
437 | } | 438 | } |
438 | } | 439 | } |
439 | if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") { | 440 | # Convert from mp3 to wav so we can use rbspeex |
441 | if ($format eq "mp3") { | ||
442 | system("ffmpeg -loglevel 0 -i $wav $id$wav"); | ||
443 | rename("$id$wav","$wav"); | ||
444 | $format = "wav"; | ||
445 | } | ||
446 | if ($format eq "wav" || $id eq "VOICE_PAUSE") { | ||
440 | encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); | 447 | encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); |
441 | } else { | 448 | } else { |
442 | copy($wav, $enc); | 449 | copy($wav, $enc); |
@@ -483,7 +490,7 @@ sub createvoice { | |||
483 | } | 490 | } |
484 | 491 | ||
485 | sub deleteencs() { | 492 | sub deleteencs() { |
486 | for (glob('*.mp3')) { | 493 | for (glob('*.enc')) { |
487 | unlink($_); | 494 | unlink($_); |
488 | } | 495 | } |
489 | for (glob('*.wav')) { | 496 | for (glob('*.wav')) { |