diff options
author | Solomon Peachy <pizza@shaftnet.org> | 2024-04-17 08:59:05 -0400 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2024-04-17 09:02:20 -0400 |
commit | 613a1432d6be69ebf68fb4cf86545757a5f7c602 (patch) | |
tree | d56254bb76b122f8f5863e457c46e9de66b4fd1d /tools | |
parent | 39c9c350ae2f42e83d398c8a446ec99c1cadcfc5 (diff) | |
download | rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.tar.gz rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.zip |
voice: Improvements to the talk clip generation
This covers the voiced directory names and filenames
* Don't regenerate a talk clip if one is present (?)
* Format awareness; i.e., if the TTS engine generates an mp3 file,
convert it to a wav file so we can encode it properly
* Use a global variable for the wavtrim threshold
Change-Id: I9f441b573704bdf7675794fd0e1984446308463b
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/voice.pl | 34 |
1 files changed, 24 insertions, 10 deletions
diff --git a/tools/voice.pl b/tools/voice.pl index 0718e517ff..9398b58b47 100755 --- a/tools/voice.pl +++ b/tools/voice.pl | |||
@@ -124,6 +124,8 @@ my %espeak_lang_map = ( | |||
124 | 'turkce' => 'tr', | 124 | 'turkce' => 'tr', |
125 | ); | 125 | ); |
126 | 126 | ||
127 | my $trim_thresh = 500; # Trim silence if over this, in ms | ||
128 | |||
127 | # Initialize TTS engine. May return an object or value which will be passed | 129 | # Initialize TTS engine. May return an object or value which will be passed |
128 | # to voicestring and shutdown_tts | 130 | # to voicestring and shutdown_tts |
129 | sub init_tts { | 131 | sub init_tts { |
@@ -433,8 +435,7 @@ sub generateclips { | |||
433 | } else { | 435 | } else { |
434 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); | 436 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); |
435 | if ($format eq "wav") { | 437 | if ($format eq "wav") { |
436 | wavtrim($wav, 500, $tts_object); | 438 | wavtrim($wav, $trim_thresh, $tts_object); |
437 | # 500 seems to be a reasonable default for now | ||
438 | } | 439 | } |
439 | } | 440 | } |
440 | # Convert from mp3 to wav so we can use rbspeex | 441 | # Convert from mp3 to wav so we can use rbspeex |
@@ -510,6 +511,8 @@ sub gentalkclips { | |||
510 | my $d = new DirHandle $dir; | 511 | my $d = new DirHandle $dir; |
511 | while (my $file = $d->read) { | 512 | while (my $file = $d->read) { |
512 | my ($voice, $wav, $enc); | 513 | my ($voice, $wav, $enc); |
514 | my $format = $tts_object->{'format'}; | ||
515 | |||
513 | # Print some progress information | 516 | # Print some progress information |
514 | if (++$i % 10 == 0 and !$verbose) { | 517 | if (++$i % 10 == 0 and !$verbose) { |
515 | print("."); | 518 | print("."); |
@@ -527,8 +530,8 @@ sub gentalkclips { | |||
527 | } | 530 | } |
528 | # Element is a dir | 531 | # Element is a dir |
529 | if ( -d $path) { | 532 | if ( -d $path) { |
533 | $enc = sprintf("%s/_dirname.talk", $path); | ||
530 | gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i); | 534 | gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i); |
531 | $enc = sprintf("%s/_dirname.talk", $path); | ||
532 | } | 535 | } |
533 | # Element is a file | 536 | # Element is a file |
534 | else { | 537 | else { |
@@ -537,13 +540,24 @@ sub gentalkclips { | |||
537 | } | 540 | } |
538 | 541 | ||
539 | printf("Talkclip %s: %s", $enc, $voice) if $verbose; | 542 | printf("Talkclip %s: %s", $enc, $voice) if $verbose; |
540 | 543 | # Don't generate encoded file if it already exists | |
541 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); | 544 | next if (-f $enc); |
542 | wavtrim($wav, 500, $tts_object); | 545 | |
543 | # 500 seems to be a reasonable default for now | 546 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); |
544 | encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); | 547 | wavtrim($wav, $trim_thresh, $tts_object); |
545 | synchronize($tts_object); | 548 | |
546 | unlink($wav); | 549 | if ($format eq "mp3") { |
550 | system("ffmpeg -loglevel 0 -i $wav $voice$wav"); | ||
551 | rename("$voice$wav","$wav"); | ||
552 | $format = "wav"; | ||
553 | } | ||
554 | if ($format eq "wav") { | ||
555 | encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); | ||
556 | } else { | ||
557 | copy($wav, $enc); | ||
558 | } | ||
559 | synchronize($tts_object); | ||
560 | unlink($wav); | ||
547 | } | 561 | } |
548 | } | 562 | } |
549 | 563 | ||