summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorSolomon Peachy <pizza@shaftnet.org>2024-04-17 08:59:05 -0400
committerSolomon Peachy <pizza@shaftnet.org>2024-04-17 09:02:20 -0400
commit613a1432d6be69ebf68fb4cf86545757a5f7c602 (patch)
treed56254bb76b122f8f5863e457c46e9de66b4fd1d /tools
parent39c9c350ae2f42e83d398c8a446ec99c1cadcfc5 (diff)
downloadrockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.tar.gz
rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.zip
voice: Improvements to the talk clip generation
This covers the voiced directory names and filenames:
 * Don't regenerate a talk clip if one is already present (?)
 * Format awareness; i.e., if the TTS engine generates an mp3 file, convert it to a wav file so we can encode it properly
 * Use a global variable for the wavtrim threshold

Change-Id: I9f441b573704bdf7675794fd0e1984446308463b
Diffstat (limited to 'tools')
-rwxr-xr-xtools/voice.pl34
1 files changed, 24 insertions, 10 deletions
diff --git a/tools/voice.pl b/tools/voice.pl
index 0718e517ff..9398b58b47 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -124,6 +124,8 @@ my %espeak_lang_map = (
124 'turkce' => 'tr', 124 'turkce' => 'tr',
125); 125);
126 126
127my $trim_thresh = 500; # Trim silence if over this, in ms
128
127# Initialize TTS engine. May return an object or value which will be passed 129# Initialize TTS engine. May return an object or value which will be passed
128# to voicestring and shutdown_tts 130# to voicestring and shutdown_tts
129sub init_tts { 131sub init_tts {
@@ -433,8 +435,7 @@ sub generateclips {
433 } else { 435 } else {
434 voicestring($voice, $wav, $tts_engine_opts, $tts_object); 436 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
435 if ($format eq "wav") { 437 if ($format eq "wav") {
436 wavtrim($wav, 500, $tts_object); 438 wavtrim($wav, $trim_thresh, $tts_object);
437 # 500 seems to be a reasonable default for now
438 } 439 }
439 } 440 }
440 # Convert from mp3 to wav so we can use rbspeex 441 # Convert from mp3 to wav so we can use rbspeex
@@ -510,6 +511,8 @@ sub gentalkclips {
510 my $d = new DirHandle $dir; 511 my $d = new DirHandle $dir;
511 while (my $file = $d->read) { 512 while (my $file = $d->read) {
512 my ($voice, $wav, $enc); 513 my ($voice, $wav, $enc);
514 my $format = $tts_object->{'format'};
515
513 # Print some progress information 516 # Print some progress information
514 if (++$i % 10 == 0 and !$verbose) { 517 if (++$i % 10 == 0 and !$verbose) {
515 print("."); 518 print(".");
@@ -527,8 +530,8 @@ sub gentalkclips {
527 } 530 }
528 # Element is a dir 531 # Element is a dir
529 if ( -d $path) { 532 if ( -d $path) {
533 $enc = sprintf("%s/_dirname.talk", $path);
530 gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i); 534 gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
531 $enc = sprintf("%s/_dirname.talk", $path);
532 } 535 }
533 # Element is a file 536 # Element is a file
534 else { 537 else {
@@ -537,13 +540,24 @@ sub gentalkclips {
537 } 540 }
538 541
539 printf("Talkclip %s: %s", $enc, $voice) if $verbose; 542 printf("Talkclip %s: %s", $enc, $voice) if $verbose;
540 543 # Don't generate encoded file if it already exists
541 voicestring($voice, $wav, $tts_engine_opts, $tts_object); 544 next if (-f $enc);
542 wavtrim($wav, 500, $tts_object); 545
543 # 500 seems to be a reasonable default for now 546 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
544 encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object); 547 wavtrim($wav, $trim_thresh, $tts_object);
545 synchronize($tts_object); 548
546 unlink($wav); 549 if ($format eq "mp3") {
550 system("ffmpeg -loglevel 0 -i $wav $voice$wav");
551 rename("$voice$wav","$wav");
552 $format = "wav";
553 }
554 if ($format eq "wav") {
555 encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
556 } else {
557 copy($wav, $enc);
558 }
559 synchronize($tts_object);
560 unlink($wav);
547 } 561 }
548} 562}
549 563