voice: Improvements to the talk clip generation

This covers the voiced directory and filenames:

* Don't regenerate a talk clip if one is present (?)
* Format awareness; i.e. if the TTS engine generates an mp3 file, convert it to a wav file so we can encode it properly
* Use a global variable for the wavtrim threshold

Change-Id: I9f441b573704bdf7675794fd0e1984446308463b
author: Solomon Peachy <pizza@shaftnet.org> 2024-04-17 08:59:05 -0400
committer: Solomon Peachy <pizza@shaftnet.org> 2024-04-17 09:02:20 -0400
commit: 613a1432d6be69ebf68fb4cf86545757a5f7c602 (patch)
tree: d56254bb76b122f8f5863e457c46e9de66b4fd1d
parent: 39c9c350ae2f42e83d398c8a446ec99c1cadcfc5 (diff)
download: rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.tar.gz
rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.zip
1 files changed, 24 insertions, 10 deletions
diff --git a/tools/voice.pl b/tools/voice.pl
index 0718e517ff..9398b58b47 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -124,6 +124,8 @@ my %espeak_lang_map = (
        'turkce' => 'tr',
 );
+my $trim_thresh = 500;   # Trim silence if over this, in ms
 # Initialize TTS engine. May return an object or value which will be passed
 # to voicestring and shutdown_tts
 sub init_tts {
@@ -433,8 +435,7 @@ sub generateclips {
                    } else {
                        voicestring($voice, $wav, $tts_engine_opts, $tts_object);
                        if ($format eq "wav") {
-                            wavtrim($wav, 500, $tts_object);
+                            wavtrim($wav, $trim_thresh, $tts_object);
-                            # 500 seems to be a reasonable default for now
                        }
                    }
                    # Convert from mp3 to wav so we can use rbspeex
@@ -510,6 +511,8 @@ sub gentalkclips {
    my $d = new DirHandle $dir;
    while (my $file = $d->read) {
        my ($voice, $wav, $enc);
+        my $format = $tts_object->{'format'};
        # Print some progress information
        if (++$i % 10 == 0 and !$verbose) {
            print(".");
@@ -527,8 +530,8 @@ sub gentalkclips {
        }
        # Element is a dir
        if ( -d $path) {
+            $enc = sprintf("%s/_dirname.talk", $path);
            gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
-            $enc = sprintf("%s/_dirname.talk", $path);
        }
        # Element is a file
        else {
@@ -537,13 +540,24 @@ sub gentalkclips {
        }
        printf("Talkclip %s: %s", $enc, $voice) if $verbose;
+        # Don't generate encoded file if it already exists
-        voicestring($voice, $wav, $tts_engine_opts, $tts_object);
+        next if (-f $enc);
-        wavtrim($wav, 500, $tts_object);
-        # 500 seems to be a reasonable default for now
+        voicestring($voice, $wav, $tts_engine_opts, $tts_object);
-        encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
+        wavtrim($wav, $trim_thresh, $tts_object);
-        synchronize($tts_object);
-        unlink($wav);
+        if ($format eq "mp3") {
+            system("ffmpeg -loglevel 0 -i $wav $voice$wav");
+            rename("$voice$wav","$wav");
+            $format = "wav";
+        }
+        if ($format eq "wav") {
+            encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
+        } else {
+            copy($wav, $enc);
+        }
+        synchronize($tts_object);
+        unlink($wav);
    }
 }
author	Solomon Peachy <pizza@shaftnet.org>	2024-04-17 08:59:05 -0400
committer	Solomon Peachy <pizza@shaftnet.org>	2024-04-17 09:02:20 -0400
commit	613a1432d6be69ebf68fb4cf86545757a5f7c602 (patch)
tree	d56254bb76b122f8f5863e457c46e9de66b4fd1d
parent	39c9c350ae2f42e83d398c8a446ec99c1cadcfc5 (diff)
download	rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.tar.gz rockbox-613a1432d6be69ebf68fb4cf86545757a5f7c602.zip

diff --git a/tools/voice.pl b/tools/voice.pl
index 0718e517ff..9398b58b47 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -124,6 +124,8 @@ my %espeak_lang_map = (
124	'turkce' => 'tr',	124	'turkce' => 'tr',
125	);	125	);
126		126
		127	my $trim_thresh = 500; # Trim silence if over this, in ms
		128
127	# Initialize TTS engine. May return an object or value which will be passed	129	# Initialize TTS engine. May return an object or value which will be passed
128	# to voicestring and shutdown_tts	130	# to voicestring and shutdown_tts
129	sub init_tts {	131	sub init_tts {
@@ -433,8 +435,7 @@ sub generateclips {
433	} else {	435	} else {
434	voicestring($voice, $wav, $tts_engine_opts, $tts_object);	436	voicestring($voice, $wav, $tts_engine_opts, $tts_object);
435	if ($format eq "wav") {	437	if ($format eq "wav") {
436	wavtrim($wav, 500, $tts_object);	438	wavtrim($wav, $trim_thresh, $tts_object);
437	# 500 seems to be a reasonable default for now
438	}	439	}
439	}	440	}
440	# Convert from mp3 to wav so we can use rbspeex	441	# Convert from mp3 to wav so we can use rbspeex
@@ -510,6 +511,8 @@ sub gentalkclips {
510	my $d = new DirHandle $dir;	511	my $d = new DirHandle $dir;
511	while (my $file = $d->read) {	512	while (my $file = $d->read) {
512	my ($voice, $wav, $enc);	513	my ($voice, $wav, $enc);
		514	my $format = $tts_object->{'format'};
		515
513	# Print some progress information	516	# Print some progress information
514	if (++$i % 10 == 0 and !$verbose) {	517	if (++$i % 10 == 0 and !$verbose) {
515	print(".");	518	print(".");
@@ -527,8 +530,8 @@ sub gentalkclips {
527	}	530	}
528	# Element is a dir	531	# Element is a dir
529	if ( -d $path) {	532	if ( -d $path) {
		533	$enc = sprintf("%s/_dirname.talk", $path);
530	gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);	534	gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
531	$enc = sprintf("%s/_dirname.talk", $path);
532	}	535	}
533	# Element is a file	536	# Element is a file
534	else {	537	else {
@@ -537,13 +540,24 @@ sub gentalkclips {
537	}	540	}
538		541
539	printf("Talkclip %s: %s", $enc, $voice) if $verbose;	542	printf("Talkclip %s: %s", $enc, $voice) if $verbose;
540		543	# Don't generate encoded file if it already exists
541	voicestring($voice, $wav, $tts_engine_opts, $tts_object);	544	next if (-f $enc);
542	wavtrim($wav, 500, $tts_object);	545
543	# 500 seems to be a reasonable default for now	546	voicestring($voice, $wav, $tts_engine_opts, $tts_object);
544	encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);	547	wavtrim($wav, $trim_thresh, $tts_object);
545	synchronize($tts_object);	548
546	unlink($wav);	549	if ($format eq "mp3") {
		550	system("ffmpeg -loglevel 0 -i $wav $voice$wav");
		551	rename("$voice$wav","$wav");
		552	$format = "wav";
		553	}
		554	if ($format eq "wav") {
		555	encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
		556	} else {
		557	copy($wav, $enc);
		558	}
		559	synchronize($tts_object);
		560	unlink($wav);
547	}	561	}
548	}	562	}
549		563