summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
author Solomon Peachy <pizza@shaftnet.org> 2020-07-08 19:05:09 -0400
committer Solomon Peachy <pizza@shaftnet.org> 2020-07-08 23:16:57 +0000
commit 2c3399537cfb9d481a1e31a4b625d2abb97664e2 (patch)
tree 2a612cfd81ee1766e9b2e61801f231911d577137 /tools
parent 5e98eba8abc16d494802ed41f37ef7cec54dc8f4 (diff)
download rockbox-2c3399537cfb9d481a1e31a4b625d2abb97664e2.tar.gz
rockbox-2c3399537cfb9d481a1e31a4b625d2abb97664e2.zip
voice: Add support for Google Translate's speech synthesizer
Uses the 'gtts-cli' command line client. Supports a wide variety of languages, including all "Complete" and "Good" Rockbox translations.

Additional changes:
 * voice synth script can accept pre-encoded mp3 files
 * Move language->synth options mapping into the voice script
 * Additional cleanups

Change-Id: I9523e2bca87cbcee2d8c4111f9892e8e458c7419
Diffstat (limited to 'tools')
-rwxr-xr-x tools/configure 53
-rwxr-xr-x tools/voice.pl 91
2 files changed, 93 insertions, 51 deletions
diff --git a/tools/configure b/tools/configure
index 585eadd357..aa8b190250 100755
--- a/tools/configure
+++ b/tools/configure
@@ -1111,23 +1111,7 @@ voiceconfig () {
1111 fi 1111 fi
1112 if [ -n "`findtool festival`" ]; then 1112 if [ -n "`findtool festival`" ]; then
1113 FESTIVAL="(F)estival " 1113 FESTIVAL="(F)estival "
1114 case "$thislang" in 1114 FESTIVAL_OPTS=""
1115 "italiano")
1116 FESTIVAL_OPTS="--language italian"
1117 ;;
1118 "espanol")
1119 FESTIVAL_OPTS="--language spanish"
1120 ;;
1121 "finnish")
1122 FESTIVAL_OPTS="--language finnish"
1123 ;;
1124 "czech")
1125 FESTIVAL_OPTS="--language czech"
1126 ;;
1127 *)
1128 FESTIVAL_OPTS=""
1129 ;;
1130 esac
1131 DEFAULT_TTS="festival" 1115 DEFAULT_TTS="festival"
1132 DEFAULT_TTS_OPTS=$FESTIVAL_OPTS 1116 DEFAULT_TTS_OPTS=$FESTIVAL_OPTS
1133 DEFAULT_NOISEFLOOR="500" 1117 DEFAULT_NOISEFLOOR="500"
@@ -1149,14 +1133,6 @@ voiceconfig () {
1149 DEFAULT_NOISEFLOOR="500" 1133 DEFAULT_NOISEFLOOR="500"
1150 DEFAULT_CHOICE="w" 1134 DEFAULT_CHOICE="w"
1151 fi 1135 fi
1152 if [ -n "`findtool rbspeak`" ]; then
1153 RBSPEAK="(O)ther "
1154 RBSPEAK_OPTS=""
1155 DEFAULT_TTS="rbspeak"
1156 DEFAULT_TTS_OPTS=$RBSPEAK_OPTS
1157 DEFAULT_NOISEFLOOR="500"
1158 DEFAULT_CHOICE="O"
1159 fi
1160 # Allow SAPI if Windows is in use 1136 # Allow SAPI if Windows is in use
1161 if [ -n "`findtool winver`" ]; then 1137 if [ -n "`findtool winver`" ]; then
1162 SAPI="(S)API " 1138 SAPI="(S)API "
@@ -1164,10 +1140,26 @@ voiceconfig () {
1164 DEFAULT_TTS="sapi" 1140 DEFAULT_TTS="sapi"
1165 DEFAULT_TTS_OPTS=$SAPI_OPTS 1141 DEFAULT_TTS_OPTS=$SAPI_OPTS
1166 DEFAULT_NOISEFLOOR="500" 1142 DEFAULT_NOISEFLOOR="500"
1167 DEFAULT_CHOICE="s" 1143 DEFAULT_CHOICE="S"
1144 fi
1145 if [ -n "`findtool gtts-cli`" ]; then
1146 GTTS="(g)tts "
1147 GTTS_OPTS=""
1148 DEFAULT_TTS="gtts"
1149 DEFAULT_TTS_OPTS=$GTTS_OPTS
1150 DEFAULT_NOISEFLOOR="500"
1151 DEFAULT_CHOICE="g"
1152 fi
1153 if [ -n "`findtool rbspeak`" ]; then
1154 RBSPEAK="(O)ther "
1155 RBSPEAK_OPTS=""
1156 DEFAULT_TTS="rbspeak"
1157 DEFAULT_TTS_OPTS=$RBSPEAK_OPTS
1158 DEFAULT_NOISEFLOOR="500"
1159 DEFAULT_CHOICE="O"
1168 fi 1160 fi
1169 1161
1170 if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ]; then 1162 if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ] && [ "$RBSPEAK" = "$GTTS" ] ; then
1171 echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files" 1163 echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files"
1172 exit 3 1164 exit 3
1173 fi 1165 fi
@@ -1175,7 +1167,7 @@ voiceconfig () {
1175 if [ "$ARG_TTS" ]; then 1167 if [ "$ARG_TTS" ]; then
1176 option=$ARG_TTS 1168 option=$ARG_TTS
1177 else 1169 else
1178 echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${RBSPEAK}(${DEFAULT_CHOICE})?" 1170 echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?"
1179 option=`input` 1171 option=`input`
1180 if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi 1172 if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi
1181 advopts="$advopts --tts=$option" 1173 advopts="$advopts --tts=$option"
@@ -1211,6 +1203,11 @@ voiceconfig () {
1211 NOISEFLOOR="500" 1203 NOISEFLOOR="500"
1212 TTS_OPTS=$SWIFT_OPTS 1204 TTS_OPTS=$SWIFT_OPTS
1213 ;; 1205 ;;
1206 [Gg)
1207 TTS_ENGINE="gtts"
1208 NOISEFLOOR="500"
1209 TTS_OPTS=$GTTS_OPTS
1210 ;;
1214 [Oo]) 1211 [Oo])
1215 TTS_ENGINE="rbspeak" 1212 TTS_ENGINE="rbspeak"
1216 NOISEFLOOR="500" 1213 NOISEFLOOR="500"
diff --git a/tools/voice.pl b/tools/voice.pl
index 8198501777..56195d9e12 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -5,7 +5,7 @@
5# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < 5# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ 6# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7# \/ \/ \/ \/ \/ 7# \/ \/ \/ \/ \/
8# $Id$ 8# $Id$
9# 9#
10# Copyright (C) 2007 Jonas Häggqvist 10# Copyright (C) 2007 Jonas Häggqvist
11# 11#
@@ -33,46 +33,73 @@ sub printusage {
33Usage: voice.pl [options] [path to dir] 33Usage: voice.pl [options] [path to dir]
34 -V 34 -V
35 Create voice file. You must also specify -t and -l. 35 Create voice file. You must also specify -t and -l.
36 36
37 -C 37 -C
38 Create .talk clips. 38 Create .talk clips.
39 39
40 -t=<target> 40 -t=<target>
41 Specify which target you want to build voicefile for. Must include 41 Specify which target you want to build voicefile for. Must include
42 any features that target supports. 42 any features that target supports.
43 43
44 -i=<target_id> 44 -i=<target_id>
45 Numeric target id. Needed for voice building. 45 Numeric target id. Needed for voice building.
46 46
47 -l=<language> 47 -l=<language>
48 Specify which language you want to build. Without .lang extension. 48 Specify which language you want to build. Without .lang extension.
49 49
50 -e=<encoder> 50 -e=<encoder>
51 Which encoder to use for voice strings 51 Which encoder to use for voice strings
52 52
53 -E=<encoder options> 53 -E=<encoder options>
54 Which encoder options to use when compressing voice strings. Enclose 54 Which encoder options to use when compressing voice strings. Enclose
55 in double quotes if the options include spaces. 55 in double quotes if the options include spaces.
56 56
57 -s=<TTS engine> 57 -s=<TTS engine>
58 Which TTS engine to use. 58 Which TTS engine to use.
59 59
60 -S=<TTS engine options> 60 -S=<TTS engine options>
61 Options to pass to the TTS engine. Enclose in double quotes if the 61 Options to pass to the TTS engine. Enclose in double quotes if the
62 options include spaces. 62 options include spaces.
63 63
64 -v 64 -v
65 Be verbose 65 Be verbose
66USAGE 66USAGE
67; 67;
68} 68}
69 69
70my %festival_lang_map = {
71 'english' => 'english',
72 'english-us' => 'english',
73 'espanol' => 'spanish',
74 #'finnish' => 'finnish'
75 #'italiano' => 'italian',
76 #'czech' => 'czech',
77 #'welsh' => 'welsh'
78};
79
80my %gtts_lang_map = {
81 'english' => 'en-gb', # Always first, it's the golden master
82 'deutsch' => 'de',
83 'english-us' => 'en-us',
84 'francais' => 'fr-fr',
85 'greek' => 'gr',
86 'italiano' => 'it',
87 'norsk' => 'no',
88 'polski' => 'pl',
89 'russian' => 'ru',
90 'slovak' => 'sk',
91 'srpski' => 'sr',
92};
93
70# Initialize TTS engine. May return an object or value which will be passed 94# Initialize TTS engine. May return an object or value which will be passed
71# to voicestring and shutdown_tts 95# to voicestring and shutdown_tts
72sub init_tts { 96sub init_tts {
73 our $verbose; 97 our $verbose;
74 my ($tts_engine, $tts_engine_opts, $language) = @_; 98 my ($tts_engine, $tts_engine_opts, $language) = @_;
75 my %ret = ("name" => $tts_engine); 99 my %ret = ("name" => $tts_engine);
100 $ret{"format"} = 'wav';
101 $ret{"ttsoptions"} = "";
102
76 # Don't use given/when here - it's not compatible with old perl versions 103 # Don't use given/when here - it's not compatible with old perl versions
77 if ($tts_engine eq 'festival') { 104 if ($tts_engine eq 'festival') {
78 print("> festival $tts_engine_opts --server\n") if $verbose; 105 print("> festival $tts_engine_opts --server\n") if $verbose;
@@ -81,8 +108,10 @@ sub init_tts {
81 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; 108 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
82 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; 109 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
83 $ret{"pid"} = $pid; 110 $ret{"pid"} = $pid;
84 } 111 if (defined($festival_lang_map{$language})) {
85 elsif ($tts_engine eq 'sapi') { 112 $ret{"ttsoptions"} = "-l $festival_lang_map{$language} ";
113 }
114 } elsif ($tts_engine eq 'sapi') {
86 my $toolsdir = dirname($0); 115 my $toolsdir = dirname($0);
87 my $path = `cygpath $toolsdir -a -w`; 116 my $path = `cygpath $toolsdir -a -w`;
88 chomp($path); 117 chomp($path);
@@ -102,6 +131,11 @@ sub init_tts {
102 "stdin" => *CMD_IN, 131 "stdin" => *CMD_IN,
103 "stdout" => *CMD_OUT, 132 "stdout" => *CMD_OUT,
104 "vendor" => $vendor); 133 "vendor" => $vendor);
134 } elsif ($tts_engine eq 'gtts') {
135 $ret{"format"} = 'mp3';
136 if (defined($gtts_lang_map{$language})) {
137 $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} ";
138 }
105 } 139 }
106 return \%ret; 140 return \%ret;
107} 141}
@@ -143,6 +177,9 @@ sub voicestring {
143 my ($string, $output, $tts_engine_opts, $tts_object) = @_; 177 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
144 my $cmd; 178 my $cmd;
145 my $name = $$tts_object{'name'}; 179 my $name = $$tts_object{'name'};
180
181 $tts_engine_opts .= $$tts_object{"ttsoptions"};
182
146 printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose; 183 printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
147 if ($name eq 'festival') { 184 if ($name eq 'festival') {
148 # festival_client lies to us, so we have to do awful soul-eating 185 # festival_client lies to us, so we have to do awful soul-eating
@@ -167,7 +204,7 @@ sub voicestring {
167 elsif ($name eq 'flite') { 204 elsif ($name eq 'flite') {
168 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; 205 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
169 print("> $cmd\n") if $verbose; 206 print("> $cmd\n") if $verbose;
170 `$cmd`; 207 system($cmd);
171 } 208 }
172 elsif ($name eq 'espeak') { 209 elsif ($name eq 'espeak') {
173 $cmd = "espeak $tts_engine_opts -w \"$output\""; 210 $cmd = "espeak $tts_engine_opts -w \"$output\"";
@@ -193,11 +230,14 @@ sub voicestring {
193 close(RBSPEAK); 230 close(RBSPEAK);
194 } 231 }
195 elsif ($name eq 'mimic') { 232 elsif ($name eq 'mimic') {
196 $cmd = "mimic $tts_engine_opts -o $output"; 233 $cmd = "mimic $tts_engine_opts -o $output -t \"$string\" ";
197 print("> $cmd\n") if $verbose; 234 print("> $cmd\n") if $verbose;
198 open (MIMIC, "| $cmd"); 235 system($cmd);
199 print MIMIC $string . "\n"; 236 }
200 close(MIMIC); 237 elsif ($name eq 'gtts') {
238 $cmd = "gtts-cli $tts_engine_opts -o $output \"$string\"";
239 print("> $cmd\n") if $verbose;
240 system($cmd);
201 } 241 }
202} 242}
203 243
@@ -326,17 +366,22 @@ sub generateclips {
326 if ($id eq "VOICE_PAUSE") { 366 if ($id eq "VOICE_PAUSE") {
327 print("Use distributed $wav\n") if $verbose; 367 print("Use distributed $wav\n") if $verbose;
328 copy(dirname($0)."/VOICE_PAUSE.wav", $wav); 368 copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
369 } else {
370 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
371 if ($tts_object->{'format'} eq "wav") {
372 wavtrim($wav, 500, $tts_object);
373 # 500 seems to be a reasonable default for now
374 }
329 } 375 }
330 else { 376 if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") {
331 voicestring($voice, $wav, $tts_engine_opts, $tts_object); 377 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
332 wavtrim($wav, 500, $tts_object); 378 } else {
333 # 500 seems to be a reasonable default for now 379 copy($wav, $mp3);
334 } 380 }
335 381
336 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
337 synchronize($tts_object); 382 synchronize($tts_object);
338 if (defined($ENV{'POOL'})) { 383 if (defined($ENV{'POOL'})) {
339 copy($mp3, $pool_file); 384 copy($mp3, $pool_file);
340 } 385 }
341 unlink($wav); 386 unlink($wav);
342 } 387 }