diff options
author | Solomon Peachy <pizza@shaftnet.org> | 2020-07-08 19:05:09 -0400 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2020-07-08 23:16:57 +0000 |
commit | 2c3399537cfb9d481a1e31a4b625d2abb97664e2 (patch) | |
tree | 2a612cfd81ee1766e9b2e61801f231911d577137 | |
parent | 5e98eba8abc16d494802ed41f37ef7cec54dc8f4 (diff) | |
download | rockbox-2c3399537cfb9d481a1e31a4b625d2abb97664e2.tar.gz rockbox-2c3399537cfb9d481a1e31a4b625d2abb97664e2.zip |
voice: Add support for Google Translate's speech synthesizer
Uses the 'gtts-cli' command line client. Supports a wide variety of
languages, including all "Complete" and "Good" Rockbox translations.
Additional changes:
* Voice synth script can accept pre-encoded MP3 files
* Move language->synth options mapping into the voice script
* Additional cleanups
Change-Id: I9523e2bca87cbcee2d8c4111f9892e8e458c7419
-rwxr-xr-x | tools/configure | 53 | ||||
-rwxr-xr-x | tools/voice.pl | 91 |
2 files changed, 93 insertions, 51 deletions
diff --git a/tools/configure b/tools/configure index 585eadd357..aa8b190250 100755 --- a/tools/configure +++ b/tools/configure | |||
@@ -1111,23 +1111,7 @@ voiceconfig () { | |||
1111 | fi | 1111 | fi |
1112 | if [ -n "`findtool festival`" ]; then | 1112 | if [ -n "`findtool festival`" ]; then |
1113 | FESTIVAL="(F)estival " | 1113 | FESTIVAL="(F)estival " |
1114 | case "$thislang" in | 1114 | FESTIVAL_OPTS="" |
1115 | "italiano") | ||
1116 | FESTIVAL_OPTS="--language italian" | ||
1117 | ;; | ||
1118 | "espanol") | ||
1119 | FESTIVAL_OPTS="--language spanish" | ||
1120 | ;; | ||
1121 | "finnish") | ||
1122 | FESTIVAL_OPTS="--language finnish" | ||
1123 | ;; | ||
1124 | "czech") | ||
1125 | FESTIVAL_OPTS="--language czech" | ||
1126 | ;; | ||
1127 | *) | ||
1128 | FESTIVAL_OPTS="" | ||
1129 | ;; | ||
1130 | esac | ||
1131 | DEFAULT_TTS="festival" | 1115 | DEFAULT_TTS="festival" |
1132 | DEFAULT_TTS_OPTS=$FESTIVAL_OPTS | 1116 | DEFAULT_TTS_OPTS=$FESTIVAL_OPTS |
1133 | DEFAULT_NOISEFLOOR="500" | 1117 | DEFAULT_NOISEFLOOR="500" |
@@ -1149,14 +1133,6 @@ voiceconfig () { | |||
1149 | DEFAULT_NOISEFLOOR="500" | 1133 | DEFAULT_NOISEFLOOR="500" |
1150 | DEFAULT_CHOICE="w" | 1134 | DEFAULT_CHOICE="w" |
1151 | fi | 1135 | fi |
1152 | if [ -n "`findtool rbspeak`" ]; then | ||
1153 | RBSPEAK="(O)ther " | ||
1154 | RBSPEAK_OPTS="" | ||
1155 | DEFAULT_TTS="rbspeak" | ||
1156 | DEFAULT_TTS_OPTS=$RBSPEAK_OPTS | ||
1157 | DEFAULT_NOISEFLOOR="500" | ||
1158 | DEFAULT_CHOICE="O" | ||
1159 | fi | ||
1160 | # Allow SAPI if Windows is in use | 1136 | # Allow SAPI if Windows is in use |
1161 | if [ -n "`findtool winver`" ]; then | 1137 | if [ -n "`findtool winver`" ]; then |
1162 | SAPI="(S)API " | 1138 | SAPI="(S)API " |
@@ -1164,10 +1140,26 @@ voiceconfig () { | |||
1164 | DEFAULT_TTS="sapi" | 1140 | DEFAULT_TTS="sapi" |
1165 | DEFAULT_TTS_OPTS=$SAPI_OPTS | 1141 | DEFAULT_TTS_OPTS=$SAPI_OPTS |
1166 | DEFAULT_NOISEFLOOR="500" | 1142 | DEFAULT_NOISEFLOOR="500" |
1167 | DEFAULT_CHOICE="s" | 1143 | DEFAULT_CHOICE="S" |
1144 | fi | ||
1145 | if [ -n "`findtool gtts-cli`" ]; then | ||
1146 | GTTS="(g)tts " | ||
1147 | GTTS_OPTS="" | ||
1148 | DEFAULT_TTS="gtts" | ||
1149 | DEFAULT_TTS_OPTS=$GTTS_OPTS | ||
1150 | DEFAULT_NOISEFLOOR="500" | ||
1151 | DEFAULT_CHOICE="g" | ||
1152 | fi | ||
1153 | if [ -n "`findtool rbspeak`" ]; then | ||
1154 | RBSPEAK="(O)ther " | ||
1155 | RBSPEAK_OPTS="" | ||
1156 | DEFAULT_TTS="rbspeak" | ||
1157 | DEFAULT_TTS_OPTS=$RBSPEAK_OPTS | ||
1158 | DEFAULT_NOISEFLOOR="500" | ||
1159 | DEFAULT_CHOICE="O" | ||
1168 | fi | 1160 | fi |
1169 | 1161 | ||
1170 | if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ]; then | 1162 | if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$MIMIC"] && [ "$MIMIC" = "$SWIFT" ] && [ "$SWIFT" = "$RBSPEAK" ] && [ "$RBSPEAK" = "$GTTS" ] ; then |
1171 | echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files" | 1163 | echo "You need Festival, eSpeak, Mimic, Flite, or rbspeak in your path, or SAPI available to build voice files" |
1172 | exit 3 | 1164 | exit 3 |
1173 | fi | 1165 | fi |
@@ -1175,7 +1167,7 @@ voiceconfig () { | |||
1175 | if [ "$ARG_TTS" ]; then | 1167 | if [ "$ARG_TTS" ]; then |
1176 | option=$ARG_TTS | 1168 | option=$ARG_TTS |
1177 | else | 1169 | else |
1178 | echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${RBSPEAK}(${DEFAULT_CHOICE})?" | 1170 | echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${MIMIC}${SAPI}${SWIFT}${GTTS}${RBSPEAK}(${DEFAULT_CHOICE})?" |
1179 | option=`input` | 1171 | option=`input` |
1180 | if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi | 1172 | if [ -z "$option" ]; then option=${DEFAULT_CHOICE}; fi |
1181 | advopts="$advopts --tts=$option" | 1173 | advopts="$advopts --tts=$option" |
@@ -1211,6 +1203,11 @@ voiceconfig () { | |||
1211 | NOISEFLOOR="500" | 1203 | NOISEFLOOR="500" |
1212 | TTS_OPTS=$SWIFT_OPTS | 1204 | TTS_OPTS=$SWIFT_OPTS |
1213 | ;; | 1205 | ;; |
1206 | [Gg) | ||
1207 | TTS_ENGINE="gtts" | ||
1208 | NOISEFLOOR="500" | ||
1209 | TTS_OPTS=$GTTS_OPTS | ||
1210 | ;; | ||
1214 | [Oo]) | 1211 | [Oo]) |
1215 | TTS_ENGINE="rbspeak" | 1212 | TTS_ENGINE="rbspeak" |
1216 | NOISEFLOOR="500" | 1213 | NOISEFLOOR="500" |
diff --git a/tools/voice.pl b/tools/voice.pl index 8198501777..56195d9e12 100755 --- a/tools/voice.pl +++ b/tools/voice.pl | |||
@@ -5,7 +5,7 @@ | |||
5 | # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | 5 | # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < |
6 | # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | 6 | # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ |
7 | # \/ \/ \/ \/ \/ | 7 | # \/ \/ \/ \/ \/ |
8 | # $Id$ | 8 | # $Id$ |
9 | # | 9 | # |
10 | # Copyright (C) 2007 Jonas Häggqvist | 10 | # Copyright (C) 2007 Jonas Häggqvist |
11 | # | 11 | # |
@@ -33,46 +33,73 @@ sub printusage { | |||
33 | Usage: voice.pl [options] [path to dir] | 33 | Usage: voice.pl [options] [path to dir] |
34 | -V | 34 | -V |
35 | Create voice file. You must also specify -t and -l. | 35 | Create voice file. You must also specify -t and -l. |
36 | 36 | ||
37 | -C | 37 | -C |
38 | Create .talk clips. | 38 | Create .talk clips. |
39 | 39 | ||
40 | -t=<target> | 40 | -t=<target> |
41 | Specify which target you want to build voicefile for. Must include | 41 | Specify which target you want to build voicefile for. Must include |
42 | any features that target supports. | 42 | any features that target supports. |
43 | 43 | ||
44 | -i=<target_id> | 44 | -i=<target_id> |
45 | Numeric target id. Needed for voice building. | 45 | Numeric target id. Needed for voice building. |
46 | 46 | ||
47 | -l=<language> | 47 | -l=<language> |
48 | Specify which language you want to build. Without .lang extension. | 48 | Specify which language you want to build. Without .lang extension. |
49 | 49 | ||
50 | -e=<encoder> | 50 | -e=<encoder> |
51 | Which encoder to use for voice strings | 51 | Which encoder to use for voice strings |
52 | 52 | ||
53 | -E=<encoder options> | 53 | -E=<encoder options> |
54 | Which encoder options to use when compressing voice strings. Enclose | 54 | Which encoder options to use when compressing voice strings. Enclose |
55 | in double quotes if the options include spaces. | 55 | in double quotes if the options include spaces. |
56 | 56 | ||
57 | -s=<TTS engine> | 57 | -s=<TTS engine> |
58 | Which TTS engine to use. | 58 | Which TTS engine to use. |
59 | 59 | ||
60 | -S=<TTS engine options> | 60 | -S=<TTS engine options> |
61 | Options to pass to the TTS engine. Enclose in double quotes if the | 61 | Options to pass to the TTS engine. Enclose in double quotes if the |
62 | options include spaces. | 62 | options include spaces. |
63 | 63 | ||
64 | -v | 64 | -v |
65 | Be verbose | 65 | Be verbose |
66 | USAGE | 66 | USAGE |
67 | ; | 67 | ; |
68 | } | 68 | } |
69 | 69 | ||
70 | my %festival_lang_map = { | ||
71 | 'english' => 'english', | ||
72 | 'english-us' => 'english', | ||
73 | 'espanol' => 'spanish', | ||
74 | #'finnish' => 'finnish' | ||
75 | #'italiano' => 'italian', | ||
76 | #'czech' => 'czech', | ||
77 | #'welsh' => 'welsh' | ||
78 | }; | ||
79 | |||
80 | my %gtts_lang_map = { | ||
81 | 'english' => 'en-gb', # Always first, it's the golden master | ||
82 | 'deutsch' => 'de', | ||
83 | 'english-us' => 'en-us', | ||
84 | 'francais' => 'fr-fr', | ||
85 | 'greek' => 'gr', | ||
86 | 'italiano' => 'it', | ||
87 | 'norsk' => 'no', | ||
88 | 'polski' => 'pl', | ||
89 | 'russian' => 'ru', | ||
90 | 'slovak' => 'sk', | ||
91 | 'srpski' => 'sr', | ||
92 | }; | ||
93 | |||
70 | # Initialize TTS engine. May return an object or value which will be passed | 94 | # Initialize TTS engine. May return an object or value which will be passed |
71 | # to voicestring and shutdown_tts | 95 | # to voicestring and shutdown_tts |
72 | sub init_tts { | 96 | sub init_tts { |
73 | our $verbose; | 97 | our $verbose; |
74 | my ($tts_engine, $tts_engine_opts, $language) = @_; | 98 | my ($tts_engine, $tts_engine_opts, $language) = @_; |
75 | my %ret = ("name" => $tts_engine); | 99 | my %ret = ("name" => $tts_engine); |
100 | $ret{"format"} = 'wav'; | ||
101 | $ret{"ttsoptions"} = ""; | ||
102 | |||
76 | # Don't use given/when here - it's not compatible with old perl versions | 103 | # Don't use given/when here - it's not compatible with old perl versions |
77 | if ($tts_engine eq 'festival') { | 104 | if ($tts_engine eq 'festival') { |
78 | print("> festival $tts_engine_opts --server\n") if $verbose; | 105 | print("> festival $tts_engine_opts --server\n") if $verbose; |
@@ -81,8 +108,10 @@ sub init_tts { | |||
81 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; | 108 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; |
82 | $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; | 109 | $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; |
83 | $ret{"pid"} = $pid; | 110 | $ret{"pid"} = $pid; |
84 | } | 111 | if (defined($festival_lang_map{$language})) { |
85 | elsif ($tts_engine eq 'sapi') { | 112 | $ret{"ttsoptions"} = "-l $festival_lang_map{$language} "; |
113 | } | ||
114 | } elsif ($tts_engine eq 'sapi') { | ||
86 | my $toolsdir = dirname($0); | 115 | my $toolsdir = dirname($0); |
87 | my $path = `cygpath $toolsdir -a -w`; | 116 | my $path = `cygpath $toolsdir -a -w`; |
88 | chomp($path); | 117 | chomp($path); |
@@ -102,6 +131,11 @@ sub init_tts { | |||
102 | "stdin" => *CMD_IN, | 131 | "stdin" => *CMD_IN, |
103 | "stdout" => *CMD_OUT, | 132 | "stdout" => *CMD_OUT, |
104 | "vendor" => $vendor); | 133 | "vendor" => $vendor); |
134 | } elsif ($tts_engine eq 'gtts') { | ||
135 | $ret{"format"} = 'mp3'; | ||
136 | if (defined($gtts_lang_map{$language})) { | ||
137 | $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} "; | ||
138 | } | ||
105 | } | 139 | } |
106 | return \%ret; | 140 | return \%ret; |
107 | } | 141 | } |
@@ -143,6 +177,9 @@ sub voicestring { | |||
143 | my ($string, $output, $tts_engine_opts, $tts_object) = @_; | 177 | my ($string, $output, $tts_engine_opts, $tts_object) = @_; |
144 | my $cmd; | 178 | my $cmd; |
145 | my $name = $$tts_object{'name'}; | 179 | my $name = $$tts_object{'name'}; |
180 | |||
181 | $tts_engine_opts .= $$tts_object{"ttsoptions"}; | ||
182 | |||
146 | printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose; | 183 | printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose; |
147 | if ($name eq 'festival') { | 184 | if ($name eq 'festival') { |
148 | # festival_client lies to us, so we have to do awful soul-eating | 185 | # festival_client lies to us, so we have to do awful soul-eating |
@@ -167,7 +204,7 @@ sub voicestring { | |||
167 | elsif ($name eq 'flite') { | 204 | elsif ($name eq 'flite') { |
168 | $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; | 205 | $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; |
169 | print("> $cmd\n") if $verbose; | 206 | print("> $cmd\n") if $verbose; |
170 | `$cmd`; | 207 | system($cmd); |
171 | } | 208 | } |
172 | elsif ($name eq 'espeak') { | 209 | elsif ($name eq 'espeak') { |
173 | $cmd = "espeak $tts_engine_opts -w \"$output\""; | 210 | $cmd = "espeak $tts_engine_opts -w \"$output\""; |
@@ -193,11 +230,14 @@ sub voicestring { | |||
193 | close(RBSPEAK); | 230 | close(RBSPEAK); |
194 | } | 231 | } |
195 | elsif ($name eq 'mimic') { | 232 | elsif ($name eq 'mimic') { |
196 | $cmd = "mimic $tts_engine_opts -o $output"; | 233 | $cmd = "mimic $tts_engine_opts -o $output -t \"$string\" "; |
197 | print("> $cmd\n") if $verbose; | 234 | print("> $cmd\n") if $verbose; |
198 | open (MIMIC, "| $cmd"); | 235 | system($cmd); |
199 | print MIMIC $string . "\n"; | 236 | } |
200 | close(MIMIC); | 237 | elsif ($name eq 'gtts') { |
238 | $cmd = "gtts-cli $tts_engine_opts -o $output \"$string\""; | ||
239 | print("> $cmd\n") if $verbose; | ||
240 | system($cmd); | ||
201 | } | 241 | } |
202 | } | 242 | } |
203 | 243 | ||
@@ -326,17 +366,22 @@ sub generateclips { | |||
326 | if ($id eq "VOICE_PAUSE") { | 366 | if ($id eq "VOICE_PAUSE") { |
327 | print("Use distributed $wav\n") if $verbose; | 367 | print("Use distributed $wav\n") if $verbose; |
328 | copy(dirname($0)."/VOICE_PAUSE.wav", $wav); | 368 | copy(dirname($0)."/VOICE_PAUSE.wav", $wav); |
369 | } else { | ||
370 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); | ||
371 | if ($tts_object->{'format'} eq "wav") { | ||
372 | wavtrim($wav, 500, $tts_object); | ||
373 | # 500 seems to be a reasonable default for now | ||
374 | } | ||
329 | } | 375 | } |
330 | else { | 376 | if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") { |
331 | voicestring($voice, $wav, $tts_engine_opts, $tts_object); | 377 | encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); |
332 | wavtrim($wav, 500, $tts_object); | 378 | } else { |
333 | # 500 seems to be a reasonable default for now | 379 | copy($wav, $mp3); |
334 | } | 380 | } |
335 | 381 | ||
336 | encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object); | ||
337 | synchronize($tts_object); | 382 | synchronize($tts_object); |
338 | if (defined($ENV{'POOL'})) { | 383 | if (defined($ENV{'POOL'})) { |
339 | copy($mp3, $pool_file); | 384 | copy($mp3, $pool_file); |
340 | } | 385 | } |
341 | unlink($wav); | 386 | unlink($wav); |
342 | } | 387 | } |