diff options
Diffstat (limited to 'tools/voice.pl')
-rwxr-xr-x | tools/voice.pl | 146 |
1 file changed, 98 insertions, 48 deletions
diff --git a/tools/voice.pl b/tools/voice.pl
index 6b3f807854..337407e187 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -42,7 +42,8 @@ Usage: voice.pl [options] [path to dir] | |||
42 | Specify which target you want to build voicefile for. Must include | 42 | Specify which target you want to build voicefile for. Must include |
43 | any features that target supports. | 43 | any features that target supports. |
44 | 44 | ||
45 | -f=<file> Use existing voiceids file | 45 | -f=<file> |
46 | Use existing voiceids file | ||
46 | 47 | ||
47 | -i=<target_id> | 48 | -i=<target_id> |
48 | Numeric target id. Needed for voice building. | 49 | Numeric target id. Needed for voice building. |
@@ -64,7 +65,8 @@ Usage: voice.pl [options] [path to dir] | |||
64 | Options to pass to the TTS engine. Enclose in double quotes if the | 65 | Options to pass to the TTS engine. Enclose in double quotes if the |
65 | options include spaces. | 66 | options include spaces. |
66 | 67 | ||
67 | -F Force the file to be regenerated even if present | 68 | -F |
69 | Force the file to be regenerated even if present | ||
68 | 70 | ||
69 | -v | 71 | -v |
70 | Be verbose | 72 | Be verbose |
@@ -73,57 +75,78 @@ USAGE | |||
73 | } | 75 | } |
74 | 76 | ||
75 | my %festival_lang_map = ( | 77 | my %festival_lang_map = ( |
76 | 'english' => 'english', | 78 | 'english' => 'english', |
77 | 'english-us' => 'english', | 79 | 'english-us' => 'english', |
78 | 'espanol' => 'spanish', | 80 | 'espanol' => 'spanish', |
79 | #'finnish' => 'finnish' | 81 | #'finnish' => 'finnish' |
80 | #'italiano' => 'italian', | 82 | #'italiano' => 'italian', |
81 | #'czech' => 'czech', | 83 | #'czech' => 'czech', |
82 | #'welsh' => 'welsh' | 84 | #'welsh' => 'welsh' |
83 | ); | 85 | ); |
84 | 86 | ||
85 | my %gtts_lang_map = ( | 87 | my %gtts_lang_map = ( |
86 | 'english' => '-l en -t co.uk', # Always first, it's the golden master | 88 | 'english' => '-l en -t co.uk', # Always first, it's the golden master |
87 | 'czech' => '-l cs', # not supported | 89 | 'czech' => '-l cs', |
88 | 'dansk' => '-l da', | 90 | 'dansk' => '-l da', |
89 | 'deutsch' => '-l de', | 91 | 'deutsch' => '-l de', |
90 | 'english-us' => '-l en -t us', | 92 | 'english-us' => '-l en -t us', |
91 | 'espanol' => '-l es', | 93 | 'espanol' => '-l es', |
92 | 'francais' => '-l fr', | 94 | 'francais' => '-l fr', |
93 | 'greek' => '-l el', | 95 | 'greek' => '-l el', |
94 | 'magyar' => '-l hu', | 96 | 'magyar' => '-l hu', |
95 | 'italiano' => '-l it', | 97 | 'italiano' => '-l it', |
96 | 'nederlands' => '-l nl', | 98 | 'nederlands' => '-l nl', |
97 | 'norsk' => '-l no', | 99 | 'norsk' => '-l no', |
98 | 'polski' => '-l pl', | 100 | 'polski' => '-l pl', |
99 | 'russian' => '-l ru', | 101 | 'russian' => '-l ru', |
100 | 'slovak' => '-l sk', | 102 | 'slovak' => '-l sk', |
101 | 'srpski' => '-l sr', | 103 | 'srpski' => '-l sr', |
102 | 'svenska' => '-l sv', | 104 | 'svenska' => '-l sv', |
103 | 'turkce' => '-l tr', | 105 | 'turkce' => '-l tr', |
104 | ); | 106 | ); |
105 | 107 | ||
106 | my %espeak_lang_map = ( | 108 | my %espeak_lang_map = ( |
107 | 'english' => 'en-gb', # Always first, it's the golden master | 109 | 'english' => '-ven-gb -k 5', # Always first, it's the golden master |
108 | 'czech' => 'cs', | 110 | 'czech' => '-vcs', |
109 | 'dansk' => 'da', | 111 | 'dansk' => '-vda', |
110 | 'deutsch' => 'de', | 112 | 'deutsch' => '-vde', |
111 | 'english-us' => 'en-us', | 113 | 'english-us' => '-ven-us -k 5', |
112 | 'espanol' => 'es', | 114 | 'espanol' => '-ves', |
113 | 'francais' => 'fr-fr', | 115 | 'francais' => '-vfr-fr', |
114 | 'greek' => 'el', | 116 | 'greek' => '-vel', |
115 | 'nederlands' => 'nl', | 117 | 'magyar' => '-vhu', |
116 | 'magyar' => 'hu', | 118 | 'italiano' => '-vit', |
117 | 'italiano' => 'it', | 119 | 'japanese' => '-vja', |
118 | 'japanese' => 'ja', | 120 | 'nederlands' => '-vnl', |
119 | 'nederlands' => 'nl', | 121 | 'norsk' => '-vno', |
120 | 'norsk' => 'no', | 122 | 'polski' => '-vpl', |
121 | 'polski' => 'pl', | 123 | 'russian' => '-vru', |
122 | 'russian' => 'ru', | 124 | 'slovak' => '-vsk', |
123 | 'slovak' => 'sk', | 125 | 'srpski' => '-vsr', |
124 | 'srpski' => 'sr', | 126 | 'svenska' => '-vsv', |
125 | 'svenska' => 'sv', | 127 | 'turkce' => '-vtr', |
126 | 'turkce' => 'tr', | 128 | ); |
129 | |||
130 | my %piper_lang_map = ( | ||
131 | 'english' => 'en_GB-cori-high.onnx', # Always first, it's the golden master | ||
132 | 'czech' => 'cs_CZ-jirka-medium.onnx', | ||
133 | 'dansk' => 'da_DK-talesyntese-medium.onnx', | ||
134 | 'deutsch' => 'de_DE-thorsten-high.onnx', | ||
135 | 'english-us' => 'en_US-libritts-high.onnx', | ||
136 | 'espanol' => 'es_ES-sharvard-medium.onnx', | ||
137 | 'francais' => 'fr_FR-siwis-medium.onnx', | ||
138 | 'greek' => 'el_GR-rapunzelina-low.onnx', | ||
139 | # 'magyar' => '-vhu', | ||
140 | 'italiano' => 'it_IT-riccardo-x_low.onnx', | ||
141 | # 'japanese' => '-vja', | ||
142 | 'nederlands' => 'nl_NL-mls-medium.onnx', | ||
143 | 'norsk' => 'no_NO-talesyntese-medium.onnx', | ||
144 | 'polski' => 'pl_PL-gosia-medium.onnx', | ||
145 | 'russian' => 'ru_RU-irina-medium.onnx', | ||
146 | 'slovak' => 'sk_SK-lili-medium.onnx', | ||
147 | 'srpski' => 'sr_RS-serbski_institut-medium.onnx', | ||
148 | 'svenska' => 'sv_SE-nst-medium.onnx', | ||
149 | 'turkce' => 'tr_TR-fettah-medium.onnx', | ||
127 | ); | 150 | ); |
128 | 151 | ||
129 | my $trim_thresh = 500; # Trim silence if over this, in ms | 152 | my $trim_thresh = 500; # Trim silence if over this, in ms |
@@ -141,6 +164,7 @@ sub init_tts { | |||
141 | # Don't use given/when here - it's not compatible with old perl versions | 164 | # Don't use given/when here - it's not compatible with old perl versions |
142 | if ($tts_engine eq 'festival') { | 165 | if ($tts_engine eq 'festival') { |
143 | print("> festival $tts_engine_opts --server\n") if $verbose; | 166 | print("> festival $tts_engine_opts --server\n") if $verbose; |
167 | # Open command, and filehandles for STDIN, STDOUT, STDERR | ||
144 | my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); | 168 | my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1"); |
145 | my $dummy = *FESTIVAL_SERVER; #suppress warning | 169 | my $dummy = *FESTIVAL_SERVER; #suppress warning |
146 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; | 170 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; |
@@ -149,6 +173,21 @@ sub init_tts { | |||
149 | if (defined($festival_lang_map{$language}) && $tts_engine_opts !~ /--language/) { | 173 | if (defined($festival_lang_map{$language}) && $tts_engine_opts !~ /--language/) { |
150 | $ret{"ttsoptions"} = "--language $festival_lang_map{$language} "; | 174 | $ret{"ttsoptions"} = "--language $festival_lang_map{$language} "; |
151 | } | 175 | } |
176 | } elsif ($tts_engine eq 'piper') { | ||
177 | my $cmd = "piper $tts_engine_opts --json-input"; | ||
178 | print("> $cmd\n") if $verbose; | ||
179 | |||
180 | my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd); | ||
181 | $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); }; | ||
182 | $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); }; | ||
183 | $ret{"pid"} = $pid; | ||
184 | binmode(*CMD_IN, ':encoding(utf8)'); | ||
185 | binmode(*CMD_OUT, ':encoding(utf8)'); | ||
186 | binmode(*CMD_ERR, ':encoding(utf8)'); | ||
187 | if (defined($piper_lang_map{$language}) && $tts_engine_opts !~ /--model/) { | ||
188 | die("Need PIPER_MODEL_DIR\n") if (!defined($ENV{'PIPER_MODEL_DIR'})); | ||
189 | $ret{"ttsoptions"} = "--model $ENV{PIPER_MODEL_DIR}/$piper_lang_map{$language} "; | ||
190 | } | ||
152 | } elsif ($tts_engine eq 'sapi') { | 191 | } elsif ($tts_engine eq 'sapi') { |
153 | my $toolsdir = dirname($0); | 192 | my $toolsdir = dirname($0); |
154 | my $path = `cygpath $toolsdir -a -w`; | 193 | my $path = `cygpath $toolsdir -a -w`; |
@@ -176,7 +215,7 @@ sub init_tts { | |||
176 | } | 215 | } |
177 | } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') { | 216 | } elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') { |
178 | if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) { | 217 | if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) { |
179 | $ret{"ttsoptions"} = "-v$espeak_lang_map{$language} "; | 218 | $ret{"ttsoptions"} = " $espeak_lang_map{$language} "; |
180 | } | 219 | } |
181 | } | 220 | } |
182 | 221 | ||
@@ -190,6 +229,10 @@ sub shutdown_tts { | |||
190 | # Send SIGTERM to festival server | 229 | # Send SIGTERM to festival server |
191 | kill TERM => $$tts_object{"pid"}; | 230 | kill TERM => $$tts_object{"pid"}; |
192 | } | 231 | } |
232 | elsif ($$tts_object{'name'} eq 'piper') { | ||
233 | # Send SIGTERM to piper | ||
234 | kill TERM => $$tts_object{"pid"}; | ||
235 | } | ||
193 | elsif ($$tts_object{'name'} eq 'sapi') { | 236 | elsif ($$tts_object{'name'} eq 'sapi') { |
194 | print({$$tts_object{"stdin"}} "QUIT\r\n"); | 237 | print({$$tts_object{"stdin"}} "QUIT\r\n"); |
195 | close($$tts_object{"stdin"}); | 238 | close($$tts_object{"stdin"}); |
@@ -244,6 +287,13 @@ sub voicestring { | |||
244 | close(CMD_OUT); | 287 | close(CMD_OUT); |
245 | close(CMD_ERR); | 288 | close(CMD_ERR); |
246 | } | 289 | } |
290 | elsif ($name eq 'piper') { | ||
291 | $cmd = "{ \"text\": \"$string\", \"output_file\": \"$output\" }"; | ||
292 | print(">> $cmd\n") if $verbose; | ||
293 | print(CMD_IN "$cmd\n"); | ||
294 | my $res = <CMD_OUT>; | ||
295 | $res = <CMD_ERR>; | ||
296 | } | ||
247 | elsif ($name eq 'flite') { | 297 | elsif ($name eq 'flite') { |
248 | $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; | 298 | $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\""; |
249 | print("> $cmd\n") if $verbose; | 299 | print("> $cmd\n") if $verbose; |
@@ -469,7 +519,6 @@ sub generateclips { | |||
469 | print("\n"); | 519 | print("\n"); |
470 | 520 | ||
471 | unlink($updfile) if (-f $updfile); | 521 | unlink($updfile) if (-f $updfile); |
472 | shutdown_tts($tts_object); | ||
473 | } | 522 | } |
474 | 523 | ||
475 | # Assemble the voicefile | 524 | # Assemble the voicefile |
@@ -608,6 +657,7 @@ if ($V == 1) { | |||
608 | defined($t) ? $t : "unknown", | 657 | defined($t) ? $t : "unknown", |
609 | $l, $e, $E, $s, $S); | 658 | $l, $e, $E, $s, $S); |
610 | generateclips($l, $t, $e, $E, $tts_object, $S, $f); | 659 | generateclips($l, $t, $e, $E, $tts_object, $S, $f); |
660 | shutdown_tts($tts_object); | ||
611 | createvoice($l, $i, $f); | 661 | createvoice($l, $i, $f); |
612 | deleteencs(); | 662 | deleteencs(); |
613 | } elsif ($C) { | 663 | } elsif ($C) { |