diff options
Diffstat (limited to 'tools/voice.pl')
-rwxr-xr-x | tools/voice.pl | 138 |
1 files changed, 38 insertions, 100 deletions
diff --git a/tools/voice.pl b/tools/voice.pl index 32db75c5e1..ee68c30eb4 100755 --- a/tools/voice.pl +++ b/tools/voice.pl | |||
@@ -128,106 +128,12 @@ sub correct_string { | |||
128 | our $verbose; | 128 | our $verbose; |
129 | my ($string, $language, $tts_object) = @_; | 129 | my ($string, $language, $tts_object) = @_; |
130 | my $orig = $string; | 130 | my $orig = $string; |
131 | switch($language) { | 131 | my $corrections = $tts_object->{"corrections"}; |
132 | # General for all engines and languages | 132 | |
133 | $string =~ s/USB/U S B/g; | 133 | foreach (@$corrections) { |
134 | $string =~ s/ID3/I D 3/g; | 134 | my $r = "s" . $_->{separator} . $_->{search} . $_->{separator} |
135 | 135 | . $_->{replace} . $_->{separator} . $_->{modifier}; | |
136 | case "english" { | 136 | eval ('$string =~' . "$r;"); |
137 | switch($$tts_object{"name"}) { | ||
138 | case ["sapi","festival"] { | ||
139 | $string =~ s/plugin(s?)/plug-in$1/ig; next | ||
140 | } | ||
141 | case "festival" { | ||
142 | $string =~ s/\ba\b/ay/ig; | ||
143 | $string =~ s/$/./; | ||
144 | } | ||
145 | } | ||
146 | } | ||
147 | case "deutsch" { | ||
148 | # for all german engines (e.g. for english words) | ||
149 | $string =~ s/alkaline/alkalein/ig; | ||
150 | $string =~ s/byte(s?)/beit$1/ig; | ||
151 | $string =~ s/clip(s?)/klipp$1/ig; | ||
152 | $string =~ s/\bcover/kawwer/ig; | ||
153 | $string =~ s/cuesheet/kjuschiet/ig; | ||
154 | $string =~ s/dither/didder/ig; | ||
155 | $string =~ s/equalizer/iquileiser/ig; | ||
156 | $string =~ s/\bflash\b/fläsh/ig; | ||
157 | $string =~ s/\bfirmware(s?)\b/firmwer$1/ig; | ||
158 | $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here | ||
159 | $string =~ s/\bloudness\b/laudness/ig; | ||
160 | $string =~ s/\bunicode\b/unikod/ig; | ||
161 | switch($$tts_object{"name"}) { | ||
162 | case "sapi" { # just for SAPI | ||
163 | switch($$tts_object{"vendor"}) { | ||
164 | case "AT&T Labs" { | ||
165 | $string =~ s/alphabet/alfabet/ig; | ||
166 | $string =~ s/ampere/amper/ig; | ||
167 | $string =~ s/\bdezibel\b/de-zibell/ig; | ||
168 | $string =~ s/diddering/didde-ring/ig; | ||
169 | $string =~ s/energie\b/ener-gie/ig; | ||
170 | $string =~ s/\Blauf\b/-lauf/ig; | ||
171 | $string =~ s/\bnumerisch\b/numehrisch/ig; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | case "svenska" { | ||
178 | # for all swedish engines (e.g. for english words) | ||
179 | $string =~ s/kilobyte/kilobajt/ig; | ||
180 | $string =~ s/megabyte/megabajt/ig; | ||
181 | $string =~ s/gigabyte/gigabajt/ig; | ||
182 | $string =~ s/\bloudness\b/laudness/ig; | ||
183 | |||
184 | switch($$tts_object{"name"}) { | ||
185 | case "espeak" { # just for eSpeak | ||
186 | $string =~ s/ampere/ampär/ig; | ||
187 | $string =~ s/bokmärken/bok-märken/ig; | ||
188 | $string =~ s/generella/schenerella/ig; | ||
189 | $string =~ s/dithering/diddering/ig; | ||
190 | $string =~ s/\bunicode\b/jynikod/ig; | ||
191 | $string =~ s/uttoning/utoning/ig; | ||
192 | $string =~ s/procent/pro-cent/ig; | ||
193 | $string =~ s/spellistor/spelistor/ig; | ||
194 | $string =~ s/cuesheet/qjyschiit/ig; | ||
195 | } | ||
196 | } | ||
197 | } | ||
198 | case "italiano" { | ||
199 | # for all italian engines (e.g. for english words) | ||
200 | $string =~ s/Replaygain/Ripleyghein/ig; | ||
201 | $string =~ s/Crossfade/Crossfeid/ig; | ||
202 | $string =~ s/beep/Bip/ig; | ||
203 | $string =~ s/cuesheet/chiushit/ig; | ||
204 | $string =~ s/fade/feid/ig; | ||
205 | $string =~ s/Crossfeed/crossfid/ig; | ||
206 | $string =~ s/Cache/chash/ig; | ||
207 | $string =~ s/\bfirmware(s?)\b/firmuer$1/ig; | ||
208 | $string =~ s/\bFile(s?)\b/fail$1/ig; | ||
209 | $string =~ s/\bloudness\b/laudness/ig; | ||
210 | $string =~ s/\bunicode\b/unikod/ig; | ||
211 | $string =~ s/Playlist/pleylist/ig; | ||
212 | $string =~ s/WavPack/wave pak/ig; | ||
213 | $string =~ s/BITRATE/bit reit/ig; | ||
214 | $string =~ s/Codepage/cod page/ig; | ||
215 | $string =~ s/PCM Wave/pcm Ue'iv/ig; | ||
216 | switch($$tts_object{"name"}) { | ||
217 | case "sapi" { # just for SAPI | ||
218 | switch($$tts_object{"vendor"}) { | ||
219 | case "Loquendo" { | ||
220 | $string =~ s/Inizializza/inizializa/ig; | ||
221 | } | ||
222 | case "ScanSoft, Inc" { | ||
223 | $string =~ s/V/v/ig; | ||
224 | $string =~ s/X/x/ig; | ||
225 | $string =~ s/stop/stohp/ig; | ||
226 | } | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | } | 137 | } |
232 | if ($orig ne $string) { | 138 | if ($orig ne $string) { |
233 | printf("%s -> %s\n", $orig, $string) if $verbose; | 139 | printf("%s -> %s\n", $orig, $string) if $verbose; |
@@ -331,6 +237,7 @@ sub generateclips { | |||
331 | my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; | 237 | my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; |
332 | my $english = dirname($0) . '/../apps/lang/english.lang'; | 238 | my $english = dirname($0) . '/../apps/lang/english.lang'; |
333 | my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; | 239 | my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; |
240 | my $correctionsfile = dirname($0) . '/voice-corrections.txt'; | ||
334 | my $id = ''; | 241 | my $id = ''; |
335 | my $voice = ''; | 242 | my $voice = ''; |
336 | my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null"; | 243 | my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null"; |
@@ -340,6 +247,37 @@ sub generateclips { | |||
340 | local $| = 1; # make progress indicator work reliably | 247 | local $| = 1; # make progress indicator work reliably |
341 | 248 | ||
342 | my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); | 249 | my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); |
250 | # add string corrections to tts_object. | ||
251 | my @corrects = (); | ||
252 | open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n"; | ||
253 | while(<VOICEREGEXP>) { | ||
254 | # get first character of line | ||
255 | my $line = $_; | ||
256 | my $separator = substr($_, 0, 1); | ||
257 | if($separator =~ m/\s+/) { | ||
258 | next; | ||
259 | } | ||
260 | chomp($line); | ||
261 | $line =~ s/^.//g; # remove separator at beginning | ||
262 | my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line); | ||
263 | |||
264 | # does language match? | ||
265 | if($language !~ m/$lang/) { | ||
266 | next; | ||
267 | } | ||
268 | if($$tts_object{"name"} !~ m/$engine/) { | ||
269 | next; | ||
270 | } | ||
271 | my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object | ||
272 | if($v !~ m/$vendor/) { | ||
273 | next; | ||
274 | } | ||
275 | push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier}; | ||
276 | |||
277 | } | ||
278 | close(VOICEREGEXP); | ||
279 | $tts_object->{corrections} = [@corrects]; | ||
280 | |||
343 | print("Generating voice clips"); | 281 | print("Generating voice clips"); |
344 | print("\n") if $verbose; | 282 | print("\n") if $verbose; |
345 | for (`$cmd`) { | 283 | for (`$cmd`) { |