diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/voice-corrections.txt | 92 | ||||
-rwxr-xr-x | tools/voice.pl | 138 |
2 files changed, 130 insertions, 100 deletions
diff --git a/tools/voice-corrections.txt b/tools/voice-corrections.txt new file mode 100644 index 0000000000..26d2c031cf --- /dev/null +++ b/tools/voice-corrections.txt | |||
@@ -0,0 +1,92 @@ | |||
1 | __________ __ ___. | ||
2 | Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
3 | Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
4 | Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
5 | Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
6 | \/ \/ \/ \/ \/ | ||
7 | $Id$ | ||
8 | |||
9 | |||
10 | Voice string corrections for voice.pl to support TTS engines. | ||
11 | The list items are separated by the separator that is defined by the first | ||
12 | character on the line. If the first character is whitespace, the line is | ||
13 | treated as a comment. | ||
14 | |||
15 | Format: | ||
16 | /language/engine/vendor/string/replacement/modifiers | ||
17 | |||
18 | Where / is the separator; language, engine, vendor and string are Perl regexes. | ||
19 | Empty lines and lines starting with a whitespace are ignored, for all other | ||
20 | lines the first character will become the separator. | ||
21 | |||
22 | General for all engines and languages | ||
23 | |||
24 | /.*/.*/.*/USB/U S B/g | ||
25 | /.*/.*/.*/ID3/I D 3/g | ||
26 | English | ||
27 | /english/(sapi|festival)/.*/plugin(s?)/plug-in$1/ig | ||
28 | /english/festival/.*/\ba\b/ay/ig | ||
29 | /english/festival/.*/$/./ | ||
30 | |||
31 | German (deutsch) | ||
32 | |||
33 | /deutsch/.*/.*/alkaline/alkalein/ig | ||
34 | /deutsch/.*/.*/byte(s?)/beit$1/ig | ||
35 | /deutsch/.*/.*/clip(s?)/klipp$1/ig | ||
36 | /deutsch/.*/.*/\bcover/kawwer/ig | ||
37 | /deutsch/.*/.*/cuesheet/kjuschiet/ig | ||
38 | /deutsch/.*/.*/dither/didder/ig | ||
39 | /deutsch/.*/.*/equalizer/iquileiser/ig | ||
40 | /deutsch/.*/.*/\bflash\b/fläsh/ig | ||
41 | /deutsch/.*/.*/\bfirmware(s?)\b/firmwer$1/ig | ||
42 | /deutsch/.*/.*/\bI D 3 tag\b/I D 3 täg/ig | ||
43 | /deutsch/.*/.*/\bloudness\b/laudness/ig | ||
44 | /deutsch/.*/.*/\bunicode\b/unikod/ig | ||
45 | /deutsch/sapi/AT&T Labs/alphabet/alfabet/ig | ||
46 | /deutsch/sapi/AT&T Labs/ampere/amper/ig | ||
47 | /deutsch/sapi/AT&T Labs/\bdezibel\b/de-zibell/ig | ||
48 | /deutsch/sapi/AT&T Labs/diddering/didde-ring/ig | ||
49 | /deutsch/sapi/AT&T Labs/energie\b/ener-gie/ig | ||
50 | /deutsch/sapi/AT&T Labs/\Blauf\b/-lauf/ig | ||
51 | /deutsch/sapi/AT&T Labs/\bnumerisch\b/numehrisch/ig | ||
52 | |||
53 | Swedish (svenska) | ||
54 | for all swedish engines (e.g. for english words) | ||
55 | |||
56 | /svenska/.*/.*/kilobyte/kilobajt/ig | ||
57 | /svenska/.*/.*/megabyte/megabajt/ig | ||
58 | /svenska/.*/.*/gigabyte/gigabajt/ig | ||
59 | /svenska/.*/.*/\bloudness\b/laudness/ig | ||
60 | /svenska/espeak/.*/ampere/ampär/ig | ||
61 | /svenska/espeak/.*/bokmärken/bok-märken/ig | ||
62 | /svenska/espeak/.*/generella/schenerella/ig | ||
63 | /svenska/espeak/.*/dithering/diddering/ig | ||
64 | /svenska/espeak/.*/\bunicode\b/jynikod/ig | ||
65 | /svenska/espeak/.*/uttoning/utoning/ig | ||
66 | /svenska/espeak/.*/procent/pro-cent/ig | ||
67 | /svenska/espeak/.*/spellistor/spelistor/ig | ||
68 | /svenska/espeak/.*/cuesheet/qjyschiit/ig | ||
69 | |||
70 | Italian (italiano) | ||
71 | for all italian engines (e.g. for english words) | ||
72 | |||
73 | /italiano/.*/.*/Replaygain/Ripleyghein/ig | ||
74 | /italiano/.*/.*/Crossfade/Crossfeid/ig | ||
75 | /italiano/.*/.*/beep/Bip/ig | ||
76 | /italiano/.*/.*/cuesheet/chiushit/ig | ||
77 | /italiano/.*/.*/fade/feid/ig | ||
78 | /italiano/.*/.*/Crossfeed/crossfid/ig | ||
79 | /italiano/.*/.*/Cache/chash/ig | ||
80 | /italiano/.*/.*/\bfirmware(s?)\b/firmuer$1/ig | ||
81 | /italiano/.*/.*/\bFile(s?)\b/fail$1/ig | ||
82 | /italiano/.*/.*/\bloudness\b/laudness/ig | ||
83 | /italiano/.*/.*/\bunicode\b/unikod/ig | ||
84 | /italiano/.*/.*/Playlist/pleylist/ig | ||
85 | /italiano/.*/.*/WavPack/wave pak/ig | ||
86 | /italiano/.*/.*/BITRATE/bit reit/ig | ||
87 | /italiano/.*/.*/Codepage/cod page/ig | ||
88 | /italiano/.*/.*/PCM Wave/pcm Ue'iv/ig | ||
89 | /italiano/sapi/Loquendo/Inizializza/inizializa/ig | ||
90 | /italiano/sapi/ScanSoft, Inc/V/v/ig | ||
91 | /italiano/sapi/ScanSoft, Inc/X/x/ig | ||
92 | /italiano/sapi/ScanSoft, Inc/stop/stohp/ig | ||
diff --git a/tools/voice.pl b/tools/voice.pl index 32db75c5e1..ee68c30eb4 100755 --- a/tools/voice.pl +++ b/tools/voice.pl | |||
@@ -128,106 +128,12 @@ sub correct_string { | |||
128 | our $verbose; | 128 | our $verbose; |
129 | my ($string, $language, $tts_object) = @_; | 129 | my ($string, $language, $tts_object) = @_; |
130 | my $orig = $string; | 130 | my $orig = $string; |
131 | switch($language) { | 131 | my $corrections = $tts_object->{"corrections"}; |
132 | # General for all engines and languages | 132 | |
133 | $string =~ s/USB/U S B/g; | 133 | foreach (@$corrections) { |
134 | $string =~ s/ID3/I D 3/g; | 134 | my $r = "s" . $_->{separator} . $_->{search} . $_->{separator} |
135 | 135 | . $_->{replace} . $_->{separator} . $_->{modifier}; | |
136 | case "english" { | 136 | eval ('$string =~' . "$r;"); |
137 | switch($$tts_object{"name"}) { | ||
138 | case ["sapi","festival"] { | ||
139 | $string =~ s/plugin(s?)/plug-in$1/ig; next | ||
140 | } | ||
141 | case "festival" { | ||
142 | $string =~ s/\ba\b/ay/ig; | ||
143 | $string =~ s/$/./; | ||
144 | } | ||
145 | } | ||
146 | } | ||
147 | case "deutsch" { | ||
148 | # for all german engines (e.g. for english words) | ||
149 | $string =~ s/alkaline/alkalein/ig; | ||
150 | $string =~ s/byte(s?)/beit$1/ig; | ||
151 | $string =~ s/clip(s?)/klipp$1/ig; | ||
152 | $string =~ s/\bcover/kawwer/ig; | ||
153 | $string =~ s/cuesheet/kjuschiet/ig; | ||
154 | $string =~ s/dither/didder/ig; | ||
155 | $string =~ s/equalizer/iquileiser/ig; | ||
156 | $string =~ s/\bflash\b/fläsh/ig; | ||
157 | $string =~ s/\bfirmware(s?)\b/firmwer$1/ig; | ||
158 | $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here | ||
159 | $string =~ s/\bloudness\b/laudness/ig; | ||
160 | $string =~ s/\bunicode\b/unikod/ig; | ||
161 | switch($$tts_object{"name"}) { | ||
162 | case "sapi" { # just for SAPI | ||
163 | switch($$tts_object{"vendor"}) { | ||
164 | case "AT&T Labs" { | ||
165 | $string =~ s/alphabet/alfabet/ig; | ||
166 | $string =~ s/ampere/amper/ig; | ||
167 | $string =~ s/\bdezibel\b/de-zibell/ig; | ||
168 | $string =~ s/diddering/didde-ring/ig; | ||
169 | $string =~ s/energie\b/ener-gie/ig; | ||
170 | $string =~ s/\Blauf\b/-lauf/ig; | ||
171 | $string =~ s/\bnumerisch\b/numehrisch/ig; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | case "svenska" { | ||
178 | # for all swedish engines (e.g. for english words) | ||
179 | $string =~ s/kilobyte/kilobajt/ig; | ||
180 | $string =~ s/megabyte/megabajt/ig; | ||
181 | $string =~ s/gigabyte/gigabajt/ig; | ||
182 | $string =~ s/\bloudness\b/laudness/ig; | ||
183 | |||
184 | switch($$tts_object{"name"}) { | ||
185 | case "espeak" { # just for eSpeak | ||
186 | $string =~ s/ampere/ampär/ig; | ||
187 | $string =~ s/bokmärken/bok-märken/ig; | ||
188 | $string =~ s/generella/schenerella/ig; | ||
189 | $string =~ s/dithering/diddering/ig; | ||
190 | $string =~ s/\bunicode\b/jynikod/ig; | ||
191 | $string =~ s/uttoning/utoning/ig; | ||
192 | $string =~ s/procent/pro-cent/ig; | ||
193 | $string =~ s/spellistor/spelistor/ig; | ||
194 | $string =~ s/cuesheet/qjyschiit/ig; | ||
195 | } | ||
196 | } | ||
197 | } | ||
198 | case "italiano" { | ||
199 | # for all italian engines (e.g. for english words) | ||
200 | $string =~ s/Replaygain/Ripleyghein/ig; | ||
201 | $string =~ s/Crossfade/Crossfeid/ig; | ||
202 | $string =~ s/beep/Bip/ig; | ||
203 | $string =~ s/cuesheet/chiushit/ig; | ||
204 | $string =~ s/fade/feid/ig; | ||
205 | $string =~ s/Crossfeed/crossfid/ig; | ||
206 | $string =~ s/Cache/chash/ig; | ||
207 | $string =~ s/\bfirmware(s?)\b/firmuer$1/ig; | ||
208 | $string =~ s/\bFile(s?)\b/fail$1/ig; | ||
209 | $string =~ s/\bloudness\b/laudness/ig; | ||
210 | $string =~ s/\bunicode\b/unikod/ig; | ||
211 | $string =~ s/Playlist/pleylist/ig; | ||
212 | $string =~ s/WavPack/wave pak/ig; | ||
213 | $string =~ s/BITRATE/bit reit/ig; | ||
214 | $string =~ s/Codepage/cod page/ig; | ||
215 | $string =~ s/PCM Wave/pcm Ue'iv/ig; | ||
216 | switch($$tts_object{"name"}) { | ||
217 | case "sapi" { # just for SAPI | ||
218 | switch($$tts_object{"vendor"}) { | ||
219 | case "Loquendo" { | ||
220 | $string =~ s/Inizializza/inizializa/ig; | ||
221 | } | ||
222 | case "ScanSoft, Inc" { | ||
223 | $string =~ s/V/v/ig; | ||
224 | $string =~ s/X/x/ig; | ||
225 | $string =~ s/stop/stohp/ig; | ||
226 | } | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | } | 137 | } |
232 | if ($orig ne $string) { | 138 | if ($orig ne $string) { |
233 | printf("%s -> %s\n", $orig, $string) if $verbose; | 139 | printf("%s -> %s\n", $orig, $string) if $verbose; |
@@ -331,6 +237,7 @@ sub generateclips { | |||
331 | my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; | 237 | my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; |
332 | my $english = dirname($0) . '/../apps/lang/english.lang'; | 238 | my $english = dirname($0) . '/../apps/lang/english.lang'; |
333 | my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; | 239 | my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; |
240 | my $correctionsfile = dirname($0) . '/voice-corrections.txt'; | ||
334 | my $id = ''; | 241 | my $id = ''; |
335 | my $voice = ''; | 242 | my $voice = ''; |
336 | my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null"; | 243 | my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null"; |
@@ -340,6 +247,37 @@ sub generateclips { | |||
340 | local $| = 1; # make progress indicator work reliably | 247 | local $| = 1; # make progress indicator work reliably |
341 | 248 | ||
342 | my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); | 249 | my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); |
250 | # add string corrections to tts_object. | ||
251 | my @corrects = (); | ||
252 | open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n"; | ||
253 | while(<VOICEREGEXP>) { | ||
254 | # get first character of line | ||
255 | my $line = $_; | ||
256 | my $separator = substr($_, 0, 1); | ||
257 | if($separator =~ m/\s+/) { | ||
258 | next; | ||
259 | } | ||
260 | chomp($line); | ||
261 | $line =~ s/^.//g; # remove separator at beginning | ||
262 | my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line); | ||
263 | |||
264 | # does language match? | ||
265 | if($language !~ m/$lang/) { | ||
266 | next; | ||
267 | } | ||
268 | if($$tts_object{"name"} !~ m/$engine/) { | ||
269 | next; | ||
270 | } | ||
271 | my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object | ||
272 | if($v !~ m/$vendor/) { | ||
273 | next; | ||
274 | } | ||
275 | push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier}; | ||
276 | |||
277 | } | ||
278 | close(VOICEREGEXP); | ||
279 | $tts_object->{corrections} = [@corrects]; | ||
280 | |||
343 | print("Generating voice clips"); | 281 | print("Generating voice clips"); |
344 | print("\n") if $verbose; | 282 | print("\n") if $verbose; |
345 | for (`$cmd`) { | 283 | for (`$cmd`) { |