summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDominik Riebeling <Dominik.Riebeling@gmail.com>2011-03-02 18:29:38 +0000
committerDominik Riebeling <Dominik.Riebeling@gmail.com>2011-03-02 18:29:38 +0000
commit7ad78222c45e2056edd29c16034bb6109ebef45b (patch)
treea06ccfd5f12038c0d61acfb192cd97c9c1319f2d
parent1f77d091a5ce98c10e263dfdb18f2939aeb21a55 (diff)
downloadrockbox-7ad78222c45e2056edd29c16034bb6109ebef45b.tar.gz
rockbox-7ad78222c45e2056edd29c16034bb6109ebef45b.zip
FS#11913: Separate TTS correction expressions into separate file.
voice.pl will now read the TTS correction expressions from a file tools/voice-corrections.txt which includes regular expressions for adjusting the string. This makes it easier to adjust the corrections and allows integrating them into tools like Rockbox Utility. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29500 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--tools/voice-corrections.txt92
-rwxr-xr-xtools/voice.pl138
2 files changed, 130 insertions, 100 deletions
diff --git a/tools/voice-corrections.txt b/tools/voice-corrections.txt
new file mode 100644
index 0000000000..26d2c031cf
--- /dev/null
+++ b/tools/voice-corrections.txt
@@ -0,0 +1,92 @@
1 __________ __ ___.
2 Open \______ \ ____ ____ | | _\_ |__ _______ ___
3 Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
4 Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
5 Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
6 \/ \/ \/ \/ \/
7 $Id$
8
9
10 Voice string corrections for voice.pl to support TTS engines.
11 The fields of each entry are separated by a separator character, which is
12 the first character on the line. If the first character is whitespace, the
13 line is treated as a comment.
14
15 Format:
16 /language/engine/vendor/string/replacement/modifiers
17
18 Where / is the separator, language/engine/vendor/string are Perl regexes and
19 modifiers are s/// flags (e.g. "ig"). Empty lines and lines starting with
20 whitespace are ignored; on any other line the first character is the separator.
21
22 General for all engines and languages
23
24/.*/.*/.*/USB/U S B/g
25/.*/.*/.*/ID3/I D 3/g
26 English
27/english/(sapi|festival)/.*/plugin(s?)/plug-in$1/ig
28/english/festival/.*/\ba\b/ay/ig
29/english/festival/.*/$/./
30
31 German (deutsch)
32
33/deutsch/.*/.*/alkaline/alkalein/ig
34/deutsch/.*/.*/byte(s?)/beit$1/ig
35/deutsch/.*/.*/clip(s?)/klipp$1/ig
36/deutsch/.*/.*/\bcover/kawwer/ig
37/deutsch/.*/.*/cuesheet/kjuschiet/ig
38/deutsch/.*/.*/dither/didder/ig
39/deutsch/.*/.*/equalizer/iquileiser/ig
40/deutsch/.*/.*/\bflash\b/fläsh/ig
41/deutsch/.*/.*/\bfirmware(s?)\b/firmwer$1/ig
42/deutsch/.*/.*/\bI D 3 tag\b/I D 3 täg/ig
43/deutsch/.*/.*/\bloudness\b/laudness/ig
44/deutsch/.*/.*/\bunicode\b/unikod/ig
45/deutsch/sapi/AT&T Labs/alphabet/alfabet/ig;
46/deutsch/sapi/AT&T Labs/ampere/amper/ig;
47/deutsch/sapi/AT&T Labs/\bdezibel\b/de-zibell/ig;
48/deutsch/sapi/AT&T Labs/diddering/didde-ring/ig;
49/deutsch/sapi/AT&T Labs/energie\b/ener-gie/ig;
50/deutsch/sapi/AT&T Labs/\Blauf\b/-lauf/ig;
51/deutsch/sapi/AT&T Labs/\bnumerisch\b/numehrisch/ig;
52
53 Swedish (svenska)
54 for all swedish engines (e.g. for english words)
55
56/svenska/.*/.*/kilobyte/kilobajt/ig
57/svenska/.*/.*/megabyte/megabajt/ig
58/svenska/.*/.*/gigabyte/gigabajt/ig
59/svenska/.*/.*/\bloudness\b/laudness/ig
60/svenska/espeak/.*/ampere/ampär/ig
61/svenska/espeak/.*/bokmärken/bok-märken/ig
62/svenska/espeak/.*/generella/schenerella/ig
63/svenska/espeak/.*/dithering/diddering/ig
64/svenska/espeak/.*/\bunicode\b/jynikod/ig
65/svenska/espeak/.*/uttoning/utoning/ig
66/svenska/espeak/.*/procent/pro-cent/ig
67/svenska/espeak/.*/spellistor/spelistor/ig
68/svenska/espeak/.*/cuesheet/qjyschiit/ig
69
70 Italian (italiano)
71 for all italian engines (e.g. for english words)
72
73/italiano/.*/.*/Replaygain/Ripleyghein/ig
74/italiano/.*/.*/Crossfade/Crossfeid/ig
75/italiano/.*/.*/beep/Bip/ig
76/italiano/.*/.*/cuesheet/chiushit/ig
77/italiano/.*/.*/fade/feid/ig
78/italiano/.*/.*/Crossfeed/crossfid/ig
79/italiano/.*/.*/Cache/chash/ig
80/italiano/.*/.*/\bfirmware(s?)\b/firmuer$1/ig
81/italiano/.*/.*/\bFile(s?)\b/fail$1/ig
82/italiano/.*/.*/\bloudness\b/laudness/ig
83/italiano/.*/.*/\bunicode\b/unikod/ig
84/italiano/.*/.*/Playlist/pleylist/ig
85/italiano/.*/.*/WavPack/wave pak/ig
86/italiano/.*/.*/BITRATE/bit reit/ig
87/italiano/.*/.*/Codepage/cod page/ig
88/italiano/.*/.*/PCM Wave/pcm Ue'iv/ig
89/italiano/sapi/Loquendo/Inizializza/inizializa/ig
90/italiano/sapi/ScanSoft, Inc/V/v/ig
91/italiano/sapi/ScanSoft, Inc/X/x/ig
92/italiano/sapi/ScanSoft, Inc/stop/stohp/ig
diff --git a/tools/voice.pl b/tools/voice.pl
index 32db75c5e1..ee68c30eb4 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -128,106 +128,12 @@ sub correct_string {
128 our $verbose; 128 our $verbose;
129 my ($string, $language, $tts_object) = @_; 129 my ($string, $language, $tts_object) = @_;
130 my $orig = $string; 130 my $orig = $string;
131 switch($language) { 131 my $corrections = $tts_object->{"corrections"};
132 # General for all engines and languages 132
133 $string =~ s/USB/U S B/g; 133 foreach (@$corrections) {
134 $string =~ s/ID3/I D 3/g; 134 my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
135 135 . $_->{replace} . $_->{separator} . $_->{modifier};
136 case "english" { 136 eval ('$string =~' . "$r;");
137 switch($$tts_object{"name"}) {
138 case ["sapi","festival"] {
139 $string =~ s/plugin(s?)/plug-in$1/ig; next
140 }
141 case "festival" {
142 $string =~ s/\ba\b/ay/ig;
143 $string =~ s/$/./;
144 }
145 }
146 }
147 case "deutsch" {
148 # for all german engines (e.g. for english words)
149 $string =~ s/alkaline/alkalein/ig;
150 $string =~ s/byte(s?)/beit$1/ig;
151 $string =~ s/clip(s?)/klipp$1/ig;
152 $string =~ s/\bcover/kawwer/ig;
153 $string =~ s/cuesheet/kjuschiet/ig;
154 $string =~ s/dither/didder/ig;
155 $string =~ s/equalizer/iquileiser/ig;
156 $string =~ s/\bflash\b/fläsh/ig;
157 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
158 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
159 $string =~ s/\bloudness\b/laudness/ig;
160 $string =~ s/\bunicode\b/unikod/ig;
161 switch($$tts_object{"name"}) {
162 case "sapi" { # just for SAPI
163 switch($$tts_object{"vendor"}) {
164 case "AT&T Labs" {
165 $string =~ s/alphabet/alfabet/ig;
166 $string =~ s/ampere/amper/ig;
167 $string =~ s/\bdezibel\b/de-zibell/ig;
168 $string =~ s/diddering/didde-ring/ig;
169 $string =~ s/energie\b/ener-gie/ig;
170 $string =~ s/\Blauf\b/-lauf/ig;
171 $string =~ s/\bnumerisch\b/numehrisch/ig;
172 }
173 }
174 }
175 }
176 }
177 case "svenska" {
178 # for all swedish engines (e.g. for english words)
179 $string =~ s/kilobyte/kilobajt/ig;
180 $string =~ s/megabyte/megabajt/ig;
181 $string =~ s/gigabyte/gigabajt/ig;
182 $string =~ s/\bloudness\b/laudness/ig;
183
184 switch($$tts_object{"name"}) {
185 case "espeak" { # just for eSpeak
186 $string =~ s/ampere/ampär/ig;
187 $string =~ s/bokmärken/bok-märken/ig;
188 $string =~ s/generella/schenerella/ig;
189 $string =~ s/dithering/diddering/ig;
190 $string =~ s/\bunicode\b/jynikod/ig;
191 $string =~ s/uttoning/utoning/ig;
192 $string =~ s/procent/pro-cent/ig;
193 $string =~ s/spellistor/spelistor/ig;
194 $string =~ s/cuesheet/qjyschiit/ig;
195 }
196 }
197 }
198 case "italiano" {
199 # for all italian engines (e.g. for english words)
200 $string =~ s/Replaygain/Ripleyghein/ig;
201 $string =~ s/Crossfade/Crossfeid/ig;
202 $string =~ s/beep/Bip/ig;
203 $string =~ s/cuesheet/chiushit/ig;
204 $string =~ s/fade/feid/ig;
205 $string =~ s/Crossfeed/crossfid/ig;
206 $string =~ s/Cache/chash/ig;
207 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
208 $string =~ s/\bFile(s?)\b/fail$1/ig;
209 $string =~ s/\bloudness\b/laudness/ig;
210 $string =~ s/\bunicode\b/unikod/ig;
211 $string =~ s/Playlist/pleylist/ig;
212 $string =~ s/WavPack/wave pak/ig;
213 $string =~ s/BITRATE/bit reit/ig;
214 $string =~ s/Codepage/cod page/ig;
215 $string =~ s/PCM Wave/pcm Ue'iv/ig;
216 switch($$tts_object{"name"}) {
217 case "sapi" { # just for SAPI
218 switch($$tts_object{"vendor"}) {
219 case "Loquendo" {
220 $string =~ s/Inizializza/inizializa/ig;
221 }
222 case "ScanSoft, Inc" {
223 $string =~ s/V/v/ig;
224 $string =~ s/X/x/ig;
225 $string =~ s/stop/stohp/ig;
226 }
227 }
228 }
229 }
230 }
231 } 137 }
232 if ($orig ne $string) { 138 if ($orig ne $string) {
233 printf("%s -> %s\n", $orig, $string) if $verbose; 139 printf("%s -> %s\n", $orig, $string) if $verbose;
@@ -331,6 +237,7 @@ sub generateclips {
331 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; 237 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
332 my $english = dirname($0) . '/../apps/lang/english.lang'; 238 my $english = dirname($0) . '/../apps/lang/english.lang';
333 my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; 239 my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
240 my $correctionsfile = dirname($0) . '/voice-corrections.txt';
334 my $id = ''; 241 my $id = '';
335 my $voice = ''; 242 my $voice = '';
336 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null"; 243 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
@@ -340,6 +247,37 @@ sub generateclips {
340 local $| = 1; # make progress indicator work reliably 247 local $| = 1; # make progress indicator work reliably
341 248
342 my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); 249 my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
250 # add string corrections to tts_object.
251 my @corrects = ();
252 open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
253 while(<VOICEREGEXP>) {
254 # get first character of line
255 my $line = $_;
256 my $separator = substr($_, 0, 1);
257 if($separator =~ m/\s+/) {
258 next;
259 }
260 chomp($line);
261 $line =~ s/^.//g; # remove separator at beginning
262 my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
263
264 # does language match?
265 if($language !~ m/$lang/) {
266 next;
267 }
268 if($$tts_object{"name"} !~ m/$engine/) {
269 next;
270 }
271 my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
272 if($v !~ m/$vendor/) {
273 next;
274 }
275 push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
276
277 }
278 close(VOICEREGEXP);
279 $tts_object->{corrections} = [@corrects];
280
343 print("Generating voice clips"); 281 print("Generating voice clips");
344 print("\n") if $verbose; 282 print("\n") if $verbose;
345 for (`$cmd`) { 283 for (`$cmd`) {