From 7ad78222c45e2056edd29c16034bb6109ebef45b Mon Sep 17 00:00:00 2001 From: Dominik Riebeling Date: Wed, 2 Mar 2011 18:29:38 +0000 Subject: FS#11913: Separate TTS correction expressions into separate file. voice.pl will now read the TTS correction expressions from a file tools/voice-corrections.txt which includes regular expressions for adjusting the string. This makes it easier to adjust the corrections and allows integrating them into tools like Rockbox Utility. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29500 a1c6a512-1295-4272-9138-f99709370657 --- tools/voice-corrections.txt | 92 +++++++++++++++++++++++++++++ tools/voice.pl | 138 ++++++++++++-------------------------------- 2 files changed, 130 insertions(+), 100 deletions(-) create mode 100644 tools/voice-corrections.txt diff --git a/tools/voice-corrections.txt b/tools/voice-corrections.txt new file mode 100644 index 0000000000..26d2c031cf --- /dev/null +++ b/tools/voice-corrections.txt @@ -0,0 +1,92 @@ + __________ __ ___. + Open \______ \ ____ ____ | | _\_ |__ _______ ___ + Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + \/ \/ \/ \/ \/ + $Id$ + + + Voice string corrections for voice.pl to support TTS engines. + The list items are separated by the separator that is defined by the first + character on the line. If the first character is whitespace, the line is + treated as a comment. + + Format: + /language/engine/vendor/string/replacement/modifiers + + Where / is the separator, and all strings are Perl regexes; modifiers are the s/// flags (e.g. ig). + Empty lines and lines starting with whitespace are ignored; for all other + lines the first character becomes the separator.
+ + General for all engines and languages + +/.*/.*/.*/USB/U S B/g +/.*/.*/.*/ID3/I D 3/g + English +/english/(sapi|festival)/.*/plugin(s?)/plug-in$1/ig +/english/festival/.*/\ba\b/ay/ig +/english/festival/.*/$/./ + + German (deutsch) + +/deutsch/.*/.*/alkaline/alkalein/ig +/deutsch/.*/.*/byte(s?)/beit$1/ig +/deutsch/.*/.*/clip(s?)/klipp$1/ig +/deutsch/.*/.*/\bcover/kawwer/ig +/deutsch/.*/.*/cuesheet/kjuschiet/ig +/deutsch/.*/.*/dither/didder/ig +/deutsch/.*/.*/equalizer/iquileiser/ig +/deutsch/.*/.*/\bflash\b/fläsh/ig +/deutsch/.*/.*/\bfirmware(s?)\b/firmwer$1/ig +/deutsch/.*/.*/\bI D 3 tag\b/I D 3 täg/ig +/deutsch/.*/.*/\bloudness\b/laudness/ig +/deutsch/.*/.*/\bunicode\b/unikod/ig +/deutsch/sapi/AT&T Labs/alphabet/alfabet/ig; +/deutsch/sapi/AT&T Labs/ampere/amper/ig; +/deutsch/sapi/AT&T Labs/\bdezibel\b/de-zibell/ig; +/deutsch/sapi/AT&T Labs/diddering/didde-ring/ig; +/deutsch/sapi/AT&T Labs/energie\b/ener-gie/ig; +/deutsch/sapi/AT&T Labs/\Blauf\b/-lauf/ig; +/deutsch/sapi/AT&T Labs/\bnumerisch\b/numehrisch/ig; + + Swedish (svenska) + for all swedish engines (e.g. for english words) + +/svenska/.*/.*/kilobyte/kilobajt/ig +/svenska/.*/.*/megabyte/megabajt/ig +/svenska/.*/.*/gigabyte/gigabajt/ig +/svenska/.*/.*/\bloudness\b/laudness/ig +/svenska/espeak/.*/ampere/ampär/ig +/svenska/espeak/.*/bokmärken/bok-märken/ig +/svenska/espeak/.*/generella/schenerella/ig +/svenska/espeak/.*/dithering/diddering/ig +/svenska/espeak/.*/\bunicode\b/jynikod/ig +/svenska/espeak/.*/uttoning/utoning/ig +/svenska/espeak/.*/procent/pro-cent/ig +/svenska/espeak/.*/spellistor/spelistor/ig +/svenska/espeak/.*/cuesheet/qjyschiit/ig + + Italian (italiano) + for all italian engines (e.g.
for english words) + +/italiano/.*/.*/Replaygain/Ripleyghein/ig +/italiano/.*/.*/Crossfade/Crossfeid/ig +/italiano/.*/.*/beep/Bip/ig +/italiano/.*/.*/cuesheet/chiushit/ig +/italiano/.*/.*/fade/feid/ig +/italiano/.*/.*/Crossfeed/crossfid/ig +/italiano/.*/.*/Cache/chash/ig +/italiano/.*/.*/\bfirmware(s?)\b/firmuer$1/ig +/italiano/.*/.*/\bFile(s?)\b/fail$1/ig +/italiano/.*/.*/\bloudness\b/laudness/ig +/italiano/.*/.*/\bunicode\b/unikod/ig +/italiano/.*/.*/Playlist/pleylist/ig +/italiano/.*/.*/WavPack/wave pak/ig +/italiano/.*/.*/BITRATE/bit reit/ig +/italiano/.*/.*/Codepage/cod page/ig +/italiano/.*/.*/PCM Wave/pcm Ue'iv/ig +/italiano/sapi/Loquendo/Inizializza/inizializa/ig +/italiano/sapi/ScanSoft, Inc/V/v/ig +/italiano/sapi/ScanSoft, Inc/X/x/ig +/italiano/sapi/ScanSoft, Inc/stop/stohp/ig diff --git a/tools/voice.pl b/tools/voice.pl index 32db75c5e1..ee68c30eb4 100755 --- a/tools/voice.pl +++ b/tools/voice.pl @@ -128,106 +128,12 @@ sub correct_string { our $verbose; my ($string, $language, $tts_object) = @_; my $orig = $string; - switch($language) { - # General for all engines and languages - $string =~ s/USB/U S B/g; - $string =~ s/ID3/I D 3/g; - - case "english" { - switch($$tts_object{"name"}) { - case ["sapi","festival"] { - $string =~ s/plugin(s?)/plug-in$1/ig; next - } - case "festival" { - $string =~ s/\ba\b/ay/ig; - $string =~ s/$/./; - } - } - } - case "deutsch" { - # for all german engines (e.g. 
for english words) - $string =~ s/alkaline/alkalein/ig; - $string =~ s/byte(s?)/beit$1/ig; - $string =~ s/clip(s?)/klipp$1/ig; - $string =~ s/\bcover/kawwer/ig; - $string =~ s/cuesheet/kjuschiet/ig; - $string =~ s/dither/didder/ig; - $string =~ s/equalizer/iquileiser/ig; - $string =~ s/\bflash\b/fläsh/ig; - $string =~ s/\bfirmware(s?)\b/firmwer$1/ig; - $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here - $string =~ s/\bloudness\b/laudness/ig; - $string =~ s/\bunicode\b/unikod/ig; - switch($$tts_object{"name"}) { - case "sapi" { # just for SAPI - switch($$tts_object{"vendor"}) { - case "AT&T Labs" { - $string =~ s/alphabet/alfabet/ig; - $string =~ s/ampere/amper/ig; - $string =~ s/\bdezibel\b/de-zibell/ig; - $string =~ s/diddering/didde-ring/ig; - $string =~ s/energie\b/ener-gie/ig; - $string =~ s/\Blauf\b/-lauf/ig; - $string =~ s/\bnumerisch\b/numehrisch/ig; - } - } - } - } - } - case "svenska" { - # for all swedish engines (e.g. for english words) - $string =~ s/kilobyte/kilobajt/ig; - $string =~ s/megabyte/megabajt/ig; - $string =~ s/gigabyte/gigabajt/ig; - $string =~ s/\bloudness\b/laudness/ig; - - switch($$tts_object{"name"}) { - case "espeak" { # just for eSpeak - $string =~ s/ampere/ampär/ig; - $string =~ s/bokmärken/bok-märken/ig; - $string =~ s/generella/schenerella/ig; - $string =~ s/dithering/diddering/ig; - $string =~ s/\bunicode\b/jynikod/ig; - $string =~ s/uttoning/utoning/ig; - $string =~ s/procent/pro-cent/ig; - $string =~ s/spellistor/spelistor/ig; - $string =~ s/cuesheet/qjyschiit/ig; - } - } - } - case "italiano" { - # for all italian engines (e.g. 
for english words) - $string =~ s/Replaygain/Ripleyghein/ig; - $string =~ s/Crossfade/Crossfeid/ig; - $string =~ s/beep/Bip/ig; - $string =~ s/cuesheet/chiushit/ig; - $string =~ s/fade/feid/ig; - $string =~ s/Crossfeed/crossfid/ig; - $string =~ s/Cache/chash/ig; - $string =~ s/\bfirmware(s?)\b/firmuer$1/ig; - $string =~ s/\bFile(s?)\b/fail$1/ig; - $string =~ s/\bloudness\b/laudness/ig; - $string =~ s/\bunicode\b/unikod/ig; - $string =~ s/Playlist/pleylist/ig; - $string =~ s/WavPack/wave pak/ig; - $string =~ s/BITRATE/bit reit/ig; - $string =~ s/Codepage/cod page/ig; - $string =~ s/PCM Wave/pcm Ue'iv/ig; - switch($$tts_object{"name"}) { - case "sapi" { # just for SAPI - switch($$tts_object{"vendor"}) { - case "Loquendo" { - $string =~ s/Inizializza/inizializa/ig; - } - case "ScanSoft, Inc" { - $string =~ s/V/v/ig; - $string =~ s/X/x/ig; - $string =~ s/stop/stohp/ig; - } - } - } - } - } + my $corrections = $tts_object->{"corrections"}; + + foreach (@$corrections) { + my $r = "s" . $_->{separator} . $_->{search} . $_->{separator} + . $_->{replace} . $_->{separator} . $_->{modifier}; + eval ('$string =~' . "$r;"); } if ($orig ne $string) { printf("%s -> %s\n", $orig, $string) if $verbose; @@ -331,6 +237,7 @@ sub generateclips { my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_; my $english = dirname($0) . '/../apps/lang/english.lang'; my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang'; + my $correctionsfile = dirname($0) . '/voice-corrections.txt'; my $id = ''; my $voice = ''; my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null"; @@ -340,6 +247,37 @@ sub generateclips { local $| = 1; # make progress indicator work reliably my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language); + # add string corrections to tts_object. 
+ my @corrects = (); + open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n"; + while(<VOICEREGEXP>) { + # get first character of line + my $line = $_; + my $separator = substr($_, 0, 1); + if($separator =~ m/\s+/) { + next; + } + chomp($line); + $line =~ s/^.//g; # remove separator at beginning + my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line); + + # does language match? + if($language !~ m/$lang/) { + next; + } + if($$tts_object{"name"} !~ m/$engine/) { + next; + } + my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object + if($v !~ m/$vendor/) { + next; + } + push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier}; + + } + close(VOICEREGEXP); + $tts_object->{corrections} = [@corrects]; + print("Generating voice clips"); print("\n") if $verbose; for (`$cmd`) { -- cgit v1.2.3