From c06e7772ff81ed4bbc78377a6e16456456f3e96c Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Mon, 3 Apr 2006 21:11:11 +0000 Subject: langv2 git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9470 a1c6a512-1295-4272-9138-f99709370657 --- tools/genlang | 611 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 535 insertions(+), 76 deletions(-) (limited to 'tools/genlang') diff --git a/tools/genlang b/tools/genlang index cde23f85fa..07c866a288 100755 --- a/tools/genlang +++ b/tools/genlang @@ -1,28 +1,430 @@ #!/usr/bin/perl -s +# __________ __ ___. +# Open \______ \ ____ ____ | | _\_ |__ _______ ___ +# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / +# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < +# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ +# \/ \/ \/ \/ \/ +# $Id$ +# +# Copyright (C) 2006 by Daniel Stenberg +# + +# binary version for the binary lang file +my $langversion = 2; # 2 is the latest one used in the v1 format + +# A note for future users and readers: The original v1 language system allowed +# the build to create and use a different language than english built-in. We +# removed that feature from our build-system, but the build scripts still had +# the ability. But, starting now, this ability is no longer provided since I +# figured it was boring and unnecessary to write support for now since we +# don't use it anymore. if(!$ARGV[0]) { print <] +Usage: genlang2 [options] + + -p= + Make the tool create a [prefix].c and [prefix].h file. + + -b= + Make the tool create a binary language (.lng) file namaed [outfile]. + The use of this option requires that you also use -e. + + -u + Update language file. Given the translated file and the most recent english + file, you\'ll get an updated version sent to stdout. Suitable action to do + when you intend to update a translation. + + -e= + Point out the english (original source) file, to use that as master + language template. Used in combination with -b or -u. -When running this program. .h and .c will be created in the -"current directory". is "lang" by default. + -t= + Specify which target you want the translations/phrases for. Required when + -b or -p is used. + + -o + Voice mode output. Outputs all id: and voice: lines for the given target! + + -v + Enables verbose (debug) output. MOO ; exit; } +# How update works: +# +# 1) scan the english file, keep the whole for each phrase. +# 2) read the translated file, for each end of phrase, compare: +# A) all source strings, if there's any change there should be a comment about +# it output +# B) the desc fields +# +# 3) output the phrase with the comments from above +# 4) check which phrases that the translated version didn't have, and spit out +# the english version of those +# + my $prefix = $p; -if(!$prefix) { - $prefix="lang"; +my $binary = $b; +my $update = $u; + +my $english = $e; +my $voiceout = $o; + +my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0); + +if($check > 1) { + print "Please use only one of -p, -u, -o and -b\n"; + exit; +} +if(!$check) { + print "Please use at least one of -p, -u, -o and -b\n"; + exit; +} +if(($binary || $update || $voiceout) && !$english) { + print "Please use -e too when you use -b, -o or -u\n"; + exit; +} + +my $target = $t; +if(!$target && !$update) { + print "Please specify a target (with -t)!\n"; + exit; } +my $verbose=$v; + +my %id; # string to num hash +my @idnum; # num to string array + +my %source; # id string to source phrase hash +my %dest; # id string to dest phrase hash +my %voice; # id string to voice phrase hash my $input = $ARGV[0]; -open(HFILE, ">$prefix.h"); -open(CFILE, ">$prefix.c"); +my @m; +my $m="blank"; + +sub match { + my ($string, $pattern)=@_; + + $pattern =~ s/\*/.?*/g; + $pattern =~ s/\?/./g; + + return ($string =~ $pattern); +} + +sub blank { + # nothing to do +} + +my %head; +sub header { + my ($full, $n, $v)=@_; + $head{$n}=$v; +} + +my %phrase; +sub phrase { + my ($full, $n, $v)=@_; + $phrase{$n}=$v; +} + +sub parsetarget { + my ($debug, $strref, $full, $n, $v)=@_; + my $string; + my @all= split(" *, *", $n); + my $test; + for $test (@all) { +# print "TEST ($debug) $target for $test\n"; + if(match($target, $test)) { + $string = $v; +# print "MATCH: $test => $v\n"; + } + } + if($string) { + $$strref = $string; + } + return $string; +} + +my $src; +sub source { + parsetarget("src", \$src, @_); +} + +my $dest; +sub dest { + parsetarget("dest", \$dest, @_); +} + +my $voice; +sub voice { + parsetarget("voice", \$voice, @_); +} + +my %idmap; +my %english; +if($english) { + # For the cases where the english file needs to be scanned/read, we do + # it before we read the translated file. For -b it isn't necessary, but for + # -u it is convenient. + + my $idnum=0; # start with a true number + my $vidnum=0x8000; # first voice id + open(ENG, "<$english") || die "can't open $english"; + my @phrase; + my $id; + while() { + + # get rid of DOS newlines + $_ =~ s/\r//g; + + if($_ =~ /^ *\/) { + # this is the start of a phrase + } + elsif($_ =~ /^ *\<\/phrase\>/) { + # this is the end of a phrase, add it to the english hash + $english{$id}=join("", @phrase); + undef @phrase; + } + elsif($_ ne "\n") { + # gather everything related to this phrase + push @phrase, $_; + } + + if($_ =~ /^ *id: ([^ \t\n]+)/i) { + $id=$1; + # voice-only entries get a difference range + if($id =~ /^VOICE_/) { + # Assign an ID number to this entry + $idmap{$id}=$vidnum; + $vidnum++; + } + else { + # Assign an ID number to this entry + $idmap{$id}=$idnum; + $idnum++; + } + } + } + close(ENG); +} + +# a function that compares the english phrase with the translated one. +# compare source strings and desc + +# Then output the updated version! +sub compare { + my ($idstr, $engref, $locref)=@_; + my ($edesc, $ldesc); + my ($esource, $lsource); + my $mode=0; + + for my $l (@$engref) { + if($l =~ /^ *desc: (.*)/) { + $edesc=$1; + } + elsif($l =~ / *\/i) { + $mode=1; + } + elsif($mode) { + if($l =~ / *\<\/source\>/i) { + last; + } + $esource .= "$l\n"; + } + } + + my @show; + my @source; + + $mode = 0; + for my $l (@$locref) { + if($l =~ /^ *desc: (.*)/) { + $ldesc=$1; + if($edesc ne $ldesc) { + $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n"; + } + push @show, $l; + } + elsif($l =~ / *\/i) { + $mode=1; + push @show, $l; + } + elsif($mode) { + if($l =~ / *\<\/source\>/i) { + $mode = 0; + print @show; + if($esource ne $lsource) { + print "### The section differs from the english!\n", + "### the previously used one is commented below:\n"; + for(split("\n", $lsource)) { + print "### $_\n"; + } + print $esource; + } + else { + print $lsource; + } + undef @show; # start over + + push @show, $l; + } + else { + $lsource .= "$l"; + } + } + else { + push @show, $l; + } + } + + + print @show; +} + +my $idcount; # counter for lang ID numbers +my $voiceid=0x8000; # counter for voice-only ID numbers + +# +# Now start the scanning of the selected language string +# + +open(LANG, "<$input"); +my @phrase; +while() { + + $line++; + + # get rid of DOS newlines + $_ =~ s/\r//g; + + if($_ =~ /^( *\#|[ \t\n\r]*\z)/) { + # comment or empty line + next; + } + + my $ll = $_; -print HFILE <]*)>/) { + my $part = $1; + #print "P: $part\n"; + + if($part =~ /^\//) { + # this was a closing tag + + if($part eq "/phrase") { + # closing the phrase + + my $idstr = $phrase{'id'}; + my $idnum; + + if($dest =~ /^none\z/i) { + # "none" as dest means that this entire phrase is to be + # ignored + #print "dest is NONE!\n"; + } + else { + + # Use the ID name to figure out which id number range we + # should use for this phrase. Voice-only strings are + # separated. + + if($idstr =~ /^VOICE/) { + $idnum = $voiceid++; + } + else { + $idnum = $idcount++; + } + + $id{$idstr} = $idnum; + $idnum[$idnum]=$idstr; + + $source{$idstr}=$src; + $dest{$idstr}=$dest; + $voice{$idstr}=$voice; + + if($verbose) { + print "id: $phrase{id} ($idnum)\n"; + print "source: $src\n"; + print "dest: $dest\n"; + print "voice: $voice\n"; + } + + undef $src; + undef $dest; + undef $voice; + undef %phrase; + } + + if($update) { + my $e = $english{$idstr}; + + if($e) { + # compare original english with this! + my @eng = split("\n", $english{$idstr}); + + compare($idstr, \@eng, \@phrase); + + $english{$idstr}=""; # clear it + } + else { + print "### $idstr: The phrase is not used. Skipped\n"; + } + } + undef @phrase; + + } # end of + + # starts with a slash, this _ends_ this section + $m = pop @m; # get back old value, the previous level's tag + next; + } # end of tag close + + # This is an opening (sub) tag + + push @m, $m; # store old value + $m = $1; + next; + } + + if(/^ *([^:]+): *(.*)/) { + my ($name, $val)=($1, $2); + &$m($_, $name, $val); + } +} +close(LANG); + +if($update) { + my $any=0; + for(keys %english) { + if($english{$_}) { + print "###\n", + "### This phrase below was not present in the translated file\n", + "\n"; + print $english{$_}; + print "\n"; + } + } +} + +if($prefix) { + # We create a .c and .h file + + open(HFILE, ">$prefix.h"); + open(CFILE, ">$prefix.c"); + + print HFILE <) { - $line++; - if($_ =~ / *\#/) { - # comment - next; + # Output the ID names for the enum in the header file + my $i; + for $i (1 .. $idcount) { + my $name=$idnum[$i - 1]; # get the ID name + + $name =~ s/\"//g; # cut off the quotes + + printf HFILE (" %s,\n", $name); } - # get rid of DOS newlines - $_ =~ s/\r//g; - if($_ =~ / *([a-z]+): *(.*)/) { - ($var, $value) = ($1, $2); - # print "$var => $value\n"; - $set{$var} = $value; +# Output separation marker for last string ID and the upcoming voice IDs - if( (($var eq "new") && $value && ($value !~ /^\"(.*)\"\W*$/)) || - (($var eq "voice") && $value && ($value !~ /^\"(.*)\"\W*$/)) || - (($var eq "eng") && ($value !~ /^\"(.*)\"\W*$/)) ) { - print "$input:$line:missing quotes for ".$set{'id'}."\n"; - $errors++; - next; - } + print HFILE <$binary") or die "Can't create $binary"; + binmode OUTF; + printf OUTF ("\x1a%c", $langversion); # magic lang file header + + # loop over the target phrases + for $i (1 .. $idcount) { + my $name=$idnum[$i - 1]; # get the ID + my $dest = $dest{$name}; # get the destination phrase + + if($dest) { + $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes + + # Now, make sure we get the number from the english sort order: + $idnum = $idmap{$name}; + + printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest); + if($debug) { + printf("%02x => %s\n", $idnum, $value); + } + } + } } +elsif($voiceout) { + # voice output requested, display id: and voice: strings in a v1-like + # fashion -print HFILE <