From c06e7772ff81ed4bbc78377a6e16456456f3e96c Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <daniel@haxx.se>
Date: Mon, 3 Apr 2006 21:11:11 +0000
Subject: langv2

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9470 a1c6a512-1295-4272-9138-f99709370657
---
 tools/genlang | 611 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 535 insertions(+), 76 deletions(-)

(limited to 'tools/genlang')
diff --git a/tools/genlang b/tools/genlang
index cde23f85fa..07c866a288 100755
--- a/tools/genlang
+++ b/tools/genlang
@@ -1,28 +1,430 @@
 #!/usr/bin/perl -s
+#             __________               __   ___.
+#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+#                     \/            \/     \/    \/            \/
+# $Id$
+#
+# Copyright (C) 2006 by Daniel Stenberg
+#
+
+# binary version for the binary lang file
+my $langversion = 2; # 2 is the latest one used in the v1 format
+
+# A note for future users and readers: The original v1 language system allowed
+# the build to create and use a built-in language other than english. We
+# removed that feature from our build-system, but the build scripts still had
+# the ability. Starting now, this ability is no longer provided, since I
+# figured it was boring and unnecessary to write support for something we
+# don't use anymore.
 
 if(!$ARGV[0]) {
     print <<MOO
-Usage: genlang [-p=<prefix>] <language file>
+Usage: genlang2 [options] <langv2 file>
+
+ -p=<prefix>
+    Make the tool create a [prefix].c and [prefix].h file.
+
+ -b=<outfile>
+    Make the tool create a binary language (.lng) file named [outfile].
+    The use of this option requires that you also use -e.
+
+ -u
+    Update language file. Given the translated file and the most recent english
+    file, you\'ll get an updated version sent to stdout. Suitable action to do
+    when you intend to update a translation.
+
+ -e=<english lang file>
+    Point out the english (original source) file, to use that as master
+    language template. Required in combination with -b, -o or -u.
 
-When running this program. <prefix>.h and <prefix>.c will be created in the
-"current directory". <prefix> is "lang" by default.
+ -t=<target>
+    Specify which target you want the translations/phrases for. Required when
+    -b, -o or -p is used.
+
+ -o
+    Voice mode output. Outputs all id: and voice: lines for the given target!
+
+ -v
+    Enables verbose (debug) output.
 MOO
 ;
     exit;
 }
 
+# How update works:
+#
+# 1) scan the english file, keep the whole <phrase> for each phrase.
+# 2) read the translated file, for each end of phrase, compare:
+#  A) all source strings, if there's any change there should be a comment about
+#     it output
+#  B) the desc fields
+#
+# 3) output the phrase with the comments from above
+# 4) check which phrases the translated version didn't have, and spit out
+#    the english version of those
+#
+
 my $prefix = $p;
-if(!$prefix) {
-    $prefix="lang";
+my $binary = $b;
+my $update = $u;
+
+my $english = $e;
+my $voiceout = $o;
+
+# Exactly one of the modes -p, -u, -o and -b has to be selected. Complaints go
+# to stderr with a non-zero exit code, so a calling build notices the failure.
+my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
+if($check > 1) {
+    print STDERR "Please use only one of -p, -u, -o and -b\n";
+    exit 1;
+}
+if(!$check) {
+    print STDERR "Please use at least one of -p, -u, -o and -b\n";
+    exit 1;
+}
+if(($binary || $update || $voiceout) && !$english) {
+    print STDERR "Please use -e too when you use -b, -o or -u\n";
+    exit 1;
+}
+my $target = $t;
+if(!$target && !$update) {
+    print STDERR "Please specify a target (with -t)!\n";
+    exit 1;
 }
+my $verbose=$v;
+
+my %id; # string to num hash
+my @idnum; # num to string array
+
+my %source; # id string to source phrase hash
+my %dest; # id string to dest phrase hash
+my %voice; # id string to voice phrase hash
 
 my $input = $ARGV[0];
 
-open(HFILE, ">$prefix.h");
-open(CFILE, ">$prefix.c");
+my @m;
+my $m="blank";
+
+sub match {
+    my ($string, $pattern)=@_;
+    # Translate the shell-style glob to a regex: '*' => '.*' and '?' => '.'
+    $pattern =~ s/\*/.*/g;
+    $pattern =~ s/\?/./g;
+    # Anchored: "h1" must not match "h100", and "recorder*" fits "recorder"
+    return ($string =~ /^$pattern\z/);
+}
+
+# Handler used while no section is open ($m starts out as "blank"): ignore.
+sub blank {
+}
+
+my %head; # name => value of the lines handled by header()
+sub header {
+    my (undef, $key, $value) = @_; # the full input line is not needed
+    $head{$key} = $value;
+}
+
+my %phrase; # name => value of the lines handled by phrase(), e.g. 'id'
+sub phrase {
+    my (undef, $key, $value) = @_; # the full input line is not needed
+    $phrase{$key} = $value;
+}
+
+# Handle one "targets: value" line. $n is a comma separated list of target
+# patterns; if one of them matches $target and $v is a true value, $v is
+# stored through $strref. Returns $v on a match, otherwise undef.
+sub parsetarget {
+    my ($debug, $strref, $full, $n, $v)=@_;
+    my $found;
+
+    for my $pattern (split(" *, *", $n)) {
+        # print "TEST ($debug) $target for $pattern\n";
+        next if(!match($target, $pattern));
+        # print "MATCH: $pattern => $v\n";
+        $found = $v;
+    }
+
+    $$strref = $found if($found);
+    return $found;
+}
+
+# Lines inside <source>, <dest> and <voice> all go through parsetarget();
+# each handler only names the scalar that collects the value that matched.
+my ($src, $dest, $voice);
+
+sub source {
+    return parsetarget("src", \$src, @_);
+}
+sub dest {
+    return parsetarget("dest", \$dest, @_);
+}
+sub voice {
+    return parsetarget("voice", \$voice, @_);
+}
+
+
+my %idmap; # ID name => ID number, numbered in the order of the english file
+my %english; # ID name => all non-blank lines of that english phrase
+if($english) {
+    # For the cases where the english file needs to be scanned/read, we do
+    # it before we read the translated file. %english is what -u compares
+    # against, %idmap supplies the ID numbers for the -b and -o output.
+    # Plain and voice-only IDs are numbered in two separate ranges:
+    my $idnum=0; # plain IDs count up from 0
+    my $vidnum=0x8000; # first voice id
+    open(ENG, "<$english") || die "can't open $english";
+    my @phrase;
+    my $id;
+    while(<ENG>) {
+        # @phrase collects the lines of the phrase currently being read
+        # get rid of DOS newlines
+        $_ =~ s/\r//g;
+
+        if($_ =~ /^ *\<phrase\>/) {
+            # this is the start of a phrase
+        }
+        elsif($_ =~ /^ *\<\/phrase\>/) {
+            # this is the end of a phrase, add it to the english hash
+            $english{$id}=join("", @phrase);
+            undef @phrase;
+        }
+        elsif($_ ne "\n") {
+            # gather everything related to this phrase
+            push @phrase, $_;
+        }
+
+        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
+            $id=$1;
+            # voice-only entries get a different range
+            if($id =~ /^VOICE_/) {
+                # Assign the next voice-only ID number to this entry
+                $idmap{$id}=$vidnum;
+                $vidnum++;
+            }
+            else {
+                # Assign the next plain ID number to this entry
+                $idmap{$id}=$idnum;
+                $idnum++;
+            }
+        }
+    }
+    close(ENG);
+}
+
+# compare($idstr, $engref, $locref): print the translated phrase (the lines in
+# @$locref) to stdout, updated against the english phrase (the lines in
+# @$engref, stored without newlines). A differing desc or <source> section is
+# replaced by the english one; the old text is kept in "###" comment lines.
+sub compare {
+    my ($idstr, $engref, $locref)=@_;
+    my ($edesc, $ldesc);
+    my ($esource, $lsource);
+    my $mode=0;
+    # pass 1: pick the desc and the <source> lines out of the english phrase
+    for my $l (@$engref) {
+        if($l =~ /^ *desc: (.*)/) {
+            $edesc=$1;
+        }
+        elsif($l =~ / *\<source\>/i) {
+            $mode=1;
+        }
+        elsif($mode) {
+            if($l =~ / *\<\/source\>/i) {
+                last;
+            }
+            $esource .= "$l\n";
+        }
+    }
+
+    my @show;
+    my @source;
+    # pass 2: walk the translated phrase; @show buffers lines until printed
+    $mode = 0;
+    for my $l (@$locref) {
+        if($l =~ /^ *desc: (.*)/) {
+            $ldesc=$1;
+            if($edesc ne $ldesc) {
+                $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
+            }
+            push @show, $l;
+        }
+        elsif($l =~ / *\<source\>/i) {
+            $mode=1;
+            push @show, $l;
+        }
+        elsif($mode) {
+            if($l =~ / *\<\/source\>/i) {
+                $mode = 0;
+                print @show;
+                if($esource ne $lsource) {
+                    print "### The <source> section differs from the english!\n",
+                    "### the previously used one is commented below:\n";
+                    for(split("\n", $lsource)) {
+                        print "### $_\n";
+                    }
+                    print $esource;
+                }
+                else {
+                    print $lsource;
+                }
+                undef @show; # start over
+
+                push @show, $l;
+            }
+            else {
+                $lsource .= "$l";
+            }
+        }
+        else {
+            push @show, $l;
+        }
+    }
+
+    # whatever was buffered after </source> (or everything, without <source>)
+    print @show;
+}
+
+my $idcount;        # counter for lang ID numbers
+my $voiceid=0x8000; # counter for voice-only ID numbers
+
+#
+# Now start the scanning of the selected language string
+#
+
+# Read the language file line by line. @m is the stack of enclosing tags and
+# $m names the sub that handles the "name: value" lines of the open section.
+open(LANG, "<$input") || die "can't open $input: $!";
+my @phrase;
+while(<LANG>) {
+    $line++;
+
+    # get rid of DOS newlines
+    $_ =~ s/\r//g;
+
+    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
+        # comment or empty line
+        next;
+    }
+    my $ll = $_;
 
-print HFILE <<MOO
-/* This file was automatically generated using genlang */
+   # print "M: $m\n";
+
+    push @phrase, $ll;
+
+    # this is an XML-lookalike tag
+    if(/ *<([^>]*)>/) {
+        my $part = $1;
+        #print "P: $part\n";
+
+        if($part =~ /^\//) {
+            # this was a closing tag
+
+            if($part eq "/phrase") {
+                # closing the phrase
+
+                my $idstr = $phrase{'id'};
+                my $idnum;
+
+                if($dest =~ /^none\z/i) {
+                    # "none" as dest means that this entire phrase is to be
+                    # ignored
+                    #print "dest is NONE!\n";
+                }
+                else {
+
+                    # Use the ID name to figure out which id number range we
+                    # should use for this phrase. Voice-only strings are
+                    # separated.
+
+                    if($idstr =~ /^VOICE/) {
+                        $idnum = $voiceid++;
+                    }
+                    else {
+                        $idnum = $idcount++;
+                    }
+
+                    $id{$idstr} = $idnum;
+                    $idnum[$idnum]=$idstr;
+
+                    $source{$idstr}=$src;
+                    $dest{$idstr}=$dest;
+                    $voice{$idstr}=$voice;
+
+                    if($verbose) {
+                        print "id: $phrase{id} ($idnum)\n";
+                        print "source: $src\n";
+                        print "dest: $dest\n";
+                        print "voice: $voice\n";
+                    }
+                }
+                # reset even for a skipped phrase, so nothing leaks into the next
+                undef $src;
+                undef $dest;
+                undef $voice;
+                undef %phrase;
+
+                if($update) {
+                    my $e = $english{$idstr};
+
+                    if($e) {
+                        # compare original english with this!
+                        my @eng = split("\n", $english{$idstr});
+
+                        compare($idstr, \@eng, \@phrase);
+
+                        $english{$idstr}=""; # clear it
+                    }
+                    else {
+                        print "### $idstr: The phrase is not used. Skipped\n";
+                    }
+                }
+                undef @phrase;
+
+            } # end of </phrase>
+
+            # starts with a slash, this _ends_ this section
+            $m = pop @m; # get back old value, the previous level's tag
+            next;
+        } # end of tag close
+
+        # This is an opening (sub) tag
+
+        push @m, $m; # store old value
+        $m = $part; # lines that follow are handled by the sub with this name
+        next;
+    }
+
+    if(/^ *([^:]+): *(.*)/) {
+        my ($name, $val)=($1, $2);
+        &$m($_, $name, $val);
+    }
+}
+close(LANG);
+
+if($update) {
+    # Phrases the translation lacks, emitted in the order of the english file
+    for(sort { $idmap{$a} <=> $idmap{$b} } keys %english) {
+        if($english{$_}) {
+            print "###\n",
+            "### This phrase below was not present in the translated file\n",
+            "<phrase>\n";
+            print $english{$_};
+            print "</phrase>\n";
+        }
+    }
+}
+
+if($prefix) {
+    # We create a .c and .h file
+
+    open(HFILE, ">$prefix.h");
+    open(CFILE, ">$prefix.c");
+
+    print HFILE <<MOO
+/* This file was automatically generated using genlang2 */
 /*
  * The str() macro/functions is how to access strings that might be
  * translated. Use it like str(MACRO) and expect a string to be
@@ -37,12 +439,12 @@ extern unsigned char *language_strings[];
 extern const unsigned char language_builtin[];
 
 /* The enum below contains all available strings */
-enum {
+enum \{
 MOO
     ;
 
-print CFILE <<MOO
-/* This file was automaticly generated using genlang, the strings come
+    print CFILE <<MOO
+/* This file was automaticly generated using genlang2, the strings come
    from "$input" */
    
 #include "$prefix.h"
@@ -50,87 +452,144 @@ print CFILE <<MOO
 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 const unsigned char language_builtin[] =
 MOO
-    ;
+;
 
-open(LANG, "<$input");
-while(<LANG>) {
-    $line++;
-    if($_ =~ / *\#/) {
-        # comment
-        next;
+    # Output the ID names for the enum in the header file
+    my $i;
+    for $i (1 .. $idcount) {
+        my $name=$idnum[$i - 1]; # get the ID name
+        
+        $name =~ s/\"//g; # cut off the quotes
+        
+        printf HFILE ("    %s,\n", $name);
     }
-    # get rid of DOS newlines
-    $_ =~ s/\r//g;
-    if($_ =~ / *([a-z]+): *(.*)/) {
-        ($var, $value) = ($1, $2);
-        # print "$var => $value\n";
 
-        $set{$var} = $value;
+# Output separation marker for last string ID and the upcoming voice IDs
 
-        if( (($var eq "new") && $value && ($value !~ /^\"(.*)\"\W*$/)) ||
-            (($var eq "voice") && $value && ($value !~ /^\"(.*)\"\W*$/)) ||
-            (($var eq "eng") && ($value !~ /^\"(.*)\"\W*$/)) ) {
-            print "$input:$line:missing quotes for ".$set{'id'}."\n";
-            $errors++;
-            next;
-        }
+    print HFILE <<MOO
+    LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
+    /* --- below this follows voice-only strings --- */
+    VOICEONLY_DELIMITER = 0x8000,
+MOO
+    ;
 
-        if($var eq "new") {
-            # the last one for a single phrase
+# Output the ID names for the enum in the header file
+    my $i;
+    for $i (0x8000 .. ($voiceid-1)) {
+        my $name=$idnum[$i]; # get the ID name
+        
+        $name =~ s/\"//g; # cut off the quotes
+        
+        printf HFILE ("    %s,\n", $name);
+    }
 
-            if(!$value || ($value eq "\"\"") ) {
-                # if not set, get the english version
-                $value = $set{'eng'};
-            }
-#            print "VOICE: ".$set{'voice'}." VALUE: $value\n";
-            # Note: if both entries are "", the string is deprecated,
-            # but must be included to maintain compatibility
-            if($set{'id'} =~ /^VOICE_/) {
-                # voice-only
-                push @vfile, $set{'id'};
-            }
-            else {
-                push @hfile, $set{'id'};
-                $value =~ s/^\"(.*)\"\W*$/\"$1\\0\"/;
-                print CFILE "    $value\n";
-            }
+    # Output end of enum
+    print HFILE "\n};\n/* end of generated enum list */\n";
+
+    # Output the target phrases for the source file
+    for $i (1 .. $idcount) {
+        my $name=$idnum[$i - 1]; # get the ID
+        my $dest = $dest{$name}; # get the destination phrase
+        
+        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 
-            undef %set;
+        if(!$dest) {
+            # this is just to be on the safe side
+            $dest = '"\0"';
         }
 
+        printf CFILE ("    %s\n", $dest);
     }
 
-}
-close(LANG);
+# Output end of string chunk
+    print CFILE <<MOO
+;
+/* end of generated string list */
+MOO
+;
 
-for(@hfile) {
-    print HFILE "    $_,\n";
+    close(HFILE);
+    close(CFILE);
+} # end of the c/h file generation
+elsif($binary) {
+    # Creation of a binary lang file was requested. Layout as written below:
+    # the two header bytes 0x1a and $langversion, then for each translated
+    # phrase a 16 bit ID (high byte first) and the NUL-terminated string.
+    # The ID numbers are taken from %idmap, i.e. from the order of the
+    # english file (scanned before the translated file), as that is what
+    # sets the id order for all language files.
+    open(OUTF, ">$binary") or die "Can't create $binary";
+    binmode OUTF;
+    printf OUTF ("\x1a%c", $langversion); # magic lang file header
+
+    # loop over the target phrases
+    for my $i (1 .. $idcount) {
+        my $name=$idnum[$i - 1]; # get the ID
+        my $dest = $dest{$name}; # get the destination phrase
+
+        if($dest) {
+            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
+
+            # Now, make sure we get the number from the english sort order:
+            my $idnum = $idmap{$name};
+
+            printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
+            if($verbose) {
+                printf("%02x => %s\n", $idnum, $dest);
+            }
+        }
+    }
+    close(OUTF);
 }
+elsif($voiceout) {
+    # voice output requested, display id: and voice: strings in a v1-like
+    # fashion
+
+    my @engl;
+
+    # Index numbers in translated-file order: plain IDs, then voice-only IDs
+    my @ids = ((0 .. ($idcount-1)));
+    push @ids, (0x8000 .. ($voiceid-1));
+
+    #for my $id (@ids) {
+    #    print "$id\n";
+    #}
+    # pass 1: $engl[english ID number] = index used in the translated file
+    for $i (@ids) {
+        my $name=$idnum[$i]; # get the ID
+        my $dest = $voice{$name}; # get the destination voice string
 
-for(@vfile) {
-    print HFILE "    $_,\n";
+        if($dest) {
+            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
+
+            # Now, make sure we get the number from the english sort order:
+            $idnum = $idmap{$name};
+
+            $engl[$idnum] = $i;
+
+           # print "Input index $i output index $idnum\n";
+
+        }
+    }
+    for my $i (@ids) {
+        # pass 2: print id: and voice: for what pass 1 stored under number $i
+        my $o = $engl[$i];
+        # NOTE(review): an unset $o makes this read $idnum[0] - confirm intent
+        my $name=$idnum[$o]; # get the ID
+        my $dest = $voice{$name}; # get the destination voice string
+        
+        print "#$i\nid: $name\nvoice: $dest\n";
+    }
+    
 }
 
-print HFILE <<MOO
-};
-/* end of generated enum list */
-MOO
-    ;
 
-print CFILE <<MOO
-;
-/* end of generated string list */
-MOO
-    ;
+if($verbose) {
+    printf("%d ID strings scanned\n", $idcount);
 
-close(CFILE);
-close(HFILE);
+    print "* head *\n";
+    for(sort keys %head) { # sorted so the dump is stable between runs
+        printf "%s: %s\n", $_, $head{$_}; # keep the key out of the format
+    }
+}
 
-exit $errors;
-- 
cgit v1.2.3