summaryrefslogtreecommitdiff
path: root/tools/genlang
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2006-04-03 21:11:11 +0000
committerDaniel Stenberg <daniel@haxx.se>2006-04-03 21:11:11 +0000
commitc06e7772ff81ed4bbc78377a6e16456456f3e96c (patch)
tree0eee2026f47d5041461d2a35349f0c2175e97ab0 /tools/genlang
parenta87203651e35f368bf1d8bca5a846a0b9fb657c1 (diff)
downloadrockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.gz
rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.zip
langv2
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9470 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'tools/genlang')
-rwxr-xr-xtools/genlang611
1 files changed, 535 insertions, 76 deletions
diff --git a/tools/genlang b/tools/genlang
index cde23f85fa..07c866a288 100755
--- a/tools/genlang
+++ b/tools/genlang
@@ -1,28 +1,430 @@
1#!/usr/bin/perl -s 1#!/usr/bin/perl -s
2# __________ __ ___.
3# Open \______ \ ____ ____ | | _\_ |__ _______ ___
4# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7# \/ \/ \/ \/ \/
8# $Id$
9#
10# Copyright (C) 2006 by Daniel Stenberg
11#
12
13# binary version for the binary lang file
14my $langversion = 2; # 2 is the latest one used in the v1 format
15
16# A note for future users and readers: The original v1 language system allowed
17# the build to create and use a different language than english built-in. We
18# removed that feature from our build-system, but the build scripts still had
19# the ability. But, starting now, this ability is no longer provided since I
20# figured it was boring and unnecessary to write support for now since we
21# don't use it anymore.
2 22
3if(!$ARGV[0]) { 23if(!$ARGV[0]) {
4 print <<MOO 24 print <<MOO
5Usage: genlang [-p=<prefix>] <language file> 25Usage: genlang2 [options] <langv2 file>
26
27 -p=<prefix>
28 Make the tool create a [prefix].c and [prefix].h file.
29
30 -b=<outfile>
31 Make the tool create a binary language (.lng) file namaed [outfile].
32 The use of this option requires that you also use -e.
33
34 -u
35 Update language file. Given the translated file and the most recent english
36 file, you\'ll get an updated version sent to stdout. Suitable action to do
37 when you intend to update a translation.
38
39 -e=<english lang file>
40 Point out the english (original source) file, to use that as master
41 language template. Used in combination with -b or -u.
6 42
7When running this program. <prefix>.h and <prefix>.c will be created in the 43 -t=<target>
8"current directory". <prefix> is "lang" by default. 44 Specify which target you want the translations/phrases for. Required when
45 -b or -p is used.
46
47 -o
48 Voice mode output. Outputs all id: and voice: lines for the given target!
49
50 -v
51 Enables verbose (debug) output.
9MOO 52MOO
10; 53;
11 exit; 54 exit;
12} 55}
13 56
57# How update works:
58#
59# 1) scan the english file, keep the whole <phrase> for each phrase.
60# 2) read the translated file, for each end of phrase, compare:
61# A) all source strings; if anything changed, a comment about the change
62# should be output
63# B) the desc fields
64#
65# 3) output the phrase with the comments from above
66# 4) check which phrases the translated version didn't have, and spit out
67# the english version of those
68#
69
14my $prefix = $p; 70my $prefix = $p;
15if(!$prefix) { 71my $binary = $b;
16 $prefix="lang"; 72my $update = $u;
73
74my $english = $e;
75my $voiceout = $o;
76
77my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
78
79if($check > 1) {
80 print "Please use only one of -p, -u, -o and -b\n";
81 exit;
82}
83if(!$check) {
84 print "Please use at least one of -p, -u, -o and -b\n";
85 exit;
86}
87if(($binary || $update || $voiceout) && !$english) {
88 print "Please use -e too when you use -b, -o or -u\n";
89 exit;
90}
91
92my $target = $t;
93if(!$target && !$update) {
94 print "Please specify a target (with -t)!\n";
95 exit;
17} 96}
97my $verbose=$v;
98
99my %id; # string to num hash
100my @idnum; # num to string array
101
102my %source; # id string to source phrase hash
103my %dest; # id string to dest phrase hash
104my %voice; # id string to voice phrase hash
18 105
19my $input = $ARGV[0]; 106my $input = $ARGV[0];
20 107
21open(HFILE, ">$prefix.h"); 108my @m;
22open(CFILE, ">$prefix.c"); 109my $m="blank";
110
111sub match {
112 my ($string, $pattern)=@_;
113
114 $pattern =~ s/\*/.?*/g;
115 $pattern =~ s/\?/./g;
116
117 return ($string =~ $pattern);
118}
119
120sub blank {
121 # nothing to do
122}
123
124my %head;
125sub header {
126 my ($full, $n, $v)=@_;
127 $head{$n}=$v;
128}
129
130my %phrase;
131sub phrase {
132 my ($full, $n, $v)=@_;
133 $phrase{$n}=$v;
134}
135
136sub parsetarget {
137 my ($debug, $strref, $full, $n, $v)=@_;
138 my $string;
139 my @all= split(" *, *", $n);
140 my $test;
141 for $test (@all) {
142# print "TEST ($debug) $target for $test\n";
143 if(match($target, $test)) {
144 $string = $v;
145# print "MATCH: $test => $v\n";
146 }
147 }
148 if($string) {
149 $$strref = $string;
150 }
151 return $string;
152}
153
154my $src;
155sub source {
156 parsetarget("src", \$src, @_);
157}
158
159my $dest;
160sub dest {
161 parsetarget("dest", \$dest, @_);
162}
163
164my $voice;
165sub voice {
166 parsetarget("voice", \$voice, @_);
167}
168
169my %idmap;
170my %english;
171if($english) {
172 # For the cases where the english file needs to be scanned/read, we do
173 # it before we read the translated file. For -b it isn't necessary, but for
174 # -u it is convenient.
175
176 my $idnum=0; # start with a true number
177 my $vidnum=0x8000; # first voice id
178 open(ENG, "<$english") || die "can't open $english";
179 my @phrase;
180 my $id;
181 while(<ENG>) {
182
183 # get rid of DOS newlines
184 $_ =~ s/\r//g;
185
186 if($_ =~ /^ *\<phrase\>/) {
187 # this is the start of a phrase
188 }
189 elsif($_ =~ /^ *\<\/phrase\>/) {
190 # this is the end of a phrase, add it to the english hash
191 $english{$id}=join("", @phrase);
192 undef @phrase;
193 }
194 elsif($_ ne "\n") {
195 # gather everything related to this phrase
196 push @phrase, $_;
197 }
198
199 if($_ =~ /^ *id: ([^ \t\n]+)/i) {
200 $id=$1;
201 # voice-only entries get a different range
202 if($id =~ /^VOICE_/) {
203 # Assign an ID number to this entry
204 $idmap{$id}=$vidnum;
205 $vidnum++;
206 }
207 else {
208 # Assign an ID number to this entry
209 $idmap{$id}=$idnum;
210 $idnum++;
211 }
212 }
213 }
214 close(ENG);
215}
216
217# a function that compares the english phrase with the translated one.
218# compare source strings and desc
219
220# Then output the updated version!
221sub compare {
222 my ($idstr, $engref, $locref)=@_;
223 my ($edesc, $ldesc);
224 my ($esource, $lsource);
225 my $mode=0;
226
227 for my $l (@$engref) {
228 if($l =~ /^ *desc: (.*)/) {
229 $edesc=$1;
230 }
231 elsif($l =~ / *\<source\>/i) {
232 $mode=1;
233 }
234 elsif($mode) {
235 if($l =~ / *\<\/source\>/i) {
236 last;
237 }
238 $esource .= "$l\n";
239 }
240 }
241
242 my @show;
243 my @source;
244
245 $mode = 0;
246 for my $l (@$locref) {
247 if($l =~ /^ *desc: (.*)/) {
248 $ldesc=$1;
249 if($edesc ne $ldesc) {
250 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
251 }
252 push @show, $l;
253 }
254 elsif($l =~ / *\<source\>/i) {
255 $mode=1;
256 push @show, $l;
257 }
258 elsif($mode) {
259 if($l =~ / *\<\/source\>/i) {
260 $mode = 0;
261 print @show;
262 if($esource ne $lsource) {
263 print "### The <source> section differs from the english!\n",
264 "### the previously used one is commented below:\n";
265 for(split("\n", $lsource)) {
266 print "### $_\n";
267 }
268 print $esource;
269 }
270 else {
271 print $lsource;
272 }
273 undef @show; # start over
274
275 push @show, $l;
276 }
277 else {
278 $lsource .= "$l";
279 }
280 }
281 else {
282 push @show, $l;
283 }
284 }
285
286
287 print @show;
288}
289
290my $idcount; # counter for lang ID numbers
291my $voiceid=0x8000; # counter for voice-only ID numbers
292
293#
294# Now start the scanning of the selected language string
295#
296
297open(LANG, "<$input");
298my @phrase;
299while(<LANG>) {
300
301 $line++;
302
303 # get rid of DOS newlines
304 $_ =~ s/\r//g;
305
306 if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
307 # comment or empty line
308 next;
309 }
310
311 my $ll = $_;
23 312
24print HFILE <<MOO 313 # print "M: $m\n";
25/* This file was automatically generated using genlang */ 314
315 push @phrase, $ll;
316
317 # this is an XML-lookalike tag
318 if(/ *<([^>]*)>/) {
319 my $part = $1;
320 #print "P: $part\n";
321
322 if($part =~ /^\//) {
323 # this was a closing tag
324
325 if($part eq "/phrase") {
326 # closing the phrase
327
328 my $idstr = $phrase{'id'};
329 my $idnum;
330
331 if($dest =~ /^none\z/i) {
332 # "none" as dest means that this entire phrase is to be
333 # ignored
334 #print "dest is NONE!\n";
335 }
336 else {
337
338 # Use the ID name to figure out which id number range we
339 # should use for this phrase. Voice-only strings are
340 # separated.
341
342 if($idstr =~ /^VOICE/) {
343 $idnum = $voiceid++;
344 }
345 else {
346 $idnum = $idcount++;
347 }
348
349 $id{$idstr} = $idnum;
350 $idnum[$idnum]=$idstr;
351
352 $source{$idstr}=$src;
353 $dest{$idstr}=$dest;
354 $voice{$idstr}=$voice;
355
356 if($verbose) {
357 print "id: $phrase{id} ($idnum)\n";
358 print "source: $src\n";
359 print "dest: $dest\n";
360 print "voice: $voice\n";
361 }
362
363 undef $src;
364 undef $dest;
365 undef $voice;
366 undef %phrase;
367 }
368
369 if($update) {
370 my $e = $english{$idstr};
371
372 if($e) {
373 # compare original english with this!
374 my @eng = split("\n", $english{$idstr});
375
376 compare($idstr, \@eng, \@phrase);
377
378 $english{$idstr}=""; # clear it
379 }
380 else {
381 print "### $idstr: The phrase is not used. Skipped\n";
382 }
383 }
384 undef @phrase;
385
386 } # end of </phrase>
387
388 # starts with a slash, this _ends_ this section
389 $m = pop @m; # get back old value, the previous level's tag
390 next;
391 } # end of tag close
392
393 # This is an opening (sub) tag
394
395 push @m, $m; # store old value
396 $m = $1;
397 next;
398 }
399
400 if(/^ *([^:]+): *(.*)/) {
401 my ($name, $val)=($1, $2);
402 &$m($_, $name, $val);
403 }
404}
405close(LANG);
406
407if($update) {
408 my $any=0;
409 for(keys %english) {
410 if($english{$_}) {
411 print "###\n",
412 "### This phrase below was not present in the translated file\n",
413 "<phrase>\n";
414 print $english{$_};
415 print "</phrase>\n";
416 }
417 }
418}
419
420if($prefix) {
421 # We create a .c and .h file
422
423 open(HFILE, ">$prefix.h");
424 open(CFILE, ">$prefix.c");
425
426 print HFILE <<MOO
427/* This file was automatically generated using genlang2 */
26/* 428/*
27 * The str() macro/functions is how to access strings that might be 429 * The str() macro/functions is how to access strings that might be
28 * translated. Use it like str(MACRO) and expect a string to be 430 * translated. Use it like str(MACRO) and expect a string to be
@@ -37,12 +439,12 @@ extern unsigned char *language_strings[];
37extern const unsigned char language_builtin[]; 439extern const unsigned char language_builtin[];
38 440
39/* The enum below contains all available strings */ 441/* The enum below contains all available strings */
40enum { 442enum \{
41MOO 443MOO
42 ; 444 ;
43 445
44print CFILE <<MOO 446 print CFILE <<MOO
45/* This file was automaticly generated using genlang, the strings come 447/* This file was automaticly generated using genlang2, the strings come
46 from "$input" */ 448 from "$input" */
47 449
48#include "$prefix.h" 450#include "$prefix.h"
@@ -50,87 +452,144 @@ print CFILE <<MOO
50unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY]; 452unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
51const unsigned char language_builtin[] = 453const unsigned char language_builtin[] =
52MOO 454MOO
53 ; 455;
54 456
55open(LANG, "<$input"); 457 # Output the ID names for the enum in the header file
56while(<LANG>) { 458 my $i;
57 $line++; 459 for $i (1 .. $idcount) {
58 if($_ =~ / *\#/) { 460 my $name=$idnum[$i - 1]; # get the ID name
59 # comment 461
60 next; 462 $name =~ s/\"//g; # cut off the quotes
463
464 printf HFILE (" %s,\n", $name);
61 } 465 }
62 # get rid of DOS newlines
63 $_ =~ s/\r//g;
64 if($_ =~ / *([a-z]+): *(.*)/) {
65 ($var, $value) = ($1, $2);
66 # print "$var => $value\n";
67 466
68 $set{$var} = $value; 467# Output separation marker for last string ID and the upcoming voice IDs
69 468
70 if( (($var eq "new") && $value && ($value !~ /^\"(.*)\"\W*$/)) || 469 print HFILE <<MOO
71 (($var eq "voice") && $value && ($value !~ /^\"(.*)\"\W*$/)) || 470 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
72 (($var eq "eng") && ($value !~ /^\"(.*)\"\W*$/)) ) { 471 /* --- below this follows voice-only strings --- */
73 print "$input:$line:missing quotes for ".$set{'id'}."\n"; 472 VOICEONLY_DELIMITER = 0x8000,
74 $errors++; 473MOO
75 next; 474 ;
76 }
77 475
78 if($var eq "new") { 476# Output the voice-only ID names for the enum in the header file
79 # the last one for a single phrase 477 my $i;
478 for $i (0x8000 .. ($voiceid-1)) {
479 my $name=$idnum[$i]; # get the ID name
480
481 $name =~ s/\"//g; # cut off the quotes
482
483 printf HFILE (" %s,\n", $name);
484 }
80 485
81 if(!$value || ($value eq "\"\"") ) { 486 # Output end of enum
82 # if not set, get the english version 487 print HFILE "\n};\n/* end of generated enum list */\n";
83 $value = $set{'eng'}; 488
84 } 489 # Output the target phrases for the source file
85# print "VOICE: ".$set{'voice'}." VALUE: $value\n"; 490 for $i (1 .. $idcount) {
86 # Note: if both entries are "", the string is deprecated, 491 my $name=$idnum[$i - 1]; # get the ID
87 # but must be included to maintain compatibility 492 my $dest = $dest{$name}; # get the destination phrase
88 if($set{'id'} =~ /^VOICE_/) { 493
89 # voice-only 494 $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
90 push @vfile, $set{'id'};
91 }
92 else {
93 push @hfile, $set{'id'};
94 $value =~ s/^\"(.*)\"\W*$/\"$1\\0\"/;
95 print CFILE " $value\n";
96 }
97 495
98 undef %set; 496 if(!$dest) {
497 # this is just to be on the safe side
498 $dest = '"\0"';
99 } 499 }
100 500
501 printf CFILE (" %s\n", $dest);
101 } 502 }
102 503
103} 504# Output end of string chunk
104close(LANG); 505 print CFILE <<MOO
506;
507/* end of generated string list */
508MOO
509;
105 510
106for(@hfile) { 511 close(HFILE);
107 print HFILE " $_,\n"; 512 close(CFILE);
513} # end of the c/h file generation
514elsif($binary) {
515 # Creation of a binary lang file was requested
516
517 # We must first scan the english file to get the correct order of the id
518 # numbers used there, as that is what sets the id order for all language
519 # files. The english file is scanned before the translated file was
520 # scanned.
521
522 open(OUTF, ">$binary") or die "Can't create $binary";
523 binmode OUTF;
524 printf OUTF ("\x1a%c", $langversion); # magic lang file header
525
526 # loop over the target phrases
527 for $i (1 .. $idcount) {
528 my $name=$idnum[$i - 1]; # get the ID
529 my $dest = $dest{$name}; # get the destination phrase
530
531 if($dest) {
532 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
533
534 # Now, make sure we get the number from the english sort order:
535 $idnum = $idmap{$name};
536
537 printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
538 if($debug) {
539 printf("%02x => %s\n", $idnum, $value);
540 }
541 }
542 }
108} 543}
544elsif($voiceout) {
545 # voice output requested, display id: and voice: strings in a v1-like
546 # fashion
109 547
110print HFILE <<MOO 548 my @engl;
111 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */ 549
112 /* --- below this follows voice-only strings --- */ 550 # This loops over the strings in the translated language file order
113 VOICEONLY_DELIMITER = 0x8000, 551 my @ids = ((0 .. ($idcount-1)));
114MOO 552 push @ids, (0x8000 .. ($voiceid-1));
115 ; 553
554 #for my $id (@ids) {
555 # print "$id\n";
556 #}
557
558 for $i (@ids) {
559 my $name=$idnum[$i]; # get the ID
560 my $dest = $voice{$name}; # get the destination voice string
116 561
117for(@vfile) { 562 if($dest) {
118 print HFILE " $_,\n"; 563 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
564
565 # Now, make sure we get the number from the english sort order:
566 $idnum = $idmap{$name};
567
568 $engl[$idnum] = $i;
569
570 # print "Input index $i output index $idnum\n";
571
572 }
573 }
574 for my $i (@ids) {
575
576 my $o = $engl[$i];
577
578 my $name=$idnum[$o]; # get the ID
579 my $dest = $voice{$name}; # get the destination voice string
580
581 print "#$i\nid: $name\nvoice: $dest\n";
582 }
583
119} 584}
120 585
121print HFILE <<MOO
122};
123/* end of generated enum list */
124MOO
125 ;
126 586
127print CFILE <<MOO 587if($verbose) {
128; 588 printf("%d ID strings scanned\n", $idcount);
129/* end of generated string list */
130MOO
131 ;
132 589
133close(CFILE); 590 print "* head *\n";
134close(HFILE); 591 for(keys %head) {
592 printf "$_: %s\n", $head{$_};
593 }
594}
135 595
136exit $errors;