1 files changed, 32 insertions, 17 deletions
diff --git a/tools/updatelang b/tools/updatelang
index a05011e645..94f6fd1e8c 100755
--- a/tools/updatelang
+++ b/tools/updatelang
@@ -6,11 +6,15 @@
 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 #                     \/            \/     \/    \/            \/
 #
-# Copyright (C) 2020 Solomon Peachy
+# Copyright (C) 2020-2024 Solomon Peachy
 #
 use utf8;
 use File::Basename;
+use Unicode::Normalize;
+use open qw( :std :encoding(UTF-8) );  # default layer for open(); :std also applies it to STDIN/STDOUT/STDERR
+# No explicit binmode(STDOUT, ":encoding(UTF-8)") here: ':std' above already covers STDOUT, and binmode would push the same layer a second time.
 sub trim {
    my ($string) = @_;
@@ -72,6 +76,7 @@ sub parselangfile {
 #                   $l = "*";
                }
+                $w = NFD($w); # Normalize to Unicode NFD (canonical decomposition) before storing the phrase text
                $thisphrase{$pos}->{$l} = $w;
            }
        }
@@ -158,7 +163,7 @@ sub reduceformat($) {
 ##################
 if($#ARGV != 2) {
-    print "Usage: updatelang <english.lang> <otherlang> <outfile|->\n";
+    print "Usage: [ENGLISHORDER=1] updatelang <english.lang> <otherlang> <outfile|->\n";
    exit;
 }
@@ -402,9 +407,9 @@ foreach my $id (@langorder) {
        my $sane = $lang{$id}{'dest'}{$tgt};
        $sane =~ s/^~?(.*)/$1/;  # Strip off leading ~ if it's there as it's not a legal character otherwise
-        if ($sane =~ tr/"~//) {
+        if ($sane =~ tr/"~<>//) {
            # If it has suspicious characters that are not allowed
-            $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' has some suspicious characters (eg '~'), please double-check!\n";
+            $lang{$id}{'notes'} .= "### The <dest> section for '$id:$tgt' has some suspicious characters (eg \",~,<,>), please double-check!\n";
 #           print "#!! '$id:$tgt' suspicious characters\n";
        }
    }
@@ -476,9 +481,9 @@ foreach my $id (@langorder) {
        }
        my $sane = $lang{$id}{'voice'}{$tgt};
        $sane =~ s/^~?(.*)/$1/;  # Strip off leading ~ if it's there as it's not a legal character otherwise
-        if ($sane =~ tr/%"~//) {
+        if ($sane =~ tr/%"~:\[\]<>{}\|//) {
-            # If it has suspicious characters that are not normally voiced..
+            # Flag characters that are not typically voiced by adding a note for the translator.
-            $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' has some suspicious characters (eg '%' or '~'), please double-check!\n";
+            $lang{$id}{'notes'} .= "### The <voice> section for '$id:$tgt' has some suspicious characters (eg %,\",~,:,<,>,[,],{,},|), please correct!\n";
 #            print "#!! '$id:$tgt' suspicious characters\n";
        }
        if ($lang{$id}{'voice'}{$tgt} =~ /\.\.\./) {
@@ -502,7 +507,14 @@ foreach (@langheader) {
    print $fh $_;
 }
-my @finalorder = @langorder;  # TODO make configurable vs @englishorder
+# Pick the id ordering used for output: @englishorder when the ENGLISHORDER
+# environment variable is set to a true value, @langorder otherwise.
+my @finalorder =
+    $ENV{'ENGLISHORDER'}
+    ? @englishorder
+    : @langorder;
 foreach my $id (@finalorder) {
    if (!defined($english{$id})) {
        next;
@@ -531,10 +543,11 @@ foreach my $id (@finalorder) {
    %lp = combinetgts(%{$lang{$id}{'source'}});
    print $fh "  <source>\n";
    foreach my $tgt (sort(keys(%lp))) {
-        if ($lp{$tgt} eq 'none') {
+        my $w = NFC($lp{$tgt});  # Unicode NFC (canonical composition): the <source> value is written in composed form
-            print $fh "    $tgt: $lp{$tgt}\n";
+        if ($w eq 'none') {
+            print $fh "    $tgt: $w\n";
        } else {
-            print $fh "    $tgt: \"$lp{$tgt}\"\n";
+            print $fh "    $tgt: \"$w\"\n";
        }
    }
    print $fh "  </source>\n";
@@ -543,10 +556,11 @@ foreach my $id (@finalorder) {
    %lp = combinetgts(%{$lang{$id}{'dest'}});
    print $fh "  <dest>\n";
    foreach my $tgt (sort(keys(%lp))) {
-        if ($lp{$tgt} eq 'none') {
+        my $w = NFC($lp{$tgt});  # Unicode NFC (canonical composition): the <dest> value is written in composed form
-            print $fh "    $tgt: $lp{$tgt}\n";
+        if ($w eq 'none') {
+            print $fh "    $tgt: $w\n";
        } else {
-            print $fh "    $tgt: \"$lp{$tgt}\"\n";
+            print $fh "    $tgt: \"$w\"\n";
        }
    }
    print $fh "  </dest>\n";
@@ -555,10 +569,11 @@ foreach my $id (@finalorder) {
    %lp = combinetgts(%{$lang{$id}{'voice'}});
    print $fh "  <voice>\n";
    foreach my $tgt (sort(keys(%lp))) {
-        if ($lp{$tgt} eq 'none') {
+        my $w = NFC($lp{$tgt});  # Unicode NFC (canonical composition): the <voice> value is written in composed form
-            print $fh "    $tgt: $lp{$tgt}\n";
+        if ($w eq 'none') {
+            print $fh "    $tgt: $w\n";
        } else {
-            print $fh "    $tgt: \"$lp{$tgt}\"\n";
+            print $fh "    $tgt: \"$w\"\n";
        }
    }
    print $fh "  </voice>\n";