summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSolomon Peachy <pizza@shaftnet.org>2024-10-13 09:01:20 -0400
committerSolomon Peachy <pizza@shaftnet.org>2024-10-22 07:25:41 -0400
commiteb2d596d72247d90a89fc3b1c702b5b54aaecf6b (patch)
treea65b02d9aec21b1f6a9b7fb72aa6fce276fc006b
parentc354e0bd1f0842b56e9a913d302ee17ec1474db6 (diff)
downloadrockbox-eb2d596d72247d90a89fc3b1c702b5b54aaecf6b.tar.gz
rockbox-eb2d596d72247d90a89fc3b1c702b5b54aaecf6b.zip
updatelang: Normalize all strings in our lang files to NFC form.
Now no matter how [de]normalized the input strings are, we will normalize them to the best of our ability in what we use. This adds a dependency on Perl's Unicode::Normalize. Change-Id: I13e275692ea33a463b19f3a499ea06ce1acbb44a
-rwxr-xr-x tools/updatelang 28
1 files changed, 18 insertions, 10 deletions
diff --git a/tools/updatelang b/tools/updatelang
index ff4ce340e0..94f6fd1e8c 100755
--- a/tools/updatelang
+++ b/tools/updatelang
@@ -6,11 +6,15 @@
6# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ 6# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7# \/ \/ \/ \/ \/ 7# \/ \/ \/ \/ \/
8# 8#
9# Copyright (C) 2020 Solomon Peachy 9# Copyright (C) 2020-2024 Solomon Peachy
10# 10#
11 11
12use utf8; 12use utf8;
13use File::Basename; 13use File::Basename;
14use Unicode::Normalize;
15
16use open qw( :std :encoding(UTF-8) );
17binmode(STDOUT, ":encoding(UTF-8)");
14 18
15sub trim { 19sub trim {
16 my ($string) = @_; 20 my ($string) = @_;
@@ -72,6 +76,7 @@ sub parselangfile {
72# $l = "*"; 76# $l = "*";
73 } 77 }
74 78
79 $w = NFD($w); # Unicode decompose
75 $thisphrase{$pos}->{$l} = $w; 80 $thisphrase{$pos}->{$l} = $w;
76 } 81 }
77 } 82 }
@@ -538,10 +543,11 @@ foreach my $id (@finalorder) {
538 %lp = combinetgts(%{$lang{$id}{'source'}}); 543 %lp = combinetgts(%{$lang{$id}{'source'}});
539 print $fh " <source>\n"; 544 print $fh " <source>\n";
540 foreach my $tgt (sort(keys(%lp))) { 545 foreach my $tgt (sort(keys(%lp))) {
541 if ($lp{$tgt} eq 'none') { 546 my $w = NFC($lp{$tgt});
542 print $fh " $tgt: $lp{$tgt}\n"; 547 if ($w eq 'none') {
548 print $fh " $tgt: $w\n";
543 } else { 549 } else {
544 print $fh " $tgt: \"$lp{$tgt}\"\n"; 550 print $fh " $tgt: \"$w\"\n";
545 } 551 }
546 } 552 }
547 print $fh " </source>\n"; 553 print $fh " </source>\n";
@@ -550,10 +556,11 @@ foreach my $id (@finalorder) {
550 %lp = combinetgts(%{$lang{$id}{'dest'}}); 556 %lp = combinetgts(%{$lang{$id}{'dest'}});
551 print $fh " <dest>\n"; 557 print $fh " <dest>\n";
552 foreach my $tgt (sort(keys(%lp))) { 558 foreach my $tgt (sort(keys(%lp))) {
553 if ($lp{$tgt} eq 'none') { 559 my $w = NFC($lp{$tgt});
554 print $fh " $tgt: $lp{$tgt}\n"; 560 if ($w eq 'none') {
561 print $fh " $tgt: $w\n";
555 } else { 562 } else {
556 print $fh " $tgt: \"$lp{$tgt}\"\n"; 563 print $fh " $tgt: \"$w\"\n";
557 } 564 }
558 } 565 }
559 print $fh " </dest>\n"; 566 print $fh " </dest>\n";
@@ -562,10 +569,11 @@ foreach my $id (@finalorder) {
562 %lp = combinetgts(%{$lang{$id}{'voice'}}); 569 %lp = combinetgts(%{$lang{$id}{'voice'}});
563 print $fh " <voice>\n"; 570 print $fh " <voice>\n";
564 foreach my $tgt (sort(keys(%lp))) { 571 foreach my $tgt (sort(keys(%lp))) {
565 if ($lp{$tgt} eq 'none') { 572 my $w = NFC($lp{$tgt});
566 print $fh " $tgt: $lp{$tgt}\n"; 573 if ($w eq 'none') {
574 print $fh " $tgt: $w\n";
567 } else { 575 } else {
568 print $fh " $tgt: \"$lp{$tgt}\"\n"; 576 print $fh " $tgt: \"$w\"\n";
569 } 577 }
570 } 578 }
571 print $fh " </voice>\n"; 579 print $fh " </voice>\n";