From 82acdd3e1cad9256e1721cbb3da500453f19724d Mon Sep 17 00:00:00 2001
From: Thomas Martitz <kugel@rockbox.org>
Date: Thu, 8 Sep 2011 14:37:50 +0000
Subject: Make genlang faster by doing better regexes.

With this change generating all languages takes only two-thirds the
time. It changes the acceptable syntax for target wildcards in language
files, however: instead of a comma-separated list of glob-style
wildcards it requires that it be a comma-separated list of prefix
matches, i.e. the * can only appear at the end of each wildcard, and ?
cannot be used. This does not require any changes to existing language
files as they are all already in this form.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30481 a1c6a512-1295-4272-9138-f99709370657
---
 tools/genlang | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

(limited to 'tools/genlang')

diff --git a/tools/genlang b/tools/genlang
index 19b637d237..33adb2c7e3 100755
--- a/tools/genlang
+++ b/tools/genlang
@@ -121,7 +121,23 @@ if(!$target && !$update && !$sortfile) {
     print STDERR "Please specify a target (with -t)!\n";
     exit;
 }
-my @target_parts = split ':', $target;
+
+# Build up a regex which can be applied to target wildcard lists. We only need
+# to support prefix matches, so a target parameter of foo:bar can be expanded
+# to the regex "\*|f\*|fo\*|foo|b\*|ba\*|bar" and applied to the wildcard list
+# (plus end-of-string or commas on either side). Each piece goes through
+# quotemeta so that regex metacharacters in a target name only ever match
+# literally. Duplicate alternatives are harmless, so we don't remove them.
+my $target_regex = "(?:^|,) *(?:\\*";
+foreach my $target_part (split ':', $target) {
+    for (my $c=1; $c<length $target_part; $c++) {
+        my $partial = quotemeta(substr($target_part, 0, $c));
+        $target_regex .= "|$partial\\*";
+    }
+    $target_regex .= "|" . quotemeta($target_part);
+}
+$target_regex .= ") *(?:,|\$)";
+$target_regex = qr/$target_regex/;
 
 my $binpath = "";
 if ($binary =~ m|(.*)/[^/]+|) {
@@ -178,21 +194,10 @@ sub options {
 sub parsetarget {
     my ($debug, $strref, $full, $n, $v)=@_;
     my $string;
-    my @all= split(" *, *", $n);
-    my $test;
-    for $test (@all) {
-        $test =~ s/\*/.*/g;
-        $test =~ s/\?/./g;
-
-#        print "TEST ($debug) $target for $test\n";
-        for my $part (@target_parts) {
-            if($part =~ /^$test\z/) {
-                $string = $v;
-#                print "MATCH: $test => $v\n";
-                $$strref = $string;
-                return $string;
-            }
-        }
+    if ($n =~ $target_regex) {
+        # $n matched the precompiled target regex: store $v via $strref too.
+        $$strref = $string = $v;
+        return $string;
     }
 }
 
-- 
cgit v1.2.3