summaryrefslogtreecommitdiff
path: root/tools/wn2rdf.pl
diff options
context:
space:
mode:
Diffstat (limited to 'tools/wn2rdf.pl')
-rw-r--r--tools/wn2rdf.pl122
1 files changed, 122 insertions, 0 deletions
diff --git a/tools/wn2rdf.pl b/tools/wn2rdf.pl
new file mode 100644
index 0000000000..2fff87d66b
--- /dev/null
+++ b/tools/wn2rdf.pl
@@ -0,0 +1,122 @@
1#! /usr/bin/perl -w
2
3# Wordnet dictionary database converter
4#
5# Converts the Wordnet prolog data to rockbox dictionary format.
6#
7# Written by Miika Pekkarinen <slasher@ihme.org>
8#
9# $Id$
10
11use strict;
12
13# Lookup tables
14my %words;
15my %descriptions;
16
# Translate a numeric category id into the one-letter tag written to the
# output file.
#
#   $id - category number (compared numerically)
#
# Returns 'N' for 1, 'V' for 2, 'A' for both 3 and 4, and '?' for any
# other value.
sub getcatname {
    my ($id) = @_;

    # Candidates are tried in this order; the first numeric match wins.
    my @tags = ( [ 1, 'N' ], [ 2, 'V' ], [ 3, 'A' ], [ 4, 'A' ] );

    foreach my $pair (@tags) {
        my ($number, $tag) = @{$pair};
        return $tag if $id == $number;
    }

    return '?';
}
26
# Open the two Prolog input files for reading and the intermediate output
# file for writing. The three-argument form of open keeps the mode apart
# from the file name. The bareword handle names are kept on purpose: the
# remainder of the script reads from and writes to them by name.
open(IN_WORD, '<', 'wn_s.pl') or die "Open fail(#1): $!";
open(IN_DESC, '<', 'wn_g.pl') or die "Open fail(#2): $!";
open(OUTPUT, '>', 'dict.preparsed') or die "Open fail(#3): $!";
30
print "Reading word file...\n";

# Read every sense record into %words. The structure is
#   $words{lower-cased word}{synset id} = first digit of the synset id
# where that first digit is later used as the category number.
while (my $line = <IN_WORD>) {
    # Remove the line ending, including the CR of CRLF files, which a
    # plain chomp would leave behind and which would stop the record
    # pattern below from matching.
    $line =~ s/\s+\z//;
    next if $line eq '';

    # s(100001740,1,'entity',n,1,11). => 100001740,1,'entity',n,1,11
    # Lines that are not s(...) records are skipped; otherwise they would
    # be stored as junk entries and abort the run when descriptions are
    # linked.
    unless ($line =~ s/^s\((.*)\)\.$/$1/) {
        warn "Skipping malformed word record: $line";
        next;
    }

    # Only the synset id (field 1) and the word (field 3) are needed.
    my ($seqid, undef, $word) = split /,/, $line, 4;
    unless (defined $word) {
        warn "Skipping incomplete word record: $line";
        next;
    }

    # 'entity' => entity
    $word =~ s/^'(.*)'$/$1/;

    # A quote inside the word is written doubled; undo every occurrence,
    # not just the first one.
    $word =~ s/''/'/g;

    my $category = substr $seqid, 0, 1;

    $words{lc $word}{$seqid} = $category;
}

close IN_WORD;
52
print "Reading description file...\n";

# Read every gloss record into %descriptions, keyed by synset id.
while (my $line = <IN_DESC>) {
    # Remove the line ending, including the CR of CRLF files, which a
    # plain chomp would leave behind and which would stop the record
    # pattern below from matching.
    $line =~ s/\s+\z//;
    next if $line eq '';

    # g(100002056,'(a separate and self-contained entity)').
    # => 100002056,'(a separate and self-contained entity)'
    # Lines that are not g(...) records are skipped with a warning.
    unless ($line =~ s/^g\((.*)\)\.$/$1/) {
        warn "Skipping malformed description record: $line";
        next;
    }

    # Split on the first comma only; the gloss itself may contain commas.
    my ($seqid, $desc) = split /,/, $line, 2;
    unless (defined $desc) {
        warn "Skipping incomplete description record: $line";
        next;
    }

    # '(text)' => text
    $desc =~ s/^'\((.*)\)'$/$1/;

    # A quote inside the gloss is written doubled; undo every occurrence,
    # not just the first one.
    $desc =~ s/''/'/g;

    $descriptions{$seqid} = $desc;
}

close IN_DESC;
70
print "Sorting and writing output...\n";

# Write one line per word, in sorted word order. Each line has the form
#   word<TAB>N: 1. desc 2. desc<TAB>V: 1. desc ...
# with one tab-separated section per category the word appears in.
foreach my $word (sort keys %words) {
    my %categories;

    # Collect the description of every synset the word belongs to, grouped
    # by category. The ids are visited in sorted order so the numbering of
    # the definitions is the same on every run instead of following hash
    # order.
    foreach my $id (sort keys %{$words{$word}}) {
        my $catid = $words{$word}{$id};
        my $description = $descriptions{$id};

        if (!defined($description) or $description eq '') {
            # Report the synset id that could not be resolved;
            # $words{$word} is a hash reference and would only print as
            # HASH(0x...).
            print "Error: Failed to link word: $word / $id\n";
            exit 1;
        }

        push @{$categories{$catid}}, $description;
    }

    # 1 = noun
    # 2 = verb
    # 3 = adjective
    # 4 = adverb
    my @sections;
    for my $catid (1 .. 4) {
        my $descs = $categories{$catid} or next;

        # Number the definitions of this category starting from 1.
        my $n = 0;
        my $catdesc = join ' ', map { ++$n . ". $_" } @{$descs};

        push @sections, getcatname($catid) . ": $catdesc";
    }

    # Every word must end up with at least one section; none means its
    # category numbers were all outside 1 .. 4.
    die "Internal error" unless @sections;

    print OUTPUT "$word\t", join("\t", @sections), "\n";
}

# Buffered write errors only surface at close, so check it before
# reporting success.
close OUTPUT or die "Close fail: $!";

print "Done, output was successfully written!\n";
121
122