diff options
-rw-r--r-- | tools/FILES | 1 | ||||
-rw-r--r-- | tools/Makefile | 5 | ||||
-rw-r--r-- | tools/rdf2binary.c | 89 | ||||
-rw-r--r-- | tools/wn2rdf.pl | 122 |
4 files changed, 216 insertions, 1 deletions
diff --git a/tools/FILES b/tools/FILES index 2e2d232160..14cdeddd41 100644 --- a/tools/FILES +++ b/tools/FILES | |||
@@ -10,6 +10,7 @@ rockbox-style.el | |||
10 | sample.emacs | 10 | sample.emacs |
11 | buildzip.pl | 11 | buildzip.pl |
12 | romsizetest.pl | 12 | romsizetest.pl |
13 | wn2rdf.pl | ||
13 | make.inc | 14 | make.inc |
14 | makesrc.inc | 15 | makesrc.inc |
15 | fwpatcher/*.[ch] | 16 | fwpatcher/*.[ch] |
diff --git a/tools/Makefile b/tools/Makefile index b98c269642..d8b1545015 100644 --- a/tools/Makefile +++ b/tools/Makefile | |||
@@ -9,7 +9,7 @@ | |||
9 | CFLAGS := -O -ansi -g | 9 | CFLAGS := -O -ansi -g |
10 | LDFLAGS := -g | 10 | LDFLAGS := -g |
11 | 11 | ||
12 | TARGETS := scramble descramble sh2d bmp2rb convbdf generate_rocklatin mkboot | 12 | TARGETS := scramble descramble sh2d bmp2rb rdf2binary convbdf generate_rocklatin mkboot |
13 | 13 | ||
14 | all: $(TARGETS) | 14 | all: $(TARGETS) |
15 | @echo "tools done" | 15 | @echo "tools done" |
@@ -26,6 +26,9 @@ sh2d: sh2d.c | |||
26 | bmp2rb: bmp2rb.c | 26 | bmp2rb: bmp2rb.c |
27 | $(CC) -DAPPLICATION_NAME=\"$@\" -g $+ -o $@ | 27 | $(CC) -DAPPLICATION_NAME=\"$@\" -g $+ -o $@ |
28 | 28 | ||
29 | rdf2binary: rdf2binary.c | ||
30 | $(CC) -g $+ -o $@ | ||
31 | |||
29 | mkboot: mkboot.c | 32 | mkboot: mkboot.c |
30 | $(CC) -g $+ -o $@ | 33 | $(CC) -g $+ -o $@ |
31 | 34 | ||
diff --git a/tools/rdf2binary.c b/tools/rdf2binary.c new file mode 100644 index 0000000000..3597efa727 --- /dev/null +++ b/tools/rdf2binary.c | |||
@@ -0,0 +1,89 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2005 Miika Pekkarinen | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | /* | ||
21 | This tool converts the rdf file to the binary data used in the dict plugin. | ||
22 | */ | ||
23 | |||
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
29 | |||
/* maximum word length, has to be the same in dict.c */
#define WORDLEN 32

/*
 * One record of dict.index: the NUL-padded word followed by the byte offset
 * of its description inside dict.desc.  Records are written as raw struct
 * images.
 * NOTE(review): the on-disk layout therefore depends on the host's
 * sizeof(long), padding and byte order -- confirm it matches the reader.
 */
struct word {
    char word[WORDLEN];
    long offset;
};

/*
 * Write exactly len bytes from data to fd, retrying after short writes.
 * Returns 0 on success, -1 if write() fails or makes no progress.
 */
static int write_all(int fd, const void *data, size_t len)
{
    const char *p = data;

    while (len > 0) {
        ssize_t n = write(fd, p, len);

        if (n <= 0) {
            return -1;
        }
        p += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Convert every "word<TAB>field[<TAB>field...]" line read from in.
 * For each line one struct word is appended to idx_out and the fields,
 * joined with '\n', are appended to desc_out.
 * Returns 0 on success, 2 on a malformed line, 3 on an I/O error.
 */
static int convert(FILE *in, int idx_out, int desc_out)
{
    struct word w;
    char buf[10000];
    long cur_offset = 0;

    while (fgets(buf, sizeof buf, in) != NULL) {
        const char *word;
        const char *desc;

        /* A line that did not fit into buf would otherwise be split into
           several bogus records, so refuse it explicitly. */
        if (strchr(buf, '\n') == NULL && !feof(in)) {
            fprintf(stderr, "Parse error: line too long\n");
            return 2;
        }

        /* strtok is fine here: single-threaded, and buf is ours to modify */
        word = strtok(buf, "\t");
        desc = strtok(NULL, "\t");

        if (word == NULL || desc == NULL) {
            fprintf(stderr, "Parse error!\n");
            /* Passing NULL to %s is undefined, substitute a marker */
            fprintf(stderr, "word: %s\ndesc: %s\n",
                    word != NULL ? word : "(null)",
                    desc != NULL ? desc : "(null)");
            return 2;
        }

        /* Clear the whole record first: this guarantees the word is
           NUL-terminated even when truncated and keeps uninitialised
           stack bytes (padding, tail of the array) out of the file. */
        memset(&w, 0, sizeof w);
        strncpy(w.word, word, WORDLEN - 1);
        w.offset = cur_offset;
        if (write_all(idx_out, &w, sizeof w) < 0) {
            fprintf(stderr, "Error: Writing dict.index failed\n");
            return 3;
        }

        /* fgets keeps the line's '\n', so the last field already carries
           the record terminator; inner tabs are turned into newlines. */
        for (;;) {
            size_t len = strlen(desc);

            cur_offset += (long)len;
            if (write_all(desc_out, desc, len) < 0) {
                fprintf(stderr, "Error: Writing dict.desc failed\n");
                return 3;
            }

            desc = strtok(NULL, "\t");
            if (desc == NULL) {
                break;
            }

            cur_offset++;
            if (write_all(desc_out, "\n", 1) < 0) {
                fprintf(stderr, "Error: Writing dict.desc failed\n");
                return 3;
            }
        }
    }

    if (ferror(in)) {
        fprintf(stderr, "Error: Reading dict.preparsed failed\n");
        return 3;
    }

    return 0;
}

/*
 * Reads dict.preparsed from the current directory and (re)creates
 * dict.index and dict.desc next to it.
 * Exit status: 0 success, 1 a file could not be opened, 2 parse error,
 * 3 read/write error.
 */
int main(void)
{
    FILE *in;
    int idx_out = -1;
    int desc_out = -1;
    int status;

    /* Open the input first so that a missing input does not clobber
       existing output files. */
    in = fopen("dict.preparsed", "r");
    if (in != NULL) {
        /* O_CREAT requires the mode argument; O_TRUNC drops stale data
           left over from a previous, longer output. */
        idx_out = open("dict.index", O_WRONLY | O_CREAT | O_TRUNC, 0666);
        desc_out = open("dict.desc", O_WRONLY | O_CREAT | O_TRUNC, 0666);
    }

    if (in == NULL || idx_out < 0 || desc_out < 0) {
        fprintf(stderr, "Error: Some files couldn't be opened\n");
        status = 1;
    } else {
        status = convert(in, idx_out, desc_out);
    }

    if (in != NULL) {
        fclose(in);
    }
    if (idx_out >= 0 && close(idx_out) < 0 && status == 0) {
        fprintf(stderr, "Error: Closing dict.index failed\n");
        status = 3;
    }
    if (desc_out >= 0 && close(desc_out) < 0 && status == 0) {
        fprintf(stderr, "Error: Closing dict.desc failed\n");
        status = 3;
    }

    return status;
}
89 | |||
diff --git a/tools/wn2rdf.pl b/tools/wn2rdf.pl new file mode 100644 index 0000000000..2fff87d66b --- /dev/null +++ b/tools/wn2rdf.pl | |||
@@ -0,0 +1,122 @@ | |||
1 | #! /usr/bin/perl -w | ||
2 | |||
3 | # Wordnet dictionary database converter | ||
4 | # | ||
5 | # Converts the Wordnet prolog data to rockbox dictionary format. | ||
6 | # | ||
7 | # Written by Miika Pekkarinen <slasher@ihme.org> | ||
8 | # | ||
9 | # $Id$ | ||
10 | |||
11 | use strict; | ||
12 | |||
# Lookup tables, filled while reading the input files:
#   %words        - lowercased word => { synset id => category digit }
#   %descriptions - synset id       => description text
my (%words, %descriptions);
16 | |||
# Map a numeric category id to the one-letter label used in the output.
# Adjectives (3) and adverbs (4) share the label 'A'; any other id
# yields '?'.
sub getcatname {
    my ($id) = @_;

    my @labels = ([1, 'N'], [2, 'V'], [3, 'A'], [4, 'A']);

    foreach my $entry (@labels) {
        my ($number, $letter) = @{$entry};
        return $letter if $id == $number;
    }

    return '?';
}
26 | |||
# Both Wordnet prolog inputs and the output file live in the current directory
open(IN_WORD, '<', 'wn_s.pl') or die "Open fail(#1): $!";
open(IN_DESC, '<', 'wn_g.pl') or die "Open fail(#2): $!";
open(OUTPUT, '>', 'dict.preparsed') or die "Open fail(#3): $!";
30 | |||
print "Reading word file...\n";

# Read everything into memory
while (<IN_WORD>) {
    chomp ;

    # s(100001740,1,'entity',n,1,11). => 100001740,1,'entity',n,1,11
    s/(^s\()(.*)(\)\.$)/$2/;

    # Only the synset id (1st field) and the word (3rd field) are needed
    my ($seqid, $word) = (split /,/, $_, 6)[0, 2];

    # 'entity' => entity
    $word =~ s/(^\')(.*)(\'$)/$2/;
    # Prolog escapes a quote by doubling it; undo every occurrence,
    # not just the first one
    $word =~ s/\'\'/\'/g;

    # The first digit of the synset id is the category (see getcatname)
    my $category = substr $seqid, 0, 1;

    $words{lc $word}{$seqid} = $category;
}

close IN_WORD;
52 | |||
print "Reading description file...\n";
while (<IN_DESC>) {
    chomp ;

    # g(100002056,'(a separate and self-contained entity)').
    # => 100002056,'(a separate and self-contained entity)'
    s/(^g\()(.*)(\)\.$)/$2/;

    # Split on the first comma only; the description itself may contain commas
    my ($seqid, $desc) = split /,/, $_, 2;

    # '(text)' => text
    $desc =~ s/(^\'\()(.*)(\)\'$)/$2/;
    # Prolog escapes a quote by doubling it; undo every occurrence,
    # not just the first one
    $desc =~ s/\'\'/\'/g;

    $descriptions{$seqid} = $desc;
}

close IN_DESC;
70 | |||
print "Sorting and writing output...\n";

# Now sort and find correct descriptions
foreach my $word (sort keys %words) {
    my %categories;

    # Collect all definitions of the word, grouped by category digit.
    # The synset ids are visited in sorted order so the sense numbering
    # is the same on every run (hash iteration order is not stable).
    foreach my $id (sort keys %{$words{$word}}) {
        my $catid = $words{$word}{$id};
        my $description = $descriptions{$id};

        if (!defined($description) or $description eq '') {
            # Name the synset id that has no description
            print "Error: Failed to link word: $word / $id\n";
            exit 1;
        }

        push @{$categories{$catid}}, $description;
    }

    my $finaldesc;

    # 1 = noun
    # 2 = verb
    # 3 = adjective
    # 4 = adverb
    for my $catid (1 .. 4) {
        next unless $categories{$catid};

        # Number the senses within one category: "1. ... 2. ..."
        my $n = 1;
        my $catdesc;
        foreach my $desc ( @{$categories{$catid}} ) {
            $catdesc .= " " if $catdesc;
            $catdesc .= "$n. $desc";
            $n++;
        }

        next unless $catdesc;

        # Categories are separated by tabs in the output line
        $finaldesc .= "\t" if $finaldesc;
        $finaldesc .= getcatname($catid) . ": $catdesc";
    }

    die "Internal error" unless $finaldesc;

    print OUTPUT "$word\t$finaldesc\n";
}

# Buffered write errors only show up when the handle is closed
close(OUTPUT) or die "Close fail: $!";

print "Done, output was successfully written!\n";
121 | |||
122 | |||