summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomas Salfischberger <tomas@rockbox.org>2005-05-02 15:05:07 +0000
committerTomas Salfischberger <tomas@rockbox.org>2005-05-02 15:05:07 +0000
commit52abc68b11694d2360e119543b876cf3c5768fbe (patch)
tree937b8bdc68faccf815efdccf1192c2dd92738932
parenta810a67db7c923b01c4135761ef21ab866db256d (diff)
downloadrockbox-52abc68b11694d2360e119543b876cf3c5768fbe.tar.gz
rockbox-52abc68b11694d2360e119543b876cf3c5768fbe.zip
Dictionary conversion tools.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6395 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--tools/FILES1
-rw-r--r--tools/Makefile5
-rw-r--r--tools/rdf2binary.c89
-rw-r--r--tools/wn2rdf.pl122
4 files changed, 216 insertions, 1 deletions
diff --git a/tools/FILES b/tools/FILES
index 2e2d232160..14cdeddd41 100644
--- a/tools/FILES
+++ b/tools/FILES
@@ -10,6 +10,7 @@ rockbox-style.el
10sample.emacs 10sample.emacs
11buildzip.pl 11buildzip.pl
12romsizetest.pl 12romsizetest.pl
13wn2rdf.pl
13make.inc 14make.inc
14makesrc.inc 15makesrc.inc
15fwpatcher/*.[ch] 16fwpatcher/*.[ch]
diff --git a/tools/Makefile b/tools/Makefile
index b98c269642..d8b1545015 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -9,7 +9,7 @@
9CFLAGS := -O -ansi -g 9CFLAGS := -O -ansi -g
10LDFLAGS := -g 10LDFLAGS := -g
11 11
12TARGETS := scramble descramble sh2d bmp2rb convbdf generate_rocklatin mkboot 12TARGETS := scramble descramble sh2d bmp2rb rdf2binary convbdf generate_rocklatin mkboot
13 13
14all: $(TARGETS) 14all: $(TARGETS)
15 @echo "tools done" 15 @echo "tools done"
@@ -26,6 +26,9 @@ sh2d: sh2d.c
26bmp2rb: bmp2rb.c 26bmp2rb: bmp2rb.c
27 $(CC) -DAPPLICATION_NAME=\"$@\" -g $+ -o $@ 27 $(CC) -DAPPLICATION_NAME=\"$@\" -g $+ -o $@
28 28
29rdf2binary: rdf2binary.c
30 $(CC) -g $+ -o $@
31
29mkboot: mkboot.c 32mkboot: mkboot.c
30 $(CC) -g $+ -o $@ 33 $(CC) -g $+ -o $@
31 34
diff --git a/tools/rdf2binary.c b/tools/rdf2binary.c
new file mode 100644
index 0000000000..3597efa727
--- /dev/null
+++ b/tools/rdf2binary.c
@@ -0,0 +1,89 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2005 Miika Pekkarinen
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20/*
21This tool converts the rdf file to the binary data used in the dict plugin.
22*/
23
24#include <sys/types.h>
25#include <sys/stat.h>
26#include <fcntl.h>
27#include <string.h>
28#include <stdio.h>
29
/* Maximum word length, including the terminating NUL; has to be the same
 * in dict.c */
#define WORDLEN 32

/*
 * One fixed-size record of dict.index: the NUL-terminated (and possibly
 * truncated) word plus the byte offset of its description in dict.desc.
 * The struct is written to disk as-is, so the on-disk layout follows the
 * host's sizeof(long) and byte order.
 */
struct word {
    char word[WORDLEN];
    long offset;
};

/*
 * Converts "dict.preparsed" (one entry per line: word, a tab, then one or
 * more tab-separated description fields) into "dict.index" (an array of
 * struct word) and "dict.desc" (the description fields, joined by '\n').
 *
 * Returns 0 on success, 1 if a file could not be opened, 2 on a malformed
 * input line and 3 on an I/O error.
 */
int main(void)
{
    FILE *in, *idx_out, *desc_out;
    struct word w;
    char buf[10000];
    long cur_offset = 0;

    /* stdio streams in "wb" mode create the files with default permissions
     * and truncate stale contents; open() with O_CREAT but without a mode
     * argument would use an indeterminate permission value. */
    in = fopen("dict.preparsed", "r");
    idx_out = fopen("dict.index", "wb");
    desc_out = fopen("dict.desc", "wb");

    if (in == NULL || idx_out == NULL || desc_out == NULL) {
        fprintf(stderr, "Error: Some files couldn't be opened\n");
        return 1;
    }

    while (fgets(buf, sizeof buf, in) != NULL) {
        const char *word;
        const char *desc;

        /* A line that does not fit in buf would otherwise be split and its
         * tail parsed as a separate (bogus) entry. */
        if (strchr(buf, '\n') == NULL && !feof(in)) {
            fprintf(stderr, "Parse error: line longer than %lu bytes\n",
                    (unsigned long) (sizeof buf - 1));
            return 2;
        }

        /* It is safe to use strtok here: single-threaded, one buffer */
        word = strtok(buf, "\t");
        desc = strtok(NULL, "\t");

        if (word == NULL || desc == NULL) {
            fprintf(stderr, "Parse error!\n");
            /* Passing NULL for %s is undefined, so substitute a marker */
            fprintf(stderr, "word: %s\ndesc: %s\n",
                    word != NULL ? word : "(null)",
                    desc != NULL ? desc : "(null)");

            return 2;
        }

        /* Zero the whole record first: strncpy() does not terminate a word
         * of WORDLEN - 1 or more characters, and this also keeps
         * uninitialised stack bytes out of the index file. */
        memset(&w, 0, sizeof w);
        strncpy(w.word, word, WORDLEN - 1);
        w.offset = cur_offset;
        if (fwrite(&w, sizeof w, 1, idx_out) != 1)
            goto write_error;

        /* Write the description fields separated by '\n'.  The last field
         * still carries the line's own '\n' from fgets(), which ends the
         * entry in dict.desc. */
        while (1) {
            size_t len = strlen(desc);

            cur_offset += (long) len;
            if (fwrite(desc, 1, len, desc_out) != len)
                goto write_error;

            desc = strtok(NULL, "\t");
            if (desc == NULL)
                break;

            cur_offset++;
            if (fputc('\n', desc_out) == EOF)
                goto write_error;
        }
    }

    if (ferror(in)) {
        fprintf(stderr, "Error: Reading dict.preparsed failed\n");
        return 3;
    }
    fclose(in);

    /* Buffered write errors may only surface when the stream is closed */
    if (fclose(idx_out) != 0 || fclose(desc_out) != 0) {
        fprintf(stderr, "Error: Writing the output files failed\n");
        return 3;
    }

    return 0;

write_error:
    fprintf(stderr, "Error: Writing the output files failed\n");
    return 3;
}
89
diff --git a/tools/wn2rdf.pl b/tools/wn2rdf.pl
new file mode 100644
index 0000000000..2fff87d66b
--- /dev/null
+++ b/tools/wn2rdf.pl
@@ -0,0 +1,122 @@
1#! /usr/bin/perl -w
2
3# Wordnet dictionary database converter
4#
5# Converts the Wordnet prolog data to rockbox dictionary format.
6#
7# Written by Miika Pekkarinen <slasher@ihme.org>
8#
9# $Id$
10
11use strict;
12
# Lookup tables, filled while the input files are read:
#   %words        lower-cased word => { synset id => first digit of that id }
#   %descriptions synset id        => description text
my (%words, %descriptions);
16
# Translate a numeric category id into the one-letter tag used in the output.
#   1 => 'N', 2 => 'V', 3 and 4 => 'A', any other number => '?'
sub getcatname {
    my ($id) = @_;

    if    ($id == 1)             { return 'N'; }
    elsif ($id == 2)             { return 'V'; }
    elsif ($id == 3 || $id == 4) { return 'A'; }

    return '?';
}
26
# Inputs are the WordNet prolog files wn_s.pl (word senses) and wn_g.pl
# (descriptions); the output is dict.preparsed, one line per word:
#   word <TAB> category section [<TAB> category section ...]
# Three-argument open with lexical handles keeps the file names from being
# interpreted as open modes.
open my $in_word, '<', 'wn_s.pl'        or die "Open fail(#1): $!";
open my $in_desc, '<', 'wn_g.pl'        or die "Open fail(#2): $!";
open my $output,  '>', 'dict.preparsed' or die "Open fail(#3): $!";

print "Reading word file...\n";

# Read everything into memory
while (my $line = <$in_word>) {
    $line =~ s/\r?\n\z//;
    next if $line =~ /^\s*$/;

    # s(100001740,1,'entity',n,1,11). => id 100001740, word entity
    # The quoted word is matched as a whole (up to the last quote), so a
    # comma inside the word cannot shift the fields the way a plain
    # split on ',' would.
    unless ($line =~ m/^s\((\d+),\d+,'(.*)',[^']*\)\.\s*$/) {
        warn "Skipping unrecognised line in wn_s.pl: $line\n";
        next;
    }
    my ($seqid, $word) = ($1, $2);

    # Prolog escapes a quote by doubling it; undo every occurrence
    $word =~ s/''/'/g;

    # The first digit of the id is the category (see the table below)
    my $category = substr $seqid, 0, 1;

    $words{lc $word}{$seqid} = $category;
}

close $in_word;

print "Reading description file...\n";
while (my $line = <$in_desc>) {
    $line =~ s/\r?\n\z//;
    next if $line =~ /^\s*$/;

    # g(100002056,'(a separate and self-contained entity)').
    #   => id 100002056, description (a separate and self-contained entity)
    unless ($line =~ m/^g\((\d+),'(.*)'\)\.\s*$/) {
        warn "Skipping unrecognised line in wn_g.pl: $line\n";
        next;
    }
    my ($seqid, $desc) = ($1, $2);

    # (a separate ...) => a separate ...
    $desc =~ s/^\((.*)\)$/$1/;
    $desc =~ s/''/'/g;

    $descriptions{$seqid} = $desc;
}

close $in_desc;

print "Sorting and writing output...\n";

# Now sort and find correct descriptions
foreach my $word (sort keys %words) {
    my %categories;

    # Find all definitions of the word.  The ids are sorted so that the
    # numbering of the senses is the same on every run.
    foreach my $id (sort { $a <=> $b } keys %{ $words{$word} }) {
        my $catid       = $words{$word}{$id};
        my $description = $descriptions{$id};

        if (!defined($description) or $description eq '') {
            print STDERR "Error: Failed to link word: $word / $id\n";
            exit 1;
        }

        push @{ $categories{$catid} }, $description;
    }

    my @sections;

    # 1 = noun
    # 2 = verb
    # 3 = adjective
    # 4 = adverb
    for my $catid (1 .. 4) {
        my $senses = $categories{$catid} or next;

        # "1. first sense 2. second sense ..."
        my $n = 0;
        my $catdesc = join ' ', map { sprintf '%d. %s', ++$n, $_ } @{$senses};

        push @sections, getcatname($catid) . ": $catdesc";
    }

    die "Internal error" unless @sections;

    print {$output} "$word\t", join("\t", @sections), "\n"
        or die "Write fail: $!";
}

# Buffered write errors only show up when the handle is closed
close $output or die "Close fail: $!";

print "Done, output was successfully written!\n";
121
122