diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/rdf2binary.c | 115 |
1 files changed, 64 insertions, 51 deletions
diff --git a/tools/rdf2binary.c b/tools/rdf2binary.c index 3597efa727..c1c7d1b727 100644 --- a/tools/rdf2binary.c +++ b/tools/rdf2binary.c | |||
@@ -30,60 +30,73 @@ This tool converts the rdf file to the binary data used in the dict plugin. | |||
30 | /* maximum word length, has to be the same in dict.c */ | 30 | /* maximum word length, has to be the same in dict.c */ |
31 | #define WORDLEN 32 | 31 | #define WORDLEN 32 |
32 | 32 | ||
33 | struct word { | 33 | struct word |
34 | char word[WORDLEN]; | 34 | { |
35 | long offset; | 35 | char word[WORDLEN]; |
36 | long offset; | ||
36 | }; | 37 | }; |
37 | 38 | ||
39 | /* convert offsets here, not on device. */ | ||
40 | long long_to_big_endian (void* value) | ||
41 | { | ||
42 | unsigned char* bytes = (unsigned char*) value; | ||
43 | return (long)bytes[0] | ((long)bytes[1] << 8) | | ||
44 | ((long)bytes[2] << 16) | ((long)bytes[3] << 24); | ||
45 | } | ||
46 | |||
38 | int main() | 47 | int main() |
39 | { | 48 | { |
40 | FILE *in; | 49 | FILE *in; |
41 | int idx_out, desc_out; | 50 | int idx_out, desc_out; |
42 | struct word w; | 51 | struct word w; |
43 | char buf[10000]; | 52 | char buf[10000]; |
44 | long cur_offset = 0; | 53 | long cur_offset = 0; |
45 | 54 | ||
46 | in = fopen("dict.preparsed", "r"); | 55 | in = fopen("dict.preparsed", "r"); |
47 | idx_out = open("dict.index", O_WRONLY | O_CREAT); | 56 | idx_out = open("dict.index", O_WRONLY | O_CREAT); |
48 | desc_out = open("dict.desc", O_WRONLY | O_CREAT); | 57 | desc_out = open("dict.desc", O_WRONLY | O_CREAT); |
49 | 58 | ||
50 | if (in == NULL || idx_out < 0 || desc_out < 0) { | 59 | if (in == NULL || idx_out < 0 || desc_out < 0) |
51 | fprintf(stderr, "Error: Some files couldn't be opened\n"); | 60 | { |
52 | return 1; | 61 | fprintf(stderr, "Error: Some files couldn't be opened\n"); |
53 | } | 62 | return 1; |
54 | 63 | } | |
55 | while (fgets(buf, sizeof buf, in) != NULL) { | 64 | |
56 | /* It is safe to use strtok here */ | 65 | while (fgets(buf, sizeof buf, in) != NULL) |
57 | const char *word = strtok(buf, "\t"); | 66 | { |
58 | const char *desc = strtok(NULL, "\t"); | 67 | /* It is safe to use strtok here */ |
59 | 68 | const char *word = strtok(buf, "\t"); | |
60 | if (word == NULL || desc == NULL) { | 69 | const char *desc = strtok(NULL, "\t"); |
61 | fprintf(stderr, "Parse error!\n"); | 70 | |
62 | fprintf(stderr, "word: %s\ndesc: %s\n", word, desc); | 71 | if (word == NULL || desc == NULL) |
63 | 72 | { | |
64 | return 2; | 73 | fprintf(stderr, "Parse error!\n"); |
65 | } | 74 | fprintf(stderr, "word: %s\ndesc: %s\n", word, desc); |
66 | 75 | ||
67 | /* We will null-terminate the words */ | 76 | return 2; |
68 | strncpy(w.word, word, WORDLEN - 1); | 77 | } |
69 | w.offset = cur_offset; | 78 | |
70 | write(idx_out, &w, sizeof(struct word)); | 79 | /* We will null-terminate the words */ |
71 | 80 | strncpy(w.word, word, WORDLEN - 1); | |
72 | while (1) { | 81 | w.offset = long_to_big_endian(&cur_offset); |
73 | int len = strlen(desc); | 82 | write(idx_out, &w, sizeof(struct word)); |
74 | cur_offset += len; | 83 | |
75 | write(desc_out, desc, len); | 84 | while (1) |
76 | 85 | { | |
77 | desc = strtok(NULL, "\t"); | 86 | int len = strlen(desc); |
78 | if (desc == NULL) | 87 | cur_offset += len; |
79 | break ; | 88 | write(desc_out, desc, len); |
80 | 89 | ||
81 | cur_offset++; | 90 | desc = strtok(NULL, "\t"); |
82 | write(desc_out, "\n", 1); | 91 | if (desc == NULL) |
83 | 92 | break ; | |
84 | } | 93 | |
85 | } | 94 | cur_offset++; |
86 | 95 | write(desc_out, "\n", 1); | |
87 | return 0; | 96 | |
97 | } | ||
98 | } | ||
99 | |||
100 | return 0; | ||
88 | } | 101 | } |
89 | 102 | ||