diff options
Diffstat (limited to 'utils/rbutilqt/mspack/chmd.c')
-rw-r--r-- | utils/rbutilqt/mspack/chmd.c | 1377 |
1 files changed, 1377 insertions, 0 deletions
diff --git a/utils/rbutilqt/mspack/chmd.c b/utils/rbutilqt/mspack/chmd.c new file mode 100644 index 0000000000..6c8481db14 --- /dev/null +++ b/utils/rbutilqt/mspack/chmd.c | |||
@@ -0,0 +1,1377 @@ | |||
1 | /* This file is part of libmspack. | ||
2 | * (C) 2003-2018 Stuart Caie. | ||
3 | * | ||
4 | * libmspack is free software; you can redistribute it and/or modify it under | ||
5 | * the terms of the GNU Lesser General Public License (LGPL) version 2.1 | ||
6 | * | ||
7 | * For further details, see the file COPYING.LIB distributed with libmspack | ||
8 | */ | ||
9 | |||
10 | /* CHM decompression implementation */ | ||
11 | |||
12 | #include "system-mspack.h" | ||
13 | #include "chm.h" | ||
14 | |||
15 | /* prototypes */ | ||
/* ---- forward declarations of file-local functions ---- */

/* opening and closing CHM files */
static struct mschmd_header *chmd_open(struct mschm_decompressor *base,
                                       const char *filename);
static struct mschmd_header *chmd_fast_open(struct mschm_decompressor *base,
                                            const char *filename);
static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base,
                                            const char *filename, int entire);
static void chmd_close(struct mschm_decompressor *base,
                       struct mschmd_header *chm);
static int chmd_read_headers(struct mspack_system *sys,
                             struct mspack_file *fh,
                             struct mschmd_header *chm, int entire);

/* on-disk directory lookup */
static int chmd_fast_find(struct mschm_decompressor *base,
                          struct mschmd_header *chm, const char *filename,
                          struct mschmd_file *f_ptr, int f_size);
static unsigned char *read_chunk(struct mschm_decompressor_p *self,
                                 struct mschmd_header *chm,
                                 struct mspack_file *fh, unsigned int chunk);
static int search_chunk(struct mschmd_header *chm,
                        const unsigned char *chunk, const char *filename,
                        const unsigned char **result,
                        const unsigned char **result_end);
static inline int compare(const char *s1, const char *s2, int l1, int l2);

/* extraction */
static int chmd_extract(struct mschm_decompressor *base,
                        struct mschmd_file *file, const char *filename);
static int chmd_sys_write(struct mspack_file *file, void *buffer, int bytes);
static int chmd_init_decomp(struct mschm_decompressor_p *self,
                            struct mschmd_file *file);
static int read_reset_table(struct mschm_decompressor_p *self,
                            struct mschmd_sec_mscompressed *sec,
                            unsigned int entry,
                            off_t *length_ptr, off_t *offset_ptr);
static int read_spaninfo(struct mschm_decompressor_p *self,
                         struct mschmd_sec_mscompressed *sec,
                         off_t *length_ptr);
static int find_sys_file(struct mschm_decompressor_p *self,
                         struct mschmd_sec_mscompressed *sec,
                         struct mschmd_file **f_ptr, const char *name);
static unsigned char *read_sys_file(struct mschm_decompressor_p *self,
                                    struct mschmd_file *file);

/* miscellaneous */
static int chmd_error(struct mschm_decompressor *base);
static int read_off64(off_t *var, unsigned char *mem,
                      struct mspack_system *sys, struct mspack_file *fh);
61 | |||
62 | /* filenames of the system files used for decompression. | ||
63 | * Content and ControlData are essential. | ||
64 | * ResetTable is preferred, but SpanInfo can be used if not available | ||
65 | */ | ||
/* Names of the system files that describe the MSCompressed section.
 * The lengths (40, 44, 41 and 105 characters) are relied upon by the
 * directory parser, which compares names with memcmp(). */
static const char *content_name =
  "::DataSpace/Storage/MSCompressed/Content";
static const char *control_name =
  "::DataSpace/Storage/MSCompressed/ControlData";
static const char *spaninfo_name =
  "::DataSpace/Storage/MSCompressed/SpanInfo";
static const char *rtable_name =
  "::DataSpace/Storage/MSCompressed/Transform/"
  "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable";
71 | |||
72 | /*************************************** | ||
73 | * MSPACK_CREATE_CHM_DECOMPRESSOR | ||
74 | *************************************** | ||
75 | * constructor | ||
76 | */ | ||
77 | struct mschm_decompressor * | ||
78 | mspack_create_chm_decompressor(struct mspack_system *sys) | ||
79 | { | ||
80 | struct mschm_decompressor_p *self = NULL; | ||
81 | |||
82 | if (!sys) sys = mspack_default_system; | ||
83 | if (!mspack_valid_system(sys)) return NULL; | ||
84 | |||
85 | if ((self = (struct mschm_decompressor_p *) sys->alloc(sys, sizeof(struct mschm_decompressor_p)))) { | ||
86 | self->base.open = &chmd_open; | ||
87 | self->base.close = &chmd_close; | ||
88 | self->base.extract = &chmd_extract; | ||
89 | self->base.last_error = &chmd_error; | ||
90 | self->base.fast_open = &chmd_fast_open; | ||
91 | self->base.fast_find = &chmd_fast_find; | ||
92 | self->system = sys; | ||
93 | self->error = MSPACK_ERR_OK; | ||
94 | self->d = NULL; | ||
95 | } | ||
96 | return (struct mschm_decompressor *) self; | ||
97 | } | ||
98 | |||
99 | /*************************************** | ||
100 | * MSPACK_DESTROY_CHM_DECOMPRESSOR | ||
101 | *************************************** | ||
102 | * destructor | ||
103 | */ | ||
104 | void mspack_destroy_chm_decompressor(struct mschm_decompressor *base) { | ||
105 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; | ||
106 | if (self) { | ||
107 | struct mspack_system *sys = self->system; | ||
108 | if (self->d) { | ||
109 | if (self->d->infh) sys->close(self->d->infh); | ||
110 | if (self->d->state) lzxd_free(self->d->state); | ||
111 | sys->free(self->d); | ||
112 | } | ||
113 | sys->free(self); | ||
114 | } | ||
115 | } | ||
116 | |||
117 | /*************************************** | ||
118 | * CHMD_OPEN | ||
119 | *************************************** | ||
120 | * opens a file and tries to read it as a CHM file. | ||
121 | * Calls chmd_real_open() with entire=1. | ||
122 | */ | ||
/* Full open: delegates to chmd_real_open() asking for every file entry
 * to be read. */
static struct mschmd_header *chmd_open(struct mschm_decompressor *base,
                                       const char *filename)
{
  const int read_all_entries = 1;
  return chmd_real_open(base, filename, read_all_entries);
}
128 | |||
129 | /*************************************** | ||
130 | * CHMD_FAST_OPEN | ||
131 | *************************************** | ||
132 | * opens a file and tries to read it as a CHM file, but does not read | ||
133 | * the file headers. Calls chmd_real_open() with entire=0 | ||
134 | */ | ||
/* Fast open: delegates to chmd_real_open() asking for the basic headers
 * only, without the file entries. */
static struct mschmd_header *chmd_fast_open(struct mschm_decompressor *base,
                                            const char *filename)
{
  const int read_all_entries = 0;
  return chmd_real_open(base, filename, read_all_entries);
}
140 | |||
141 | /*************************************** | ||
142 | * CHMD_REAL_OPEN | ||
143 | *************************************** | ||
144 | * the real implementation of chmd_open() and chmd_fast_open(). It simply | ||
145 | * passes the "entire" parameter to chmd_read_headers(), which will then | ||
146 | * either read all headers, or a bare minimum. | ||
147 | */ | ||
148 | static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base, | ||
149 | const char *filename, int entire) | ||
150 | { | ||
151 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; | ||
152 | struct mschmd_header *chm = NULL; | ||
153 | struct mspack_system *sys; | ||
154 | struct mspack_file *fh; | ||
155 | int error; | ||
156 | |||
157 | if (!base) return NULL; | ||
158 | sys = self->system; | ||
159 | |||
160 | if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) { | ||
161 | if ((chm = (struct mschmd_header *) sys->alloc(sys, sizeof(struct mschmd_header)))) { | ||
162 | chm->filename = filename; | ||
163 | error = chmd_read_headers(sys, fh, chm, entire); | ||
164 | if (error) { | ||
165 | /* if the error is DATAFORMAT, and there are some results, return | ||
166 | * partial results with a warning, rather than nothing */ | ||
167 | if (error == MSPACK_ERR_DATAFORMAT && (chm->files || chm->sysfiles)) { | ||
168 | sys->message(fh, "WARNING; contents are corrupt"); | ||
169 | error = MSPACK_ERR_OK; | ||
170 | } | ||
171 | else { | ||
172 | chmd_close(base, chm); | ||
173 | chm = NULL; | ||
174 | } | ||
175 | } | ||
176 | self->error = error; | ||
177 | } | ||
178 | else { | ||
179 | self->error = MSPACK_ERR_NOMEMORY; | ||
180 | } | ||
181 | sys->close(fh); | ||
182 | } | ||
183 | else { | ||
184 | self->error = MSPACK_ERR_OPEN; | ||
185 | } | ||
186 | return chm; | ||
187 | } | ||
188 | |||
189 | /*************************************** | ||
190 | * CHMD_CLOSE | ||
191 | *************************************** | ||
192 | * frees all memory associated with a given mschmd_header | ||
193 | */ | ||
194 | static void chmd_close(struct mschm_decompressor *base, | ||
195 | struct mschmd_header *chm) | ||
196 | { | ||
197 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; | ||
198 | struct mschmd_file *fi, *nfi; | ||
199 | struct mspack_system *sys; | ||
200 | unsigned int i; | ||
201 | |||
202 | if (!base) return; | ||
203 | sys = self->system; | ||
204 | |||
205 | self->error = MSPACK_ERR_OK; | ||
206 | |||
207 | /* free files */ | ||
208 | for (fi = chm->files; fi; fi = nfi) { | ||
209 | nfi = fi->next; | ||
210 | sys->free(fi); | ||
211 | } | ||
212 | for (fi = chm->sysfiles; fi; fi = nfi) { | ||
213 | nfi = fi->next; | ||
214 | sys->free(fi); | ||
215 | } | ||
216 | |||
217 | /* if this CHM was being decompressed, free decompression state */ | ||
218 | if (self->d && (self->d->chm == chm)) { | ||
219 | if (self->d->infh) sys->close(self->d->infh); | ||
220 | if (self->d->state) lzxd_free(self->d->state); | ||
221 | sys->free(self->d); | ||
222 | self->d = NULL; | ||
223 | } | ||
224 | |||
225 | /* if this CHM had a chunk cache, free it and contents */ | ||
226 | if (chm->chunk_cache) { | ||
227 | for (i = 0; i < chm->num_chunks; i++) sys->free(chm->chunk_cache[i]); | ||
228 | sys->free(chm->chunk_cache); | ||
229 | } | ||
230 | |||
231 | sys->free(chm); | ||
232 | } | ||
233 | |||
234 | /*************************************** | ||
235 | * CHMD_READ_HEADERS | ||
236 | *************************************** | ||
237 | * reads the basic CHM file headers. If the "entire" parameter is | ||
238 | * non-zero, all file entries will also be read. fills out a pre-existing | ||
239 | * mschmd_header structure, allocates memory for files as necessary | ||
240 | */ | ||
241 | |||
242 | /* The GUIDs found in CHM headers */ | ||
/* Both header GUIDs, stored back to back in their on-disk byte order so
 * that one 32-byte memcmp() checks the pair. They differ in the first
 * byte only. */
static const unsigned char guids[32] = {
  /* {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} */
  0x10, 0xFD, 0x01, 0x7C,  0xAA, 0x7B,  0xD0, 0x11,
  0x9E, 0x0C,  0x00, 0xA0,  0xC9, 0x22,  0xE6, 0xEC,

  /* {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} */
  0x11, 0xFD, 0x01, 0x7C,  0xAA, 0x7B,  0xD0, 0x11,
  0x9E, 0x0C,  0x00, 0xA0,  0xC9, 0x22,  0xE6, 0xEC
};
251 | |||
252 | /* reads an encoded integer into a variable; 7 bits of data per byte, | ||
253 | * the high bit is used to indicate that there is another byte */ | ||
/* Decodes a variable-length integer at `p` into `var`, most significant
 * group first: each byte contributes its low 7 bits and a set high bit
 * means another byte follows. Relies on the caller having `p` and `end`
 * pointers and a `chunk_end` label in scope; jumps to `chunk_end` if the
 * data runs out at `end`. `p` is left just past the last byte consumed. */
#define READ_ENCINT(var) do {                         \
    (var) = 0;                                        \
    for (;;) {                                        \
        if (p >= end) goto chunk_end;                 \
        (var) = ((var) << 7) | (*p & 0x7F);           \
        if (!(*p++ & 0x80)) break;                    \
    }                                                 \
} while (0)
261 | |||
262 | static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, | ||
263 | struct mschmd_header *chm, int entire) | ||
264 | { | ||
265 | unsigned int section, name_len, x, errors, num_chunks; | ||
266 | unsigned char buf[0x54], *chunk = NULL, *name, *p, *end; | ||
267 | struct mschmd_file *fi, *link = NULL; | ||
268 | off_t offset, length; | ||
269 | int num_entries; | ||
270 | |||
271 | /* initialise pointers */ | ||
272 | chm->files = NULL; | ||
273 | chm->sysfiles = NULL; | ||
274 | chm->chunk_cache = NULL; | ||
275 | chm->sec0.base.chm = chm; | ||
276 | chm->sec0.base.id = 0; | ||
277 | chm->sec1.base.chm = chm; | ||
278 | chm->sec1.base.id = 1; | ||
279 | chm->sec1.content = NULL; | ||
280 | chm->sec1.control = NULL; | ||
281 | chm->sec1.spaninfo = NULL; | ||
282 | chm->sec1.rtable = NULL; | ||
283 | |||
284 | /* read the first header */ | ||
285 | if (sys->read(fh, &buf[0], chmhead_SIZEOF) != chmhead_SIZEOF) { | ||
286 | return MSPACK_ERR_READ; | ||
287 | } | ||
288 | |||
289 | /* check ITSF signature */ | ||
290 | if (EndGetI32(&buf[chmhead_Signature]) != 0x46535449) { | ||
291 | return MSPACK_ERR_SIGNATURE; | ||
292 | } | ||
293 | |||
294 | /* check both header GUIDs */ | ||
295 | if (memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) { | ||
296 | D(("incorrect GUIDs")) | ||
297 | return MSPACK_ERR_SIGNATURE; | ||
298 | } | ||
299 | |||
300 | chm->version = EndGetI32(&buf[chmhead_Version]); | ||
301 | chm->timestamp = EndGetM32(&buf[chmhead_Timestamp]); | ||
302 | chm->language = EndGetI32(&buf[chmhead_LanguageID]); | ||
303 | if (chm->version > 3) { | ||
304 | sys->message(fh, "WARNING; CHM version > 3"); | ||
305 | } | ||
306 | |||
307 | /* read the header section table */ | ||
308 | if (sys->read(fh, &buf[0], chmhst3_SIZEOF) != chmhst3_SIZEOF) { | ||
309 | return MSPACK_ERR_READ; | ||
310 | } | ||
311 | |||
312 | /* chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files. | ||
313 | * The offset will be corrected later, once HS1 is read. | ||
314 | */ | ||
315 | if (read_off64(&offset, &buf[chmhst_OffsetHS0], sys, fh) || | ||
316 | read_off64(&chm->dir_offset, &buf[chmhst_OffsetHS1], sys, fh) || | ||
317 | read_off64(&chm->sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh)) | ||
318 | { | ||
319 | return MSPACK_ERR_DATAFORMAT; | ||
320 | } | ||
321 | |||
322 | /* seek to header section 0 */ | ||
323 | if (sys->seek(fh, offset, MSPACK_SYS_SEEK_START)) { | ||
324 | return MSPACK_ERR_SEEK; | ||
325 | } | ||
326 | |||
327 | /* read header section 0 */ | ||
328 | if (sys->read(fh, &buf[0], chmhs0_SIZEOF) != chmhs0_SIZEOF) { | ||
329 | return MSPACK_ERR_READ; | ||
330 | } | ||
331 | if (read_off64(&chm->length, &buf[chmhs0_FileLen], sys, fh)) { | ||
332 | return MSPACK_ERR_DATAFORMAT; | ||
333 | } | ||
334 | |||
335 | /* seek to header section 1 */ | ||
336 | if (sys->seek(fh, chm->dir_offset, MSPACK_SYS_SEEK_START)) { | ||
337 | return MSPACK_ERR_SEEK; | ||
338 | } | ||
339 | |||
340 | /* read header section 1 */ | ||
341 | if (sys->read(fh, &buf[0], chmhs1_SIZEOF) != chmhs1_SIZEOF) { | ||
342 | return MSPACK_ERR_READ; | ||
343 | } | ||
344 | |||
345 | chm->dir_offset = sys->tell(fh); | ||
346 | chm->chunk_size = EndGetI32(&buf[chmhs1_ChunkSize]); | ||
347 | chm->density = EndGetI32(&buf[chmhs1_Density]); | ||
348 | chm->depth = EndGetI32(&buf[chmhs1_Depth]); | ||
349 | chm->index_root = EndGetI32(&buf[chmhs1_IndexRoot]); | ||
350 | chm->num_chunks = EndGetI32(&buf[chmhs1_NumChunks]); | ||
351 | chm->first_pmgl = EndGetI32(&buf[chmhs1_FirstPMGL]); | ||
352 | chm->last_pmgl = EndGetI32(&buf[chmhs1_LastPMGL]); | ||
353 | |||
354 | if (chm->version < 3) { | ||
355 | /* versions before 3 don't have chmhst3_OffsetCS0 */ | ||
356 | chm->sec0.offset = chm->dir_offset + (chm->chunk_size * chm->num_chunks); | ||
357 | } | ||
358 | |||
359 | /* check if content offset or file size is wrong */ | ||
360 | if (chm->sec0.offset > chm->length) { | ||
361 | D(("content section begins after file has ended")) | ||
362 | return MSPACK_ERR_DATAFORMAT; | ||
363 | } | ||
364 | |||
365 | /* ensure there are chunks and that chunk size is | ||
366 | * large enough for signature and num_entries */ | ||
367 | if (chm->chunk_size < (pmgl_Entries + 2)) { | ||
368 | D(("chunk size not large enough")) | ||
369 | return MSPACK_ERR_DATAFORMAT; | ||
370 | } | ||
371 | if (chm->num_chunks == 0) { | ||
372 | D(("no chunks")) | ||
373 | return MSPACK_ERR_DATAFORMAT; | ||
374 | } | ||
375 | |||
376 | /* The chunk_cache data structure is not great; large values for num_chunks | ||
377 | * or num_chunks*chunk_size can exhaust all memory. Until a better chunk | ||
378 | * cache is implemented, put arbitrary limits on num_chunks and chunk size. | ||
379 | */ | ||
380 | if (chm->num_chunks > 100000) { | ||
381 | D(("more than 100,000 chunks")) | ||
382 | return MSPACK_ERR_DATAFORMAT; | ||
383 | } | ||
384 | if (chm->chunk_size > 8192) { | ||
385 | D(("chunk size over 8192 (get in touch if this is valid)")) | ||
386 | return MSPACK_ERR_DATAFORMAT; | ||
387 | } | ||
388 | if ((off_t)chm->chunk_size * (off_t)chm->num_chunks > chm->length) { | ||
389 | D(("chunks larger than entire file")) | ||
390 | return MSPACK_ERR_DATAFORMAT; | ||
391 | } | ||
392 | |||
393 | /* common sense checks on header section 1 fields */ | ||
394 | if (chm->chunk_size != 4096) { | ||
395 | sys->message(fh, "WARNING; chunk size is not 4096"); | ||
396 | } | ||
397 | if (chm->first_pmgl != 0) { | ||
398 | sys->message(fh, "WARNING; first PMGL chunk is not zero"); | ||
399 | } | ||
400 | if (chm->first_pmgl > chm->last_pmgl) { | ||
401 | D(("first pmgl chunk is after last pmgl chunk")) | ||
402 | return MSPACK_ERR_DATAFORMAT; | ||
403 | } | ||
404 | if (chm->index_root != 0xFFFFFFFF && chm->index_root >= chm->num_chunks) { | ||
405 | D(("index_root outside valid range")) | ||
406 | return MSPACK_ERR_DATAFORMAT; | ||
407 | } | ||
408 | |||
409 | /* if we are doing a quick read, stop here! */ | ||
410 | if (!entire) { | ||
411 | return MSPACK_ERR_OK; | ||
412 | } | ||
413 | |||
414 | /* seek to the first PMGL chunk, and reduce the number of chunks to read */ | ||
415 | if ((x = chm->first_pmgl) != 0) { | ||
416 | if (sys->seek(fh,(off_t) (x * chm->chunk_size), MSPACK_SYS_SEEK_CUR)) { | ||
417 | return MSPACK_ERR_SEEK; | ||
418 | } | ||
419 | } | ||
420 | num_chunks = chm->last_pmgl - x + 1; | ||
421 | |||
422 | if (!(chunk = (unsigned char *) sys->alloc(sys, (size_t)chm->chunk_size))) { | ||
423 | return MSPACK_ERR_NOMEMORY; | ||
424 | } | ||
425 | |||
426 | /* read and process all chunks from FirstPMGL to LastPMGL */ | ||
427 | errors = 0; | ||
428 | while (num_chunks--) { | ||
429 | /* read next chunk */ | ||
430 | if (sys->read(fh, chunk, (int)chm->chunk_size) != (int)chm->chunk_size) { | ||
431 | sys->free(chunk); | ||
432 | return MSPACK_ERR_READ; | ||
433 | } | ||
434 | |||
435 | /* process only directory (PMGL) chunks */ | ||
436 | if (EndGetI32(&chunk[pmgl_Signature]) != 0x4C474D50) continue; | ||
437 | |||
438 | if (EndGetI32(&chunk[pmgl_QuickRefSize]) < 2) { | ||
439 | sys->message(fh, "WARNING; PMGL quickref area is too small"); | ||
440 | } | ||
441 | if (EndGetI32(&chunk[pmgl_QuickRefSize]) > | ||
442 | (chm->chunk_size - pmgl_Entries)) | ||
443 | { | ||
444 | sys->message(fh, "WARNING; PMGL quickref area is too large"); | ||
445 | } | ||
446 | |||
447 | p = &chunk[pmgl_Entries]; | ||
448 | end = &chunk[chm->chunk_size - 2]; | ||
449 | num_entries = EndGetI16(end); | ||
450 | |||
451 | while (num_entries--) { | ||
452 | READ_ENCINT(name_len); | ||
453 | if (name_len > (unsigned int) (end - p)) goto chunk_end; | ||
454 | name = p; p += name_len; | ||
455 | READ_ENCINT(section); | ||
456 | READ_ENCINT(offset); | ||
457 | READ_ENCINT(length); | ||
458 | |||
459 | /* ignore blank or one-char (e.g. "/") filenames we'd return as blank */ | ||
460 | if (name_len < 2 || !name[0] || !name[1]) continue; | ||
461 | |||
462 | /* empty files and directory names are stored as a file entry at | ||
463 | * offset 0 with length 0. We want to keep empty files, but not | ||
464 | * directory names, which end with a "/" */ | ||
465 | if ((offset == 0) && (length == 0)) { | ||
466 | if ((name_len > 0) && (name[name_len-1] == '/')) continue; | ||
467 | } | ||
468 | |||
469 | if (section > 1) { | ||
470 | sys->message(fh, "invalid section number '%u'.", section); | ||
471 | continue; | ||
472 | } | ||
473 | |||
474 | if (!(fi = (struct mschmd_file *) sys->alloc(sys, sizeof(struct mschmd_file) + name_len + 1))) { | ||
475 | sys->free(chunk); | ||
476 | return MSPACK_ERR_NOMEMORY; | ||
477 | } | ||
478 | |||
479 | fi->next = NULL; | ||
480 | fi->filename = (char *) &fi[1]; | ||
481 | fi->section = ((section == 0) ? (struct mschmd_section *) (&chm->sec0) | ||
482 | : (struct mschmd_section *) (&chm->sec1)); | ||
483 | fi->offset = offset; | ||
484 | fi->length = length; | ||
485 | sys->copy(name, fi->filename, (size_t) name_len); | ||
486 | fi->filename[name_len] = '\0'; | ||
487 | |||
488 | if (name[0] == ':' && name[1] == ':') { | ||
489 | /* system file */ | ||
490 | if (name_len == 40 && memcmp(name, content_name, 40) == 0) { | ||
491 | chm->sec1.content = fi; | ||
492 | } | ||
493 | else if (name_len == 44 && memcmp(name, control_name, 44) == 0) { | ||
494 | chm->sec1.control = fi; | ||
495 | } | ||
496 | else if (name_len == 41 && memcmp(name, spaninfo_name, 41) == 0) { | ||
497 | chm->sec1.spaninfo = fi; | ||
498 | } | ||
499 | else if (name_len == 105 && memcmp(name, rtable_name, 105) == 0) { | ||
500 | chm->sec1.rtable = fi; | ||
501 | } | ||
502 | fi->next = chm->sysfiles; | ||
503 | chm->sysfiles = fi; | ||
504 | } | ||
505 | else { | ||
506 | /* normal file */ | ||
507 | if (link) link->next = fi; else chm->files = fi; | ||
508 | link = fi; | ||
509 | } | ||
510 | } | ||
511 | |||
512 | /* this is reached either when num_entries runs out, or if | ||
513 | * reading data from the chunk reached a premature end of chunk */ | ||
514 | chunk_end: | ||
515 | if (num_entries >= 0) { | ||
516 | D(("chunk ended before all entries could be read")) | ||
517 | errors++; | ||
518 | } | ||
519 | |||
520 | } | ||
521 | sys->free(chunk); | ||
522 | return (errors > 0) ? MSPACK_ERR_DATAFORMAT : MSPACK_ERR_OK; | ||
523 | } | ||
524 | |||
525 | /*************************************** | ||
526 | * CHMD_FAST_FIND | ||
527 | *************************************** | ||
528 | * uses PMGI index chunks and quickref data to quickly locate a file | ||
529 | * directly from the on-disk index. | ||
530 | * | ||
531 | * TODO: protect against infinite loops in chunks (where pgml_NextChunk | ||
532 | * or a PMGI index entry point to an already visited chunk) | ||
533 | */ | ||
534 | static int chmd_fast_find(struct mschm_decompressor *base, | ||
535 | struct mschmd_header *chm, const char *filename, | ||
536 | struct mschmd_file *f_ptr, int f_size) | ||
537 | { | ||
538 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; | ||
539 | struct mspack_system *sys; | ||
540 | struct mspack_file *fh; | ||
541 | /* p and end are initialised to prevent MSVC warning about "potentially" | ||
542 | * uninitialised usage. This is provably untrue, but MS won't fix: | ||
543 | * https://developercommunity.visualstudio.com/content/problem/363489/c4701-false-positive-warning.html */ | ||
544 | const unsigned char *chunk, *p = NULL, *end = NULL; | ||
545 | int err = MSPACK_ERR_OK, result = -1; | ||
546 | unsigned int n, sec; | ||
547 | |||
548 | if (!self || !chm || !f_ptr || (f_size != sizeof(struct mschmd_file))) { | ||
549 | return MSPACK_ERR_ARGS; | ||
550 | } | ||
551 | sys = self->system; | ||
552 | |||
553 | /* clear the results structure */ | ||
554 | memset(f_ptr, 0, f_size); | ||
555 | |||
556 | if (!(fh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ))) { | ||
557 | return MSPACK_ERR_OPEN; | ||
558 | } | ||
559 | |||
560 | /* go through PMGI chunk hierarchy to reach PMGL chunk */ | ||
561 | if (chm->index_root < chm->num_chunks) { | ||
562 | n = chm->index_root; | ||
563 | for (;;) { | ||
564 | if (!(chunk = read_chunk(self, chm, fh, n))) { | ||
565 | sys->close(fh); | ||
566 | return self->error; | ||
567 | } | ||
568 | |||
569 | /* search PMGI/PMGL chunk. exit early if no entry found */ | ||
570 | if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) { | ||
571 | break; | ||
572 | } | ||
573 | |||
574 | /* found result. loop around for next chunk if this is PMGI */ | ||
575 | if (chunk[3] == 0x4C) break; else READ_ENCINT(n); | ||
576 | } | ||
577 | } | ||
578 | else { | ||
579 | /* PMGL chunks only, search from first_pmgl to last_pmgl */ | ||
580 | for (n = chm->first_pmgl; n <= chm->last_pmgl; | ||
581 | n = EndGetI32(&chunk[pmgl_NextChunk])) | ||
582 | { | ||
583 | if (!(chunk = read_chunk(self, chm, fh, n))) { | ||
584 | err = self->error; | ||
585 | break; | ||
586 | } | ||
587 | |||
588 | /* search PMGL chunk. exit if file found */ | ||
589 | if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) { | ||
590 | break; | ||
591 | } | ||
592 | |||
593 | /* stop simple infinite loops: can't visit the same chunk twice */ | ||
594 | if (n == EndGetI32(&chunk[pmgl_NextChunk])) { | ||
595 | break; | ||
596 | } | ||
597 | } | ||
598 | } | ||
599 | |||
600 | /* if we found a file, read it */ | ||
601 | if (result > 0) { | ||
602 | READ_ENCINT(sec); | ||
603 | f_ptr->section = (sec == 0) ? (struct mschmd_section *) &chm->sec0 | ||
604 | : (struct mschmd_section *) &chm->sec1; | ||
605 | READ_ENCINT(f_ptr->offset); | ||
606 | READ_ENCINT(f_ptr->length); | ||
607 | } | ||
608 | else if (result < 0) { | ||
609 | err = MSPACK_ERR_DATAFORMAT; | ||
610 | } | ||
611 | |||
612 | sys->close(fh); | ||
613 | return self->error = err; | ||
614 | |||
615 | chunk_end: | ||
616 | D(("read beyond end of chunk entries")) | ||
617 | sys->close(fh); | ||
618 | return self->error = MSPACK_ERR_DATAFORMAT; | ||
619 | } | ||
620 | |||
621 | /* reads the given chunk into memory, storing it in a chunk cache | ||
622 | * so it doesn't need to be read from disk more than once | ||
623 | */ | ||
624 | static unsigned char *read_chunk(struct mschm_decompressor_p *self, | ||
625 | struct mschmd_header *chm, | ||
626 | struct mspack_file *fh, | ||
627 | unsigned int chunk_num) | ||
628 | { | ||
629 | struct mspack_system *sys = self->system; | ||
630 | unsigned char *buf; | ||
631 | |||
632 | /* check arguments - most are already checked by chmd_fast_find */ | ||
633 | if (chunk_num >= chm->num_chunks) return NULL; | ||
634 | |||
635 | /* ensure chunk cache is available */ | ||
636 | if (!chm->chunk_cache) { | ||
637 | size_t size = sizeof(unsigned char *) * chm->num_chunks; | ||
638 | if (!(chm->chunk_cache = (unsigned char **) sys->alloc(sys, size))) { | ||
639 | self->error = MSPACK_ERR_NOMEMORY; | ||
640 | return NULL; | ||
641 | } | ||
642 | memset(chm->chunk_cache, 0, size); | ||
643 | } | ||
644 | |||
645 | /* try to answer out of chunk cache */ | ||
646 | if (chm->chunk_cache[chunk_num]) return chm->chunk_cache[chunk_num]; | ||
647 | |||
648 | /* need to read chunk - allocate memory for it */ | ||
649 | if (!(buf = (unsigned char *) sys->alloc(sys, chm->chunk_size))) { | ||
650 | self->error = MSPACK_ERR_NOMEMORY; | ||
651 | return NULL; | ||
652 | } | ||
653 | |||
654 | /* seek to block and read it */ | ||
655 | if (sys->seek(fh, (off_t) (chm->dir_offset + (chunk_num * chm->chunk_size)), | ||
656 | MSPACK_SYS_SEEK_START)) | ||
657 | { | ||
658 | self->error = MSPACK_ERR_SEEK; | ||
659 | sys->free(buf); | ||
660 | return NULL; | ||
661 | } | ||
662 | if (sys->read(fh, buf, (int)chm->chunk_size) != (int)chm->chunk_size) { | ||
663 | self->error = MSPACK_ERR_READ; | ||
664 | sys->free(buf); | ||
665 | return NULL; | ||
666 | } | ||
667 | |||
668 | /* check the signature. Is is PMGL or PMGI? */ | ||
669 | if (!((buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47) && | ||
670 | ((buf[3] == 0x4C) || (buf[3] == 0x49)))) | ||
671 | { | ||
672 | self->error = MSPACK_ERR_SEEK; | ||
673 | sys->free(buf); | ||
674 | return NULL; | ||
675 | } | ||
676 | |||
677 | /* all OK. Store chunk in cache and return it */ | ||
678 | return chm->chunk_cache[chunk_num] = buf; | ||
679 | } | ||
680 | |||
681 | /* searches a PMGI/PMGL chunk for a given filename entry. Returns -1 on | ||
682 | * data format error, 0 if entry definitely not found, 1 if entry | ||
683 | * found. In the latter case, *result and *result_end are set pointing | ||
684 | * to that entry's data (either the "next chunk" ENCINT for a PMGI or | ||
685 | * the section, offset and length ENCINTs for a PMGL). | ||
686 | * | ||
687 | * In the case of PMGL chunks, the entry has definitely been | ||
688 | * found. In the case of PMGI chunks, the entry which points to the | ||
689 | * chunk that may eventually contain that entry has been found. | ||
690 | */ | ||
691 | static int search_chunk(struct mschmd_header *chm, | ||
692 | const unsigned char *chunk, | ||
693 | const char *filename, | ||
694 | const unsigned char **result, | ||
695 | const unsigned char **result_end) | ||
696 | { | ||
697 | const unsigned char *start, *end, *p; | ||
698 | unsigned int qr_size, num_entries, qr_entries, qr_density, name_len; | ||
699 | unsigned int L, R, M, fname_len, entries_off, is_pmgl; | ||
700 | int cmp; | ||
701 | |||
702 | fname_len = strlen(filename); | ||
703 | |||
704 | /* PMGL chunk or PMGI chunk? (note: read_chunk() has already | ||
705 | * checked the rest of the characters in the chunk signature) */ | ||
706 | if (chunk[3] == 0x4C) { | ||
707 | is_pmgl = 1; | ||
708 | entries_off = pmgl_Entries; | ||
709 | } | ||
710 | else { | ||
711 | is_pmgl = 0; | ||
712 | entries_off = pmgi_Entries; | ||
713 | } | ||
714 | |||
715 | /* Step 1: binary search first filename of each QR entry | ||
716 | * - target filename == entry | ||
717 | * found file | ||
718 | * - target filename < all entries | ||
719 | * file not found | ||
720 | * - target filename > all entries | ||
721 | * proceed to step 2 using final entry | ||
722 | * - target filename between two searched entries | ||
723 | * proceed to step 2 | ||
724 | */ | ||
725 | qr_size = EndGetI32(&chunk[pmgl_QuickRefSize]); | ||
726 | start = &chunk[chm->chunk_size - 2]; | ||
727 | end = &chunk[chm->chunk_size - qr_size]; | ||
728 | num_entries = EndGetI16(start); | ||
729 | qr_density = 1 + (1 << chm->density); | ||
730 | qr_entries = (num_entries + qr_density-1) / qr_density; | ||
731 | |||
732 | if (num_entries == 0) { | ||
733 | D(("chunk has no entries")) | ||
734 | return -1; | ||
735 | } | ||
736 | |||
737 | if (qr_size > chm->chunk_size) { | ||
738 | D(("quickref size > chunk size")) | ||
739 | return -1; | ||
740 | } | ||
741 | |||
742 | *result_end = end; | ||
743 | |||
744 | if (((int)qr_entries * 2) > (start - end)) { | ||
745 | D(("WARNING; more quickrefs than quickref space")) | ||
746 | qr_entries = 0; /* but we can live with it */ | ||
747 | } | ||
748 | |||
749 | if (qr_entries > 0) { | ||
750 | L = 0; | ||
751 | R = qr_entries - 1; | ||
752 | do { | ||
753 | /* pick new midpoint */ | ||
754 | M = (L + R) >> 1; | ||
755 | |||
756 | /* compare filename with entry QR points to */ | ||
757 | p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; | ||
758 | READ_ENCINT(name_len); | ||
759 | if (name_len > (unsigned int) (end - p)) goto chunk_end; | ||
760 | cmp = compare(filename, (char *)p, fname_len, name_len); | ||
761 | |||
762 | if (cmp == 0) break; | ||
763 | else if (cmp < 0) { if (M) R = M - 1; else return 0; } | ||
764 | else if (cmp > 0) L = M + 1; | ||
765 | } while (L <= R); | ||
766 | M = (L + R) >> 1; | ||
767 | |||
768 | if (cmp == 0) { | ||
769 | /* exact match! */ | ||
770 | p += name_len; | ||
771 | *result = p; | ||
772 | return 1; | ||
773 | } | ||
774 | |||
775 | /* otherwise, read the group of entries for QR entry M */ | ||
776 | p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; | ||
777 | num_entries -= (M * qr_density); | ||
778 | if (num_entries > qr_density) num_entries = qr_density; | ||
779 | } | ||
780 | else { | ||
781 | p = &chunk[entries_off]; | ||
782 | } | ||
783 | |||
784 | /* Step 2: linear search through the set of entries reached in step 1. | ||
785 | * - filename == any entry | ||
786 | * found entry | ||
787 | * - filename < all entries (PMGI) or any entry (PMGL) | ||
788 | * entry not found, stop now | ||
789 | * - filename > all entries | ||
790 | * entry not found (PMGL) / maybe found (PMGI) | ||
791 | * - | ||
792 | */ | ||
793 | *result = NULL; | ||
794 | while (num_entries-- > 0) { | ||
795 | READ_ENCINT(name_len); | ||
796 | if (name_len > (unsigned int) (end - p)) goto chunk_end; | ||
797 | cmp = compare(filename, (char *)p, fname_len, name_len); | ||
798 | p += name_len; | ||
799 | |||
800 | if (cmp == 0) { | ||
801 | /* entry found */ | ||
802 | *result = p; | ||
803 | return 1; | ||
804 | } | ||
805 | |||
806 | if (cmp < 0) { | ||
807 | /* entry not found (PMGL) / maybe found (PMGI) */ | ||
808 | break; | ||
809 | } | ||
810 | |||
811 | /* read and ignore the rest of this entry */ | ||
812 | if (is_pmgl) { | ||
813 | READ_ENCINT(R); /* skip section */ | ||
814 | READ_ENCINT(R); /* skip offset */ | ||
815 | READ_ENCINT(R); /* skip length */ | ||
816 | } | ||
817 | else { | ||
818 | *result = p; /* store potential final result */ | ||
819 | READ_ENCINT(R); /* skip chunk number */ | ||
820 | } | ||
821 | } | ||
822 | |||
823 | /* PMGL? not found. PMGI? maybe found */ | ||
824 | return (is_pmgl) ? 0 : (*result ? 1 : 0); | ||
825 | |||
826 | chunk_end: | ||
827 | D(("reached end of chunk data while searching")) | ||
828 | return -1; | ||
829 | } | ||
830 | |||
/* TOLOWER(x): lower-cases a decoded character value held in an int.
 *
 * When wide-character support is available, towlower() is used.
 * Otherwise tolower() is used, but tolower() is only defined for arguments
 * representable as unsigned char (or EOF), and the UTF-8 decoder in this
 * file produces values up to 0x10FFFF. Values outside 0..0xFF are therefore
 * returned unchanged instead of being handed to tolower().
 *
 * The fallback evaluates its argument more than once, so only pass
 * side-effect-free expressions.
 */
#if HAVE_TOWLOWER
# include <wctype.h>
# define TOLOWER(x) towlower(x)
#else
# include <ctype.h>
# define TOLOWER(x) (((x) >= 0 && (x) <= 0xFF) ? tolower(x) : (x))
#endif
838 | |||
/* GET_UTF8_CHAR(s, e, c): decodes one UTF-8 character starting at s[] and
 * stores its value in c, advancing s past the bytes consumed.
 *
 *   s - byte pointer (lvalue); must point at a readable byte on entry
 *   e - end of the data; continuation bytes at or beyond e are never read
 *   c - integer lvalue that receives the decoded value
 *
 * Invalid lead bytes, and sequences whose continuation bytes would run
 * past e, yield U+FFFD and consume only the lead byte. Four-byte sequences
 * that decode above 0x10FFFF also yield U+FFFD.
 *
 * Limitations: doesn't test that extension bytes are %10xxxxxx, and allows
 * some overlong encodings.
 *
 * The remaining length is tested as (e - s) rather than by forming s+1 or
 * s+2, so no pointer beyond one-past-the-end is ever computed when the
 * lead byte is the last byte of the buffer.
 */
#define GET_UTF8_CHAR(s, e, c) do {                                         \
    unsigned char utf8_lead = *(s)++;                                       \
    if (utf8_lead < 0x80) {                                                 \
        (c) = utf8_lead;                                                    \
    }                                                                       \
    else if (utf8_lead >= 0xC2 && utf8_lead < 0xE0 && (s) < (e)) {          \
        (c) = ((utf8_lead & 0x1F) << 6) | (*(s)++ & 0x3F);                  \
    }                                                                       \
    else if (utf8_lead >= 0xE0 && utf8_lead < 0xF0 && (e) - (s) >= 2) {     \
        (c) = ((utf8_lead & 0x0F) << 12) | (((s)[0] & 0x3F) << 6) |         \
              ((s)[1] & 0x3F);                                              \
        (s) += 2;                                                           \
    }                                                                       \
    else if (utf8_lead >= 0xF0 && utf8_lead <= 0xF5 && (e) - (s) >= 3) {    \
        (c) = ((utf8_lead & 0x07) << 18) | (((s)[0] & 0x3F) << 12) |        \
              (((s)[1] & 0x3F) << 6) | ((s)[2] & 0x3F);                     \
        if ((c) > 0x10FFFF) (c) = 0xFFFD;                                   \
        (s) += 3;                                                           \
    }                                                                       \
    else {                                                                  \
        (c) = 0xFFFD;                                                       \
    }                                                                       \
} while (0)
861 | |||
/* Case-insensitive comparison of two UTF-8 encoded strings.
 *
 * Both lengths (in bytes) must be supplied; null bytes are ordinary data,
 * not terminators. Characters are decoded pairwise and compared, first
 * exactly and then after lower-casing. The first pair that still differs
 * decides the result (difference of the lower-cased values). If either
 * string runs out before a difference is found, the result is l1 - l2.
 *
 * Returns <0, 0 or >0 in the manner of strcmp().
 */
static inline int compare(const char *s1, const char *s2, int l1, int l2) {
    const unsigned char *a = (const unsigned char *) s1;
    const unsigned char *b = (const unsigned char *) s2;
    const unsigned char *a_end = a + l1;
    const unsigned char *b_end = b + l2;

    for (;;) {
        int ca, cb;

        /* stop as soon as either string is exhausted */
        if (a >= a_end || b >= b_end) break;

        GET_UTF8_CHAR(a, a_end, ca);
        GET_UTF8_CHAR(b, b_end, cb);

        /* only fold case when the raw values differ */
        if (ca != cb) {
            ca = TOLOWER(ca);
            cb = TOLOWER(cb);
            if (ca != cb) return ca - cb;
        }
    }
    return l1 - l2;
}
880 | |||
881 | |||
882 | /*************************************** | ||
883 | * CHMD_EXTRACT | ||
884 | *************************************** | ||
885 | * extracts a file from a CHM helpfile | ||
886 | */ | ||
887 | static int chmd_extract(struct mschm_decompressor *base, | ||
888 | struct mschmd_file *file, const char *filename) | ||
889 | { | ||
890 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; | ||
891 | struct mspack_system *sys; | ||
892 | struct mschmd_header *chm; | ||
893 | struct mspack_file *fh; | ||
894 | off_t bytes; | ||
895 | |||
896 | if (!self) return MSPACK_ERR_ARGS; | ||
897 | if (!file || !file->section) return self->error = MSPACK_ERR_ARGS; | ||
898 | sys = self->system; | ||
899 | chm = file->section->chm; | ||
900 | |||
901 | /* create decompression state if it doesn't exist */ | ||
902 | if (!self->d) { | ||
903 | self->d = (struct mschmd_decompress_state *) sys->alloc(sys, sizeof(struct mschmd_decompress_state)); | ||
904 | if (!self->d) return self->error = MSPACK_ERR_NOMEMORY; | ||
905 | self->d->chm = chm; | ||
906 | self->d->offset = 0; | ||
907 | self->d->state = NULL; | ||
908 | self->d->sys = *sys; | ||
909 | self->d->sys.write = &chmd_sys_write; | ||
910 | self->d->infh = NULL; | ||
911 | self->d->outfh = NULL; | ||
912 | } | ||
913 | |||
914 | /* open input chm file if not open, or the open one is a different chm */ | ||
915 | if (!self->d->infh || (self->d->chm != chm)) { | ||
916 | if (self->d->infh) sys->close(self->d->infh); | ||
917 | if (self->d->state) lzxd_free(self->d->state); | ||
918 | self->d->chm = chm; | ||
919 | self->d->offset = 0; | ||
920 | self->d->state = NULL; | ||
921 | self->d->infh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ); | ||
922 | if (!self->d->infh) return self->error = MSPACK_ERR_OPEN; | ||
923 | } | ||
924 | |||
925 | /* open file for output */ | ||
926 | if (!(fh = sys->open(sys, filename, MSPACK_SYS_OPEN_WRITE))) { | ||
927 | return self->error = MSPACK_ERR_OPEN; | ||
928 | } | ||
929 | |||
930 | /* if file is empty, simply creating it is enough */ | ||
931 | if (!file->length) { | ||
932 | sys->close(fh); | ||
933 | return self->error = MSPACK_ERR_OK; | ||
934 | } | ||
935 | |||
936 | self->error = MSPACK_ERR_OK; | ||
937 | |||
938 | switch (file->section->id) { | ||
939 | case 0: /* Uncompressed section file */ | ||
940 | /* simple seek + copy */ | ||
941 | if (sys->seek(self->d->infh, file->section->chm->sec0.offset | ||
942 | + file->offset, MSPACK_SYS_SEEK_START)) | ||
943 | { | ||
944 | self->error = MSPACK_ERR_SEEK; | ||
945 | } | ||
946 | else { | ||
947 | unsigned char buf[512]; | ||
948 | off_t length = file->length; | ||
949 | while (length > 0) { | ||
950 | int run = sizeof(buf); | ||
951 | if ((off_t)run > length) run = (int)length; | ||
952 | if (sys->read(self->d->infh, &buf[0], run) != run) { | ||
953 | self->error = MSPACK_ERR_READ; | ||
954 | break; | ||
955 | } | ||
956 | if (sys->write(fh, &buf[0], run) != run) { | ||
957 | self->error = MSPACK_ERR_WRITE; | ||
958 | break; | ||
959 | } | ||
960 | length -= run; | ||
961 | } | ||
962 | } | ||
963 | break; | ||
964 | |||
965 | case 1: /* MSCompressed section file */ | ||
966 | /* (re)initialise compression state if we it is not yet initialised, | ||
967 | * or we have advanced too far and have to backtrack | ||
968 | */ | ||
969 | if (!self->d->state || (file->offset < self->d->offset)) { | ||
970 | if (self->d->state) { | ||
971 | lzxd_free(self->d->state); | ||
972 | self->d->state = NULL; | ||
973 | } | ||
974 | if (chmd_init_decomp(self, file)) break; | ||
975 | } | ||
976 | |||
977 | /* seek to input data */ | ||
978 | if (sys->seek(self->d->infh, self->d->inoffset, MSPACK_SYS_SEEK_START)) { | ||
979 | self->error = MSPACK_ERR_SEEK; | ||
980 | break; | ||
981 | } | ||
982 | |||
983 | /* get to correct offset. */ | ||
984 | self->d->outfh = NULL; | ||
985 | if ((bytes = file->offset - self->d->offset)) { | ||
986 | self->error = lzxd_decompress(self->d->state, bytes); | ||
987 | } | ||
988 | |||
989 | /* if getting to the correct offset was error free, unpack file */ | ||
990 | if (!self->error) { | ||
991 | self->d->outfh = fh; | ||
992 | self->error = lzxd_decompress(self->d->state, file->length); | ||
993 | } | ||
994 | |||
995 | /* save offset in input source stream, in case there is a section 0 | ||
996 | * file between now and the next section 1 file extracted */ | ||
997 | self->d->inoffset = sys->tell(self->d->infh); | ||
998 | |||
999 | /* if an LZX error occured, the LZX decompressor is now useless */ | ||
1000 | if (self->error) { | ||
1001 | if (self->d->state) lzxd_free(self->d->state); | ||
1002 | self->d->state = NULL; | ||
1003 | } | ||
1004 | break; | ||
1005 | } | ||
1006 | |||
1007 | sys->close(fh); | ||
1008 | return self->error; | ||
1009 | } | ||
1010 | |||
1011 | /*************************************** | ||
1012 | * CHMD_SYS_WRITE | ||
1013 | *************************************** | ||
1014 | * chmd_sys_write is the internal writer function which the decompressor | ||
1015 | * uses. If either writes data to disk (self->d->outfh) with the real | ||
1016 | * sys->write() function, or does nothing with the data when | ||
1017 | * self->d->outfh == NULL. advances self->d->offset. | ||
1018 | */ | ||
1019 | static int chmd_sys_write(struct mspack_file *file, void *buffer, int bytes) { | ||
1020 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) file; | ||
1021 | self->d->offset += bytes; | ||
1022 | if (self->d->outfh) { | ||
1023 | return self->system->write(self->d->outfh, buffer, bytes); | ||
1024 | } | ||
1025 | return bytes; | ||
1026 | } | ||
1027 | |||
1028 | /*************************************** | ||
1029 | * CHMD_INIT_DECOMP | ||
1030 | *************************************** | ||
1031 | * Initialises the LZX decompressor to decompress the compressed stream, | ||
1032 | * from the nearest reset offset and length that is needed for the given | ||
1033 | * file. | ||
1034 | */ | ||
1035 | static int chmd_init_decomp(struct mschm_decompressor_p *self, | ||
1036 | struct mschmd_file *file) | ||
1037 | { | ||
1038 | int window_size, window_bits, reset_interval, entry, err; | ||
1039 | struct mspack_system *sys = self->system; | ||
1040 | struct mschmd_sec_mscompressed *sec; | ||
1041 | unsigned char *data; | ||
1042 | off_t length, offset; | ||
1043 | |||
1044 | sec = (struct mschmd_sec_mscompressed *) file->section; | ||
1045 | |||
1046 | /* ensure we have a mscompressed content section */ | ||
1047 | err = find_sys_file(self, sec, &sec->content, content_name); | ||
1048 | if (err) return self->error = err; | ||
1049 | |||
1050 | /* ensure we have a ControlData file */ | ||
1051 | err = find_sys_file(self, sec, &sec->control, control_name); | ||
1052 | if (err) return self->error = err; | ||
1053 | |||
1054 | /* read ControlData */ | ||
1055 | if (sec->control->length < lzxcd_SIZEOF) { | ||
1056 | D(("ControlData file is too short")) | ||
1057 | return self->error = MSPACK_ERR_DATAFORMAT; | ||
1058 | } | ||
1059 | if (!(data = read_sys_file(self, sec->control))) { | ||
1060 | D(("can't read mscompressed control data file")) | ||
1061 | return self->error; | ||
1062 | } | ||
1063 | |||
1064 | /* check LZXC signature */ | ||
1065 | if (EndGetI32(&data[lzxcd_Signature]) != 0x43585A4C) { | ||
1066 | sys->free(data); | ||
1067 | return self->error = MSPACK_ERR_SIGNATURE; | ||
1068 | } | ||
1069 | |||
1070 | /* read reset_interval and window_size and validate version number */ | ||
1071 | switch (EndGetI32(&data[lzxcd_Version])) { | ||
1072 | case 1: | ||
1073 | reset_interval = EndGetI32(&data[lzxcd_ResetInterval]); | ||
1074 | window_size = EndGetI32(&data[lzxcd_WindowSize]); | ||
1075 | break; | ||
1076 | case 2: | ||
1077 | reset_interval = EndGetI32(&data[lzxcd_ResetInterval]) * LZX_FRAME_SIZE; | ||
1078 | window_size = EndGetI32(&data[lzxcd_WindowSize]) * LZX_FRAME_SIZE; | ||
1079 | break; | ||
1080 | default: | ||
1081 | D(("bad controldata version")) | ||
1082 | sys->free(data); | ||
1083 | return self->error = MSPACK_ERR_DATAFORMAT; | ||
1084 | } | ||
1085 | |||
1086 | /* free ControlData */ | ||
1087 | sys->free(data); | ||
1088 | |||
1089 | /* find window_bits from window_size */ | ||
1090 | switch (window_size) { | ||
1091 | case 0x008000: window_bits = 15; break; | ||
1092 | case 0x010000: window_bits = 16; break; | ||
1093 | case 0x020000: window_bits = 17; break; | ||
1094 | case 0x040000: window_bits = 18; break; | ||
1095 | case 0x080000: window_bits = 19; break; | ||
1096 | case 0x100000: window_bits = 20; break; | ||
1097 | case 0x200000: window_bits = 21; break; | ||
1098 | default: | ||
1099 | D(("bad controldata window size")) | ||
1100 | return self->error = MSPACK_ERR_DATAFORMAT; | ||
1101 | } | ||
1102 | |||
1103 | /* validate reset_interval */ | ||
1104 | if (reset_interval == 0 || reset_interval % LZX_FRAME_SIZE) { | ||
1105 | D(("bad controldata reset interval")) | ||
1106 | return self->error = MSPACK_ERR_DATAFORMAT; | ||
1107 | } | ||
1108 | |||
1109 | /* which reset table entry would we like? */ | ||
1110 | entry = file->offset / reset_interval; | ||
1111 | /* convert from reset interval multiple (usually 64k) to 32k frames */ | ||
1112 | entry *= reset_interval / LZX_FRAME_SIZE; | ||
1113 | |||
1114 | /* read the reset table entry */ | ||
1115 | if (read_reset_table(self, sec, entry, &length, &offset)) { | ||
1116 | /* the uncompressed length given in the reset table is dishonest. | ||
1117 | * the uncompressed data is always padded out from the given | ||
1118 | * uncompressed length up to the next reset interval */ | ||
1119 | length += reset_interval - 1; | ||
1120 | length &= -reset_interval; | ||
1121 | } | ||
1122 | else { | ||
1123 | /* if we can't read the reset table entry, just start from | ||
1124 | * the beginning. Use spaninfo to get the uncompressed length */ | ||
1125 | entry = 0; | ||
1126 | offset = 0; | ||
1127 | err = read_spaninfo(self, sec, &length); | ||
1128 | } | ||
1129 | if (err) return self->error = err; | ||
1130 | |||
1131 | /* get offset of compressed data stream: | ||
1132 | * = offset of uncompressed section from start of file | ||
1133 | * + offset of compressed stream from start of uncompressed section | ||
1134 | * + offset of chosen reset interval from start of compressed stream */ | ||
1135 | self->d->inoffset = file->section->chm->sec0.offset + sec->content->offset + offset; | ||
1136 | |||
1137 | /* set start offset and overall remaining stream length */ | ||
1138 | self->d->offset = entry * LZX_FRAME_SIZE; | ||
1139 | length -= self->d->offset; | ||
1140 | |||
1141 | /* initialise LZX stream */ | ||
1142 | self->d->state = lzxd_init(&self->d->sys, self->d->infh, | ||
1143 | (struct mspack_file *) self, window_bits, | ||
1144 | reset_interval / LZX_FRAME_SIZE, | ||
1145 | 4096, length, 0); | ||
1146 | if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY; | ||
1147 | return self->error; | ||
1148 | } | ||
1149 | |||
1150 | /*************************************** | ||
1151 | * READ_RESET_TABLE | ||
1152 | *************************************** | ||
1153 | * Reads one entry out of the reset table. Also reads the uncompressed | ||
1154 | * data length. Writes these to offset_ptr and length_ptr respectively. | ||
1155 | * Returns non-zero for success, zero for failure. | ||
1156 | */ | ||
1157 | static int read_reset_table(struct mschm_decompressor_p *self, | ||
1158 | struct mschmd_sec_mscompressed *sec, | ||
1159 | unsigned int entry, | ||
1160 | off_t *length_ptr, off_t *offset_ptr) | ||
1161 | { | ||
1162 | struct mspack_system *sys = self->system; | ||
1163 | unsigned char *data; | ||
1164 | unsigned int pos, entrysize; | ||
1165 | |||
1166 | /* do we have a ResetTable file? */ | ||
1167 | int err = find_sys_file(self, sec, &sec->rtable, rtable_name); | ||
1168 | if (err) return 0; | ||
1169 | |||
1170 | /* read ResetTable file */ | ||
1171 | if (sec->rtable->length < lzxrt_headerSIZEOF) { | ||
1172 | D(("ResetTable file is too short")) | ||
1173 | return 0; | ||
1174 | } | ||
1175 | if (!(data = read_sys_file(self, sec->rtable))) { | ||
1176 | D(("can't read reset table")) | ||
1177 | return 0; | ||
1178 | } | ||
1179 | |||
1180 | /* check sanity of reset table */ | ||
1181 | if (EndGetI32(&data[lzxrt_FrameLen]) != LZX_FRAME_SIZE) { | ||
1182 | D(("bad reset table frame length")) | ||
1183 | sys->free(data); | ||
1184 | return 0; | ||
1185 | } | ||
1186 | |||
1187 | /* get the uncompressed length of the LZX stream */ | ||
1188 | if (read_off64(length_ptr, &data[lzxrt_UncompLen], sys, self->d->infh)) { | ||
1189 | sys->free(data); | ||
1190 | return 0; | ||
1191 | } | ||
1192 | |||
1193 | entrysize = EndGetI32(&data[lzxrt_EntrySize]); | ||
1194 | pos = EndGetI32(&data[lzxrt_TableOffset]) + (entry * entrysize); | ||
1195 | |||
1196 | /* ensure reset table entry for this offset exists */ | ||
1197 | if (entry < EndGetI32(&data[lzxrt_NumEntries]) && | ||
1198 | pos <= (sec->rtable->length - entrysize)) | ||
1199 | { | ||
1200 | switch (entrysize) { | ||
1201 | case 4: | ||
1202 | *offset_ptr = EndGetI32(&data[pos]); | ||
1203 | err = 0; | ||
1204 | break; | ||
1205 | case 8: | ||
1206 | err = read_off64(offset_ptr, &data[pos], sys, self->d->infh); | ||
1207 | break; | ||
1208 | default: | ||
1209 | D(("reset table entry size neither 4 nor 8")) | ||
1210 | err = 1; | ||
1211 | break; | ||
1212 | } | ||
1213 | } | ||
1214 | else { | ||
1215 | D(("bad reset interval")) | ||
1216 | err = 1; | ||
1217 | } | ||
1218 | |||
1219 | /* free the reset table */ | ||
1220 | sys->free(data); | ||
1221 | |||
1222 | /* return success */ | ||
1223 | return (err == 0); | ||
1224 | } | ||
1225 | |||
1226 | /*************************************** | ||
1227 | * READ_SPANINFO | ||
1228 | *************************************** | ||
1229 | * Reads the uncompressed data length from the spaninfo file. | ||
1230 | * Returns zero for success or a non-zero error code for failure. | ||
1231 | */ | ||
1232 | static int read_spaninfo(struct mschm_decompressor_p *self, | ||
1233 | struct mschmd_sec_mscompressed *sec, | ||
1234 | off_t *length_ptr) | ||
1235 | { | ||
1236 | struct mspack_system *sys = self->system; | ||
1237 | unsigned char *data; | ||
1238 | |||
1239 | /* find SpanInfo file */ | ||
1240 | int err = find_sys_file(self, sec, &sec->spaninfo, spaninfo_name); | ||
1241 | if (err) return MSPACK_ERR_DATAFORMAT; | ||
1242 | |||
1243 | /* check it's large enough */ | ||
1244 | if (sec->spaninfo->length != 8) { | ||
1245 | D(("SpanInfo file is wrong size")) | ||
1246 | return MSPACK_ERR_DATAFORMAT; | ||
1247 | } | ||
1248 | |||
1249 | /* read the SpanInfo file */ | ||
1250 | if (!(data = read_sys_file(self, sec->spaninfo))) { | ||
1251 | D(("can't read SpanInfo file")) | ||
1252 | return self->error; | ||
1253 | } | ||
1254 | |||
1255 | /* get the uncompressed length of the LZX stream */ | ||
1256 | err = read_off64(length_ptr, data, sys, self->d->infh); | ||
1257 | sys->free(data); | ||
1258 | if (err) return MSPACK_ERR_DATAFORMAT; | ||
1259 | |||
1260 | if (*length_ptr <= 0) { | ||
1261 | D(("output length is invalid")) | ||
1262 | return MSPACK_ERR_DATAFORMAT; | ||
1263 | } | ||
1264 | |||
1265 | return MSPACK_ERR_OK; | ||
1266 | } | ||
1267 | |||
1268 | /*************************************** | ||
1269 | * FIND_SYS_FILE | ||
1270 | *************************************** | ||
1271 | * Uses chmd_fast_find to locate a system file, and fills out that system | ||
1272 | * file's entry and links it into the list of system files. Returns zero | ||
1273 | * for success, non-zero for both failure and the file not existing. | ||
1274 | */ | ||
1275 | static int find_sys_file(struct mschm_decompressor_p *self, | ||
1276 | struct mschmd_sec_mscompressed *sec, | ||
1277 | struct mschmd_file **f_ptr, const char *name) | ||
1278 | { | ||
1279 | struct mspack_system *sys = self->system; | ||
1280 | struct mschmd_file result; | ||
1281 | |||
1282 | /* already loaded */ | ||
1283 | if (*f_ptr) return MSPACK_ERR_OK; | ||
1284 | |||
1285 | /* try using fast_find to find the file - return DATAFORMAT error if | ||
1286 | * it fails, or successfully doesn't find the file */ | ||
1287 | if (chmd_fast_find((struct mschm_decompressor *) self, sec->base.chm, | ||
1288 | name, &result, (int)sizeof(result)) || !result.section) | ||
1289 | { | ||
1290 | return MSPACK_ERR_DATAFORMAT; | ||
1291 | } | ||
1292 | |||
1293 | if (!(*f_ptr = (struct mschmd_file *) sys->alloc(sys, sizeof(result)))) { | ||
1294 | return MSPACK_ERR_NOMEMORY; | ||
1295 | } | ||
1296 | |||
1297 | /* copy result */ | ||
1298 | *(*f_ptr) = result; | ||
1299 | (*f_ptr)->filename = (char *) name; | ||
1300 | |||
1301 | /* link file into sysfiles list */ | ||
1302 | (*f_ptr)->next = sec->base.chm->sysfiles; | ||
1303 | sec->base.chm->sysfiles = *f_ptr; | ||
1304 | return MSPACK_ERR_OK; | ||
1305 | } | ||
1306 | |||
1307 | /*************************************** | ||
1308 | * READ_SYS_FILE | ||
1309 | *************************************** | ||
1310 | * Allocates memory for a section 0 (uncompressed) file and reads it into | ||
1311 | * memory. | ||
1312 | */ | ||
1313 | static unsigned char *read_sys_file(struct mschm_decompressor_p *self, | ||
1314 | struct mschmd_file *file) | ||
1315 | { | ||
1316 | struct mspack_system *sys = self->system; | ||
1317 | unsigned char *data = NULL; | ||
1318 | int len; | ||
1319 | |||
1320 | if (!file || !file->section || (file->section->id != 0)) { | ||
1321 | self->error = MSPACK_ERR_DATAFORMAT; | ||
1322 | return NULL; | ||
1323 | } | ||
1324 | |||
1325 | len = (int) file->length; | ||
1326 | |||
1327 | if (!(data = (unsigned char *) sys->alloc(sys, (size_t) len))) { | ||
1328 | self->error = MSPACK_ERR_NOMEMORY; | ||
1329 | return NULL; | ||
1330 | } | ||
1331 | if (sys->seek(self->d->infh, file->section->chm->sec0.offset | ||
1332 | + file->offset, MSPACK_SYS_SEEK_START)) | ||
1333 | { | ||
1334 | self->error = MSPACK_ERR_SEEK; | ||
1335 | sys->free(data); | ||
1336 | return NULL; | ||
1337 | } | ||
1338 | if (sys->read(self->d->infh, data, len) != len) { | ||
1339 | self->error = MSPACK_ERR_READ; | ||
1340 | sys->free(data); | ||
1341 | return NULL; | ||
1342 | } | ||
1343 | return data; | ||
1344 | } | ||
1345 | |||
1346 | /*************************************** | ||
1347 | * CHMD_ERROR | ||
1348 | *************************************** | ||
1349 | * returns the last error that occurred | ||
1350 | */ | ||
1351 | static int chmd_error(struct mschm_decompressor *base) { | ||
1352 | struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; | ||
1353 | return (self) ? self->error : MSPACK_ERR_ARGS; | ||
1354 | } | ||
1355 | |||
1356 | /*************************************** | ||
1357 | * READ_OFF64 | ||
1358 | *************************************** | ||
1359 | * Reads a 64-bit signed integer from memory in Intel byte order. | ||
1360 | * If running on a system with a 64-bit off_t, this is simply done. | ||
1361 | * If running on a system with a 32-bit off_t, offsets up to 0x7FFFFFFF | ||
1362 | * are accepted, offsets beyond that cause an error message. | ||
1363 | */ | ||
1364 | static int read_off64(off_t *var, unsigned char *mem, | ||
1365 | struct mspack_system *sys, struct mspack_file *fh) | ||
1366 | { | ||
1367 | #if LARGEFILE_SUPPORT | ||
1368 | *var = EndGetI64(mem); | ||
1369 | #else | ||
1370 | *var = EndGetI32(mem); | ||
1371 | if ((*var & 0x80000000) || EndGetI32(mem+4)) { | ||
1372 | sys->message(fh, (char *)largefile_msg); | ||
1373 | return 1; | ||
1374 | } | ||
1375 | #endif | ||
1376 | return 0; | ||
1377 | } | ||