From c876d3bbefe0dc00c27ca0c12d29da5874946962 Mon Sep 17 00:00:00 2001 From: Dominik Riebeling Date: Wed, 15 Dec 2021 21:04:28 +0100 Subject: rbutil: Merge rbutil with utils folder. rbutil uses several components from the utils folder, and can be considered part of utils too. Having it in a separate folder is an arbitrary split that doesn't help anymore these days, so merge them. This also allows other utils to easily use libtools.make without the need to navigate to a different folder. Change-Id: I3fc2f4de19e3e776553efb5dea5f779dfec0dc21 --- utils/rbutilqt/mspack/chmd.c | 1377 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1377 insertions(+) create mode 100644 utils/rbutilqt/mspack/chmd.c (limited to 'utils/rbutilqt/mspack/chmd.c') diff --git a/utils/rbutilqt/mspack/chmd.c b/utils/rbutilqt/mspack/chmd.c new file mode 100644 index 0000000000..6c8481db14 --- /dev/null +++ b/utils/rbutilqt/mspack/chmd.c @@ -0,0 +1,1377 @@ +/* This file is part of libmspack. + * (C) 2003-2018 Stuart Caie. 
+ * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +/* CHM decompression implementation */ + +#include "system-mspack.h" +#include "chm.h" + +/* prototypes */ +static struct mschmd_header * chmd_open( + struct mschm_decompressor *base, const char *filename); +static struct mschmd_header * chmd_fast_open( + struct mschm_decompressor *base, const char *filename); +static struct mschmd_header *chmd_real_open( + struct mschm_decompressor *base, const char *filename, int entire); +static void chmd_close( + struct mschm_decompressor *base, struct mschmd_header *chm); +static int chmd_read_headers( + struct mspack_system *sys, struct mspack_file *fh, + struct mschmd_header *chm, int entire); +static int chmd_fast_find( + struct mschm_decompressor *base, struct mschmd_header *chm, + const char *filename, struct mschmd_file *f_ptr, int f_size); +static unsigned char *read_chunk( + struct mschm_decompressor_p *self, struct mschmd_header *chm, + struct mspack_file *fh, unsigned int chunk); +static int search_chunk( + struct mschmd_header *chm, const unsigned char *chunk, const char *filename, + const unsigned char **result, const unsigned char **result_end); +static inline int compare( + const char *s1, const char *s2, int l1, int l2); +static int chmd_extract( + struct mschm_decompressor *base, struct mschmd_file *file, + const char *filename); +static int chmd_sys_write( + struct mspack_file *file, void *buffer, int bytes); +static int chmd_init_decomp( + struct mschm_decompressor_p *self, struct mschmd_file *file); +static int read_reset_table( + struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, + unsigned int entry, off_t *length_ptr, off_t *offset_ptr); +static int read_spaninfo( + struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, + 
+ off_t *length_ptr); +static int find_sys_file( + struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, + struct mschmd_file **f_ptr, const char *name); +static unsigned char *read_sys_file( + struct mschm_decompressor_p *self, struct mschmd_file *file); +static int chmd_error( + struct mschm_decompressor *base); +static int read_off64( + off_t *var, unsigned char *mem, struct mspack_system *sys, + struct mspack_file *fh); + +/* filenames of the system files used for decompression. + * Content and ControlData are essential. + * ResetTable is preferred, but SpanInfo can be used if not available + */ +static const char *content_name = "::DataSpace/Storage/MSCompressed/Content"; +static const char *control_name = "::DataSpace/Storage/MSCompressed/ControlData"; +static const char *spaninfo_name = "::DataSpace/Storage/MSCompressed/SpanInfo"; +static const char *rtable_name = "::DataSpace/Storage/MSCompressed/Transform/" + "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable"; + +/*************************************** + * MSPACK_CREATE_CHM_DECOMPRESSOR + *************************************** + * constructor + * + * sys may be NULL, in which case mspack_default_system is used. + * Returns NULL if mspack_valid_system() rejects the system or if the + * allocation fails. On success the method table is filled in, the last + * error is MSPACK_ERR_OK and there is no decompression state (d == NULL). + */ +struct mschm_decompressor * + mspack_create_chm_decompressor(struct mspack_system *sys) +{ + struct mschm_decompressor_p *self = NULL; + + if (!sys) sys = mspack_default_system; + if (!mspack_valid_system(sys)) return NULL; + + if ((self = (struct mschm_decompressor_p *) sys->alloc(sys, sizeof(struct mschm_decompressor_p)))) { + self->base.open = &chmd_open; + self->base.close = &chmd_close; + self->base.extract = &chmd_extract; + self->base.last_error = &chmd_error; + self->base.fast_open = &chmd_fast_open; + self->base.fast_find = &chmd_fast_find; + self->system = sys; + self->error = MSPACK_ERR_OK; + self->d = NULL; + } + return (struct mschm_decompressor *) self; +} + +/*************************************** + * MSPACK_DESTROY_CHM_DECOMPRESSOR + *************************************** + * destructor + * + * A NULL base is ignored. If decompression state exists, its input file + * handle is closed and its LZX state freed before the state itself and + * the decompressor are freed. + */ +void 
+ mspack_destroy_chm_decompressor(struct mschm_decompressor *base) { + struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; + if (self) { + struct mspack_system *sys = self->system; + if (self->d) { + if (self->d->infh) sys->close(self->d->infh); + if (self->d->state) lzxd_free(self->d->state); + sys->free(self->d); + } + sys->free(self); + } +} + +/*************************************** + * CHMD_OPEN + *************************************** + * opens a file and tries to read it as a CHM file. + * Calls chmd_real_open() with entire=1. + */ +static struct mschmd_header *chmd_open(struct mschm_decompressor *base, + const char *filename) +{ + return chmd_real_open(base, filename, 1); +} + +/*************************************** + * CHMD_FAST_OPEN + *************************************** + * opens a file and tries to read it as a CHM file, but does not read + * the file entries, only the basic CHM headers. + * Calls chmd_real_open() with entire=0 + */ +static struct mschmd_header *chmd_fast_open(struct mschm_decompressor *base, + const char *filename) +{ + return chmd_real_open(base, filename, 0); +} + +/*************************************** + * CHMD_REAL_OPEN + *************************************** + * the real implementation of chmd_open() and chmd_fast_open(). It simply + * passes the "entire" parameter to chmd_read_headers(), which will then + * either read all headers, or a bare mininum. 
+ */
+/* Returns the new header, or NULL on failure. In both cases the result
+ * code is stored as the decompressor's last error (a NULL base returns
+ * NULL without storing anything). The filename pointer is stored in the
+ * header, not copied, so it must outlive the header.
+ */
+static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base,
+                                            const char *filename, int entire)
+{
+    struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
+    struct mschmd_header *chm = NULL;
+    struct mspack_system *sys;
+    struct mspack_file *fh;
+    int error;
+
+    if (!base) return NULL;
+    sys = self->system;
+
+    if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) {
+        if ((chm = (struct mschmd_header *) sys->alloc(sys, sizeof(struct mschmd_header)))) {
+            chm->filename = filename;
+            error = chmd_read_headers(sys, fh, chm, entire);
+            if (error) {
+                /* if the error is DATAFORMAT, and there are some results, return
+                 * partial results with a warning, rather than nothing */
+                if (error == MSPACK_ERR_DATAFORMAT && (chm->files || chm->sysfiles)) {
+                    sys->message(fh, "WARNING; contents are corrupt");
+                    error = MSPACK_ERR_OK;
+                }
+                else {
+                    chmd_close(base, chm);
+                    chm = NULL;
+                }
+            }
+            self->error = error;
+        }
+        else {
+            self->error = MSPACK_ERR_NOMEMORY;
+        }
+        sys->close(fh);
+    }
+    else {
+        self->error = MSPACK_ERR_OPEN;
+    }
+    return chm;
+}
+
+/***************************************
+ * CHMD_CLOSE
+ ***************************************
+ * frees all memory associated with a given mschmd_header.
+ * A NULL decompressor or a NULL header is ignored.
+ */
+static void chmd_close(struct mschm_decompressor *base,
+                       struct mschmd_header *chm)
+{
+    struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
+    struct mschmd_file *fi, *nfi;
+    struct mspack_system *sys;
+    unsigned int i;
+
+    /* nothing to free without a header; previously a NULL chm was
+     * dereferenced below */
+    if (!base || !chm) return;
+    sys = self->system;
+
+    self->error = MSPACK_ERR_OK;
+
+    /* free files */
+    for (fi = chm->files; fi; fi = nfi) {
+        nfi = fi->next;
+        sys->free(fi);
+    }
+    for (fi = chm->sysfiles; fi; fi = nfi) {
+        nfi = fi->next;
+        sys->free(fi);
+    }
+
+    /* if this CHM was being decompressed, free decompression state */
+    if (self->d && (self->d->chm == chm)) {
+        if (self->d->infh) sys->close(self->d->infh);
+        if (self->d->state) lzxd_free(self->d->state);
+        sys->free(self->d);
+        self->d = NULL;
+    }
+
+    /* if this CHM had a chunk cache, free it and contents */
+    if (chm->chunk_cache) {
+        for (i = 0; i < chm->num_chunks; i++) sys->free(chm->chunk_cache[i]);
+        sys->free(chm->chunk_cache);
+    }
+
+    sys->free(chm);
+}
+
+/***************************************
+ * CHMD_READ_HEADERS
+ ***************************************
+ * reads the basic CHM file headers. If the "entire" parameter is
+ * non-zero, all file entries will also be read. fills out a pre-existing
+ * mschmd_header structure, allocates memory for files as necessary
+ *
+ * Returns MSPACK_ERR_OK, or an MSPACK_ERR_* code. When MSPACK_ERR_DATAFORMAT
+ * is returned after some directory chunks were parsed, chm->files and
+ * chm->sysfiles hold the entries read so far; the caller owns them.
+ */
+
+/* The GUIDs found in CHM headers */
+static const unsigned char guids[32] = {
+    /* {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} */
+    0x10, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
+    0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC,
+    /* {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} */
+    0x11, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
+    0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC
+};
+
+/* reads an encoded integer into a variable; 7 bits of data per byte,
+ * the high bit is used to indicate that there is another byte.
+ * Relies on the enclosing function providing the cursor "p", the limit
+ * "end" and a "chunk_end" label that is jumped to when data runs out */
+#define READ_ENCINT(var) do {                   \
+    (var) = 0;                                  \
+    do {                                        \
+        if (p >= end) goto chunk_end;           \
+        (var) = ((var) << 7) | (*p & 0x7F);     \
+    } while (*p++ & 0x80);                      \
+} while (0)
+
+static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
+                             struct mschmd_header *chm, int entire)
+{
+    unsigned int section, name_len, x, errors, num_chunks;
+    unsigned char buf[0x54], *chunk = NULL, *name, *p, *end;
+    struct mschmd_file *fi, *link = NULL;
+    off_t offset, length;
+    int num_entries;
+
+    /* initialise pointers */
+    chm->files         = NULL;
+    chm->sysfiles      = NULL;
+    chm->chunk_cache   = NULL;
+    chm->sec0.base.chm = chm;
+    chm->sec0.base.id  = 0;
+    chm->sec1.base.chm = chm;
+    chm->sec1.base.id  = 1;
+    chm->sec1.content  = NULL;
+    chm->sec1.control  = NULL;
+    chm->sec1.spaninfo = NULL;
+    chm->sec1.rtable   = NULL;
+
+    /* read the first header */
+    if (sys->read(fh, &buf[0], chmhead_SIZEOF) != chmhead_SIZEOF) {
+        return MSPACK_ERR_READ;
+    }
+
+    /* check ITSF signature */
+    if (EndGetI32(&buf[chmhead_Signature]) != 0x46535449) {
+        return MSPACK_ERR_SIGNATURE;
+    }
+
+    /* check both header GUIDs */
+    if (memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) {
+        D(("incorrect GUIDs"))
+        return MSPACK_ERR_SIGNATURE;
+    }
+
+    chm->version   = EndGetI32(&buf[chmhead_Version]);
+    chm->timestamp = EndGetM32(&buf[chmhead_Timestamp]);
+    chm->language  = EndGetI32(&buf[chmhead_LanguageID]);
+    if (chm->version > 3) {
+        sys->message(fh, "WARNING; CHM version > 3");
+    }
+
+    /* read the header section table */
+    if (sys->read(fh, &buf[0], chmhst3_SIZEOF) != chmhst3_SIZEOF) {
+        return MSPACK_ERR_READ;
+    }
+
+    /* chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files.
+     * The offset will be corrected later, once HS1 is read.
+     */
+    if (read_off64(&offset,           &buf[chmhst_OffsetHS0],  sys, fh) ||
+        read_off64(&chm->dir_offset,  &buf[chmhst_OffsetHS1],  sys, fh) ||
+        read_off64(&chm->sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh))
+    {
+        return MSPACK_ERR_DATAFORMAT;
+    }
+
+    /* seek to header section 0 */
+    if (sys->seek(fh, offset, MSPACK_SYS_SEEK_START)) {
+        return MSPACK_ERR_SEEK;
+    }
+
+    /* read header section 0 */
+    if (sys->read(fh, &buf[0], chmhs0_SIZEOF) != chmhs0_SIZEOF) {
+        return MSPACK_ERR_READ;
+    }
+    if (read_off64(&chm->length, &buf[chmhs0_FileLen], sys, fh)) {
+        return MSPACK_ERR_DATAFORMAT;
+    }
+
+    /* seek to header section 1 */
+    if (sys->seek(fh, chm->dir_offset, MSPACK_SYS_SEEK_START)) {
+        return MSPACK_ERR_SEEK;
+    }
+
+    /* read header section 1 */
+    if (sys->read(fh, &buf[0], chmhs1_SIZEOF) != chmhs1_SIZEOF) {
+        return MSPACK_ERR_READ;
+    }
+
+    /* from here on dir_offset is the offset of the first chunk, i.e. the
+     * file position just after header section 1 */
+    chm->dir_offset = sys->tell(fh);
+    chm->chunk_size = EndGetI32(&buf[chmhs1_ChunkSize]);
+    chm->density    = EndGetI32(&buf[chmhs1_Density]);
+    chm->depth      = EndGetI32(&buf[chmhs1_Depth]);
+    chm->index_root = EndGetI32(&buf[chmhs1_IndexRoot]);
+    chm->num_chunks = EndGetI32(&buf[chmhs1_NumChunks]);
+    chm->first_pmgl = EndGetI32(&buf[chmhs1_FirstPMGL]);
+    chm->last_pmgl  = EndGetI32(&buf[chmhs1_LastPMGL]);
+
+    if (chm->version < 3) {
+        /* versions before 3 don't have chmhst3_OffsetCS0 */
+        chm->sec0.offset = chm->dir_offset + (chm->chunk_size * chm->num_chunks);
+    }
+
+    /* check if content offset or file size is wrong */
+    if (chm->sec0.offset > chm->length) {
+        D(("content section begins after file has ended"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+
+    /* ensure there are chunks and that chunk size is
+     * large enough for signature and num_entries */
+    if (chm->chunk_size < (pmgl_Entries + 2)) {
+        D(("chunk size not large enough"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+    if (chm->num_chunks == 0) {
+        D(("no chunks"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+
+    /* The chunk_cache data structure is not great; large values for num_chunks
+     * or num_chunks*chunk_size can exhaust all memory. Until a better chunk
+     * cache is implemented, put arbitrary limits on num_chunks and chunk size.
+     */
+    if (chm->num_chunks > 100000) {
+        D(("more than 100,000 chunks"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+    if (chm->chunk_size > 8192) {
+        D(("chunk size over 8192 (get in touch if this is valid)"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+    if ((off_t)chm->chunk_size * (off_t)chm->num_chunks > chm->length) {
+        D(("chunks larger than entire file"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+
+    /* common sense checks on header section 1 fields */
+    if (chm->chunk_size != 4096) {
+        sys->message(fh, "WARNING; chunk size is not 4096");
+    }
+    if (chm->first_pmgl != 0) {
+        sys->message(fh, "WARNING; first PMGL chunk is not zero");
+    }
+    if (chm->first_pmgl > chm->last_pmgl) {
+        D(("first pmgl chunk is after last pmgl chunk"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+    if (chm->index_root != 0xFFFFFFFF && chm->index_root >= chm->num_chunks) {
+        D(("index_root outside valid range"))
+        return MSPACK_ERR_DATAFORMAT;
+    }
+
+    /* if we are doing a quick read, stop here! */
+    if (!entire) {
+        return MSPACK_ERR_OK;
+    }
+
+    /* seek to the first PMGL chunk, and reduce the number of chunks to read.
+     * first_pmgl is not bounded by num_chunks, so the byte offset is
+     * computed in off_t; a 32-bit product could wrap and seek to the
+     * wrong place */
+    if ((x = chm->first_pmgl) != 0) {
+        if (sys->seek(fh, (off_t) x * (off_t) chm->chunk_size, MSPACK_SYS_SEEK_CUR)) {
+            return MSPACK_ERR_SEEK;
+        }
+    }
+    num_chunks = chm->last_pmgl - x + 1;
+
+    if (!(chunk = (unsigned char *) sys->alloc(sys, (size_t)chm->chunk_size))) {
+        return MSPACK_ERR_NOMEMORY;
+    }
+
+    /* read and process all chunks from FirstPMGL to LastPMGL */
+    errors = 0;
+    while (num_chunks--) {
+        /* read next chunk */
+        if (sys->read(fh, chunk, (int)chm->chunk_size) != (int)chm->chunk_size) {
+            sys->free(chunk);
+            return MSPACK_ERR_READ;
+        }
+
+        /* process only directory (PMGL) chunks */
+        if (EndGetI32(&chunk[pmgl_Signature]) != 0x4C474D50) continue;
+
+        if (EndGetI32(&chunk[pmgl_QuickRefSize]) < 2) {
+            sys->message(fh, "WARNING; PMGL quickref area is too small");
+        }
+        if (EndGetI32(&chunk[pmgl_QuickRefSize]) >
+            (chm->chunk_size - pmgl_Entries))
+        {
+            sys->message(fh, "WARNING; PMGL quickref area is too large");
+        }
+
+        /* entries start after the chunk header; the last two bytes of
+         * the chunk hold the number of entries */
+        p = &chunk[pmgl_Entries];
+        end = &chunk[chm->chunk_size - 2];
+        num_entries = EndGetI16(end);
+
+        while (num_entries--) {
+            READ_ENCINT(name_len);
+            if (name_len > (unsigned int) (end - p)) goto chunk_end;
+            name = p; p += name_len;
+            READ_ENCINT(section);
+            READ_ENCINT(offset);
+            READ_ENCINT(length);
+
+            /* ignore blank or one-char (e.g. "/") filenames we'd return as blank */
+            if (name_len < 2 || !name[0] || !name[1]) continue;
+
+            /* empty files and directory names are stored as a file entry at
+             * offset 0 with length 0. We want to keep empty files, but not
+             * directory names, which end with a "/" */
+            if ((offset == 0) && (length == 0)) {
+                if ((name_len > 0) && (name[name_len-1] == '/')) continue;
+            }
+
+            if (section > 1) {
+                sys->message(fh, "invalid section number '%u'.", section);
+                continue;
+            }
+
+            /* the filename is stored directly after the struct, in the
+             * same allocation */
+            if (!(fi = (struct mschmd_file *) sys->alloc(sys, sizeof(struct mschmd_file) + name_len + 1))) {
+                sys->free(chunk);
+                return MSPACK_ERR_NOMEMORY;
+            }
+
+            fi->next     = NULL;
+            fi->filename = (char *) &fi[1];
+            fi->section  = ((section == 0) ? (struct mschmd_section *) (&chm->sec0)
+                                           : (struct mschmd_section *) (&chm->sec1));
+            fi->offset   = offset;
+            fi->length   = length;
+            sys->copy(name, fi->filename, (size_t) name_len);
+            fi->filename[name_len] = '\0';
+
+            if (name[0] == ':' && name[1] == ':') {
+                /* system file */
+                if (name_len == 40 && memcmp(name, content_name, 40) == 0) {
+                    chm->sec1.content = fi;
+                }
+                else if (name_len == 44 && memcmp(name, control_name, 44) == 0) {
+                    chm->sec1.control = fi;
+                }
+                else if (name_len == 41 && memcmp(name, spaninfo_name, 41) == 0) {
+                    chm->sec1.spaninfo = fi;
+                }
+                else if (name_len == 105 && memcmp(name, rtable_name, 105) == 0) {
+                    chm->sec1.rtable = fi;
+                }
+                /* system files are prepended to their list */
+                fi->next = chm->sysfiles;
+                chm->sysfiles = fi;
+            }
+            else {
+                /* normal file: appended, so directory order is kept */
+                if (link) link->next = fi; else chm->files = fi;
+                link = fi;
+            }
+        }
+
+        /* this is reached either when num_entries runs out, or if
+         * reading data from the chunk reached a premature end of chunk */
+    chunk_end:
+        if (num_entries >= 0) {
+            D(("chunk ended before all entries could be read"))
+            errors++;
+        }
+
+    }
+    sys->free(chunk);
+    return (errors > 0) ? MSPACK_ERR_DATAFORMAT : MSPACK_ERR_OK;
+}
+
+/***************************************
+ * CHMD_FAST_FIND
+ ***************************************
+ * uses PMGI index chunks and quickref data to quickly locate a file
+ * directly from the on-disk index.
+ *
+ * Returns MSPACK_ERR_OK and fills in *f_ptr if the search completed; a
+ * file that is not present leaves *f_ptr zeroed. Returns MSPACK_ERR_ARGS
+ * for a NULL argument or if f_size is not sizeof(struct mschmd_file).
+ *
+ * Loops in the index (where pmgl_NextChunk or a PMGI index entry point
+ * to an already visited chunk) are stopped by allowing at most
+ * num_chunks chunk visits per search; exceeding that is reported as
+ * MSPACK_ERR_DATAFORMAT.
+ */
+static int chmd_fast_find(struct mschm_decompressor *base,
+                          struct mschmd_header *chm, const char *filename,
+                          struct mschmd_file *f_ptr, int f_size)
+{
+    struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
+    struct mspack_system *sys;
+    struct mspack_file *fh;
+    /* p and end are initialised to prevent MSVC warning about "potentially"
+     * uninitialised usage. This is provably untrue, but MS won't fix:
+     * https://developercommunity.visualstudio.com/content/problem/363489/c4701-false-positive-warning.html */
+    const unsigned char *chunk, *p = NULL, *end = NULL;
+    int err = MSPACK_ERR_OK, result = -1;
+    unsigned int n, sec, visited = 0;
+
+    /* filename is passed to strlen() by search_chunk(), so it must not
+     * be NULL either */
+    if (!self || !chm || !filename || !f_ptr ||
+        (f_size != sizeof(struct mschmd_file)))
+    {
+        return MSPACK_ERR_ARGS;
+    }
+    sys = self->system;
+
+    /* clear the results structure */
+    memset(f_ptr, 0, f_size);
+
+    if (!(fh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ))) {
+        return MSPACK_ERR_OPEN;
+    }
+
+    /* go through PMGI chunk hierarchy to reach PMGL chunk */
+    if (chm->index_root < chm->num_chunks) {
+        n = chm->index_root;
+        for (;;) {
+            /* a well-formed index cannot visit more chunks than exist */
+            if (visited++ >= chm->num_chunks) {
+                D(("loop detected in PMGI/PMGL chunks"))
+                sys->close(fh);
+                return self->error = MSPACK_ERR_DATAFORMAT;
+            }
+
+            if (!(chunk = read_chunk(self, chm, fh, n))) {
+                sys->close(fh);
+                return self->error;
+            }
+
+            /* search PMGI/PMGL chunk. exit early if no entry found */
+            if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) {
+                break;
+            }
+
+            /* found result. loop around for next chunk if this is PMGI */
+            if (chunk[3] == 0x4C) break; else READ_ENCINT(n);
+        }
+    }
+    else {
+        /* PMGL chunks only, search from first_pmgl to last_pmgl */
+        for (n = chm->first_pmgl; n <= chm->last_pmgl;
+             n = EndGetI32(&chunk[pmgl_NextChunk]))
+        {
+            /* stop cycles in the pmgl_NextChunk chain */
+            if (visited++ >= chm->num_chunks) {
+                D(("loop detected in PMGL chunks"))
+                result = -1;
+                break;
+            }
+
+            if (!(chunk = read_chunk(self, chm, fh, n))) {
+                err = self->error;
+                break;
+            }
+
+            /* search PMGL chunk. exit if file found */
+            if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) {
+                break;
+            }
+
+            /* stop simple infinite loops: can't visit the same chunk twice */
+            if (n == EndGetI32(&chunk[pmgl_NextChunk])) {
+                break;
+            }
+        }
+    }
+
+    /* if we found a file, read it */
+    if (result > 0) {
+        READ_ENCINT(sec);
+        f_ptr->section = (sec == 0) ? (struct mschmd_section *) &chm->sec0
+                                    : (struct mschmd_section *) &chm->sec1;
+        READ_ENCINT(f_ptr->offset);
+        READ_ENCINT(f_ptr->length);
+    }
+    else if (result < 0) {
+        err = MSPACK_ERR_DATAFORMAT;
+    }
+
+    sys->close(fh);
+    return self->error = err;
+
+ chunk_end:
+    D(("read beyond end of chunk entries"))
+    sys->close(fh);
+    return self->error = MSPACK_ERR_DATAFORMAT;
+}
+
+/* reads the given chunk into memory, storing it in a chunk cache
+ * so it doesn't need to be read from disk more than once.
+ * Returns the chunk, which stays owned by the cache, or NULL with
+ * self->error set.
+ */
+static unsigned char *read_chunk(struct mschm_decompressor_p *self,
+                                 struct mschmd_header *chm,
+                                 struct mspack_file *fh,
+                                 unsigned int chunk_num)
+{
+    struct mspack_system *sys = self->system;
+    unsigned char *buf;
+
+    /* check arguments - most are already checked by chmd_fast_find.
+     * An out-of-range chunk number comes from the index data, so report
+     * it rather than leaving a stale error code for the caller */
+    if (chunk_num >= chm->num_chunks) {
+        self->error = MSPACK_ERR_DATAFORMAT;
+        return NULL;
+    }
+
+    /* ensure chunk cache is available */
+    if (!chm->chunk_cache) {
+        size_t size = sizeof(unsigned char *) * chm->num_chunks;
+        if (!(chm->chunk_cache = (unsigned char **) sys->alloc(sys, size))) {
+            self->error = MSPACK_ERR_NOMEMORY;
+            return NULL;
+        }
+        memset(chm->chunk_cache, 0, size);
+    }
+
+    /* try to answer out of chunk cache */
+    if (chm->chunk_cache[chunk_num]) return chm->chunk_cache[chunk_num];
+
+    /* need to read chunk - allocate memory for it */
+    if (!(buf = (unsigned char *) sys->alloc(sys, chm->chunk_size))) {
+        self->error = MSPACK_ERR_NOMEMORY;
+        return NULL;
+    }
+
+    /* seek to block and read it */
+    if (sys->seek(fh, (off_t) (chm->dir_offset + (chunk_num * chm->chunk_size)),
+                  MSPACK_SYS_SEEK_START))
+    {
+        self->error = MSPACK_ERR_SEEK;
+        sys->free(buf);
+        return NULL;
+    }
+    if (sys->read(fh, buf, (int)chm->chunk_size) != (int)chm->chunk_size) {
+        self->error = MSPACK_ERR_READ;
+        sys->free(buf);
+        return NULL;
+    }
+
+    /* check the signature. Is it PMGL or PMGI?
+     * NOTE(review): a bad signature is reported as MSPACK_ERR_SEEK; kept
+     * as-is because callers may depend on the code */
+    if (!((buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47) &&
+          ((buf[3] == 0x4C) || (buf[3] == 0x49))))
+    {
+        self->error = MSPACK_ERR_SEEK;
+        sys->free(buf);
+        return NULL;
+    }
+
+    /* all OK. Store chunk in cache and return it */
+    return chm->chunk_cache[chunk_num] = buf;
+}
+
+/* searches a PMGI/PMGL chunk for a given filename entry. Returns -1 on
+ * data format error, 0 if entry definitely not found, 1 if entry
+ * found. In the latter case, *result and *result_end are set pointing
+ * to that entry's data (either the "next chunk" ENCINT for a PMGI or
+ * the section, offset and length ENCINTs for a PMGL).
+ *
+ * In the case of PMGL chunks, the entry has definitely been
+ * found. In the case of PMGI chunks, the entry which points to the
+ * chunk that may eventually contain that entry has been found.
+ */
+static int search_chunk(struct mschmd_header *chm,
+                        const unsigned char *chunk,
+                        const char *filename,
+                        const unsigned char **result,
+                        const unsigned char **result_end)
+{
+    const unsigned char *start, *end, *p;
+    unsigned int qr_size, num_entries, qr_entries, qr_density, name_len;
+    unsigned int L, R, M, fname_len, entries_off, is_pmgl;
+    int cmp;
+
+    fname_len = strlen(filename);
+
+    /* PMGL chunk or PMGI chunk? (note: read_chunk() has already
+     * checked the rest of the characters in the chunk signature) */
+    if (chunk[3] == 0x4C) {
+        is_pmgl = 1;
+        entries_off = pmgl_Entries;
+    }
+    else {
+        is_pmgl = 0;
+        entries_off = pmgi_Entries;
+    }
+
+    /* Step 1: binary search first filename of each QR entry
+     * - target filename == entry
+     *   found file
+     * - target filename < all entries
+     *   file not found
+     * - target filename > all entries
+     *   proceed to step 2 using final entry
+     * - target filename between two searched entries
+     *   proceed to step 2
+     */
+    qr_size = EndGetI32(&chunk[pmgl_QuickRefSize]);
+
+    /* validate qr_size before it is used to form a pointer into the chunk */
+    if (qr_size > chm->chunk_size) {
+        D(("quickref size > chunk size"))
+        return -1;
+    }
+
+    start       = &chunk[chm->chunk_size - 2];
+    end         = &chunk[chm->chunk_size - qr_size];
+    num_entries = EndGetI16(start);
+
+    if (num_entries == 0) {
+        D(("chunk has no entries"))
+        return -1;
+    }
+
+    /* density comes straight from the file header. num_entries is a
+     * 16-bit value, so any density of 16 or more means "a single quickref
+     * group"; clamping there gives the same result and keeps the shift
+     * count below the width of the type */
+    qr_density = chm->density;
+    if (qr_density > 16) qr_density = 16;
+    qr_density = 1 + (1U << qr_density);
+    qr_entries = (num_entries + qr_density-1) / qr_density;
+
+    *result_end = end;
+
+    if (((int)qr_entries * 2) > (start - end)) {
+        D(("WARNING; more quickrefs than quickref space"))
+        qr_entries = 0; /* but we can live with it */
+    }
+
+    if (qr_entries > 0) {
+        L = 0;
+        R = qr_entries - 1;
+        do {
+            /* pick new midpoint */
+            M = (L + R) >> 1;
+
+            /* compare filename with entry QR points to */
+            p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
+            READ_ENCINT(name_len);
+            if (name_len > (unsigned int) (end - p)) goto chunk_end;
+            cmp = compare(filename, (char *)p, fname_len, name_len);
+
+            if (cmp == 0) break;
+            else if (cmp < 0) { if (M) R = M - 1; else return 0; }
+            else if (cmp > 0) L = M + 1;
+        } while (L <= R);
+        M = (L + R) >> 1;
+
+        if (cmp == 0) {
+            /* exact match! */
+            p += name_len;
+            *result = p;
+            return 1;
+        }
+
+        /* otherwise, read the group of entries for QR entry M */
+        p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
+        num_entries -= (M * qr_density);
+        if (num_entries > qr_density) num_entries = qr_density;
+    }
+    else {
+        p = &chunk[entries_off];
+    }
+
+    /* Step 2: linear search through the set of entries reached in step 1.
+     * - filename == any entry
+     *   found entry
+     * - filename < all entries (PMGI) or any entry (PMGL)
+     *   entry not found, stop now
+     * - filename > all entries
+     *   entry not found (PMGL) / maybe found (PMGI)
+     * -
+     */
+    *result = NULL;
+    while (num_entries-- > 0) {
+        READ_ENCINT(name_len);
+        if (name_len > (unsigned int) (end - p)) goto chunk_end;
+        cmp = compare(filename, (char *)p, fname_len, name_len);
+        p += name_len;
+
+        if (cmp == 0) {
+            /* entry found */
+            *result = p;
+            return 1;
+        }
+
+        if (cmp < 0) {
+            /* entry not found (PMGL) / maybe found (PMGI) */
+            break;
+        }
+
+        /* read and ignore the rest of this entry */
+        if (is_pmgl) {
+            READ_ENCINT(R); /* skip section */
+            READ_ENCINT(R); /* skip offset */
+            READ_ENCINT(R); /* skip length */
+        }
+        else {
+            *result = p; /* store potential final result */
+            READ_ENCINT(R); /* skip chunk number */
+        }
+    }
+
+    /* PMGL? not found. PMGI? maybe found */
+    return (is_pmgl) ? 0 : (*result ? 1 : 0);
+
+ chunk_end:
+    D(("reached end of chunk data while searching"))
+    return -1;
+}
+
+#if HAVE_TOWLOWER
+# include <wctype.h>
+# define TOLOWER(x) towlower(x)
+#else
+# include <ctype.h>
+/* tolower() is only defined for values representable as unsigned char
+ * (or EOF); decoded code points above 0xFF are passed through unchanged.
+ * Note that the argument is evaluated more than once. */
+# define TOLOWER(x) (((x) >= 0 && (x) <= 0xFF) ? tolower(x) : (x))
+#endif
+
+/* decodes a UTF-8 character from s[] into c. Will not read past e.
+ * doesn't test that extension bytes are %10xxxxxx.
+ * allows some overlong encodings.
+ */ +#define GET_UTF8_CHAR(s, e, c) do { \ + unsigned char x = *s++; \ + if (x < 0x80) c = x; \ + else if (x >= 0xC2 && x < 0xE0 && s < e) { \ + c = (x & 0x1F) << 6 | (*s++ & 0x3F); \ + } \ + else if (x >= 0xE0 && x < 0xF0 && s+1 < e) { \ + c = (x & 0x0F) << 12 | (s[0] & 0x3F) << 6 | (s[1] & 0x3F); \ + s += 2; \ + } \ + else if (x >= 0xF0 && x <= 0xF5 && s+2 < e) { \ + c = (x & 0x07) << 18 | (s[0] & 0x3F) << 12 | \ + (s[1] & 0x3F) << 6 | (s[2] & 0x3F); \ + if (c > 0x10FFFF) c = 0xFFFD; \ + s += 3; \ + } \ + else c = 0xFFFD; \ +} while (0) + +/* case-insensitively compares two UTF8 encoded strings. String length for + * both strings must be provided, null bytes are not terminators. + * Returns the difference between the first pair of decoded characters + * that still differ after lower-casing; if no such pair is found before + * either string ends, returns l1 - l2. */ +static inline int compare(const char *s1, const char *s2, int l1, int l2) { + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + register const unsigned char *e1 = p1 + l1, *e2 = p2 + l2; + int c1, c2; + + while (p1 < e1 && p2 < e2) { + GET_UTF8_CHAR(p1, e1, c1); + GET_UTF8_CHAR(p2, e2, c2); + if (c1 == c2) continue; + c1 = TOLOWER(c1); + c2 = TOLOWER(c2); + if (c1 != c2) return c1 - c2; + } + return l1 - l2; +} + + +/*************************************** + * CHMD_EXTRACT + *************************************** + * extracts a file from a CHM helpfile to the output file named by + * "filename". Returns an MSPACK_ERR_* code; when base is not NULL the + * same code is stored as the decompressor's last error. + */ +static int chmd_extract(struct mschm_decompressor *base, + struct mschmd_file *file, const char *filename) +{ + struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; + struct mspack_system *sys; + struct mschmd_header *chm; + struct mspack_file *fh; + off_t bytes; + + if (!self) return MSPACK_ERR_ARGS; + if (!file || !file->section) return self->error = MSPACK_ERR_ARGS; + sys = self->system; + chm = file->section->chm; + + /* create decompression state if it doesn't exist */ + if (!self->d) { + self->d = (struct mschmd_decompress_state *) sys->alloc(sys, sizeof(struct mschmd_decompress_state)); + if (!self->d) return self->error = 
MSPACK_ERR_NOMEMORY; + self->d->chm = chm; + self->d->offset = 0; + self->d->state = NULL; + /* private copy of the system whose write() is redirected to + * chmd_sys_write(), so decompressed output can be counted */ + self->d->sys = *sys; + self->d->sys.write = &chmd_sys_write; + self->d->infh = NULL; + self->d->outfh = NULL; + } + + /* open input chm file if not open, or the open one is a different chm */ + if (!self->d->infh || (self->d->chm != chm)) { + if (self->d->infh) sys->close(self->d->infh); + if (self->d->state) lzxd_free(self->d->state); + self->d->chm = chm; + self->d->offset = 0; + self->d->state = NULL; + self->d->infh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ); + if (!self->d->infh) return self->error = MSPACK_ERR_OPEN; + } + + /* open file for output */ + if (!(fh = sys->open(sys, filename, MSPACK_SYS_OPEN_WRITE))) { + return self->error = MSPACK_ERR_OPEN; + } + + /* if file is empty, simply creating it is enough */ + if (!file->length) { + sys->close(fh); + return self->error = MSPACK_ERR_OK; + } + + self->error = MSPACK_ERR_OK; + + switch (file->section->id) { + case 0: /* Uncompressed section file */ + /* simple seek + copy */ + if (sys->seek(self->d->infh, file->section->chm->sec0.offset + + file->offset, MSPACK_SYS_SEEK_START)) + { + self->error = MSPACK_ERR_SEEK; + } + else { + unsigned char buf[512]; + off_t length = file->length; + while (length > 0) { + int run = sizeof(buf); + if ((off_t)run > length) run = (int)length; + if (sys->read(self->d->infh, &buf[0], run) != run) { + self->error = MSPACK_ERR_READ; + break; + } + if (sys->write(fh, &buf[0], run) != run) { + self->error = MSPACK_ERR_WRITE; + break; + } + length -= run; + } + } + break; + + case 1: /* MSCompressed section file */ + /* (re)initialise compression state if it is not yet initialised, + * or we have advanced too far and have to backtrack + */ + if (!self->d->state || (file->offset < self->d->offset)) { + if (self->d->state) { + lzxd_free(self->d->state); + self->d->state = NULL; + } + if (chmd_init_decomp(self, file)) break; + } + + /* seek to input data */ + if 
(sys->seek(self->d->infh, self->d->inoffset, MSPACK_SYS_SEEK_START)) { + self->error = MSPACK_ERR_SEEK; + break; + } + + /* get to correct offset. */ + /* with outfh NULL, chmd_sys_write() discards the data */ + self->d->outfh = NULL; + if ((bytes = file->offset - self->d->offset)) { + self->error = lzxd_decompress(self->d->state, bytes); + } + + /* if getting to the correct offset was error free, unpack file */ + if (!self->error) { + self->d->outfh = fh; + self->error = lzxd_decompress(self->d->state, file->length); + } + + /* save offset in input source stream, in case there is a section 0 + * file between now and the next section 1 file extracted */ + self->d->inoffset = sys->tell(self->d->infh); + + /* if an LZX error occurred, the LZX decompressor is now useless */ + if (self->error) { + if (self->d->state) lzxd_free(self->d->state); + self->d->state = NULL; + } + break; + } + + sys->close(fh); + return self->error; +} + +/*************************************** + * CHMD_SYS_WRITE + *************************************** + * chmd_sys_write is the internal writer function which the decompressor + * uses. It either writes data to disk (self->d->outfh) with the real + * sys->write() function, or does nothing with the data when + * self->d->outfh == NULL. advances self->d->offset. + * + * The "file" argument is really the mschm_decompressor_p that + * chmd_init_decomp() hands to lzxd_init() as the output handle. + */ +static int chmd_sys_write(struct mspack_file *file, void *buffer, int bytes) { + struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) file; + self->d->offset += bytes; + if (self->d->outfh) { + return self->system->write(self->d->outfh, buffer, bytes); + } + return bytes; +} + +/*************************************** + * CHMD_INIT_DECOMP + *************************************** + * Initialises the LZX decompressor to decompress the compressed stream, + * from the nearest reset offset and length that is needed for the given + * file. 
+ */ +static int chmd_init_decomp(struct mschm_decompressor_p *self, + struct mschmd_file *file) +{ + int window_size, window_bits, reset_interval, entry, err; + struct mspack_system *sys = self->system; + struct mschmd_sec_mscompressed *sec; + unsigned char *data; + off_t length, offset; + + sec = (struct mschmd_sec_mscompressed *) file->section; + + /* ensure we have a mscompressed content section */ + err = find_sys_file(self, sec, &sec->content, content_name); + if (err) return self->error = err; + + /* ensure we have a ControlData file */ + err = find_sys_file(self, sec, &sec->control, control_name); + if (err) return self->error = err; + + /* read ControlData */ + if (sec->control->length < lzxcd_SIZEOF) { + D(("ControlData file is too short")) + return self->error = MSPACK_ERR_DATAFORMAT; + } + if (!(data = read_sys_file(self, sec->control))) { + D(("can't read mscompressed control data file")) + return self->error; + } + + /* check LZXC signature */ + if (EndGetI32(&data[lzxcd_Signature]) != 0x43585A4C) { + sys->free(data); + return self->error = MSPACK_ERR_SIGNATURE; + } + + /* read reset_interval and window_size and validate version number */ + switch (EndGetI32(&data[lzxcd_Version])) { + case 1: + reset_interval = EndGetI32(&data[lzxcd_ResetInterval]); + window_size = EndGetI32(&data[lzxcd_WindowSize]); + break; + case 2: + reset_interval = EndGetI32(&data[lzxcd_ResetInterval]) * LZX_FRAME_SIZE; + window_size = EndGetI32(&data[lzxcd_WindowSize]) * LZX_FRAME_SIZE; + break; + default: + D(("bad controldata version")) + sys->free(data); + return self->error = MSPACK_ERR_DATAFORMAT; + } + + /* free ControlData */ + sys->free(data); + + /* find window_bits from window_size */ + switch (window_size) { + case 0x008000: window_bits = 15; break; + case 0x010000: window_bits = 16; break; + case 0x020000: window_bits = 17; break; + case 0x040000: window_bits = 18; break; + case 0x080000: window_bits = 19; break; + case 0x100000: window_bits = 20; break; + case 
0x200000: window_bits = 21; break; + default: + D(("bad controldata window size")) + return self->error = MSPACK_ERR_DATAFORMAT; + } + + /* validate reset_interval */ + if (reset_interval == 0 || reset_interval % LZX_FRAME_SIZE) { + D(("bad controldata reset interval")) + return self->error = MSPACK_ERR_DATAFORMAT; + } + + /* which reset table entry would we like? */ + entry = file->offset / reset_interval; + /* convert from reset interval multiple (usually 64k) to 32k frames */ + entry *= reset_interval / LZX_FRAME_SIZE; + + /* read the reset table entry */ + if (read_reset_table(self, sec, entry, &length, &offset)) { + /* the uncompressed length given in the reset table is dishonest. + * the uncompressed data is always padded out from the given + * uncompressed length up to the next reset interval */ + length += reset_interval - 1; + length &= -reset_interval; + } + else { + /* if we can't read the reset table entry, just start from + * the beginning. Use spaninfo to get the uncompressed length */ + entry = 0; + offset = 0; + err = read_spaninfo(self, sec, &length); + } + if (err) return self->error = err; + + /* get offset of compressed data stream: + * = offset of uncompressed section from start of file + * + offset of compressed stream from start of uncompressed section + * + offset of chosen reset interval from start of compressed stream */ + self->d->inoffset = file->section->chm->sec0.offset + sec->content->offset + offset; + + /* set start offset and overall remaining stream length */ + self->d->offset = entry * LZX_FRAME_SIZE; + length -= self->d->offset; + + /* initialise LZX stream */ + self->d->state = lzxd_init(&self->d->sys, self->d->infh, + (struct mspack_file *) self, window_bits, + reset_interval / LZX_FRAME_SIZE, + 4096, length, 0); + if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY; + return self->error; +} + +/*************************************** + * READ_RESET_TABLE + *************************************** + * Reads one entry out of 
the reset table. Also reads the uncompressed + * data length. Writes these to offset_ptr and length_ptr respectively. + * Returns non-zero for success, zero for failure. + */ +static int read_reset_table(struct mschm_decompressor_p *self, + struct mschmd_sec_mscompressed *sec, + unsigned int entry, + off_t *length_ptr, off_t *offset_ptr) +{ + struct mspack_system *sys = self->system; + unsigned char *data; + unsigned int pos, entrysize; + + /* do we have a ResetTable file? */ + int err = find_sys_file(self, sec, &sec->rtable, rtable_name); + if (err) return 0; + + /* read ResetTable file */ + if (sec->rtable->length < lzxrt_headerSIZEOF) { + D(("ResetTable file is too short")) + return 0; + } + if (!(data = read_sys_file(self, sec->rtable))) { + D(("can't read reset table")) + return 0; + } + + /* check sanity of reset table */ + if (EndGetI32(&data[lzxrt_FrameLen]) != LZX_FRAME_SIZE) { + D(("bad reset table frame length")) + sys->free(data); + return 0; + } + + /* get the uncompressed length of the LZX stream */ + if (read_off64(length_ptr, &data[lzxrt_UncompLen], sys, self->d->infh)) { + sys->free(data); + return 0; + } + + entrysize = EndGetI32(&data[lzxrt_EntrySize]); + pos = EndGetI32(&data[lzxrt_TableOffset]) + (entry * entrysize); + + /* ensure reset table entry for this offset exists */ + if (entry < EndGetI32(&data[lzxrt_NumEntries]) && + pos <= (sec->rtable->length - entrysize)) + { + switch (entrysize) { + case 4: + *offset_ptr = EndGetI32(&data[pos]); + err = 0; + break; + case 8: + err = read_off64(offset_ptr, &data[pos], sys, self->d->infh); + break; + default: + D(("reset table entry size neither 4 nor 8")) + err = 1; + break; + } + } + else { + D(("bad reset interval")) + err = 1; + } + + /* free the reset table */ + sys->free(data); + + /* return success */ + return (err == 0); +} + +/*************************************** + * READ_SPANINFO + *************************************** + * Reads the uncompressed data length from the spaninfo file. 
+ * Returns zero for success or a non-zero error code for failure. + */ +static int read_spaninfo(struct mschm_decompressor_p *self, + struct mschmd_sec_mscompressed *sec, + off_t *length_ptr) +{ + struct mspack_system *sys = self->system; + unsigned char *data; + + /* find SpanInfo file */ + int err = find_sys_file(self, sec, &sec->spaninfo, spaninfo_name); + if (err) return MSPACK_ERR_DATAFORMAT; + + /* check it's large enough */ + if (sec->spaninfo->length != 8) { + D(("SpanInfo file is wrong size")) + return MSPACK_ERR_DATAFORMAT; + } + + /* read the SpanInfo file */ + if (!(data = read_sys_file(self, sec->spaninfo))) { + D(("can't read SpanInfo file")) + return self->error; + } + + /* get the uncompressed length of the LZX stream */ + err = read_off64(length_ptr, data, sys, self->d->infh); + sys->free(data); + if (err) return MSPACK_ERR_DATAFORMAT; + + if (*length_ptr <= 0) { + D(("output length is invalid")) + return MSPACK_ERR_DATAFORMAT; + } + + return MSPACK_ERR_OK; +} + +/*************************************** + * FIND_SYS_FILE + *************************************** + * Uses chmd_fast_find to locate a system file, and fills out that system + * file's entry and links it into the list of system files. Returns zero + * for success, non-zero for both failure and the file not existing. 
+ */ +static int find_sys_file(struct mschm_decompressor_p *self, + struct mschmd_sec_mscompressed *sec, + struct mschmd_file **f_ptr, const char *name) +{ + struct mspack_system *sys = self->system; + struct mschmd_file result; + + /* already loaded */ + if (*f_ptr) return MSPACK_ERR_OK; + + /* try using fast_find to find the file - return DATAFORMAT error if + * it fails, or successfully doesn't find the file */ + if (chmd_fast_find((struct mschm_decompressor *) self, sec->base.chm, + name, &result, (int)sizeof(result)) || !result.section) + { + return MSPACK_ERR_DATAFORMAT; + } + + if (!(*f_ptr = (struct mschmd_file *) sys->alloc(sys, sizeof(result)))) { + return MSPACK_ERR_NOMEMORY; + } + + /* copy result */ + *(*f_ptr) = result; + (*f_ptr)->filename = (char *) name; + + /* link file into sysfiles list */ + (*f_ptr)->next = sec->base.chm->sysfiles; + sec->base.chm->sysfiles = *f_ptr; + return MSPACK_ERR_OK; +} + +/*************************************** + * READ_SYS_FILE + *************************************** + * Allocates memory for a section 0 (uncompressed) file and reads it into + * memory. 
+ */ +static unsigned char *read_sys_file(struct mschm_decompressor_p *self, + struct mschmd_file *file) +{ + struct mspack_system *sys = self->system; + unsigned char *data = NULL; + int len; + + if (!file || !file->section || (file->section->id != 0)) { + self->error = MSPACK_ERR_DATAFORMAT; + return NULL; + } + + len = (int) file->length; + + if (!(data = (unsigned char *) sys->alloc(sys, (size_t) len))) { + self->error = MSPACK_ERR_NOMEMORY; + return NULL; + } + if (sys->seek(self->d->infh, file->section->chm->sec0.offset + + file->offset, MSPACK_SYS_SEEK_START)) + { + self->error = MSPACK_ERR_SEEK; + sys->free(data); + return NULL; + } + if (sys->read(self->d->infh, data, len) != len) { + self->error = MSPACK_ERR_READ; + sys->free(data); + return NULL; + } + return data; +} + +/*************************************** + * CHMD_ERROR + *************************************** + * returns the last error that occurred + */ +static int chmd_error(struct mschm_decompressor *base) { + struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; + return (self) ? self->error : MSPACK_ERR_ARGS; +} + +/*************************************** + * READ_OFF64 + *************************************** + * Reads a 64-bit signed integer from memory in Intel byte order. + * If running on a system with a 64-bit off_t, this is simply done. + * If running on a system with a 32-bit off_t, offsets up to 0x7FFFFFFF + * are accepted, offsets beyond that cause an error message. + */ +static int read_off64(off_t *var, unsigned char *mem, + struct mspack_system *sys, struct mspack_file *fh) +{ +#if LARGEFILE_SUPPORT + *var = EndGetI64(mem); +#else + *var = EndGetI32(mem); + if ((*var & 0x80000000) || EndGetI32(mem+4)) { + sys->message(fh, (char *)largefile_msg); + return 1; + } +#endif + return 0; +} -- cgit v1.2.3