diff options
Diffstat (limited to 'utils/rbutilqt/mspack/lzxd.c')
-rw-r--r-- | utils/rbutilqt/mspack/lzxd.c | 905 |
1 files changed, 905 insertions, 0 deletions
diff --git a/utils/rbutilqt/mspack/lzxd.c b/utils/rbutilqt/mspack/lzxd.c new file mode 100644 index 0000000000..88cfd90c2a --- /dev/null +++ b/utils/rbutilqt/mspack/lzxd.c | |||
@@ -0,0 +1,905 @@ | |||
1 | /* This file is part of libmspack. | ||
2 | * (C) 2003-2013 Stuart Caie. | ||
3 | * | ||
4 | * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted | ||
5 | * by Microsoft Corporation. | ||
6 | * | ||
7 | * libmspack is free software; you can redistribute it and/or modify it under | ||
8 | * the terms of the GNU Lesser General Public License (LGPL) version 2.1 | ||
9 | * | ||
10 | * For further details, see the file COPYING.LIB distributed with libmspack | ||
11 | */ | ||
12 | |||
13 | /* LZX decompression implementation */ | ||
14 | |||
15 | #include "system-mspack.h" | ||
16 | #include "lzx.h" | ||
17 | |||
18 | /* Microsoft's LZX document (in cab-sdk.exe) and their implementation | ||
19 | * of the com.ms.util.cab Java package do not concur. | ||
20 | * | ||
21 | * In the LZX document, there is a table showing the correlation between | ||
22 | * window size and the number of position slots. It states that the 1MB | ||
23 | * window = 40 slots and the 2MB window = 42 slots. In the implementation, | ||
24 | * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the | ||
25 | * first slot whose position base is equal to or more than the required | ||
26 | * window size'. This would explain why other tables in the document refer | ||
27 | * to 50 slots rather than 42. | ||
28 | * | ||
29 | * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode | ||
30 | * is not defined in the specification. | ||
31 | * | ||
32 | * The LZX document does not state the uncompressed block has an | ||
33 | * uncompressed length field. Where does this length field come from, so | ||
34 | * we can know how large the block is? The implementation has it as the 24 | ||
35 | * bits following after the 3 blocktype bits, before the alignment | ||
36 | * padding. | ||
37 | * | ||
38 | * The LZX document states that aligned offset blocks have their aligned | ||
39 | * offset huffman tree AFTER the main and length trees. The implementation | ||
40 | * suggests that the aligned offset tree is BEFORE the main and length | ||
41 | * trees. | ||
42 | * | ||
43 | * The LZX document decoding algorithm states that, in an aligned offset | ||
44 | * block, if an extra_bits value is 1, 2 or 3, then that number of bits | ||
45 | * should be read and the result added to the match offset. This is | ||
46 | * correct for 1 and 2, but not 3, where just a huffman symbol (using the | ||
47 | * aligned tree) should be read. | ||
48 | * | ||
49 | * Regarding the E8 preprocessing, the LZX document states 'No translation | ||
50 | * may be performed on the last 6 bytes of the input block'. This is | ||
51 | * correct. However, the pseudocode provided checks for the *E8 leader* | ||
52 | * up to the last 6 bytes. If the leader appears between -10 and -7 bytes | ||
53 | * from the end, this would cause the next four bytes to be modified, at | ||
54 | * least one of which would be in the last 6 bytes, which is not allowed | ||
55 | * according to the spec. | ||
56 | * | ||
57 | * The specification states that the huffman trees must always contain at | ||
58 | * least one element. However, many CAB files contain blocks where the | ||
59 | * length tree is completely empty (because there are no matches), and | ||
60 | * this is expected to succeed. | ||
61 | * | ||
62 | * The errors in LZX documentation appear have been corrected in the | ||
63 | * new documentation for the LZX DELTA format. | ||
64 | * | ||
65 | * http://msdn.microsoft.com/en-us/library/cc483133.aspx | ||
66 | * | ||
67 | * However, this is a different format, an extension of regular LZX. | ||
68 | * I have noticed the following differences, there may be more: | ||
69 | * | ||
70 | * The maximum window size has increased from 2MB to 32MB. This also | ||
71 | * increases the maximum number of position slots, etc. | ||
72 | * | ||
73 | * If the match length is 257 (the maximum possible), this signals | ||
74 | * a further length decoding step, that allows for matches up to | ||
75 | * 33024 bytes long. | ||
76 | * | ||
77 | * The format now allows for "reference data", supplied by the caller. | ||
78 | * If match offsets go further back than the number of bytes | ||
79 | * decompressed so far, that is them accessing the reference data. | ||
80 | */ | ||
81 | |||
82 | /* import bit-reading macros and code */ | ||
83 | #define BITS_TYPE struct lzxd_stream | ||
84 | #define BITS_VAR lzx | ||
85 | #define BITS_ORDER_MSB | ||
86 | #define READ_BYTES do { \ | ||
87 | unsigned char b0, b1; \ | ||
88 | READ_IF_NEEDED; b0 = *i_ptr++; \ | ||
89 | READ_IF_NEEDED; b1 = *i_ptr++; \ | ||
90 | INJECT_BITS((b1 << 8) | b0, 16); \ | ||
91 | } while (0) | ||
92 | #include "readbits.h" | ||
93 | |||
94 | /* import huffman-reading macros and code */ | ||
95 | #define TABLEBITS(tbl) LZX_##tbl##_TABLEBITS | ||
96 | #define MAXSYMBOLS(tbl) LZX_##tbl##_MAXSYMBOLS | ||
97 | #define HUFF_TABLE(tbl,idx) lzx->tbl##_table[idx] | ||
98 | #define HUFF_LEN(tbl,idx) lzx->tbl##_len[idx] | ||
99 | #define HUFF_ERROR return lzx->error = MSPACK_ERR_DECRUNCH | ||
100 | #include "readhuff.h" | ||
101 | |||
102 | /* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ | ||
103 | #define BUILD_TABLE(tbl) \ | ||
104 | if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ | ||
105 | &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \ | ||
106 | { \ | ||
107 | D(("failed to build %s table", #tbl)) \ | ||
108 | return lzx->error = MSPACK_ERR_DECRUNCH; \ | ||
109 | } | ||
110 | |||
111 | #define BUILD_TABLE_MAYBE_EMPTY(tbl) do { \ | ||
112 | lzx->tbl##_empty = 0; \ | ||
113 | if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ | ||
114 | &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \ | ||
115 | { \ | ||
116 | for (i = 0; i < MAXSYMBOLS(tbl); i++) { \ | ||
117 | if (HUFF_LEN(tbl, i) > 0) { \ | ||
118 | D(("failed to build %s table", #tbl)) \ | ||
119 | return lzx->error = MSPACK_ERR_DECRUNCH; \ | ||
120 | } \ | ||
121 | } \ | ||
122 | /* empty tree - allow it, but don't decode symbols with it */ \ | ||
123 | lzx->tbl##_empty = 1; \ | ||
124 | } \ | ||
125 | } while (0) | ||
126 | |||
127 | /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols | ||
128 | * first to last in the given table. The code lengths are stored in their | ||
129 | * own special LZX way. | ||
130 | */ | ||
131 | #define READ_LENGTHS(tbl, first, last) do { \ | ||
132 | STORE_BITS; \ | ||
133 | if (lzxd_read_lens(lzx, &HUFF_LEN(tbl, 0), (first), \ | ||
134 | (unsigned int)(last))) return lzx->error; \ | ||
135 | RESTORE_BITS; \ | ||
136 | } while (0) | ||
137 | |||
138 | static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, | ||
139 | unsigned int first, unsigned int last) | ||
140 | { | ||
141 | /* bit buffer and huffman symbol decode variables */ | ||
142 | register unsigned int bit_buffer; | ||
143 | register int bits_left, i; | ||
144 | register unsigned short sym; | ||
145 | unsigned char *i_ptr, *i_end; | ||
146 | |||
147 | unsigned int x, y; | ||
148 | int z; | ||
149 | |||
150 | RESTORE_BITS; | ||
151 | |||
152 | /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */ | ||
153 | for (x = 0; x < 20; x++) { | ||
154 | READ_BITS(y, 4); | ||
155 | lzx->PRETREE_len[x] = y; | ||
156 | } | ||
157 | BUILD_TABLE(PRETREE); | ||
158 | |||
159 | for (x = first; x < last; ) { | ||
160 | READ_HUFFSYM(PRETREE, z); | ||
161 | if (z == 17) { | ||
162 | /* code = 17, run of ([read 4 bits]+4) zeros */ | ||
163 | READ_BITS(y, 4); y += 4; | ||
164 | while (y--) lens[x++] = 0; | ||
165 | } | ||
166 | else if (z == 18) { | ||
167 | /* code = 18, run of ([read 5 bits]+20) zeros */ | ||
168 | READ_BITS(y, 5); y += 20; | ||
169 | while (y--) lens[x++] = 0; | ||
170 | } | ||
171 | else if (z == 19) { | ||
172 | /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */ | ||
173 | READ_BITS(y, 1); y += 4; | ||
174 | READ_HUFFSYM(PRETREE, z); | ||
175 | z = lens[x] - z; if (z < 0) z += 17; | ||
176 | while (y--) lens[x++] = z; | ||
177 | } | ||
178 | else { | ||
179 | /* code = 0 to 16, delta current length entry */ | ||
180 | z = lens[x] - z; if (z < 0) z += 17; | ||
181 | lens[x++] = z; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | STORE_BITS; | ||
186 | |||
187 | return MSPACK_ERR_OK; | ||
188 | } | ||
189 | |||
190 | /* LZX static data tables: | ||
191 | * | ||
192 | * LZX uses 'position slots' to represent match offsets. For every match, | ||
193 | * a small 'position slot' number and a small offset from that slot are | ||
194 | * encoded instead of one large offset. | ||
195 | * | ||
196 | * The number of slots is decided by how many are needed to encode the | ||
197 | * largest offset for a given window size. This is easy when the gap between | ||
198 | * slots is less than 128Kb, it's a linear relationship. But when extra_bits | ||
199 | * reaches its limit of 17 (because LZX can only ensure reading 17 bits of | ||
200 | * data at a time), we can only jump 128Kb at a time and have to start | ||
201 | * using more and more position slots as each window size doubles. | ||
202 | * | ||
203 | * position_base[] is an index to the position slot bases | ||
204 | * | ||
205 | * extra_bits[] states how many bits of offset-from-base data is needed. | ||
206 | * | ||
207 | * They are calculated as follows: | ||
208 | * extra_bits[i] = 0 where i < 4 | ||
209 | * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36 | ||
210 | * extra_bits[i] = 17 where i >= 36 | ||
211 | * position_base[0] = 0 | ||
212 | * position_base[i] = position_base[i-1] + (1 << extra_bits[i-1]) | ||
213 | */ | ||
214 | static const unsigned int position_slots[11] = { | ||
215 | 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290 | ||
216 | }; | ||
217 | static const unsigned char extra_bits[36] = { | ||
218 | 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, | ||
219 | 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16 | ||
220 | }; | ||
221 | static const unsigned int position_base[290] = { | ||
222 | 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, | ||
223 | 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, | ||
224 | 49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, | ||
225 | 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, | ||
226 | 1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512, | ||
227 | 2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088, | ||
228 | 3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664, | ||
229 | 4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240, | ||
230 | 6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816, | ||
231 | 7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392, | ||
232 | 8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968, | ||
233 | 9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544, | ||
234 | 10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048, | ||
235 | 11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552, | ||
236 | 12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056, | ||
237 | 12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560, | ||
238 | 13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064, | ||
239 | 14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568, | ||
240 | 15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072, | ||
241 | 16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576, | ||
242 | 17563648, 17694720, 17825792, 17956864, 18087936, 18219008, 18350080, | ||
243 | 18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584, | ||
244 | 19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088, | ||
245 | 20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592, | ||
246 | 21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096, | ||
247 | 22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600, | ||
248 | 23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104, | ||
249 | 23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608, | ||
250 | 24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112, | ||
251 | 25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616, | ||
252 | 26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120, | ||
253 | 27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624, | ||
254 | 28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128, | ||
255 | 29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632, | ||
256 | 30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136, | ||
257 | 31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640, | ||
258 | 32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144, | ||
259 | 33161216, 33292288, 33423360 | ||
260 | }; | ||
261 | |||
262 | static void lzxd_reset_state(struct lzxd_stream *lzx) { | ||
263 | int i; | ||
264 | |||
265 | lzx->R0 = 1; | ||
266 | lzx->R1 = 1; | ||
267 | lzx->R2 = 1; | ||
268 | lzx->header_read = 0; | ||
269 | lzx->block_remaining = 0; | ||
270 | lzx->block_type = LZX_BLOCKTYPE_INVALID; | ||
271 | |||
272 | /* initialise tables to 0 (because deltas will be applied to them) */ | ||
273 | for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0; | ||
274 | for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0; | ||
275 | } | ||
276 | |||
277 | /*-------- main LZX code --------*/ | ||
278 | |||
279 | struct lzxd_stream *lzxd_init(struct mspack_system *system, | ||
280 | struct mspack_file *input, | ||
281 | struct mspack_file *output, | ||
282 | int window_bits, | ||
283 | int reset_interval, | ||
284 | int input_buffer_size, | ||
285 | off_t output_length, | ||
286 | char is_delta) | ||
287 | { | ||
288 | unsigned int window_size = 1 << window_bits; | ||
289 | struct lzxd_stream *lzx; | ||
290 | |||
291 | if (!system) return NULL; | ||
292 | |||
293 | /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB), | ||
294 | * regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB) | ||
295 | */ | ||
296 | if (is_delta) { | ||
297 | if (window_bits < 17 || window_bits > 25) return NULL; | ||
298 | } | ||
299 | else { | ||
300 | if (window_bits < 15 || window_bits > 21) return NULL; | ||
301 | } | ||
302 | |||
303 | if (reset_interval < 0 || output_length < 0) { | ||
304 | D(("reset interval or output length < 0")) | ||
305 | return NULL; | ||
306 | } | ||
307 | |||
308 | /* round up input buffer size to multiple of two */ | ||
309 | input_buffer_size = (input_buffer_size + 1) & -2; | ||
310 | if (input_buffer_size < 2) return NULL; | ||
311 | |||
312 | /* allocate decompression state */ | ||
313 | if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) { | ||
314 | return NULL; | ||
315 | } | ||
316 | |||
317 | /* allocate decompression window and input buffer */ | ||
318 | lzx->window = (unsigned char *) system->alloc(system, (size_t) window_size); | ||
319 | lzx->inbuf = (unsigned char *) system->alloc(system, (size_t) input_buffer_size); | ||
320 | if (!lzx->window || !lzx->inbuf) { | ||
321 | system->free(lzx->window); | ||
322 | system->free(lzx->inbuf); | ||
323 | system->free(lzx); | ||
324 | return NULL; | ||
325 | } | ||
326 | |||
327 | /* initialise decompression state */ | ||
328 | lzx->sys = system; | ||
329 | lzx->input = input; | ||
330 | lzx->output = output; | ||
331 | lzx->offset = 0; | ||
332 | lzx->length = output_length; | ||
333 | |||
334 | lzx->inbuf_size = input_buffer_size; | ||
335 | lzx->window_size = 1 << window_bits; | ||
336 | lzx->ref_data_size = 0; | ||
337 | lzx->window_posn = 0; | ||
338 | lzx->frame_posn = 0; | ||
339 | lzx->frame = 0; | ||
340 | lzx->reset_interval = reset_interval; | ||
341 | lzx->intel_filesize = 0; | ||
342 | lzx->intel_curpos = 0; | ||
343 | lzx->intel_started = 0; | ||
344 | lzx->error = MSPACK_ERR_OK; | ||
345 | lzx->num_offsets = position_slots[window_bits - 15] << 3; | ||
346 | lzx->is_delta = is_delta; | ||
347 | |||
348 | lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0]; | ||
349 | lzxd_reset_state(lzx); | ||
350 | INIT_BITS; | ||
351 | return lzx; | ||
352 | } | ||
353 | |||
354 | int lzxd_set_reference_data(struct lzxd_stream *lzx, | ||
355 | struct mspack_system *system, | ||
356 | struct mspack_file *input, | ||
357 | unsigned int length) | ||
358 | { | ||
359 | if (!lzx) return MSPACK_ERR_ARGS; | ||
360 | |||
361 | if (!lzx->is_delta) { | ||
362 | D(("only LZX DELTA streams support reference data")) | ||
363 | return MSPACK_ERR_ARGS; | ||
364 | } | ||
365 | if (lzx->offset) { | ||
366 | D(("too late to set reference data after decoding starts")) | ||
367 | return MSPACK_ERR_ARGS; | ||
368 | } | ||
369 | if (length > lzx->window_size) { | ||
370 | D(("reference length (%u) is longer than the window", length)) | ||
371 | return MSPACK_ERR_ARGS; | ||
372 | } | ||
373 | if (length > 0 && (!system || !input)) { | ||
374 | D(("length > 0 but no system or input")) | ||
375 | return MSPACK_ERR_ARGS; | ||
376 | } | ||
377 | |||
378 | lzx->ref_data_size = length; | ||
379 | if (length > 0) { | ||
380 | /* copy reference data */ | ||
381 | unsigned char *pos = &lzx->window[lzx->window_size - length]; | ||
382 | int bytes = system->read(input, pos, length); | ||
383 | /* length can't be more than 2^25, so no signedness problem */ | ||
384 | if (bytes < (int)length) return MSPACK_ERR_READ; | ||
385 | } | ||
386 | lzx->ref_data_size = length; | ||
387 | return MSPACK_ERR_OK; | ||
388 | } | ||
389 | |||
390 | void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) { | ||
391 | if (lzx && out_bytes > 0) lzx->length = out_bytes; | ||
392 | } | ||
393 | |||
394 | int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { | ||
395 | /* bitstream and huffman reading variables */ | ||
396 | register unsigned int bit_buffer; | ||
397 | register int bits_left, i=0; | ||
398 | unsigned char *i_ptr, *i_end; | ||
399 | register unsigned short sym; | ||
400 | |||
401 | int match_length, length_footer, extra, verbatim_bits, bytes_todo; | ||
402 | int this_run, main_element, aligned_bits, j, warned = 0; | ||
403 | unsigned char *window, *runsrc, *rundest, buf[12]; | ||
404 | unsigned int frame_size=0, end_frame, match_offset, window_posn; | ||
405 | unsigned int R0, R1, R2; | ||
406 | |||
407 | /* easy answers */ | ||
408 | if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS; | ||
409 | if (lzx->error) return lzx->error; | ||
410 | |||
411 | /* flush out any stored-up bytes before we begin */ | ||
412 | i = lzx->o_end - lzx->o_ptr; | ||
413 | if ((off_t) i > out_bytes) i = (int) out_bytes; | ||
414 | if (i) { | ||
415 | if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) { | ||
416 | return lzx->error = MSPACK_ERR_WRITE; | ||
417 | } | ||
418 | lzx->o_ptr += i; | ||
419 | lzx->offset += i; | ||
420 | out_bytes -= i; | ||
421 | } | ||
422 | if (out_bytes == 0) return MSPACK_ERR_OK; | ||
423 | |||
424 | /* restore local state */ | ||
425 | RESTORE_BITS; | ||
426 | window = lzx->window; | ||
427 | window_posn = lzx->window_posn; | ||
428 | R0 = lzx->R0; | ||
429 | R1 = lzx->R1; | ||
430 | R2 = lzx->R2; | ||
431 | |||
432 | end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1; | ||
433 | |||
434 | while (lzx->frame < end_frame) { | ||
435 | /* have we reached the reset interval? (if there is one?) */ | ||
436 | if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) { | ||
437 | if (lzx->block_remaining) { | ||
438 | /* this is a file format error, we can make a best effort to extract what we can */ | ||
439 | D(("%d bytes remaining at reset interval", lzx->block_remaining)) | ||
440 | if (!warned) { | ||
441 | lzx->sys->message(NULL, "WARNING; invalid reset interval detected during LZX decompression"); | ||
442 | warned++; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | /* re-read the intel header and reset the huffman lengths */ | ||
447 | lzxd_reset_state(lzx); | ||
448 | R0 = lzx->R0; | ||
449 | R1 = lzx->R1; | ||
450 | R2 = lzx->R2; | ||
451 | } | ||
452 | |||
453 | /* LZX DELTA format has chunk_size, not present in LZX format */ | ||
454 | if (lzx->is_delta) { | ||
455 | ENSURE_BITS(16); | ||
456 | REMOVE_BITS(16); | ||
457 | } | ||
458 | |||
459 | /* read header if necessary */ | ||
460 | if (!lzx->header_read) { | ||
461 | /* read 1 bit. if bit=0, intel filesize = 0. | ||
462 | * if bit=1, read intel filesize (32 bits) */ | ||
463 | j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); } | ||
464 | lzx->intel_filesize = (i << 16) | j; | ||
465 | lzx->header_read = 1; | ||
466 | } | ||
467 | |||
468 | /* calculate size of frame: all frames are 32k except the final frame | ||
469 | * which is 32kb or less. this can only be calculated when lzx->length | ||
470 | * has been filled in. */ | ||
471 | frame_size = LZX_FRAME_SIZE; | ||
472 | if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) { | ||
473 | frame_size = lzx->length - lzx->offset; | ||
474 | } | ||
475 | |||
476 | /* decode until one more frame is available */ | ||
477 | bytes_todo = lzx->frame_posn + frame_size - window_posn; | ||
478 | while (bytes_todo > 0) { | ||
479 | /* initialise new block, if one is needed */ | ||
480 | if (lzx->block_remaining == 0) { | ||
481 | /* realign if previous block was an odd-sized UNCOMPRESSED block */ | ||
482 | if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) && | ||
483 | (lzx->block_length & 1)) | ||
484 | { | ||
485 | READ_IF_NEEDED; | ||
486 | i_ptr++; | ||
487 | } | ||
488 | |||
489 | /* read block type (3 bits) and block length (24 bits) */ | ||
490 | READ_BITS(lzx->block_type, 3); | ||
491 | READ_BITS(i, 16); READ_BITS(j, 8); | ||
492 | lzx->block_remaining = lzx->block_length = (i << 8) | j; | ||
493 | /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/ | ||
494 | |||
495 | /* read individual block headers */ | ||
496 | switch (lzx->block_type) { | ||
497 | case LZX_BLOCKTYPE_ALIGNED: | ||
498 | /* read lengths of and build aligned huffman decoding tree */ | ||
499 | for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } | ||
500 | BUILD_TABLE(ALIGNED); | ||
501 | /* rest of aligned header is same as verbatim */ /*@fallthrough@*/ | ||
502 | case LZX_BLOCKTYPE_VERBATIM: | ||
503 | /* read lengths of and build main huffman decoding tree */ | ||
504 | READ_LENGTHS(MAINTREE, 0, 256); | ||
505 | READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets); | ||
506 | BUILD_TABLE(MAINTREE); | ||
507 | /* if the literal 0xE8 is anywhere in the block... */ | ||
508 | if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; | ||
509 | /* read lengths of and build lengths huffman decoding tree */ | ||
510 | READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); | ||
511 | BUILD_TABLE_MAYBE_EMPTY(LENGTH); | ||
512 | break; | ||
513 | |||
514 | case LZX_BLOCKTYPE_UNCOMPRESSED: | ||
515 | /* because we can't assume otherwise */ | ||
516 | lzx->intel_started = 1; | ||
517 | |||
518 | /* read 1-16 (not 0-15) bits to align to bytes */ | ||
519 | if (bits_left == 0) ENSURE_BITS(16); | ||
520 | bits_left = 0; bit_buffer = 0; | ||
521 | |||
522 | /* read 12 bytes of stored R0 / R1 / R2 values */ | ||
523 | for (rundest = &buf[0], i = 0; i < 12; i++) { | ||
524 | READ_IF_NEEDED; | ||
525 | *rundest++ = *i_ptr++; | ||
526 | } | ||
527 | R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); | ||
528 | R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24); | ||
529 | R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24); | ||
530 | break; | ||
531 | |||
532 | default: | ||
533 | D(("bad block type")) | ||
534 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
535 | } | ||
536 | } | ||
537 | |||
538 | /* decode more of the block: | ||
539 | * run = min(what's available, what's needed) */ | ||
540 | this_run = lzx->block_remaining; | ||
541 | if (this_run > bytes_todo) this_run = bytes_todo; | ||
542 | |||
543 | /* assume we decode exactly this_run bytes, for now */ | ||
544 | bytes_todo -= this_run; | ||
545 | lzx->block_remaining -= this_run; | ||
546 | |||
547 | /* decode at least this_run bytes */ | ||
548 | switch (lzx->block_type) { | ||
549 | case LZX_BLOCKTYPE_VERBATIM: | ||
550 | while (this_run > 0) { | ||
551 | READ_HUFFSYM(MAINTREE, main_element); | ||
552 | if (main_element < LZX_NUM_CHARS) { | ||
553 | /* literal: 0 to LZX_NUM_CHARS-1 */ | ||
554 | window[window_posn++] = main_element; | ||
555 | this_run--; | ||
556 | } | ||
557 | else { | ||
558 | /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ | ||
559 | main_element -= LZX_NUM_CHARS; | ||
560 | |||
561 | /* get match length */ | ||
562 | match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; | ||
563 | if (match_length == LZX_NUM_PRIMARY_LENGTHS) { | ||
564 | if (lzx->LENGTH_empty) { | ||
565 | D(("LENGTH symbol needed but tree is empty")) | ||
566 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
567 | } | ||
568 | READ_HUFFSYM(LENGTH, length_footer); | ||
569 | match_length += length_footer; | ||
570 | } | ||
571 | match_length += LZX_MIN_MATCH; | ||
572 | |||
573 | /* get match offset */ | ||
574 | switch ((match_offset = (main_element >> 3))) { | ||
575 | case 0: match_offset = R0; break; | ||
576 | case 1: match_offset = R1; R1=R0; R0 = match_offset; break; | ||
577 | case 2: match_offset = R2; R2=R0; R0 = match_offset; break; | ||
578 | case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; | ||
579 | default: | ||
580 | extra = (match_offset >= 36) ? 17 : extra_bits[match_offset]; | ||
581 | READ_BITS(verbatim_bits, extra); | ||
582 | match_offset = position_base[match_offset] - 2 + verbatim_bits; | ||
583 | R2 = R1; R1 = R0; R0 = match_offset; | ||
584 | } | ||
585 | |||
586 | /* LZX DELTA uses max match length to signal even longer match */ | ||
587 | if (match_length == LZX_MAX_MATCH && lzx->is_delta) { | ||
588 | int extra_len = 0; | ||
589 | ENSURE_BITS(3); /* 4 entry huffman tree */ | ||
590 | if (PEEK_BITS(1) == 0) { | ||
591 | REMOVE_BITS(1); /* '0' -> 8 extra length bits */ | ||
592 | READ_BITS(extra_len, 8); | ||
593 | } | ||
594 | else if (PEEK_BITS(2) == 2) { | ||
595 | REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */ | ||
596 | READ_BITS(extra_len, 10); | ||
597 | extra_len += 0x100; | ||
598 | } | ||
599 | else if (PEEK_BITS(3) == 6) { | ||
600 | REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */ | ||
601 | READ_BITS(extra_len, 12); | ||
602 | extra_len += 0x500; | ||
603 | } | ||
604 | else { | ||
605 | REMOVE_BITS(3); /* '111' -> 15 extra length bits */ | ||
606 | READ_BITS(extra_len, 15); | ||
607 | } | ||
608 | match_length += extra_len; | ||
609 | } | ||
610 | |||
611 | if ((window_posn + match_length) > lzx->window_size) { | ||
612 | D(("match ran over window wrap")) | ||
613 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
614 | } | ||
615 | |||
616 | /* copy match */ | ||
617 | rundest = &window[window_posn]; | ||
618 | i = match_length; | ||
619 | /* does match offset wrap the window? */ | ||
620 | if (match_offset > window_posn) { | ||
621 | if (match_offset > lzx->offset && | ||
622 | (match_offset - window_posn) > lzx->ref_data_size) | ||
623 | { | ||
624 | D(("match offset beyond LZX stream")) | ||
625 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
626 | } | ||
627 | /* j = length from match offset to end of window */ | ||
628 | j = match_offset - window_posn; | ||
629 | if (j > (int) lzx->window_size) { | ||
630 | D(("match offset beyond window boundaries")) | ||
631 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
632 | } | ||
633 | runsrc = &window[lzx->window_size - j]; | ||
634 | if (j < i) { | ||
635 | /* if match goes over the window edge, do two copy runs */ | ||
636 | i -= j; while (j-- > 0) *rundest++ = *runsrc++; | ||
637 | runsrc = window; | ||
638 | } | ||
639 | while (i-- > 0) *rundest++ = *runsrc++; | ||
640 | } | ||
641 | else { | ||
642 | runsrc = rundest - match_offset; | ||
643 | while (i-- > 0) *rundest++ = *runsrc++; | ||
644 | } | ||
645 | |||
646 | this_run -= match_length; | ||
647 | window_posn += match_length; | ||
648 | } | ||
649 | } /* while (this_run > 0) */ | ||
650 | break; | ||
651 | |||
652 | case LZX_BLOCKTYPE_ALIGNED: | ||
653 | while (this_run > 0) { | ||
654 | READ_HUFFSYM(MAINTREE, main_element); | ||
655 | if (main_element < LZX_NUM_CHARS) { | ||
656 | /* literal: 0 to LZX_NUM_CHARS-1 */ | ||
657 | window[window_posn++] = main_element; | ||
658 | this_run--; | ||
659 | } | ||
660 | else { | ||
661 | /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ | ||
662 | main_element -= LZX_NUM_CHARS; | ||
663 | |||
664 | /* get match length */ | ||
665 | match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; | ||
666 | if (match_length == LZX_NUM_PRIMARY_LENGTHS) { | ||
667 | if (lzx->LENGTH_empty) { | ||
668 | D(("LENGTH symbol needed but tree is empty")) | ||
669 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
670 | } | ||
671 | READ_HUFFSYM(LENGTH, length_footer); | ||
672 | match_length += length_footer; | ||
673 | } | ||
674 | match_length += LZX_MIN_MATCH; | ||
675 | |||
676 | /* get match offset */ | ||
677 | switch ((match_offset = (main_element >> 3))) { | ||
678 | case 0: match_offset = R0; break; | ||
679 | case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; | ||
680 | case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; | ||
681 | default: | ||
682 | extra = (match_offset >= 36) ? 17 : extra_bits[match_offset]; | ||
683 | match_offset = position_base[match_offset] - 2; | ||
684 | if (extra > 3) { | ||
685 | /* verbatim and aligned bits */ | ||
686 | extra -= 3; | ||
687 | READ_BITS(verbatim_bits, extra); | ||
688 | match_offset += (verbatim_bits << 3); | ||
689 | READ_HUFFSYM(ALIGNED, aligned_bits); | ||
690 | match_offset += aligned_bits; | ||
691 | } | ||
692 | else if (extra == 3) { | ||
693 | /* aligned bits only */ | ||
694 | READ_HUFFSYM(ALIGNED, aligned_bits); | ||
695 | match_offset += aligned_bits; | ||
696 | } | ||
697 | else if (extra > 0) { /* extra==1, extra==2 */ | ||
698 | /* verbatim bits only */ | ||
699 | READ_BITS(verbatim_bits, extra); | ||
700 | match_offset += verbatim_bits; | ||
701 | } | ||
702 | else /* extra == 0 */ { | ||
703 | /* ??? not defined in LZX specification! */ | ||
704 | match_offset = 1; | ||
705 | } | ||
706 | /* update repeated offset LRU queue */ | ||
707 | R2 = R1; R1 = R0; R0 = match_offset; | ||
708 | } | ||
709 | |||
710 | /* LZX DELTA uses max match length to signal even longer match */ | ||
711 | if (match_length == LZX_MAX_MATCH && lzx->is_delta) { | ||
712 | int extra_len = 0; | ||
713 | ENSURE_BITS(3); /* 4 entry huffman tree */ | ||
714 | if (PEEK_BITS(1) == 0) { | ||
715 | REMOVE_BITS(1); /* '0' -> 8 extra length bits */ | ||
716 | READ_BITS(extra_len, 8); | ||
717 | } | ||
718 | else if (PEEK_BITS(2) == 2) { | ||
719 | REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */ | ||
720 | READ_BITS(extra_len, 10); | ||
721 | extra_len += 0x100; | ||
722 | } | ||
723 | else if (PEEK_BITS(3) == 6) { | ||
724 | REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */ | ||
725 | READ_BITS(extra_len, 12); | ||
726 | extra_len += 0x500; | ||
727 | } | ||
728 | else { | ||
729 | REMOVE_BITS(3); /* '111' -> 15 extra length bits */ | ||
730 | READ_BITS(extra_len, 15); | ||
731 | } | ||
732 | match_length += extra_len; | ||
733 | } | ||
734 | |||
735 | if ((window_posn + match_length) > lzx->window_size) { | ||
736 | D(("match ran over window wrap")) | ||
737 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
738 | } | ||
739 | |||
740 | /* copy match */ | ||
741 | rundest = &window[window_posn]; | ||
742 | i = match_length; | ||
743 | /* does match offset wrap the window? */ | ||
744 | if (match_offset > window_posn) { | ||
745 | if (match_offset > lzx->offset && | ||
746 | (match_offset - window_posn) > lzx->ref_data_size) | ||
747 | { | ||
748 | D(("match offset beyond LZX stream")) | ||
749 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
750 | } | ||
751 | /* j = length from match offset to end of window */ | ||
752 | j = match_offset - window_posn; | ||
753 | if (j > (int) lzx->window_size) { | ||
754 | D(("match offset beyond window boundaries")) | ||
755 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
756 | } | ||
757 | runsrc = &window[lzx->window_size - j]; | ||
758 | if (j < i) { | ||
759 | /* if match goes over the window edge, do two copy runs */ | ||
760 | i -= j; while (j-- > 0) *rundest++ = *runsrc++; | ||
761 | runsrc = window; | ||
762 | } | ||
763 | while (i-- > 0) *rundest++ = *runsrc++; | ||
764 | } | ||
765 | else { | ||
766 | runsrc = rundest - match_offset; | ||
767 | while (i-- > 0) *rundest++ = *runsrc++; | ||
768 | } | ||
769 | |||
770 | this_run -= match_length; | ||
771 | window_posn += match_length; | ||
772 | } | ||
773 | } /* while (this_run > 0) */ | ||
774 | break; | ||
775 | |||
776 | case LZX_BLOCKTYPE_UNCOMPRESSED: | ||
777 | /* as this_run is limited not to wrap a frame, this also means it | ||
778 | * won't wrap the window (as the window is a multiple of 32k) */ | ||
779 | rundest = &window[window_posn]; | ||
780 | window_posn += this_run; | ||
781 | while (this_run > 0) { | ||
782 | if ((i = i_end - i_ptr) == 0) { | ||
783 | READ_IF_NEEDED; | ||
784 | } | ||
785 | else { | ||
786 | if (i > this_run) i = this_run; | ||
787 | lzx->sys->copy(i_ptr, rundest, (size_t) i); | ||
788 | rundest += i; | ||
789 | i_ptr += i; | ||
790 | this_run -= i; | ||
791 | } | ||
792 | } | ||
793 | break; | ||
794 | |||
795 | default: | ||
796 | return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */ | ||
797 | } | ||
798 | |||
799 | /* did the final match overrun our desired this_run length? */ | ||
800 | if (this_run < 0) { | ||
801 | if ((unsigned int)(-this_run) > lzx->block_remaining) { | ||
802 | D(("overrun went past end of block by %d (%d remaining)", | ||
803 | -this_run, lzx->block_remaining )) | ||
804 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
805 | } | ||
806 | lzx->block_remaining -= -this_run; | ||
807 | } | ||
808 | } /* while (bytes_todo > 0) */ | ||
809 | |||
810 | /* streams don't extend over frame boundaries */ | ||
811 | if ((window_posn - lzx->frame_posn) != frame_size) { | ||
812 | D(("decode beyond output frame limits! %d != %d", | ||
813 | window_posn - lzx->frame_posn, frame_size)) | ||
814 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
815 | } | ||
816 | |||
817 | /* re-align input bitstream */ | ||
818 | if (bits_left > 0) ENSURE_BITS(16); | ||
819 | if (bits_left & 15) REMOVE_BITS(bits_left & 15); | ||
820 | |||
821 | /* check that we've used all of the previous frame first */ | ||
822 | if (lzx->o_ptr != lzx->o_end) { | ||
823 | D(("%ld avail bytes, new %d frame", | ||
824 | (long)(lzx->o_end - lzx->o_ptr), frame_size)) | ||
825 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
826 | } | ||
827 | |||
828 | /* does this intel block _really_ need decoding? */ | ||
829 | if (lzx->intel_started && lzx->intel_filesize && | ||
830 | (lzx->frame <= 32768) && (frame_size > 10)) | ||
831 | { | ||
832 | unsigned char *data = &lzx->e8_buf[0]; | ||
833 | unsigned char *dataend = &lzx->e8_buf[frame_size - 10]; | ||
834 | signed int curpos = lzx->intel_curpos; | ||
835 | signed int filesize = lzx->intel_filesize; | ||
836 | signed int abs_off, rel_off; | ||
837 | |||
838 | /* copy e8 block to the e8 buffer and tweak if needed */ | ||
839 | lzx->o_ptr = data; | ||
840 | lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size); | ||
841 | |||
842 | while (data < dataend) { | ||
843 | if (*data++ != 0xE8) { curpos++; continue; } | ||
844 | abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); | ||
845 | if ((abs_off >= -curpos) && (abs_off < filesize)) { | ||
846 | rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; | ||
847 | data[0] = (unsigned char) rel_off; | ||
848 | data[1] = (unsigned char) (rel_off >> 8); | ||
849 | data[2] = (unsigned char) (rel_off >> 16); | ||
850 | data[3] = (unsigned char) (rel_off >> 24); | ||
851 | } | ||
852 | data += 4; | ||
853 | curpos += 5; | ||
854 | } | ||
855 | lzx->intel_curpos += frame_size; | ||
856 | } | ||
857 | else { | ||
858 | lzx->o_ptr = &lzx->window[lzx->frame_posn]; | ||
859 | if (lzx->intel_filesize) lzx->intel_curpos += frame_size; | ||
860 | } | ||
861 | lzx->o_end = &lzx->o_ptr[frame_size]; | ||
862 | |||
863 | /* write a frame */ | ||
864 | i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size; | ||
865 | if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) { | ||
866 | return lzx->error = MSPACK_ERR_WRITE; | ||
867 | } | ||
868 | lzx->o_ptr += i; | ||
869 | lzx->offset += i; | ||
870 | out_bytes -= i; | ||
871 | |||
872 | /* advance frame start position */ | ||
873 | lzx->frame_posn += frame_size; | ||
874 | lzx->frame++; | ||
875 | |||
876 | /* wrap window / frame position pointers */ | ||
877 | if (window_posn == lzx->window_size) window_posn = 0; | ||
878 | if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0; | ||
879 | |||
880 | } /* while (lzx->frame < end_frame) */ | ||
881 | |||
882 | if (out_bytes) { | ||
883 | D(("bytes left to output")) | ||
884 | return lzx->error = MSPACK_ERR_DECRUNCH; | ||
885 | } | ||
886 | |||
887 | /* store local state */ | ||
888 | STORE_BITS; | ||
889 | lzx->window_posn = window_posn; | ||
890 | lzx->R0 = R0; | ||
891 | lzx->R1 = R1; | ||
892 | lzx->R2 = R2; | ||
893 | |||
894 | return MSPACK_ERR_OK; | ||
895 | } | ||
896 | |||
897 | void lzxd_free(struct lzxd_stream *lzx) { | ||
898 | struct mspack_system *sys; | ||
899 | if (lzx) { | ||
900 | sys = lzx->sys; | ||
901 | sys->free(lzx->inbuf); | ||
902 | sys->free(lzx->window); | ||
903 | sys->free(lzx); | ||
904 | } | ||
905 | } | ||