summaryrefslogtreecommitdiff
path: root/tools/ucl/src/ucl_swd.ch
diff options
context:
space:
mode:
Diffstat (limited to 'tools/ucl/src/ucl_swd.ch')
-rwxr-xr-xtools/ucl/src/ucl_swd.ch665
1 files changed, 665 insertions, 0 deletions
diff --git a/tools/ucl/src/ucl_swd.ch b/tools/ucl/src/ucl_swd.ch
new file mode 100755
index 0000000000..e40b4a4662
--- /dev/null
+++ b/tools/ucl/src/ucl_swd.ch
@@ -0,0 +1,665 @@
1/* ucl_swd.c -- sliding window dictionary
2
3 This file is part of the UCL data compression library.
4
5 Copyright (C) 1996-2002 Markus Franz Xaver Johannes Oberhumer
6 All Rights Reserved.
7
8 The UCL library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of
11 the License, or (at your option) any later version.
12
13 The UCL library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with the UCL library; see the file COPYING.
20 If not, write to the Free Software Foundation, Inc.,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
23 Markus F.X.J. Oberhumer
24 <markus@oberhumer.com>
25 */
26
27
28#if (UCL_UINT_MAX < UCL_0xffffffffL)
29# error "UCL_UINT_MAX"
30#endif
31
32
33/***********************************************************************
34//
35************************************************************************/
36
37#ifndef SWD_N
38# define SWD_N N
39#endif
40#ifndef SWD_F
41# define SWD_F F
42#endif
43#ifndef SWD_THRESHOLD
44# define SWD_THRESHOLD THRESHOLD
45#endif
46
47/* unsigned type for dictionary access - don't waste memory here */
48#if (SWD_N + SWD_F + SWD_F < USHRT_MAX)
49 typedef unsigned short swd_uint;
50# define SWD_UINT_MAX USHRT_MAX
51#else
52 typedef ucl_uint swd_uint;
53# define SWD_UINT_MAX UCL_UINT_MAX
54#endif
55#define SWD_UINT(x) ((swd_uint)(x))
56
57
58#ifndef SWD_HSIZE
59# define SWD_HSIZE 16384
60#endif
61#ifndef SWD_MAX_CHAIN
62# define SWD_MAX_CHAIN 2048
63#endif
64
65#if !defined(HEAD3)
66#if 1
67# define HEAD3(b,p) \
68 (((0x9f5f*(((((ucl_uint32)b[p]<<5)^b[p+1])<<5)^b[p+2]))>>5) & (SWD_HSIZE-1))
69#else
70# define HEAD3(b,p) \
71 (((0x9f5f*(((((ucl_uint32)b[p+2]<<5)^b[p+1])<<5)^b[p]))>>5) & (SWD_HSIZE-1))
72#endif
73#endif
74
75#if (SWD_THRESHOLD == 1) && !defined(HEAD2)
76# if 1 && defined(UCL_UNALIGNED_OK_2)
77# define HEAD2(b,p) (* (const ucl_ushortp) &(b[p]))
78# else
79# define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8))
80# endif
81# define NIL2 SWD_UINT_MAX
82#endif
83
84
85#if defined(__UCL_CHECKER)
86 /* malloc arrays of the exact size to detect any overrun */
87# ifndef SWD_USE_MALLOC
88# define SWD_USE_MALLOC
89# endif
90#endif
91
92
93typedef struct
94{
95/* public - "built-in" */
96 ucl_uint n;
97 ucl_uint f;
98 ucl_uint threshold;
99
100/* public - configuration */
101 ucl_uint max_chain;
102 ucl_uint nice_length;
103 ucl_bool use_best_off;
104 ucl_uint lazy_insert;
105
106/* public - output */
107 ucl_uint m_len;
108 ucl_uint m_off;
109 ucl_uint look;
110 int b_char;
111#if defined(SWD_BEST_OFF)
112 ucl_uint best_off[ SWD_BEST_OFF ];
113#endif
114
115/* semi public */
116 UCL_COMPRESS_T *c;
117 ucl_uint m_pos;
118#if defined(SWD_BEST_OFF)
119 ucl_uint best_pos[ SWD_BEST_OFF ];
120#endif
121
122/* private */
123 const ucl_byte *dict;
124 const ucl_byte *dict_end;
125 ucl_uint dict_len;
126
127/* private */
128 ucl_uint ip; /* input pointer (lookahead) */
129 ucl_uint bp; /* buffer pointer */
130 ucl_uint rp; /* remove pointer */
131 ucl_uint b_size;
132
133 unsigned char *b_wrap;
134
135 ucl_uint node_count;
136 ucl_uint first_rp;
137
138#if defined(SWD_USE_MALLOC)
139 unsigned char *b;
140 swd_uint *head3;
141 swd_uint *succ3;
142 swd_uint *best3;
143 swd_uint *llen3;
144#ifdef HEAD2
145 swd_uint *head2;
146#endif
147#else
148 unsigned char b [ SWD_N + SWD_F + SWD_F ];
149 swd_uint head3 [ SWD_HSIZE ];
150 swd_uint succ3 [ SWD_N + SWD_F ];
151 swd_uint best3 [ SWD_N + SWD_F ];
152 swd_uint llen3 [ SWD_HSIZE ];
153#ifdef HEAD2
154 swd_uint head2 [ UCL_UINT32_C(65536) ];
155#endif
156#endif
157}
158ucl_swd_t;
159
160
161/* Access macro for head3.
162 * head3[key] may be uninitialized if the list is emtpy,
163 * but then its value will never be used.
164 */
165#if defined(__UCL_CHECKER)
166# define s_head3(s,key) \
167 ((s->llen3[key] == 0) ? SWD_UINT_MAX : s->head3[key])
168#else
169# define s_head3(s,key) s->head3[key]
170#endif
171
172
173/***********************************************************************
174//
175************************************************************************/
176
177static
178void swd_initdict(ucl_swd_t *s, const ucl_byte *dict, ucl_uint dict_len)
179{
180 s->dict = s->dict_end = NULL;
181 s->dict_len = 0;
182
183 if (!dict || dict_len <= 0)
184 return;
185 if (dict_len > s->n)
186 {
187 dict += dict_len - s->n;
188 dict_len = s->n;
189 }
190
191 s->dict = dict;
192 s->dict_len = dict_len;
193 s->dict_end = dict + dict_len;
194 ucl_memcpy(s->b,dict,dict_len);
195 s->ip = dict_len;
196}
197
198
199static
200void swd_insertdict(ucl_swd_t *s, ucl_uint node, ucl_uint len)
201{
202 ucl_uint key;
203
204 s->node_count = s->n - len;
205 s->first_rp = node;
206
207 while (len-- > 0)
208 {
209 key = HEAD3(s->b,node);
210 s->succ3[node] = s_head3(s,key);
211 s->head3[key] = SWD_UINT(node);
212 s->best3[node] = SWD_UINT(s->f + 1);
213 s->llen3[key]++;
214 assert(s->llen3[key] <= s->n);
215
216#ifdef HEAD2
217 key = HEAD2(s->b,node);
218 s->head2[key] = SWD_UINT(node);
219#endif
220
221 node++;
222 }
223}
224
225
226/***********************************************************************
227//
228************************************************************************/
229
230static
231int swd_init(ucl_swd_t *s, const ucl_byte *dict, ucl_uint dict_len)
232{
233 ucl_uint i = 0;
234 int c = 0;
235
236 if (s->n == 0)
237 s->n = SWD_N;
238 if (s->f == 0)
239 s->f = SWD_F;
240 s->threshold = SWD_THRESHOLD;
241 if (s->n > SWD_N || s->f > SWD_F)
242 return UCL_E_INVALID_ARGUMENT;
243
244#if defined(SWD_USE_MALLOC)
245 s->b = (unsigned char *) ucl_alloc(s->n + s->f + s->f, 1);
246 s->head3 = (swd_uint *) ucl_alloc(SWD_HSIZE, sizeof(*s->head3));
247 s->succ3 = (swd_uint *) ucl_alloc(s->n + s->f, sizeof(*s->succ3));
248 s->best3 = (swd_uint *) ucl_alloc(s->n + s->f, sizeof(*s->best3));
249 s->llen3 = (swd_uint *) ucl_alloc(SWD_HSIZE, sizeof(*s->llen3));
250 if (!s->b || !s->head3 || !s->succ3 || !s->best3 || !s->llen3)
251 return UCL_E_OUT_OF_MEMORY;
252#ifdef HEAD2
253 s->head2 = (swd_uint *) ucl_alloc(UCL_UINT32_C(65536), sizeof(*s->head2));
254 if (!s->head2)
255 return UCL_E_OUT_OF_MEMORY;
256#endif
257#endif
258
259 /* defaults */
260 s->max_chain = SWD_MAX_CHAIN;
261 s->nice_length = s->f;
262 s->use_best_off = 0;
263 s->lazy_insert = 0;
264
265 s->b_size = s->n + s->f;
266 if (s->b_size + s->f >= SWD_UINT_MAX)
267 return UCL_E_ERROR;
268 s->b_wrap = s->b + s->b_size;
269 s->node_count = s->n;
270
271 ucl_memset(s->llen3, 0, sizeof(s->llen3[0]) * SWD_HSIZE);
272#ifdef HEAD2
273#if 1
274 ucl_memset(s->head2, 0xff, sizeof(s->head2[0]) * UCL_UINT32_C(65536));
275 assert(s->head2[0] == NIL2);
276#else
277 for (i = 0; i < UCL_UINT32_C(65536); i++)
278 s->head2[i] = NIL2;
279#endif
280#endif
281
282 s->ip = 0;
283 swd_initdict(s,dict,dict_len);
284 s->bp = s->ip;
285 s->first_rp = s->ip;
286
287 assert(s->ip + s->f <= s->b_size);
288#if 1
289 s->look = (ucl_uint) (s->c->in_end - s->c->ip);
290 if (s->look > 0)
291 {
292 if (s->look > s->f)
293 s->look = s->f;
294 ucl_memcpy(&s->b[s->ip],s->c->ip,s->look);
295 s->c->ip += s->look;
296 s->ip += s->look;
297 }
298#else
299 s->look = 0;
300 while (s->look < s->f)
301 {
302 if ((c = getbyte(*(s->c))) < 0)
303 break;
304 s->b[s->ip] = UCL_BYTE(c);
305 s->ip++;
306 s->look++;
307 }
308#endif
309 if (s->ip == s->b_size)
310 s->ip = 0;
311
312 if (s->look >= 2 && s->dict_len > 0)
313 swd_insertdict(s,0,s->dict_len);
314
315 s->rp = s->first_rp;
316 if (s->rp >= s->node_count)
317 s->rp -= s->node_count;
318 else
319 s->rp += s->b_size - s->node_count;
320
321#if defined(__UCL_CHECKER)
322 /* initialize memory for the first few HEAD3 (if s->ip is not far
323 * enough ahead to do this job for us). The value doesn't matter. */
324 if (s->look < 3)
325 ucl_memset(&s->b[s->bp+s->look],0,3);
326#endif
327
328 UCL_UNUSED(i);
329 UCL_UNUSED(c);
330 return UCL_E_OK;
331}
332
333
334static
335void swd_exit(ucl_swd_t *s)
336{
337#if defined(SWD_USE_MALLOC)
338 /* free in reverse order of allocations */
339#ifdef HEAD2
340 ucl_free(s->head2); s->head2 = NULL;
341#endif
342 ucl_free(s->llen3); s->llen3 = NULL;
343 ucl_free(s->best3); s->best3 = NULL;
344 ucl_free(s->succ3); s->succ3 = NULL;
345 ucl_free(s->head3); s->head3 = NULL;
346 ucl_free(s->b); s->b = NULL;
347#else
348 UCL_UNUSED(s);
349#endif
350}
351
352
353#define swd_pos2off(s,pos) \
354 (s->bp > (pos) ? s->bp - (pos) : s->b_size - ((pos) - s->bp))
355
356
357/***********************************************************************
358//
359************************************************************************/
360
361static __inline__
362void swd_getbyte(ucl_swd_t *s)
363{
364 int c;
365
366 if ((c = getbyte(*(s->c))) < 0)
367 {
368 if (s->look > 0)
369 --s->look;
370#if defined(__UCL_CHECKER)
371 /* initialize memory - value doesn't matter */
372 s->b[s->ip] = 0;
373 if (s->ip < s->f)
374 s->b_wrap[s->ip] = 0;
375#endif
376 }
377 else
378 {
379 s->b[s->ip] = UCL_BYTE(c);
380 if (s->ip < s->f)
381 s->b_wrap[s->ip] = UCL_BYTE(c);
382 }
383 if (++s->ip == s->b_size)
384 s->ip = 0;
385 if (++s->bp == s->b_size)
386 s->bp = 0;
387 if (++s->rp == s->b_size)
388 s->rp = 0;
389}
390
391
392/***********************************************************************
393// remove node from lists
394************************************************************************/
395
396static __inline__
397void swd_remove_node(ucl_swd_t *s, ucl_uint node)
398{
399 if (s->node_count == 0)
400 {
401 ucl_uint key;
402
403#ifdef UCL_DEBUG
404 if (s->first_rp != UCL_UINT_MAX)
405 {
406 if (node != s->first_rp)
407 printf("Remove %5d: %5d %5d %5d %5d %6d %6d\n",
408 node, s->rp, s->ip, s->bp, s->first_rp,
409 s->ip - node, s->ip - s->bp);
410 assert(node == s->first_rp);
411 s->first_rp = UCL_UINT_MAX;
412 }
413#endif
414
415 key = HEAD3(s->b,node);
416 assert(s->llen3[key] > 0);
417 --s->llen3[key];
418
419#ifdef HEAD2
420 key = HEAD2(s->b,node);
421 assert(s->head2[key] != NIL2);
422 if ((ucl_uint) s->head2[key] == node)
423 s->head2[key] = NIL2;
424#endif
425 }
426 else
427 --s->node_count;
428}
429
430
431/***********************************************************************
432//
433************************************************************************/
434
435static
436void swd_accept(ucl_swd_t *s, ucl_uint n)
437{
438 assert(n <= s->look);
439
440 if (n > 0) do
441 {
442 ucl_uint key;
443
444 swd_remove_node(s,s->rp);
445
446 /* add bp into HEAD3 */
447 key = HEAD3(s->b,s->bp);
448 s->succ3[s->bp] = s_head3(s,key);
449 s->head3[key] = SWD_UINT(s->bp);
450 s->best3[s->bp] = SWD_UINT(s->f + 1);
451 s->llen3[key]++;
452 assert(s->llen3[key] <= s->n);
453
454#ifdef HEAD2
455 /* add bp into HEAD2 */
456 key = HEAD2(s->b,s->bp);
457 s->head2[key] = SWD_UINT(s->bp);
458#endif
459
460 swd_getbyte(s);
461 } while (--n > 0);
462}
463
464
465/***********************************************************************
466//
467************************************************************************/
468
469static
470void swd_search(ucl_swd_t *s, ucl_uint node, ucl_uint cnt)
471{
472#if 0 && defined(__GNUC__) && defined(__i386__)
473 register const unsigned char *p1 __asm__("%edi");
474 register const unsigned char *p2 __asm__("%esi");
475 register const unsigned char *px __asm__("%edx");
476#else
477 const unsigned char *p1;
478 const unsigned char *p2;
479 const unsigned char *px;
480#endif
481 ucl_uint m_len = s->m_len;
482 const unsigned char * b = s->b;
483 const unsigned char * bp = s->b + s->bp;
484 const unsigned char * bx = s->b + s->bp + s->look;
485 unsigned char scan_end1;
486
487 assert(s->m_len > 0);
488
489 scan_end1 = bp[m_len - 1];
490 for ( ; cnt-- > 0; node = s->succ3[node])
491 {
492 p1 = bp;
493 p2 = b + node;
494 px = bx;
495
496 assert(m_len < s->look);
497
498 if (
499#if 1
500 p2[m_len - 1] == scan_end1 &&
501 p2[m_len] == p1[m_len] &&
502#endif
503 p2[0] == p1[0] &&
504 p2[1] == p1[1])
505 {
506 ucl_uint i;
507 assert(ucl_memcmp(bp,&b[node],3) == 0);
508
509#if 0 && defined(UCL_UNALIGNED_OK_4)
510 p1 += 3; p2 += 3;
511 while (p1 < px && * (const ucl_uint32p) p1 == * (const ucl_uint32p) p2)
512 p1 += 4, p2 += 4;
513 while (p1 < px && *p1 == *p2)
514 p1 += 1, p2 += 1;
515#else
516 p1 += 2; p2 += 2;
517 do {} while (++p1 < px && *p1 == *++p2);
518#endif
519 i = p1 - bp;
520
521#ifdef UCL_DEBUG
522 if (ucl_memcmp(bp,&b[node],i) != 0)
523 printf("%5ld %5ld %02x%02x %02x%02x\n",
524 (long)s->bp, (long) node,
525 bp[0], bp[1], b[node], b[node+1]);
526#endif
527 assert(ucl_memcmp(bp,&b[node],i) == 0);
528
529#if defined(SWD_BEST_OFF)
530 if (i < SWD_BEST_OFF)
531 {
532 if (s->best_pos[i] == 0)
533 s->best_pos[i] = node + 1;
534 }
535#endif
536 if (i > m_len)
537 {
538 s->m_len = m_len = i;
539 s->m_pos = node;
540 if (m_len == s->look)
541 return;
542 if (m_len >= s->nice_length)
543 return;
544 if (m_len > (ucl_uint) s->best3[node])
545 return;
546 scan_end1 = bp[m_len - 1];
547 }
548 }
549 }
550}
551
552
553/***********************************************************************
554//
555************************************************************************/
556
557#ifdef HEAD2
558
559static
560ucl_bool swd_search2(ucl_swd_t *s)
561{
562 ucl_uint key;
563
564 assert(s->look >= 2);
565 assert(s->m_len > 0);
566
567 key = s->head2[ HEAD2(s->b,s->bp) ];
568 if (key == NIL2)
569 return 0;
570#ifdef UCL_DEBUG
571 if (ucl_memcmp(&s->b[s->bp],&s->b[key],2) != 0)
572 printf("%5ld %5ld %02x%02x %02x%02x\n", (long)s->bp, (long)key,
573 s->b[s->bp], s->b[s->bp+1], s->b[key], s->b[key+1]);
574#endif
575 assert(ucl_memcmp(&s->b[s->bp],&s->b[key],2) == 0);
576#if defined(SWD_BEST_OFF)
577 if (s->best_pos[2] == 0)
578 s->best_pos[2] = key + 1;
579#endif
580
581 if (s->m_len < 2)
582 {
583 s->m_len = 2;
584 s->m_pos = key;
585 }
586 return 1;
587}
588
589#endif
590
591
592/***********************************************************************
593//
594************************************************************************/
595
596static
597void swd_findbest(ucl_swd_t *s)
598{
599 ucl_uint key;
600 ucl_uint cnt, node;
601 ucl_uint len;
602
603 assert(s->m_len > 0);
604
605 /* get current head, add bp into HEAD3 */
606 key = HEAD3(s->b,s->bp);
607 node = s->succ3[s->bp] = s_head3(s,key);
608 cnt = s->llen3[key]++;
609 assert(s->llen3[key] <= s->n + s->f);
610 if (cnt > s->max_chain && s->max_chain > 0)
611 cnt = s->max_chain;
612 s->head3[key] = SWD_UINT(s->bp);
613
614 s->b_char = s->b[s->bp];
615 len = s->m_len;
616 if (s->m_len >= s->look)
617 {
618 if (s->look == 0)
619 s->b_char = -1;
620 s->m_off = 0;
621 s->best3[s->bp] = SWD_UINT(s->f + 1);
622 }
623 else
624 {
625#ifdef HEAD2
626 if (swd_search2(s))
627#endif
628 if (s->look >= 3)
629 swd_search(s,node,cnt);
630 if (s->m_len > len)
631 s->m_off = swd_pos2off(s,s->m_pos);
632 s->best3[s->bp] = SWD_UINT(s->m_len);
633
634#if defined(SWD_BEST_OFF)
635 if (s->use_best_off)
636 {
637 int i;
638 for (i = 2; i < SWD_BEST_OFF; i++)
639 if (s->best_pos[i] > 0)
640 s->best_off[i] = swd_pos2off(s,s->best_pos[i]-1);
641 else
642 s->best_off[i] = 0;
643 }
644#endif
645 }
646
647 swd_remove_node(s,s->rp);
648
649#ifdef HEAD2
650 /* add bp into HEAD2 */
651 key = HEAD2(s->b,s->bp);
652 s->head2[key] = SWD_UINT(s->bp);
653#endif
654}
655
656
657#undef HEAD3
658#undef HEAD2
659#undef s_head3
660
661
662/*
663vi:ts=4:et
664*/
665