summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2008-10-28 21:07:53 +0000
committerJens Arnold <amiconn@rockbox.org>2008-10-28 21:07:53 +0000
commit2c52dee83f5c796fe471e6fab15dea17a2f508ff (patch)
tree4f90ccd3c1d5785385fea9144e5011b1943acfcd
parentafd2f681d18e574442f8569f1a722d5d39d79b78 (diff)
downloadrockbox-2c52dee83f5c796fe471e6fab15dea17a2f508ff.tar.gz
rockbox-2c52dee83f5c796fe471e6fab15dea17a2f508ff.zip
Self-extractor for on-disk firmware image: UCL decompressor in SH1 assembler - less than half the size of the compiled C function, and ~45% faster.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18904 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/decompressor/Makefile12
-rw-r--r--firmware/decompressor/decompressor.c69
-rw-r--r--firmware/decompressor/sh_nrv2e_d8.S155
3 files changed, 164 insertions, 72 deletions
diff --git a/firmware/decompressor/Makefile b/firmware/decompressor/Makefile
index 46d7afe1b0..33b6affc6d 100644
--- a/firmware/decompressor/Makefile
+++ b/firmware/decompressor/Makefile
@@ -14,7 +14,8 @@ PRINTS=$(SILENT)$(call info,$(1))
14 14
15LDS := link.lds 15LDS := link.lds
16LINKFILE = $(OBJDIR)/linkage.lds 16LINKFILE = $(OBJDIR)/linkage.lds
17OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o $(OBJDIR)/startup.o 17OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o \
18 $(OBJDIR)/sh_nrv2e_d8.o $(OBJDIR)/startup.o
18CFLAGS = $(GCCOPTS) 19CFLAGS = $(GCCOPTS)
19 20
20all: $(OBJDIR)/compressed.bin 21all: $(OBJDIR)/compressed.bin
@@ -25,9 +26,6 @@ $(OBJDIR)/compressed.bin : $(OBJDIR)/compressed.elf
25$(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE) 26$(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE)
26 $(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map 27 $(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map
27 28
28$(LDS): $(OBJS)
29
30
31$(LINKFILE): $(LDS) 29$(LINKFILE): $(LDS)
32 $(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@ 30 $(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@
33 31
@@ -39,11 +37,15 @@ $(OBJDIR)/startup.o : startup.S
39 $(SILENT)mkdir -p $(dir $@) 37 $(SILENT)mkdir -p $(dir $@)
40 $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@ 38 $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@
41 39
40$(OBJDIR)/sh_nrv2e_d8.o : sh_nrv2e_d8.S
41 $(SILENT)mkdir -p $(dir $@)
42 $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@
43
42$(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c 44$(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c
43 $(SILENT)mkdir -p $(dir $@) 45 $(SILENT)mkdir -p $(dir $@)
44 $(call PRINTS,CC $(<F))$(CC) $(CFLAGS) -c $< -o $@ 46 $(call PRINTS,CC $(<F))$(CC) $(CFLAGS) -c $< -o $@
45 47
46$(OBJDIR)/uclimage.c : $(FLASHFILE) $(TOOLSDIR)/ucl2src.pl 48$(OBJDIR)/uclimage.c : $(FLASHFILE) $(TOOLSDIR)/ucl2src.pl
47 $(SILENT)mkdir -p $(dir $@) 49 $(SILENT)mkdir -p $(dir $@)
48 $(call PRINTS,UCL2SRC)perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $< 50 $(call PRINTS,UCL2SRC $(<F))perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $<
49 51
diff --git a/firmware/decompressor/decompressor.c b/firmware/decompressor/decompressor.c
index cec82b8b09..11888ef272 100644
--- a/firmware/decompressor/decompressor.c
+++ b/firmware/decompressor/decompressor.c
@@ -36,8 +36,8 @@ extern char loadaddress[], dramend[];
36extern void start(void); 36extern void start(void);
37 37
38void main(void) ICODE_ATTR; 38void main(void) ICODE_ATTR;
39static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst, 39int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
40 unsigned long *dst_len) ICODE_ATTR; 40 unsigned long *dst_len) ICODE_ATTR;
41 41
42/* Vector table */ 42/* Vector table */
43void (*vbr[]) (void) __attribute__ ((section (".vectors"))) = 43void (*vbr[]) (void) __attribute__ ((section (".vectors"))) =
@@ -50,71 +50,6 @@ void (*vbr[]) (void) __attribute__ ((section (".vectors"))) =
50 50
51/** All subsequent functions are executed from IRAM **/ 51/** All subsequent functions are executed from IRAM **/
52 52
/* Thinned out version of the UCL 2e decompression sourcecode
 * Original (C) Markus F.X.J Oberhumer under GNU GPL license */

/* Match offsets larger than this get one extra byte of match length. */
#define NRV2E_M2_MAX_OFFSET 0x500
/* Value of the assembled offset word that marks the end of the stream. */
#define NRV2E_EOF_MARKER    0xffffffff

/* Fetch the next control bit from the stream and yield it as 0 or 1.
 *
 * 'bb' is the bit buffer: when its low seven bits are all zero a fresh
 * byte is read from src[ilen] (advancing ilen) and loaded as byte*2+1,
 * otherwise the buffer is simply doubled. The bit that ends up in
 * position 8 is the result, so each source byte is consumed MSB first.
 * 'bb' and 'ilen' must be modifiable lvalues; both are updated. */
#define GETBIT(bb, src, ilen) \
    ((((bb) = ((bb) & 0x7f) ? (bb) * 2 \
                            : ((unsigned)(src)[(ilen)++] * 2 + 1)) >> 8) & 1)

/* Decompress an NRV2E (8 bit variant) stream from 'src' into 'dst'.
 *
 * src     - compressed data; read until the end-of-stream marker is found
 * dst     - output buffer; the caller must make it large enough, no bounds
 *           are checked here
 * dst_len - receives the number of bytes written to 'dst'
 *
 * Returns the number of bytes consumed from 'src'.
 *
 * Neither buffer is range checked, so a stream without an end marker makes
 * this run past the end of 'src'. */
static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
                                  unsigned long *dst_len)
{
    unsigned long bb = 0;
    unsigned ilen = 0, olen = 0, last_m_off = 1;

    for (;;)
    {
        unsigned m_off, m_len;
        const unsigned char *m_pos;

        /* A run of 1 bits: each one is followed by a literal byte. */
        while (GETBIT(bb, src, ilen))
        {
            dst[olen++] = src[ilen++];
        }

        /* Variable length code for the upper part of the match offset. */
        m_off = 1;
        for (;;)
        {
            m_off = m_off * 2 + GETBIT(bb, src, ilen);
            if (GETBIT(bb, src, ilen))
            {
                break;
            }
            m_off = (m_off - 1) * 2 + GETBIT(bb, src, ilen);
        }

        if (m_off == 2)
        {
            /* Code 2 means "reuse the previous match offset". */
            m_off = last_m_off;
            m_len = GETBIT(bb, src, ilen);
        }
        else
        {
            m_off = (m_off - 3) * 256 + src[ilen++];
            if (m_off == NRV2E_EOF_MARKER)
            {
                break;
            }
            /* The lowest bit (inverted) is the first length bit, the
             * remaining bits are the offset minus one. */
            m_len = (m_off ^ NRV2E_EOF_MARKER) & 1;
            m_off >>= 1;
            last_m_off = ++m_off;
        }

        /* Remainder of the match length code. */
        if (m_len)
        {
            m_len = 1 + GETBIT(bb, src, ilen);
        }
        else if (GETBIT(bb, src, ilen))
        {
            m_len = 3 + GETBIT(bb, src, ilen);
        }
        else
        {
            m_len++;
            do
            {
                m_len = m_len * 2 + GETBIT(bb, src, ilen);
            } while (!GETBIT(bb, src, ilen));
            m_len += 3;
        }
        m_len += (m_off > NRV2E_M2_MAX_OFFSET);

        /* Copy m_len + 1 bytes from m_off bytes back in the output. This
         * must go byte by byte in ascending order: source and destination
         * overlap whenever the offset is smaller than the length. */
        m_pos = dst + olen - m_off;
        dst[olen++] = *m_pos++;
        do
        {
            dst[olen++] = *m_pos++;
        } while (--m_len > 0);
    }
    *dst_len = olen;

    return ilen;
}
117
118#define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3) 53#define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3)
119/* This will never return */ 54/* This will never return */
120void main(void) 55void main(void)
diff --git a/firmware/decompressor/sh_nrv2e_d8.S b/firmware/decompressor/sh_nrv2e_d8.S
new file mode 100644
index 0000000000..c002911c0c
--- /dev/null
+++ b/firmware/decompressor/sh_nrv2e_d8.S
@@ -0,0 +1,155 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Jens Arnold
11 *
12 * based on arm_nrv2e_d8.S -- ARM decompressor for NRV2E
13 * Copyright (C) 1996-2008 Markus Franz Xaver Johannes Oberhumer
14 * Copyright (C) 1996-2008 Laszlo Molnar
15 * Copyright (C) 2000-2008 John F. Reiser
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version 2
20 * of the License, or (at your option) any later version.
21 *
22 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
23 * KIND, either express or implied.
24 *
25 ****************************************************************************/
26
/* Register allocation. src, dst and the dst_len pointer arrive in r4, r5
 * and r6; r6 is reused as scratch once it has been saved on the stack. */
#define src     r4
#define dst     r5
#define len     r6  /* overlaps 'cnt' */
#define cnt     r6  /* overlaps 'len' while reading an offset */
#define tmp     r7

#define off     r0  /* must be r0 because of indexed addressing */
#define bits    r1
#define bitmask r2
#define wrnk    r3  /* -0x500  -M2_MAX_OFFSET before "wrinkle" */


/* GETBIT leaves the next control bit in T.
 *
 * 'bits' holds the not yet consumed bits left-aligned, followed by a single
 * 1 as end marker. As long as anything besides the MSB is set, shll moves
 * the next bit into T. When only the marker is left (bits & bitmask == 0),
 * get1_n2e is called to refill; the shll then executes in the delay slot of
 * the bsr and shifts the marker into T, which get1_n2e expects on entry.
 * get1_n2e returns behind the shll with the first bit of the new byte in T.
 */
#define GETBIT \
    tst     bits, bitmask; \
    bf      1f; \
    bsr     get1_n2e; \
1: \
    shll    bits  /* using the delay slot on purpose */

/* reg = reg * 2 + next bit */
#define getnextb(reg) GETBIT; rotcl reg
/* branch to the label that follows if the next bit is 0 / 1 */
#define jnextb0 GETBIT; bf
#define jnextb1 GETBIT; bt

    .section    .icode,"ax",@progbits
    .align      2
    .global     _ucl_nrv2e_decompress_8
    .type       _ucl_nrv2e_decompress_8,@function

/* src_len = ucl_nrv2e_decompress_8(const unsigned char *src,
 *                                  unsigned char *dst,
 *                                  unsigned long *dst_len)
 *
 * Decompresses until the end-of-stream code is found. The number of bytes
 * written is stored to *dst_len, the number of bytes read is returned in
 * r0. Neither buffer is bounds checked. Uses r0-r7 and pr; pr is saved
 * and restored.
 */

_ucl_nrv2e_decompress_8:
    sts.l   pr, @-r15
    mov     #-1, off        ! off = -1 initial condition
    mov.l   r6, @-r15
    mov     #-5, wrnk
    mov.l   r5, @-r15
    shll8   wrnk            ! nrv2e -M2_MAX_OFFSET
    mov.l   r4, @-r15
    mov     #-1, bitmask
    shlr    bitmask         ! 0x7fffffff for testing before shifting
    bra     top_n2e
    not     bitmask, bits   ! refill next time (MSB must be set)

/* End of stream: turn both pointers into byte counts using the saved
 * start values, store the output count and return the input count. */
eof_n2e:
    mov.l   @r15+, r0       ! r0 = orig_src
    mov.l   @r15+, r1       ! r1 = orig_dst
    sub     r0, src
    mov.l   @r15+, r2       ! r2 = plen_dst
    sub     r1, dst
    mov.l   dst, @r2
    lds.l   @r15+, pr
    rts
    mov     src, r0

/* Refill 'bits' with the next source byte. rotcl appends the end marker
 * (T is set on entry) and puts the first bit of the byte into T; the
 * two shifts then left-align the remaining seven bits plus marker. */
    .align  2
get1_n2e:                   ! in: T bit set
    mov.b   @src+, bits     ! SH1 sign-extends on load
    rotcl   bits            ! LSB = T, T = MSB
    shll16  bits
    rts
    shll8   bits

/* Main loop: a 1 bit is followed by a literal byte, a 0 bit by a match. */
    .align  2
lit_n2e:
    mov.b   @src, tmp
    add     #1, src         ! Need to fill the pipeline latency anyway
    mov.b   tmp, @dst
    add     #1, dst
top_n2e:
    jnextb1 lit_n2e
    bra     getoff_n2e
    mov     #1, cnt

/* Read the variable length offset prefix into cnt. Each round appends one
 * bit; a following 1 bit ends the code, a 0 bit continues at off_n2e. */
off_n2e:
    add     #-1, cnt
    getnextb(cnt)
getoff_n2e:
    getnextb(cnt)
    jnextb0 off_n2e

    mov     cnt, tmp
    mov     #0, len         ! cnt and len share a reg!
    add     #-3, tmp
    cmp/pz  tmp
    bf      offprev_n2e     ! cnt was 2
    mov.b   @src+, off      ! low 7+1 bits
    shll8   tmp
    extu.b  off, off
    or      tmp, off
    not     off, off        ! off = ~off
    tst     off, off
    bt      eof_n2e
    shar    off             /* off is now the negative match offset,
                               T the lowest bit of ~off */
    bt      lenlast_n2e
    bra     lenmore_n2e
    mov     #1, len

/* Prefix value 2: keep the previous offset, the next bit starts the
 * length code. */
offprev_n2e:
    jnextb1 lenlast_n2e
    mov     #1, len
lenmore_n2e:
    jnextb1 lenlast_n2e
len_n2e:
    getnextb(len)
    jnextb0 len_n2e
    bra     gotlen_n2e
    add     #6-2, len

lenlast_n2e:
    getnextb(len)           ! 0,1,2,3
    add     #2, len
gotlen_n2e:
    cmp/gt  off, wrnk
    movt    tmp             ! too far away, so minimum match length is 3
    add     tmp, len
/* Copy 'len' bytes from dst+off (off is negative) to dst, one byte at a
 * time in ascending order. */
copy_n2e:
    add     #-1, len
    mov.b   @(off,dst), tmp
    tst     len, len
    mov.b   tmp, @dst
    add     #1, dst
    bf      copy_n2e
    bra     top_n2e
    nop

    .size   _ucl_nrv2e_decompress_8, .-_ucl_nrv2e_decompress_8