diff options
author | Jens Arnold <amiconn@rockbox.org> | 2008-10-28 21:07:53 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2008-10-28 21:07:53 +0000 |
commit | 2c52dee83f5c796fe471e6fab15dea17a2f508ff (patch) | |
tree | 4f90ccd3c1d5785385fea9144e5011b1943acfcd /firmware | |
parent | afd2f681d18e574442f8569f1a722d5d39d79b78 (diff) | |
download | rockbox-2c52dee83f5c796fe471e6fab15dea17a2f508ff.tar.gz rockbox-2c52dee83f5c796fe471e6fab15dea17a2f508ff.zip |
Self-extractor for on-disk firmware image: UCL decompressor in SH1 assembler - less than half the size of the compiled C function, and ~45% faster.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18904 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/decompressor/Makefile | 12 | ||||
-rw-r--r-- | firmware/decompressor/decompressor.c | 69 | ||||
-rw-r--r-- | firmware/decompressor/sh_nrv2e_d8.S | 155 |
3 files changed, 164 insertions, 72 deletions
diff --git a/firmware/decompressor/Makefile b/firmware/decompressor/Makefile index 46d7afe1b0..33b6affc6d 100644 --- a/firmware/decompressor/Makefile +++ b/firmware/decompressor/Makefile | |||
@@ -14,7 +14,8 @@ PRINTS=$(SILENT)$(call info,$(1)) | |||
14 | 14 | ||
15 | LDS := link.lds | 15 | LDS := link.lds |
16 | LINKFILE = $(OBJDIR)/linkage.lds | 16 | LINKFILE = $(OBJDIR)/linkage.lds |
17 | OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o $(OBJDIR)/startup.o | 17 | OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o \ |
18 | $(OBJDIR)/sh_nrv2e_d8.o $(OBJDIR)/startup.o | ||
18 | CFLAGS = $(GCCOPTS) | 19 | CFLAGS = $(GCCOPTS) |
19 | 20 | ||
20 | all: $(OBJDIR)/compressed.bin | 21 | all: $(OBJDIR)/compressed.bin |
@@ -25,9 +26,6 @@ $(OBJDIR)/compressed.bin : $(OBJDIR)/compressed.elf | |||
25 | $(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE) | 26 | $(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE) |
26 | $(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map | 27 | $(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map |
27 | 28 | ||
28 | $(LDS): $(OBJS) | ||
29 | |||
30 | |||
31 | $(LINKFILE): $(LDS) | 29 | $(LINKFILE): $(LDS) |
32 | $(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@ | 30 | $(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@ |
33 | 31 | ||
@@ -39,11 +37,15 @@ $(OBJDIR)/startup.o : startup.S | |||
39 | $(SILENT)mkdir -p $(dir $@) | 37 | $(SILENT)mkdir -p $(dir $@) |
40 | $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@ | 38 | $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@ |
41 | 39 | ||
40 | $(OBJDIR)/sh_nrv2e_d8.o : sh_nrv2e_d8.S | ||
41 | $(SILENT)mkdir -p $(dir $@) | ||
42 | $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@ | ||
43 | |||
42 | $(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c | 44 | $(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c |
43 | $(SILENT)mkdir -p $(dir $@) | 45 | $(SILENT)mkdir -p $(dir $@) |
44 | $(call PRINTS,CC $(<F))$(CC) $(CFLAGS) -c $< -o $@ | 46 | $(call PRINTS,CC $(<F))$(CC) $(CFLAGS) -c $< -o $@ |
45 | 47 | ||
46 | $(OBJDIR)/uclimage.c : $(FLASHFILE) $(TOOLSDIR)/ucl2src.pl | 48 | $(OBJDIR)/uclimage.c : $(FLASHFILE) $(TOOLSDIR)/ucl2src.pl |
47 | $(SILENT)mkdir -p $(dir $@) | 49 | $(SILENT)mkdir -p $(dir $@) |
48 | $(call PRINTS,UCL2SRC)perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $< | 50 | $(call PRINTS,UCL2SRC $(<F))perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $< |
49 | 51 | ||
diff --git a/firmware/decompressor/decompressor.c b/firmware/decompressor/decompressor.c index cec82b8b09..11888ef272 100644 --- a/firmware/decompressor/decompressor.c +++ b/firmware/decompressor/decompressor.c | |||
@@ -36,8 +36,8 @@ extern char loadaddress[], dramend[]; | |||
36 | extern void start(void); | 36 | extern void start(void); |
37 | 37 | ||
38 | void main(void) ICODE_ATTR; | 38 | void main(void) ICODE_ATTR; |
39 | static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst, | 39 | int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst, |
40 | unsigned long *dst_len) ICODE_ATTR; | 40 | unsigned long *dst_len) ICODE_ATTR; |
41 | 41 | ||
42 | /* Vector table */ | 42 | /* Vector table */ |
43 | void (*vbr[]) (void) __attribute__ ((section (".vectors"))) = | 43 | void (*vbr[]) (void) __attribute__ ((section (".vectors"))) = |
@@ -50,71 +50,6 @@ void (*vbr[]) (void) __attribute__ ((section (".vectors"))) = | |||
50 | 50 | ||
51 | /** All subsequent functions are executed from IRAM **/ | 51 | /** All subsequent functions are executed from IRAM **/ |
52 | 52 | ||
53 | /* Thinned out version of the UCL 2e decompression sourcecode | ||
54 | * Original (C) Markus F.X.J Oberhumer under GNU GPL license */ | ||
55 | #define GETBIT(bb, src, ilen) \ | ||
56 | (((bb = bb & 0x7f ? bb*2 : ((unsigned)src[ilen++]*2+1)) >> 8) & 1) | ||
57 | |||
58 | static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst, | ||
59 | unsigned long *dst_len) | ||
60 | { | ||
61 | unsigned long bb = 0; | ||
62 | unsigned ilen = 0, olen = 0, last_m_off = 1; | ||
63 | |||
64 | for (;;) | ||
65 | { | ||
66 | unsigned m_off, m_len; | ||
67 | |||
68 | while (GETBIT(bb,src,ilen)) | ||
69 | dst[olen++] = src[ilen++]; | ||
70 | |||
71 | m_off = 1; | ||
72 | for (;;) | ||
73 | { | ||
74 | m_off = m_off*2 + GETBIT(bb,src,ilen); | ||
75 | if (GETBIT(bb,src,ilen)) | ||
76 | break; | ||
77 | m_off = (m_off-1)*2 + GETBIT(bb,src,ilen); | ||
78 | } | ||
79 | if (m_off == 2) | ||
80 | { | ||
81 | m_off = last_m_off; | ||
82 | m_len = GETBIT(bb,src,ilen); | ||
83 | } | ||
84 | else | ||
85 | { | ||
86 | m_off = (m_off-3)*256 + src[ilen++]; | ||
87 | if (m_off == 0xffffffff) | ||
88 | break; | ||
89 | m_len = (m_off ^ 0xffffffff) & 1; | ||
90 | m_off >>= 1; | ||
91 | last_m_off = ++m_off; | ||
92 | } | ||
93 | if (m_len) | ||
94 | m_len = 1 + GETBIT(bb,src,ilen); | ||
95 | else if (GETBIT(bb,src,ilen)) | ||
96 | m_len = 3 + GETBIT(bb,src,ilen); | ||
97 | else | ||
98 | { | ||
99 | m_len++; | ||
100 | do { | ||
101 | m_len = m_len*2 + GETBIT(bb,src,ilen); | ||
102 | } while (!GETBIT(bb,src,ilen)); | ||
103 | m_len += 3; | ||
104 | } | ||
105 | m_len += (m_off > 0x500); | ||
106 | { | ||
107 | const unsigned char *m_pos; | ||
108 | m_pos = dst + olen - m_off; | ||
109 | dst[olen++] = *m_pos++; | ||
110 | do dst[olen++] = *m_pos++; while (--m_len > 0); | ||
111 | } | ||
112 | } | ||
113 | *dst_len = olen; | ||
114 | |||
115 | return ilen; | ||
116 | } | ||
117 | |||
118 | #define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3) | 53 | #define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3) |
119 | /* This will never return */ | 54 | /* This will never return */ |
120 | void main(void) | 55 | void main(void) |
diff --git a/firmware/decompressor/sh_nrv2e_d8.S b/firmware/decompressor/sh_nrv2e_d8.S new file mode 100644 index 0000000000..c002911c0c --- /dev/null +++ b/firmware/decompressor/sh_nrv2e_d8.S | |||
@@ -0,0 +1,155 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Jens Arnold | ||
11 | * | ||
12 | * based on arm_nrv2e_d8.S -- ARM decompressor for NRV2E | ||
13 | * Copyright (C) 1996-2008 Markus Franz Xaver Johannes Oberhumer | ||
14 | * Copyright (C) 1996-2008 Laszlo Molnar | ||
15 | * Copyright (C) 2000-2008 John F. Reiser | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License | ||
19 | * as published by the Free Software Foundation; either version 2 | ||
20 | * of the License, or (at your option) any later version. | ||
21 | * | ||
22 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
23 | * KIND, either express or implied. | ||
24 | * | ||
25 | ****************************************************************************/ | ||
26 | |||
27 | #define src r4 | ||
28 | #define dst r5 | ||
29 | #define len r6 /* overlaps 'cnt' */ | ||
30 | #define cnt r6 /* overlaps 'len' while reading an offset */ | ||
31 | #define tmp r7 | ||
32 | |||
33 | #define off r0 /* must be r0 because of indexed addressing */ | ||
34 | #define bits r1 | ||
35 | #define bitmask r2 | ||
36 | #define wrnk r3 /* -0x500 (-M2_MAX_OFFSET): matches farther back than this get one extra byte (the "wrinkle") */ | ||
37 | |||
38 | |||
39 | #define GETBIT \ | ||
40 | tst bits, bitmask; \ | ||
41 | bf 1f; \ | ||
42 | bsr get1_n2e; \ | ||
43 | 1: \ | ||
44 | shll bits /* using the delay slot on purpose */ | ||
45 | |||
46 | #define getnextb(reg) GETBIT; rotcl reg | ||
47 | #define jnextb0 GETBIT; bf | ||
48 | #define jnextb1 GETBIT; bt | ||
49 | |||
50 | .section .icode,"ax",@progbits | ||
51 | .align 2 | ||
52 | .global _ucl_nrv2e_decompress_8 | ||
53 | .type _ucl_nrv2e_decompress_8,@function | ||
54 | |||
55 | /* src_len = ucl_nrv2e_decompress_8(const unsigned char *src, | ||
56 | * unsigned char *dst, | ||
57 | * unsigned long *dst_len) | ||
58 | */ | ||
59 | |||
60 | _ucl_nrv2e_decompress_8: | ||
61 | sts.l pr, @-r15 | ||
62 | mov #-1, off ! off = -1 initial condition | ||
63 | mov.l r6, @-r15 | ||
64 | mov #-5, wrnk | ||
65 | mov.l r5, @-r15 | ||
66 | shll8 wrnk ! nrv2e -M2_MAX_OFFSET | ||
67 | mov.l r4, @-r15 | ||
68 | mov #-1, bitmask | ||
69 | shlr bitmask ! 0x7fffffff for testing before shifting | ||
70 | bra top_n2e | ||
71 | not bitmask, bits ! refill next time (MSB must be set) | ||
72 | |||
73 | eof_n2e: | ||
74 | mov.l @r15+, r0 ! r0 = orig_src | ||
75 | mov.l @r15+, r1 ! r1 = orig_dst | ||
76 | sub r0, src | ||
77 | mov.l @r15+, r2 ! r2 = plen_dst | ||
78 | sub r1, dst | ||
79 | mov.l dst, @r2 | ||
80 | lds.l @r15+, pr | ||
81 | rts | ||
82 | mov src, r0 | ||
83 | |||
84 | .align 2 | ||
85 | get1_n2e: ! in: T bit set | ||
86 | mov.b @src+, bits ! SH1 sign-extends on load | ||
87 | rotcl bits ! LSB = T, T = MSB | ||
88 | shll16 bits | ||
89 | rts | ||
90 | shll8 bits | ||
91 | |||
92 | .align 2 | ||
93 | lit_n2e: | ||
94 | mov.b @src, tmp | ||
95 | add #1, src ! Need to fill the pipeline latency anyway | ||
96 | mov.b tmp, @dst | ||
97 | add #1, dst | ||
98 | top_n2e: | ||
99 | jnextb1 lit_n2e | ||
100 | bra getoff_n2e | ||
101 | mov #1, cnt | ||
102 | |||
103 | off_n2e: | ||
104 | add #-1, cnt | ||
105 | getnextb(cnt) | ||
106 | getoff_n2e: | ||
107 | getnextb(cnt) | ||
108 | jnextb0 off_n2e | ||
109 | |||
110 | mov cnt, tmp | ||
111 | mov #0, len ! cnt and len share a reg! | ||
112 | add #-3, tmp | ||
113 | cmp/pz tmp | ||
114 | bf offprev_n2e ! cnt was 2 | ||
115 | mov.b @src+, off ! low 7+1 bits | ||
116 | shll8 tmp | ||
117 | extu.b off, off | ||
118 | or tmp, off | ||
119 | not off, off ! off = ~off | ||
120 | tst off, off | ||
121 | bt eof_n2e | ||
122 | shar off | ||
123 | bt lenlast_n2e | ||
124 | bra lenmore_n2e | ||
125 | mov #1, len | ||
126 | |||
127 | offprev_n2e: | ||
128 | jnextb1 lenlast_n2e | ||
129 | mov #1, len | ||
130 | lenmore_n2e: | ||
131 | jnextb1 lenlast_n2e | ||
132 | len_n2e: | ||
133 | getnextb(len) | ||
134 | jnextb0 len_n2e | ||
135 | bra gotlen_n2e | ||
136 | add #6-2, len | ||
137 | |||
138 | lenlast_n2e: | ||
139 | getnextb(len) ! 0,1,2,3 | ||
140 | add #2, len | ||
141 | gotlen_n2e: | ||
142 | cmp/gt off, wrnk | ||
143 | movt tmp ! too far away, so minimum match length is 3 | ||
144 | add tmp, len | ||
145 | copy_n2e: | ||
146 | add #-1, len | ||
147 | mov.b @(off,dst), tmp | ||
148 | tst len, len | ||
149 | mov.b tmp, @dst | ||
150 | add #1, dst | ||
151 | bf copy_n2e | ||
152 | bra top_n2e | ||
153 | nop | ||
154 | |||
155 | .size _ucl_nrv2e_decompress_8, .-_ucl_nrv2e_decompress_8 ! must name the same symbol as the label and .type | ||