diff options
Diffstat (limited to 'lib/microtar/README.md')
-rw-r--r-- | lib/microtar/README.md | 312 |
1 files changed, 226 insertions, 86 deletions
diff --git a/lib/microtar/README.md b/lib/microtar/README.md index 18153caa21..109626cdd4 100644 --- a/lib/microtar/README.md +++ b/lib/microtar/README.md | |||
@@ -1,128 +1,268 @@ | |||
1 | # microtar | 1 | # microtar |
2 | A lightweight tar library written in ANSI C | ||
3 | 2 | ||
3 | A lightweight tar library written in ANSI C. | ||
4 | 4 | ||
5 | ## Modifications from upstream | 5 | This version is a fork of [rxi's microtar](https://github.com/rxi/microtar) |
6 | with bugfixes and API changes aimed at improving usability, but still keeping | ||
7 | with the minimal design of the original library. | ||
6 | 8 | ||
7 | [Upstream](https://github.com/rxi/microtar) has numerous bugs and gotchas, | 9 | ## License |
8 | which I fixed in order to improve the overall robustness of the library. | 10 | |
11 | This library is free software; you can redistribute it and/or modify it under | ||
12 | the terms of the MIT license. See [LICENSE](LICENSE) for details. | ||
9 | 13 | ||
10 | A summary of my changes, in no particular order: | ||
11 | 14 | ||
12 | - Fix possible sscanf beyond the bounds of the input buffer | 15 | ## Supported format variants |
13 | - Fix possible buffer overruns due to strcpy on untrusted input | ||
14 | - Fix incorrect octal formatting by sprintf and possible output overrruns | ||
15 | - Catch read/writes which are too big and handle them gracefully | ||
16 | - Handle over-long names in `mtar_write_file_header` / `mtar_write_dir_header` | ||
17 | - Ensure strings in `mtar_header_t` are always null-terminated | ||
18 | - Save and load group information so we don't lose information | ||
19 | - Move `mtar_open()` to `microtar-stdio.c` so `microtar.c` can be used in | ||
20 | a freestanding environment | ||
21 | - Allow control of stack usage by moving temporary variables into `mtar_t`, | ||
22 | so the caller can decide whether to use the stack or heap | ||
23 | 16 | ||
24 | An up-to-date copy of this modified version can be found | 17 | No effort has been put into handling every tar format variant. Basically |
25 | [here](https://github.com/amachronic/microtar). | 18 | what is accepted is the "old-style" format, which appears to work well |
19 | enough to access basic archives created by GNU `tar`. | ||
26 | 20 | ||
27 | 21 | ||
28 | ## Modifications for Rockbox | 22 | ## Basic usage |
29 | 23 | ||
30 | Added file `microtar-rockbox.c` implementing `mtar_open()` with native | 24 | The library consists of two files, `microtar.c` and `microtar.h`, which only |
31 | Rockbox filesystem API. | 25 | depend on a tiny part of the standard C library & can be easily incorporated |
26 | into a host project's build system. | ||
32 | 27 | ||
28 | The core library does not include any I/O hooks as these are supposed to be | ||
29 | provided by the host application. If the C library's `fopen` and friends are | ||
30 | good enough, you can use `microtar-stdio.c`. | ||
33 | 31 | ||
34 | ## Basic Usage | ||
35 | The library consists of `microtar.c` and `microtar.h`. These two files can be | ||
36 | dropped into an existing project and compiled along with it. | ||
37 | 32 | ||
33 | ### Initialization | ||
34 | |||
35 | Initialization is very simple. Everything the library needs is contained in | ||
36 | the `mtar_t` struct; there is no memory allocation and no global state. It is | ||
37 | enough to zero-initialize an `mtar_t` object to put it into a "closed" state. | ||
38 | You can use `mtar_is_open()` to query whether the archive is open or not. | ||
39 | |||
40 | An archive can be opened for reading _or_ writing, but not both. You have to | ||
41 | specify which access mode you're using when you create the archive. | ||
38 | 42 | ||
39 | #### Reading | ||
40 | ```c | 43 | ```c |
41 | mtar_t tar; | 44 | mtar_t tar; |
42 | mtar_header_t h; | 45 | mtar_init(&tar, MTAR_READ, my_io_ops, my_stream); |
43 | char *p; | 46 | ``` |
44 | 47 | ||
45 | /* Open archive for reading */ | 48 | Or if using `microtar-stdio.c`: |
46 | mtar_open(&tar, "test.tar", "r"); | ||
47 | 49 | ||
48 | /* Print all file names and sizes */ | 50 | ```c |
49 | while ( (mtar_read_header(&tar, &h)) != MTAR_ENULLRECORD ) { | 51 | int error = mtar_open(&tar, "file.tar", "rb"); |
50 | printf("%s (%d bytes)\n", h.name, h.size); | 52 | if(error) { |
51 | mtar_next(&tar); | 53 | /* do something about it */ |
52 | } | 54 | } |
55 | ``` | ||
53 | 56 | ||
54 | /* Load and print contents of file "test.txt" */ | 57 | Note that `mtar_init()` is called for you in this case and the access mode is |
55 | mtar_find(&tar, "test.txt", &h); | 58 | deduced from the mode flags. |
56 | p = calloc(1, h.size + 1); | ||
57 | mtar_read_data(&tar, p, h.size); | ||
58 | printf("%s", p); | ||
59 | free(p); | ||
60 | 59 | ||
61 | /* Close archive */ | ||
62 | mtar_close(&tar); | ||
63 | ``` | ||
64 | 60 | ||
65 | #### Writing | 61 | ### Iterating and locating files |
62 | |||
63 | If you opened an archive for reading, you'll likely want to iterate over | ||
64 | all the files. Here's the long way of doing it: | ||
65 | |||
66 | ```c | 66 | ```c |
67 | mtar_t tar; | 67 | mtar_t tar; |
68 | const char *str1 = "Hello world"; | 68 | int err; |
69 | const char *str2 = "Goodbye world"; | 69 | |
70 | /* Go to the start of the archive... Not necessary if you've | ||
71 | * just opened the archive and are already at the beginning. | ||
72 | * (And of course you normally want to check the return value.) */ | ||
73 | mtar_rewind(&tar); | ||
70 | 74 | ||
71 | /* Open archive for writing */ | 75 | /* Iterate over the archive members */ |
72 | mtar_open(&tar, "test.tar", "w"); | 76 | while((err = mtar_next(&tar)) == MTAR_ESUCCESS) { |
77 | /* Get a pointer to the current file header. It will | ||
78 | * remain valid until you move to another record with | ||
79 | * mtar_next() or call mtar_rewind() */ | ||
80 | const mtar_header_t* header = mtar_get_header(&tar); | ||
73 | 81 | ||
74 | /* Write strings to files `test1.txt` and `test2.txt` */ | 82 | printf("%s (%d bytes)\n", header->name, header->size); |
75 | mtar_write_file_header(&tar, "test1.txt", strlen(str1)); | 83 | } |
76 | mtar_write_data(&tar, str1, strlen(str1)); | 84 | |
77 | mtar_write_file_header(&tar, "test2.txt", strlen(str2)); | 85 | if(err != MTAR_ENULLRECORD) { |
78 | mtar_write_data(&tar, str2, strlen(str2)); | 86 | /* ENULLRECORD means we hit end of file; any |
87 | * other return value is an actual error. */ | ||
88 | } | ||
89 | ``` | ||
79 | 90 | ||
80 | /* Finalize -- this needs to be the last thing done before closing */ | 91 | There's a useful shortcut for this type of iteration which removes the |
81 | mtar_finalize(&tar); | 92 | loop boilerplate, replacing it with another kind of boilerplate that may |
93 | be more palatable in some cases. | ||
94 | |||
95 | ```c | ||
96 | /* Will be called for each archive member visited by mtar_foreach(). | ||
97 | * The member's header is passed in as an argument so you don't need | ||
98 | * to fetch it manually with mtar_get_header(). You can freely read | ||
99 | * data (if present) and seek around. There is no special cleanup | ||
100 | * required and it is not necessary to read to the end of the stream. | ||
101 | * | ||
102 | * The callback should return zero (= MTAR_ESUCCESS) to continue the | ||
103 | * iteration or return nonzero to abort. On abort, the value returned | ||
104 | * by the callback will be returned from mtar_foreach(). Since it may | ||
105 | * also return normal microtar error codes, it is suggested to use a | ||
106 | * positive value or pass the result via 'arg'. | ||
107 | */ | ||
108 | int foreach_cb(mtar_t* tar, const mtar_header_t* header, void* arg) | ||
109 | { | ||
110 | // ... | ||
111 | return 0; | ||
112 | } | ||
82 | 113 | ||
83 | /* Close archive */ | 114 | void main() |
84 | mtar_close(&tar); | 115 | { |
116 | mtar_t tar; | ||
117 | |||
118 | // ... | ||
119 | |||
120 | int ret = mtar_foreach(&tar, foreach_cb, NULL); | ||
121 | if(ret < 0) { | ||
122 | /* Microtar error codes are negative and may be returned if | ||
123 | * there is a problem with the iteration. */ | ||
124 | } else if(ret == MTAR_ESUCCESS) { | ||
125 | /* If the iteration reaches the end of the archive without | ||
126 | * errors, the return code is MTAR_ESUCCESS. */ | ||
127 | } else if(ret > 0) { | ||
128 | /* Positive values might be returned by the callback to | ||
129 | * signal some condition was met; they'll never be returned | ||
130 | * by microtar */ | ||
131 | } | ||
132 | } | ||
133 | ``` | ||
134 | |||
135 | The other thing you're likely to do is look for a specific file: | ||
136 | |||
137 | ```c | ||
138 | /* Seek to a specific member in the archive */ | ||
139 | int err = mtar_find(&tar, "foo.txt"); | ||
140 | if(err == MTAR_ESUCCESS) { | ||
141 | /* File was found -- read the header with mtar_get_header() */ | ||
142 | } else if(err == MTAR_ENOTFOUND) { | ||
143 | /* File wasn't in the archive */ | ||
144 | } else { | ||
145 | /* Some error occurred */ | ||
146 | } | ||
85 | ``` | 147 | ``` |
86 | 148 | ||
149 | Note this isn't terribly efficient since it scans the entire archive | ||
150 | looking for the file. | ||
151 | |||
152 | |||
153 | ### Reading file data | ||
154 | |||
155 | Once pointed at a file via `mtar_next()` or `mtar_find()` you can read the | ||
156 | data with a simple POSIX-like API. | ||
157 | |||
158 | - `mtar_read_data(tar, buf, count)` reads up to `count` bytes into `buf`, | ||
159 | returning the actual number of bytes read, or a negative error value. | ||
160 | If at EOF, this returns zero. | ||
161 | |||
162 | - `mtar_seek_data(tar, offset, whence)` works exactly like `fseek()` with | ||
163 | `whence` being one of `SEEK_SET`, `SEEK_CUR`, or `SEEK_END` and `offset` | ||
164 | indicating a point relative to the beginning, current position, or end | ||
165 | of the file. Returns zero on success, or a negative error code. | ||
166 | |||
167 | - `mtar_eof_data(tar)` returns nonzero if the end of the file has been | ||
168 | reached. It is possible to seek backward to clear this condition. | ||
169 | |||
170 | |||
171 | ### Writing archives | ||
172 | |||
173 | Microtar has limited support for creating archives. When an archive is opened | ||
174 | for writing, you can add new members using `mtar_write_header()`. | ||
175 | |||
176 | - `mtar_write_header(tar, header)` writes out the header for a new member. | ||
177 | The amount of data that follows is dictated by `header->size`, though if | ||
178 | the underlying stream supports seeking and re-writing data, this size can | ||
179 | be updated later with `mtar_update_header()` or `mtar_update_file_size()`. | ||
180 | |||
181 | - `mtar_update_header(tar, header)` will re-write the previously written | ||
182 | header. This may be used to change any header field. The underlying stream | ||
183 | must support seeking. On a successful return the stream will be returned | ||
184 | to the position it was at before the call. | ||
185 | |||
186 | File data can be written with `mtar_write_data()`, and if the underlying stream | ||
187 | supports seeking, you can seek with `mtar_seek_data()` and read back previously | ||
188 | written data with `mtar_read_data()`. Note that it is not possible to truncate | ||
189 | the file stream by any means. | ||
190 | |||
191 | - `mtar_write_data(tar, buf, count)` will write up to `count` bytes from | ||
192 | `buf` to the current member's data. Returns the number of bytes actually | ||
193 | written or a negative error code. | ||
194 | |||
195 | - `mtar_update_file_size(tar)` will update the header size to reflect the | ||
196 | actual amount of written data. This is intended to be called right before | ||
197 | `mtar_end_data()` if you are not declaring file sizes in advance. | ||
198 | |||
199 | - `mtar_end_data(tar)` will end the current member. It will complain if you | ||
200 | did not write the correct amount of data provided in the header. This must be | ||
201 | called before writing the next header. | ||
202 | |||
203 | - `mtar_finalize(tar)` is called after you have written all members to the | ||
204 | archive. It writes out some null records which mark the end of the archive, | ||
205 | so you cannot write any more archive members after this. | ||
206 | |||
207 | Note that `mtar_close()` can fail if there was a problem flushing buffered | ||
208 | data to disk, so its return value should always be checked. | ||
209 | |||
87 | 210 | ||
88 | ## Error handling | 211 | ## Error handling |
89 | All functions which return an `int` will return `MTAR_ESUCCESS` if the operation | ||
90 | is successful. If an error occurs an error value less-than-zero will be | ||
91 | returned; this value can be passed to the function `mtar_strerror()` to get its | ||
92 | corresponding error string. | ||
93 | 212 | ||
213 | Most functions that return `int` return an error code from `enum mtar_error`. | ||
214 | Zero is success and all other error codes are negative. `mtar_strerror()` can | ||
215 | return a string describing the error code. | ||
94 | 216 | ||
95 | ## Wrapping a stream | 217 | A couple of functions use a different return value convention: |
96 | If you want to read or write from something other than a file, the `mtar_t` | ||
97 | struct can be manually initialized with your own callback functions and a | ||
98 | `stream` pointer. | ||
99 | 218 | ||
100 | All callback functions are passed a pointer to the `mtar_t` struct as their | 219 | - `mtar_foreach()` may return error codes or an arbitrary nonzero value provided |
101 | first argument. They should return `MTAR_ESUCCESS` if the operation succeeds | 220 | by the callback. |
102 | without an error, or an integer below zero if an error occurs. | 221 | - `mtar_read_data()` and `mtar_write_data()` return the number of bytes read |
222 | or written, or a negative error code. In particular zero means that no bytes | ||
223 | were read or written. | ||
224 | - `mtar_get_header()` may return `NULL` if there is no valid header. | ||
225 | It is only possible to see a null pointer if misusing the API or after | ||
226 | a previous error so checking for this is usually not necessary. | ||
103 | 227 | ||
104 | After the `stream` field has been set, all required callbacks have been set and | 228 | There is essentially no support for error recovery. After an error you can |
105 | all unused fields have been zeroset the `mtar_t` struct can be safely used with | 229 | only do two things reliably: close the archive with `mtar_close()` or try |
106 | the microtar functions. `mtar_open` *should not* be called if the `mtar_t` | 230 | rewinding to the beginning with `mtar_rewind()`. |
107 | struct was initialized manually. | ||
108 | 231 | ||
109 | #### Reading | ||
110 | The following callbacks should be set for reading an archive from a stream: | ||
111 | 232 | ||
112 | Name | Arguments | Description | 233 | ## I/O hooks |
113 | --------|------------------------------------------|--------------------------- | ||
114 | `read` | `mtar_t *tar, void *data, unsigned size` | Read data from the stream | ||
115 | `seek` | `mtar_t *tar, unsigned pos` | Set the position indicator | ||
116 | `close` | `mtar_t *tar` | Close the stream | ||
117 | 234 | ||
118 | #### Writing | 235 | You can provide your own I/O hooks in a `mtar_ops_t` struct. The same ops |
119 | The following callbacks should be set for writing an archive to a stream: | 236 | struct can be shared among multiple `mtar_t` objects but each object gets |
237 | its own `void* stream` pointer. | ||
120 | 238 | ||
121 | Name | Arguments | Description | 239 | Name | Arguments | Required |
122 | --------|------------------------------------------------|--------------------- | 240 | --------|-------------------------------------------|------------ |
123 | `write` | `mtar_t *tar, const void *data, unsigned size` | Write data to the stream | 241 | `read` | `void* stream, void* data, unsigned size` | If reading |
242 | `write` | `void* stream, void* data, unsigned size` | If writing | ||
243 | `seek` | `void* stream, unsigned pos` | If reading | ||
244 | `close` | `void* stream` | Always | ||
124 | 245 | ||
246 | `read` and `write` should transfer the number of bytes indicated | ||
247 | and return the number of bytes actually read or written, or a negative | ||
248 | `enum mtar_error` code on error. | ||
125 | 249 | ||
126 | ## License | 250 | `seek` must have semantics like `lseek(..., pos, SEEK_SET)`; that is, |
127 | This library is free software; you can redistribute it and/or modify it under | 251 | the position is an absolute byte offset in the stream. Seeking is not |
128 | the terms of the MIT license. See [LICENSE](LICENSE) for details. | 252 | optional for read support, but the library only performs backward |
253 | seeks under two circumstances: | ||
254 | |||
255 | - `mtar_rewind()` seeks to position 0. | ||
256 | - `mtar_seek_data()` may seek backward if the user requests it. | ||
257 | |||
258 | Therefore, you will be able to get away with a limited forward-only | ||
259 | seek function if you're able to read everything in a single pass and use | ||
260 | the API carefully. Note `mtar_find()` and `mtar_foreach()` will call | ||
261 | `mtar_rewind()`. | ||
262 | |||
263 | `close` is called by `mtar_close()` to clean up the stream. Note the | ||
264 | library assumes that the stream handle is cleaned up by `close` even | ||
265 | if an error occurs. | ||
266 | |||
267 | `seek` and `close` should return an `enum mtar_error` code, either | ||
268 | `MTAR_ESUCCESS`, or a negative value on error. | ||