diff options
author | Linus Nielsen Feltzing <linus@haxx.se> | 2004-01-09 00:47:26 +0000 |
---|---|---|
committer | Linus Nielsen Feltzing <linus@haxx.se> | 2004-01-09 00:47:26 +0000 |
commit | 2f4a5616c01c5e3f1cfb8cc1f085ceb1d45f60b8 (patch) | |
tree | 4358ebb4bd007002efa981f0e4cdc99b979739ec /firmware | |
parent | 4b4776c362a26e43dcdbbb2aa168ca9bce54e1e9 (diff) | |
download | rockbox-2f4a5616c01c5e3f1cfb8cc1f085ceb1d45f60b8.tar.gz rockbox-2f4a5616c01c5e3f1cfb8cc1f085ceb1d45f60b8.zip |
The Unicode parser now handles unterminated strings, and it tries to guess the byte order when the BOM is missing (a missing BOM is a violation of the spec, by the way)
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4215 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/id3.c | 24 |
1 files changed, 22 insertions, 2 deletions
diff --git a/firmware/id3.c b/firmware/id3.c index fc4e1c11ac..3a87fc49c5 100644 --- a/firmware/id3.c +++ b/firmware/id3.c | |||
@@ -260,6 +260,8 @@ static int unicode_munge(char** string, int *len) { | |||
260 | int i; | 260 | int i; |
261 | char *str = *string; | 261 | char *str = *string; |
262 | char *outstr = *string; | 262 | char *outstr = *string; |
263 | bool bom = false; | ||
264 | int outlen; | ||
263 | 265 | ||
264 | if(str[0] > 0x03) { | 266 | if(str[0] > 0x03) { |
265 | /* Plain old string */ | 267 | /* Plain old string */ |
@@ -275,21 +277,37 @@ static int unicode_munge(char** string, int *len) { | |||
275 | 277 | ||
276 | /* Unicode with or without BOM */ | 278 | /* Unicode with or without BOM */ |
277 | if(str[0] == 0x01 || str[0] == 0x02) { | 279 | if(str[0] == 0x01 || str[0] == 0x02) { |
280 | (*len)--; | ||
278 | str++; | 281 | str++; |
279 | tmp = BYTES2INT(0, 0, str[0], str[1]); | 282 | tmp = BYTES2INT(0, 0, str[0], str[1]); |
280 | 283 | ||
281 | /* Now check if there is a BOM (zero-width non-breaking space, 0xfeff) | 284 | /* Now check if there is a BOM (zero-width non-breaking space, 0xfeff) |
282 | and if it is in little or big endian format */ | 285 | and if it is in little or big endian format */ |
283 | if(tmp == 0xfffe) { /* Little endian? */ | 286 | if(tmp == 0xfffe) { /* Little endian? */ |
287 | bom = true; | ||
284 | le = true; | 288 | le = true; |
285 | str += 2; | 289 | str += 2; |
290 | (*len)-=2; | ||
286 | } | 291 | } |
287 | 292 | ||
288 | if(tmp == 0xfeff) /* Big endian? */ | 293 | if(tmp == 0xfeff) { /* Big endian? */ |
294 | bom = true; | ||
289 | str += 2; | 295 | str += 2; |
296 | (*len)-=2; | ||
297 | } | ||
290 | 298 | ||
299 | /* If there is no BOM (which is a specification violation), | ||
300 | let's try to guess it. If one of the bytes is 0x00, it is | ||
301 | probably the most significant one. */ | ||
302 | if(!bom) { | ||
303 | if(str[1] == 0) | ||
304 | le = true; | ||
305 | } | ||
306 | |||
291 | i = 0; | 307 | i = 0; |
292 | 308 | ||
309 | outlen = *len / 2; | ||
310 | |||
293 | do { | 311 | do { |
294 | if(le) { | 312 | if(le) { |
295 | if(str[1]) | 313 | if(str[1]) |
@@ -303,9 +321,11 @@ static int unicode_munge(char** string, int *len) { | |||
303 | outstr[i++] = str[1]; | 321 | outstr[i++] = str[1]; |
304 | } | 322 | } |
305 | str += 2; | 323 | str += 2; |
306 | } while(str[0] || str[1]); | 324 | } while((str[0] || str[1]) && (i < outlen)); |
307 | 325 | ||
308 | *len = i; | 326 | *len = i; |
327 | |||
328 | outstr[i] = 0; /* Terminate the string */ | ||
309 | return 0; | 329 | return 0; |
310 | } | 330 | } |
311 | 331 | ||