From 2f4a5616c01c5e3f1cfb8cc1f085ceb1d45f60b8 Mon Sep 17 00:00:00 2001 From: Linus Nielsen Feltzing Date: Fri, 9 Jan 2004 00:47:26 +0000 Subject: The Unicode parser now handles unterminated strings, plus it tries to guess the byte order when the BOM is missing (a violation of the spec, btw) git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4215 a1c6a512-1295-4272-9138-f99709370657 --- firmware/id3.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'firmware') diff --git a/firmware/id3.c b/firmware/id3.c index fc4e1c11ac..3a87fc49c5 100644 --- a/firmware/id3.c +++ b/firmware/id3.c @@ -260,6 +260,8 @@ static int unicode_munge(char** string, int *len) { int i; char *str = *string; char *outstr = *string; + bool bom = false; + int outlen; if(str[0] > 0x03) { /* Plain old string */ @@ -275,21 +277,37 @@ static int unicode_munge(char** string, int *len) { /* Unicode with or without BOM */ if(str[0] == 0x01 || str[0] == 0x02) { + (*len)--; str++; tmp = BYTES2INT(0, 0, str[0], str[1]); /* Now check if there is a BOM (zero-width non-breaking space, 0xfeff) and if it is in little or big endian format */ if(tmp == 0xfffe) { /* Little endian? */ + bom = true; le = true; str += 2; + (*len)-=2; } - if(tmp == 0xfeff) /* Big endian? */ + if(tmp == 0xfeff) { /* Big endian? */ + bom = true; str += 2; + (*len)-=2; + } + /* If there is no BOM (which is a specification violation), + let's try to guess it. If one of the bytes is 0x00, it is + probably the most significant one. */ + if(!bom) { + if(str[1] == 0) + le = true; + } + i = 0; + outlen = *len / 2; + do { if(le) { if(str[1]) @@ -303,9 +321,11 @@ static int unicode_munge(char** string, int *len) { outstr[i++] = str[1]; } str += 2; - } while(str[0] || str[1]); + } while((str[0] || str[1]) && (i < outlen)); *len = i; + + outstr[i] = 0; /* Terminate the string */ return 0; } -- cgit v1.2.3