From 2f4a5616c01c5e3f1cfb8cc1f085ceb1d45f60b8 Mon Sep 17 00:00:00 2001
From: Linus Nielsen Feltzing <linus@haxx.se>
Date: Fri, 9 Jan 2004 00:47:26 +0000
Subject: The Unicode parser now handles unterminated strings, plus it tries to
 guess the byte order when the BOM is missing (a violation of the spec, btw)

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4215 a1c6a512-1295-4272-9138-f99709370657
---
 firmware/id3.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'firmware')

diff --git a/firmware/id3.c b/firmware/id3.c
index fc4e1c11ac..3a87fc49c5 100644
--- a/firmware/id3.c
+++ b/firmware/id3.c
@@ -260,6 +260,8 @@ static int unicode_munge(char** string, int *len) {
    int i;
    char *str = *string;
    char *outstr = *string;
+   bool bom = false;
+   int outlen;
 
    if(str[0] > 0x03) {
       /* Plain old string */
@@ -275,21 +277,37 @@ static int unicode_munge(char** string, int *len) {
 
    /* Unicode with or without BOM */
    if(str[0] == 0x01 || str[0] == 0x02) {
+      (*len)--;
       str++;
       tmp = BYTES2INT(0, 0, str[0], str[1]);
 
       /* Now check if there is a BOM (zero-width non-breaking space, 0xfeff)
          and if it is in little or big endian format */
       if(tmp == 0xfffe) { /* Little endian? */
+	 bom = true;
          le = true;
          str += 2;
+	 (*len)-=2;
       }
 
-      if(tmp == 0xfeff) /* Big endian? */
+      if(tmp == 0xfeff) { /* Big endian? */
+	 bom = true;
          str += 2;
+	 (*len)-=2;
+      }
 
+      /* If there is no BOM (which is a specification violation),
+	 let's try to guess the byte order. If the second byte is 0x00,
+	 it is probably the most significant one (little endian). */
+      if(!bom) {
+	 if(str[1] == 0)
+	    le = true;
+      }
+      
       i = 0;
 
+      outlen = *len / 2;
+      
       do {
          if(le) {
             if(str[1])
@@ -303,9 +321,11 @@ static int unicode_munge(char** string, int *len) {
                outstr[i++] = str[1];
          }
          str += 2;
-      } while(str[0] || str[1]);
+      } while((str[0] || str[1]) && (i < outlen));
 
       *len = i;
+
+      outstr[i] = 0; /* Terminate the string */
       return 0;
    }
 
-- 
cgit v1.2.3