some unicode fixes by Frank Dischner

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8988 a1c6a512-1295-4272-9138-f99709370657
author: Marcoen Hirschberg <marcoen@gmail.com> 2006-03-10 11:13:21 +0000
committer: Marcoen Hirschberg <marcoen@gmail.com> 2006-03-10 11:13:21 +0000
commit: ea4dea67aee1081f4f090f0c763623c3aacf1f49 (patch)
tree: 4d247998501c07c105624bba73a21838380d1056
parent: 16a292d65c0e392856115d2b19f82b32c48470c4 (diff)
download: rockbox-ea4dea67aee1081f4f090f0c763623c3aacf1f49.tar.gz rockbox-ea4dea67aee1081f4f090f0c763623c3aacf1f49.zip
2 files changed, 12 insertions, 14 deletions
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c
index e2e2dc2f3a..e04c7cf1e6 100644
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@@ -41,7 +41,7 @@ static const char *filename[NUM_TABLES] =
 static const char cp_2_table[NUM_CODEPAGES] =
 {
-    0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5
+    0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0
 };
 /* Load codepage file into memory */
@@ -52,7 +52,7 @@ int load_cp_table(int cp)
    int file, tablesize;
    unsigned char tmp[2];
-    if (cp == 0 || table == loaded_cp_table)
+    if (table == 0 || table == loaded_cp_table)
        return 1;
    file = open(filename[table-1], O_RDONLY|O_BINARY);
@@ -109,12 +109,12 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
    unsigned short ucs, tmp;
    if (cp == -1) /* use default codepage */
-       cp = default_codepage;
+        cp = default_codepage;
    if (!load_cp_table(cp)) cp = 0;
    while (count--) {
-        if (*iso < 128)
+        if (*iso < 128 || cp == 0x0C) /* Already UTF-8 */
            *utf8++ = *iso++;
        else {
@@ -125,7 +125,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
                case 0x02: /* Hebrew (ISO-8859-8) */
                case 0x03: /* Russian (CP1251) */
                case 0x04: /* Thai (ISO-8859-11) */
-                case 0x05: /* Arabic (ISO-8859-6) */
+                case 0x05: /* Arabic (CP1256) */
                case 0x06: /* Turkish (ISO-8859-9) */
                case 0x07: /* Latin Extended (ISO-8859-2) */
                    tmp = ((cp-1)*128) + (*iso++ - 128);
@@ -134,7 +134,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
                case 0x08: /* Japanese (SJIS) */
                    if (*iso > 0xA0 && *iso < 0xE0) {
-                        tmp = *iso | 0xA100;
+                        tmp = *iso++ | (0xA100 - 0x8000);
                        ucs = codepage_table[tmp];
                        break;
                    }
@@ -156,14 +156,13 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
                    count--;
                    break;
-                case 0x0C: /* UTF-8, do nothing */
                default:
                    ucs = *iso++;
                    break;
            }
-            if (ucs == 0) /* unknown char, assume invalid encoding */
+            if (ucs == 0) /* unknown char, use replacement char */
-                ucs = 0xffff;
+                ucs = 0xfffd;
            utf8 = utf8encode(ucs, utf8);
        }
    }
@@ -268,7 +267,7 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
            code = c & 0x07;
        } else {
            /* Invalid size. */
-            code = 0xffff;
+            code = 0xfffd;
        }
        while (tail-- && ((c = *utf8++) != 0)) {
@@ -278,17 +277,17 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
            } else {
                /* Invalid continuation char */
-                code = 0xffff;
+                code = 0xfffd;
                utf8--;
                break;
            }
        }
    } else {
        /* Invalid UTF-8 char */
-        code = 0xffff;
+        code = 0xfffd;
    }
    /* currently we don't support chars above U-FFFF */
-    *ucs = (code < 0x10000) ? code : 0xffff;
+    *ucs = (code < 0x10000) ? code : 0xfffd;
    return utf8;
 }
diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h
index 1d4bc43096..0e12890736 100644
--- a/firmware/include/rbunicode.h
+++ b/firmware/include/rbunicode.h
@@ -15,7 +15,6 @@
 #define MASK   0xC0 /* 11000000 */
 #define COMP   0x80 /* 10x      */
-extern int codepage;
 /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
 unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
author	Marcoen Hirschberg <marcoen@gmail.com>	2006-03-10 11:13:21 +0000
committer	Marcoen Hirschberg <marcoen@gmail.com>	2006-03-10 11:13:21 +0000
commit	ea4dea67aee1081f4f090f0c763623c3aacf1f49 (patch)
tree	4d247998501c07c105624bba73a21838380d1056
parent	16a292d65c0e392856115d2b19f82b32c48470c4 (diff)
download	rockbox-ea4dea67aee1081f4f090f0c763623c3aacf1f49.tar.gz rockbox-ea4dea67aee1081f4f090f0c763623c3aacf1f49.zip