diff options
author | Marcoen Hirschberg <marcoen@gmail.com> | 2006-03-10 11:13:21 +0000 |
---|---|---|
committer | Marcoen Hirschberg <marcoen@gmail.com> | 2006-03-10 11:13:21 +0000 |
commit | ea4dea67aee1081f4f090f0c763623c3aacf1f49 (patch) | |
tree | 4d247998501c07c105624bba73a21838380d1056 | |
parent | 16a292d65c0e392856115d2b19f82b32c48470c4 (diff) | |
download | rockbox-ea4dea67aee1081f4f090f0c763623c3aacf1f49.tar.gz rockbox-ea4dea67aee1081f4f090f0c763623c3aacf1f49.zip |
Some Unicode fixes by Frank Dischner
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8988 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | firmware/common/unicode.c | 25 | ||||
-rw-r--r-- | firmware/include/rbunicode.h | 1 |
2 files changed, 12 insertions, 14 deletions
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index e2e2dc2f3a..e04c7cf1e6 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c | |||
@@ -41,7 +41,7 @@ static const char *filename[NUM_TABLES] = | |||
41 | 41 | ||
42 | static const char cp_2_table[NUM_CODEPAGES] = | 42 | static const char cp_2_table[NUM_CODEPAGES] = |
43 | { | 43 | { |
44 | 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5 | 44 | 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 |
45 | }; | 45 | }; |
46 | 46 | ||
47 | /* Load codepage file into memory */ | 47 | /* Load codepage file into memory */ |
@@ -52,7 +52,7 @@ int load_cp_table(int cp) | |||
52 | int file, tablesize; | 52 | int file, tablesize; |
53 | unsigned char tmp[2]; | 53 | unsigned char tmp[2]; |
54 | 54 | ||
55 | if (cp == 0 || table == loaded_cp_table) | 55 | if (table == 0 || table == loaded_cp_table) |
56 | return 1; | 56 | return 1; |
57 | 57 | ||
58 | file = open(filename[table-1], O_RDONLY|O_BINARY); | 58 | file = open(filename[table-1], O_RDONLY|O_BINARY); |
@@ -109,12 +109,12 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
109 | unsigned short ucs, tmp; | 109 | unsigned short ucs, tmp; |
110 | 110 | ||
111 | if (cp == -1) /* use default codepage */ | 111 | if (cp == -1) /* use default codepage */ |
112 | cp = default_codepage; | 112 | cp = default_codepage; |
113 | 113 | ||
114 | if (!load_cp_table(cp)) cp = 0; | 114 | if (!load_cp_table(cp)) cp = 0; |
115 | 115 | ||
116 | while (count--) { | 116 | while (count--) { |
117 | if (*iso < 128) | 117 | if (*iso < 128 || cp == 0x0C) /* Already UTF-8 */ |
118 | *utf8++ = *iso++; | 118 | *utf8++ = *iso++; |
119 | 119 | ||
120 | else { | 120 | else { |
@@ -125,7 +125,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
125 | case 0x02: /* Hebrew (ISO-8859-8) */ | 125 | case 0x02: /* Hebrew (ISO-8859-8) */ |
126 | case 0x03: /* Russian (CP1251) */ | 126 | case 0x03: /* Russian (CP1251) */ |
127 | case 0x04: /* Thai (ISO-8859-11) */ | 127 | case 0x04: /* Thai (ISO-8859-11) */ |
128 | case 0x05: /* Arabic (ISO-8859-6) */ | 128 | case 0x05: /* Arabic (CP1256) */ |
129 | case 0x06: /* Turkish (ISO-8859-9) */ | 129 | case 0x06: /* Turkish (ISO-8859-9) */ |
130 | case 0x07: /* Latin Extended (ISO-8859-2) */ | 130 | case 0x07: /* Latin Extended (ISO-8859-2) */ |
131 | tmp = ((cp-1)*128) + (*iso++ - 128); | 131 | tmp = ((cp-1)*128) + (*iso++ - 128); |
@@ -134,7 +134,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
134 | 134 | ||
135 | case 0x08: /* Japanese (SJIS) */ | 135 | case 0x08: /* Japanese (SJIS) */ |
136 | if (*iso > 0xA0 && *iso < 0xE0) { | 136 | if (*iso > 0xA0 && *iso < 0xE0) { |
137 | tmp = *iso | 0xA100; | 137 | tmp = *iso++ | (0xA100 - 0x8000); |
138 | ucs = codepage_table[tmp]; | 138 | ucs = codepage_table[tmp]; |
139 | break; | 139 | break; |
140 | } | 140 | } |
@@ -156,14 +156,13 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
156 | count--; | 156 | count--; |
157 | break; | 157 | break; |
158 | 158 | ||
159 | case 0x0C: /* UTF-8, do nothing */ | ||
160 | default: | 159 | default: |
161 | ucs = *iso++; | 160 | ucs = *iso++; |
162 | break; | 161 | break; |
163 | } | 162 | } |
164 | 163 | ||
165 | if (ucs == 0) /* unknown char, assume invalid encoding */ | 164 | if (ucs == 0) /* unknown char, use replacement char */ |
166 | ucs = 0xffff; | 165 | ucs = 0xfffd; |
167 | utf8 = utf8encode(ucs, utf8); | 166 | utf8 = utf8encode(ucs, utf8); |
168 | } | 167 | } |
169 | } | 168 | } |
@@ -268,7 +267,7 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs) | |||
268 | code = c & 0x07; | 267 | code = c & 0x07; |
269 | } else { | 268 | } else { |
270 | /* Invalid size. */ | 269 | /* Invalid size. */ |
271 | code = 0xffff; | 270 | code = 0xfffd; |
272 | } | 271 | } |
273 | 272 | ||
274 | while (tail-- && ((c = *utf8++) != 0)) { | 273 | while (tail-- && ((c = *utf8++) != 0)) { |
@@ -278,17 +277,17 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs) | |||
278 | 277 | ||
279 | } else { | 278 | } else { |
280 | /* Invalid continuation char */ | 279 | /* Invalid continuation char */ |
281 | code = 0xffff; | 280 | code = 0xfffd; |
282 | utf8--; | 281 | utf8--; |
283 | break; | 282 | break; |
284 | } | 283 | } |
285 | } | 284 | } |
286 | } else { | 285 | } else { |
287 | /* Invalid UTF-8 char */ | 286 | /* Invalid UTF-8 char */ |
288 | code = 0xffff; | 287 | code = 0xfffd; |
289 | } | 288 | } |
290 | /* currently we don't support chars above U-FFFF */ | 289 | /* currently we don't support chars above U-FFFF */ |
291 | *ucs = (code < 0x10000) ? code : 0xffff; | 290 | *ucs = (code < 0x10000) ? code : 0xfffd; |
292 | return utf8; | 291 | return utf8; |
293 | } | 292 | } |
294 | 293 | ||
diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h index 1d4bc43096..0e12890736 100644 --- a/firmware/include/rbunicode.h +++ b/firmware/include/rbunicode.h | |||
@@ -15,7 +15,6 @@ | |||
15 | #define MASK 0xC0 /* 11000000 */ | 15 | #define MASK 0xC0 /* 11000000 */ |
16 | #define COMP 0x80 /* 10x */ | 16 | #define COMP 0x80 /* 10x */ |
17 | 17 | ||
18 | extern int codepage; | ||
19 | 18 | ||
20 | /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ | 19 | /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ |
21 | unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8); | 20 | unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8); |