diff options
Diffstat (limited to 'firmware/common/unicode.c')
-rw-r--r-- | firmware/common/unicode.c | 451 |
1 files changed, 313 insertions, 138 deletions
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index 3ff1814c4b..954ad47e1d 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c | |||
@@ -28,161 +28,227 @@ | |||
28 | 28 | ||
29 | #include <stdio.h> | 29 | #include <stdio.h> |
30 | #include "config.h" | 30 | #include "config.h" |
31 | #include "system.h" | ||
32 | #include "thread.h" | ||
31 | #include "file.h" | 33 | #include "file.h" |
32 | #include "debug.h" | 34 | #include "debug.h" |
33 | #include "rbunicode.h" | 35 | #include "rbunicode.h" |
34 | #include "rbpaths.h" | 36 | #include "rbpaths.h" |
37 | #include "pathfuncs.h" | ||
38 | #include "core_alloc.h" | ||
35 | 39 | ||
36 | #ifndef O_BINARY | 40 | #ifndef O_BINARY |
37 | #define O_BINARY 0 | 41 | #define O_BINARY 0 |
38 | #endif | 42 | #endif |
43 | #ifndef O_NOISODECODE | ||
44 | #define O_NOISODECODE 0 | ||
45 | #endif | ||
39 | 46 | ||
40 | static int default_codepage = 0; | 47 | #define getle16(p) (p[0] | (p[1] << 8)) |
41 | static int loaded_cp_table = 0; | 48 | #define getbe16(p) ((p[0] << 8) | p[1]) |
42 | |||
43 | #ifdef HAVE_LCD_BITMAP | ||
44 | 49 | ||
45 | #define MAX_CP_TABLE_SIZE 32768 | 50 | #if !defined (__PCTOOL__) && (CONFIG_PLATFORM & PLATFORM_NATIVE) |
46 | #define NUM_TABLES 5 | 51 | /* Because file scanning uses the default CP table when matching entries, |
52 | on-demand loading is not feasible; we also must use the filesystem lock */ | ||
53 | #include "file_internal.h" | ||
54 | #else /* APPLICATION */ | ||
55 | #ifdef __PCTOOL__ | ||
56 | #define yield() | ||
57 | #endif | ||
58 | #define open_noiso_internal open | ||
59 | #endif /* !APPLICATION */ | ||
60 | |||
61 | #if 0 /* not needed just now (will probably end up a spinlock) */ | ||
62 | #include "mutex.h" | ||
63 | static struct mutex cp_mutex SHAREDBSS_ATTR; | ||
64 | #define cp_lock_init() mutex_init(&cp_mutex) | ||
65 | #define cp_lock_enter() mutex_lock(&cp_mutex) | ||
66 | #define cp_lock_leave() mutex_unlock(&cp_mutex) | ||
67 | #else | ||
68 | #define cp_lock_init() do {} while (0) | ||
69 | #define cp_lock_enter() asm volatile ("") | ||
70 | #define cp_lock_leave() asm volatile ("") | ||
71 | #endif | ||
47 | 72 | ||
48 | static const char * const filename[NUM_TABLES] = | 73 | enum cp_tid |
49 | { | 74 | { |
50 | CODEPAGE_DIR"/iso.cp", | 75 | CP_TID_NONE = -1, |
51 | CODEPAGE_DIR"/932.cp", /* SJIS */ | 76 | CP_TID_ISO, |
52 | CODEPAGE_DIR"/936.cp", /* GB2312 */ | 77 | CP_TID_932, |
53 | CODEPAGE_DIR"/949.cp", /* KSX1001 */ | 78 | CP_TID_936, |
54 | CODEPAGE_DIR"/950.cp" /* BIG5 */ | 79 | CP_TID_949, |
80 | CP_TID_950, | ||
55 | }; | 81 | }; |
56 | 82 | ||
57 | static const char cp_2_table[NUM_CODEPAGES] = | 83 | struct cp_info |
58 | { | 84 | { |
59 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 | 85 | int8_t tid; |
86 | const char *filename; | ||
87 | const char *name; | ||
60 | }; | 88 | }; |
61 | 89 | ||
62 | static const char * const name_codepages[NUM_CODEPAGES+1] = | 90 | #ifdef HAVE_LCD_BITMAP |
63 | { | ||
64 | "ISO-8859-1", | ||
65 | "ISO-8859-7", | ||
66 | "ISO-8859-8", | ||
67 | "CP1251", | ||
68 | "ISO-8859-11", | ||
69 | "CP1256", | ||
70 | "ISO-8859-9", | ||
71 | "ISO-8859-2", | ||
72 | "CP1250", | ||
73 | "CP1252", | ||
74 | "SJIS", | ||
75 | "GB-2312", | ||
76 | "KSX-1001", | ||
77 | "BIG5", | ||
78 | "UTF-8", | ||
79 | "unknown" | ||
80 | }; | ||
81 | 91 | ||
82 | #if defined(APPLICATION) && defined(__linux__) | 92 | #define MAX_CP_TABLE_SIZE 32768 |
83 | static const char * const name_codepages_linux[NUM_CODEPAGES+1] = | ||
84 | { | ||
85 | /* "ISO-8859-1" */ "iso8859-1", | ||
86 | /* "ISO-8859-7" */ "iso8859-7", | ||
87 | /* "ISO-8859-8" */ "iso8859-8", | ||
88 | /* "CP1251" */ "cp1251", | ||
89 | /* "ISO-8859-11"*/ "iso8859-11", | ||
90 | /* "CP1256" */ "cp1256", | ||
91 | /* "ISO-8859-9" */ "iso8859-9", | ||
92 | /* "ISO-8859-2" */ "iso8859-2", | ||
93 | /* "CP1250" */ "cp1250", | ||
94 | /* "CP1252" */ "iso8859-15", /* closest, linux doesnt have a codepage named cp1252 */ | ||
95 | /* "SJIS" */ "cp932", | ||
96 | /* "GB-2312" */ "cp936", | ||
97 | /* "KSX-1001" */ "cp949", | ||
98 | /* "BIG5" */ "cp950", | ||
99 | /* "UTF-8" */ "utf8", | ||
100 | /* "unknown" */ "cp437" | ||
101 | }; | ||
102 | 93 | ||
103 | const char *get_current_codepage_name_linux(void) | 94 | #define CPF_ISO "iso.cp" |
95 | #define CPF_932 "932.cp" /* SJIS */ | ||
96 | #define CPF_936 "936.cp" /* GB2312 */ | ||
97 | #define CPF_949 "949.cp" /* KSX1001 */ | ||
98 | #define CPF_950 "950.cp" /* BIG5 */ | ||
99 | |||
100 | static const struct cp_info cp_info[NUM_CODEPAGES+1] = | ||
104 | { | 101 | { |
105 | if (default_codepage < 0 || default_codepage >= NUM_CODEPAGES) | 102 | [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" }, |
106 | return name_codepages_linux[NUM_CODEPAGES]; | 103 | [ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" }, |
107 | return name_codepages_linux[default_codepage]; | 104 | [ISO_8859_7] = { CP_TID_ISO , CPF_ISO, "ISO-8859-7" }, |
108 | } | 105 | [ISO_8859_8] = { CP_TID_ISO , CPF_ISO, "ISO-8859-8" }, |
109 | #endif | 106 | [WIN_1251] = { CP_TID_ISO , CPF_ISO, "CP1251" }, |
107 | [ISO_8859_11] = { CP_TID_ISO , CPF_ISO, "ISO-8859-11" }, | ||
108 | [WIN_1256] = { CP_TID_ISO , CPF_ISO, "CP1256" }, | ||
109 | [ISO_8859_9] = { CP_TID_ISO , CPF_ISO, "ISO-8859-9" }, | ||
110 | [ISO_8859_2] = { CP_TID_ISO , CPF_ISO, "ISO-8859-2" }, | ||
111 | [WIN_1250] = { CP_TID_ISO , CPF_ISO, "CP1250" }, | ||
112 | [WIN_1252] = { CP_TID_ISO , CPF_ISO, "CP1252" }, | ||
113 | [SJIS] = { CP_TID_932 , CPF_932, "SJIS" }, | ||
114 | [GB_2312] = { CP_TID_936 , CPF_936, "GB-2312" }, | ||
115 | [KSX_1001] = { CP_TID_949 , CPF_949, "KSX-1001" }, | ||
116 | [BIG_5] = { CP_TID_950 , CPF_950, "BIG5" }, | ||
117 | [UTF_8] = { CP_TID_NONE, NULL , "UTF-8" }, | ||
118 | }; | ||
110 | 119 | ||
111 | #else /* !HAVE_LCD_BITMAP, reduced support */ | 120 | #else /* !HAVE_LCD_BITMAP, reduced support */ |
112 | 121 | ||
113 | #define MAX_CP_TABLE_SIZE 768 | 122 | #define MAX_CP_TABLE_SIZE 768 |
114 | #define NUM_TABLES 1 | ||
115 | 123 | ||
116 | static const char * const filename[NUM_TABLES] = { | 124 | #define CPF_ISOMINI "isomini.cp" |
117 | CODEPAGE_DIR"/isomini.cp" | ||
118 | }; | ||
119 | 125 | ||
120 | static const char cp_2_table[NUM_CODEPAGES] = | 126 | static const struct cp_info cp_info[NUM_CODEPAGES+1] = |
121 | { | 127 | { |
122 | 0, 1, 1, 1, 1, 1, 1, 0 | 128 | [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" }, |
129 | [ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" }, | ||
130 | [ISO_8859_7] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-7" }, | ||
131 | [WIN_1251] = { CP_TID_ISO , CPF_ISOMINI, "CP1251" }, | ||
132 | [ISO_8859_9] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-9" }, | ||
133 | [ISO_8859_2] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-2" }, | ||
134 | [WIN_1250] = { CP_TID_ISO , CPF_ISOMINI, "CP1250" }, | ||
135 | [WIN_1252] = { CP_TID_ISO , CPF_ISOMINI, "CP1252" }, | ||
136 | [UTF_8] = { CP_TID_ISO , NULL , "UTF-8" }, | ||
123 | }; | 137 | }; |
124 | 138 | ||
125 | static const char * const name_codepages[NUM_CODEPAGES+1] = | 139 | #endif /* HAVE_LCD_BITMAP */ |
140 | |||
141 | static int default_cp = INIT_CODEPAGE; | ||
142 | static int default_cp_tid = CP_TID_NONE; | ||
143 | static int default_cp_handle = 0; | ||
144 | static int volatile default_cp_table_ref = 0; | ||
145 | |||
146 | static int loaded_cp_tid = CP_TID_NONE; | ||
147 | static int volatile cp_table_ref = 0; | ||
148 | #define CP_LOADING BIT_N(sizeof(int)*8-1) /* guard against multi loaders */ | ||
149 | |||
150 | /* non-default codepage table buffer (cannot be bufalloced! playback itself | ||
151 | may be making the load request) */ | ||
152 | static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; | ||
153 | |||
154 | #if defined(APPLICATION) && defined(__linux__) | ||
155 | static const char * const name_codepages_linux[NUM_CODEPAGES+1] = | ||
126 | { | 156 | { |
127 | "ISO-8859-1", | 157 | [0 ... NUM_CODEPAGES] = "unknown", |
128 | "ISO-8859-7", | 158 | [ISO_8859_1] = "iso8859-1", |
129 | "CP1251", | 159 | [ISO_8859_7] = "iso8859-7", |
130 | "ISO-8859-9", | 160 | [ISO_8859_8] = "iso8859-8", |
131 | "ISO-8859-2", | 161 | [WIN_1251] = "cp1251", |
132 | "CP1250", | 162 | [ISO_8859_11] = "iso8859-11", |
133 | "CP1252", | 163 | [WIN_1256] = "cp1256", |
134 | "UTF-8", | 164 | [ISO_8859_9] = "iso8859-9", |
135 | "unknown" | 165 | [ISO_8859_2] = "iso8859-2", |
166 | [WIN_1250] = "cp1250", | ||
167 | /* iso8859-15 is the closest match; Linux doesn't have a codepage named cp1252 */ | ||
168 | [WIN_1252] = "iso8859-15", | ||
169 | [SJIS] = "cp932", | ||
170 | [GB_2312] = "cp936", | ||
171 | [KSX_1001] = "cp949", | ||
172 | [BIG_5] = "cp950", | ||
173 | [UTF_8] = "utf8", | ||
136 | }; | 174 | }; |
137 | 175 | ||
138 | #endif | 176 | const char *get_current_codepage_name_linux(void) |
139 | 177 | { | |
140 | static unsigned short codepage_table[MAX_CP_TABLE_SIZE]; | 178 | int cp = default_cp; |
179 | if (cp < 0 || cp>= NUM_CODEPAGES) | ||
180 | cp = NUM_CODEPAGES; | ||
181 | return name_codepages_linux[cp]; | ||
182 | } | ||
183 | #endif /* defined(APPLICATION) && defined(__linux__) */ | ||
141 | 184 | ||
142 | static const unsigned char utf8comp[6] = | 185 | static const unsigned char utf8comp[6] = |
143 | { | 186 | { |
144 | 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC | 187 | 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC |
145 | }; | 188 | }; |
146 | 189 | ||
147 | /* Load codepage file into memory */ | 190 | static inline void cptable_tohw16(uint16_t *buf, unsigned int count) |
148 | static int load_cp_table(int cp) | ||
149 | { | 191 | { |
150 | int i = 0; | 192 | #ifdef ROCKBOX_BIG_ENDIAN |
151 | int table = cp_2_table[cp]; | 193 | for (unsigned int i = 0; i < count; i++) |
152 | int file, tablesize; | 194 | buf[i] = letoh16(buf[i]); |
153 | unsigned char tmp[2]; | 195 | #endif |
196 | (void)buf; (void)count; | ||
197 | } | ||
154 | 198 | ||
155 | if (table == 0 || table == loaded_cp_table) | 199 | static int move_callback(int handle, void *current, void *new) |
156 | return 1; | 200 | { |
201 | /* we don't keep a pointer but we have to stop it if this applies to a | ||
202 | buffer not yet swapped-in since it will likely be in use in an I/O | ||
203 | call */ | ||
204 | return (handle != default_cp_handle || default_cp_table_ref != 0) ? | ||
205 | BUFLIB_CB_CANNOT_MOVE : BUFLIB_CB_OK; | ||
206 | (void)current; (void)new; | ||
207 | } | ||
157 | 208 | ||
158 | file = open(filename[table-1], O_RDONLY|O_BINARY); | 209 | static int alloc_and_load_cp_table(int cp, void *buf) |
210 | { | ||
211 | static struct buflib_callbacks ops = | ||
212 | { .move_callback = move_callback }; | ||
159 | 213 | ||
160 | if (file < 0) { | 214 | /* alloc and read only if there is an associated file */ |
161 | DEBUGF("Can't open codepage file: %s.cp\n", filename[table-1]); | 215 | const char *filename = cp_info[cp].filename; |
216 | if (!filename) | ||
162 | return 0; | 217 | return 0; |
218 | |||
219 | char path[MAX_PATH]; | ||
220 | if (path_append(path, CODEPAGE_DIR, filename, sizeof (path)) | ||
221 | >= sizeof (path)) { | ||
222 | return -1; | ||
163 | } | 223 | } |
164 | 224 | ||
165 | tablesize = filesize(file) / 2; | 225 | /* must be opened without a chance of reentering from FS code */ |
226 | int fd = open_noiso_internal(path, O_RDONLY); | ||
227 | if (fd < 0) | ||
228 | return -1; | ||
166 | 229 | ||
167 | if (tablesize > MAX_CP_TABLE_SIZE) { | 230 | off_t size = filesize(fd); |
168 | DEBUGF("Invalid codepage file: %s.cp\n", filename[table-1]); | ||
169 | close(file); | ||
170 | return 0; | ||
171 | } | ||
172 | 231 | ||
173 | while (i < tablesize) { | 232 | if (size > 0 && size <= MAX_CP_TABLE_SIZE*2 && |
174 | if (!read(file, tmp, 2)) { | 233 | !(size % (off_t)sizeof (uint16_t))) { |
175 | DEBUGF("Can't read from codepage file: %s.cp\n", | 234 | |
176 | filename[table-1]); | 235 | /* if the buffer is provided, use that but don't alloc */ |
177 | loaded_cp_table = 0; | 236 | int handle = buf ? 0 : core_alloc_ex(filename, size, &ops); |
178 | return 0; | 237 | if (handle > 0) |
238 | buf = core_get_data(handle); | ||
239 | |||
240 | if (buf && read(fd, buf, size) == size) { | ||
241 | close(fd); | ||
242 | cptable_tohw16(buf, size / sizeof (uint16_t)); | ||
243 | return handle; | ||
179 | } | 244 | } |
180 | codepage_table[i++] = (tmp[1] << 8) | tmp[0]; | 245 | |
246 | if (handle > 0) | ||
247 | core_free(handle); | ||
181 | } | 248 | } |
182 | 249 | ||
183 | loaded_cp_table = table; | 250 | close(fd); |
184 | close(file); | 251 | return -1; |
185 | return 1; | ||
186 | } | 252 | } |
187 | 253 | ||
188 | /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ | 254 | /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ |
@@ -205,47 +271,96 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8) | |||
205 | unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | 271 | unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, |
206 | int cp, int count) | 272 | int cp, int count) |
207 | { | 273 | { |
208 | unsigned short ucs, tmp; | 274 | uint16_t *table = NULL; |
275 | |||
276 | cp_lock_enter(); | ||
277 | |||
278 | if (cp < 0 || cp >= NUM_CODEPAGES) | ||
279 | cp = default_cp; | ||
209 | 280 | ||
210 | if (cp == -1) /* use default codepage */ | 281 | int tid = cp_info[cp].tid; |
211 | cp = default_codepage; | ||
212 | 282 | ||
213 | if (!load_cp_table(cp)) cp = 0; | 283 | while (1) { |
284 | if (tid == default_cp_tid) { | ||
285 | /* use default table */ | ||
286 | if (default_cp_handle > 0) { | ||
287 | table = core_get_data(default_cp_handle); | ||
288 | default_cp_table_ref++; | ||
289 | } | ||
290 | |||
291 | break; | ||
292 | } | ||
293 | |||
294 | bool load = false; | ||
295 | |||
296 | if (tid == loaded_cp_tid) { | ||
297 | /* use loaded table */ | ||
298 | if (!(cp_table_ref & CP_LOADING)) { | ||
299 | if (tid != CP_TID_NONE) { | ||
300 | table = codepage_table; | ||
301 | cp_table_ref++; | ||
302 | } | ||
303 | |||
304 | break; | ||
305 | } | ||
306 | } else if (cp_table_ref == 0) { | ||
307 | load = true; | ||
308 | cp_table_ref |= CP_LOADING; | ||
309 | } | ||
310 | |||
311 | /* alloc and load must be done outside the lock */ | ||
312 | cp_lock_leave(); | ||
313 | |||
314 | if (!load) { | ||
315 | yield(); | ||
316 | } else if (alloc_and_load_cp_table(cp, codepage_table) < 0) { | ||
317 | cp = INIT_CODEPAGE; /* table may be clobbered now */ | ||
318 | tid = cp_info[cp].tid; | ||
319 | } | ||
320 | |||
321 | cp_lock_enter(); | ||
322 | |||
323 | if (load) { | ||
324 | loaded_cp_tid = tid; | ||
325 | cp_table_ref &= ~CP_LOADING; | ||
326 | } | ||
327 | } | ||
328 | |||
329 | cp_lock_leave(); | ||
214 | 330 | ||
215 | while (count--) { | 331 | while (count--) { |
332 | unsigned short ucs, tmp; | ||
333 | |||
216 | if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */ | 334 | if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */ |
217 | *utf8++ = *iso++; | 335 | *utf8++ = *iso++; |
218 | 336 | ||
219 | else { | 337 | else { |
220 | 338 | /* tid tells us which table to use and how */ | |
221 | /* cp tells us which codepage to convert from */ | 339 | switch (tid) { |
222 | switch (cp) { | 340 | case CP_TID_ISO: /* Greek */ |
223 | case ISO_8859_7: /* Greek */ | 341 | /* Hebrew */ |
224 | case WIN_1252: /* Western European */ | 342 | /* Cyrillic */ |
225 | case WIN_1251: /* Cyrillic */ | 343 | /* Thai */ |
226 | case ISO_8859_9: /* Turkish */ | 344 | /* Arabic */ |
227 | case ISO_8859_2: /* Latin Extended */ | 345 | /* Turkish */ |
228 | case WIN_1250: /* Central European */ | 346 | /* Latin Extended */ |
229 | #ifdef HAVE_LCD_BITMAP | 347 | /* Central European */ |
230 | case ISO_8859_8: /* Hebrew */ | 348 | /* Western European */ |
231 | case ISO_8859_11: /* Thai */ | ||
232 | case WIN_1256: /* Arabic */ | ||
233 | #endif | ||
234 | tmp = ((cp-1)*128) + (*iso++ - 128); | 349 | tmp = ((cp-1)*128) + (*iso++ - 128); |
235 | ucs = codepage_table[tmp]; | 350 | ucs = table[tmp]; |
236 | break; | 351 | break; |
237 | 352 | ||
238 | #ifdef HAVE_LCD_BITMAP | 353 | #ifdef HAVE_LCD_BITMAP |
239 | case SJIS: /* Japanese */ | 354 | case CP_TID_932: /* Japanese */ |
240 | if (*iso > 0xA0 && *iso < 0xE0) { | 355 | if (*iso > 0xA0 && *iso < 0xE0) { |
241 | tmp = *iso++ | (0xA100 - 0x8000); | 356 | tmp = *iso++ | (0xA100 - 0x8000); |
242 | ucs = codepage_table[tmp]; | 357 | ucs = table[tmp]; |
243 | break; | 358 | break; |
244 | } | 359 | } |
245 | 360 | ||
246 | case GB_2312: /* Simplified Chinese */ | 361 | case CP_TID_936: /* Simplified Chinese */ |
247 | case KSX_1001: /* Korean */ | 362 | case CP_TID_949: /* Korean */ |
248 | case BIG_5: /* Traditional Chinese */ | 363 | case CP_TID_950: /* Traditional Chinese */ |
249 | if (count < 1 || !iso[1]) { | 364 | if (count < 1 || !iso[1]) { |
250 | ucs = *iso++; | 365 | ucs = *iso++; |
251 | break; | 366 | break; |
@@ -256,7 +371,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
256 | tmp = *iso++ << 8; | 371 | tmp = *iso++ << 8; |
257 | tmp |= *iso++; | 372 | tmp |= *iso++; |
258 | tmp -= 0x8000; | 373 | tmp -= 0x8000; |
259 | ucs = codepage_table[tmp]; | 374 | ucs = table[tmp]; |
260 | count--; | 375 | count--; |
261 | break; | 376 | break; |
262 | #endif /* HAVE_LCD_BITMAP */ | 377 | #endif /* HAVE_LCD_BITMAP */ |
@@ -271,6 +386,17 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
271 | utf8 = utf8encode(ucs, utf8); | 386 | utf8 = utf8encode(ucs, utf8); |
272 | } | 387 | } |
273 | } | 388 | } |
389 | |||
390 | if (table) { | ||
391 | cp_lock_enter(); | ||
392 | if (table == codepage_table) { | ||
393 | cp_table_ref--; | ||
394 | } else { | ||
395 | default_cp_table_ref--; | ||
396 | } | ||
397 | cp_lock_leave(); | ||
398 | } | ||
399 | |||
274 | return utf8; | 400 | return utf8; |
275 | } | 401 | } |
276 | 402 | ||
@@ -288,7 +414,7 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, | |||
288 | utf16 += 4; | 414 | utf16 += 4; |
289 | count -= 2; | 415 | count -= 2; |
290 | } else { | 416 | } else { |
291 | ucs = (utf16[0] | (utf16[1] << 8)); | 417 | ucs = getle16(utf16); |
292 | utf16 += 2; | 418 | utf16 += 2; |
293 | count -= 1; | 419 | count -= 1; |
294 | } | 420 | } |
@@ -310,7 +436,7 @@ unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, | |||
310 | utf16 += 4; | 436 | utf16 += 4; |
311 | count -= 2; | 437 | count -= 2; |
312 | } else { | 438 | } else { |
313 | ucs = (utf16[0] << 8) | utf16[1]; | 439 | ucs = getbe16(utf16); |
314 | utf16 += 2; | 440 | utf16 += 2; |
315 | count -= 1; | 441 | count -= 1; |
316 | } | 442 | } |
@@ -400,8 +526,50 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs) | |||
400 | 526 | ||
401 | void set_codepage(int cp) | 527 | void set_codepage(int cp) |
402 | { | 528 | { |
403 | default_codepage = cp; | 529 | if (cp < 0 || cp >= NUM_CODEPAGES) |
404 | return; | 530 | cp = NUM_CODEPAGES; |
531 | |||
532 | /* load first then swap if load is successful, else just leave it; if | ||
533 | handle is 0 then we just free the current one; this won't happen often | ||
534 | thus we don't worry about reusing it and consequently avoid possible | ||
535 | clobbering of the existing one */ | ||
536 | |||
537 | int handle = -1; | ||
538 | int tid = cp_info[cp].tid; | ||
539 | |||
540 | while (1) { | ||
541 | cp_lock_enter(); | ||
542 | |||
543 | if (default_cp_tid == tid) | ||
544 | break; | ||
545 | |||
546 | if (handle >= 0 && default_cp_table_ref == 0) { | ||
547 | int hold = default_cp_handle; | ||
548 | default_cp_handle = handle; | ||
549 | handle = hold; | ||
550 | default_cp_tid = tid; | ||
551 | break; | ||
552 | } | ||
553 | |||
554 | /* alloc and load must be done outside the lock */ | ||
555 | cp_lock_leave(); | ||
556 | |||
557 | if (handle < 0 && (handle = alloc_and_load_cp_table(cp, NULL)) < 0) | ||
558 | return; /* load failed (OOM or unreadable table file); change nothing */ | ||
559 | |||
560 | yield(); | ||
561 | } | ||
562 | |||
563 | default_cp = cp; | ||
564 | cp_lock_leave(); | ||
565 | |||
566 | if (handle > 0) | ||
567 | core_free(handle); | ||
568 | } | ||
569 | |||
570 | int get_codepage(void) | ||
571 | { | ||
572 | return default_cp; | ||
405 | } | 573 | } |
406 | 574 | ||
407 | /* seek to a given char in a utf8 string and | 575 | /* seek to a given char in a utf8 string and |
@@ -418,9 +586,16 @@ int utf8seek(const unsigned char* utf8, int offset) | |||
418 | return pos; | 586 | return pos; |
419 | } | 587 | } |
420 | 588 | ||
421 | const char* get_codepage_name(int cp) | 589 | const char * get_codepage_name(int cp) |
422 | { | 590 | { |
423 | if (cp < 0 || cp>= NUM_CODEPAGES) | 591 | if (cp < 0 || cp >= NUM_CODEPAGES) |
424 | return name_codepages[NUM_CODEPAGES]; | 592 | cp = NUM_CODEPAGES; |
425 | return name_codepages[cp]; | 593 | return cp_info[cp].name; |
426 | } | 594 | } |
595 | |||
596 | #if 0 /* not needed just now */ | ||
597 | void unicode_init(void) | ||
598 | { | ||
599 | cp_lock_init(); | ||
600 | } | ||
601 | #endif | ||