summary refs log tree commit diff
path: root/firmware/common/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/common/unicode.c')
-rw-r--r-- firmware/common/unicode.c 451
1 files changed, 313 insertions, 138 deletions
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c
index 3ff1814c4b..954ad47e1d 100644
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@@ -28,161 +28,227 @@
28 28
29#include <stdio.h> 29#include <stdio.h>
30#include "config.h" 30#include "config.h"
31#include "system.h"
32#include "thread.h"
31#include "file.h" 33#include "file.h"
32#include "debug.h" 34#include "debug.h"
33#include "rbunicode.h" 35#include "rbunicode.h"
34#include "rbpaths.h" 36#include "rbpaths.h"
37#include "pathfuncs.h"
38#include "core_alloc.h"
35 39
36#ifndef O_BINARY 40#ifndef O_BINARY
37#define O_BINARY 0 41#define O_BINARY 0
38#endif 42#endif
43#ifndef O_NOISODECODE
44#define O_NOISODECODE 0
45#endif
39 46
40static int default_codepage = 0; 47#define getle16(p) (p[0] | (p[1] << 8))
41static int loaded_cp_table = 0; 48#define getbe16(p) ((p[0] << 8) | p[1])
42
43#ifdef HAVE_LCD_BITMAP
44 49
45#define MAX_CP_TABLE_SIZE 32768 50#if !defined (__PCTOOL__) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
46#define NUM_TABLES 5 51/* Because file scanning uses the default CP table when matching entries,
52 on-demand loading is not feasible; we also must use the filesystem lock */
53#include "file_internal.h"
54#else /* APPLICATION */
55#ifdef __PCTOOL__
56#define yield()
57#endif
58#define open_noiso_internal open
59#endif /* !APPLICATION */
60
61#if 0 /* not needed just now (will probably end up a spinlock) */
62#include "mutex.h"
63static struct mutex cp_mutex SHAREDBSS_ATTR;
64#define cp_lock_init() mutex_init(&cp_mutex)
65#define cp_lock_enter() mutex_lock(&cp_mutex)
66#define cp_lock_leave() mutex_unlock(&cp_mutex)
67#else
68#define cp_lock_init() do {} while (0)
69#define cp_lock_enter() asm volatile ("")
70#define cp_lock_leave() asm volatile ("")
71#endif
47 72
48static const char * const filename[NUM_TABLES] = 73enum cp_tid
49{ 74{
50 CODEPAGE_DIR"/iso.cp", 75 CP_TID_NONE = -1,
51 CODEPAGE_DIR"/932.cp", /* SJIS */ 76 CP_TID_ISO,
52 CODEPAGE_DIR"/936.cp", /* GB2312 */ 77 CP_TID_932,
53 CODEPAGE_DIR"/949.cp", /* KSX1001 */ 78 CP_TID_936,
54 CODEPAGE_DIR"/950.cp" /* BIG5 */ 79 CP_TID_949,
80 CP_TID_950,
55}; 81};
56 82
57static const char cp_2_table[NUM_CODEPAGES] = 83struct cp_info
58{ 84{
59 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 85 int8_t tid;
86 const char *filename;
87 const char *name;
60}; 88};
61 89
62static const char * const name_codepages[NUM_CODEPAGES+1] = 90#ifdef HAVE_LCD_BITMAP
63{
64 "ISO-8859-1",
65 "ISO-8859-7",
66 "ISO-8859-8",
67 "CP1251",
68 "ISO-8859-11",
69 "CP1256",
70 "ISO-8859-9",
71 "ISO-8859-2",
72 "CP1250",
73 "CP1252",
74 "SJIS",
75 "GB-2312",
76 "KSX-1001",
77 "BIG5",
78 "UTF-8",
79 "unknown"
80};
81 91
82#if defined(APPLICATION) && defined(__linux__) 92#define MAX_CP_TABLE_SIZE 32768
83static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
84{
85 /* "ISO-8859-1" */ "iso8859-1",
86 /* "ISO-8859-7" */ "iso8859-7",
87 /* "ISO-8859-8" */ "iso8859-8",
88 /* "CP1251" */ "cp1251",
89 /* "ISO-8859-11"*/ "iso8859-11",
90 /* "CP1256" */ "cp1256",
91 /* "ISO-8859-9" */ "iso8859-9",
92 /* "ISO-8859-2" */ "iso8859-2",
93 /* "CP1250" */ "cp1250",
94 /* "CP1252" */ "iso8859-15", /* closest, linux doesnt have a codepage named cp1252 */
95 /* "SJIS" */ "cp932",
96 /* "GB-2312" */ "cp936",
97 /* "KSX-1001" */ "cp949",
98 /* "BIG5" */ "cp950",
99 /* "UTF-8" */ "utf8",
100 /* "unknown" */ "cp437"
101};
102 93
103const char *get_current_codepage_name_linux(void) 94#define CPF_ISO "iso.cp"
95#define CPF_932 "932.cp" /* SJIS */
96#define CPF_936 "936.cp" /* GB2312 */
97#define CPF_949 "949.cp" /* KSX1001 */
98#define CPF_950 "950.cp" /* BIG5 */
99
100static const struct cp_info cp_info[NUM_CODEPAGES+1] =
104{ 101{
105 if (default_codepage < 0 || default_codepage >= NUM_CODEPAGES) 102 [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" },
106 return name_codepages_linux[NUM_CODEPAGES]; 103 [ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" },
107 return name_codepages_linux[default_codepage]; 104 [ISO_8859_7] = { CP_TID_ISO , CPF_ISO, "ISO-8859-7" },
108} 105 [ISO_8859_8] = { CP_TID_ISO , CPF_ISO, "ISO-8859-8" },
109#endif 106 [WIN_1251] = { CP_TID_ISO , CPF_ISO, "CP1251" },
107 [ISO_8859_11] = { CP_TID_ISO , CPF_ISO, "ISO-8859-11" },
108 [WIN_1256] = { CP_TID_ISO , CPF_ISO, "CP1256" },
109 [ISO_8859_9] = { CP_TID_ISO , CPF_ISO, "ISO-8859-9" },
110 [ISO_8859_2] = { CP_TID_ISO , CPF_ISO, "ISO-8859-2" },
111 [WIN_1250] = { CP_TID_ISO , CPF_ISO, "CP1250" },
112 [WIN_1252] = { CP_TID_ISO , CPF_ISO, "CP1252" },
113 [SJIS] = { CP_TID_932 , CPF_932, "SJIS" },
114 [GB_2312] = { CP_TID_936 , CPF_936, "GB-2312" },
115 [KSX_1001] = { CP_TID_949 , CPF_949, "KSX-1001" },
116 [BIG_5] = { CP_TID_950 , CPF_950, "BIG5" },
117 [UTF_8] = { CP_TID_NONE, NULL , "UTF-8" },
118};
110 119
111#else /* !HAVE_LCD_BITMAP, reduced support */ 120#else /* !HAVE_LCD_BITMAP, reduced support */
112 121
113#define MAX_CP_TABLE_SIZE 768 122#define MAX_CP_TABLE_SIZE 768
114#define NUM_TABLES 1
115 123
116static const char * const filename[NUM_TABLES] = { 124#define CPF_ISOMINI "isomini.cp"
117 CODEPAGE_DIR"/isomini.cp"
118};
119 125
120static const char cp_2_table[NUM_CODEPAGES] = 126static const struct cp_info cp_info[NUM_CODEPAGES+1] =
121{ 127{
122 0, 1, 1, 1, 1, 1, 1, 0 128 [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" },
129 [ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" },
130 [ISO_8859_7] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-7" },
131 [WIN_1251] = { CP_TID_ISO , CPF_ISOMINI, "CP1251" },
132 [ISO_8859_9] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-9" },
133 [ISO_8859_2] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-2" },
134 [WIN_1250] = { CP_TID_ISO , CPF_ISOMINI, "CP1250" },
135 [WIN_1252] = { CP_TID_ISO , CPF_ISOMINI, "CP1252" },
136 [UTF_8] = { CP_TID_NONE, NULL , "UTF-8" },
123}; 137};
124 138
125static const char * const name_codepages[NUM_CODEPAGES+1] = 139#endif /* HAVE_LCD_BITMAP */
140
141static int default_cp = INIT_CODEPAGE;
142static int default_cp_tid = CP_TID_NONE;
143static int default_cp_handle = 0;
144static int volatile default_cp_table_ref = 0;
145
146static int loaded_cp_tid = CP_TID_NONE;
147static int volatile cp_table_ref = 0;
148#define CP_LOADING BIT_N(sizeof(int)*8-1) /* guard against multi loaders */
149
150/* non-default codepage table buffer (cannot be bufalloced! playback itself
151 may be making the load request) */
152static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1];
153
154#if defined(APPLICATION) && defined(__linux__)
155static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
126{ 156{
127 "ISO-8859-1", 157 [0 ... NUM_CODEPAGES] = "unknown",
128 "ISO-8859-7", 158 [ISO_8859_1] = "iso8859-1",
129 "CP1251", 159 [ISO_8859_7] = "iso8859-7",
130 "ISO-8859-9", 160 [ISO_8859_8] = "iso8859-8",
131 "ISO-8859-2", 161 [WIN_1251] = "cp1251",
132 "CP1250", 162 [ISO_8859_11] = "iso8859-11",
133 "CP1252", 163 [WIN_1256] = "cp1256",
134 "UTF-8", 164 [ISO_8859_9] = "iso8859-9",
135 "unknown" 165 [ISO_8859_2] = "iso8859-2",
166 [WIN_1250] = "cp1250",
167 /* iso8859-15 is closest, linux doesnt have a codepage named cp1252 */
168 [WIN_1252] = "iso8859-15",
169 [SJIS] = "cp932",
170 [GB_2312] = "cp936",
171 [KSX_1001] = "cp949",
172 [BIG_5] = "cp950",
173 [UTF_8] = "utf8",
136}; 174};
137 175
138#endif 176const char *get_current_codepage_name_linux(void)
139 177{
140static unsigned short codepage_table[MAX_CP_TABLE_SIZE]; 178 int cp = default_cp;
179 if (cp < 0 || cp>= NUM_CODEPAGES)
180 cp = NUM_CODEPAGES;
181 return name_codepages_linux[cp];
182}
183#endif /* defined(APPLICATION) && defined(__linux__) */
141 184
142static const unsigned char utf8comp[6] = 185static const unsigned char utf8comp[6] =
143{ 186{
144 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC 187 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
145}; 188};
146 189
147/* Load codepage file into memory */ 190static inline void cptable_tohw16(uint16_t *buf, unsigned int count)
148static int load_cp_table(int cp)
149{ 191{
150 int i = 0; 192#ifdef ROCKBOX_BIG_ENDIAN
151 int table = cp_2_table[cp]; 193 for (unsigned int i = 0; i < count; i++)
152 int file, tablesize; 194 buf[i] = letoh16(buf[i]);
153 unsigned char tmp[2]; 195#endif
196 (void)buf; (void)count;
197}
154 198
155 if (table == 0 || table == loaded_cp_table) 199static int move_callback(int handle, void *current, void *new)
156 return 1; 200{
201 /* we don't keep a pointer but we have to stop it if this applies to a
202 buffer not yet swapped-in since it will likely be in use in an I/O
203 call */
204 return (handle != default_cp_handle || default_cp_table_ref != 0) ?
205 BUFLIB_CB_CANNOT_MOVE : BUFLIB_CB_OK;
206 (void)current; (void)new;
207}
157 208
158 file = open(filename[table-1], O_RDONLY|O_BINARY); 209static int alloc_and_load_cp_table(int cp, void *buf)
210{
211 static struct buflib_callbacks ops =
212 { .move_callback = move_callback };
159 213
160 if (file < 0) { 214 /* alloc and read only if there is an associated file */
161 DEBUGF("Can't open codepage file: %s.cp\n", filename[table-1]); 215 const char *filename = cp_info[cp].filename;
216 if (!filename)
162 return 0; 217 return 0;
218
219 char path[MAX_PATH];
220 if (path_append(path, CODEPAGE_DIR, filename, sizeof (path))
221 >= sizeof (path)) {
222 return -1;
163 } 223 }
164 224
165 tablesize = filesize(file) / 2; 225 /* must be opened without a chance of reentering from FS code */
226 int fd = open_noiso_internal(path, O_RDONLY);
227 if (fd < 0)
228 return -1;
166 229
167 if (tablesize > MAX_CP_TABLE_SIZE) { 230 off_t size = filesize(fd);
168 DEBUGF("Invalid codepage file: %s.cp\n", filename[table-1]);
169 close(file);
170 return 0;
171 }
172 231
173 while (i < tablesize) { 232 if (size > 0 && size <= MAX_CP_TABLE_SIZE*2 &&
174 if (!read(file, tmp, 2)) { 233 !(size % (off_t)sizeof (uint16_t))) {
175 DEBUGF("Can't read from codepage file: %s.cp\n", 234
176 filename[table-1]); 235 /* if the buffer is provided, use that but don't alloc */
177 loaded_cp_table = 0; 236 int handle = buf ? 0 : core_alloc_ex(filename, size, &ops);
178 return 0; 237 if (handle > 0)
238 buf = core_get_data(handle);
239
240 if (buf && read(fd, buf, size) == size) {
241 close(fd);
242 cptable_tohw16(buf, size / sizeof (uint16_t));
243 return handle;
179 } 244 }
180 codepage_table[i++] = (tmp[1] << 8) | tmp[0]; 245
246 if (handle > 0)
247 core_free(handle);
181 } 248 }
182 249
183 loaded_cp_table = table; 250 close(fd);
184 close(file); 251 return -1;
185 return 1;
186} 252}
187 253
188/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ 254/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
@@ -205,47 +271,96 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
205unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, 271unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
206 int cp, int count) 272 int cp, int count)
207{ 273{
208 unsigned short ucs, tmp; 274 uint16_t *table = NULL;
275
276 cp_lock_enter();
277
278 if (cp < 0 || cp >= NUM_CODEPAGES)
279 cp = default_cp;
209 280
210 if (cp == -1) /* use default codepage */ 281 int tid = cp_info[cp].tid;
211 cp = default_codepage;
212 282
213 if (!load_cp_table(cp)) cp = 0; 283 while (1) {
284 if (tid == default_cp_tid) {
285 /* use default table */
286 if (default_cp_handle > 0) {
287 table = core_get_data(default_cp_handle);
288 default_cp_table_ref++;
289 }
290
291 break;
292 }
293
294 bool load = false;
295
296 if (tid == loaded_cp_tid) {
297 /* use loaded table */
298 if (!(cp_table_ref & CP_LOADING)) {
299 if (tid != CP_TID_NONE) {
300 table = codepage_table;
301 cp_table_ref++;
302 }
303
304 break;
305 }
306 } else if (cp_table_ref == 0) {
307 load = true;
308 cp_table_ref |= CP_LOADING;
309 }
310
311 /* alloc and load must be done outside the lock */
312 cp_lock_leave();
313
314 if (!load) {
315 yield();
316 } else if (alloc_and_load_cp_table(cp, codepage_table) < 0) {
317 cp = INIT_CODEPAGE; /* table may be clobbered now */
318 tid = cp_info[cp].tid;
319 }
320
321 cp_lock_enter();
322
323 if (load) {
324 loaded_cp_tid = tid;
325 cp_table_ref &= ~CP_LOADING;
326 }
327 }
328
329 cp_lock_leave();
214 330
215 while (count--) { 331 while (count--) {
332 unsigned short ucs, tmp;
333
216 if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */ 334 if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
217 *utf8++ = *iso++; 335 *utf8++ = *iso++;
218 336
219 else { 337 else {
220 338 /* tid tells us which table to use and how */
221 /* cp tells us which codepage to convert from */ 339 switch (tid) {
222 switch (cp) { 340 case CP_TID_ISO: /* Greek */
223 case ISO_8859_7: /* Greek */ 341 /* Hebrew */
224 case WIN_1252: /* Western European */ 342 /* Cyrillic */
225 case WIN_1251: /* Cyrillic */ 343 /* Thai */
226 case ISO_8859_9: /* Turkish */ 344 /* Arabic */
227 case ISO_8859_2: /* Latin Extended */ 345 /* Turkish */
228 case WIN_1250: /* Central European */ 346 /* Latin Extended */
229#ifdef HAVE_LCD_BITMAP 347 /* Central European */
230 case ISO_8859_8: /* Hebrew */ 348 /* Western European */
231 case ISO_8859_11: /* Thai */
232 case WIN_1256: /* Arabic */
233#endif
234 tmp = ((cp-1)*128) + (*iso++ - 128); 349 tmp = ((cp-1)*128) + (*iso++ - 128);
235 ucs = codepage_table[tmp]; 350 ucs = table[tmp];
236 break; 351 break;
237 352
238#ifdef HAVE_LCD_BITMAP 353#ifdef HAVE_LCD_BITMAP
239 case SJIS: /* Japanese */ 354 case CP_TID_932: /* Japanese */
240 if (*iso > 0xA0 && *iso < 0xE0) { 355 if (*iso > 0xA0 && *iso < 0xE0) {
241 tmp = *iso++ | (0xA100 - 0x8000); 356 tmp = *iso++ | (0xA100 - 0x8000);
242 ucs = codepage_table[tmp]; 357 ucs = table[tmp];
243 break; 358 break;
244 } 359 }
245 360
246 case GB_2312: /* Simplified Chinese */ 361 case CP_TID_936: /* Simplified Chinese */
247 case KSX_1001: /* Korean */ 362 case CP_TID_949: /* Korean */
248 case BIG_5: /* Traditional Chinese */ 363 case CP_TID_950: /* Traditional Chinese */
249 if (count < 1 || !iso[1]) { 364 if (count < 1 || !iso[1]) {
250 ucs = *iso++; 365 ucs = *iso++;
251 break; 366 break;
@@ -256,7 +371,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
256 tmp = *iso++ << 8; 371 tmp = *iso++ << 8;
257 tmp |= *iso++; 372 tmp |= *iso++;
258 tmp -= 0x8000; 373 tmp -= 0x8000;
259 ucs = codepage_table[tmp]; 374 ucs = table[tmp];
260 count--; 375 count--;
261 break; 376 break;
262#endif /* HAVE_LCD_BITMAP */ 377#endif /* HAVE_LCD_BITMAP */
@@ -271,6 +386,17 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
271 utf8 = utf8encode(ucs, utf8); 386 utf8 = utf8encode(ucs, utf8);
272 } 387 }
273 } 388 }
389
390 if (table) {
391 cp_lock_enter();
392 if (table == codepage_table) {
393 cp_table_ref--;
394 } else {
395 default_cp_table_ref--;
396 }
397 cp_lock_leave();
398 }
399
274 return utf8; 400 return utf8;
275} 401}
276 402
@@ -288,7 +414,7 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
288 utf16 += 4; 414 utf16 += 4;
289 count -= 2; 415 count -= 2;
290 } else { 416 } else {
291 ucs = (utf16[0] | (utf16[1] << 8)); 417 ucs = getle16(utf16);
292 utf16 += 2; 418 utf16 += 2;
293 count -= 1; 419 count -= 1;
294 } 420 }
@@ -310,7 +436,7 @@ unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
310 utf16 += 4; 436 utf16 += 4;
311 count -= 2; 437 count -= 2;
312 } else { 438 } else {
313 ucs = (utf16[0] << 8) | utf16[1]; 439 ucs = getbe16(utf16);
314 utf16 += 2; 440 utf16 += 2;
315 count -= 1; 441 count -= 1;
316 } 442 }
@@ -400,8 +526,50 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
400 526
401void set_codepage(int cp) 527void set_codepage(int cp)
402{ 528{
403 default_codepage = cp; 529 if (cp < 0 || cp >= NUM_CODEPAGES)
404 return; 530 cp = NUM_CODEPAGES;
531
532 /* load first then swap if load is successful, else just leave it; if
533 handle is 0 then we just free the current one; this won't happen often
534 thus we don't worry about reusing it and consequently avoid possible
535 clobbering of the existing one */
536
537 int handle = -1;
538 int tid = cp_info[cp].tid;
539
540 while (1) {
541 cp_lock_enter();
542
543 if (default_cp_tid == tid)
544 break;
545
546 if (handle >= 0 && default_cp_table_ref == 0) {
547 int hold = default_cp_handle;
548 default_cp_handle = handle;
549 handle = hold;
550 default_cp_tid = tid;
551 break;
552 }
553
554 /* alloc and load must be done outside the lock */
555 cp_lock_leave();
556
557 if (handle < 0 && (handle = alloc_and_load_cp_table(cp, NULL)) < 0)
558 return; /* OOM; change nothing */
559
560 yield();
561 }
562
563 default_cp = cp;
564 cp_lock_leave();
565
566 if (handle > 0)
567 core_free(handle);
568}
569
570int get_codepage(void)
571{
572 return default_cp;
405} 573}
406 574
407/* seek to a given char in a utf8 string and 575/* seek to a given char in a utf8 string and
@@ -418,9 +586,16 @@ int utf8seek(const unsigned char* utf8, int offset)
418 return pos; 586 return pos;
419} 587}
420 588
421const char* get_codepage_name(int cp) 589const char * get_codepage_name(int cp)
422{ 590{
423 if (cp < 0 || cp>= NUM_CODEPAGES) 591 if (cp < 0 || cp >= NUM_CODEPAGES)
424 return name_codepages[NUM_CODEPAGES]; 592 cp = NUM_CODEPAGES;
425 return name_codepages[cp]; 593 return cp_info[cp].name;
426} 594}
595
596#if 0 /* not needed just now */
597void unicode_init(void)
598{
599 cp_lock_init();
600}
601#endif