diff options
author | Yoshihisa Uchida <uchida@rockbox.org> | 2010-06-05 10:30:08 +0000 |
---|---|---|
committer | Yoshihisa Uchida <uchida@rockbox.org> | 2010-06-05 10:30:08 +0000 |
commit | fdba8404503af0448586615330a7b27f2ced531c (patch) | |
tree | bb15677a7a720675ac2666f11e62042f3b2639ad /apps/plugins/text_viewer/tv_text_processor.c | |
parent | 991e92fd3dc15f1e365761264c26305559ddb0a4 (diff) | |
download | rockbox-fdba8404503af0448586615330a7b27f2ced531c.tar.gz rockbox-fdba8404503af0448586615330a7b27f2ced531c.zip |
reworks text viewer plugin. (FS#11209)
new text viewer plugin: text_viewer.rock.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26571 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins/text_viewer/tv_text_processor.c')
-rw-r--r-- | apps/plugins/text_viewer/tv_text_processor.c | 576 |
1 files changed, 576 insertions, 0 deletions
diff --git a/apps/plugins/text_viewer/tv_text_processor.c b/apps/plugins/text_viewer/tv_text_processor.c new file mode 100644 index 0000000000..8cdd78df15 --- /dev/null +++ b/apps/plugins/text_viewer/tv_text_processor.c | |||
@@ -0,0 +1,576 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2002 Gilles Roux | ||
11 | * 2003 Garrett Derner | ||
12 | * 2010 Yoshihisa Uchida | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version 2 | ||
17 | * of the License, or (at your option) any later version. | ||
18 | * | ||
19 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
20 | * KIND, either express or implied. | ||
21 | * | ||
22 | ****************************************************************************/ | ||
23 | #include "plugin.h" | ||
24 | #include "ctype.h" | ||
25 | #include "tv_preferences.h" | ||
26 | #include "tv_text_processor.h" | ||
27 | |||
/* line-ending convention of the text, as detected by tv_get_ucs() */
enum tv_text_type {
    TV_TEXT_UNKNOWN,    /* not detected yet */
    TV_TEXT_MAC,        /* a lone CR ends a line */
    TV_TEXT_UNIX,       /* a lone LF ends a line */
    TV_TEXT_WIN,        /* CR LF ends a line */
};
34 | |||
/* the maximum number of characters in each block */
#ifdef HAVE_LCD_BITMAP
#define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH / 2 + 1)
#else
#define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH + 1)
#endif

/* the number of per-block character buffers (see ucsbuf) */
#define TV_MAX_BLOCKS 2

/*
 * number of spaces written for a leading indent space
 * (this value is used only when the line mode is REFLOW)
 */
#define TV_INDENT_SPACES 2
49 | |||
/* the viewer preferences; fetched in tv_init_text_processor() */
static const struct tv_preferences *prefs;
/* the line-ending convention detected so far by tv_get_ucs() */
static enum tv_text_type text_type = TV_TEXT_UNKNOWN;

/* one past the last byte of the source text currently being parsed */
static const unsigned char *end_ptr;

/* the decoded characters of each block */
static unsigned short *ucsbuf[TV_MAX_BLOCKS];
/* the start of the UTF-8 output buffer */
static unsigned char *utf8buf;
/* the current write position inside the UTF-8 output buffer */
static unsigned char *outbuf;

/* the number of blocks and the width of one block (in tv_glyph_width() units) */
static int block_count;
static int block_width;

/*
 * when this value is true and the line mode is EXPAND,
 * tv_create_formed_text produces a blank line.
 */
static bool expand_extra_line = false;

/* this value is set to true when a line is divided instead of ended by a newline. */
static bool is_break_line = false;
67 | |||
/*
 * the characters after which a line may be broken
 * (read-only lookup table, searched by tv_is_line_break_char())
 */
static const unsigned short break_chars[] =
{
    0,
    /* halfwidth characters */
    '\t', '\n', 0x0b, 0x0c, ' ', '!', ',', '-', '.', ':', ';', '?', 0xb7,
    /* fullwidth characters */
    0x2010, /* hyphen */
    0x3000, /* fullwidth space */
    0x3001, /* ideographic comma */
    0x3002, /* ideographic full stop */
    0x30fb, /* katakana middle dot */
    0x30fc, /* katakana-hiragana prolonged sound mark */
    0xff01, /* fullwidth exclamation mark */
    0xff0c, /* fullwidth comma */
    0xff0d, /* fullwidth hyphen-minus */
    0xff0e, /* fullwidth full stop */
    0xff1a, /* fullwidth colon */
    0xff1b, /* fullwidth semicolon */
    0xff1f, /* fullwidth question mark */
};

/*
 * the characters which are treated as spaces although isspace() does not
 * report them (read-only lookup table, searched by tv_isspace())
 */
static const unsigned short extra_spaces[] = { 0, 0x3000 };
91 | |||
/*
 * Return the display width of the character ch.
 *
 * A newline has no width and a NUL is measured as a space. On bitmap
 * LCDs a diacritic has no width and every other character is measured
 * with the preferred font; otherwise every character has a width of 1.
 */
static int tv_glyph_width(int ch)
{
    switch (ch)
    {
        case '\n':
            return 0;
        case 0:
            /* NUL is displayed as a space */
            ch = ' ';
            break;
        default:
            break;
    }

#ifdef HAVE_LCD_BITMAP
    /* diacritics are drawn over the previous glyph */
    return rb->is_diacritic(ch, NULL) ? 0 : rb->font_get_width(prefs->font, ch);
#else
    return 1;
#endif
}
110 | |||
111 | static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch) | ||
112 | { | ||
113 | int count = 1; | ||
114 | unsigned char utf8_tmp[3]; | ||
115 | |||
116 | /* distinguish the text_type */ | ||
117 | if (*str == '\r') | ||
118 | { | ||
119 | if (text_type == TV_TEXT_WIN || text_type == TV_TEXT_UNKNOWN) | ||
120 | { | ||
121 | if (str + 1 < end_ptr && *(str+1) == '\n') | ||
122 | { | ||
123 | if (text_type == TV_TEXT_UNKNOWN) | ||
124 | text_type = TV_TEXT_WIN; | ||
125 | |||
126 | *ch = '\n'; | ||
127 | return (unsigned char *)str + 2; | ||
128 | } | ||
129 | |||
130 | if (text_type == TV_TEXT_UNKNOWN) | ||
131 | text_type = TV_TEXT_MAC; | ||
132 | } | ||
133 | *ch = (text_type == TV_TEXT_MAC)? '\n' : ' '; | ||
134 | return (unsigned char *)str + 1; | ||
135 | } | ||
136 | else if (*str == '\n') | ||
137 | { | ||
138 | if (text_type == TV_TEXT_UNKNOWN) | ||
139 | text_type = TV_TEXT_UNIX; | ||
140 | |||
141 | *ch = (text_type == TV_TEXT_UNIX)? '\n' : ' '; | ||
142 | return (unsigned char *)str + 1; | ||
143 | } | ||
144 | |||
145 | if (prefs->encoding == UTF_8) | ||
146 | return (unsigned char*)rb->utf8decode(str, ch); | ||
147 | |||
148 | #ifdef HAVE_LCD_BITMAP | ||
149 | if ((*str >= 0x80) && | ||
150 | ((prefs->encoding > SJIS) || | ||
151 | (prefs->encoding == SJIS && (*str <= 0xa0 || *str >= 0xe0)))) | ||
152 | { | ||
153 | if (str + 1 >= end_ptr) | ||
154 | { | ||
155 | end_ptr = str; | ||
156 | *ch = 0; | ||
157 | return (unsigned char *)str; | ||
158 | } | ||
159 | count = 2; | ||
160 | } | ||
161 | #endif | ||
162 | rb->iso_decode(str, utf8_tmp, prefs->encoding, count); | ||
163 | rb->utf8decode(utf8_tmp, ch); | ||
164 | return (unsigned char *)str + count; | ||
165 | } | ||
166 | |||
167 | static void tv_decode2utf8(const unsigned short *ucs, int count) | ||
168 | { | ||
169 | int i; | ||
170 | |||
171 | for (i = 0; i < count; i++) | ||
172 | outbuf = rb->utf8encode(ucs[i], outbuf); | ||
173 | |||
174 | *outbuf = '\0'; | ||
175 | } | ||
176 | |||
177 | static bool tv_is_line_break_char(unsigned short ch) | ||
178 | { | ||
179 | size_t i; | ||
180 | |||
181 | /* when the word mode is CHOP, all characters does not break line. */ | ||
182 | if (prefs->word_mode == CHOP) | ||
183 | return false; | ||
184 | |||
185 | for (i = 0; i < sizeof(break_chars); i++) | ||
186 | { | ||
187 | if (break_chars[i] == ch) | ||
188 | return true; | ||
189 | } | ||
190 | return false; | ||
191 | } | ||
192 | |||
193 | static bool tv_isspace(unsigned short ch) | ||
194 | { | ||
195 | size_t i; | ||
196 | |||
197 | if (ch < 128 && isspace(ch)) | ||
198 | return true; | ||
199 | |||
200 | for (i = 0; i < sizeof(extra_spaces); i++) | ||
201 | { | ||
202 | if (extra_spaces[i] == ch) | ||
203 | return true; | ||
204 | } | ||
205 | return false; | ||
206 | } | ||
207 | |||
/*
 * Used when the line mode is JOIN: return true when the character decoded
 * at next_str is a space, in which case the newline in front of it is
 * kept as a line end.
 *
 * NOTE(review): next_str is decoded without comparing it against end_ptr;
 * confirm that callers never pass a position at the end of the buffer.
 */
static bool tv_is_break_line_join_mode(const unsigned char *next_str)
{
    unsigned short first;

    tv_get_ucs(next_str, &first);

    if (tv_isspace(first))
        return true;

    return false;
}
215 | |||
216 | static int tv_form_reflow_line(unsigned short *ucs, int chars) | ||
217 | { | ||
218 | unsigned short new_ucs[TV_MAX_CHARS_PER_BLOCK]; | ||
219 | unsigned short *p = new_ucs; | ||
220 | unsigned short ch; | ||
221 | int i; | ||
222 | int k; | ||
223 | int expand_spaces; | ||
224 | int indent_chars = 0; | ||
225 | int nonspace_chars = 0; | ||
226 | int nonspace_width = 0; | ||
227 | int remain_spaces; | ||
228 | int spaces = 0; | ||
229 | int words_spaces; | ||
230 | |||
231 | if (prefs->alignment == LEFT) | ||
232 | { | ||
233 | while (chars > 0 && ucs[chars-1] == ' ') | ||
234 | chars--; | ||
235 | } | ||
236 | |||
237 | if (chars == 0) | ||
238 | return 0; | ||
239 | |||
240 | while (ucs[indent_chars] == ' ') | ||
241 | indent_chars++; | ||
242 | |||
243 | for (i = indent_chars; i < chars; i++) | ||
244 | { | ||
245 | ch = ucs[i]; | ||
246 | if (ch == ' ') | ||
247 | spaces++; | ||
248 | else | ||
249 | { | ||
250 | nonspace_chars++; | ||
251 | nonspace_width += tv_glyph_width(ch); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | if (spaces == 0) | ||
256 | return chars; | ||
257 | |||
258 | expand_spaces = (block_width - nonspace_width) / tv_glyph_width(' ') - indent_chars; | ||
259 | if (indent_chars + nonspace_chars + expand_spaces > TV_MAX_CHARS_PER_BLOCK) | ||
260 | expand_spaces = TV_MAX_CHARS_PER_BLOCK - indent_chars - nonspace_chars; | ||
261 | |||
262 | words_spaces = expand_spaces / spaces; | ||
263 | remain_spaces = expand_spaces - words_spaces * spaces; | ||
264 | |||
265 | for (i = 0; i < indent_chars; i++) | ||
266 | *p++ = ' '; | ||
267 | |||
268 | for ( ; i < chars; i++) | ||
269 | { | ||
270 | ch = ucs[i]; | ||
271 | *p++ = ch; | ||
272 | if (ch == ' ') | ||
273 | { | ||
274 | for (k = ((remain_spaces > 0)? 0 : 1); k < words_spaces; k++) | ||
275 | *p++ = ch; | ||
276 | |||
277 | remain_spaces--; | ||
278 | } | ||
279 | } | ||
280 | |||
281 | rb->memcpy(ucs, new_ucs, sizeof(unsigned short) * TV_MAX_CHARS_PER_BLOCK); | ||
282 | return indent_chars + nonspace_chars + expand_spaces; | ||
283 | } | ||
284 | |||
/*
 * Rearrange the blocks for the RIGHT alignment.
 *
 * Starting with the last block, the free width of each block is filled
 * with characters taken from the end of the blocks in front of it.
 *
 * block_chars: the number of characters of each block (indexed like
 *              ucsbuf); updated to the new character counts.
 */
static void tv_align_right(int *block_chars)
{
    unsigned short *cur_text;
    unsigned short *prev_text;
    unsigned short ch;
    int cur_block = block_count - 1;
    int prev_block;
    int cur_chars;
    int prev_chars;
    int idx;
    int break_pos;
    int break_width = 0;
    int append_width;
    int width;

    while (cur_block > 0)
    {
        /* width: the free width which is left in the current block */
        cur_text = ucsbuf[cur_block];
        cur_chars = block_chars[cur_block];
        idx = cur_chars;
        width = 0;
        while(--idx >= 0)
            width += tv_glyph_width(cur_text[idx]);

        width = block_width - width;
        prev_block = cur_block - 1;

        do {
            prev_text = ucsbuf[prev_block];
            prev_chars = block_chars[prev_block];

            /*
             * walk backwards from the end of the previous block until the
             * free width is used up, remembering the line break position
             * which is found last (the one closest to the start).
             */
            idx = prev_chars;
            append_width = 0;
            break_pos = prev_chars;
            while (append_width < width && idx > 0)
            {
                ch = prev_text[--idx];
                if (tv_is_line_break_char(ch))
                {
                    break_pos = idx + 1;
                    break_width = append_width;
                }
                append_width += tv_glyph_width(ch);
            }
            /* the last character does not fit any more */
            if (append_width > width)
                idx++;

            /* the whole previous block fits */
            if (idx == 0)
            {
                break_pos = 0;
                break_width = append_width;
            }

            if (break_pos < prev_chars)
                append_width = break_width;
            /* no line break position was found; the text is divided at idx when
             * (1) the first character of the cur_text concatenates
             *     the last character of the prev_text, or
             * (2) the free width of the current block is large
             *     (>= 0.75 * block width)
             *
             * NOTE(review): prev_text[prev_chars - 1] looks like it is
             * read with index -1 when the previous block is empty; confirm.
             */
            else if (((!tv_isspace(*cur_text) && !tv_isspace(prev_text[prev_chars - 1])) ||
                      (4 * width >= 3 * block_width)))
            {
                break_pos = idx;
            }

            if (break_pos < prev_chars)
            {
                /* make room at the start of cur_text, then copy the tail of prev_text into it */
                rb->memmove(cur_text + prev_chars - break_pos,
                            cur_text, block_chars[cur_block] * sizeof(unsigned short));
                rb->memcpy(cur_text, prev_text + break_pos,
                           (prev_chars - break_pos) * sizeof(unsigned short));

                block_chars[prev_block] = break_pos;
                block_chars[cur_block ] += prev_chars - break_pos;
            }
        /* go on with the block before while free width is left */
        } while ((width -= append_width) > 0 && --prev_block >= 0);
        cur_block--;
    }
}
365 | |||
/*
 * Decode the text at src into the characters of one block.
 *
 * Parsing stops at a line end, at end_ptr, when the next character does
 * not fit into block_width or when TV_MAX_CHARS_PER_BLOCK characters are
 * stored. is_break_line is set to false when the block ends with a
 * newline and to true otherwise.
 *
 * src:       the start of the text to parse
 * ucs:       receives the decoded characters
 * ucs_chars: receives the number of characters which belong to the block
 * is_indent: true when the leading spaces of src are indent spaces
 *
 * Returns the number of bytes of src which belong to the block.
 */
static int tv_parse_text(const unsigned char *src, unsigned short *ucs,
                         int *ucs_chars, bool is_indent)
{
    const unsigned char *cur = src;
    const unsigned char *next = src;
    const unsigned char *line_break_ptr = NULL;
    const unsigned char *line_end_ptr = NULL;
    unsigned short ch = 0;
    unsigned short prev_ch;
    int chars = 0;
    int gw;
    int i;
    int line_break_width = 0;
    int line_end_chars = 0;
    int width = 0;
    bool is_space = false;

    while (true) {
        cur = next;
        /* the end of the text: the block ends here */
        if (cur >= end_ptr)
        {
            line_end_ptr = cur;
            line_end_chars = chars;
            is_break_line = true;
            break;
        }

        prev_ch = ch;
        next = tv_get_ucs(cur, &ch);
        if (ch == '\n')
        {
            /*
             * the newline ends the line unless the line mode is JOIN and
             * the next line does not start with a space.
             */
            if (prefs->line_mode != JOIN || tv_is_break_line_join_mode(next))
            {
                line_end_ptr = next;
                line_end_chars = chars;
                is_break_line = false;
                break;
            }

            /* the newline is dropped and the lines are joined directly */
            if (prefs->word_mode == CHOP || tv_isspace(prev_ch))
                continue;

            /*
             * when the line mode is JOIN and the word mode is WRAP,
             * the next character does not concatenate with the
             * previous character.
             */
            ch = ' ';
        }
        else if ((is_space = tv_isspace(ch)) == true)
        {
            /*
             * when the line mode is REFLOW:
             * (1) a spacelike character is converted to ' '
             * (2) consecutive spaces are collapsed into one
             */
            if (prefs->line_mode == REFLOW)
            {
                ch = ' ';
                if (prev_ch == ch)
                    continue;
            }

            /* when the alignment is RIGHT, indent spaces are ignored. */
            if (prefs->alignment == RIGHT && is_indent)
                continue;
        }
        else
            is_indent = false;

        /* in the REFLOW mode an indent space counts TV_INDENT_SPACES times */
        if (prefs->line_mode == REFLOW && is_indent)
            gw = tv_glyph_width(ch) * TV_INDENT_SPACES;
        else
            gw = tv_glyph_width(ch);

        width += gw;
        if (width > block_width)
        {
            /* the character does not fit: the block is divided in front of it */
            width -= gw;
            if (is_space)
            {
                line_end_ptr = cur;
                line_end_chars = chars;
            }
            is_break_line = true;
            break;
        }

        /* store the character (an indent space is stored TV_INDENT_SPACES times) */
        if (prefs->line_mode == REFLOW && is_indent)
        {
            for (i = 1; i < TV_INDENT_SPACES; i++)
                ucs[chars++] = ch;
        }
        ucs[chars++] = ch;

        /* remember the last position where the line may be broken */
        if (tv_is_line_break_char(ch))
        {
            line_break_ptr = next;
            line_break_width = width;
            line_end_chars = chars;
        }
        /* the block buffer is full */
        if (chars >= TV_MAX_CHARS_PER_BLOCK)
        {
            is_break_line = true;
            break;
        }
    }

    /* set the end position and character count */
    if (line_end_ptr == NULL)
    {
        /*
         * when there is no usable line break position (none was found in the
         * REFLOW mode, or the last one is at less than 0.75 * block width),
         * the line is cut off at the current position, i.e. where it is
         * closest to the displayed width. Otherwise the line ends behind the
         * last line break character.
         */
        if ((prefs->line_mode == REFLOW && line_break_ptr == NULL) ||
            (4 * line_break_width < 3 * block_width))
        {
            line_end_ptr = cur;
            line_end_chars = chars;
        }
        else
            line_end_ptr = line_break_ptr;
    }

    *ucs_chars = line_end_chars;
    return line_end_ptr - src;
}
494 | |||
495 | int tv_create_formed_text(const unsigned char *src, ssize_t bufsize, | ||
496 | int block, bool is_multi, const unsigned char **dst) | ||
497 | { | ||
498 | unsigned short ch; | ||
499 | int chars[block_count]; | ||
500 | int i; | ||
501 | int size = 0; | ||
502 | bool is_indent; | ||
503 | |||
504 | outbuf = utf8buf; | ||
505 | *outbuf = '\0'; | ||
506 | |||
507 | for (i = 0; i < block_count; i++) | ||
508 | chars[i] = 0; | ||
509 | |||
510 | if (dst != NULL) | ||
511 | *dst = utf8buf; | ||
512 | |||
513 | if (prefs->line_mode == EXPAND && (expand_extra_line = !expand_extra_line) == true) | ||
514 | return 0; | ||
515 | |||
516 | end_ptr = src + bufsize; | ||
517 | |||
518 | tv_get_ucs(src, &ch); | ||
519 | is_indent = (tv_isspace(ch) && !is_break_line); | ||
520 | |||
521 | for (i = 0; i < block_count; i++) | ||
522 | { | ||
523 | size += tv_parse_text(src + size, ucsbuf[i], &chars[i], is_indent); | ||
524 | if (!is_break_line) | ||
525 | break; | ||
526 | |||
527 | is_indent = false; | ||
528 | } | ||
529 | |||
530 | if (dst != NULL) | ||
531 | { | ||
532 | if (prefs->alignment == RIGHT) | ||
533 | tv_align_right(chars); | ||
534 | |||
535 | for (i = 0; i < block_count; i++) | ||
536 | { | ||
537 | if (i == block || (is_multi && i == block + 1)) | ||
538 | { | ||
539 | if (is_break_line && prefs->line_mode == REFLOW) | ||
540 | chars[i] = tv_form_reflow_line(ucsbuf[i], chars[i]); | ||
541 | |||
542 | tv_decode2utf8(ucsbuf[i], chars[i]); | ||
543 | } | ||
544 | } | ||
545 | } | ||
546 | |||
547 | return size; | ||
548 | } | ||
549 | |||
550 | bool tv_init_text_processor(unsigned char *buf, size_t bufsize, size_t *used_size) | ||
551 | { | ||
552 | int i; | ||
553 | |||
554 | *used_size = TV_MAX_CHARS_PER_BLOCK * (2 * 3 + TV_MAX_BLOCKS * sizeof(unsigned short)); | ||
555 | if (bufsize < *used_size) | ||
556 | return false; | ||
557 | |||
558 | prefs = tv_get_preferences(); | ||
559 | text_type = TV_TEXT_UNKNOWN; | ||
560 | expand_extra_line = false; | ||
561 | is_break_line = false; | ||
562 | |||
563 | ucsbuf[0] = (unsigned short*)buf; | ||
564 | for (i = 1; i < TV_MAX_BLOCKS; i++) | ||
565 | ucsbuf[i] = ucsbuf[i - 1] + TV_MAX_CHARS_PER_BLOCK; | ||
566 | |||
567 | utf8buf = buf + TV_MAX_CHARS_PER_BLOCK * TV_MAX_BLOCKS * sizeof(unsigned short); | ||
568 | |||
569 | return true; | ||
570 | } | ||
571 | |||
572 | void tv_set_creation_conditions(int blocks, int width) | ||
573 | { | ||
574 | block_count = blocks; | ||
575 | block_width = width; | ||
576 | } | ||