summaryrefslogtreecommitdiff
path: root/apps/plugins/text_viewer/tv_text_processor.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/plugins/text_viewer/tv_text_processor.c')
-rw-r--r--apps/plugins/text_viewer/tv_text_processor.c576
1 files changed, 576 insertions, 0 deletions
diff --git a/apps/plugins/text_viewer/tv_text_processor.c b/apps/plugins/text_viewer/tv_text_processor.c
new file mode 100644
index 0000000000..8cdd78df15
--- /dev/null
+++ b/apps/plugins/text_viewer/tv_text_processor.c
@@ -0,0 +1,576 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2002 Gilles Roux
11 * 2003 Garrett Derner
12 * 2010 Yoshihisa Uchida
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23#include "plugin.h"
24#include "ctype.h"
25#include "tv_preferences.h"
26#include "tv_text_processor.h"
27
/*
 * Line-ending convention of the text being decoded.
 * It starts as TV_TEXT_UNKNOWN and is fixed by tv_get_ucs() when the
 * first '\r' or '\n' is seen.
 */
enum tv_text_type {
    TV_TEXT_UNKNOWN = 0, /* not decided yet */
    TV_TEXT_MAC     = 1, /* a lone '\r' ends a line */
    TV_TEXT_UNIX    = 2, /* '\n' ends a line */
    TV_TEXT_WIN     = 3, /* "\r\n" ends a line */
};
34
/* maximum number of characters stored in one block */
#if defined(HAVE_LCD_BITMAP)
#define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH / 2 + 1)
#else
#define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH + 1)
#endif

/* maximum number of blocks that can be formed at once */
#define TV_MAX_BLOCKS 2

/*
 * number of spaces written for each indent space at the start of a paragraph
 * (this value is used only when the line mode is REFLOW)
 */
#define TV_INDENT_SPACES 2
49
50static const struct tv_preferences *prefs;
51static enum tv_text_type text_type = TV_TEXT_UNKNOWN;
52
53static const unsigned char *end_ptr;
54
55static unsigned short *ucsbuf[TV_MAX_BLOCKS];
56static unsigned char *utf8buf;
57static unsigned char *outbuf;
58
59static int block_count;
60static int block_width;
61
62/* if this value is true, then tv_create_line_text returns a blank line. */
63static bool expand_extra_line = false;
64
65/* when a line is divided, this value sets true. */
66static bool is_break_line = false;
67
/*
 * Characters after which a line may be broken.
 * The table is read-only, so it is declared const.
 */
static const unsigned short break_chars[] =
    {
        0,
        /* halfwidth characters */
        '\t', '\n', 0x0b, 0x0c, ' ', '!', ',', '-', '.', ':', ';', '?', 0xb7,
        /* fullwidth characters */
        0x2010, /* hyphen */
        0x3000, /* fullwidth space */
        0x3001, /* ideographic comma */
        0x3002, /* ideographic full stop */
        0x30fb, /* katakana middle dot */
        0x30fc, /* katakana-hiragana prolonged sound mark */
        0xff01, /* fullwidth exclamation mark */
        0xff0c, /* fullwidth comma */
        0xff0d, /* fullwidth hyphen-minus */
        0xff0e, /* fullwidth full stop */
        0xff1a, /* fullwidth colon */
        0xff1b, /* fullwidth semicolon */
        0xff1f, /* fullwidth question mark */
    };
88
/*
 * characters treated as spaces although isspace() does not report them
 * (read-only table, hence const)
 */
static const unsigned short extra_spaces[] = { 0, 0x3000 };
91
/*
 * Return the display width of the character ch.
 *
 * '\n' has no width and 0 is measured as a space. On bitmap LCDs a
 * diacritic counts as 0 and everything else is measured with the
 * preferred font; on other LCDs every character has width 1.
 */
static int tv_glyph_width(int ch)
{
    int glyph;

    if (ch == '\n')
        return 0;

    /* a NUL is measured as if it were a space */
    glyph = (ch != 0) ? ch : ' ';

#ifdef HAVE_LCD_BITMAP
    /* the width of a diacritic character is 0 */
    return rb->is_diacritic(glyph, NULL) ? 0
                                         : rb->font_get_width(prefs->font, glyph);
#else
    (void)glyph;
    return 1;
#endif
}
110
111static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch)
112{
113 int count = 1;
114 unsigned char utf8_tmp[3];
115
116 /* distinguish the text_type */
117 if (*str == '\r')
118 {
119 if (text_type == TV_TEXT_WIN || text_type == TV_TEXT_UNKNOWN)
120 {
121 if (str + 1 < end_ptr && *(str+1) == '\n')
122 {
123 if (text_type == TV_TEXT_UNKNOWN)
124 text_type = TV_TEXT_WIN;
125
126 *ch = '\n';
127 return (unsigned char *)str + 2;
128 }
129
130 if (text_type == TV_TEXT_UNKNOWN)
131 text_type = TV_TEXT_MAC;
132 }
133 *ch = (text_type == TV_TEXT_MAC)? '\n' : ' ';
134 return (unsigned char *)str + 1;
135 }
136 else if (*str == '\n')
137 {
138 if (text_type == TV_TEXT_UNKNOWN)
139 text_type = TV_TEXT_UNIX;
140
141 *ch = (text_type == TV_TEXT_UNIX)? '\n' : ' ';
142 return (unsigned char *)str + 1;
143 }
144
145 if (prefs->encoding == UTF_8)
146 return (unsigned char*)rb->utf8decode(str, ch);
147
148#ifdef HAVE_LCD_BITMAP
149 if ((*str >= 0x80) &&
150 ((prefs->encoding > SJIS) ||
151 (prefs->encoding == SJIS && (*str <= 0xa0 || *str >= 0xe0))))
152 {
153 if (str + 1 >= end_ptr)
154 {
155 end_ptr = str;
156 *ch = 0;
157 return (unsigned char *)str;
158 }
159 count = 2;
160 }
161#endif
162 rb->iso_decode(str, utf8_tmp, prefs->encoding, count);
163 rb->utf8decode(utf8_tmp, ch);
164 return (unsigned char *)str + count;
165}
166
167static void tv_decode2utf8(const unsigned short *ucs, int count)
168{
169 int i;
170
171 for (i = 0; i < count; i++)
172 outbuf = rb->utf8encode(ucs[i], outbuf);
173
174 *outbuf = '\0';
175}
176
177static bool tv_is_line_break_char(unsigned short ch)
178{
179 size_t i;
180
181 /* when the word mode is CHOP, all characters does not break line. */
182 if (prefs->word_mode == CHOP)
183 return false;
184
185 for (i = 0; i < sizeof(break_chars); i++)
186 {
187 if (break_chars[i] == ch)
188 return true;
189 }
190 return false;
191}
192
193static bool tv_isspace(unsigned short ch)
194{
195 size_t i;
196
197 if (ch < 128 && isspace(ch))
198 return true;
199
200 for (i = 0; i < sizeof(extra_spaces); i++)
201 {
202 if (extra_spaces[i] == ch)
203 return true;
204 }
205 return false;
206}
207
/*
 * Used in JOIN line mode: decode the character at next_str and report
 * whether it is a space, in which case the preceding newline really
 * ends the line.
 */
static bool tv_is_break_line_join_mode(const unsigned char *next_str)
{
    unsigned short following;

    tv_get_ucs(next_str, &following);

    if (tv_isspace(following))
        return true;

    return false;
}
215
216static int tv_form_reflow_line(unsigned short *ucs, int chars)
217{
218 unsigned short new_ucs[TV_MAX_CHARS_PER_BLOCK];
219 unsigned short *p = new_ucs;
220 unsigned short ch;
221 int i;
222 int k;
223 int expand_spaces;
224 int indent_chars = 0;
225 int nonspace_chars = 0;
226 int nonspace_width = 0;
227 int remain_spaces;
228 int spaces = 0;
229 int words_spaces;
230
231 if (prefs->alignment == LEFT)
232 {
233 while (chars > 0 && ucs[chars-1] == ' ')
234 chars--;
235 }
236
237 if (chars == 0)
238 return 0;
239
240 while (ucs[indent_chars] == ' ')
241 indent_chars++;
242
243 for (i = indent_chars; i < chars; i++)
244 {
245 ch = ucs[i];
246 if (ch == ' ')
247 spaces++;
248 else
249 {
250 nonspace_chars++;
251 nonspace_width += tv_glyph_width(ch);
252 }
253 }
254
255 if (spaces == 0)
256 return chars;
257
258 expand_spaces = (block_width - nonspace_width) / tv_glyph_width(' ') - indent_chars;
259 if (indent_chars + nonspace_chars + expand_spaces > TV_MAX_CHARS_PER_BLOCK)
260 expand_spaces = TV_MAX_CHARS_PER_BLOCK - indent_chars - nonspace_chars;
261
262 words_spaces = expand_spaces / spaces;
263 remain_spaces = expand_spaces - words_spaces * spaces;
264
265 for (i = 0; i < indent_chars; i++)
266 *p++ = ' ';
267
268 for ( ; i < chars; i++)
269 {
270 ch = ucs[i];
271 *p++ = ch;
272 if (ch == ' ')
273 {
274 for (k = ((remain_spaces > 0)? 0 : 1); k < words_spaces; k++)
275 *p++ = ch;
276
277 remain_spaces--;
278 }
279 }
280
281 rb->memcpy(ucs, new_ucs, sizeof(unsigned short) * TV_MAX_CHARS_PER_BLOCK);
282 return indent_chars + nonspace_chars + expand_spaces;
283}
284
/*
 * Redistribute the parsed text for right alignment: walking from the last
 * block towards the first, characters are taken from the end of the
 * preceding block(s) and prepended to the current block until the free
 * width of the current block is used up.
 *
 * block_chars: per-block character counts (indexed like ucsbuf); updated
 *              in place to reflect the moved characters.
 *
 * NOTE(review): nothing here checks that a block stays within
 * TV_MAX_CHARS_PER_BLOCK after characters are prepended -- confirm the
 * buffers cannot overflow.
 */
285static void tv_align_right(int *block_chars)
286{
287 unsigned short *cur_text;
288 unsigned short *prev_text;
289 unsigned short ch;
290 int cur_block = block_count - 1;
291 int prev_block;
292 int cur_chars;
293 int prev_chars;
294 int idx;
295 int break_pos;
296 int break_width = 0;
297 int append_width;
298 int width;
299
300 while (cur_block > 0)
301 {
302 cur_text = ucsbuf[cur_block];
303 cur_chars = block_chars[cur_block];
304 idx = cur_chars;
305 width = 0;
/* sum the glyph widths of the current block */
306 while(--idx >= 0)
307 width += tv_glyph_width(cur_text[idx]);
308
/* from here on, width is the free width left in the current block */
309 width = block_width - width;
310 prev_block = cur_block - 1;
311
312 do {
313 prev_text = ucsbuf[prev_block];
314 prev_chars = block_chars[prev_block];
315
316 idx = prev_chars;
317 append_width = 0;
318 break_pos = prev_chars;
/*
 * scan the previous block backwards, accumulating glyph widths until the
 * free width is reached; remember the position just after the last line
 * break character seen and the width of the characters behind it.
 */
319 while (append_width < width && idx > 0)
320 {
321 ch = prev_text[--idx];
322 if (tv_is_line_break_char(ch))
323 {
324 break_pos = idx + 1;
325 break_width = append_width;
326 }
327 append_width += tv_glyph_width(ch);
328 }
/* the last scanned character overshoots the free width: exclude it */
329 if (append_width > width)
330 idx++;
331
/* the scan reached the start: the whole previous block can be moved */
332 if (idx == 0)
333 {
334 break_pos = 0;
335 break_width = append_width;
336 }
337
338 if (break_pos < prev_chars)
339 append_width = break_width;
340 /*
 * no break position was found; still move characters from idx onwards
 * when either
 * (1) the first character of cur_text joins directly onto the last
 *     character of prev_text (neither is a space), or
 * (2) the remaining free width is at least 0.75 * block width.
 *
 * NOTE(review): when prev_chars is 0 and *cur_text is not a space,
 * prev_text[prev_chars - 1] reads index -1 -- confirm this cannot happen.
 */
345 else if (((!tv_isspace(*cur_text) && !tv_isspace(prev_text[prev_chars - 1])) ||
346 (4 * width >= 3 * block_width)))
347 {
348 break_pos = idx;
349 }
350
351 if (break_pos < prev_chars)
352 {
/* make room at the front of the current block, then copy the tail in */
353 rb->memmove(cur_text + prev_chars - break_pos,
354 cur_text, block_chars[cur_block] * sizeof(unsigned short));
355 rb->memcpy(cur_text, prev_text + break_pos,
356 (prev_chars - break_pos) * sizeof(unsigned short));
357
358 block_chars[prev_block] = break_pos;
359 block_chars[cur_block ] += prev_chars - break_pos;
360 }
/* continue with the next earlier block while free width remains */
361 } while ((width -= append_width) > 0 && --prev_block >= 0);
362 cur_block--;
363 }
364}
365
/*
 * Decode characters from src into ucs until one block line is complete.
 *
 * src:       start of the bytes to parse (parsing never reads a character
 *            starting at or beyond end_ptr)
 * ucs:       receives the decoded characters of the line
 * ucs_chars: receives the number of characters that belong to the line
 * is_indent: true while the leading spaces of the line are still being
 *            processed; cleared at the first non-space character
 *
 * Sets is_break_line to false when the line ended at a newline, and to
 * true when it was cut (end of buffer, block width exceeded, or
 * TV_MAX_CHARS_PER_BLOCK characters stored).
 *
 * Returns the number of bytes of src consumed by the line.
 */
366static int tv_parse_text(const unsigned char *src, unsigned short *ucs,
367 int *ucs_chars, bool is_indent)
368{
369 const unsigned char *cur = src;
370 const unsigned char *next = src;
371 const unsigned char *line_break_ptr = NULL;
372 const unsigned char *line_end_ptr = NULL;
373 unsigned short ch = 0;
374 unsigned short prev_ch;
375 int chars = 0;
376 int gw;
377 int i;
378 int line_break_width = 0;
379 int line_end_chars = 0;
380 int width = 0;
381 bool is_space = false;
382
383 while (true) {
384 cur = next;
/* end of the buffer: the line is cut here */
385 if (cur >= end_ptr)
386 {
387 line_end_ptr = cur;
388 line_end_chars = chars;
389 is_break_line = true;
390 break;
391 }
392
393 prev_ch = ch;
394 next = tv_get_ucs(cur, &ch);
395 if (ch == '\n')
396 {
/*
 * outside JOIN mode every newline ends the line; in JOIN mode it does so
 * only when the following character is a space. The newline is consumed.
 */
397 if (prefs->line_mode != JOIN || tv_is_break_line_join_mode(next))
398 {
399 line_end_ptr = next;
400 line_end_chars = chars;
401 is_break_line = false;
402 break;
403 }
404
/* JOIN mode: drop the newline (ch stays '\n' and becomes the next prev_ch) */
405 if (prefs->word_mode == CHOP || tv_isspace(prev_ch))
406 continue;
407
408 /*
 * when the line mode is JOIN and the word mode is WRAP, a space is
 * inserted so that the next character does not concatenate with
 * the previous character.
 * Note that is_space keeps the value from the previous character
 * on this path.
 */
413 ch = ' ';
414 }
415 else if ((is_space = tv_isspace(ch)) == true)
416 {
417 /*
 * when the line mode is REFLOW:
 * (1) space-like characters are converted to ' '
 * (2) consecutive spaces are collapsed into one
 */
422 if (prefs->line_mode == REFLOW)
423 {
424 ch = ' ';
425 if (prev_ch == ch)
426 continue;
427 }
428
429 /* when the alignment is RIGHT, indent spaces are ignored. */
430 if (prefs->alignment == RIGHT && is_indent)
431 continue;
432 }
433 else
434 is_indent = false;
435
/* in REFLOW mode an indent space is stored TV_INDENT_SPACES times */
436 if (prefs->line_mode == REFLOW && is_indent)
437 gw = tv_glyph_width(ch) * TV_INDENT_SPACES;
438 else
439 gw = tv_glyph_width(ch);
440
441 width += gw;
/*
 * the character does not fit: cut the line. When it is a space the line
 * ends right before it; otherwise the end is chosen after the loop.
 */
442 if (width > block_width)
443 {
444 width -= gw;
445 if (is_space)
446 {
447 line_end_ptr = cur;
448 line_end_chars = chars;
449 }
450 is_break_line = true;
451 break;
452 }
453
454 if (prefs->line_mode == REFLOW && is_indent)
455 {
456 for (i = 1; i < TV_INDENT_SPACES; i++)
457 ucs[chars++] = ch;
458 }
459 ucs[chars++] = ch;
460
/* remember the latest position where the line may be broken */
461 if (tv_is_line_break_char(ch))
462 {
463 line_break_ptr = next;
464 line_break_width = width;
465 line_end_chars = chars;
466 }
/*
 * NOTE(review): on this break cur still points at the character just
 * stored, so if the fallback below ends the line at cur that character is
 * counted in chars but not consumed -- confirm this is intended.
 */
467 if (chars >= TV_MAX_CHARS_PER_BLOCK)
468 {
469 is_break_line = true;
470 break;
471 }
472 }
473
474 /* set the end position and character count */
475 if (line_end_ptr == NULL)
476 {
477 /*
 * when no usable break position exists (REFLOW without any break
 * character), or the last line break position is too short
 * (line length < 0.75 * block width), the line is cut off at the
 * position where it is closest to the displayed width.
 * Otherwise the line ends at the last break position, whose character
 * count was already stored in line_end_chars.
 */
481 if ((prefs->line_mode == REFLOW && line_break_ptr == NULL) ||
482 (4 * line_break_width < 3 * block_width))
483 {
484 line_end_ptr = cur;
485 line_end_chars = chars;
486 }
487 else
488 line_end_ptr = line_break_ptr;
489 }
490
491 *ucs_chars = line_end_chars;
492 return line_end_ptr - src;
493}
494
495int tv_create_formed_text(const unsigned char *src, ssize_t bufsize,
496 int block, bool is_multi, const unsigned char **dst)
497{
498 unsigned short ch;
499 int chars[block_count];
500 int i;
501 int size = 0;
502 bool is_indent;
503
504 outbuf = utf8buf;
505 *outbuf = '\0';
506
507 for (i = 0; i < block_count; i++)
508 chars[i] = 0;
509
510 if (dst != NULL)
511 *dst = utf8buf;
512
513 if (prefs->line_mode == EXPAND && (expand_extra_line = !expand_extra_line) == true)
514 return 0;
515
516 end_ptr = src + bufsize;
517
518 tv_get_ucs(src, &ch);
519 is_indent = (tv_isspace(ch) && !is_break_line);
520
521 for (i = 0; i < block_count; i++)
522 {
523 size += tv_parse_text(src + size, ucsbuf[i], &chars[i], is_indent);
524 if (!is_break_line)
525 break;
526
527 is_indent = false;
528 }
529
530 if (dst != NULL)
531 {
532 if (prefs->alignment == RIGHT)
533 tv_align_right(chars);
534
535 for (i = 0; i < block_count; i++)
536 {
537 if (i == block || (is_multi && i == block + 1))
538 {
539 if (is_break_line && prefs->line_mode == REFLOW)
540 chars[i] = tv_form_reflow_line(ucsbuf[i], chars[i]);
541
542 tv_decode2utf8(ucsbuf[i], chars[i]);
543 }
544 }
545 }
546
547 return size;
548}
549
550bool tv_init_text_processor(unsigned char *buf, size_t bufsize, size_t *used_size)
551{
552 int i;
553
554 *used_size = TV_MAX_CHARS_PER_BLOCK * (2 * 3 + TV_MAX_BLOCKS * sizeof(unsigned short));
555 if (bufsize < *used_size)
556 return false;
557
558 prefs = tv_get_preferences();
559 text_type = TV_TEXT_UNKNOWN;
560 expand_extra_line = false;
561 is_break_line = false;
562
563 ucsbuf[0] = (unsigned short*)buf;
564 for (i = 1; i < TV_MAX_BLOCKS; i++)
565 ucsbuf[i] = ucsbuf[i - 1] + TV_MAX_CHARS_PER_BLOCK;
566
567 utf8buf = buf + TV_MAX_CHARS_PER_BLOCK * TV_MAX_BLOCKS * sizeof(unsigned short);
568
569 return true;
570}
571
572void tv_set_creation_conditions(int blocks, int width)
573{
574 block_count = blocks;
575 block_width = width;
576}