From b0fee17d6e1a463dcd84568e5997663b69488998 Mon Sep 17 00:00:00 2001 From: Marcoen Hirschberg Date: Tue, 6 Dec 2005 13:27:15 +0000 Subject: waiting is over: initial unicode commit git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8169 a1c6a512-1295-4272-9138-f99709370657 --- firmware/bidi.c | 186 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 140 insertions(+), 46 deletions(-) (limited to 'firmware/bidi.c') diff --git a/firmware/bidi.c b/firmware/bidi.c index dcb44999ec..93c1dd0951 100644 --- a/firmware/bidi.c +++ b/firmware/bidi.c @@ -24,75 +24,168 @@ #include #include "file.h" #include "lcd.h" +#include "rbunicode.h" +#include "arabjoin.h" -#define _HEB_BUFFER_LENGTH MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2 +//#define _HEB_BUFFER_LENGTH (MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2) * 2 #define _HEB_BLOCK_TYPE_ENG 1 #define _HEB_BLOCK_TYPE_HEB 0 #define _HEB_ORIENTATION_LTR 1 #define _HEB_ORIENTATION_RTL 0 -#define ischar(c) (((((unsigned char) c)>=193) && (((unsigned char) c)<=250)) ? 1 : 0) -#define _isblank(c) (((((unsigned char) c)==' ' || ((unsigned char) c)=='\t')) ? 1 : 0) -#define _isnewline(c) (((((unsigned char) c)=='\n' || ((unsigned char) c)=='\r')) ? 1 : 0) +#define ischar(c) ((c > 0x0589 && c < 0x0700) || (c >= 0xfb50 && c <= 0xfefc) ? 1 : 0) +#define _isblank(c) ((c==' ' || c=='\t') ? 1 : 0) +#define _isnewline(c) ((c=='\n' || c=='\r') ? 1 : 0) #define XOR(a,b) ((a||b) && !(a&&b)) -bool bidi_support_enabled = false; +arab_t * arab_lookup(unsigned short uchar) +{ + if (uchar >= 0x621 && uchar <= 0x63a) + return &(jointable[uchar - 0x621]); + if (uchar >= 0x640 && uchar <= 0x64a) + return &(jointable[uchar - 0x621 - 5]); + if (uchar >= 0x671 && uchar <= 0x6d5) + return &(jointable[uchar - 0x621 - 5 - 38]); + if (uchar == 0x200D) /* Support for the zero-width joiner */ + return ‍ + return 0; +} + +void arabjoin(unsigned short * stringprt, int length){ + + bool connected = false; + unsigned short * writeprt = stringprt; + + arab_t * prev = 0; + arab_t * cur; + arab_t * ligature = 0; + short uchar; -unsigned char *bidi_l2v(const char *str, int orientation) + int i; + for (i = 0; i <= length; i++) { + cur = arab_lookup(uchar = *stringprt++); + + /* Skip non-arabic chars */ + if (cur == 0) { + if (prev) { + /* Finish the last char */ + if (connected) { + *writeprt++ = prev->final; + connected = false; + } else + *writeprt++ = prev->isolated; + prev = 0; + *writeprt++ = uchar; + } else { + *writeprt++ = uchar; + } + continue; + } + + /* nothing to do for arabic char if the previous was non-arabic */ + if (prev == 0) { + prev = cur; + continue; + } + + /* if it's LAM, check for LAM+ALEPH ligatures */ + if (prev->isolated == 0xfedd) { + switch (cur->isolated) { + case 0xfe8d: + ligature = &(lamaleph[0]); + break; + case 0xfe87: + ligature = &(lamaleph[1]); + break; + case 0xfe83: + ligature = &(lamaleph[2]); + break; + case 0xfe81: + ligature = &(lamaleph[3]); + } + } + + if (ligature) { /* replace the 2 glyphs by their ligature */ + prev = ligature; + ligature = 0; + } else { + if (connected) { /* previous char has something connected to it */ + if (prev->medial && cur->final) /* Can we connect to it? */ + *writeprt++ = prev->medial; + else { + *writeprt++ = prev->final; + connected = false; + } + } else { + if (prev->initial && cur->final) { /* Can we connect to it? */ + *writeprt++ = prev->initial; + connected = true; + } else + *writeprt++ = prev->isolated; + } + prev = cur; + } + } +} + +unsigned short *bidi_l2v(const unsigned char *str, int orientation) { - static unsigned char buf_heb_str[_HEB_BUFFER_LENGTH]; - static unsigned char buf_broken_str[_HEB_BUFFER_LENGTH]; - const unsigned char *tmp; - unsigned char *heb_str, *target, *opposite_target, *broken_str; + int length = utf8length(str); + static unsigned short utf16_buf[MAX_PATH+1]; + static unsigned short bidi_buf[MAX_PATH+1]; + unsigned short *heb_str, *target, *tmp; // *broken_str int block_start, block_end, block_type, block_length, i; - int block_ended; - long max_chars=0; - int begin, end, char_count, orig_begin; + //long max_chars=0; + //int begin, end, char_count, orig_begin; if (!str || !*str) - return (unsigned char *)""; + return &(unsigned short){0}; + + //tmp = str; + target = tmp = utf16_buf; + while (*str) + str = utf8decode(str, target++); + *target = 0; - tmp = (unsigned char *)str; - block_start=block_end=0; - block_ended=0; + /* properly join any arabic chars */ + arabjoin(utf16_buf, length); - heb_str = buf_heb_str; + block_start=block_end=block_length=0; + + heb_str = bidi_buf; if (orientation) { target = heb_str; - opposite_target = heb_str + strlen(str); } else { - target = heb_str + strlen(str); - opposite_target = heb_str; + target = heb_str + length; *target = 0; target--; } - - block_length=0; + if (ischar(*tmp)) block_type = _HEB_BLOCK_TYPE_HEB; else block_type = _HEB_BLOCK_TYPE_ENG; - + do { - while((XOR(ischar((int)*(tmp+1)),block_type) - || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) - || (int)*(tmp+1)=='\n') - && block_end<(int)strlen(str)-1) { - tmp++; - block_end++; - block_length++; + while((XOR(ischar(*(tmp+1)),block_type) + || _isblank(*(tmp+1)) || ispunct((int)*(tmp+1)) + || *(tmp+1)=='\n') + && block_end < length-1) { + tmp++; + block_end++; + block_length++; } - + if (block_type != orientation) { - while ((_isblank((int)*tmp) || ispunct((int)*tmp)) + while ((_isblank(*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end>block_start) { tmp--; block_end--; } } - + for (i=block_start; i<=block_end; i++) { - *target = (block_type == orientation) ? *(str+i) : *(str+block_end-i+block_start); + *target = (block_type == orientation) ? *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start); if (block_type!=orientation) { switch (*target) { case '(': @@ -109,12 +202,15 @@ unsigned char *bidi_l2v(const char *str, int orientation) } block_type = !block_type; block_start=block_end+1; - } while(block_end<(int)strlen(str)-1); - - broken_str = buf_broken_str; - begin=end=strlen(str)-1; + } while(block_end0) { @@ -147,9 +243,9 @@ unsigned char *bidi_l2v(const char *str, int orientation) } orig_begin=begin; - if (_isblank(heb_str[begin])) { + /* if (_isblank(heb_str[begin])) { heb_str[begin]='\n'; - } + } */ /* skip leading newlines */ while (begin<=end && _isnewline(heb_str[begin])) { @@ -176,9 +272,7 @@ unsigned char *bidi_l2v(const char *str, int orientation) end=begin; } return broken_str; +#endif + return heb_str; } -void set_bidi_support(bool setting) -{ - bidi_support_enabled = setting; -} -- cgit v1.2.3