diff options
author | Dominik Riebeling <Dominik.Riebeling@gmail.com> | 2012-04-06 20:21:29 +0200 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2012-05-19 01:42:53 +0200 |
commit | 2d9c0bab540274e99480d965f38f266d20097976 (patch) | |
tree | e004fcdc65cd091521ca12575b82e672cc5908e3 | |
parent | 4a6b875eda8fe7827b8a55e42e48184a6588ee4c (diff) | |
download | rockbox-2d9c0bab540274e99480d965f38f266d20097976.tar.gz rockbox-2d9c0bab540274e99480d965f38f266d20097976.zip |
Add support for cp1252 (Western European) codepage.
In Europe, Windows defaults to its own codepage, cp1252 (also known as
"WinLatin" or "Windows-1252"). cp1252 adds some characters to ISO-8859-1 in
the 0x80-0x9F range.
Some mp3 tagging software on Windows uses cp1252 instead of ISO-8859-1. This
violates the ID3 specification, which requires tags to be ISO-8859-1 or
Unicode. However, similar violations occur with other codepages, and Rockbox
already supports those through the "Default Codepage" setting. Add support for
cp1252 so that people using such broken tools can override the spec-compliant
decoding and get their tags displayed properly.
Change-Id: I9f2ec478afe2503e99ee8e6609416c92b0f453e0
Reviewed-on: http://gerrit.rockbox.org/209
Reviewed-by: Jens Arnold <amiconn@rockbox.org>
Tested-by: Jens Arnold <amiconn@rockbox.org>
-rw-r--r-- | apps/lang/english.lang | 14 | ||||
-rw-r--r-- | apps/settings_list.c | 9 | ||||
-rw-r--r-- | firmware/common/unicode.c | 9 | ||||
-rw-r--r-- | firmware/include/rbunicode.h | 2 | ||||
-rw-r--r-- | tools/codepage_tables.c | 7 | ||||
-rw-r--r-- | tools/codepage_tables.h | 1 | ||||
-rw-r--r-- | tools/codepages.c | 22 |
7 files changed, 52 insertions, 12 deletions
diff --git a/apps/lang/english.lang b/apps/lang/english.lang index cd575e9cfa..a70bb8b923 100644 --- a/apps/lang/english.lang +++ b/apps/lang/english.lang | |||
@@ -13038,3 +13038,17 @@ | |||
13038 | *: "Start Sleep Timer" | 13038 | *: "Start Sleep Timer" |
13039 | </voice> | 13039 | </voice> |
13040 | </phrase> | 13040 | </phrase> |
13041 | <phrase> | ||
13042 | id: LANG_CODEPAGE_WESTERN_EUROPEAN | ||
13043 | desc: in codepage setting menu | ||
13044 | user: core | ||
13045 | <source> | ||
13046 | *: "Western European (CP1252)" | ||
13047 | </source> | ||
13048 | <dest> | ||
13049 | *: "Western European (CP1252)" | ||
13050 | </dest> | ||
13051 | <voice> | ||
13052 | *: "Western European" | ||
13053 | </voice> | ||
13054 | </phrase> | ||
diff --git a/apps/settings_list.c b/apps/settings_list.c index c925b3273e..64bbe07ba4 100644 --- a/apps/settings_list.c +++ b/apps/settings_list.c | |||
@@ -1531,25 +1531,28 @@ const struct settings_list settings[] = { | |||
1531 | #ifdef HAVE_LCD_BITMAP | 1531 | #ifdef HAVE_LCD_BITMAP |
1532 | /* The order must match with that in unicode.c */ | 1532 | /* The order must match with that in unicode.c */ |
1533 | "iso8859-1,iso8859-7,iso8859-8,cp1251,iso8859-11,cp1256," | 1533 | "iso8859-1,iso8859-7,iso8859-8,cp1251,iso8859-11,cp1256," |
1534 | "iso8859-9,iso8859-2,cp1250,sjis,gb2312,ksx1001,big5,utf-8", | 1534 | "iso8859-9,iso8859-2,cp1250,cp1252,sjis,gb2312,ksx1001,big5,utf-8", |
1535 | set_codepage, 14, | 1535 | set_codepage, 14, |
1536 | ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK), | 1536 | ID2P(LANG_CODEPAGE_LATIN1), |
1537 | ID2P(LANG_CODEPAGE_GREEK), | ||
1537 | ID2P(LANG_CODEPAGE_HEBREW), ID2P(LANG_CODEPAGE_CYRILLIC), | 1538 | ID2P(LANG_CODEPAGE_HEBREW), ID2P(LANG_CODEPAGE_CYRILLIC), |
1538 | ID2P(LANG_CODEPAGE_THAI), ID2P(LANG_CODEPAGE_ARABIC), | 1539 | ID2P(LANG_CODEPAGE_THAI), ID2P(LANG_CODEPAGE_ARABIC), |
1539 | ID2P(LANG_CODEPAGE_TURKISH), | 1540 | ID2P(LANG_CODEPAGE_TURKISH), |
1540 | ID2P(LANG_CODEPAGE_LATIN_EXTENDED), | 1541 | ID2P(LANG_CODEPAGE_LATIN_EXTENDED), |
1541 | ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN), | 1542 | ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN), |
1543 | ID2P(LANG_CODEPAGE_WESTERN_EUROPEAN), | ||
1542 | ID2P(LANG_CODEPAGE_JAPANESE), | 1544 | ID2P(LANG_CODEPAGE_JAPANESE), |
1543 | ID2P(LANG_CODEPAGE_SIMPLIFIED), ID2P(LANG_CODEPAGE_KOREAN), | 1545 | ID2P(LANG_CODEPAGE_SIMPLIFIED), ID2P(LANG_CODEPAGE_KOREAN), |
1544 | ID2P(LANG_CODEPAGE_TRADITIONAL), ID2P(LANG_CODEPAGE_UTF8)), | 1546 | ID2P(LANG_CODEPAGE_TRADITIONAL), ID2P(LANG_CODEPAGE_UTF8)), |
1545 | #else /* !HAVE_LCD_BITMAP */ | 1547 | #else /* !HAVE_LCD_BITMAP */ |
1546 | /* The order must match with that in unicode.c */ | 1548 | /* The order must match with that in unicode.c */ |
1547 | "iso8859-1,iso8859-7,cp1251,iso8859-9,iso8859-2,cp1250,utf-8", | 1549 | "iso8859-1,iso8859-7,cp1251,iso8859-9,iso8859-2,cp1250,cp1252,utf-8", |
1548 | set_codepage, 7, | 1550 | set_codepage, 7, |
1549 | ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK), | 1551 | ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK), |
1550 | ID2P(LANG_CODEPAGE_CYRILLIC), ID2P(LANG_CODEPAGE_TURKISH), | 1552 | ID2P(LANG_CODEPAGE_CYRILLIC), ID2P(LANG_CODEPAGE_TURKISH), |
1551 | ID2P(LANG_CODEPAGE_LATIN_EXTENDED), | 1553 | ID2P(LANG_CODEPAGE_LATIN_EXTENDED), |
1552 | ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN), | 1554 | ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN), |
1555 | ID2P(LANG_CODEPAGE_WESTERN_EUROPEAN), | ||
1553 | ID2P(LANG_CODEPAGE_UTF8)), | 1556 | ID2P(LANG_CODEPAGE_UTF8)), |
1554 | #endif | 1557 | #endif |
1555 | OFFON_SETTING(0, warnon_erase_dynplaylist, LANG_WARN_ERASEDYNPLAYLIST_MENU, | 1558 | OFFON_SETTING(0, warnon_erase_dynplaylist, LANG_WARN_ERASEDYNPLAYLIST_MENU, |
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index 25d4a9129e..3ad63ee4fb 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c | |||
@@ -56,7 +56,7 @@ static const char * const filename[NUM_TABLES] = | |||
56 | 56 | ||
57 | static const char cp_2_table[NUM_CODEPAGES] = | 57 | static const char cp_2_table[NUM_CODEPAGES] = |
58 | { | 58 | { |
59 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 | 59 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 |
60 | }; | 60 | }; |
61 | 61 | ||
62 | static const char * const name_codepages[NUM_CODEPAGES+1] = | 62 | static const char * const name_codepages[NUM_CODEPAGES+1] = |
@@ -70,6 +70,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] = | |||
70 | "ISO-8859-9", | 70 | "ISO-8859-9", |
71 | "ISO-8859-2", | 71 | "ISO-8859-2", |
72 | "CP1250", | 72 | "CP1250", |
73 | "CP1252", | ||
73 | "SJIS", | 74 | "SJIS", |
74 | "GB-2312", | 75 | "GB-2312", |
75 | "KSX-1001", | 76 | "KSX-1001", |
@@ -80,7 +81,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] = | |||
80 | 81 | ||
81 | #else /* !HAVE_LCD_BITMAP, reduced support */ | 82 | #else /* !HAVE_LCD_BITMAP, reduced support */ |
82 | 83 | ||
83 | #define MAX_CP_TABLE_SIZE 640 | 84 | #define MAX_CP_TABLE_SIZE 768 |
84 | #define NUM_TABLES 1 | 85 | #define NUM_TABLES 1 |
85 | 86 | ||
86 | static const char * const filename[NUM_TABLES] = { | 87 | static const char * const filename[NUM_TABLES] = { |
@@ -89,7 +90,7 @@ static const char * const filename[NUM_TABLES] = { | |||
89 | 90 | ||
90 | static const char cp_2_table[NUM_CODEPAGES] = | 91 | static const char cp_2_table[NUM_CODEPAGES] = |
91 | { | 92 | { |
92 | 0, 1, 1, 1, 1, 1, 0 | 93 | 0, 1, 1, 1, 1, 1, 1, 0 |
93 | }; | 94 | }; |
94 | 95 | ||
95 | static const char * const name_codepages[NUM_CODEPAGES+1] = | 96 | static const char * const name_codepages[NUM_CODEPAGES+1] = |
@@ -100,6 +101,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] = | |||
100 | "ISO-8859-9", | 101 | "ISO-8859-9", |
101 | "ISO-8859-2", | 102 | "ISO-8859-2", |
102 | "CP1250", | 103 | "CP1250", |
104 | "CP1252", | ||
103 | "UTF-8", | 105 | "UTF-8", |
104 | "unknown" | 106 | "unknown" |
105 | }; | 107 | }; |
@@ -190,6 +192,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, | |||
190 | /* cp tells us which codepage to convert from */ | 192 | /* cp tells us which codepage to convert from */ |
191 | switch (cp) { | 193 | switch (cp) { |
192 | case ISO_8859_7: /* Greek */ | 194 | case ISO_8859_7: /* Greek */ |
195 | case WIN_1252: /* Western European */ | ||
193 | case WIN_1251: /* Cyrillic */ | 196 | case WIN_1251: /* Cyrillic */ |
194 | case ISO_8859_9: /* Turkish */ | 197 | case ISO_8859_9: /* Turkish */ |
195 | case ISO_8859_2: /* Latin Extended */ | 198 | case ISO_8859_2: /* Latin Extended */ |
diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h index a97ebb469b..d21a840b00 100644 --- a/firmware/include/rbunicode.h +++ b/firmware/include/rbunicode.h | |||
@@ -45,6 +45,7 @@ enum codepages { | |||
45 | ISO_8859_9, /* Turkish */ | 45 | ISO_8859_9, /* Turkish */ |
46 | ISO_8859_2, /* Latin Extended */ | 46 | ISO_8859_2, /* Latin Extended */ |
47 | WIN_1250, /* Central European */ | 47 | WIN_1250, /* Central European */ |
48 | WIN_1252, /* Western European */ | ||
48 | SJIS, /* Japanese */ | 49 | SJIS, /* Japanese */ |
49 | GB_2312, /* Simp. Chinese */ | 50 | GB_2312, /* Simp. Chinese */ |
50 | KSX_1001, /* Korean */ | 51 | KSX_1001, /* Korean */ |
@@ -62,6 +63,7 @@ enum codepages { | |||
62 | ISO_8859_9, /* Turkish */ | 63 | ISO_8859_9, /* Turkish */ |
63 | ISO_8859_2, /* Latin Extended */ | 64 | ISO_8859_2, /* Latin Extended */ |
64 | WIN_1250, /* Central European */ | 65 | WIN_1250, /* Central European */ |
66 | WIN_1252, /* Western European */ | ||
65 | UTF_8, /* Unicode */ | 67 | UTF_8, /* Unicode */ |
66 | NUM_CODEPAGES | 68 | NUM_CODEPAGES |
67 | }; | 69 | }; |
diff --git a/tools/codepage_tables.c b/tools/codepage_tables.c index 87e0da81a2..3f04c9df3e 100644 --- a/tools/codepage_tables.c +++ b/tools/codepage_tables.c | |||
@@ -57,6 +57,13 @@ const unsigned short cp1251_to_uni[] = { | |||
57 | 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457 /* B8-BF */ | 57 | 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457 /* B8-BF */ |
58 | }; | 58 | }; |
59 | 59 | ||
/*
 * CP1252 (Windows-1252) to Unicode mapping for the bytes 0x80-0x9F.
 * Index with (byte - 0x80); bytes outside this range map to themselves
 * and are not stored here.
 *
 * Bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D are unassigned in CP1252 and are
 * mapped to U+00A0 as a placeholder.
 */
const unsigned short cp1252_to_uni[] = {
    0x20AC, 0x00A0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 80-87 */
    /* 0x88 is U+02C6 (modifier letter circumflex accent), not 0x0C26 */
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x00A0, 0x017D, 0x00A0, /* 88-8F */
    0x00A0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 90-97 */
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x00A0, 0x017E, 0x0178  /* 98-9F */
};
66 | |||
60 | const unsigned short cp1256_to_uni[] = { | 67 | const unsigned short cp1256_to_uni[] = { |
61 | 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, | 68 | 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
62 | 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, | 69 | 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, |
diff --git a/tools/codepage_tables.h b/tools/codepage_tables.h index 5c74baa38f..208a4c9237 100644 --- a/tools/codepage_tables.h +++ b/tools/codepage_tables.h | |||
@@ -22,6 +22,7 @@ | |||
22 | extern const unsigned short iso8859_7_to_uni[]; | 22 | extern const unsigned short iso8859_7_to_uni[]; |
23 | extern const unsigned short cp1250_to_uni[]; | 23 | extern const unsigned short cp1250_to_uni[]; |
24 | extern const unsigned short cp1251_to_uni[]; | 24 | extern const unsigned short cp1251_to_uni[]; |
25 | extern const unsigned short cp1252_to_uni[]; | ||
25 | extern const unsigned short cp1256_to_uni[]; | 26 | extern const unsigned short cp1256_to_uni[]; |
26 | extern const unsigned short iso8859_2_to_uni[]; | 27 | extern const unsigned short iso8859_2_to_uni[]; |
27 | extern const unsigned short cp932_table[]; | 28 | extern const unsigned short cp932_table[]; |
diff --git a/tools/codepages.c b/tools/codepages.c index 9c214397de..fb01c4dfb3 100644 --- a/tools/codepages.c +++ b/tools/codepages.c | |||
@@ -25,8 +25,8 @@ | |||
25 | 25 | ||
26 | #define MAX_TABLE_SIZE 32768 | 26 | #define MAX_TABLE_SIZE 32768 |
27 | 27 | ||
28 | static const int mini_index[6] = { | 28 | static const int mini_index[7] = { |
29 | 0, 1, 3, 6, 7, 8 | 29 | 0, 1, 3, 6, 7, 8, 9 |
30 | }; | 30 | }; |
31 | 31 | ||
32 | static unsigned short iso_table[MAX_TABLE_SIZE]; | 32 | static unsigned short iso_table[MAX_TABLE_SIZE]; |
@@ -140,7 +140,7 @@ unsigned short iso_decode(unsigned char *latin1, int cp, int count) | |||
140 | ucs = iso8859_2_to_uni[*latin1++ - 0xA1]; | 140 | ucs = iso8859_2_to_uni[*latin1++ - 0xA1]; |
141 | } | 141 | } |
142 | break; | 142 | break; |
143 | 143 | ||
144 | case 0x08: /* Central European (CP1250) */ | 144 | case 0x08: /* Central European (CP1250) */ |
145 | while (count--) { | 145 | while (count--) { |
146 | /* first convert to unicode */ | 146 | /* first convert to unicode */ |
@@ -150,7 +150,17 @@ unsigned short iso_decode(unsigned char *latin1, int cp, int count) | |||
150 | ucs = cp1250_to_uni[*latin1++ - 0x80]; | 150 | ucs = cp1250_to_uni[*latin1++ - 0x80]; |
151 | } | 151 | } |
152 | break; | 152 | break; |
153 | 153 | ||
154 | case 0x09: /* Western European (CP1252) */ | ||
155 | while (count--) { | ||
156 | /* first convert to unicode */ | ||
157 | if (*latin1 < 0x80 || *latin1 >= 0xa0) | ||
158 | ucs = *latin1++; | ||
159 | else | ||
160 | ucs = cp1252_to_uni[*latin1++ - 0x80]; | ||
161 | } | ||
162 | break; | ||
163 | |||
154 | default: | 164 | default: |
155 | break; | 165 | break; |
156 | } | 166 | } |
@@ -209,7 +219,7 @@ int main(int argc, char **argv) | |||
209 | of = fopen("isomini.cp", "wb"); | 219 | of = fopen("isomini.cp", "wb"); |
210 | if (!of) return 1; | 220 | if (!of) return 1; |
211 | 221 | ||
212 | for (i=1; i<6; i++) { | 222 | for (i=1; i<7; i++) { |
213 | 223 | ||
214 | for (j=0; j<128; j++) { | 224 | for (j=0; j<128; j++) { |
215 | k = (unsigned char)j + 128; | 225 | k = (unsigned char)j + 128; |
@@ -223,7 +233,7 @@ int main(int argc, char **argv) | |||
223 | of = fopen("iso.cp", "wb"); | 233 | of = fopen("iso.cp", "wb"); |
224 | if (!of) return 1; | 234 | if (!of) return 1; |
225 | 235 | ||
226 | for (i=1; i<9; i++) { | 236 | for (i=1; i<10; i++) { |
227 | 237 | ||
228 | for (j=0; j<128; j++) { | 238 | for (j=0; j<128; j++) { |
229 | k = (unsigned char)j + 128; | 239 | k = (unsigned char)j + 128; |