diff options
Diffstat (limited to 'tools/codepages.c')
-rw-r--r-- | tools/codepages.c | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/tools/codepages.c b/tools/codepages.c new file mode 100644 index 0000000000..94768860f4 --- /dev/null +++ b/tools/codepages.c | |||
@@ -0,0 +1,231 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * | ||
9 | * | ||
10 | * Copyright (C) 2005 by Frank Dischner | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | #include <stdio.h> | ||
20 | #include <stdlib.h> | ||
21 | #include <string.h> | ||
22 | #include "codepages.h" | ||
23 | |||
/* Number of 16-bit entries in a full-size codepage table (32768). */
#define MAX_TABLE_SIZE 0x8000

/*
 * Table-sized scratch array of 16-bit entries, private to this file.
 * It has static storage duration, so it starts out all zero.
 */
static unsigned short iso_table[MAX_TABLE_SIZE];
27 | |||
/*
 * ISO-8859-7 (Greek), bytes 0xA1-0xBF -> Unicode.
 * The range is spelled out through 0xBF because 0xBB and 0xBD keep their
 * Latin-1 meaning (U+00BB, U+00BD) rather than following the +0x02D0
 * offset that applies to the Greek letters from 0xC0 upwards.
 */
static const unsigned short iso8859_7_to_uni[] = {
    0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7,         /* A1-A7 */
    0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, /* A8-AF */
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, /* B0-B7 */
    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F  /* B8-BF */
};

/* CP1251 (Cyrillic), bytes 0x80-0xBF -> Unicode. */
static const unsigned short cp1251_to_uni[] = {
    0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, /* 80-87 */
    0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, /* 88-8F */
    0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 90-97 */
    0x0098, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, /* 98-9F */
    0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, /* A0-A7 */
    0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, /* A8-AF */
    0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, /* B0-B7 */
    0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457  /* B8-BF */
};

/* ISO-8859-2 (Latin Extended), bytes 0xA1-0xFF -> Unicode. */
static const unsigned short iso8859_2_to_uni[] = {
    0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,         /* A1-A7 */
    0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, /* A8-AF */
    0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, /* B0-B7 */
    0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, /* B8-BF */
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, /* C0-C7 */
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, /* C8-CF */
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, /* D0-D7 */
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, /* D8-DF */
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, /* E0-E7 */
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, /* E8-EF */
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, /* F0-F7 */
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9  /* F8-FF */
};

/*
 * Map a single byte `c` of codepage `cp` to its Unicode code point.
 * Returns 0 when `cp` is not one of the codepage ids 0x01-0x07.
 */
static unsigned short iso_decode_byte(unsigned char c, int cp)
{
    switch (cp) {
    case 0x01: /* Greek (ISO-8859-7) */
        if (c < 0xA1)
            return c;
        if (c > 0xBF)
            return (unsigned short)(c + 0x02D0);
        return iso8859_7_to_uni[c - 0xA1];

    case 0x02: /* Hebrew (ISO-8859-8) */
        switch (c) {
        case 0xAA:
            return 0x00D7;
        case 0xBA:
            return 0x00F7;
        case 0xDF:
            return 0x2017;
        default:
            break;
        }
        if (c < 0xC0)
            return c;
        return (unsigned short)(c + 0x04F0);

    case 0x03: /* Russian (CP1251) */
        if (c < 0x80)
            return c;
        if (c > 0xBF)
            return (unsigned short)(c + 0x0350);
        return cp1251_to_uni[c - 0x80];

    case 0x04: /* Thai (ISO-8859-11) */
        if (c < 0xA1)
            return c;
        return (unsigned short)(c + 0x0D60);

    case 0x05: /* Arabic (ISO-8859-6) */
        if (c < 0xAC || c == 0xAD)
            return c;
        return (unsigned short)(c + 0x0560);

    case 0x06: /* Turkish (ISO-8859-9) */
        switch (c) {
        case 0xD0:
            return 0x011E;
        case 0xDD:
            return 0x0130;
        case 0xDE:
            return 0x015E;
        case 0xF0:
            return 0x011F;
        case 0xFD:
            return 0x0131;
        case 0xFE:
            return 0x015F;
        default:
            return c;
        }

    case 0x07: /* Latin Extended (ISO-8859-2) */
        if (c < 0xA1)
            return c;
        return iso8859_2_to_uni[c - 0xA1];

    default:
        return 0;
    }
}

/*
 * Decode `count` bytes starting at `latin1` from the single-byte codepage
 * selected by `cp` and return the Unicode value of the LAST byte decoded.
 *
 *   cp: 0x01 Greek, 0x02 Hebrew, 0x03 Russian (CP1251), 0x04 Thai,
 *       0x05 Arabic, 0x06 Turkish, 0x07 Latin Extended (ISO-8859-2)
 *
 * Returns 0 when `count` is zero or negative, or when `cp` is not one of
 * the ids above; in the latter case the input buffer is not read at all.
 */
unsigned short iso_decode(unsigned char *latin1, int cp, int count)
{
    unsigned short ucs = 0;

    /* Unknown codepage: nothing to decode, leave the input untouched. */
    if (cp < 0x01 || cp > 0x07)
        return 0;

    /* "> 0" so that a negative count cannot run past the buffer. */
    while (count-- > 0)
        ucs = iso_decode_byte(*latin1++, cp);

    return ucs;
}
175 | |||
/*
 * Write `s` to `f` as two bytes, low byte first (little-endian).
 * Returns 1 when both bytes were written and 0 as soon as putc() reports
 * EOF for either of them.
 */
int writeshort(FILE *f, unsigned short s)
{
    /* A failed low byte must not be masked by a successful high byte. */
    if (putc(s & 0xFF, f) == EOF)
        return 0;
    return putc(s >> 8, f) != EOF;
}
181 | |||
182 | int main(void) | ||
183 | { | ||
184 | |||
185 | int i, j; | ||
186 | unsigned char k; | ||
187 | unsigned short uni; | ||
188 | FILE *of; | ||
189 | |||
190 | for (i=0; i < MAX_TABLE_SIZE; i++) | ||
191 | iso_table[i] = 0; | ||
192 | |||
193 | of = fopen("iso.cp", "wb"); | ||
194 | if (!of) return 1; | ||
195 | |||
196 | for (i=1; i<8; i++) { | ||
197 | |||
198 | for (j=0; j<128; j++) { | ||
199 | k = (unsigned char)j + 128; | ||
200 | uni = iso_decode(&k, i, 1); | ||
201 | writeshort(of, uni); | ||
202 | } | ||
203 | } | ||
204 | fclose(of); | ||
205 | |||
206 | of = fopen("932.cp", "wb"); | ||
207 | if (!of) return 1; | ||
208 | for (i=0; i < MAX_TABLE_SIZE; i++) | ||
209 | writeshort(of, cp932_table[i]); | ||
210 | fclose(of); | ||
211 | |||
212 | of = fopen("936.cp", "wb"); | ||
213 | if (!of) return 1; | ||
214 | for (i=0; i < MAX_TABLE_SIZE; i++) | ||
215 | writeshort(of, cp936_table[i]); | ||
216 | fclose(of); | ||
217 | |||
218 | of = fopen("949.cp", "wb"); | ||
219 | if (!of) return 1; | ||
220 | for (i=0; i < MAX_TABLE_SIZE; i++) | ||
221 | writeshort(of, cp949_table[i]); | ||
222 | fclose(of); | ||
223 | |||
224 | of = fopen("950.cp", "wb"); | ||
225 | if (!of) return 1; | ||
226 | for (i=0; i < MAX_TABLE_SIZE; i++) | ||
227 | writeshort(of, cp950_table[i]); | ||
228 | fclose(of); | ||
229 | |||
230 | return 0; | ||
231 | } | ||