1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999 by Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
26
/*
 * This program converts each additional-codeset character from 0x00 through
 * 0xff to the UTF-8 codeset, and then converts the resulting UTF-8 character
 * back to the original codeset.
 * For example,
 *	IBM	->	UTF-8	->	IBM
 *	(1)		(2)		(3)
 *			  ->	Unicode Scalar
 *				(4)
 * Output:	(1) (2) (4), line by line.
 * Comparison:	(1) against (3).
 */
39
#include <stdio.h>
#include <libgen.h>
#include <stdlib.h>
#include <unistd.h>
#include <locale.h>
#include <iconv.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
50
/* Forward declaration; the definition is at the end of this file. */
static void	mk_data(char *dir, char *name);

/*
 * Reference table filled by mk_data(): entry k holds the value read from the
 * data file for index k (u4) together with the index itself (from).
 * validate() compares u4 against the converted Unicode value when checking
 * is enabled.
 */
static struct {
	unsigned int	from;
	unsigned int	u4;
} tbl[0x10000];

/* Set to 1 by main() when the -c option is given; enables the tbl check. */
static int	flag_check = 0;

/* Program name (basename of argv[0]), printed by usage(). */
char		*ME;

/* Exit code passed to exit() when a step in main() or validate() fails. */
int		status;
61
62
63 void
usage(int status)64 usage(int status)
65 {
66 fprintf(stderr, "Usage: %s from-code\n", ME);
67 exit(status);
68 }
69
70 void
validate(int i,iconv_t cd,iconv_t cd2,iconv_t cd3)71 validate(int i, iconv_t cd, iconv_t cd2, iconv_t cd3)
72 {
73 uchar_t source_buf[1024];
74 uchar_t result_buf[1024];
75 uchar_t tmp_buf[1024];
76 const uchar_t * source;
77 uchar_t * result;
78 uchar_t * tmp;
79 size_t source_len;
80 size_t result_len;
81 size_t result_len2;
82 size_t tmp_len;
83 size_t s;
84 int j;
85 ulong_t l;
86
87 #ifdef _LITTLE_ENDIAN
88 #define CHECKWITHFILE \
89 if( flag_check > 0 ) { \
90 l = 0U; \
91 for( j = sizeof (tmp_buf) - tmp_len -1; \
92 j >= ((i == 0) ? 2: 0); j--) \
93 l = (l << 8) + ((uint_t)tmp_buf[j]); \
94 if (l != tbl[i].u4 ) fprintf(stderr, "%x != %x \n", l, tbl[i].u4 ); \
95 }
96 #else
97 #define CHECKWITHFILE \
98 if( flag_check > 0 ) { \
99 l = 0U; \
100 j = ((i == 0) ? 2: 0); \
101 for(; j < sizeof (tmp_buf) - tmp_len ; j++) \
102 l = (l << 8) + ((uint_t)tmp_buf[j]); \
103 if (l != tbl[i].u4 ) fprintf(stderr, "%x != %x \n", l, tbl[i].u4 ); \
104 }
105 #endif
106
107 #define PRINTUNICODE \
108 tmp = tmp_buf; \
109 tmp_len = sizeof (tmp_buf); \
110 result = result_buf; \
111 result_len2 = sizeof (result_buf) - result_len; \
112 s = iconv(cd2, (const char**)&result, &result_len2, (char**)&tmp, &tmp_len); \
113 if (s != 0) { \
114 printf(" \n stoped \n"); \
115 fprintf(stderr, "fail to con_LITTLE_ENDIANvert UTF-8 to Unicode\n"); \
116 exit (status); \
117 } \
118 printf("\t"); \
119 for( j = 0; j < sizeof (tmp_buf) - tmp_len ; j++) \
120 printf("%02x", (uchar_t)tmp_buf[j]); \
121 CHECKWITHFILE
122
123 #define COMPARE \
124 tmp = tmp_buf; \
125 tmp_len = sizeof (tmp_buf); \
126 result = result_buf; \
127 result_len2 = sizeof (result_buf) - result_len; \
128 s = iconv(cd3, (const char**)&result, &result_len2, (char**)&tmp, &tmp_len); \
129 if (s != 0) { \
130 printf(" \n WARNING \n"); \
131 fprintf(stderr, "fail to convert UTF-8 to Orignal Codeset(%x)\n",\
132 i); \
133 fprintf(stderr, "errno=%d %d %d\n", \
134 errno, \
135 sizeof (result_buf) - result_len - result_len2, \
136 result - result_buf); \
137 exit (status); \
138 } \
139 printf("\t"); \
140 if ((sizeof (tmp_buf) - tmp_len != 1) || \
141 ((uchar_t)tmp_buf[0] != (uchar_t)i )) { \
142 printf("\t-> 0x%2x \n warning \n", (uchar_t)tmp_buf[0] ); \
143 fprintf(stderr, " Converting answer is not the same (0x%02x) for (0x%02x)\n", \
144 (uchar_t)tmp_buf[0], i); \
145 }
146
147 #define DATASIZE 1
148
149 source_buf[0] = i;
150 source = source_buf;
151 source_len = DATASIZE;
152
153 result = result_buf;
154 result_len = sizeof (result_buf);
155
156 s = iconv(cd, (const char**)&source, &source_len, (char**)&result, &result_len);
157
158 status = 1;
159 if (((size_t)(0)) == s) {
160 if ((source_len != 0) ||
161 ((source - source_buf) != DATASIZE)) {
162 fprintf(stderr, ": %d %d %d\n",
163 errno,
164 source_len,
165 source - source_buf);
166 exit(status);
167 }
168 printf("0x%02x\t0x", i);
169 for( j = 0; j < sizeof (result_buf) - result_len ; j++)
170 printf("%02x", (uchar_t)result_buf[j]);
171 PRINTUNICODE
172 COMPARE
173 printf("\n");
174 return;
175 }
176
177 status += 1;
178 if (((size_t)(-1)) == s) {
179 if (errno == EILSEQ) {
180 printf("0x%02x EILSEQ\n", i);
181 return;
182 }
183 fprintf(stderr, "Error for source 0x%02x(%d): %d %d %d %d %d\n",
184 i, i,
185 errno,
186 (DATASIZE) - source_len, /* not converted size */
187 source - source_buf,
188 (sizeof (result_buf)) - result_len,
189 result - result_buf);
190 exit(status);
191 }
192
193 status += 1;
194 exit(status);
195 }
196
main(int argc,char ** argv)197 main(int argc, char ** argv)
198 {
199 int r;
200 char * p;
201 iconv_t cd;
202 iconv_t cd2;
203 iconv_t cd3;
204 int i, j, k;
205 char *dir;
206
207 ME = basename(argv[0]);
208 setlocale(LC_ALL, "");
209 status = 100;
210
211 for (j = 1; j < argc; j++) {
212 if (argv[j][0] != '-')
213 break;
214 for (k = 1; ; k++) {
215 if (argv[j][k] == '\0')
216 break;
217 if (argv[j][k] == 'c') {
218 flag_check = 1;
219 j++;
220 if (j >= argc) usage(-1);
221 dir = argv[j];
222 continue;
223 }
224 }
225 }
226 if (j >= argc) usage(-1);
227
228
229 if( flag_check > 0 ) mk_data(dir, argv[j]);
230
231 cd = iconv_open("UTF-8", argv[j]);
232 if (((iconv_t)(-1)) == cd) {
233 perror("iconv_open");
234 exit(status);
235 }
236
237 cd2 = iconv_open("UCS-2", "UTF-8");
238 if (((iconv_t)(-1)) == cd2) {
239 perror("iconv_open for UTF-8");
240 exit(status);
241 }
242
243 cd3 = iconv_open(argv[j], "UTF-8");
244 if (((iconv_t)(-1)) == cd3) {
245 perror("iconv_open for reverse");
246 exit(status);
247 }
248
249 /*
250 * main logic
251 */
252 for (i = 0; i <= 0xff; i++)
253 validate(i, cd, cd2, cd3);
254
255 status = 200;
256 r = iconv_close(cd);
257 if (-1 == r) {
258 perror("iconv_close");
259 exit(status);
260 }
261
262 r = iconv_close(cd2);
263 if (-1 == r) {
264 perror("iconv_close for UTF-8");
265 exit(status);
266 }
267
268 return (0);
269 }
270
271 static void
mk_data(char * dir,char * name)272 mk_data(char *dir, char* name)
273 {
274 register int i, j;
275 char buf[BUFSIZ], num[100];
276 unsigned int l, k;
277 FILE *fd;
278 char file[BUFSIZ];
279
280 sprintf( file, "%s/%s.txt", dir, name);
281 if ((fd = fopen(file, "r")) == NULL) {
282 perror("fopen");
283 exit (-1);
284 }
285 /* for information file, pari data is created */
286 while (fgets(buf, BUFSIZ, fd)) {
287 i = 0;
288 while (buf[i] && isspace(buf[i]))
289 i++;
290 if (buf[i] == '#' || buf[i] == '\0')
291 continue;
292
293 for (j = 0; !isspace(buf[i]); i++, j++)
294 num[j] = buf[i];
295 num[j] = '\0';
296
297 k = strtol(num, (char **)NULL, 16);
298
299 while (isspace(buf[i]))
300 i++;
301
302 if (buf[i] == '#' || buf[i] == '\0')
303 /* undefined */
304 continue;
305
306 for (j = 0; !isspace(buf[i]); i++, j++)
307 num[j] = buf[i];
308 num[j] = '\0';
309
310 l = strtol(num, (char **)NULL, 16);
311
312 tbl[k].u4 = l;
313 tbl[k].from = k;
314 }
315 }
316