1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999 by Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
26
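/*
 * Each code point is pushed through a chain of conversions and the
 * result is converted back for comparison.  For example, with an IBM
 * code set as the to-code:
 *
 *	data:	UCS  ->  UTF-8   ->  IBM     ->  UTF-8
 *	stage:	(1)      (2)         (3)         (4)
 *	buffer:	tmp      source      result      tmp
 *
 * output:	(1), (2) and (3), line by line
 * comparing:	(2) and (4)
 */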
35
36 #include <stdio.h>
37 #include <libgen.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <locale.h>
41 #include <iconv.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <stdarg.h>
45 #include <sys/types.h>
46 #include <sys/wait.h>
47
/* Program name: main() sets it to basename(argv[0]); usage() prints it. */
char * ME;
/*
 * Exit code handed to exit() when a fatal error is detected.  main() and
 * validate() assign a new value before each stage, so the code a failed
 * run exits with identifies the stage that failed.
 */
int status;
/* Nonzero: chkprint() writes to stdout.  main() clears it for option -d. */
int flag_display = 1;
/*
 * Nonzero: main() validates only a selected list of code points.
 * main() clears it for option -b, in which case every value from 0
 * through 0x80000000 is validated.
 */
int flag_bubun = 1;
52
53
54 void
usage(int status)55 usage(int status)
56 {
57 fprintf(stderr, "Usage: %s [-b] [-d] to-code\n", ME);
58 exit(status);
59 }
60
61
62 void
chkprint(char * format,...)63 chkprint(char *format, ...)
64 {
65 va_list ap;
66 va_start(ap, format);
67
68 if (0 != flag_display) {
69 (void) vfprintf(stdout, format, ap);
70 }
71 va_end(ap);
72 }
73
74
75 void
validate(uint_t i,iconv_t cd,iconv_t cd2,iconv_t cd3)76 validate(uint_t i, iconv_t cd, iconv_t cd2, iconv_t cd3)
77 {
78 char source_buf[1024];
79 char result_buf[1024];
80 char tmp_buf[1024];
81 char * source;
82 char * result;
83 char * tmp;
84 size_t source_len;
85 size_t result_len;
86 size_t result_len2;
87 size_t tmp_len;
88 size_t s;
89 int j;
90 ushort_t *shortp;
91 uint_t *intp;
92
93
94 #define PREPARE_ILLEGALUTF8 \
95 if (i == 0xfffe) { \
96 source_buf[0] = 0xef; \
97 source_buf[1] = 0xbf; \
98 source_buf[2] = 0xbe; \
99 source_buf[3] = 0x00; \
100 source = source_buf; \
101 source_len = 3; \
102 chkprint("U+%04x\t** %x **", i, 0xefbfbe); \
103 } else if (i == 0xffff) { \
104 source_buf[0] = 0xef; \
105 source_buf[1] = 0xbf; \
106 source_buf[2] = 0xbf; \
107 source_buf[3] = 0x00; \
108 source = source_buf; \
109 source_len = 3; \
110 chkprint("U+%04x\t** %x **", i, 0xefbfbf); \
111 } else if (i > 0x7fffffff) { \
112 source_buf[0] = 0x0; \
113 source_buf[1] = 0x0; \
114 source_buf[2] = 0x0; \
115 source_buf[3] = 0x0; \
116 source_buf[4] = 0x0; \
117 source_buf[5] = 0xfe; \
118 source_buf[6] = 0x0; \
119 source = source_buf; \
120 source_len = 7; \
121 chkprint("U+%04x\t** %x **", i, 0xfe); \
122 }
123
124 #define DATASIZE 4
125 /*
126 shortp = (ushort_t*)&tmp_buf[0];
127 *shortp = 0xfeff;
128 shortp = (ushort_t*)&tmp_buf[2];
129 *shortp = i;
130 */
131 /* chkprint("U+"); */ \
132 /* for( j = 0; j < tmp_len ; j++) */ \
133 /* chkprint("%02x", (uchar_t)tmp[j]); */ \
134 /*
135 shortp = (ushort_t*)&tmp_buf[0]; \
136 *shortp = i; \
137 */
138
139 #define PREPAREUTF8 \
140 tmp = tmp_buf; \
141 tmp_len = DATASIZE; \
142 intp = (uint_t*)&tmp_buf[0]; \
143 *intp = i; \
144 source = source_buf; \
145 source_len = sizeof (source_buf); \
146 \
147 chkprint("U+%04x", i); \
148 s = iconv(cd2, (const char**)&tmp, &tmp_len, &source, &source_len); \
149 if (s != 0) { \
150 chkprint(" \n stopped \n"); \
151 fprintf(stderr, "fail to convert Unicode to UTF-8\n"); \
152 exit (status); \
153 } \
154 chkprint("\t0x"); \
155 for( j = 0; j < sizeof (source_buf) - source_len; j++) \
156 chkprint("%02x", (uchar_t)source_buf[j]); \
157 source_len = sizeof (source_buf) - source_len; \
158 source = &source_buf[0];
159
160 #define COMPARE_ERROR \
161 chkprint("\t-> 0x");\
162 for (j = 0; j < sizeof (tmp_buf) - tmp_len; j++) { \
163 chkprint("%02x", (uchar_t)tmp_buf[j]);\
164 } \
165 chkprint("\n warning \n"); \
166 fprintf(stderr, " Converting answer is not the same for (U+%04x)\n", \
167 i);
168
169 #define COMPARE \
170 tmp = tmp_buf; \
171 tmp_len = sizeof (tmp_buf); \
172 result = result_buf; \
173 result_len2 = sizeof (result_buf) - result_len; \
174 s = iconv(cd3, (const char**)&result, &result_len2, &tmp, &tmp_len); \
175 if (s != 0) { \
176 chkprint(" \n WARNING \n"); \
177 fprintf(stderr, "fail to convert Orignal Codeset to UTF-8\n",\
178 i); \
179 fprintf(stderr, "errno=%d %d %d\n", \
180 errno, \
181 sizeof (result_buf) - result_len - result_len2, \
182 result - result_buf); \
183 exit (status); \
184 } \
185 chkprint("\t"); \
186 if (sizeof (tmp_buf) - tmp_len != source_len) { \
187 COMPARE_ERROR \
188 } else { \
189 for (j = 0; j < source_len; j++) { \
190 if ((uchar_t)tmp_buf[j] != (uchar_t)source_buf[j]) { \
191 COMPARE_ERROR \
192 } \
193 }\
194 }
195
196
197 /*
198 * LOGIC START
199 */
200
201 if (i == 0xfffe || i == 0xffff || i > 0x7fffffff) {
202 PREPARE_ILLEGALUTF8
203 } else {
204 PREPAREUTF8
205 }
206
207 result = result_buf;
208 result_len = sizeof (result_buf);
209 tmp_len = source_len; /* save to compare source data */
210 s = iconv(cd, (const char**)&source, &source_len, &result,
211 &result_len);
212
213 status = 1;
214 if (i == 0xfffe || i == 0xffff || i > 0x7fffffff) {
215 if ((((size_t)0) == s) ||
216 (errno != EILSEQ)) {
217 fprintf(stderr, "EILSEQ expected for 0x%x: %d %d %d\n",
218 i,
219 errno,
220 source_len,
221 source - source_buf);
222 }
223 }
224 if (((size_t)(0)) == s) {
225 if ((source_len != 0) ||
226 ((source - source_buf) != tmp_len) ||
227 ((result - result_buf + result_len) !=
228 sizeof (result_buf))) {
229 fprintf(stderr, ": %d %d %d\n",
230 errno,
231 source_len,
232 source - source_buf);
233 exit(status);
234 }
235 chkprint("\t0x");
236 for( j = 0; j < sizeof (result_buf) - result_len ; j++)
237 chkprint("%02x", (uchar_t)result_buf[j]);
238 source_len = tmp_len;
239 COMPARE
240 chkprint("\n");
241 return;
242 }
243
244 status += 1;
245 if (((size_t)(-1)) == s) {
246 if (errno == EILSEQ) {
247 if (((source - source_buf) !=
248 (tmp_len - source_len)) ||
249 ((result - result_buf + result_len) !=
250 sizeof (result_buf))) {
251 fprintf(stderr, ": %d %d %d\n",
252 errno,
253 source_len,
254 source - source_buf);
255 exit(status);
256 }
257 chkprint("\tEILSEQ\n", i);
258 return;
259 }
260 fprintf(stderr, "Error for source U+%04x: %d %d %d %d %d\n",
261 i,
262 errno,
263 (DATASIZE) - source_len, /* not converted size */
264 source - source_buf,
265 (sizeof (result_buf)) - result_len,
266 result - result_buf);
267 exit(status);
268 }
269
270 status += 1;
271 exit(status);
272 }
273
main(int argc,char ** argv)274 main(int argc, char ** argv)
275 {
276 int r;
277 char * p;
278 iconv_t cd;
279 iconv_t cd2;
280 iconv_t cd3;
281 uint_t i, j, k;
282
283 ME = basename(argv[0]);
284 setlocale(LC_ALL, "");
285 status = 100;
286
287
288 for (j = 1; j < argc; j++) {
289 if (argv[j][0] != '-')
290 break;
291 for (k = 1; ; k++) {
292 if (argv[j][k] == '\0')
293 break;
294 if (argv[j][k] == 'b') {
295 flag_bubun = 0;
296 continue;
297 }
298 if (argv[j][k] == 'd') {
299 flag_display = 0;
300 continue;
301 }
302 }
303 }
304 if (j >= argc) usage(-1);
305
306 chkprint( "#UCS-4\tUTF-8\t* %s *\n", argv[j]);
307
308 cd = iconv_open( argv[j], "UTF-8"); /* to, from */
309 if (((iconv_t)(-1)) == cd) {
310 perror("iconv_open");
311 exit(status);
312 }
313
314 cd2 = iconv_open("UTF-8", "UCS-4");
315 if (((iconv_t)(-1)) == cd2) {
316 perror("iconv_open for UTF-8");
317 exit(status);
318 }
319
320 cd3 = iconv_open("UTF-8", argv[j]);
321 if (((iconv_t)(-1)) == cd3) {
322 perror("iconv_open for reverse");
323 exit(status);
324 }
325
326
327 /*
328 * main logic
329 */
330 if (flag_bubun) {
331 for (i = 0; i <= 0xff; i++)
332 validate(i, cd, cd2, cd3);
333 validate(0x100, cd, cd2, cd3);
334 validate(0x3ff, cd, cd2, cd3);
335 validate(0x400, cd, cd2, cd3);
336 validate(0xfff, cd, cd2, cd3);
337 validate(0x1000, cd, cd2, cd3);
338 validate(0x3fff, cd, cd2, cd3);
339 validate(0x4000, cd, cd2, cd3);
340 validate(0xfffd, cd, cd2, cd3);
341 validate(0xfffe, cd, cd2, cd3); /* error */
342 validate(0xffff, cd, cd2, cd3); /* error */
343 validate(0x10000, cd, cd2, cd3);
344 validate(0x3ffff, cd, cd2, cd3);
345 validate(0x40000, cd, cd2, cd3);
346 validate(0xfffff, cd, cd2, cd3);
347 validate(0x100000, cd, cd2, cd3);
348 validate(0x1fffff, cd, cd2, cd3);
349 validate(0x200000, cd, cd2, cd3);
350 validate(0x3fffff, cd, cd2, cd3);
351 validate(0x400000, cd, cd2, cd3);
352 validate(0xffffff, cd, cd2, cd3);
353 validate(0x1000000, cd, cd2, cd3);
354 validate(0x3ffffff, cd, cd2, cd3);
355 validate(0x4000000, cd, cd2, cd3);
356 validate(0xfffffff, cd, cd2, cd3);
357 validate(0x10000000, cd, cd2, cd3);
358 validate(0x7fffffff, cd, cd2, cd3);
359 validate(0x80000000, cd, cd2, cd3); /* error */
360 } else {
361 int k;
362 for (i = 0, k = 0; i <= 0x80000000; i++, k++) {
363 validate(i, cd, cd2, cd3);
364 if ((k == 0x1000000) &&
365 (0 == flag_display)) {
366 printf(" i < 0x%x: checked\n", i);
367 k = 0;
368 }
369
370 }
371 }
372
373 status = 200;
374 r = iconv_close(cd);
375 if (-1 == r) {
376 perror("iconv_close");
377 exit(status);
378 }
379
380 r = iconv_close(cd2);
381 if (-1 == r) {
382 perror("iconv_close for UTF-8");
383 exit(status);
384 }
385
386 r = iconv_close(cd3);
387 if (-1 == r) {
388 perror("iconv_close for reverse");
389 exit(status);
390 }
391
392 return (0);
393 }
394