1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999 by Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
26 
/*
 * Round-trip check of an iconv converter.  For example, with an IBM
 * codeset as the codeset under test:
 *
 *               UCS -> UTF-8 -> IBM -> UTF-8
 *                (1)     (2)     (3)     (4)
 *               tmp    source   result  tmp
 *
 * Output:     (1) (2) (3), one line per code point.
 * Comparison: (2) against (4).
 */
35 
36 #include <stdio.h>
37 #include <libgen.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <locale.h>
41 #include <iconv.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <stdarg.h>
45 #include <sys/types.h>
46 #include <sys/wait.h>
47 
/* Program name for diagnostics; main() sets it to basename(argv[0]). */
char	*ME;

/* Exit code handed to exit() by whichever check fails next. */
int	status;

/* Nonzero: chkprint() writes to stdout.  The -d option clears it. */
int	flag_display = 1;

/*
 * Nonzero: main() validates only a sample of code points instead of the
 * whole range.  The -b option clears it.
 */
int	flag_bubun = 1;
52 
53 
54 void
usage(int status)55 usage(int status)
56 {
57 	fprintf(stderr, "Usage: %s [-b] [-d] to-code\n", ME);
58 	exit(status);
59 }
60 
61 
62 void
chkprint(char * format,...)63 chkprint(char *format, ...)
64 {
65 	va_list		ap;
66 	va_start(ap, format);
67 
68 	if (0 != flag_display)  {
69 		(void) vfprintf(stdout, format, ap);
70 	}
71 	va_end(ap);
72 }
73 
74 
75 void
validate(uint_t i,iconv_t cd,iconv_t cd2,iconv_t cd3)76 validate(uint_t i, iconv_t cd, iconv_t cd2, iconv_t cd3)
77 {
78 	char		source_buf[1024];
79 	char		result_buf[1024];
80 	char		tmp_buf[1024];
81 	char *		source;
82 	char *		result;
83 	char *		tmp;
84 	size_t		source_len;
85 	size_t		result_len;
86 	size_t		result_len2;
87 	size_t		tmp_len;
88 	size_t		s;
89 	int		j;
90 	ushort_t	*shortp;
91 	uint_t	*intp;
92 
93 
94 #define PREPARE_ILLEGALUTF8 \
95 	if (i == 0xfffe) { \
96 		source_buf[0] = 0xef; \
97 		source_buf[1] = 0xbf; \
98 		source_buf[2] = 0xbe; \
99 		source_buf[3] = 0x00; \
100 		source = source_buf;  \
101 		source_len = 3; \
102 		chkprint("U+%04x\t** %x **", i, 0xefbfbe); \
103 	} else if (i == 0xffff) { \
104 		source_buf[0] = 0xef; \
105 		source_buf[1] = 0xbf; \
106 		source_buf[2] = 0xbf; \
107 		source_buf[3] = 0x00; \
108 		source = source_buf;  \
109 		source_len = 3; \
110 		chkprint("U+%04x\t** %x **", i, 0xefbfbf); \
111 	} else if (i > 0x7fffffff) { \
112 		source_buf[0] = 0x0; \
113 		source_buf[1] = 0x0; \
114 		source_buf[2] = 0x0; \
115 		source_buf[3] = 0x0; \
116 		source_buf[4] = 0x0; \
117 		source_buf[5] = 0xfe; \
118 		source_buf[6] = 0x0; \
119 		source = source_buf;  \
120 		source_len = 7; \
121 		chkprint("U+%04x\t** %x **", i, 0xfe); \
122 	}
123 
124 #define DATASIZE 4
125 	/*
126 	shortp = (ushort_t*)&tmp_buf[0];
127 	*shortp = 0xfeff;
128 	shortp = (ushort_t*)&tmp_buf[2];
129 	*shortp = i;
130 	*/
131 	/* chkprint("U+");  */ \
132 	/*	for( j = 0; j < tmp_len ; j++)  */ \
133 	/* 	chkprint("%02x", (uchar_t)tmp[j]); */ \
134 	/*
135 	shortp = (ushort_t*)&tmp_buf[0]; \
136 	*shortp = i; \
137 	*/
138 
139 #define PREPAREUTF8 \
140 	tmp = tmp_buf; \
141 	tmp_len = DATASIZE; \
142 	intp = (uint_t*)&tmp_buf[0]; \
143 	*intp = i; \
144 	source = source_buf; \
145 	source_len = sizeof (source_buf); \
146 	\
147 	chkprint("U+%04x", i); \
148 	s = iconv(cd2, (const char**)&tmp, &tmp_len, &source, &source_len); \
149 	if (s != 0) { \
150 		chkprint(" \n stopped \n"); \
151 		fprintf(stderr, "fail to convert Unicode to UTF-8\n"); \
152 		exit (status); \
153 	} \
154 	chkprint("\t0x"); \
155 	for( j = 0; j < sizeof (source_buf) - source_len; j++) \
156 		chkprint("%02x", (uchar_t)source_buf[j]); \
157 	source_len = sizeof (source_buf) - source_len; \
158 	source = &source_buf[0];
159 
160 #define	COMPARE_ERROR \
161 	chkprint("\t-> 0x");\
162 	for (j = 0; j <  sizeof (tmp_buf) - tmp_len; j++) { \
163 		chkprint("%02x", (uchar_t)tmp_buf[j]);\
164 	} \
165 	chkprint("\n warning \n"); \
166 	fprintf(stderr, " Converting answer is not the same for  (U+%04x)\n", \
167 		i);
168 
169 #define COMPARE \
170 	tmp = tmp_buf; \
171 	tmp_len = sizeof (tmp_buf); \
172 	result = result_buf; \
173 	result_len2 = sizeof (result_buf) - result_len; \
174 	s = iconv(cd3, (const char**)&result, &result_len2, &tmp, &tmp_len); \
175 	if (s != 0) { \
176 		chkprint(" \n WARNING \n"); \
177 		fprintf(stderr, "fail to convert Orignal Codeset to UTF-8\n",\
178 		i); \
179 		fprintf(stderr, "errno=%d %d %d\n", \
180 			errno, \
181 			sizeof (result_buf) - result_len - result_len2, \
182 			result - result_buf); \
183 		exit (status); \
184 	} \
185 	chkprint("\t"); \
186 	if (sizeof (tmp_buf) - tmp_len != source_len) { \
187 		COMPARE_ERROR \
188 	} else { \
189 		for (j = 0; j < source_len; j++) { \
190 			if ((uchar_t)tmp_buf[j] != (uchar_t)source_buf[j]) { \
191 				COMPARE_ERROR \
192 			} \
193 		}\
194 	}
195 
196 
197 	/*
198 	 *	LOGIC START
199 	 */
200 
201 	if (i == 0xfffe || i == 0xffff || i > 0x7fffffff) {
202 		PREPARE_ILLEGALUTF8
203 	} else {
204 		PREPAREUTF8
205 	}
206 
207 	result = result_buf;
208 	result_len = sizeof (result_buf);
209 	tmp_len = source_len; /* save to compare source data */
210 	s = iconv(cd,  (const char**)&source, &source_len, &result,
211 		&result_len);
212 
213 	status = 1;
214 	if (i == 0xfffe || i == 0xffff || i > 0x7fffffff) {
215 		if ((((size_t)0) == s) ||
216 			(errno != EILSEQ)) {
217 			fprintf(stderr, "EILSEQ expected for 0x%x: %d %d %d\n",
218 			i,
219 			errno,
220 		        source_len,
221 			source - source_buf);
222 		}
223 	}
224 	if (((size_t)(0)) == s) {
225 		if ((source_len != 0) ||
226 			((source - source_buf) != tmp_len) ||
227 			((result - result_buf + result_len) !=
228 			sizeof (result_buf))) {
229 			fprintf(stderr, ": %d %d %d\n",
230 				errno,
231 				source_len,
232 				source - source_buf);
233 			exit(status);
234 		}
235 		chkprint("\t0x");
236 		for( j = 0; j < sizeof (result_buf) - result_len ; j++)
237 			chkprint("%02x", (uchar_t)result_buf[j]);
238 		source_len = tmp_len;
239 		COMPARE
240 		chkprint("\n");
241 		return;
242 	}
243 
244 	status += 1;
245 	if (((size_t)(-1)) == s) {
246 		if (errno == EILSEQ) {
247 			if (((source - source_buf) !=
248 				(tmp_len - source_len)) ||
249 				((result - result_buf + result_len) !=
250 				sizeof (result_buf))) {
251 				fprintf(stderr, ": %d %d %d\n",
252 					errno,
253 					source_len,
254 					source - source_buf);
255 				exit(status);
256 			}
257 			chkprint("\tEILSEQ\n", i);
258 			return;
259 		}
260 		fprintf(stderr, "Error for source U+%04x: %d %d %d %d %d\n",
261 			i,
262 			errno,
263 			(DATASIZE) - source_len, /* not converted size */
264 			source - source_buf,
265 			(sizeof (result_buf)) - result_len,
266 			result - result_buf);
267 		exit(status);
268 	}
269 
270 	status += 1;
271 	exit(status);
272 }
273 
main(int argc,char ** argv)274 main(int argc, char ** argv)
275 {
276 	int		r;
277 	char *		p;
278 	iconv_t		cd;
279 	iconv_t		cd2;
280 	iconv_t		cd3;
281 	uint_t		i, j, k;
282 
283 	ME = basename(argv[0]);
284 	setlocale(LC_ALL, "");
285 	status = 100;
286 
287 
288 	for (j = 1;  j < argc; j++) {
289 		if (argv[j][0] != '-')
290 			break;
291 		for (k = 1; ; k++) {
292 			if (argv[j][k] == '\0')
293 				break;
294 			if (argv[j][k] == 'b') {
295 				flag_bubun = 0;
296 				continue;
297 			}
298 			if (argv[j][k] == 'd') {
299 				flag_display = 0;
300 				continue;
301 			}
302 		}
303 	}
304 	if (j >= argc) usage(-1);
305 
306 	chkprint( "#UCS-4\tUTF-8\t* %s *\n", argv[j]);
307 
308 	cd = iconv_open( argv[j], "UTF-8"); /* to, from */
309 	if (((iconv_t)(-1)) == cd) {
310 		perror("iconv_open");
311 		exit(status);
312 	}
313 
314 	cd2 = iconv_open("UTF-8", "UCS-4");
315 	if (((iconv_t)(-1)) == cd2) {
316 		perror("iconv_open for UTF-8");
317 		exit(status);
318 	}
319 
320 	cd3 = iconv_open("UTF-8", argv[j]);
321 	if (((iconv_t)(-1)) == cd3) {
322 		perror("iconv_open for reverse");
323 		exit(status);
324 	}
325 
326 
327 	/*
328 	 *	main logic
329 	 */
330 	if (flag_bubun) {
331 		for (i = 0; i <= 0xff; i++)
332 			validate(i, cd, cd2, cd3);
333 		validate(0x100, cd, cd2, cd3);
334 		validate(0x3ff, cd, cd2, cd3);
335 		validate(0x400, cd, cd2, cd3);
336 		validate(0xfff, cd, cd2, cd3);
337 		validate(0x1000, cd, cd2, cd3);
338 		validate(0x3fff, cd, cd2, cd3);
339 		validate(0x4000, cd, cd2, cd3);
340 		validate(0xfffd, cd, cd2, cd3);
341 		validate(0xfffe, cd, cd2, cd3);    /* error */
342 		validate(0xffff, cd, cd2, cd3);    /* error */
343 		validate(0x10000, cd, cd2, cd3);
344 		validate(0x3ffff, cd, cd2, cd3);
345 		validate(0x40000, cd, cd2, cd3);
346 		validate(0xfffff, cd, cd2, cd3);
347 		validate(0x100000, cd, cd2, cd3);
348 		validate(0x1fffff, cd, cd2, cd3);
349 		validate(0x200000, cd, cd2, cd3);
350 		validate(0x3fffff, cd, cd2, cd3);
351 		validate(0x400000, cd, cd2, cd3);
352 		validate(0xffffff, cd, cd2, cd3);
353 		validate(0x1000000, cd, cd2, cd3);
354 		validate(0x3ffffff, cd, cd2, cd3);
355 		validate(0x4000000, cd, cd2, cd3);
356 		validate(0xfffffff, cd, cd2, cd3);
357 		validate(0x10000000, cd, cd2, cd3);
358 		validate(0x7fffffff, cd, cd2, cd3);
359 		validate(0x80000000, cd, cd2, cd3); /* error */
360 	} else {
361 		int	k;
362 		for (i = 0, k = 0; i <= 0x80000000; i++, k++) {
363 			validate(i, cd, cd2, cd3);
364 			if ((k == 0x1000000) &&
365 				(0 == flag_display)) {
366 				printf(" i < 0x%x: checked\n", i);
367 				k = 0;
368 			}
369 
370 		}
371 	}
372 
373 	status = 200;
374 	r = iconv_close(cd);
375 	if (-1 == r) {
376 		perror("iconv_close");
377 		exit(status);
378 	}
379 
380 	r = iconv_close(cd2);
381 	if (-1 == r) {
382 		perror("iconv_close for UTF-8");
383 		exit(status);
384 	}
385 
386 	r = iconv_close(cd3);
387 	if (-1 == r) {
388 		perror("iconv_close for reverse");
389 		exit(status);
390 	}
391 
392 	return (0);
393 }
394