1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999 by Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
26 
/*
 * This program converts each additional codeset character from 0x00 through
 * 0xff to the UTF-8 codeset, and then converts the resulting UTF-8 character
 * back to the original codeset.
 * For example,
 *               IBM -> UTF-8 -> IBM
 *                (1)     (2)     (3)
 *                            -> Unicode Scalar
 *                                (4)
 *                           output     (1) (2) (4) line by line
 *                           comparing  (1) (3)
 */
39 
#include <stdio.h>
#include <libgen.h>
#include <stdlib.h>
#include <unistd.h>
#include <locale.h>
#include <iconv.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
50 
/* Fills tbl[] from the data file "<dir>/<name>.txt"; defined at end of file. */
static void mk_data(char *,char *);

/* Program name (basename of argv[0]), set in main() and printed by usage(). */
char *	ME;
/* Exit code handed to exit() on failure; updated as the program progresses. */
int	status;
static int	flag_check = 0; /* check with data file */

/*
 * Expected-value table filled by mk_data() and consulted by validate() when
 * flag_check is set.  Entries are indexed by the source-codeset value:
 *   from - the source-codeset value itself (same as the index)
 *   u4   - the second hexadecimal field read from the data file for that
 *          value; validate() compares it against the UCS-2 conversion result
 */
static struct  {
	unsigned int	from;
	unsigned int	u4;
} tbl[0x10000];
61 
62 
63 void
usage(int status)64 usage(int status)
65 {
66 	fprintf(stderr, "Usage: %s from-code\n", ME);
67 	exit(status);
68 }
69 
70 void
validate(int i,iconv_t cd,iconv_t cd2,iconv_t cd3)71 validate(int i, iconv_t cd, iconv_t cd2, iconv_t cd3)
72 {
73 	uchar_t		source_buf[1024];
74 	uchar_t		result_buf[1024];
75 	uchar_t		tmp_buf[1024];
76 	const uchar_t *	source;
77 	uchar_t *		result;
78 	uchar_t *		tmp;
79 	size_t		source_len;
80 	size_t		result_len;
81 	size_t		result_len2;
82 	size_t		tmp_len;
83 	size_t		s;
84 	int		j;
85 	ulong_t   	l;
86 
87 #ifdef _LITTLE_ENDIAN
88 #define CHECKWITHFILE \
89 	if( flag_check > 0 ) { \
90 		l = 0U; \
91 		for( j =  sizeof (tmp_buf) - tmp_len -1; \
92 		     j >= ((i == 0) ? 2: 0); j--) \
93 			l = (l << 8) + ((uint_t)tmp_buf[j]); \
94 		if (l != tbl[i].u4 ) fprintf(stderr, "%x != %x \n", l, tbl[i].u4 ); \
95 	}
96 #else
97 #define CHECKWITHFILE \
98 	if( flag_check > 0 ) { \
99 		l = 0U; \
100 		j = ((i == 0) ? 2: 0); \
101 		for(; j < sizeof (tmp_buf) - tmp_len ; j++) \
102 			l = (l << 8) + ((uint_t)tmp_buf[j]); \
103 		if (l != tbl[i].u4 ) fprintf(stderr, "%x != %x \n", l, tbl[i].u4 ); \
104 	}
105 #endif
106 
107 #define PRINTUNICODE \
108 	tmp = tmp_buf; \
109 	tmp_len = sizeof (tmp_buf); \
110 	result = result_buf; \
111 	result_len2 = sizeof (result_buf) - result_len; \
112 	s = iconv(cd2, (const char**)&result, &result_len2, (char**)&tmp, &tmp_len); \
113 	if (s != 0) { \
114 		printf(" \n stoped \n"); \
115 		fprintf(stderr, "fail to con_LITTLE_ENDIANvert UTF-8 to Unicode\n"); \
116 		exit (status); \
117 	} \
118 	printf("\t"); \
119 	for( j = 0; j < sizeof (tmp_buf) - tmp_len ; j++) \
120 		printf("%02x", (uchar_t)tmp_buf[j]); \
121 	CHECKWITHFILE
122 
123 #define COMPARE \
124 	tmp = tmp_buf; \
125 	tmp_len = sizeof (tmp_buf); \
126 	result = result_buf; \
127 	result_len2 = sizeof (result_buf) - result_len; \
128 	s = iconv(cd3, (const char**)&result, &result_len2, (char**)&tmp, &tmp_len); \
129 	if (s != 0) { \
130 		printf(" \n WARNING \n"); \
131 		fprintf(stderr, "fail to convert UTF-8 to Orignal Codeset(%x)\n",\
132 		i); \
133 		fprintf(stderr, "errno=%d %d %d\n", \
134 			errno, \
135 			sizeof (result_buf) - result_len - result_len2, \
136 			result - result_buf); \
137 		exit (status); \
138 	} \
139 	printf("\t"); \
140 	if ((sizeof (tmp_buf) - tmp_len != 1) || \
141 	    ((uchar_t)tmp_buf[0] != (uchar_t)i )) { \
142 		printf("\t-> 0x%2x \n warning \n", (uchar_t)tmp_buf[0] ); \
143 		fprintf(stderr, " Converting answer is not the same (0x%02x) for  (0x%02x)\n", \
144 		(uchar_t)tmp_buf[0], i); \
145 	}
146 
147 #define DATASIZE 1
148 
149 	source_buf[0] = i;
150 	source = source_buf;
151 	source_len = DATASIZE;
152 
153 	result = result_buf;
154 	result_len = sizeof (result_buf);
155 
156 	s = iconv(cd, (const char**)&source, &source_len, (char**)&result, &result_len);
157 
158 	status = 1;
159 	if (((size_t)(0)) == s) {
160 		if ((source_len != 0) ||
161 		    ((source - source_buf) != DATASIZE)) {
162 			fprintf(stderr, ": %d %d %d\n",
163 				errno,
164 				source_len,
165 				source - source_buf);
166 			exit(status);
167 		}
168 		printf("0x%02x\t0x", i);
169 		for( j = 0; j < sizeof (result_buf) - result_len ; j++)
170 			printf("%02x", (uchar_t)result_buf[j]);
171 		PRINTUNICODE
172 		COMPARE
173 		printf("\n");
174 		return;
175 	}
176 
177 	status += 1;
178 	if (((size_t)(-1)) == s) {
179 		if (errno == EILSEQ) {
180 			printf("0x%02x	EILSEQ\n", i);
181 			return;
182 		}
183 		fprintf(stderr, "Error for source 0x%02x(%d): %d %d %d %d %d\n",
184 			i, i,
185 			errno,
186 			(DATASIZE) - source_len, /* not converted size */
187 			source - source_buf,
188 			(sizeof (result_buf)) - result_len,
189 			result - result_buf);
190 		exit(status);
191 	}
192 
193 	status += 1;
194 	exit(status);
195 }
196 
main(int argc,char ** argv)197 main(int argc, char ** argv)
198 {
199 	int		r;
200 	char *		p;
201 	iconv_t		cd;
202 	iconv_t		cd2;
203 	iconv_t		cd3;
204 	int		i, j, k;
205 	char		*dir;
206 
207 	ME = basename(argv[0]);
208 	setlocale(LC_ALL, "");
209 	status = 100;
210 
211 	for (j = 1;  j < argc; j++) {
212 		if (argv[j][0] != '-')
213 			break;
214 		for (k = 1; ; k++) {
215 			if (argv[j][k] == '\0')
216 				break;
217 			if (argv[j][k] == 'c') {
218 				flag_check = 1;
219 				j++;
220 				if (j >= argc) usage(-1);
221 				dir = argv[j];
222 				continue;
223 			}
224 		}
225 	}
226 	if (j >= argc) usage(-1);
227 
228 
229 	if( flag_check > 0 ) mk_data(dir, argv[j]);
230 
231 	cd = iconv_open("UTF-8", argv[j]);
232 	if (((iconv_t)(-1)) == cd) {
233 		perror("iconv_open");
234 		exit(status);
235 	}
236 
237 	cd2 = iconv_open("UCS-2", "UTF-8");
238 	if (((iconv_t)(-1)) == cd2) {
239 		perror("iconv_open for UTF-8");
240 		exit(status);
241 	}
242 
243 	cd3 = iconv_open(argv[j], "UTF-8");
244 	if (((iconv_t)(-1)) == cd3) {
245 		perror("iconv_open for reverse");
246 		exit(status);
247 	}
248 
249 	/*
250 	 *	main logic
251 	 */
252 	for (i = 0; i <= 0xff; i++)
253 		validate(i, cd, cd2, cd3);
254 
255 	status = 200;
256 	r = iconv_close(cd);
257 	if (-1 == r) {
258 		perror("iconv_close");
259 		exit(status);
260 	}
261 
262 	r = iconv_close(cd2);
263 	if (-1 == r) {
264 		perror("iconv_close for UTF-8");
265 		exit(status);
266 	}
267 
268 	return (0);
269 }
270 
271 static void
mk_data(char * dir,char * name)272 mk_data(char *dir, char* name)
273 {
274 	register int	i, j;
275 	char		buf[BUFSIZ], num[100];
276 	unsigned int	l, k;
277 	FILE		*fd;
278 	char		file[BUFSIZ];
279 
280 	sprintf( file, "%s/%s.txt", dir, name);
281 	if ((fd = fopen(file, "r")) == NULL) {
282 		perror("fopen");
283 		exit (-1);
284 	}
285 	/* for information file, pari data is created */
286 	while (fgets(buf, BUFSIZ, fd)) {
287 		i = 0;
288 		while (buf[i] && isspace(buf[i]))
289 			i++;
290 		if (buf[i] == '#' || buf[i] == '\0')
291 			continue;
292 
293 		for (j = 0; !isspace(buf[i]); i++, j++)
294 			num[j] = buf[i];
295 		num[j] = '\0';
296 
297 		k = strtol(num, (char **)NULL, 16);
298 
299 		while (isspace(buf[i]))
300 			i++;
301 
302 		if (buf[i] == '#' || buf[i] == '\0')
303 			/* undefined */
304 			continue;
305 
306 		for (j = 0; !isspace(buf[i]); i++, j++)
307 			num[j] = buf[i];
308 		num[j] = '\0';
309 
310 		l = strtol(num, (char **)NULL, 16);
311 
312 		tbl[k].u4 = l;
313 		tbl[k].from = k;
314 	}
315 }
316