xref: /illumos-gate/usr/src/cmd/iconv/iconv_main.c (revision 48edc7cf)
1*48edc7cfSGordon Ross /*
2*48edc7cfSGordon Ross  * This file and its contents are supplied under the terms of the
3*48edc7cfSGordon Ross  * Common Development and Distribution License ("CDDL"), version 1.0.
4*48edc7cfSGordon Ross  * You may only use this file in accordance with the terms of version
5*48edc7cfSGordon Ross  * 1.0 of the CDDL.
6*48edc7cfSGordon Ross  *
7*48edc7cfSGordon Ross  * A full copy of the text of the CDDL should have accompanied this
8*48edc7cfSGordon Ross  * source.  A copy of the CDDL is also available via the Internet at
9*48edc7cfSGordon Ross  * http://www.illumos.org/license/CDDL.
10*48edc7cfSGordon Ross  */
11*48edc7cfSGordon Ross 
12*48edc7cfSGordon Ross /*
13*48edc7cfSGordon Ross  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
14*48edc7cfSGordon Ross  */
15*48edc7cfSGordon Ross 
16*48edc7cfSGordon Ross /*
17*48edc7cfSGordon Ross  * iconv(1) command.
18*48edc7cfSGordon Ross  */
19*48edc7cfSGordon Ross 
20*48edc7cfSGordon Ross #include <stdio.h>
21*48edc7cfSGordon Ross #include <stdlib.h>
22*48edc7cfSGordon Ross #include <string.h>
23*48edc7cfSGordon Ross #include <unistd.h>
24*48edc7cfSGordon Ross #include <errno.h>
25*48edc7cfSGordon Ross #include <limits.h>
26*48edc7cfSGordon Ross #include <iconv.h>
27*48edc7cfSGordon Ross #include <libintl.h>
28*48edc7cfSGordon Ross #include <langinfo.h>
29*48edc7cfSGordon Ross #include <locale.h>
30*48edc7cfSGordon Ross #include "charmap.h"
31*48edc7cfSGordon Ross 
32*48edc7cfSGordon Ross #include <assert.h>
33*48edc7cfSGordon Ross 
/* Program name, as reported in usage messages. */
const char *progname;

/* Source (-f) and target (-t) codeset names. */
char *from_cs;
char *to_cs;

/* Option counters, each incremented once per occurrence on the command line. */
int debug;	/* -d */
int cflag;	/* -c: skip invalid characters */
int sflag;	/* -s: silent */
int lflag;	/* -l: list conversions */

/* Convert one input stream to stdout; fname is used in diagnostics. */
void iconv_file(FILE *fp, const char *fname);
extern int list_codesets(void);

/* iconv(3c) lib handle, used by lib_iconv(). */
iconv_t ich;

/*
 * Active conversion routine.  It takes the same buffer arguments as
 * iconv(3c), minus the conversion descriptor.
 */
size_t (*pconv)(const char **inbuf, size_t *inleft,
		char **outbuf, size_t *outleft);
49*48edc7cfSGordon Ross 
50*48edc7cfSGordon Ross size_t
lib_iconv(const char ** iptr,size_t * ileft,char ** optr,size_t * oleft)51*48edc7cfSGordon Ross lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
52*48edc7cfSGordon Ross {
53*48edc7cfSGordon Ross 	return (iconv(ich, iptr, ileft, optr, oleft));
54*48edc7cfSGordon Ross }
55*48edc7cfSGordon Ross 
56*48edc7cfSGordon Ross void
usage(void)57*48edc7cfSGordon Ross usage(void)
58*48edc7cfSGordon Ross {
59*48edc7cfSGordon Ross 	(void) fprintf(stderr, gettext(
60*48edc7cfSGordon Ross 	    "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
61*48edc7cfSGordon Ross 	    "[file ...]\n"), progname);
62*48edc7cfSGordon Ross 	(void) fprintf(stderr, gettext("\t%s -l\n"), progname);
63*48edc7cfSGordon Ross 	exit(1);
64*48edc7cfSGordon Ross }
65*48edc7cfSGordon Ross 
66*48edc7cfSGordon Ross int
main(int argc,char ** argv)67*48edc7cfSGordon Ross main(int argc, char **argv)
68*48edc7cfSGordon Ross {
69*48edc7cfSGordon Ross 	FILE *fp;
70*48edc7cfSGordon Ross 	char *fslash, *tslash;
71*48edc7cfSGordon Ross 	int c;
72*48edc7cfSGordon Ross 
73*48edc7cfSGordon Ross 	yydebug = 0;
74*48edc7cfSGordon Ross 	progname = getprogname();
75*48edc7cfSGordon Ross 
76*48edc7cfSGordon Ross 	(void) setlocale(LC_ALL, "");
77*48edc7cfSGordon Ross 
78*48edc7cfSGordon Ross #if !defined(TEXT_DOMAIN)
79*48edc7cfSGordon Ross #define	TEXT_DOMAIN	"SYS_TEST"
80*48edc7cfSGordon Ross #endif
81*48edc7cfSGordon Ross 	(void) textdomain(TEXT_DOMAIN);
82*48edc7cfSGordon Ross 
83*48edc7cfSGordon Ross 	while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
84*48edc7cfSGordon Ross 		switch (c) {
85*48edc7cfSGordon Ross 		case 'c':
86*48edc7cfSGordon Ross 			cflag++;
87*48edc7cfSGordon Ross 			break;
88*48edc7cfSGordon Ross 		case 'd':
89*48edc7cfSGordon Ross 			debug++;
90*48edc7cfSGordon Ross 			break;
91*48edc7cfSGordon Ross 		case 'l':
92*48edc7cfSGordon Ross 			lflag++;
93*48edc7cfSGordon Ross 			break;
94*48edc7cfSGordon Ross 		case 's':
95*48edc7cfSGordon Ross 			sflag++;
96*48edc7cfSGordon Ross 			break;
97*48edc7cfSGordon Ross 		case 'f':
98*48edc7cfSGordon Ross 			from_cs = optarg;
99*48edc7cfSGordon Ross 			break;
100*48edc7cfSGordon Ross 		case 't':
101*48edc7cfSGordon Ross 			to_cs = optarg;
102*48edc7cfSGordon Ross 			break;
103*48edc7cfSGordon Ross 		case '?':
104*48edc7cfSGordon Ross 			usage();
105*48edc7cfSGordon Ross 		}
106*48edc7cfSGordon Ross 	}
107*48edc7cfSGordon Ross 
108*48edc7cfSGordon Ross 	if (lflag) {
109*48edc7cfSGordon Ross 		if (from_cs != NULL || to_cs != NULL || optind != argc)
110*48edc7cfSGordon Ross 			usage();
111*48edc7cfSGordon Ross 		exit(list_codesets());
112*48edc7cfSGordon Ross 	}
113*48edc7cfSGordon Ross 
114*48edc7cfSGordon Ross 	if (from_cs == NULL)
115*48edc7cfSGordon Ross 		from_cs = nl_langinfo(CODESET);
116*48edc7cfSGordon Ross 	if (to_cs == NULL)
117*48edc7cfSGordon Ross 		to_cs = nl_langinfo(CODESET);
118*48edc7cfSGordon Ross 
119*48edc7cfSGordon Ross 	/*
120*48edc7cfSGordon Ross 	 * If either "from" or "to" contains a slash,
121*48edc7cfSGordon Ross 	 * then we're using charmaps.
122*48edc7cfSGordon Ross 	 */
123*48edc7cfSGordon Ross 	fslash = strchr(from_cs, '/');
124*48edc7cfSGordon Ross 	tslash = strchr(to_cs, '/');
125*48edc7cfSGordon Ross 	if (fslash != NULL || tslash != NULL) {
126*48edc7cfSGordon Ross 		charmap_init(to_cs, from_cs);
127*48edc7cfSGordon Ross 		pconv = cm_iconv;
128*48edc7cfSGordon Ross 		if (debug)
129*48edc7cfSGordon Ross 			charmap_dump();
130*48edc7cfSGordon Ross 	} else {
131*48edc7cfSGordon Ross 		ich = iconv_open(to_cs, from_cs);
132*48edc7cfSGordon Ross 		if (ich == ((iconv_t)-1)) {
133*48edc7cfSGordon Ross 			switch (errno) {
134*48edc7cfSGordon Ross 			case EINVAL:
135*48edc7cfSGordon Ross 				(void) fprintf(stderr,
136*48edc7cfSGordon Ross 				    _("Not supported %s to %s\n"),
137*48edc7cfSGordon Ross 				    from_cs, to_cs);
138*48edc7cfSGordon Ross 				break;
139*48edc7cfSGordon Ross 			default:
140*48edc7cfSGordon Ross 				(void) fprintf(stderr,
141*48edc7cfSGordon Ross 				    _("iconv_open failed: %s\n"),
142*48edc7cfSGordon Ross 				    strerror(errno));
143*48edc7cfSGordon Ross 				break;
144*48edc7cfSGordon Ross 			}
145*48edc7cfSGordon Ross 			exit(1);
146*48edc7cfSGordon Ross 		}
147*48edc7cfSGordon Ross 		pconv = lib_iconv;
148*48edc7cfSGordon Ross 	}
149*48edc7cfSGordon Ross 
150*48edc7cfSGordon Ross 	if (optind == argc ||
151*48edc7cfSGordon Ross 	    (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) {
152*48edc7cfSGordon Ross 		iconv_file(stdin, "stdin");
153*48edc7cfSGordon Ross 		exit(warnings ? 1 : 0);
154*48edc7cfSGordon Ross 	}
155*48edc7cfSGordon Ross 
156*48edc7cfSGordon Ross 	for (; optind < argc; optind++) {
157*48edc7cfSGordon Ross 		fp = fopen(argv[optind], "r");
158*48edc7cfSGordon Ross 		if (fp == NULL) {
159*48edc7cfSGordon Ross 			perror(argv[optind]);
160*48edc7cfSGordon Ross 			exit(1);
161*48edc7cfSGordon Ross 		}
162*48edc7cfSGordon Ross 		iconv_file(fp, argv[optind]);
163*48edc7cfSGordon Ross 		(void) fclose(fp);
164*48edc7cfSGordon Ross 	}
165*48edc7cfSGordon Ross 	exit(warnings ? 1 : 0);
166*48edc7cfSGordon Ross }
167*48edc7cfSGordon Ross 
168*48edc7cfSGordon Ross /*
169*48edc7cfSGordon Ross  * Conversion buffer sizes:
170*48edc7cfSGordon Ross  *
171*48edc7cfSGordon Ross  * The input buffer has room to prepend one mbs character if needed for
172*48edc7cfSGordon Ross  * handling a left-over at the end of a previous conversion buffer.
173*48edc7cfSGordon Ross  *
174*48edc7cfSGordon Ross  * Conversions may grow or shrink data, so using a larger output buffer
175*48edc7cfSGordon Ross  * to reduce the likelihood of leftover input buffer data in each pass.
176*48edc7cfSGordon Ross  */
177*48edc7cfSGordon Ross #define	IBUFSIZ	(MB_LEN_MAX + BUFSIZ)
178*48edc7cfSGordon Ross #define	OBUFSIZ	(2 * BUFSIZ)
179*48edc7cfSGordon Ross 
180*48edc7cfSGordon Ross void
iconv_file(FILE * fp,const char * fname)181*48edc7cfSGordon Ross iconv_file(FILE *fp, const char *fname)
182*48edc7cfSGordon Ross {
183*48edc7cfSGordon Ross 	static char ibuf[IBUFSIZ];
184*48edc7cfSGordon Ross 	static char obuf[OBUFSIZ];
185*48edc7cfSGordon Ross 	const char *iptr;
186*48edc7cfSGordon Ross 	char *optr;
187*48edc7cfSGordon Ross 	off64_t offset;
188*48edc7cfSGordon Ross 	size_t ileft, oleft, ocnt;
189*48edc7cfSGordon Ross 	int iconv_errno;
190*48edc7cfSGordon Ross 	int nr, nw, rc;
191*48edc7cfSGordon Ross 
192*48edc7cfSGordon Ross 	offset = 0;
193*48edc7cfSGordon Ross 	ileft = 0;
194*48edc7cfSGordon Ross 	iptr = ibuf + MB_LEN_MAX;
195*48edc7cfSGordon Ross 
196*48edc7cfSGordon Ross 	while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
197*48edc7cfSGordon Ross 
198*48edc7cfSGordon Ross 		assert(iptr <= ibuf+MB_LEN_MAX);
199*48edc7cfSGordon Ross 		assert(ileft <= MB_LEN_MAX);
200*48edc7cfSGordon Ross 		ileft += nr;
201*48edc7cfSGordon Ross 		offset += nr;
202*48edc7cfSGordon Ross 
203*48edc7cfSGordon Ross 		optr = obuf;
204*48edc7cfSGordon Ross 		oleft = OBUFSIZ;
205*48edc7cfSGordon Ross 
206*48edc7cfSGordon Ross 		/*
207*48edc7cfSGordon Ross 		 * Note: the *pconv function is either iconv(3c) or our
208*48edc7cfSGordon Ross 		 * private equivalent when using charmaps. Both update
209*48edc7cfSGordon Ross 		 * ileft, oleft etc. even when conversion stops due to
210*48edc7cfSGordon Ross 		 * an illegal sequence or whatever, so we need to copy
211*48edc7cfSGordon Ross 		 * the partially converted buffer even on error.
212*48edc7cfSGordon Ross 		 */
213*48edc7cfSGordon Ross 	iconv_again:
214*48edc7cfSGordon Ross 		rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
215*48edc7cfSGordon Ross 		iconv_errno = errno;
216*48edc7cfSGordon Ross 
217*48edc7cfSGordon Ross 		ocnt = OBUFSIZ - oleft;
218*48edc7cfSGordon Ross 		if (ocnt > 0) {
219*48edc7cfSGordon Ross 			nw = fwrite(obuf, 1, ocnt, stdout);
220*48edc7cfSGordon Ross 			if (nw != ocnt) {
221*48edc7cfSGordon Ross 				perror("fwrite");
222*48edc7cfSGordon Ross 				exit(1);
223*48edc7cfSGordon Ross 			}
224*48edc7cfSGordon Ross 		}
225*48edc7cfSGordon Ross 		optr = obuf;
226*48edc7cfSGordon Ross 		oleft = OBUFSIZ;
227*48edc7cfSGordon Ross 
228*48edc7cfSGordon Ross 		if (rc == (size_t)-1) {
229*48edc7cfSGordon Ross 			switch (iconv_errno) {
230*48edc7cfSGordon Ross 
231*48edc7cfSGordon Ross 			case E2BIG:	/* no room in output buffer */
232*48edc7cfSGordon Ross 				goto iconv_again;
233*48edc7cfSGordon Ross 
234*48edc7cfSGordon Ross 			case EINVAL:	/* incomplete sequence on input */
235*48edc7cfSGordon Ross 				if (debug) {
236*48edc7cfSGordon Ross 					(void) fprintf(stderr,
237*48edc7cfSGordon Ross 			_("Incomplete sequence in %s at offset %lld\n"),
238*48edc7cfSGordon Ross 					    fname, offset - ileft);
239*48edc7cfSGordon Ross 				}
240*48edc7cfSGordon Ross 				/*
241*48edc7cfSGordon Ross 				 * Copy the remainder to the space reserved
242*48edc7cfSGordon Ross 				 * at the start of the input buffer.
243*48edc7cfSGordon Ross 				 */
244*48edc7cfSGordon Ross 				assert(ileft > 0);
245*48edc7cfSGordon Ross 				if (ileft <= MB_LEN_MAX) {
246*48edc7cfSGordon Ross 					char *p = ibuf+MB_LEN_MAX-ileft;
247*48edc7cfSGordon Ross 					(void) memmove(p, iptr, ileft);
248*48edc7cfSGordon Ross 					iptr = p;
249*48edc7cfSGordon Ross 					continue; /* read again */
250*48edc7cfSGordon Ross 				}
251*48edc7cfSGordon Ross 				/*
252*48edc7cfSGordon Ross 				 * Should not see ileft > MB_LEN_MAX,
253*48edc7cfSGordon Ross 				 * but if we do, handle as EILSEQ.
254*48edc7cfSGordon Ross 				 */
255*48edc7cfSGordon Ross 				/* FALLTHROUGH */
256*48edc7cfSGordon Ross 
257*48edc7cfSGordon Ross 			case EILSEQ:	/* invalid sequence on input */
258*48edc7cfSGordon Ross 				if (!sflag) {
259*48edc7cfSGordon Ross 					(void) fprintf(stderr,
260*48edc7cfSGordon Ross 			_("Illegal sequence in %s at offset %lld\n"),
261*48edc7cfSGordon Ross 					    fname, offset - ileft);
262*48edc7cfSGordon Ross 					(void) fprintf(stderr,
263*48edc7cfSGordon Ross 			_("bad seq: \\x%02x\\x%02x\\x%02x\n"),
264*48edc7cfSGordon Ross 					    iptr[0] & 0xff,
265*48edc7cfSGordon Ross 					    iptr[1] & 0xff,
266*48edc7cfSGordon Ross 					    iptr[2] & 0xff);
267*48edc7cfSGordon Ross 				}
268*48edc7cfSGordon Ross 				assert(ileft > 0);
269*48edc7cfSGordon Ross 				/* skip one */
270*48edc7cfSGordon Ross 				iptr++;
271*48edc7cfSGordon Ross 				ileft--;
272*48edc7cfSGordon Ross 				assert(oleft > 0);
273*48edc7cfSGordon Ross 				if (!cflag) {
274*48edc7cfSGordon Ross 					*optr++ = '?';
275*48edc7cfSGordon Ross 					oleft--;
276*48edc7cfSGordon Ross 				}
277*48edc7cfSGordon Ross 				goto iconv_again;
278*48edc7cfSGordon Ross 
279*48edc7cfSGordon Ross 			default:
280*48edc7cfSGordon Ross 				(void) fprintf(stderr,
281*48edc7cfSGordon Ross 			_("iconv error (%s) in file $s at offset %lld\n"),
282*48edc7cfSGordon Ross 				    strerror(iconv_errno), fname,
283*48edc7cfSGordon Ross 				    offset - ileft);
284*48edc7cfSGordon Ross 				break;
285*48edc7cfSGordon Ross 			}
286*48edc7cfSGordon Ross 		}
287*48edc7cfSGordon Ross 
288*48edc7cfSGordon Ross 		/* normal iconv return */
289*48edc7cfSGordon Ross 		ileft = 0;
290*48edc7cfSGordon Ross 		iptr = ibuf + MB_LEN_MAX;
291*48edc7cfSGordon Ross 	}
292*48edc7cfSGordon Ross 
293*48edc7cfSGordon Ross 	/*
294*48edc7cfSGordon Ross 	 * End of file
295*48edc7cfSGordon Ross 	 * Flush any shift encodings.
296*48edc7cfSGordon Ross 	 */
297*48edc7cfSGordon Ross 	iptr = NULL;
298*48edc7cfSGordon Ross 	ileft = 0;
299*48edc7cfSGordon Ross 	optr = obuf;
300*48edc7cfSGordon Ross 	oleft = OBUFSIZ;
301*48edc7cfSGordon Ross 	(*pconv)(&iptr, &ileft, &optr, &oleft);
302*48edc7cfSGordon Ross 	ocnt = OBUFSIZ - oleft;
303*48edc7cfSGordon Ross 	if (ocnt > 0) {
304*48edc7cfSGordon Ross 		nw = fwrite(obuf, 1, ocnt, stdout);
305*48edc7cfSGordon Ross 		if (nw != ocnt) {
306*48edc7cfSGordon Ross 			perror("fwrite");
307*48edc7cfSGordon Ross 			exit(1);
308*48edc7cfSGordon Ross 		}
309*48edc7cfSGordon Ross 	}
310*48edc7cfSGordon Ross }
311