/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */

/*
 * iconv(1) command.
 */
19*48edc7cfSGordon Ross
20*48edc7cfSGordon Ross #include <stdio.h>
21*48edc7cfSGordon Ross #include <stdlib.h>
22*48edc7cfSGordon Ross #include <string.h>
23*48edc7cfSGordon Ross #include <unistd.h>
24*48edc7cfSGordon Ross #include <errno.h>
25*48edc7cfSGordon Ross #include <limits.h>
26*48edc7cfSGordon Ross #include <iconv.h>
27*48edc7cfSGordon Ross #include <libintl.h>
28*48edc7cfSGordon Ross #include <langinfo.h>
29*48edc7cfSGordon Ross #include <locale.h>
30*48edc7cfSGordon Ross #include "charmap.h"
31*48edc7cfSGordon Ross
32*48edc7cfSGordon Ross #include <assert.h>
33*48edc7cfSGordon Ross
/*
 * Forward declarations: iconv_file() is defined later in this file,
 * list_codesets() is provided by another source file.
 */
void iconv_file(FILE *, const char *);
extern int list_codesets(void);

/* Program name from getprogname(); used in the usage message. */
const char *progname;

/*
 * Command line state filled in by main().
 */
char *from_cs;		/* -f argument, else nl_langinfo(CODESET) */
char *to_cs;		/* -t argument, else nl_langinfo(CODESET) */
int debug;		/* -d: incremented once per occurrence */
int cflag;		/* skip invalid characters */
int sflag;		/* silent */
int lflag;		/* list conversions */

/*
 * Conversion state.  main() points pconv at either lib_iconv() or the
 * charmap based cm_iconv(); iconv_file() converts through it.
 */
iconv_t ich;		/* iconv(3c) lib handle */
size_t (*pconv)(const char **iptr, size_t *ileft,
    char **optr, size_t *oleft);
49*48edc7cfSGordon Ross
50*48edc7cfSGordon Ross size_t
lib_iconv(const char ** iptr,size_t * ileft,char ** optr,size_t * oleft)51*48edc7cfSGordon Ross lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
52*48edc7cfSGordon Ross {
53*48edc7cfSGordon Ross return (iconv(ich, iptr, ileft, optr, oleft));
54*48edc7cfSGordon Ross }
55*48edc7cfSGordon Ross
56*48edc7cfSGordon Ross void
usage(void)57*48edc7cfSGordon Ross usage(void)
58*48edc7cfSGordon Ross {
59*48edc7cfSGordon Ross (void) fprintf(stderr, gettext(
60*48edc7cfSGordon Ross "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
61*48edc7cfSGordon Ross "[file ...]\n"), progname);
62*48edc7cfSGordon Ross (void) fprintf(stderr, gettext("\t%s -l\n"), progname);
63*48edc7cfSGordon Ross exit(1);
64*48edc7cfSGordon Ross }
65*48edc7cfSGordon Ross
66*48edc7cfSGordon Ross int
main(int argc,char ** argv)67*48edc7cfSGordon Ross main(int argc, char **argv)
68*48edc7cfSGordon Ross {
69*48edc7cfSGordon Ross FILE *fp;
70*48edc7cfSGordon Ross char *fslash, *tslash;
71*48edc7cfSGordon Ross int c;
72*48edc7cfSGordon Ross
73*48edc7cfSGordon Ross yydebug = 0;
74*48edc7cfSGordon Ross progname = getprogname();
75*48edc7cfSGordon Ross
76*48edc7cfSGordon Ross (void) setlocale(LC_ALL, "");
77*48edc7cfSGordon Ross
78*48edc7cfSGordon Ross #if !defined(TEXT_DOMAIN)
79*48edc7cfSGordon Ross #define TEXT_DOMAIN "SYS_TEST"
80*48edc7cfSGordon Ross #endif
81*48edc7cfSGordon Ross (void) textdomain(TEXT_DOMAIN);
82*48edc7cfSGordon Ross
83*48edc7cfSGordon Ross while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
84*48edc7cfSGordon Ross switch (c) {
85*48edc7cfSGordon Ross case 'c':
86*48edc7cfSGordon Ross cflag++;
87*48edc7cfSGordon Ross break;
88*48edc7cfSGordon Ross case 'd':
89*48edc7cfSGordon Ross debug++;
90*48edc7cfSGordon Ross break;
91*48edc7cfSGordon Ross case 'l':
92*48edc7cfSGordon Ross lflag++;
93*48edc7cfSGordon Ross break;
94*48edc7cfSGordon Ross case 's':
95*48edc7cfSGordon Ross sflag++;
96*48edc7cfSGordon Ross break;
97*48edc7cfSGordon Ross case 'f':
98*48edc7cfSGordon Ross from_cs = optarg;
99*48edc7cfSGordon Ross break;
100*48edc7cfSGordon Ross case 't':
101*48edc7cfSGordon Ross to_cs = optarg;
102*48edc7cfSGordon Ross break;
103*48edc7cfSGordon Ross case '?':
104*48edc7cfSGordon Ross usage();
105*48edc7cfSGordon Ross }
106*48edc7cfSGordon Ross }
107*48edc7cfSGordon Ross
108*48edc7cfSGordon Ross if (lflag) {
109*48edc7cfSGordon Ross if (from_cs != NULL || to_cs != NULL || optind != argc)
110*48edc7cfSGordon Ross usage();
111*48edc7cfSGordon Ross exit(list_codesets());
112*48edc7cfSGordon Ross }
113*48edc7cfSGordon Ross
114*48edc7cfSGordon Ross if (from_cs == NULL)
115*48edc7cfSGordon Ross from_cs = nl_langinfo(CODESET);
116*48edc7cfSGordon Ross if (to_cs == NULL)
117*48edc7cfSGordon Ross to_cs = nl_langinfo(CODESET);
118*48edc7cfSGordon Ross
119*48edc7cfSGordon Ross /*
120*48edc7cfSGordon Ross * If either "from" or "to" contains a slash,
121*48edc7cfSGordon Ross * then we're using charmaps.
122*48edc7cfSGordon Ross */
123*48edc7cfSGordon Ross fslash = strchr(from_cs, '/');
124*48edc7cfSGordon Ross tslash = strchr(to_cs, '/');
125*48edc7cfSGordon Ross if (fslash != NULL || tslash != NULL) {
126*48edc7cfSGordon Ross charmap_init(to_cs, from_cs);
127*48edc7cfSGordon Ross pconv = cm_iconv;
128*48edc7cfSGordon Ross if (debug)
129*48edc7cfSGordon Ross charmap_dump();
130*48edc7cfSGordon Ross } else {
131*48edc7cfSGordon Ross ich = iconv_open(to_cs, from_cs);
132*48edc7cfSGordon Ross if (ich == ((iconv_t)-1)) {
133*48edc7cfSGordon Ross switch (errno) {
134*48edc7cfSGordon Ross case EINVAL:
135*48edc7cfSGordon Ross (void) fprintf(stderr,
136*48edc7cfSGordon Ross _("Not supported %s to %s\n"),
137*48edc7cfSGordon Ross from_cs, to_cs);
138*48edc7cfSGordon Ross break;
139*48edc7cfSGordon Ross default:
140*48edc7cfSGordon Ross (void) fprintf(stderr,
141*48edc7cfSGordon Ross _("iconv_open failed: %s\n"),
142*48edc7cfSGordon Ross strerror(errno));
143*48edc7cfSGordon Ross break;
144*48edc7cfSGordon Ross }
145*48edc7cfSGordon Ross exit(1);
146*48edc7cfSGordon Ross }
147*48edc7cfSGordon Ross pconv = lib_iconv;
148*48edc7cfSGordon Ross }
149*48edc7cfSGordon Ross
150*48edc7cfSGordon Ross if (optind == argc ||
151*48edc7cfSGordon Ross (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) {
152*48edc7cfSGordon Ross iconv_file(stdin, "stdin");
153*48edc7cfSGordon Ross exit(warnings ? 1 : 0);
154*48edc7cfSGordon Ross }
155*48edc7cfSGordon Ross
156*48edc7cfSGordon Ross for (; optind < argc; optind++) {
157*48edc7cfSGordon Ross fp = fopen(argv[optind], "r");
158*48edc7cfSGordon Ross if (fp == NULL) {
159*48edc7cfSGordon Ross perror(argv[optind]);
160*48edc7cfSGordon Ross exit(1);
161*48edc7cfSGordon Ross }
162*48edc7cfSGordon Ross iconv_file(fp, argv[optind]);
163*48edc7cfSGordon Ross (void) fclose(fp);
164*48edc7cfSGordon Ross }
165*48edc7cfSGordon Ross exit(warnings ? 1 : 0);
166*48edc7cfSGordon Ross }
167*48edc7cfSGordon Ross
/*
 * Conversion buffer sizes:
 *
 * The input buffer has room to prepend one mbs character if needed for
 * handling a left-over at the end of a previous conversion buffer.
 *
 * Conversions may grow or shrink data, so we use a larger output buffer
 * to reduce the likelihood of leftover input buffer data in each pass.
 */
177*48edc7cfSGordon Ross #define IBUFSIZ (MB_LEN_MAX + BUFSIZ)
178*48edc7cfSGordon Ross #define OBUFSIZ (2 * BUFSIZ)
179*48edc7cfSGordon Ross
180*48edc7cfSGordon Ross void
iconv_file(FILE * fp,const char * fname)181*48edc7cfSGordon Ross iconv_file(FILE *fp, const char *fname)
182*48edc7cfSGordon Ross {
183*48edc7cfSGordon Ross static char ibuf[IBUFSIZ];
184*48edc7cfSGordon Ross static char obuf[OBUFSIZ];
185*48edc7cfSGordon Ross const char *iptr;
186*48edc7cfSGordon Ross char *optr;
187*48edc7cfSGordon Ross off64_t offset;
188*48edc7cfSGordon Ross size_t ileft, oleft, ocnt;
189*48edc7cfSGordon Ross int iconv_errno;
190*48edc7cfSGordon Ross int nr, nw, rc;
191*48edc7cfSGordon Ross
192*48edc7cfSGordon Ross offset = 0;
193*48edc7cfSGordon Ross ileft = 0;
194*48edc7cfSGordon Ross iptr = ibuf + MB_LEN_MAX;
195*48edc7cfSGordon Ross
196*48edc7cfSGordon Ross while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
197*48edc7cfSGordon Ross
198*48edc7cfSGordon Ross assert(iptr <= ibuf+MB_LEN_MAX);
199*48edc7cfSGordon Ross assert(ileft <= MB_LEN_MAX);
200*48edc7cfSGordon Ross ileft += nr;
201*48edc7cfSGordon Ross offset += nr;
202*48edc7cfSGordon Ross
203*48edc7cfSGordon Ross optr = obuf;
204*48edc7cfSGordon Ross oleft = OBUFSIZ;
205*48edc7cfSGordon Ross
206*48edc7cfSGordon Ross /*
207*48edc7cfSGordon Ross * Note: the *pconv function is either iconv(3c) or our
208*48edc7cfSGordon Ross * private equivalent when using charmaps. Both update
209*48edc7cfSGordon Ross * ileft, oleft etc. even when conversion stops due to
210*48edc7cfSGordon Ross * an illegal sequence or whatever, so we need to copy
211*48edc7cfSGordon Ross * the partially converted buffer even on error.
212*48edc7cfSGordon Ross */
213*48edc7cfSGordon Ross iconv_again:
214*48edc7cfSGordon Ross rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
215*48edc7cfSGordon Ross iconv_errno = errno;
216*48edc7cfSGordon Ross
217*48edc7cfSGordon Ross ocnt = OBUFSIZ - oleft;
218*48edc7cfSGordon Ross if (ocnt > 0) {
219*48edc7cfSGordon Ross nw = fwrite(obuf, 1, ocnt, stdout);
220*48edc7cfSGordon Ross if (nw != ocnt) {
221*48edc7cfSGordon Ross perror("fwrite");
222*48edc7cfSGordon Ross exit(1);
223*48edc7cfSGordon Ross }
224*48edc7cfSGordon Ross }
225*48edc7cfSGordon Ross optr = obuf;
226*48edc7cfSGordon Ross oleft = OBUFSIZ;
227*48edc7cfSGordon Ross
228*48edc7cfSGordon Ross if (rc == (size_t)-1) {
229*48edc7cfSGordon Ross switch (iconv_errno) {
230*48edc7cfSGordon Ross
231*48edc7cfSGordon Ross case E2BIG: /* no room in output buffer */
232*48edc7cfSGordon Ross goto iconv_again;
233*48edc7cfSGordon Ross
234*48edc7cfSGordon Ross case EINVAL: /* incomplete sequence on input */
235*48edc7cfSGordon Ross if (debug) {
236*48edc7cfSGordon Ross (void) fprintf(stderr,
237*48edc7cfSGordon Ross _("Incomplete sequence in %s at offset %lld\n"),
238*48edc7cfSGordon Ross fname, offset - ileft);
239*48edc7cfSGordon Ross }
240*48edc7cfSGordon Ross /*
241*48edc7cfSGordon Ross * Copy the remainder to the space reserved
242*48edc7cfSGordon Ross * at the start of the input buffer.
243*48edc7cfSGordon Ross */
244*48edc7cfSGordon Ross assert(ileft > 0);
245*48edc7cfSGordon Ross if (ileft <= MB_LEN_MAX) {
246*48edc7cfSGordon Ross char *p = ibuf+MB_LEN_MAX-ileft;
247*48edc7cfSGordon Ross (void) memmove(p, iptr, ileft);
248*48edc7cfSGordon Ross iptr = p;
249*48edc7cfSGordon Ross continue; /* read again */
250*48edc7cfSGordon Ross }
251*48edc7cfSGordon Ross /*
252*48edc7cfSGordon Ross * Should not see ileft > MB_LEN_MAX,
253*48edc7cfSGordon Ross * but if we do, handle as EILSEQ.
254*48edc7cfSGordon Ross */
255*48edc7cfSGordon Ross /* FALLTHROUGH */
256*48edc7cfSGordon Ross
257*48edc7cfSGordon Ross case EILSEQ: /* invalid sequence on input */
258*48edc7cfSGordon Ross if (!sflag) {
259*48edc7cfSGordon Ross (void) fprintf(stderr,
260*48edc7cfSGordon Ross _("Illegal sequence in %s at offset %lld\n"),
261*48edc7cfSGordon Ross fname, offset - ileft);
262*48edc7cfSGordon Ross (void) fprintf(stderr,
263*48edc7cfSGordon Ross _("bad seq: \\x%02x\\x%02x\\x%02x\n"),
264*48edc7cfSGordon Ross iptr[0] & 0xff,
265*48edc7cfSGordon Ross iptr[1] & 0xff,
266*48edc7cfSGordon Ross iptr[2] & 0xff);
267*48edc7cfSGordon Ross }
268*48edc7cfSGordon Ross assert(ileft > 0);
269*48edc7cfSGordon Ross /* skip one */
270*48edc7cfSGordon Ross iptr++;
271*48edc7cfSGordon Ross ileft--;
272*48edc7cfSGordon Ross assert(oleft > 0);
273*48edc7cfSGordon Ross if (!cflag) {
274*48edc7cfSGordon Ross *optr++ = '?';
275*48edc7cfSGordon Ross oleft--;
276*48edc7cfSGordon Ross }
277*48edc7cfSGordon Ross goto iconv_again;
278*48edc7cfSGordon Ross
279*48edc7cfSGordon Ross default:
280*48edc7cfSGordon Ross (void) fprintf(stderr,
281*48edc7cfSGordon Ross _("iconv error (%s) in file $s at offset %lld\n"),
282*48edc7cfSGordon Ross strerror(iconv_errno), fname,
283*48edc7cfSGordon Ross offset - ileft);
284*48edc7cfSGordon Ross break;
285*48edc7cfSGordon Ross }
286*48edc7cfSGordon Ross }
287*48edc7cfSGordon Ross
288*48edc7cfSGordon Ross /* normal iconv return */
289*48edc7cfSGordon Ross ileft = 0;
290*48edc7cfSGordon Ross iptr = ibuf + MB_LEN_MAX;
291*48edc7cfSGordon Ross }
292*48edc7cfSGordon Ross
293*48edc7cfSGordon Ross /*
294*48edc7cfSGordon Ross * End of file
295*48edc7cfSGordon Ross * Flush any shift encodings.
296*48edc7cfSGordon Ross */
297*48edc7cfSGordon Ross iptr = NULL;
298*48edc7cfSGordon Ross ileft = 0;
299*48edc7cfSGordon Ross optr = obuf;
300*48edc7cfSGordon Ross oleft = OBUFSIZ;
301*48edc7cfSGordon Ross (*pconv)(&iptr, &ileft, &optr, &oleft);
302*48edc7cfSGordon Ross ocnt = OBUFSIZ - oleft;
303*48edc7cfSGordon Ross if (ocnt > 0) {
304*48edc7cfSGordon Ross nw = fwrite(obuf, 1, ocnt, stdout);
305*48edc7cfSGordon Ross if (nw != ocnt) {
306*48edc7cfSGordon Ross perror("fwrite");
307*48edc7cfSGordon Ross exit(1);
308*48edc7cfSGordon Ross }
309*48edc7cfSGordon Ross }
310*48edc7cfSGordon Ross }
311