xref: /illumos-gate/usr/src/lib/libc/port/gen/iconv.c (revision 4a38094c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "lint.h"
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/mman.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <dlfcn.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <sys/param.h>
39 #include <alloca.h>
40 #include "iconv.h"
41 #include "iconvP.h"
42 #include "../i18n/_loc_path.h"
43 
44 static iconv_p	iconv_open_all(const char *, const char *, char *);
45 static iconv_p	iconv_open_private(const char *, const char *);
46 static iconv_p	iconv_search_alias(const char *, const char *, char *);
47 static size_t	passthru_icv_iconv(iconv_t, const char **, size_t *, char **,
48     size_t *);
49 static void	passthru_icv_close(iconv_t);
50 
51 #define	PASSTHRU_MAGIC_NUMBER	(0x53756e)
52 
53 
54 /*
55  * These functions are mainly implemented by using a shared object and
56  * the dlopen() functions. The actual conversion algorithm for a particular
57  * conversion is implemented via a shared object as a loadable conversion
58  * module which is linked dynamically at run time.
59  *
60  * The loadable conversion module resides as either:
61  *
62  *	/usr/lib/iconv/geniconvtbl.so
63  *
64  * if the conversion is supported through a geniconvtbl code conversion
65  * binary table or as a module that directly specifies the conversion at:
66  *
67  *	/usr/lib/iconv/fromcode%tocode.so
68  *
69  * where fromcode is the source encoding and tocode is the target encoding.
70  * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close().
71  *
72  * If there is no code conversion supported and if the fromcode and the tocode
73  * are specifying the same codeset, then, the byte-by-byte, pass-through code
74  * conversion that is embedded in the libc is used instead.
75  *
76  * The following are the related PSARC cases:
77  *
78  *	PSARC/1993/153 iconv/iconv_open/iconv_close
79  *	PSARC/1999/292 Addition of geniconvtbl(1)
80  *	PSARC/2001/072 GNU gettext support
81  *	PSARC/2009/561 Pass-through iconv code conversion
82  *
83  * The PSARC/2001/072 includes the /usr/lib/iconv/alias interface.
84  */
85 
86 iconv_t
iconv_open(const char * tocode,const char * fromcode)87 iconv_open(const char *tocode, const char *fromcode)
88 {
89 	iconv_t	cd;
90 	char	*ipath;
91 
92 	if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
93 		return ((iconv_t)-1);
94 
95 	/*
96 	 * Memory for ipath is allocated/released in this function.
97 	 */
98 	ipath = malloc(MAXPATHLEN);
99 	if (ipath == NULL) {
100 		free(cd);
101 		return ((iconv_t)-1);
102 	}
103 
104 	cd->_conv = iconv_open_all(tocode, fromcode, ipath);
105 	if (cd->_conv != (iconv_p)-1) {
106 		/* found a valid module for this conversion */
107 		free(ipath);
108 		return (cd);
109 	}
110 
111 	/*
112 	 * Now, try using the encoding name aliasing table
113 	 */
114 	cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
115 	free(ipath);
116 	if (cd->_conv == (iconv_p)-1) {
117 		/*
118 		 * As the last resort, check if the tocode and the fromcode
119 		 * are referring to the same codeset name or not. If so,
120 		 * assign the embedded pass-through code conversion.
121 		 */
122 		if (strcasecmp(tocode, fromcode) != 0) {
123 			/*
124 			 * No valid conversion available. Do failure retrun
125 			 * with the errno set by iconv_search_alias().
126 			 */
127 			free(cd);
128 			return ((iconv_t)-1);
129 		}
130 
131 		/*
132 		 * For a pass-through byte-by-byte code conversion, allocate
133 		 * an internal conversion descriptor and initialize the data
134 		 * fields appropriately and we are done.
135 		 */
136 		cd->_conv = malloc(sizeof (struct _iconv_fields));
137 		if (cd->_conv == NULL) {
138 			free(cd);
139 			return ((iconv_t)-1);
140 		}
141 
142 		cd->_conv->_icv_handle = NULL;
143 		cd->_conv->_icv_iconv = passthru_icv_iconv;
144 		cd->_conv->_icv_close = passthru_icv_close;
145 		cd->_conv->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER;
146 	}
147 
148 	/* found a valid module for this conversion */
149 	return (cd);
150 }
151 
/*
 * Advance past spaces and tabs, never moving beyond `end'.
 */
static char *
alias_skip_blanks(char *p, const char *end)
{
	while (p < end && (*p == ' ' || *p == '\t'))
		p++;
	return (p);
}

/*
 * Advance to the first space, tab or newline, never moving beyond `end'.
 */
static char *
alias_skip_word(char *p, const char *end)
{
	while (p < end && *p != ' ' && *p != '\t' && *p != '\n')
		p++;
	return (p);
}

/*
 * Advance to just past the next newline, or to `end' if there is none.
 */
static char *
alias_next_line(char *p, const char *end)
{
	while (p < end) {
		if (*p++ == '\n')
			break;
	}
	return (p);
}

/*
 * Look up `variant' in an alias table held in memory.
 *
 * The table is the `size' bytes starting at *paddr. Each entry is a line
 * of the form "<variant> <canonical>", with the fields separated by spaces
 * or tabs. A line whose first character is '#' is a comment. A line with
 * only one field is ignored. The variant field is compared first exactly
 * and then ignoring case.
 *
 * On a match, *paddr is set to the start of the canonical name inside the
 * table (it is NOT NUL-terminated) and its length is returned. If nothing
 * matches, 0 is returned and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char		*cur = *paddr;
	const char	*end = cur + size;
	size_t		want = strlen(variant);
	char		*word;
	size_t		wlen;

	while (cur < end) {
		if (*cur == '#') {
			/* comment line */
			cur = alias_next_line(cur + 1, end);
			continue;
		}

		/* first field: the variant name */
		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;
		word = cur;
		cur = alias_skip_word(cur, end);
		if (cur >= end) {
			/* table ends inside the first field: invalid entry */
			break;
		}
		if (*cur == '\n') {
			/* only one field on this line: invalid entry */
			cur++;
			continue;
		}

		wlen = (size_t)(cur - word);
		if (wlen != want ||
		    (strncmp(word, variant, want) != 0 &&
		    strncasecmp(word, variant, want) != 0)) {
			/* not the entry we want; drop the rest of the line */
			cur = alias_next_line(cur + 1, end);
			continue;
		}

		/* second field: the canonical name */
		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;
		word = cur;
		cur = alias_skip_word(cur, end);
		wlen = (size_t)(cur - word);
		if (wlen == 0) {
			/* only blanks followed the variant name */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = word;
		return (wlen);
	}
	return (0);
}
230 
231 static iconv_p
iconv_open_all(const char * to,const char * from,char * ipath)232 iconv_open_all(const char *to, const char *from, char *ipath)
233 {
234 	iconv_p	cv;
235 	int	len;
236 
237 	/*
238 	 * First, try using the geniconvtbl conversion, which is
239 	 * performed by /usr/lib/iconv/geniconvtbl.so with
240 	 * the conversion table file:
241 	 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
242 	 *
243 	 * If the geniconvtbl conversion cannot be done,
244 	 * try the conversion by the individual shared object.
245 	 */
246 
247 	len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
248 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
249 		/*
250 		 * from%to.bt exists in the table dir
251 		 */
252 		cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
253 		if (cv != (iconv_p)-1) {
254 			/* found a valid module for this conversion */
255 			return (cv);
256 		}
257 	}
258 
259 	/* Next, try /usr/lib/iconv/from%to.so */
260 	len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
261 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
262 		/*
263 		 * /usr/lib/iconv/from%to.so exists
264 		 * errno will be set by iconv_open_private on error
265 		 */
266 		return (iconv_open_private(ipath, NULL));
267 	}
268 	/* no valid module for this conversion found */
269 	errno = EINVAL;
270 	return ((iconv_p)-1);
271 }
272 
273 static iconv_p
iconv_search_alias(const char * tocode,const char * fromcode,char * ipath)274 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
275 {
276 	char	*p;
277 	char	*to_canonical, *from_canonical;
278 	size_t	tolen, fromlen;
279 	iconv_p	cv;
280 	int	fd;
281 	struct stat64	statbuf;
282 	caddr_t	addr;
283 	size_t	buflen;
284 
285 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
286 	if (fd == -1) {
287 		/*
288 		 * if no alias file found,
289 		 * errno will be set to EINVAL.
290 		 */
291 		errno = EINVAL;
292 		return ((iconv_p)-1);
293 	}
294 	if (fstat64(fd, &statbuf) == -1) {
295 		(void) close(fd);
296 		/* use errno set by fstat64 */
297 		return ((iconv_p)-1);
298 	}
299 	buflen = (size_t)statbuf.st_size;
300 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
301 	(void) close(fd);
302 	if (addr == MAP_FAILED) {
303 		/* use errno set by mmap */
304 		return ((iconv_p)-1);
305 	}
306 	p = (char *)addr;
307 	tolen = search_alias(&p, buflen, tocode);
308 	if (tolen) {
309 		to_canonical = alloca(tolen + 1);
310 		(void) memcpy(to_canonical, p, tolen);
311 		to_canonical[tolen] = '\0';
312 	} else {
313 		to_canonical = (char *)tocode;
314 	}
315 	p = (char *)addr;
316 	fromlen = search_alias(&p, buflen, fromcode);
317 	if (fromlen) {
318 		from_canonical = alloca(fromlen + 1);
319 		(void) memcpy(from_canonical, p, fromlen);
320 		from_canonical[fromlen] = '\0';
321 	} else {
322 		from_canonical = (char *)fromcode;
323 	}
324 	(void) munmap(addr, buflen);
325 	if (tolen == 0 && fromlen == 0) {
326 		errno = EINVAL;
327 		return ((iconv_p)-1);
328 	}
329 
330 	cv = iconv_open_all(to_canonical, from_canonical, ipath);
331 
332 	/* errno set by iconv_open_all on error */
333 	return (cv);
334 }
335 
336 static iconv_p
iconv_open_private(const char * lib,const char * tbl)337 iconv_open_private(const char *lib, const char *tbl)
338 {
339 	iconv_t (*fptr)(const char *);
340 	iconv_p cdpath;
341 
342 	if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
343 		return ((iconv_p)-1);
344 
345 	if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
346 		free(cdpath);
347 		/* dlopen does not define error no */
348 		errno = EINVAL;
349 		return ((iconv_p)-1);
350 	}
351 
352 	/* gets address of _icv_open */
353 	if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
354 	    "_icv_open")) == NULL) {
355 		(void) dlclose(cdpath->_icv_handle);
356 		free(cdpath);
357 		/* dlsym does not define errno */
358 		errno = EINVAL;
359 		return ((iconv_p)-1);
360 	}
361 
362 	/*
363 	 * gets address of _icv_iconv in the loadable conversion module
364 	 * and stores it in cdpath->_icv_iconv
365 	 */
366 
367 	if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
368 	    size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
369 	    "_icv_iconv")) == NULL) {
370 		(void) dlclose(cdpath->_icv_handle);
371 		free(cdpath);
372 		/* dlsym does not define errno */
373 		errno = EINVAL;
374 		return ((iconv_p)-1);
375 	}
376 
377 	/*
378 	 * gets address of _icv_close in the loadable conversion module
379 	 * and stores it in cd->_icv_close
380 	 */
381 	if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
382 	    "_icv_close")) == NULL) {
383 		(void) dlclose(cdpath->_icv_handle);
384 		free(cdpath);
385 		/* dlsym does not define errno */
386 		errno = EINVAL;
387 		return ((iconv_p)-1);
388 	}
389 
390 	/*
391 	 * initialize the state of the actual _icv_iconv conversion routine
392 	 * For the normal iconv module, NULL will be passed as an argument
393 	 * although the iconv_open() of the module won't use that.
394 	 */
395 	cdpath->_icv_state = (void *)(*fptr)(tbl);
396 
397 	if (cdpath->_icv_state == (struct _icv_state *)-1) {
398 		(void) dlclose(cdpath->_icv_handle);
399 		free(cdpath);
400 		/* this module does not satisfy this conversion */
401 		errno = EINVAL;
402 		return ((iconv_p)-1);
403 	}
404 
405 	return (cdpath);
406 }
407 
408 int
iconv_close(iconv_t cd)409 iconv_close(iconv_t cd)
410 {
411 	if (cd == NULL) {
412 		errno = EBADF;
413 		return (-1);
414 	}
415 	(*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
416 	if (cd->_conv->_icv_handle != NULL)
417 		(void) dlclose(cd->_conv->_icv_handle);
418 	free(cd->_conv);
419 	free(cd);
420 	return (0);
421 }
422 
423 /*
424  * To have minimal performance impact to the existing run-time behavior,
425  * we supply a dummy passthru_icv_close() that will just return.
426  */
427 static void
passthru_icv_close(iconv_t cd __unused)428 passthru_icv_close(iconv_t cd __unused)
429 {
430 }
431 
432 size_t
iconv(iconv_t cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)433 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
434     char **outbuf, size_t *outbytesleft)
435 {
436 	/* check if cd is valid */
437 	if (cd == NULL || cd == (iconv_t)-1) {
438 		errno = EBADF;
439 		return ((size_t)-1);
440 	}
441 
442 	/* direct conversion */
443 	return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
444 	    inbuf, inbytesleft, outbuf, outbytesleft));
445 }
446 
447 static size_t
passthru_icv_iconv(iconv_t cd,const char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)448 passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft,
449     char **outbuf, size_t *outbufleft)
450 {
451 	size_t ibl;
452 	size_t obl;
453 	size_t len;
454 	size_t ret_val;
455 
456 	/* Check if the conversion descriptor is a valid one. */
457 	if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) {
458 		errno = EBADF;
459 		return ((size_t)-1);
460 	}
461 
462 	/* For any state reset request, return success. */
463 	if (inbuf == NULL || *inbuf == NULL)
464 		return (0);
465 
466 	/*
467 	 * Initialize internally used variables for a better performance
468 	 * and prepare for a couple of the return values before the actual
469 	 * copying of the bytes.
470 	 */
471 	ibl = *inbufleft;
472 	obl = *outbufleft;
473 
474 	if (ibl > obl) {
475 		len = obl;
476 		errno = E2BIG;
477 		ret_val = (size_t)-1;
478 	} else {
479 		len = ibl;
480 		ret_val = 0;
481 	}
482 
483 	/*
484 	 * Do the copy using memmove(). There are no EILSEQ or EINVAL
485 	 * checkings since this is a simple copying.
486 	 */
487 	(void) memmove((void *)*outbuf, (const void *)*inbuf, len);
488 
489 	/* Update the return values related to the buffers then do return. */
490 	*inbuf = *inbuf + len;
491 	*outbuf = *outbuf + len;
492 	*inbufleft = ibl - len;
493 	*outbufleft = obl - len;
494 
495 	return (ret_val);
496 }
497