xref: /illumos-gate/usr/src/uts/common/os/kiconv.c (revision d14d7d31)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Kernel iconv code conversion functions (PSARC/2007/173).
30  *
31  * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
32  * Interface stability: Committed.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
39 #include <sys/debug.h>
40 #include <sys/kmem.h>
41 #include <sys/sunddi.h>
42 #include <sys/ksynch.h>
43 #include <sys/modctl.h>
44 #include <sys/byteorder.h>
45 #include <sys/errno.h>
46 #include <sys/kiconv.h>
47 #include <sys/kiconv_latin1.h>
48 
49 
50 /*
51  * The following macros indicate ids to the correct code conversion mapping
52  * data tables to use. The actual tables are coming from <sys/kiconv_latin1.h>.
53  */
54 #define	KICONV_TBLID_1252		(0x00)
55 #define	KICONV_TBLID_8859_1		(0x01)
56 #define	KICONV_TBLID_8859_15		(0x02)
57 #define	KICONV_TBLID_850		(0x03)
58 
59 #define	KICONV_MAX_MAPPING_TBLID	(0x03)
60 
61 /*
62  * The following tables are coming from u8_textprep.c. We use them to
63  * check on validity of UTF-8 characters and their bytes.
64  */
65 extern const int8_t u8_number_of_bytes[];
66 extern const uint8_t u8_valid_min_2nd_byte[];
67 extern const uint8_t u8_valid_max_2nd_byte[];
68 
69 
70 /*
71  * The following four functions, open_to_1252(), open_to_88591(),
72  * open_to_885915(), and open_to_850(), are kiconv_open functions from
73  * UTF-8 to corresponding single byte codesets.
74  */
75 static void *
open_to_1252()76 open_to_1252()
77 {
78 	kiconv_state_t s;
79 
80 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
81 	s->id = KICONV_TBLID_1252;
82 	s->bom_processed = 0;
83 
84 	return ((void *)s);
85 }
86 
87 static void *
open_to_88591()88 open_to_88591()
89 {
90 	kiconv_state_t s;
91 
92 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
93 	s->id = KICONV_TBLID_8859_1;
94 	s->bom_processed = 0;
95 
96 	return ((void *)s);
97 }
98 
99 static void *
open_to_885915()100 open_to_885915()
101 {
102 	kiconv_state_t s;
103 
104 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
105 	s->id = KICONV_TBLID_8859_15;
106 	s->bom_processed = 0;
107 
108 	return ((void *)s);
109 }
110 
111 static void *
open_to_850()112 open_to_850()
113 {
114 	kiconv_state_t s;
115 
116 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
117 	s->id = KICONV_TBLID_850;
118 	s->bom_processed = 0;
119 
120 	return ((void *)s);
121 }
122 
123 /*
124  * The following four functions, open_fr_1252(), open_fr_88591(),
125  * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
126  * corresponding single byte codesets to UTF-8.
127  */
128 static void *
open_fr_1252()129 open_fr_1252()
130 {
131 	return ((void *)KICONV_TBLID_1252);
132 }
133 
134 static void *
open_fr_88591()135 open_fr_88591()
136 {
137 	return ((void *)KICONV_TBLID_8859_1);
138 }
139 
140 static void *
open_fr_885915()141 open_fr_885915()
142 {
143 	return ((void *)KICONV_TBLID_8859_15);
144 }
145 
146 static void *
open_fr_850()147 open_fr_850()
148 {
149 	return ((void *)KICONV_TBLID_850);
150 }
151 
152 /*
153  * The following close_to_sb() function is kiconv_close function for
154  * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
155  * is kiconv_close function for the conversions from single byte codesets to
156  * UTF-8.
157  */
158 static int
close_to_sb(void * s)159 close_to_sb(void *s)
160 {
161 	if (! s || s == (void *)-1)
162 		return (EBADF);
163 
164 	kmem_free(s, sizeof (kiconv_state_data_t));
165 
166 	return (0);
167 }
168 
169 static int
close_fr_sb(void * s)170 close_fr_sb(void *s)
171 {
172 	if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID)
173 		return (EBADF);
174 
175 	return (0);
176 }
177 
178 /*
179  * The following is the common kiconv function for conversions from UTF-8
180  * to single byte codesets.
181  */
182 static size_t
kiconv_to_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)183 kiconv_to_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
184 	size_t *outbytesleft, int *errno)
185 {
186 	size_t id;
187 	size_t ret_val;
188 	uchar_t *ib;
189 	uchar_t *oldib;
190 	uchar_t *ob;
191 	uchar_t *ibtail;
192 	uchar_t *obtail;
193 	uint32_t u8;
194 	size_t i;
195 	size_t l;
196 	size_t h;
197 	size_t init_h;
198 	int8_t sz;
199 	boolean_t second;
200 
201 	/* Check on the kiconv code conversion descriptor. */
202 	if (! kcd || kcd == (void *)-1) {
203 		*errno = EBADF;
204 		return ((size_t)-1);
205 	}
206 
207 	/*
208 	 * Get the table id we are going to use for the code conversion
209 	 * and let's double check on it.
210 	 */
211 	id = ((kiconv_state_t)kcd)->id;
212 	if (id > KICONV_MAX_MAPPING_TBLID) {
213 		*errno = EBADF;
214 		return ((size_t)-1);
215 	}
216 
217 	/* If this is a state reset request, process and return. */
218 	if (! inbuf || ! (*inbuf)) {
219 		((kiconv_state_t)kcd)->bom_processed = 0;
220 		return ((size_t)0);
221 	}
222 
223 	ret_val = 0;
224 	ib = (uchar_t *)*inbuf;
225 	ob = (uchar_t *)*outbuf;
226 	ibtail = ib + *inbytesleft;
227 	obtail = ob + *outbytesleft;
228 
229 	/*
230 	 * The inital high value for the binary search we will be using
231 	 * shortly is a literal constant as of today but to be future proof,
232 	 * let's calculate it like the following at here.
233 	 */
234 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
235 
236 	/*
237 	 * If we haven't checked on the UTF-8 signature BOM character in
238 	 * the beginning of the conversion data stream, we check it and if
239 	 * find one, we skip it since we have no use for it.
240 	 */
241 	if (((kiconv_state_t)kcd)->bom_processed == 0 && (ibtail - ib) >= 3 &&
242 	    *ib == 0xef && *(ib + 1) == 0xbb && *(ib + 2) == 0xbf)
243 			ib += 3;
244 	((kiconv_state_t)kcd)->bom_processed = 1;
245 
246 	while (ib < ibtail) {
247 		sz = u8_number_of_bytes[*ib];
248 		if (sz <= 0) {
249 			*errno = EILSEQ;
250 			ret_val = (size_t)-1;
251 			break;
252 		}
253 
254 		/*
255 		 * If there is no room to write at the output buffer,
256 		 * issue E2BIG error.
257 		 */
258 		if (ob >= obtail) {
259 			*errno = E2BIG;
260 			ret_val = (size_t)-1;
261 			break;
262 		}
263 
264 		/*
265 		 * If it is a 7-bit ASCII character, we don't need to
266 		 * process further and we just copy the character over.
267 		 *
268 		 * If not, we collect the character bytes up to four bytes,
269 		 * validate the bytes, and binary search for the corresponding
270 		 * single byte codeset character byte. If we find it from
271 		 * the mapping table, we put that into the output buffer;
272 		 * otherwise, we put a replacement character instead as
273 		 * a non-identical conversion.
274 		 */
275 		if (sz == 1) {
276 			*ob++ = *ib++;
277 			continue;
278 		}
279 
280 		/*
281 		 * Issue EINVAL error if input buffer has an incomplete
282 		 * character at the end of the buffer.
283 		 */
284 		if ((ibtail - ib) < sz) {
285 			*errno = EINVAL;
286 			ret_val = (size_t)-1;
287 			break;
288 		}
289 
290 		/*
291 		 * We collect UTF-8 character bytes and also check if
292 		 * this is a valid UTF-8 character without any bogus bytes
293 		 * based on the latest UTF-8 binary representation.
294 		 */
295 		oldib = ib;
296 		u8 = *ib++;
297 		second = B_TRUE;
298 		for (i = 1; i < sz; i++) {
299 			if (second) {
300 				if (*ib < u8_valid_min_2nd_byte[u8] ||
301 				    *ib > u8_valid_max_2nd_byte[u8]) {
302 					*errno = EILSEQ;
303 					ret_val = (size_t)-1;
304 					ib = oldib;
305 					goto TO_SB_ILLEGAL_CHAR_ERR;
306 				}
307 				second = B_FALSE;
308 			} else if (*ib < 0x80 || *ib > 0xbf) {
309 				*errno = EILSEQ;
310 				ret_val = (size_t)-1;
311 				ib = oldib;
312 				goto TO_SB_ILLEGAL_CHAR_ERR;
313 			}
314 			u8 = (u8 << 8) | ((uint32_t)*ib);
315 			ib++;
316 		}
317 
318 		i = l = 0;
319 		h = init_h;
320 		while (l <= h) {
321 			i = (l + h) / 2;
322 			if (to_sb_tbl[id][i].u8 == u8)
323 				break;
324 			else if (to_sb_tbl[id][i].u8 < u8)
325 				l = i + 1;
326 			else
327 				h = i - 1;
328 		}
329 
330 		if (to_sb_tbl[id][i].u8 == u8) {
331 			*ob++ = to_sb_tbl[id][i].sb;
332 		} else {
333 			/*
334 			 * If we don't find a character in the target
335 			 * codeset, we insert an ASCII replacement character
336 			 * at the output buffer and indicate such
337 			 * "non-identical" conversion by increasing the
338 			 * return value which is the non-identical conversion
339 			 * counter if bigger than 0.
340 			 */
341 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
342 			ret_val++;
343 		}
344 	}
345 
346 TO_SB_ILLEGAL_CHAR_ERR:
347 	*inbuf = (char *)ib;
348 	*inbytesleft = ibtail - ib;
349 	*outbuf = (char *)ob;
350 	*outbytesleft = obtail - ob;
351 
352 	return (ret_val);
353 }
354 
355 /*
356  * The following is the common kiconv function from single byte codesets to
357  * UTF-8.
358  */
359 static size_t
kiconv_fr_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)360 kiconv_fr_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
361 	size_t *outbytesleft, int *errno)
362 {
363 	size_t ret_val;
364 	uchar_t *ib;
365 	uchar_t *ob;
366 	uchar_t *ibtail;
367 	uchar_t *obtail;
368 	size_t i;
369 	size_t k;
370 	int8_t sz;
371 
372 	/* Check on the kiconv code conversion descriptor validity. */
373 	if ((ulong_t)kcd > KICONV_MAX_MAPPING_TBLID) {
374 		*errno = EBADF;
375 		return ((size_t)-1);
376 	}
377 
378 	/*
379 	 * If this is a state reset request, there is nothing to do and so
380 	 * we just return.
381 	 */
382 	if (! inbuf || ! (*inbuf))
383 		return ((size_t)0);
384 
385 	ret_val = 0;
386 	ib = (uchar_t *)*inbuf;
387 	ob = (uchar_t *)*outbuf;
388 	ibtail = ib + *inbytesleft;
389 	obtail = ob + *outbytesleft;
390 
391 	while (ib < ibtail) {
392 		/*
393 		 * If this is a 7-bit ASCII character, we just copy over and
394 		 * that's all we need to do for this character.
395 		 */
396 		if (*ib < 0x80) {
397 			if (ob >= obtail) {
398 				*errno = E2BIG;
399 				ret_val = (size_t)-1;
400 				break;
401 			}
402 
403 			*ob++ = *ib++;
404 			continue;
405 		}
406 
407 		/*
408 		 * Otherwise, we get the corresponding UTF-8 character bytes
409 		 * from the mapping table and copy them over.
410 		 *
411 		 * We don't need to worry about if the UTF-8 character bytes
412 		 * at the mapping tables are valid or not since they are good.
413 		 */
414 		k = *ib - 0x80;
415 		sz = u8_number_of_bytes[to_u8_tbl[(ulong_t)kcd][k].u8[0]];
416 
417 		/*
418 		 * If sz <= 0, that means we don't have any assigned character
419 		 * at the code point, k + 0x80, of the single byte codeset
420 		 * which is the fromcode. In other words, the input buffer
421 		 * has an illegal character.
422 		 */
423 		if (sz <= 0) {
424 			*errno = EILSEQ;
425 			ret_val = (size_t)-1;
426 			break;
427 		}
428 
429 		if ((obtail - ob) < sz) {
430 			*errno = E2BIG;
431 			ret_val = (size_t)-1;
432 			break;
433 		}
434 
435 		for (i = 0; i < sz; i++)
436 			*ob++ = to_u8_tbl[(ulong_t)kcd][k].u8[i];
437 
438 		ib++;
439 	}
440 
441 	*inbuf = (char *)ib;
442 	*inbytesleft = ibtail - ib;
443 	*outbuf = (char *)ob;
444 	*outbytesleft = obtail - ob;
445 
446 	return (ret_val);
447 }
448 
449 /*
450  * The following is the common kiconvstr function from UTF-8 to single byte
451  * codesets.
452  */
453 static size_t
kiconvstr_to_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)454 kiconvstr_to_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
455 	size_t *outlen, int flag, int *errno)
456 {
457 	size_t ret_val;
458 	uchar_t *oldib;
459 	uchar_t *ibtail;
460 	uchar_t *obtail;
461 	uint32_t u8;
462 	size_t i;
463 	size_t l;
464 	size_t h;
465 	size_t init_h;
466 	int8_t sz;
467 	boolean_t second;
468 	boolean_t do_not_ignore_null;
469 
470 	/* Let's make sure that the table id is within the valid boundary. */
471 	if (id > KICONV_MAX_MAPPING_TBLID) {
472 		*errno = EBADF;
473 		return ((size_t)-1);
474 	}
475 
476 	ret_val = 0;
477 	ibtail = ib + *inlen;
478 	obtail = ob + *outlen;
479 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
480 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
481 
482 	/* Skip any UTF-8 signature BOM character in the beginning. */
483 	if ((ibtail - ib) >= 3 && *ib == 0xef && *(ib + 1) == 0xbb &&
484 	    *(ib + 2) == 0xbf)
485 			ib += 3;
486 
487 	/*
488 	 * Basically this is pretty much the same as kiconv_to_sb() except
489 	 * that we are now accepting two flag values and doing the processing
490 	 * accordingly.
491 	 */
492 	while (ib < ibtail) {
493 		sz = u8_number_of_bytes[*ib];
494 		if (sz <= 0) {
495 			if (flag & KICONV_REPLACE_INVALID) {
496 				if (ob >= obtail) {
497 					*errno = E2BIG;
498 					ret_val = (size_t)-1;
499 					break;
500 				}
501 
502 				ib++;
503 				goto STR_TO_SB_REPLACE_INVALID;
504 			}
505 
506 			*errno = EILSEQ;
507 			ret_val = (size_t)-1;
508 			break;
509 		}
510 
511 		if (*ib == '\0' && do_not_ignore_null)
512 			break;
513 
514 		if (ob >= obtail) {
515 			*errno = E2BIG;
516 			ret_val = (size_t)-1;
517 			break;
518 		}
519 
520 		if (sz == 1) {
521 			*ob++ = *ib++;
522 			continue;
523 		}
524 
525 		if ((ibtail - ib) < sz) {
526 			if (flag & KICONV_REPLACE_INVALID) {
527 				ib = ibtail;
528 				goto STR_TO_SB_REPLACE_INVALID;
529 			}
530 
531 			*errno = EINVAL;
532 			ret_val = (size_t)-1;
533 			break;
534 		}
535 
536 		oldib = ib;
537 		u8 = *ib++;
538 		second = B_TRUE;
539 		for (i = 1; i < sz; i++) {
540 			if (second) {
541 				if (*ib < u8_valid_min_2nd_byte[u8] ||
542 				    *ib > u8_valid_max_2nd_byte[u8]) {
543 					if (flag & KICONV_REPLACE_INVALID) {
544 						ib = oldib + sz;
545 						goto STR_TO_SB_REPLACE_INVALID;
546 					}
547 
548 					*errno = EILSEQ;
549 					ret_val = (size_t)-1;
550 					ib = oldib;
551 					goto STR_TO_SB_ILLEGAL_CHAR_ERR;
552 				}
553 				second = B_FALSE;
554 			} else if (*ib < 0x80 || *ib > 0xbf) {
555 				if (flag & KICONV_REPLACE_INVALID) {
556 					ib = oldib + sz;
557 					goto STR_TO_SB_REPLACE_INVALID;
558 				}
559 
560 				*errno = EILSEQ;
561 				ret_val = (size_t)-1;
562 				ib = oldib;
563 				goto STR_TO_SB_ILLEGAL_CHAR_ERR;
564 			}
565 			u8 = (u8 << 8) | ((uint32_t)*ib);
566 			ib++;
567 		}
568 
569 		i = l = 0;
570 		h = init_h;
571 		while (l <= h) {
572 			i = (l + h) / 2;
573 			if (to_sb_tbl[id][i].u8 == u8)
574 				break;
575 			else if (to_sb_tbl[id][i].u8 < u8)
576 				l = i + 1;
577 			else
578 				h = i - 1;
579 		}
580 
581 		if (to_sb_tbl[id][i].u8 == u8) {
582 			*ob++ = to_sb_tbl[id][i].sb;
583 		} else {
584 STR_TO_SB_REPLACE_INVALID:
585 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
586 			ret_val++;
587 		}
588 	}
589 
590 STR_TO_SB_ILLEGAL_CHAR_ERR:
591 	*inlen = ibtail - ib;
592 	*outlen = obtail - ob;
593 
594 	return (ret_val);
595 }
596 
/*
 * The following four functions are the kiconvstr entry points for the
 * conversions from UTF-8 to single byte codesets. They are recorded in
 * the conv_list[] defined below.
 */
601 static size_t
kiconvstr_to_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)602 kiconvstr_to_1252(char *inarray, size_t *inlen, char *outarray,
603 	size_t *outlen, int flag, int *errno)
604 {
605 	return (kiconvstr_to_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
606 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
607 }
608 
609 static size_t
kiconvstr_to_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)610 kiconvstr_to_1(char *inarray, size_t *inlen, char *outarray,
611 	size_t *outlen, int flag, int *errno)
612 {
613 	return (kiconvstr_to_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
614 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
615 }
616 
617 static size_t
kiconvstr_to_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)618 kiconvstr_to_15(char *inarray, size_t *inlen, char *outarray,
619 	size_t *outlen, int flag, int *errno)
620 {
621 	return (kiconvstr_to_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
622 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
623 }
624 
625 static size_t
kiconvstr_to_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)626 kiconvstr_to_850(char *inarray, size_t *inlen, char *outarray,
627 	size_t *outlen, int flag, int *errno)
628 {
629 	return (kiconvstr_to_sb(KICONV_TBLID_850, (uchar_t *)inarray,
630 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
631 }
632 
633 /*
634  * The following is the common kiconvstr function for conversions from
635  * single byte codesets to UTF-8.
636  */
637 static size_t
kiconvstr_fr_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)638 kiconvstr_fr_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
639 	size_t *outlen, int flag, int *errno)
640 {
641 	size_t ret_val;
642 	uchar_t *ibtail;
643 	uchar_t *obtail;
644 	size_t i;
645 	size_t k;
646 	int8_t sz;
647 	boolean_t do_not_ignore_null;
648 
649 	ret_val = 0;
650 	ibtail = ib + *inlen;
651 	obtail = ob + *outlen;
652 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
653 
654 	while (ib < ibtail) {
655 		if (*ib == '\0' && do_not_ignore_null)
656 			break;
657 
658 		if (*ib < 0x80) {
659 			if (ob >= obtail) {
660 				*errno = E2BIG;
661 				ret_val = (size_t)-1;
662 				break;
663 			}
664 			*ob++ = *ib++;
665 			continue;
666 		}
667 
668 		k = *ib - 0x80;
669 		sz = u8_number_of_bytes[to_u8_tbl[id][k].u8[0]];
670 
671 		if (sz <= 0) {
672 			if (flag & KICONV_REPLACE_INVALID) {
673 				if ((obtail - ob) < 3) {
674 					*errno = E2BIG;
675 					ret_val = (size_t)-1;
676 					break;
677 				}
678 
679 				/* Save KICONV_UTF8_REPLACEMENT_CHAR. */
680 				*ob++ = 0xef;
681 				*ob++ = 0xbf;
682 				*ob++ = 0xbd;
683 				ret_val++;
684 				ib++;
685 
686 				continue;
687 			}
688 
689 			*errno = EILSEQ;
690 			ret_val = (size_t)-1;
691 			break;
692 		}
693 
694 		if ((obtail - ob) < sz) {
695 			*errno = E2BIG;
696 			ret_val = (size_t)-1;
697 			break;
698 		}
699 
700 		for (i = 0; i < sz; i++)
701 			*ob++ = to_u8_tbl[id][k].u8[i];
702 
703 		ib++;
704 	}
705 
706 	*inlen = ibtail - ib;
707 	*outlen = obtail - ob;
708 
709 	return (ret_val);
710 }
711 
/*
 * The following four functions are the kiconvstr entry points for the
 * conversions from single byte codesets to UTF-8. They are also recorded
 * in the conv_list[] defined below.
 */
716 static size_t
kiconvstr_fr_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)717 kiconvstr_fr_1252(char *inarray, size_t *inlen, char *outarray,
718 	size_t *outlen, int flag, int *errno)
719 {
720 	return (kiconvstr_fr_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
721 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
722 }
723 
724 static size_t
kiconvstr_fr_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)725 kiconvstr_fr_1(char *inarray, size_t *inlen, char *outarray,
726 	size_t *outlen, int flag, int *errno)
727 {
728 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
729 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
730 }
731 
732 static size_t
kiconvstr_fr_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)733 kiconvstr_fr_15(char *inarray, size_t *inlen, char *outarray,
734 	size_t *outlen, int flag, int *errno)
735 {
736 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
737 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
738 }
739 
740 static size_t
kiconvstr_fr_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)741 kiconvstr_fr_850(char *inarray, size_t *inlen, char *outarray,
742 	size_t *outlen, int flag, int *errno)
743 {
744 	return (kiconvstr_fr_sb(KICONV_TBLID_850, (uchar_t *)inarray,
745 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
746 }
747 
748 /*
749  * The following static vector contains the normalized code names
750  * and their corresponding code ids. They are somewhat arbitrarily ordered
751  * based on marketing data available. A code id could repeat for aliases.
752  *
753  * The vector was generated by using a small utility program called
754  * codeidlistgen.c that you can find from PSARC/2007/173/materials/util/.
755  *
756  * The code ids must be portable, i.e., if needed, you can always generate
757  * the code_list[] again with different code ids. You'll also need to
758  * update the conv_list[] at below.
759  */
760 #define	KICONV_MAX_CODEID_ENTRY		68
761 #define	KICONV_MAX_CODEID		42
762 
763 static kiconv_code_list_t code_list[KICONV_MAX_CODEID_ENTRY] = {
764 	{ "utf8", 0 },
765 	{ "cp1252", 1 },
766 	{ "1252", 1 },
767 	{ "iso88591", 2 },
768 	{ "iso885915", 3 },
769 	{ "cp850", 4 },
770 	{ "850", 4 },
771 	{ "eucjp", 5 },
772 	{ "eucjpms", 6 },
773 	{ "cp932", 7 },
774 	{ "932", 7 },
775 	{ "shiftjis", 8 },
776 	{ "pck", 8 },
777 	{ "sjis", 8 },
778 	{ "gb18030", 9 },
779 	{ "gbk", 10 },
780 	{ "cp936", 10 },
781 	{ "936", 10 },
782 	{ "euccn", 11 },
783 	{ "euckr", 12 },
784 	{ "unifiedhangul", 13 },
785 	{ "cp949", 13 },
786 	{ "949", 13 },
787 	{ "big5", 14 },
788 	{ "cp950", 14 },
789 	{ "950", 14 },
790 	{ "big5hkscs", 15 },
791 	{ "euctw", 16 },
792 	{ "cp950hkscs", 17 },
793 	{ "cp1250", 18 },
794 	{ "1250", 18 },
795 	{ "iso88592", 19 },
796 	{ "cp852", 20 },
797 	{ "852", 20 },
798 	{ "cp1251", 21 },
799 	{ "1251", 21 },
800 	{ "iso88595", 22 },
801 	{ "koi8r", 23 },
802 	{ "cp866", 24 },
803 	{ "866", 24 },
804 	{ "cp1253", 25 },
805 	{ "1253", 25 },
806 	{ "iso88597", 26 },
807 	{ "cp737", 27 },
808 	{ "737", 27 },
809 	{ "cp1254", 28 },
810 	{ "1254", 28 },
811 	{ "iso88599", 29 },
812 	{ "cp857", 30 },
813 	{ "857", 30 },
814 	{ "cp1256", 31 },
815 	{ "1256", 31 },
816 	{ "iso88596", 32 },
817 	{ "cp720", 33 },
818 	{ "720", 33 },
819 	{ "cp1255", 34 },
820 	{ "1255", 34 },
821 	{ "iso88598", 35 },
822 	{ "cp862", 36 },
823 	{ "862", 36 },
824 	{ "cp1257", 37 },
825 	{ "1257", 37 },
826 	{ "iso885913", 38 },
827 	{ "iso885910", 39 },
828 	{ "iso885911", 40 },
829 	{ "tis620", 40 },
830 	{ "iso88593", 41 },
831 	{ "iso88594", 42 },
832 };
833 
/*
 * The supported code conversions are grouped together per module; each
 * module is loaded as needed.
 */
838 #define	KICONV_MAX_CONVERSIONS		84
839 
840 static kiconv_conv_list_t conv_list[KICONV_MAX_CONVERSIONS] = {
841 	/* Embedded code conversions: */
842 	{
843 		1, 0, KICONV_EMBEDDED,
844 		open_to_1252, kiconv_to_sb, close_to_sb, kiconvstr_to_1252
845 	},
846 	{
847 		0, 1, KICONV_EMBEDDED,
848 		open_fr_1252, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1252
849 	},
850 	{
851 		2, 0, KICONV_EMBEDDED,
852 		open_to_88591, kiconv_to_sb, close_to_sb, kiconvstr_to_1
853 	},
854 	{
855 		0, 2, KICONV_EMBEDDED,
856 		open_fr_88591, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1
857 	},
858 	{
859 		3, 0, KICONV_EMBEDDED,
860 		open_to_885915, kiconv_to_sb, close_to_sb, kiconvstr_to_15
861 	},
862 	{
863 		0, 3, KICONV_EMBEDDED,
864 		open_fr_885915, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_15
865 	},
866 	{
867 		4, 0, KICONV_EMBEDDED,
868 		open_to_850, kiconv_to_sb, close_to_sb, kiconvstr_to_850
869 	},
870 	{
871 		0, 4, KICONV_EMBEDDED,
872 		open_fr_850, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_850
873 	},
874 
875 	/* kiconv_ja module conversions: */
876 	{ 0, 5, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
877 	{ 5, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
878 	{ 0, 6, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
879 	{ 6, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
880 	{ 0, 7, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
881 	{ 7, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
882 	{ 0, 8, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
883 	{ 8, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
884 
885 	/* kiconv_sc module conversions: */
886 	{ 0, 9, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
887 	{ 9, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
888 	{ 0, 10, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
889 	{ 10, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
890 	{ 0, 11, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
891 	{ 11, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
892 
893 	/* kiconv_ko module conversions: */
894 	{ 0, 12, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
895 	{ 12, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
896 	{ 0, 13, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
897 	{ 13, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
898 
899 	/* kiconv_tc module conversions: */
900 	{ 0, 14, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
901 	{ 14, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
902 	{ 0, 15, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
903 	{ 15, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
904 	{ 0, 16, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
905 	{ 16, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
906 	{ 0, 17, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
907 	{ 17, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
908 
909 	/* kiconv_emea module conversions: */
910 	{ 0, 18, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
911 	{ 18, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
912 	{ 0, 19, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
913 	{ 19, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
914 	{ 0, 20, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
915 	{ 20, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
916 	{ 0, 21, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
917 	{ 21, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
918 	{ 0, 22, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
919 	{ 22, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
920 	{ 0, 23, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
921 	{ 23, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
922 	{ 0, 24, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
923 	{ 24, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
924 	{ 0, 25, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
925 	{ 25, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
926 	{ 0, 26, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
927 	{ 26, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
928 	{ 0, 27, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
929 	{ 27, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
930 	{ 0, 28, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
931 	{ 28, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
932 	{ 0, 29, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
933 	{ 29, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
934 	{ 0, 30, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
935 	{ 30, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
936 	{ 0, 31, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
937 	{ 31, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
938 	{ 0, 32, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
939 	{ 32, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
940 	{ 0, 33, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
941 	{ 33, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
942 	{ 0, 34, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
943 	{ 34, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
944 	{ 0, 35, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
945 	{ 35, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
946 	{ 0, 36, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
947 	{ 36, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
948 	{ 0, 37, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
949 	{ 37, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
950 	{ 0, 38, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
951 	{ 38, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
952 	{ 0, 39, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
953 	{ 39, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
954 	{ 0, 40, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
955 	{ 40, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
956 	{ 0, 41, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
957 	{ 41, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
958 	{ 0, 42, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
959 	{ 42, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
960 };
961 
/* The list of implemented and supported modules. */
963 static kiconv_mod_list_t module_list[KICONV_MAX_MODULE_ID + 1] = {
964 	"kiconv_embedded", 0,
965 	"kiconv_ja", 0,
966 	"kiconv_sc", 0,
967 	"kiconv_ko", 0,
968 	"kiconv_tc", 0,
969 	"kiconv_emea", 0,
970 };
971 
972 /*
973  * We use conv_list_lock to restrict data access of both conv_list[] and
974  * module_list[] as they are tightly coupled critical sections that need to be
975  * dealt together as a unit.
976  */
977 static kmutex_t conv_list_lock;
978 
979 void
kiconv_init()980 kiconv_init()
981 {
982 	mutex_init(&conv_list_lock, NULL, MUTEX_DEFAULT, NULL);
983 }
984 
985 /*
986  * The following is used to check on whether a kiconv module is being
987  * used or not at the _fini() of the module.
988  */
989 size_t
kiconv_module_ref_count(size_t mid)990 kiconv_module_ref_count(size_t mid)
991 {
992 	int count;
993 
994 	if (mid <= 0 || mid > KICONV_MAX_MODULE_ID)
995 		return (0);
996 
997 	mutex_enter(&conv_list_lock);
998 
999 	count = module_list[mid].refcount;
1000 
1001 	mutex_exit(&conv_list_lock);
1002 
1003 	return (count);
1004 }
1005 
1006 /*
1007  * This function "normalizes" a given code name, n, by not including skippable
1008  * characters and folding uppercase letters to corresponding lowercase letters.
1009  * We only fold 7-bit ASCII uppercase characters since the names should be in
1010  * Portable Character Set of 7-bit ASCII.
1011  *
1012  * By doing this, we will be able to maximize the code name matches.
1013  */
1014 static size_t
normalize_codename(const char * n)1015 normalize_codename(const char *n)
1016 {
1017 	char s[KICONV_MAX_CODENAME_LEN + 1];
1018 	size_t i;
1019 
1020 	if (n == NULL)
1021 		return ((size_t)-1);
1022 
1023 	for (i = 0; *n; n++) {
1024 		if (KICONV_SKIPPABLE_CHAR(*n))
1025 			continue;
1026 
1027 		/* If unreasonably lengthy, we don't support such names. */
1028 		if (i >= KICONV_MAX_CODENAME_LEN)
1029 			return ((size_t)-1);
1030 
1031 		s[i++] = (*n >= 'A' && *n <= 'Z') ? *n - 'A' + 'a' : *n;
1032 	}
1033 	s[i] = '\0';
1034 
1035 	/* With the normalized name, find the corresponding codeset id. */
1036 	for (i = 0; i < KICONV_MAX_CODEID_ENTRY; i++)
1037 		if (strcmp(s, code_list[i].name) == 0)
1038 			return (code_list[i].id);
1039 
1040 	/*
1041 	 * In future time, we will also have a few more lines of code at below
1042 	 * that will deal with other user-created modules' fromcodes and
1043 	 * tocodes including aliases in a different vector. For now, we don't
1044 	 * support that but only the known names to this project at this time.
1045 	 */
1046 
1047 	return ((size_t)-1);
1048 }
1049 
1050 /*
1051  * This function called from mod_install() registers supplied code
1052  * conversions. At this point, it does not honor aliases and hence does not
1053  * use nowait data field from the kiconv module info data structure.
1054  */
1055 int
kiconv_register_module(kiconv_module_info_t * info)1056 kiconv_register_module(kiconv_module_info_t *info)
1057 {
1058 	size_t mid;
1059 	size_t fid;
1060 	size_t tid;
1061 	size_t i;
1062 	size_t j;
1063 	kiconv_ops_t *op;
1064 
1065 	/* Validate the given kiconv module info. */
1066 	if (info == NULL || info->module_name == NULL ||
1067 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1068 		return (EINVAL);
1069 
1070 	/*
1071 	 * Check if this is one of the known modules. At this point,
1072 	 * we do not allow user-defined kiconv modules and that'd be for
1073 	 * a future project.
1074 	 */
1075 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1076 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1077 			break;
1078 	if (mid > KICONV_MAX_MODULE_ID)
1079 		return (EINVAL);
1080 
1081 	/* Let's register the conversions supplied. */
1082 	mutex_enter(&conv_list_lock);
1083 
1084 	/*
1085 	 * This is very unlikely situation but by any chance we don't want to
1086 	 * register a module that is already in.
1087 	 */
1088 	if (module_list[mid].refcount > 0) {
1089 		mutex_exit(&conv_list_lock);
1090 		return (EAGAIN);
1091 	}
1092 
1093 	for (i = 0; i < info->kiconv_num_convs; i++) {
1094 		op = &(info->kiconv_ops_tbl[i]);
1095 
1096 		fid = normalize_codename(op->fromcode);
1097 		tid = normalize_codename(op->tocode);
1098 
1099 		/*
1100 		 * If we find anything wrong in this particular conversion,
1101 		 * we skip this one and continue to the next one. This include
1102 		 * a case where there is a conversion already being assigned
1103 		 * into the conv_list[] somehow, i.e., new one never kicks out
1104 		 * old one.
1105 		 */
1106 		if (op->kiconv_open == NULL || op->kiconv == NULL ||
1107 		    op->kiconv_close == NULL || op->kiconvstr == NULL)
1108 			continue;
1109 
1110 		for (j = 0; j < KICONV_MAX_CONVERSIONS; j++) {
1111 			if (conv_list[j].mid == mid &&
1112 			    conv_list[j].fid == fid &&
1113 			    conv_list[j].tid == tid) {
1114 				if (conv_list[j].open == NULL) {
1115 					conv_list[j].open = op->kiconv_open;
1116 					conv_list[j].kiconv = op->kiconv;
1117 					conv_list[j].close = op->kiconv_close;
1118 					conv_list[j].kiconvstr = op->kiconvstr;
1119 				}
1120 				break;
1121 			}
1122 		}
1123 	}
1124 
1125 	mutex_exit(&conv_list_lock);
1126 
1127 	return (0);
1128 }
1129 
1130 /*
1131  * The following function called during mod_remove() will try to unregister,
1132  * i.e., clear up conversion function pointers, from the conv_list[] if it
1133  * can. If there is any code conversions being used, then, the function will
1134  * just return EBUSY indicating that the module cannot be unloaded.
1135  */
1136 int
kiconv_unregister_module(kiconv_module_info_t * info)1137 kiconv_unregister_module(kiconv_module_info_t *info)
1138 {
1139 	size_t mid;
1140 	size_t i;
1141 
1142 	if (info == NULL || info->module_name == NULL ||
1143 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1144 		return (EINVAL);
1145 
1146 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1147 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1148 			break;
1149 	if (mid > KICONV_MAX_MODULE_ID)
1150 		return (EINVAL);
1151 
1152 	mutex_enter(&conv_list_lock);
1153 
1154 	/*
1155 	 * If any of the conversions are used, then, this module canont be
1156 	 * unloaded.
1157 	 */
1158 	if (module_list[mid].refcount > 0) {
1159 		mutex_exit(&conv_list_lock);
1160 		return (EBUSY);
1161 	}
1162 
1163 	/*
1164 	 * Otherwise, we unregister all conversions from this module
1165 	 * and be ready for the unloading. At this point, we only care about
1166 	 * the conversions we know about with the module.
1167 	 */
1168 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++) {
1169 		if (conv_list[i].mid == mid) {
1170 			conv_list[i].open = NULL;
1171 			conv_list[i].kiconv = NULL;
1172 			conv_list[i].close = NULL;
1173 			conv_list[i].kiconvstr = NULL;
1174 		}
1175 	}
1176 
1177 	mutex_exit(&conv_list_lock);
1178 
1179 	return (0);
1180 }
1181 
1182 /*
1183  * The following function check if asked code conversion is available
1184  * and if necessary, load the corresponding kiconv module that contains
1185  * the conversion (and others).
1186  */
1187 static kiconv_t
check_and_load_conversions(const char * tocode,const char * fromcode)1188 check_and_load_conversions(const char *tocode, const char *fromcode)
1189 {
1190 	kiconv_t kcd;
1191 	size_t tid;
1192 	size_t fid;
1193 	size_t mid;
1194 	size_t i;
1195 
1196 	/* Normalize the given names and find the corresponding code ids. */
1197 	tid = normalize_codename(tocode);
1198 	if (tid == (size_t)-1)
1199 		return ((kiconv_t)-1);
1200 
1201 	fid = normalize_codename(fromcode);
1202 	if (fid == (size_t)-1)
1203 		return ((kiconv_t)-1);
1204 
1205 	/*
1206 	 * Search the conversion.
1207 	 *
1208 	 * If the conversion isn't supported, just return -1.
1209 	 * If the conversion is supported but there is no corresponding
1210 	 * module loaded, try to load it and if successful, return
1211 	 * a kiconv conversion descriptor memory block.
1212 	 *
1213 	 * We maintain a reference counter of uint_t for each module.
1214 	 */
1215 	mutex_enter(&conv_list_lock);
1216 
1217 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++)
1218 		if (conv_list[i].tid == tid && conv_list[i].fid == fid)
1219 			break;
1220 	if (i >= KICONV_MAX_CONVERSIONS) {
1221 		mutex_exit(&conv_list_lock);
1222 		return ((kiconv_t)-1);
1223 	}
1224 
1225 	mid = conv_list[i].mid;
1226 
1227 	if (conv_list[i].open == NULL) {
1228 		mutex_exit(&conv_list_lock);
1229 
1230 		if (modload("kiconv", module_list[mid].name) < 0)
1231 			return ((kiconv_t)-1);
1232 
1233 		/*
1234 		 * Let's double check if something happened right after
1235 		 * the modload and/or if the module really has the conversion.
1236 		 */
1237 		mutex_enter(&conv_list_lock);
1238 
1239 		if (conv_list[i].open == NULL) {
1240 			mutex_exit(&conv_list_lock);
1241 			return ((kiconv_t)-1);
1242 		}
1243 	}
1244 
1245 	/*
1246 	 * If we got the conversion, we will use the conversion function
1247 	 * in the module and so let's increase the module's refcounter
1248 	 * so that the module won't be kicked out. (To be more exact and
1249 	 * specific, the "refcount" is thus the reference counter of
1250 	 * the module functions being used.)
1251 	 */
1252 	if (module_list[mid].refcount < UINT_MAX)
1253 		module_list[mid].refcount++;
1254 
1255 	mutex_exit(&conv_list_lock);
1256 
1257 	kcd = (kiconv_t)kmem_alloc(sizeof (kiconv_data_t), KM_SLEEP);
1258 	kcd->handle = (void *)-1;
1259 	kcd->id = i;
1260 
1261 	return (kcd);
1262 }
1263 
1264 /*
1265  * The following are the four "Committed" interfaces.
1266  */
1267 kiconv_t
kiconv_open(const char * tocode,const char * fromcode)1268 kiconv_open(const char *tocode, const char *fromcode)
1269 {
1270 	kiconv_t kcd;
1271 	size_t mid;
1272 
1273 	kcd = check_and_load_conversions(tocode, fromcode);
1274 	if (kcd == (kiconv_t)-1)
1275 		return ((kiconv_t)-1);
1276 
1277 	kcd->handle = (conv_list[kcd->id].open)();
1278 	if (kcd->handle == (void *)-1) {
1279 		/*
1280 		 * If the conversion couldn't be opened for some reason,
1281 		 * then, we unallocate the kcd and, more importantly, before
1282 		 * that, we also decrease the module reference counter.
1283 		 */
1284 		mid = conv_list[kcd->id].mid;
1285 
1286 		mutex_enter(&conv_list_lock);
1287 
1288 		if (module_list[mid].refcount > 0)
1289 			module_list[mid].refcount--;
1290 
1291 		mutex_exit(&conv_list_lock);
1292 
1293 		kmem_free((void *)kcd, sizeof (kiconv_data_t));
1294 
1295 		return ((kiconv_t)-1);
1296 	}
1297 
1298 	return (kcd);
1299 }
1300 
1301 size_t
kiconv(kiconv_t kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1302 kiconv(kiconv_t kcd, char **inbuf, size_t *inbytesleft,
1303 	char **outbuf, size_t *outbytesleft, int *errno)
1304 {
1305 	/* Do some minimum checking on the kiconv conversion descriptor. */
1306 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconv == NULL) {
1307 		*errno = EBADF;
1308 		return ((size_t)-1);
1309 	}
1310 
1311 	return ((conv_list[kcd->id].kiconv)(kcd->handle, inbuf, inbytesleft,
1312 	    outbuf, outbytesleft, errno));
1313 }
1314 
1315 int
kiconv_close(kiconv_t kcd)1316 kiconv_close(kiconv_t kcd)
1317 {
1318 	int ret;
1319 	size_t mid;
1320 
1321 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].close == NULL)
1322 		return (EBADF);
1323 
1324 	mid = conv_list[kcd->id].mid;
1325 
1326 	ret = (conv_list[kcd->id].close)(kcd->handle);
1327 
1328 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1329 
1330 	mutex_enter(&conv_list_lock);
1331 
1332 	/*
1333 	 * While we maintain reference conter for each module, once loaded,
1334 	 * we don't modunload from kiconv functions even if the counter
1335 	 * reaches back to zero.
1336 	 */
1337 	if (module_list[mid].refcount > 0)
1338 		module_list[mid].refcount--;
1339 
1340 	mutex_exit(&conv_list_lock);
1341 
1342 	return (ret);
1343 }
1344 
1345 size_t
kiconvstr(const char * tocode,const char * fromcode,char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)1346 kiconvstr(const char *tocode, const char *fromcode, char *inarray,
1347 	size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno)
1348 {
1349 	kiconv_t kcd;
1350 	size_t ret;
1351 	size_t mid;
1352 
1353 	kcd = check_and_load_conversions(tocode, fromcode);
1354 	if (kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconvstr == NULL) {
1355 		*errno = EBADF;
1356 		return ((size_t)-1);
1357 	}
1358 
1359 	mid = conv_list[kcd->id].mid;
1360 
1361 	ret = (conv_list[kcd->id].kiconvstr)(inarray, inlen, outarray, outlen,
1362 	    flag, errno);
1363 
1364 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1365 
1366 	mutex_enter(&conv_list_lock);
1367 
1368 	if (module_list[mid].refcount > 0)
1369 		module_list[mid].refcount--;
1370 
1371 	mutex_exit(&conv_list_lock);
1372 
1373 	return (ret);
1374 }
1375