xref: /illumos-gate/usr/src/uts/common/os/kiconv.c (revision 2d6eb4a5)
1*d14d7d31Sis /*
2*d14d7d31Sis  * CDDL HEADER START
3*d14d7d31Sis  *
4*d14d7d31Sis  * The contents of this file are subject to the terms of the
5*d14d7d31Sis  * Common Development and Distribution License (the "License").
6*d14d7d31Sis  * You may not use this file except in compliance with the License.
7*d14d7d31Sis  *
8*d14d7d31Sis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*d14d7d31Sis  * or http://www.opensolaris.org/os/licensing.
10*d14d7d31Sis  * See the License for the specific language governing permissions
11*d14d7d31Sis  * and limitations under the License.
12*d14d7d31Sis  *
13*d14d7d31Sis  * When distributing Covered Code, include this CDDL HEADER in each
14*d14d7d31Sis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*d14d7d31Sis  * If applicable, add the following below this CDDL HEADER, with the
16*d14d7d31Sis  * fields enclosed by brackets "[]" replaced with your own identifying
17*d14d7d31Sis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*d14d7d31Sis  *
19*d14d7d31Sis  * CDDL HEADER END
20*d14d7d31Sis  */
21*d14d7d31Sis /*
22*d14d7d31Sis  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23*d14d7d31Sis  * Use is subject to license terms.
24*d14d7d31Sis  */
25*d14d7d31Sis 
26*d14d7d31Sis /*
27*d14d7d31Sis  * Kernel iconv code conversion functions (PSARC/2007/173).
28*d14d7d31Sis  *
29*d14d7d31Sis  * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
30*d14d7d31Sis  * Interface stability: Committed.
31*d14d7d31Sis  */
32*d14d7d31Sis 
33*d14d7d31Sis #include <sys/types.h>
34*d14d7d31Sis #include <sys/param.h>
35*d14d7d31Sis #include <sys/sysmacros.h>
36*d14d7d31Sis #include <sys/systm.h>
37*d14d7d31Sis #include <sys/debug.h>
38*d14d7d31Sis #include <sys/kmem.h>
39*d14d7d31Sis #include <sys/sunddi.h>
40*d14d7d31Sis #include <sys/ksynch.h>
41*d14d7d31Sis #include <sys/modctl.h>
42*d14d7d31Sis #include <sys/byteorder.h>
43*d14d7d31Sis #include <sys/errno.h>
44*d14d7d31Sis #include <sys/kiconv.h>
45*d14d7d31Sis #include <sys/kiconv_latin1.h>
46*d14d7d31Sis 
47*d14d7d31Sis 
/*
 * Ids of the code conversion mapping tables. An id is used as the first
 * index into the mapping tables, which themselves are coming from
 * <sys/kiconv_latin1.h>.
 */
#define	KICONV_TBLID_1252		(0x00)
#define	KICONV_TBLID_8859_1		(0x01)
#define	KICONV_TBLID_8859_15		(0x02)
#define	KICONV_TBLID_850		(0x03)

/* The biggest valid table id; bigger values are rejected with EBADF. */
#define	KICONV_MAX_MAPPING_TBLID	KICONV_TBLID_850

/*
 * The following tables are coming from u8_textprep.c. They are indexed by
 * a byte value and are used to check on validity of UTF-8 characters and
 * their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];
66*d14d7d31Sis 
67*d14d7d31Sis 
68*d14d7d31Sis /*
69*d14d7d31Sis  * The following four functions, open_to_1252(), open_to_88591(),
70*d14d7d31Sis  * open_to_885915(), and open_to_850(), are kiconv_open functions from
71*d14d7d31Sis  * UTF-8 to corresponding single byte codesets.
72*d14d7d31Sis  */
73*d14d7d31Sis static void *
open_to_1252()74*d14d7d31Sis open_to_1252()
75*d14d7d31Sis {
76*d14d7d31Sis 	kiconv_state_t s;
77*d14d7d31Sis 
78*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
79*d14d7d31Sis 	s->id = KICONV_TBLID_1252;
80*d14d7d31Sis 	s->bom_processed = 0;
81*d14d7d31Sis 
82*d14d7d31Sis 	return ((void *)s);
83*d14d7d31Sis }
84*d14d7d31Sis 
85*d14d7d31Sis static void *
open_to_88591()86*d14d7d31Sis open_to_88591()
87*d14d7d31Sis {
88*d14d7d31Sis 	kiconv_state_t s;
89*d14d7d31Sis 
90*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
91*d14d7d31Sis 	s->id = KICONV_TBLID_8859_1;
92*d14d7d31Sis 	s->bom_processed = 0;
93*d14d7d31Sis 
94*d14d7d31Sis 	return ((void *)s);
95*d14d7d31Sis }
96*d14d7d31Sis 
97*d14d7d31Sis static void *
open_to_885915()98*d14d7d31Sis open_to_885915()
99*d14d7d31Sis {
100*d14d7d31Sis 	kiconv_state_t s;
101*d14d7d31Sis 
102*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
103*d14d7d31Sis 	s->id = KICONV_TBLID_8859_15;
104*d14d7d31Sis 	s->bom_processed = 0;
105*d14d7d31Sis 
106*d14d7d31Sis 	return ((void *)s);
107*d14d7d31Sis }
108*d14d7d31Sis 
109*d14d7d31Sis static void *
open_to_850()110*d14d7d31Sis open_to_850()
111*d14d7d31Sis {
112*d14d7d31Sis 	kiconv_state_t s;
113*d14d7d31Sis 
114*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
115*d14d7d31Sis 	s->id = KICONV_TBLID_850;
116*d14d7d31Sis 	s->bom_processed = 0;
117*d14d7d31Sis 
118*d14d7d31Sis 	return ((void *)s);
119*d14d7d31Sis }
120*d14d7d31Sis 
121*d14d7d31Sis /*
122*d14d7d31Sis  * The following four functions, open_fr_1252(), open_fr_88591(),
123*d14d7d31Sis  * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
124*d14d7d31Sis  * corresponding single byte codesets to UTF-8.
125*d14d7d31Sis  */
126*d14d7d31Sis static void *
open_fr_1252()127*d14d7d31Sis open_fr_1252()
128*d14d7d31Sis {
129*d14d7d31Sis 	return ((void *)KICONV_TBLID_1252);
130*d14d7d31Sis }
131*d14d7d31Sis 
132*d14d7d31Sis static void *
open_fr_88591()133*d14d7d31Sis open_fr_88591()
134*d14d7d31Sis {
135*d14d7d31Sis 	return ((void *)KICONV_TBLID_8859_1);
136*d14d7d31Sis }
137*d14d7d31Sis 
138*d14d7d31Sis static void *
open_fr_885915()139*d14d7d31Sis open_fr_885915()
140*d14d7d31Sis {
141*d14d7d31Sis 	return ((void *)KICONV_TBLID_8859_15);
142*d14d7d31Sis }
143*d14d7d31Sis 
144*d14d7d31Sis static void *
open_fr_850()145*d14d7d31Sis open_fr_850()
146*d14d7d31Sis {
147*d14d7d31Sis 	return ((void *)KICONV_TBLID_850);
148*d14d7d31Sis }
149*d14d7d31Sis 
150*d14d7d31Sis /*
151*d14d7d31Sis  * The following close_to_sb() function is kiconv_close function for
152*d14d7d31Sis  * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
153*d14d7d31Sis  * is kiconv_close function for the conversions from single byte codesets to
154*d14d7d31Sis  * UTF-8.
155*d14d7d31Sis  */
156*d14d7d31Sis static int
close_to_sb(void * s)157*d14d7d31Sis close_to_sb(void *s)
158*d14d7d31Sis {
159*d14d7d31Sis 	if (! s || s == (void *)-1)
160*d14d7d31Sis 		return (EBADF);
161*d14d7d31Sis 
162*d14d7d31Sis 	kmem_free(s, sizeof (kiconv_state_data_t));
163*d14d7d31Sis 
164*d14d7d31Sis 	return (0);
165*d14d7d31Sis }
166*d14d7d31Sis 
167*d14d7d31Sis static int
close_fr_sb(void * s)168*d14d7d31Sis close_fr_sb(void *s)
169*d14d7d31Sis {
170*d14d7d31Sis 	if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID)
171*d14d7d31Sis 		return (EBADF);
172*d14d7d31Sis 
173*d14d7d31Sis 	return (0);
174*d14d7d31Sis }
175*d14d7d31Sis 
176*d14d7d31Sis /*
177*d14d7d31Sis  * The following is the common kiconv function for conversions from UTF-8
178*d14d7d31Sis  * to single byte codesets.
179*d14d7d31Sis  */
180*d14d7d31Sis static size_t
kiconv_to_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)181*d14d7d31Sis kiconv_to_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
182*d14d7d31Sis 	size_t *outbytesleft, int *errno)
183*d14d7d31Sis {
184*d14d7d31Sis 	size_t id;
185*d14d7d31Sis 	size_t ret_val;
186*d14d7d31Sis 	uchar_t *ib;
187*d14d7d31Sis 	uchar_t *oldib;
188*d14d7d31Sis 	uchar_t *ob;
189*d14d7d31Sis 	uchar_t *ibtail;
190*d14d7d31Sis 	uchar_t *obtail;
191*d14d7d31Sis 	uint32_t u8;
192*d14d7d31Sis 	size_t i;
193*d14d7d31Sis 	size_t l;
194*d14d7d31Sis 	size_t h;
195*d14d7d31Sis 	size_t init_h;
196*d14d7d31Sis 	int8_t sz;
197*d14d7d31Sis 	boolean_t second;
198*d14d7d31Sis 
199*d14d7d31Sis 	/* Check on the kiconv code conversion descriptor. */
200*d14d7d31Sis 	if (! kcd || kcd == (void *)-1) {
201*d14d7d31Sis 		*errno = EBADF;
202*d14d7d31Sis 		return ((size_t)-1);
203*d14d7d31Sis 	}
204*d14d7d31Sis 
205*d14d7d31Sis 	/*
206*d14d7d31Sis 	 * Get the table id we are going to use for the code conversion
207*d14d7d31Sis 	 * and let's double check on it.
208*d14d7d31Sis 	 */
209*d14d7d31Sis 	id = ((kiconv_state_t)kcd)->id;
210*d14d7d31Sis 	if (id > KICONV_MAX_MAPPING_TBLID) {
211*d14d7d31Sis 		*errno = EBADF;
212*d14d7d31Sis 		return ((size_t)-1);
213*d14d7d31Sis 	}
214*d14d7d31Sis 
215*d14d7d31Sis 	/* If this is a state reset request, process and return. */
216*d14d7d31Sis 	if (! inbuf || ! (*inbuf)) {
217*d14d7d31Sis 		((kiconv_state_t)kcd)->bom_processed = 0;
218*d14d7d31Sis 		return ((size_t)0);
219*d14d7d31Sis 	}
220*d14d7d31Sis 
221*d14d7d31Sis 	ret_val = 0;
222*d14d7d31Sis 	ib = (uchar_t *)*inbuf;
223*d14d7d31Sis 	ob = (uchar_t *)*outbuf;
224*d14d7d31Sis 	ibtail = ib + *inbytesleft;
225*d14d7d31Sis 	obtail = ob + *outbytesleft;
226*d14d7d31Sis 
227*d14d7d31Sis 	/*
228*d14d7d31Sis 	 * The inital high value for the binary search we will be using
229*d14d7d31Sis 	 * shortly is a literal constant as of today but to be future proof,
230*d14d7d31Sis 	 * let's calculate it like the following at here.
231*d14d7d31Sis 	 */
232*d14d7d31Sis 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
233*d14d7d31Sis 
234*d14d7d31Sis 	/*
235*d14d7d31Sis 	 * If we haven't checked on the UTF-8 signature BOM character in
236*d14d7d31Sis 	 * the beginning of the conversion data stream, we check it and if
237*d14d7d31Sis 	 * find one, we skip it since we have no use for it.
238*d14d7d31Sis 	 */
239*d14d7d31Sis 	if (((kiconv_state_t)kcd)->bom_processed == 0 && (ibtail - ib) >= 3 &&
240*d14d7d31Sis 	    *ib == 0xef && *(ib + 1) == 0xbb && *(ib + 2) == 0xbf)
241*d14d7d31Sis 			ib += 3;
242*d14d7d31Sis 	((kiconv_state_t)kcd)->bom_processed = 1;
243*d14d7d31Sis 
244*d14d7d31Sis 	while (ib < ibtail) {
245*d14d7d31Sis 		sz = u8_number_of_bytes[*ib];
246*d14d7d31Sis 		if (sz <= 0) {
247*d14d7d31Sis 			*errno = EILSEQ;
248*d14d7d31Sis 			ret_val = (size_t)-1;
249*d14d7d31Sis 			break;
250*d14d7d31Sis 		}
251*d14d7d31Sis 
252*d14d7d31Sis 		/*
253*d14d7d31Sis 		 * If there is no room to write at the output buffer,
254*d14d7d31Sis 		 * issue E2BIG error.
255*d14d7d31Sis 		 */
256*d14d7d31Sis 		if (ob >= obtail) {
257*d14d7d31Sis 			*errno = E2BIG;
258*d14d7d31Sis 			ret_val = (size_t)-1;
259*d14d7d31Sis 			break;
260*d14d7d31Sis 		}
261*d14d7d31Sis 
262*d14d7d31Sis 		/*
263*d14d7d31Sis 		 * If it is a 7-bit ASCII character, we don't need to
264*d14d7d31Sis 		 * process further and we just copy the character over.
265*d14d7d31Sis 		 *
266*d14d7d31Sis 		 * If not, we collect the character bytes up to four bytes,
267*d14d7d31Sis 		 * validate the bytes, and binary search for the corresponding
268*d14d7d31Sis 		 * single byte codeset character byte. If we find it from
269*d14d7d31Sis 		 * the mapping table, we put that into the output buffer;
270*d14d7d31Sis 		 * otherwise, we put a replacement character instead as
271*d14d7d31Sis 		 * a non-identical conversion.
272*d14d7d31Sis 		 */
273*d14d7d31Sis 		if (sz == 1) {
274*d14d7d31Sis 			*ob++ = *ib++;
275*d14d7d31Sis 			continue;
276*d14d7d31Sis 		}
277*d14d7d31Sis 
278*d14d7d31Sis 		/*
279*d14d7d31Sis 		 * Issue EINVAL error if input buffer has an incomplete
280*d14d7d31Sis 		 * character at the end of the buffer.
281*d14d7d31Sis 		 */
282*d14d7d31Sis 		if ((ibtail - ib) < sz) {
283*d14d7d31Sis 			*errno = EINVAL;
284*d14d7d31Sis 			ret_val = (size_t)-1;
285*d14d7d31Sis 			break;
286*d14d7d31Sis 		}
287*d14d7d31Sis 
288*d14d7d31Sis 		/*
289*d14d7d31Sis 		 * We collect UTF-8 character bytes and also check if
290*d14d7d31Sis 		 * this is a valid UTF-8 character without any bogus bytes
291*d14d7d31Sis 		 * based on the latest UTF-8 binary representation.
292*d14d7d31Sis 		 */
293*d14d7d31Sis 		oldib = ib;
294*d14d7d31Sis 		u8 = *ib++;
295*d14d7d31Sis 		second = B_TRUE;
296*d14d7d31Sis 		for (i = 1; i < sz; i++) {
297*d14d7d31Sis 			if (second) {
298*d14d7d31Sis 				if (*ib < u8_valid_min_2nd_byte[u8] ||
299*d14d7d31Sis 				    *ib > u8_valid_max_2nd_byte[u8]) {
300*d14d7d31Sis 					*errno = EILSEQ;
301*d14d7d31Sis 					ret_val = (size_t)-1;
302*d14d7d31Sis 					ib = oldib;
303*d14d7d31Sis 					goto TO_SB_ILLEGAL_CHAR_ERR;
304*d14d7d31Sis 				}
305*d14d7d31Sis 				second = B_FALSE;
306*d14d7d31Sis 			} else if (*ib < 0x80 || *ib > 0xbf) {
307*d14d7d31Sis 				*errno = EILSEQ;
308*d14d7d31Sis 				ret_val = (size_t)-1;
309*d14d7d31Sis 				ib = oldib;
310*d14d7d31Sis 				goto TO_SB_ILLEGAL_CHAR_ERR;
311*d14d7d31Sis 			}
312*d14d7d31Sis 			u8 = (u8 << 8) | ((uint32_t)*ib);
313*d14d7d31Sis 			ib++;
314*d14d7d31Sis 		}
315*d14d7d31Sis 
316*d14d7d31Sis 		i = l = 0;
317*d14d7d31Sis 		h = init_h;
318*d14d7d31Sis 		while (l <= h) {
319*d14d7d31Sis 			i = (l + h) / 2;
320*d14d7d31Sis 			if (to_sb_tbl[id][i].u8 == u8)
321*d14d7d31Sis 				break;
322*d14d7d31Sis 			else if (to_sb_tbl[id][i].u8 < u8)
323*d14d7d31Sis 				l = i + 1;
324*d14d7d31Sis 			else
325*d14d7d31Sis 				h = i - 1;
326*d14d7d31Sis 		}
327*d14d7d31Sis 
328*d14d7d31Sis 		if (to_sb_tbl[id][i].u8 == u8) {
329*d14d7d31Sis 			*ob++ = to_sb_tbl[id][i].sb;
330*d14d7d31Sis 		} else {
331*d14d7d31Sis 			/*
332*d14d7d31Sis 			 * If we don't find a character in the target
333*d14d7d31Sis 			 * codeset, we insert an ASCII replacement character
334*d14d7d31Sis 			 * at the output buffer and indicate such
335*d14d7d31Sis 			 * "non-identical" conversion by increasing the
336*d14d7d31Sis 			 * return value which is the non-identical conversion
337*d14d7d31Sis 			 * counter if bigger than 0.
338*d14d7d31Sis 			 */
339*d14d7d31Sis 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
340*d14d7d31Sis 			ret_val++;
341*d14d7d31Sis 		}
342*d14d7d31Sis 	}
343*d14d7d31Sis 
344*d14d7d31Sis TO_SB_ILLEGAL_CHAR_ERR:
345*d14d7d31Sis 	*inbuf = (char *)ib;
346*d14d7d31Sis 	*inbytesleft = ibtail - ib;
347*d14d7d31Sis 	*outbuf = (char *)ob;
348*d14d7d31Sis 	*outbytesleft = obtail - ob;
349*d14d7d31Sis 
350*d14d7d31Sis 	return (ret_val);
351*d14d7d31Sis }
352*d14d7d31Sis 
353*d14d7d31Sis /*
354*d14d7d31Sis  * The following is the common kiconv function from single byte codesets to
355*d14d7d31Sis  * UTF-8.
356*d14d7d31Sis  */
357*d14d7d31Sis static size_t
kiconv_fr_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)358*d14d7d31Sis kiconv_fr_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
359*d14d7d31Sis 	size_t *outbytesleft, int *errno)
360*d14d7d31Sis {
361*d14d7d31Sis 	size_t ret_val;
362*d14d7d31Sis 	uchar_t *ib;
363*d14d7d31Sis 	uchar_t *ob;
364*d14d7d31Sis 	uchar_t *ibtail;
365*d14d7d31Sis 	uchar_t *obtail;
366*d14d7d31Sis 	size_t i;
367*d14d7d31Sis 	size_t k;
368*d14d7d31Sis 	int8_t sz;
369*d14d7d31Sis 
370*d14d7d31Sis 	/* Check on the kiconv code conversion descriptor validity. */
371*d14d7d31Sis 	if ((ulong_t)kcd > KICONV_MAX_MAPPING_TBLID) {
372*d14d7d31Sis 		*errno = EBADF;
373*d14d7d31Sis 		return ((size_t)-1);
374*d14d7d31Sis 	}
375*d14d7d31Sis 
376*d14d7d31Sis 	/*
377*d14d7d31Sis 	 * If this is a state reset request, there is nothing to do and so
378*d14d7d31Sis 	 * we just return.
379*d14d7d31Sis 	 */
380*d14d7d31Sis 	if (! inbuf || ! (*inbuf))
381*d14d7d31Sis 		return ((size_t)0);
382*d14d7d31Sis 
383*d14d7d31Sis 	ret_val = 0;
384*d14d7d31Sis 	ib = (uchar_t *)*inbuf;
385*d14d7d31Sis 	ob = (uchar_t *)*outbuf;
386*d14d7d31Sis 	ibtail = ib + *inbytesleft;
387*d14d7d31Sis 	obtail = ob + *outbytesleft;
388*d14d7d31Sis 
389*d14d7d31Sis 	while (ib < ibtail) {
390*d14d7d31Sis 		/*
391*d14d7d31Sis 		 * If this is a 7-bit ASCII character, we just copy over and
392*d14d7d31Sis 		 * that's all we need to do for this character.
393*d14d7d31Sis 		 */
394*d14d7d31Sis 		if (*ib < 0x80) {
395*d14d7d31Sis 			if (ob >= obtail) {
396*d14d7d31Sis 				*errno = E2BIG;
397*d14d7d31Sis 				ret_val = (size_t)-1;
398*d14d7d31Sis 				break;
399*d14d7d31Sis 			}
400*d14d7d31Sis 
401*d14d7d31Sis 			*ob++ = *ib++;
402*d14d7d31Sis 			continue;
403*d14d7d31Sis 		}
404*d14d7d31Sis 
405*d14d7d31Sis 		/*
406*d14d7d31Sis 		 * Otherwise, we get the corresponding UTF-8 character bytes
407*d14d7d31Sis 		 * from the mapping table and copy them over.
408*d14d7d31Sis 		 *
409*d14d7d31Sis 		 * We don't need to worry about if the UTF-8 character bytes
410*d14d7d31Sis 		 * at the mapping tables are valid or not since they are good.
411*d14d7d31Sis 		 */
412*d14d7d31Sis 		k = *ib - 0x80;
413*d14d7d31Sis 		sz = u8_number_of_bytes[to_u8_tbl[(ulong_t)kcd][k].u8[0]];
414*d14d7d31Sis 
415*d14d7d31Sis 		/*
416*d14d7d31Sis 		 * If sz <= 0, that means we don't have any assigned character
417*d14d7d31Sis 		 * at the code point, k + 0x80, of the single byte codeset
418*d14d7d31Sis 		 * which is the fromcode. In other words, the input buffer
419*d14d7d31Sis 		 * has an illegal character.
420*d14d7d31Sis 		 */
421*d14d7d31Sis 		if (sz <= 0) {
422*d14d7d31Sis 			*errno = EILSEQ;
423*d14d7d31Sis 			ret_val = (size_t)-1;
424*d14d7d31Sis 			break;
425*d14d7d31Sis 		}
426*d14d7d31Sis 
427*d14d7d31Sis 		if ((obtail - ob) < sz) {
428*d14d7d31Sis 			*errno = E2BIG;
429*d14d7d31Sis 			ret_val = (size_t)-1;
430*d14d7d31Sis 			break;
431*d14d7d31Sis 		}
432*d14d7d31Sis 
433*d14d7d31Sis 		for (i = 0; i < sz; i++)
434*d14d7d31Sis 			*ob++ = to_u8_tbl[(ulong_t)kcd][k].u8[i];
435*d14d7d31Sis 
436*d14d7d31Sis 		ib++;
437*d14d7d31Sis 	}
438*d14d7d31Sis 
439*d14d7d31Sis 	*inbuf = (char *)ib;
440*d14d7d31Sis 	*inbytesleft = ibtail - ib;
441*d14d7d31Sis 	*outbuf = (char *)ob;
442*d14d7d31Sis 	*outbytesleft = obtail - ob;
443*d14d7d31Sis 
444*d14d7d31Sis 	return (ret_val);
445*d14d7d31Sis }
446*d14d7d31Sis 
447*d14d7d31Sis /*
448*d14d7d31Sis  * The following is the common kiconvstr function from UTF-8 to single byte
449*d14d7d31Sis  * codesets.
450*d14d7d31Sis  */
451*d14d7d31Sis static size_t
kiconvstr_to_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)452*d14d7d31Sis kiconvstr_to_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
453*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
454*d14d7d31Sis {
455*d14d7d31Sis 	size_t ret_val;
456*d14d7d31Sis 	uchar_t *oldib;
457*d14d7d31Sis 	uchar_t *ibtail;
458*d14d7d31Sis 	uchar_t *obtail;
459*d14d7d31Sis 	uint32_t u8;
460*d14d7d31Sis 	size_t i;
461*d14d7d31Sis 	size_t l;
462*d14d7d31Sis 	size_t h;
463*d14d7d31Sis 	size_t init_h;
464*d14d7d31Sis 	int8_t sz;
465*d14d7d31Sis 	boolean_t second;
466*d14d7d31Sis 	boolean_t do_not_ignore_null;
467*d14d7d31Sis 
468*d14d7d31Sis 	/* Let's make sure that the table id is within the valid boundary. */
469*d14d7d31Sis 	if (id > KICONV_MAX_MAPPING_TBLID) {
470*d14d7d31Sis 		*errno = EBADF;
471*d14d7d31Sis 		return ((size_t)-1);
472*d14d7d31Sis 	}
473*d14d7d31Sis 
474*d14d7d31Sis 	ret_val = 0;
475*d14d7d31Sis 	ibtail = ib + *inlen;
476*d14d7d31Sis 	obtail = ob + *outlen;
477*d14d7d31Sis 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
478*d14d7d31Sis 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
479*d14d7d31Sis 
480*d14d7d31Sis 	/* Skip any UTF-8 signature BOM character in the beginning. */
481*d14d7d31Sis 	if ((ibtail - ib) >= 3 && *ib == 0xef && *(ib + 1) == 0xbb &&
482*d14d7d31Sis 	    *(ib + 2) == 0xbf)
483*d14d7d31Sis 			ib += 3;
484*d14d7d31Sis 
485*d14d7d31Sis 	/*
486*d14d7d31Sis 	 * Basically this is pretty much the same as kiconv_to_sb() except
487*d14d7d31Sis 	 * that we are now accepting two flag values and doing the processing
488*d14d7d31Sis 	 * accordingly.
489*d14d7d31Sis 	 */
490*d14d7d31Sis 	while (ib < ibtail) {
491*d14d7d31Sis 		sz = u8_number_of_bytes[*ib];
492*d14d7d31Sis 		if (sz <= 0) {
493*d14d7d31Sis 			if (flag & KICONV_REPLACE_INVALID) {
494*d14d7d31Sis 				if (ob >= obtail) {
495*d14d7d31Sis 					*errno = E2BIG;
496*d14d7d31Sis 					ret_val = (size_t)-1;
497*d14d7d31Sis 					break;
498*d14d7d31Sis 				}
499*d14d7d31Sis 
500*d14d7d31Sis 				ib++;
501*d14d7d31Sis 				goto STR_TO_SB_REPLACE_INVALID;
502*d14d7d31Sis 			}
503*d14d7d31Sis 
504*d14d7d31Sis 			*errno = EILSEQ;
505*d14d7d31Sis 			ret_val = (size_t)-1;
506*d14d7d31Sis 			break;
507*d14d7d31Sis 		}
508*d14d7d31Sis 
509*d14d7d31Sis 		if (*ib == '\0' && do_not_ignore_null)
510*d14d7d31Sis 			break;
511*d14d7d31Sis 
512*d14d7d31Sis 		if (ob >= obtail) {
513*d14d7d31Sis 			*errno = E2BIG;
514*d14d7d31Sis 			ret_val = (size_t)-1;
515*d14d7d31Sis 			break;
516*d14d7d31Sis 		}
517*d14d7d31Sis 
518*d14d7d31Sis 		if (sz == 1) {
519*d14d7d31Sis 			*ob++ = *ib++;
520*d14d7d31Sis 			continue;
521*d14d7d31Sis 		}
522*d14d7d31Sis 
523*d14d7d31Sis 		if ((ibtail - ib) < sz) {
524*d14d7d31Sis 			if (flag & KICONV_REPLACE_INVALID) {
525*d14d7d31Sis 				ib = ibtail;
526*d14d7d31Sis 				goto STR_TO_SB_REPLACE_INVALID;
527*d14d7d31Sis 			}
528*d14d7d31Sis 
529*d14d7d31Sis 			*errno = EINVAL;
530*d14d7d31Sis 			ret_val = (size_t)-1;
531*d14d7d31Sis 			break;
532*d14d7d31Sis 		}
533*d14d7d31Sis 
534*d14d7d31Sis 		oldib = ib;
535*d14d7d31Sis 		u8 = *ib++;
536*d14d7d31Sis 		second = B_TRUE;
537*d14d7d31Sis 		for (i = 1; i < sz; i++) {
538*d14d7d31Sis 			if (second) {
539*d14d7d31Sis 				if (*ib < u8_valid_min_2nd_byte[u8] ||
540*d14d7d31Sis 				    *ib > u8_valid_max_2nd_byte[u8]) {
541*d14d7d31Sis 					if (flag & KICONV_REPLACE_INVALID) {
542*d14d7d31Sis 						ib = oldib + sz;
543*d14d7d31Sis 						goto STR_TO_SB_REPLACE_INVALID;
544*d14d7d31Sis 					}
545*d14d7d31Sis 
546*d14d7d31Sis 					*errno = EILSEQ;
547*d14d7d31Sis 					ret_val = (size_t)-1;
548*d14d7d31Sis 					ib = oldib;
549*d14d7d31Sis 					goto STR_TO_SB_ILLEGAL_CHAR_ERR;
550*d14d7d31Sis 				}
551*d14d7d31Sis 				second = B_FALSE;
552*d14d7d31Sis 			} else if (*ib < 0x80 || *ib > 0xbf) {
553*d14d7d31Sis 				if (flag & KICONV_REPLACE_INVALID) {
554*d14d7d31Sis 					ib = oldib + sz;
555*d14d7d31Sis 					goto STR_TO_SB_REPLACE_INVALID;
556*d14d7d31Sis 				}
557*d14d7d31Sis 
558*d14d7d31Sis 				*errno = EILSEQ;
559*d14d7d31Sis 				ret_val = (size_t)-1;
560*d14d7d31Sis 				ib = oldib;
561*d14d7d31Sis 				goto STR_TO_SB_ILLEGAL_CHAR_ERR;
562*d14d7d31Sis 			}
563*d14d7d31Sis 			u8 = (u8 << 8) | ((uint32_t)*ib);
564*d14d7d31Sis 			ib++;
565*d14d7d31Sis 		}
566*d14d7d31Sis 
567*d14d7d31Sis 		i = l = 0;
568*d14d7d31Sis 		h = init_h;
569*d14d7d31Sis 		while (l <= h) {
570*d14d7d31Sis 			i = (l + h) / 2;
571*d14d7d31Sis 			if (to_sb_tbl[id][i].u8 == u8)
572*d14d7d31Sis 				break;
573*d14d7d31Sis 			else if (to_sb_tbl[id][i].u8 < u8)
574*d14d7d31Sis 				l = i + 1;
575*d14d7d31Sis 			else
576*d14d7d31Sis 				h = i - 1;
577*d14d7d31Sis 		}
578*d14d7d31Sis 
579*d14d7d31Sis 		if (to_sb_tbl[id][i].u8 == u8) {
580*d14d7d31Sis 			*ob++ = to_sb_tbl[id][i].sb;
581*d14d7d31Sis 		} else {
582*d14d7d31Sis STR_TO_SB_REPLACE_INVALID:
583*d14d7d31Sis 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
584*d14d7d31Sis 			ret_val++;
585*d14d7d31Sis 		}
586*d14d7d31Sis 	}
587*d14d7d31Sis 
588*d14d7d31Sis STR_TO_SB_ILLEGAL_CHAR_ERR:
589*d14d7d31Sis 	*inlen = ibtail - ib;
590*d14d7d31Sis 	*outlen = obtail - ob;
591*d14d7d31Sis 
592*d14d7d31Sis 	return (ret_val);
593*d14d7d31Sis }
594*d14d7d31Sis 
/*
 * The following four functions are the kiconvstr entry points for the
 * conversions from UTF-8 to single byte codesets. They are recorded in
 * the conv_list[] defined below.
 */
599*d14d7d31Sis static size_t
kiconvstr_to_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)600*d14d7d31Sis kiconvstr_to_1252(char *inarray, size_t *inlen, char *outarray,
601*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
602*d14d7d31Sis {
603*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
604*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
605*d14d7d31Sis }
606*d14d7d31Sis 
607*d14d7d31Sis static size_t
kiconvstr_to_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)608*d14d7d31Sis kiconvstr_to_1(char *inarray, size_t *inlen, char *outarray,
609*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
610*d14d7d31Sis {
611*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
612*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
613*d14d7d31Sis }
614*d14d7d31Sis 
615*d14d7d31Sis static size_t
kiconvstr_to_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)616*d14d7d31Sis kiconvstr_to_15(char *inarray, size_t *inlen, char *outarray,
617*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
618*d14d7d31Sis {
619*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
620*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
621*d14d7d31Sis }
622*d14d7d31Sis 
623*d14d7d31Sis static size_t
kiconvstr_to_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)624*d14d7d31Sis kiconvstr_to_850(char *inarray, size_t *inlen, char *outarray,
625*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
626*d14d7d31Sis {
627*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_850, (uchar_t *)inarray,
628*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
629*d14d7d31Sis }
630*d14d7d31Sis 
631*d14d7d31Sis /*
632*d14d7d31Sis  * The following is the common kiconvstr function for conversions from
633*d14d7d31Sis  * single byte codesets to UTF-8.
634*d14d7d31Sis  */
635*d14d7d31Sis static size_t
kiconvstr_fr_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)636*d14d7d31Sis kiconvstr_fr_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
637*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
638*d14d7d31Sis {
639*d14d7d31Sis 	size_t ret_val;
640*d14d7d31Sis 	uchar_t *ibtail;
641*d14d7d31Sis 	uchar_t *obtail;
642*d14d7d31Sis 	size_t i;
643*d14d7d31Sis 	size_t k;
644*d14d7d31Sis 	int8_t sz;
645*d14d7d31Sis 	boolean_t do_not_ignore_null;
646*d14d7d31Sis 
647*d14d7d31Sis 	ret_val = 0;
648*d14d7d31Sis 	ibtail = ib + *inlen;
649*d14d7d31Sis 	obtail = ob + *outlen;
650*d14d7d31Sis 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
651*d14d7d31Sis 
652*d14d7d31Sis 	while (ib < ibtail) {
653*d14d7d31Sis 		if (*ib == '\0' && do_not_ignore_null)
654*d14d7d31Sis 			break;
655*d14d7d31Sis 
656*d14d7d31Sis 		if (*ib < 0x80) {
657*d14d7d31Sis 			if (ob >= obtail) {
658*d14d7d31Sis 				*errno = E2BIG;
659*d14d7d31Sis 				ret_val = (size_t)-1;
660*d14d7d31Sis 				break;
661*d14d7d31Sis 			}
662*d14d7d31Sis 			*ob++ = *ib++;
663*d14d7d31Sis 			continue;
664*d14d7d31Sis 		}
665*d14d7d31Sis 
666*d14d7d31Sis 		k = *ib - 0x80;
667*d14d7d31Sis 		sz = u8_number_of_bytes[to_u8_tbl[id][k].u8[0]];
668*d14d7d31Sis 
669*d14d7d31Sis 		if (sz <= 0) {
670*d14d7d31Sis 			if (flag & KICONV_REPLACE_INVALID) {
671*d14d7d31Sis 				if ((obtail - ob) < 3) {
672*d14d7d31Sis 					*errno = E2BIG;
673*d14d7d31Sis 					ret_val = (size_t)-1;
674*d14d7d31Sis 					break;
675*d14d7d31Sis 				}
676*d14d7d31Sis 
677*d14d7d31Sis 				/* Save KICONV_UTF8_REPLACEMENT_CHAR. */
678*d14d7d31Sis 				*ob++ = 0xef;
679*d14d7d31Sis 				*ob++ = 0xbf;
680*d14d7d31Sis 				*ob++ = 0xbd;
681*d14d7d31Sis 				ret_val++;
682*d14d7d31Sis 				ib++;
683*d14d7d31Sis 
684*d14d7d31Sis 				continue;
685*d14d7d31Sis 			}
686*d14d7d31Sis 
687*d14d7d31Sis 			*errno = EILSEQ;
688*d14d7d31Sis 			ret_val = (size_t)-1;
689*d14d7d31Sis 			break;
690*d14d7d31Sis 		}
691*d14d7d31Sis 
692*d14d7d31Sis 		if ((obtail - ob) < sz) {
693*d14d7d31Sis 			*errno = E2BIG;
694*d14d7d31Sis 			ret_val = (size_t)-1;
695*d14d7d31Sis 			break;
696*d14d7d31Sis 		}
697*d14d7d31Sis 
698*d14d7d31Sis 		for (i = 0; i < sz; i++)
699*d14d7d31Sis 			*ob++ = to_u8_tbl[id][k].u8[i];
700*d14d7d31Sis 
701*d14d7d31Sis 		ib++;
702*d14d7d31Sis 	}
703*d14d7d31Sis 
704*d14d7d31Sis 	*inlen = ibtail - ib;
705*d14d7d31Sis 	*outlen = obtail - ob;
706*d14d7d31Sis 
707*d14d7d31Sis 	return (ret_val);
708*d14d7d31Sis }
709*d14d7d31Sis 
/*
 * The following four functions are the kiconvstr entry points for the
 * conversions from single byte codesets to UTF-8. They are also recorded
 * in the conv_list[] defined below.
 */
714*d14d7d31Sis static size_t
kiconvstr_fr_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)715*d14d7d31Sis kiconvstr_fr_1252(char *inarray, size_t *inlen, char *outarray,
716*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
717*d14d7d31Sis {
718*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
719*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
720*d14d7d31Sis }
721*d14d7d31Sis 
722*d14d7d31Sis static size_t
kiconvstr_fr_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)723*d14d7d31Sis kiconvstr_fr_1(char *inarray, size_t *inlen, char *outarray,
724*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
725*d14d7d31Sis {
726*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
727*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
728*d14d7d31Sis }
729*d14d7d31Sis 
730*d14d7d31Sis static size_t
kiconvstr_fr_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)731*d14d7d31Sis kiconvstr_fr_15(char *inarray, size_t *inlen, char *outarray,
732*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
733*d14d7d31Sis {
734*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
735*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
736*d14d7d31Sis }
737*d14d7d31Sis 
738*d14d7d31Sis static size_t
kiconvstr_fr_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)739*d14d7d31Sis kiconvstr_fr_850(char *inarray, size_t *inlen, char *outarray,
740*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
741*d14d7d31Sis {
742*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_850, (uchar_t *)inarray,
743*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
744*d14d7d31Sis }
745*d14d7d31Sis 
746*d14d7d31Sis /*
747*d14d7d31Sis  * The following static vector contains the normalized code names
748*d14d7d31Sis  * and their corresponding code ids. They are somewhat arbitrarily ordered
749*d14d7d31Sis  * based on marketing data available. A code id could repeat for aliases.
750*d14d7d31Sis  *
751*d14d7d31Sis  * The vector was generated by using a small utility program called
752*d14d7d31Sis  * codeidlistgen.c that you can find from PSARC/2007/173/materials/util/.
753*d14d7d31Sis  *
754*d14d7d31Sis  * The code ids must be portable, i.e., if needed, you can always generate
755*d14d7d31Sis  * the code_list[] again with different code ids. You'll also need to
756*d14d7d31Sis  * update the conv_list[] at below.
757*d14d7d31Sis  */
758*d14d7d31Sis #define	KICONV_MAX_CODEID_ENTRY		68
759*d14d7d31Sis #define	KICONV_MAX_CODEID		42
760*d14d7d31Sis 
761*d14d7d31Sis static kiconv_code_list_t code_list[KICONV_MAX_CODEID_ENTRY] = {
762*d14d7d31Sis 	{ "utf8", 0 },
763*d14d7d31Sis 	{ "cp1252", 1 },
764*d14d7d31Sis 	{ "1252", 1 },
765*d14d7d31Sis 	{ "iso88591", 2 },
766*d14d7d31Sis 	{ "iso885915", 3 },
767*d14d7d31Sis 	{ "cp850", 4 },
768*d14d7d31Sis 	{ "850", 4 },
769*d14d7d31Sis 	{ "eucjp", 5 },
770*d14d7d31Sis 	{ "eucjpms", 6 },
771*d14d7d31Sis 	{ "cp932", 7 },
772*d14d7d31Sis 	{ "932", 7 },
773*d14d7d31Sis 	{ "shiftjis", 8 },
774*d14d7d31Sis 	{ "pck", 8 },
775*d14d7d31Sis 	{ "sjis", 8 },
776*d14d7d31Sis 	{ "gb18030", 9 },
777*d14d7d31Sis 	{ "gbk", 10 },
778*d14d7d31Sis 	{ "cp936", 10 },
779*d14d7d31Sis 	{ "936", 10 },
780*d14d7d31Sis 	{ "euccn", 11 },
781*d14d7d31Sis 	{ "euckr", 12 },
782*d14d7d31Sis 	{ "unifiedhangul", 13 },
783*d14d7d31Sis 	{ "cp949", 13 },
784*d14d7d31Sis 	{ "949", 13 },
785*d14d7d31Sis 	{ "big5", 14 },
786*d14d7d31Sis 	{ "cp950", 14 },
787*d14d7d31Sis 	{ "950", 14 },
788*d14d7d31Sis 	{ "big5hkscs", 15 },
789*d14d7d31Sis 	{ "euctw", 16 },
790*d14d7d31Sis 	{ "cp950hkscs", 17 },
791*d14d7d31Sis 	{ "cp1250", 18 },
792*d14d7d31Sis 	{ "1250", 18 },
793*d14d7d31Sis 	{ "iso88592", 19 },
794*d14d7d31Sis 	{ "cp852", 20 },
795*d14d7d31Sis 	{ "852", 20 },
796*d14d7d31Sis 	{ "cp1251", 21 },
797*d14d7d31Sis 	{ "1251", 21 },
798*d14d7d31Sis 	{ "iso88595", 22 },
799*d14d7d31Sis 	{ "koi8r", 23 },
800*d14d7d31Sis 	{ "cp866", 24 },
801*d14d7d31Sis 	{ "866", 24 },
802*d14d7d31Sis 	{ "cp1253", 25 },
803*d14d7d31Sis 	{ "1253", 25 },
804*d14d7d31Sis 	{ "iso88597", 26 },
805*d14d7d31Sis 	{ "cp737", 27 },
806*d14d7d31Sis 	{ "737", 27 },
807*d14d7d31Sis 	{ "cp1254", 28 },
808*d14d7d31Sis 	{ "1254", 28 },
809*d14d7d31Sis 	{ "iso88599", 29 },
810*d14d7d31Sis 	{ "cp857", 30 },
811*d14d7d31Sis 	{ "857", 30 },
812*d14d7d31Sis 	{ "cp1256", 31 },
813*d14d7d31Sis 	{ "1256", 31 },
814*d14d7d31Sis 	{ "iso88596", 32 },
815*d14d7d31Sis 	{ "cp720", 33 },
816*d14d7d31Sis 	{ "720", 33 },
817*d14d7d31Sis 	{ "cp1255", 34 },
818*d14d7d31Sis 	{ "1255", 34 },
819*d14d7d31Sis 	{ "iso88598", 35 },
820*d14d7d31Sis 	{ "cp862", 36 },
821*d14d7d31Sis 	{ "862", 36 },
822*d14d7d31Sis 	{ "cp1257", 37 },
823*d14d7d31Sis 	{ "1257", 37 },
824*d14d7d31Sis 	{ "iso885913", 38 },
825*d14d7d31Sis 	{ "iso885910", 39 },
826*d14d7d31Sis 	{ "iso885911", 40 },
827*d14d7d31Sis 	{ "tis620", 40 },
828*d14d7d31Sis 	{ "iso88593", 41 },
829*d14d7d31Sis 	{ "iso88594", 42 },
830*d14d7d31Sis };
831*d14d7d31Sis 
832*d14d7d31Sis /*
833*d14d7d31Sis  * The list of code conversions supported are grouped together per
834*d14d7d31Sis  * module which will be loaded as needed.
835*d14d7d31Sis  */
836*d14d7d31Sis #define	KICONV_MAX_CONVERSIONS		84
837*d14d7d31Sis 
838*d14d7d31Sis static kiconv_conv_list_t conv_list[KICONV_MAX_CONVERSIONS] = {
839*d14d7d31Sis 	/* Embedded code conversions: */
840*d14d7d31Sis 	{
841*d14d7d31Sis 		1, 0, KICONV_EMBEDDED,
842*d14d7d31Sis 		open_to_1252, kiconv_to_sb, close_to_sb, kiconvstr_to_1252
843*d14d7d31Sis 	},
844*d14d7d31Sis 	{
845*d14d7d31Sis 		0, 1, KICONV_EMBEDDED,
846*d14d7d31Sis 		open_fr_1252, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1252
847*d14d7d31Sis 	},
848*d14d7d31Sis 	{
849*d14d7d31Sis 		2, 0, KICONV_EMBEDDED,
850*d14d7d31Sis 		open_to_88591, kiconv_to_sb, close_to_sb, kiconvstr_to_1
851*d14d7d31Sis 	},
852*d14d7d31Sis 	{
853*d14d7d31Sis 		0, 2, KICONV_EMBEDDED,
854*d14d7d31Sis 		open_fr_88591, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1
855*d14d7d31Sis 	},
856*d14d7d31Sis 	{
857*d14d7d31Sis 		3, 0, KICONV_EMBEDDED,
858*d14d7d31Sis 		open_to_885915, kiconv_to_sb, close_to_sb, kiconvstr_to_15
859*d14d7d31Sis 	},
860*d14d7d31Sis 	{
861*d14d7d31Sis 		0, 3, KICONV_EMBEDDED,
862*d14d7d31Sis 		open_fr_885915, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_15
863*d14d7d31Sis 	},
864*d14d7d31Sis 	{
865*d14d7d31Sis 		4, 0, KICONV_EMBEDDED,
866*d14d7d31Sis 		open_to_850, kiconv_to_sb, close_to_sb, kiconvstr_to_850
867*d14d7d31Sis 	},
868*d14d7d31Sis 	{
869*d14d7d31Sis 		0, 4, KICONV_EMBEDDED,
870*d14d7d31Sis 		open_fr_850, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_850
871*d14d7d31Sis 	},
872*d14d7d31Sis 
873*d14d7d31Sis 	/* kiconv_ja module conversions: */
874*d14d7d31Sis 	{ 0, 5, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
875*d14d7d31Sis 	{ 5, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
876*d14d7d31Sis 	{ 0, 6, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
877*d14d7d31Sis 	{ 6, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
878*d14d7d31Sis 	{ 0, 7, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
879*d14d7d31Sis 	{ 7, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
880*d14d7d31Sis 	{ 0, 8, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
881*d14d7d31Sis 	{ 8, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
882*d14d7d31Sis 
883*d14d7d31Sis 	/* kiconv_sc module conversions: */
884*d14d7d31Sis 	{ 0, 9, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
885*d14d7d31Sis 	{ 9, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
886*d14d7d31Sis 	{ 0, 10, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
887*d14d7d31Sis 	{ 10, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
888*d14d7d31Sis 	{ 0, 11, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
889*d14d7d31Sis 	{ 11, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
890*d14d7d31Sis 
891*d14d7d31Sis 	/* kiconv_ko module conversions: */
892*d14d7d31Sis 	{ 0, 12, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
893*d14d7d31Sis 	{ 12, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
894*d14d7d31Sis 	{ 0, 13, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
895*d14d7d31Sis 	{ 13, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
896*d14d7d31Sis 
897*d14d7d31Sis 	/* kiconv_tc module conversions: */
898*d14d7d31Sis 	{ 0, 14, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
899*d14d7d31Sis 	{ 14, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
900*d14d7d31Sis 	{ 0, 15, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
901*d14d7d31Sis 	{ 15, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
902*d14d7d31Sis 	{ 0, 16, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
903*d14d7d31Sis 	{ 16, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
904*d14d7d31Sis 	{ 0, 17, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
905*d14d7d31Sis 	{ 17, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
906*d14d7d31Sis 
907*d14d7d31Sis 	/* kiconv_emea module conversions: */
908*d14d7d31Sis 	{ 0, 18, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
909*d14d7d31Sis 	{ 18, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
910*d14d7d31Sis 	{ 0, 19, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
911*d14d7d31Sis 	{ 19, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
912*d14d7d31Sis 	{ 0, 20, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
913*d14d7d31Sis 	{ 20, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
914*d14d7d31Sis 	{ 0, 21, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
915*d14d7d31Sis 	{ 21, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
916*d14d7d31Sis 	{ 0, 22, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
917*d14d7d31Sis 	{ 22, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
918*d14d7d31Sis 	{ 0, 23, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
919*d14d7d31Sis 	{ 23, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
920*d14d7d31Sis 	{ 0, 24, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
921*d14d7d31Sis 	{ 24, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
922*d14d7d31Sis 	{ 0, 25, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
923*d14d7d31Sis 	{ 25, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
924*d14d7d31Sis 	{ 0, 26, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
925*d14d7d31Sis 	{ 26, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
926*d14d7d31Sis 	{ 0, 27, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
927*d14d7d31Sis 	{ 27, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
928*d14d7d31Sis 	{ 0, 28, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
929*d14d7d31Sis 	{ 28, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
930*d14d7d31Sis 	{ 0, 29, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
931*d14d7d31Sis 	{ 29, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
932*d14d7d31Sis 	{ 0, 30, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
933*d14d7d31Sis 	{ 30, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
934*d14d7d31Sis 	{ 0, 31, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
935*d14d7d31Sis 	{ 31, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
936*d14d7d31Sis 	{ 0, 32, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
937*d14d7d31Sis 	{ 32, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
938*d14d7d31Sis 	{ 0, 33, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
939*d14d7d31Sis 	{ 33, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
940*d14d7d31Sis 	{ 0, 34, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
941*d14d7d31Sis 	{ 34, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
942*d14d7d31Sis 	{ 0, 35, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
943*d14d7d31Sis 	{ 35, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
944*d14d7d31Sis 	{ 0, 36, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
945*d14d7d31Sis 	{ 36, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
946*d14d7d31Sis 	{ 0, 37, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
947*d14d7d31Sis 	{ 37, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
948*d14d7d31Sis 	{ 0, 38, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
949*d14d7d31Sis 	{ 38, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
950*d14d7d31Sis 	{ 0, 39, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
951*d14d7d31Sis 	{ 39, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
952*d14d7d31Sis 	{ 0, 40, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
953*d14d7d31Sis 	{ 40, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
954*d14d7d31Sis 	{ 0, 41, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
955*d14d7d31Sis 	{ 41, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
956*d14d7d31Sis 	{ 0, 42, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
957*d14d7d31Sis 	{ 42, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
958*d14d7d31Sis };
959*d14d7d31Sis 
960*d14d7d31Sis /* The list of implemeted and supported modules. */
961*d14d7d31Sis static kiconv_mod_list_t module_list[KICONV_MAX_MODULE_ID + 1] = {
962*d14d7d31Sis 	"kiconv_embedded", 0,
963*d14d7d31Sis 	"kiconv_ja", 0,
964*d14d7d31Sis 	"kiconv_sc", 0,
965*d14d7d31Sis 	"kiconv_ko", 0,
966*d14d7d31Sis 	"kiconv_tc", 0,
967*d14d7d31Sis 	"kiconv_emea", 0,
968*d14d7d31Sis };
969*d14d7d31Sis 
970*d14d7d31Sis /*
971*d14d7d31Sis  * We use conv_list_lock to restrict data access of both conv_list[] and
972*d14d7d31Sis  * module_list[] as they are tightly coupled critical sections that need to be
973*d14d7d31Sis  * dealt together as a unit.
974*d14d7d31Sis  */
975*d14d7d31Sis static kmutex_t conv_list_lock;
976*d14d7d31Sis 
977*d14d7d31Sis void
kiconv_init()978*d14d7d31Sis kiconv_init()
979*d14d7d31Sis {
980*d14d7d31Sis 	mutex_init(&conv_list_lock, NULL, MUTEX_DEFAULT, NULL);
981*d14d7d31Sis }
982*d14d7d31Sis 
983*d14d7d31Sis /*
984*d14d7d31Sis  * The following is used to check on whether a kiconv module is being
985*d14d7d31Sis  * used or not at the _fini() of the module.
986*d14d7d31Sis  */
987*d14d7d31Sis size_t
kiconv_module_ref_count(size_t mid)988*d14d7d31Sis kiconv_module_ref_count(size_t mid)
989*d14d7d31Sis {
990*d14d7d31Sis 	int count;
991*d14d7d31Sis 
992*d14d7d31Sis 	if (mid <= 0 || mid > KICONV_MAX_MODULE_ID)
993*d14d7d31Sis 		return (0);
994*d14d7d31Sis 
995*d14d7d31Sis 	mutex_enter(&conv_list_lock);
996*d14d7d31Sis 
997*d14d7d31Sis 	count = module_list[mid].refcount;
998*d14d7d31Sis 
999*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1000*d14d7d31Sis 
1001*d14d7d31Sis 	return (count);
1002*d14d7d31Sis }
1003*d14d7d31Sis 
1004*d14d7d31Sis /*
1005*d14d7d31Sis  * This function "normalizes" a given code name, n, by not including skippable
1006*d14d7d31Sis  * characters and folding uppercase letters to corresponding lowercase letters.
1007*d14d7d31Sis  * We only fold 7-bit ASCII uppercase characters since the names should be in
1008*d14d7d31Sis  * Portable Character Set of 7-bit ASCII.
1009*d14d7d31Sis  *
1010*d14d7d31Sis  * By doing this, we will be able to maximize the code name matches.
1011*d14d7d31Sis  */
1012*d14d7d31Sis static size_t
normalize_codename(const char * n)1013*d14d7d31Sis normalize_codename(const char *n)
1014*d14d7d31Sis {
1015*d14d7d31Sis 	char s[KICONV_MAX_CODENAME_LEN + 1];
1016*d14d7d31Sis 	size_t i;
1017*d14d7d31Sis 
1018*d14d7d31Sis 	if (n == NULL)
1019*d14d7d31Sis 		return ((size_t)-1);
1020*d14d7d31Sis 
1021*d14d7d31Sis 	for (i = 0; *n; n++) {
1022*d14d7d31Sis 		if (KICONV_SKIPPABLE_CHAR(*n))
1023*d14d7d31Sis 			continue;
1024*d14d7d31Sis 
1025*d14d7d31Sis 		/* If unreasonably lengthy, we don't support such names. */
1026*d14d7d31Sis 		if (i >= KICONV_MAX_CODENAME_LEN)
1027*d14d7d31Sis 			return ((size_t)-1);
1028*d14d7d31Sis 
1029*d14d7d31Sis 		s[i++] = (*n >= 'A' && *n <= 'Z') ? *n - 'A' + 'a' : *n;
1030*d14d7d31Sis 	}
1031*d14d7d31Sis 	s[i] = '\0';
1032*d14d7d31Sis 
1033*d14d7d31Sis 	/* With the normalized name, find the corresponding codeset id. */
1034*d14d7d31Sis 	for (i = 0; i < KICONV_MAX_CODEID_ENTRY; i++)
1035*d14d7d31Sis 		if (strcmp(s, code_list[i].name) == 0)
1036*d14d7d31Sis 			return (code_list[i].id);
1037*d14d7d31Sis 
1038*d14d7d31Sis 	/*
1039*d14d7d31Sis 	 * In future time, we will also have a few more lines of code at below
1040*d14d7d31Sis 	 * that will deal with other user-created modules' fromcodes and
1041*d14d7d31Sis 	 * tocodes including aliases in a different vector. For now, we don't
1042*d14d7d31Sis 	 * support that but only the known names to this project at this time.
1043*d14d7d31Sis 	 */
1044*d14d7d31Sis 
1045*d14d7d31Sis 	return ((size_t)-1);
1046*d14d7d31Sis }
1047*d14d7d31Sis 
1048*d14d7d31Sis /*
1049*d14d7d31Sis  * This function called from mod_install() registers supplied code
1050*d14d7d31Sis  * conversions. At this point, it does not honor aliases and hence does not
1051*d14d7d31Sis  * use nowait data field from the kiconv module info data structure.
1052*d14d7d31Sis  */
1053*d14d7d31Sis int
kiconv_register_module(kiconv_module_info_t * info)1054*d14d7d31Sis kiconv_register_module(kiconv_module_info_t *info)
1055*d14d7d31Sis {
1056*d14d7d31Sis 	size_t mid;
1057*d14d7d31Sis 	size_t fid;
1058*d14d7d31Sis 	size_t tid;
1059*d14d7d31Sis 	size_t i;
1060*d14d7d31Sis 	size_t j;
1061*d14d7d31Sis 	kiconv_ops_t *op;
1062*d14d7d31Sis 
1063*d14d7d31Sis 	/* Validate the given kiconv module info. */
1064*d14d7d31Sis 	if (info == NULL || info->module_name == NULL ||
1065*d14d7d31Sis 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1066*d14d7d31Sis 		return (EINVAL);
1067*d14d7d31Sis 
1068*d14d7d31Sis 	/*
1069*d14d7d31Sis 	 * Check if this is one of the known modules. At this point,
1070*d14d7d31Sis 	 * we do not allow user-defined kiconv modules and that'd be for
1071*d14d7d31Sis 	 * a future project.
1072*d14d7d31Sis 	 */
1073*d14d7d31Sis 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1074*d14d7d31Sis 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1075*d14d7d31Sis 			break;
1076*d14d7d31Sis 	if (mid > KICONV_MAX_MODULE_ID)
1077*d14d7d31Sis 		return (EINVAL);
1078*d14d7d31Sis 
1079*d14d7d31Sis 	/* Let's register the conversions supplied. */
1080*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1081*d14d7d31Sis 
1082*d14d7d31Sis 	/*
1083*d14d7d31Sis 	 * This is very unlikely situation but by any chance we don't want to
1084*d14d7d31Sis 	 * register a module that is already in.
1085*d14d7d31Sis 	 */
1086*d14d7d31Sis 	if (module_list[mid].refcount > 0) {
1087*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1088*d14d7d31Sis 		return (EAGAIN);
1089*d14d7d31Sis 	}
1090*d14d7d31Sis 
1091*d14d7d31Sis 	for (i = 0; i < info->kiconv_num_convs; i++) {
1092*d14d7d31Sis 		op = &(info->kiconv_ops_tbl[i]);
1093*d14d7d31Sis 
1094*d14d7d31Sis 		fid = normalize_codename(op->fromcode);
1095*d14d7d31Sis 		tid = normalize_codename(op->tocode);
1096*d14d7d31Sis 
1097*d14d7d31Sis 		/*
1098*d14d7d31Sis 		 * If we find anything wrong in this particular conversion,
1099*d14d7d31Sis 		 * we skip this one and continue to the next one. This include
1100*d14d7d31Sis 		 * a case where there is a conversion already being assigned
1101*d14d7d31Sis 		 * into the conv_list[] somehow, i.e., new one never kicks out
1102*d14d7d31Sis 		 * old one.
1103*d14d7d31Sis 		 */
1104*d14d7d31Sis 		if (op->kiconv_open == NULL || op->kiconv == NULL ||
1105*d14d7d31Sis 		    op->kiconv_close == NULL || op->kiconvstr == NULL)
1106*d14d7d31Sis 			continue;
1107*d14d7d31Sis 
1108*d14d7d31Sis 		for (j = 0; j < KICONV_MAX_CONVERSIONS; j++) {
1109*d14d7d31Sis 			if (conv_list[j].mid == mid &&
1110*d14d7d31Sis 			    conv_list[j].fid == fid &&
1111*d14d7d31Sis 			    conv_list[j].tid == tid) {
1112*d14d7d31Sis 				if (conv_list[j].open == NULL) {
1113*d14d7d31Sis 					conv_list[j].open = op->kiconv_open;
1114*d14d7d31Sis 					conv_list[j].kiconv = op->kiconv;
1115*d14d7d31Sis 					conv_list[j].close = op->kiconv_close;
1116*d14d7d31Sis 					conv_list[j].kiconvstr = op->kiconvstr;
1117*d14d7d31Sis 				}
1118*d14d7d31Sis 				break;
1119*d14d7d31Sis 			}
1120*d14d7d31Sis 		}
1121*d14d7d31Sis 	}
1122*d14d7d31Sis 
1123*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1124*d14d7d31Sis 
1125*d14d7d31Sis 	return (0);
1126*d14d7d31Sis }
1127*d14d7d31Sis 
1128*d14d7d31Sis /*
1129*d14d7d31Sis  * The following function called during mod_remove() will try to unregister,
1130*d14d7d31Sis  * i.e., clear up conversion function pointers, from the conv_list[] if it
1131*d14d7d31Sis  * can. If there is any code conversions being used, then, the function will
1132*d14d7d31Sis  * just return EBUSY indicating that the module cannot be unloaded.
1133*d14d7d31Sis  */
1134*d14d7d31Sis int
kiconv_unregister_module(kiconv_module_info_t * info)1135*d14d7d31Sis kiconv_unregister_module(kiconv_module_info_t *info)
1136*d14d7d31Sis {
1137*d14d7d31Sis 	size_t mid;
1138*d14d7d31Sis 	size_t i;
1139*d14d7d31Sis 
1140*d14d7d31Sis 	if (info == NULL || info->module_name == NULL ||
1141*d14d7d31Sis 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1142*d14d7d31Sis 		return (EINVAL);
1143*d14d7d31Sis 
1144*d14d7d31Sis 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1145*d14d7d31Sis 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1146*d14d7d31Sis 			break;
1147*d14d7d31Sis 	if (mid > KICONV_MAX_MODULE_ID)
1148*d14d7d31Sis 		return (EINVAL);
1149*d14d7d31Sis 
1150*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1151*d14d7d31Sis 
1152*d14d7d31Sis 	/*
1153*d14d7d31Sis 	 * If any of the conversions are used, then, this module canont be
1154*d14d7d31Sis 	 * unloaded.
1155*d14d7d31Sis 	 */
1156*d14d7d31Sis 	if (module_list[mid].refcount > 0) {
1157*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1158*d14d7d31Sis 		return (EBUSY);
1159*d14d7d31Sis 	}
1160*d14d7d31Sis 
1161*d14d7d31Sis 	/*
1162*d14d7d31Sis 	 * Otherwise, we unregister all conversions from this module
1163*d14d7d31Sis 	 * and be ready for the unloading. At this point, we only care about
1164*d14d7d31Sis 	 * the conversions we know about with the module.
1165*d14d7d31Sis 	 */
1166*d14d7d31Sis 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++) {
1167*d14d7d31Sis 		if (conv_list[i].mid == mid) {
1168*d14d7d31Sis 			conv_list[i].open = NULL;
1169*d14d7d31Sis 			conv_list[i].kiconv = NULL;
1170*d14d7d31Sis 			conv_list[i].close = NULL;
1171*d14d7d31Sis 			conv_list[i].kiconvstr = NULL;
1172*d14d7d31Sis 		}
1173*d14d7d31Sis 	}
1174*d14d7d31Sis 
1175*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1176*d14d7d31Sis 
1177*d14d7d31Sis 	return (0);
1178*d14d7d31Sis }
1179*d14d7d31Sis 
1180*d14d7d31Sis /*
1181*d14d7d31Sis  * The following function check if asked code conversion is available
1182*d14d7d31Sis  * and if necessary, load the corresponding kiconv module that contains
1183*d14d7d31Sis  * the conversion (and others).
1184*d14d7d31Sis  */
1185*d14d7d31Sis static kiconv_t
check_and_load_conversions(const char * tocode,const char * fromcode)1186*d14d7d31Sis check_and_load_conversions(const char *tocode, const char *fromcode)
1187*d14d7d31Sis {
1188*d14d7d31Sis 	kiconv_t kcd;
1189*d14d7d31Sis 	size_t tid;
1190*d14d7d31Sis 	size_t fid;
1191*d14d7d31Sis 	size_t mid;
1192*d14d7d31Sis 	size_t i;
1193*d14d7d31Sis 
1194*d14d7d31Sis 	/* Normalize the given names and find the corresponding code ids. */
1195*d14d7d31Sis 	tid = normalize_codename(tocode);
1196*d14d7d31Sis 	if (tid == (size_t)-1)
1197*d14d7d31Sis 		return ((kiconv_t)-1);
1198*d14d7d31Sis 
1199*d14d7d31Sis 	fid = normalize_codename(fromcode);
1200*d14d7d31Sis 	if (fid == (size_t)-1)
1201*d14d7d31Sis 		return ((kiconv_t)-1);
1202*d14d7d31Sis 
1203*d14d7d31Sis 	/*
1204*d14d7d31Sis 	 * Search the conversion.
1205*d14d7d31Sis 	 *
1206*d14d7d31Sis 	 * If the conversion isn't supported, just return -1.
1207*d14d7d31Sis 	 * If the conversion is supported but there is no corresponding
1208*d14d7d31Sis 	 * module loaded, try to load it and if successful, return
1209*d14d7d31Sis 	 * a kiconv conversion descriptor memory block.
1210*d14d7d31Sis 	 *
1211*d14d7d31Sis 	 * We maintain a reference counter of uint_t for each module.
1212*d14d7d31Sis 	 */
1213*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1214*d14d7d31Sis 
1215*d14d7d31Sis 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++)
1216*d14d7d31Sis 		if (conv_list[i].tid == tid && conv_list[i].fid == fid)
1217*d14d7d31Sis 			break;
1218*d14d7d31Sis 	if (i >= KICONV_MAX_CONVERSIONS) {
1219*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1220*d14d7d31Sis 		return ((kiconv_t)-1);
1221*d14d7d31Sis 	}
1222*d14d7d31Sis 
1223*d14d7d31Sis 	mid = conv_list[i].mid;
1224*d14d7d31Sis 
1225*d14d7d31Sis 	if (conv_list[i].open == NULL) {
1226*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1227*d14d7d31Sis 
1228*d14d7d31Sis 		if (modload("kiconv", module_list[mid].name) < 0)
1229*d14d7d31Sis 			return ((kiconv_t)-1);
1230*d14d7d31Sis 
1231*d14d7d31Sis 		/*
1232*d14d7d31Sis 		 * Let's double check if something happened right after
1233*d14d7d31Sis 		 * the modload and/or if the module really has the conversion.
1234*d14d7d31Sis 		 */
1235*d14d7d31Sis 		mutex_enter(&conv_list_lock);
1236*d14d7d31Sis 
1237*d14d7d31Sis 		if (conv_list[i].open == NULL) {
1238*d14d7d31Sis 			mutex_exit(&conv_list_lock);
1239*d14d7d31Sis 			return ((kiconv_t)-1);
1240*d14d7d31Sis 		}
1241*d14d7d31Sis 	}
1242*d14d7d31Sis 
1243*d14d7d31Sis 	/*
1244*d14d7d31Sis 	 * If we got the conversion, we will use the conversion function
1245*d14d7d31Sis 	 * in the module and so let's increase the module's refcounter
1246*d14d7d31Sis 	 * so that the module won't be kicked out. (To be more exact and
1247*d14d7d31Sis 	 * specific, the "refcount" is thus the reference counter of
1248*d14d7d31Sis 	 * the module functions being used.)
1249*d14d7d31Sis 	 */
1250*d14d7d31Sis 	if (module_list[mid].refcount < UINT_MAX)
1251*d14d7d31Sis 		module_list[mid].refcount++;
1252*d14d7d31Sis 
1253*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1254*d14d7d31Sis 
1255*d14d7d31Sis 	kcd = (kiconv_t)kmem_alloc(sizeof (kiconv_data_t), KM_SLEEP);
1256*d14d7d31Sis 	kcd->handle = (void *)-1;
1257*d14d7d31Sis 	kcd->id = i;
1258*d14d7d31Sis 
1259*d14d7d31Sis 	return (kcd);
1260*d14d7d31Sis }
1261*d14d7d31Sis 
1262*d14d7d31Sis /*
1263*d14d7d31Sis  * The following are the four "Committed" interfaces.
1264*d14d7d31Sis  */
1265*d14d7d31Sis kiconv_t
kiconv_open(const char * tocode,const char * fromcode)1266*d14d7d31Sis kiconv_open(const char *tocode, const char *fromcode)
1267*d14d7d31Sis {
1268*d14d7d31Sis 	kiconv_t kcd;
1269*d14d7d31Sis 	size_t mid;
1270*d14d7d31Sis 
1271*d14d7d31Sis 	kcd = check_and_load_conversions(tocode, fromcode);
1272*d14d7d31Sis 	if (kcd == (kiconv_t)-1)
1273*d14d7d31Sis 		return ((kiconv_t)-1);
1274*d14d7d31Sis 
1275*d14d7d31Sis 	kcd->handle = (conv_list[kcd->id].open)();
1276*d14d7d31Sis 	if (kcd->handle == (void *)-1) {
1277*d14d7d31Sis 		/*
1278*d14d7d31Sis 		 * If the conversion couldn't be opened for some reason,
1279*d14d7d31Sis 		 * then, we unallocate the kcd and, more importantly, before
1280*d14d7d31Sis 		 * that, we also decrease the module reference counter.
1281*d14d7d31Sis 		 */
1282*d14d7d31Sis 		mid = conv_list[kcd->id].mid;
1283*d14d7d31Sis 
1284*d14d7d31Sis 		mutex_enter(&conv_list_lock);
1285*d14d7d31Sis 
1286*d14d7d31Sis 		if (module_list[mid].refcount > 0)
1287*d14d7d31Sis 			module_list[mid].refcount--;
1288*d14d7d31Sis 
1289*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1290*d14d7d31Sis 
1291*d14d7d31Sis 		kmem_free((void *)kcd, sizeof (kiconv_data_t));
1292*d14d7d31Sis 
1293*d14d7d31Sis 		return ((kiconv_t)-1);
1294*d14d7d31Sis 	}
1295*d14d7d31Sis 
1296*d14d7d31Sis 	return (kcd);
1297*d14d7d31Sis }
1298*d14d7d31Sis 
1299*d14d7d31Sis size_t
kiconv(kiconv_t kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1300*d14d7d31Sis kiconv(kiconv_t kcd, char **inbuf, size_t *inbytesleft,
1301*d14d7d31Sis 	char **outbuf, size_t *outbytesleft, int *errno)
1302*d14d7d31Sis {
1303*d14d7d31Sis 	/* Do some minimum checking on the kiconv conversion descriptor. */
1304*d14d7d31Sis 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconv == NULL) {
1305*d14d7d31Sis 		*errno = EBADF;
1306*d14d7d31Sis 		return ((size_t)-1);
1307*d14d7d31Sis 	}
1308*d14d7d31Sis 
1309*d14d7d31Sis 	return ((conv_list[kcd->id].kiconv)(kcd->handle, inbuf, inbytesleft,
1310*d14d7d31Sis 	    outbuf, outbytesleft, errno));
1311*d14d7d31Sis }
1312*d14d7d31Sis 
1313*d14d7d31Sis int
kiconv_close(kiconv_t kcd)1314*d14d7d31Sis kiconv_close(kiconv_t kcd)
1315*d14d7d31Sis {
1316*d14d7d31Sis 	int ret;
1317*d14d7d31Sis 	size_t mid;
1318*d14d7d31Sis 
1319*d14d7d31Sis 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].close == NULL)
1320*d14d7d31Sis 		return (EBADF);
1321*d14d7d31Sis 
1322*d14d7d31Sis 	mid = conv_list[kcd->id].mid;
1323*d14d7d31Sis 
1324*d14d7d31Sis 	ret = (conv_list[kcd->id].close)(kcd->handle);
1325*d14d7d31Sis 
1326*d14d7d31Sis 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1327*d14d7d31Sis 
1328*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1329*d14d7d31Sis 
1330*d14d7d31Sis 	/*
1331*d14d7d31Sis 	 * While we maintain reference conter for each module, once loaded,
1332*d14d7d31Sis 	 * we don't modunload from kiconv functions even if the counter
1333*d14d7d31Sis 	 * reaches back to zero.
1334*d14d7d31Sis 	 */
1335*d14d7d31Sis 	if (module_list[mid].refcount > 0)
1336*d14d7d31Sis 		module_list[mid].refcount--;
1337*d14d7d31Sis 
1338*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1339*d14d7d31Sis 
1340*d14d7d31Sis 	return (ret);
1341*d14d7d31Sis }
1342*d14d7d31Sis 
1343*d14d7d31Sis size_t
kiconvstr(const char * tocode,const char * fromcode,char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)1344*d14d7d31Sis kiconvstr(const char *tocode, const char *fromcode, char *inarray,
1345*d14d7d31Sis 	size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno)
1346*d14d7d31Sis {
1347*d14d7d31Sis 	kiconv_t kcd;
1348*d14d7d31Sis 	size_t ret;
1349*d14d7d31Sis 	size_t mid;
1350*d14d7d31Sis 
1351*d14d7d31Sis 	kcd = check_and_load_conversions(tocode, fromcode);
1352*d14d7d31Sis 	if (kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconvstr == NULL) {
1353*d14d7d31Sis 		*errno = EBADF;
1354*d14d7d31Sis 		return ((size_t)-1);
1355*d14d7d31Sis 	}
1356*d14d7d31Sis 
1357*d14d7d31Sis 	mid = conv_list[kcd->id].mid;
1358*d14d7d31Sis 
1359*d14d7d31Sis 	ret = (conv_list[kcd->id].kiconvstr)(inarray, inlen, outarray, outlen,
1360*d14d7d31Sis 	    flag, errno);
1361*d14d7d31Sis 
1362*d14d7d31Sis 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1363*d14d7d31Sis 
1364*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1365*d14d7d31Sis 
1366*d14d7d31Sis 	if (module_list[mid].refcount > 0)
1367*d14d7d31Sis 		module_list[mid].refcount--;
1368*d14d7d31Sis 
1369*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1370*d14d7d31Sis 
1371*d14d7d31Sis 	return (ret);
1372*d14d7d31Sis }
1373