115d9d0b5Syy /*
215d9d0b5Syy * CDDL HEADER START
315d9d0b5Syy *
415d9d0b5Syy * The contents of this file are subject to the terms of the
515d9d0b5Syy * Common Development and Distribution License (the "License").
615d9d0b5Syy * You may not use this file except in compliance with the License.
715d9d0b5Syy *
815d9d0b5Syy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
915d9d0b5Syy * or http://www.opensolaris.org/os/licensing.
1015d9d0b5Syy * See the License for the specific language governing permissions
1115d9d0b5Syy * and limitations under the License.
1215d9d0b5Syy *
1315d9d0b5Syy * When distributing Covered Code, include this CDDL HEADER in each
1415d9d0b5Syy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1515d9d0b5Syy * If applicable, add the following below this CDDL HEADER, with the
1615d9d0b5Syy * fields enclosed by brackets "[]" replaced with your own identifying
1715d9d0b5Syy * information: Portions Copyright [yyyy] [name of copyright owner]
1815d9d0b5Syy *
1915d9d0b5Syy * CDDL HEADER END
2015d9d0b5Syy */
2115d9d0b5Syy /*
2215d9d0b5Syy * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
2315d9d0b5Syy * Use is subject to license terms.
2415d9d0b5Syy */
2515d9d0b5Syy
2615d9d0b5Syy #include <sys/types.h>
2715d9d0b5Syy #include <sys/param.h>
2815d9d0b5Syy #include <sys/sysmacros.h>
2915d9d0b5Syy #include <sys/systm.h>
3015d9d0b5Syy #include <sys/debug.h>
3115d9d0b5Syy #include <sys/kmem.h>
3215d9d0b5Syy #include <sys/sunddi.h>
3315d9d0b5Syy #include <sys/byteorder.h>
3415d9d0b5Syy #include <sys/errno.h>
3515d9d0b5Syy #include <sys/modctl.h>
3615d9d0b5Syy #include <sys/kiconv.h>
3715d9d0b5Syy #include <sys/u8_textprep.h>
3815d9d0b5Syy #include <sys/kiconv_cck_common.h>
3915d9d0b5Syy #include <sys/kiconv_sc.h>
4015d9d0b5Syy #include <sys/kiconv_gb18030_utf8.h>
4115d9d0b5Syy #include <sys/kiconv_gb2312_utf8.h>
4215d9d0b5Syy #include <sys/kiconv_utf8_gb18030.h>
4315d9d0b5Syy #include <sys/kiconv_utf8_gb2312.h>
4415d9d0b5Syy
4515d9d0b5Syy static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
4615d9d0b5Syy uchar_t *obtail, size_t *ret_val);
4715d9d0b5Syy static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
4815d9d0b5Syy size_t *ret_val, boolean_t isgbk4);
4915d9d0b5Syy static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
5015d9d0b5Syy uchar_t *ob, uchar_t *obtail, size_t *ret);
5115d9d0b5Syy static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
5215d9d0b5Syy uchar_t *ob, uchar_t *obtail, size_t *ret);
5315d9d0b5Syy static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
5415d9d0b5Syy uchar_t *ob, uchar_t *obtail, size_t *ret);
5515d9d0b5Syy
/*
 * Magic numbers used as conversion descriptors: each open_fr_*() routine in
 * this file returns one of them cast to a pointer, and close_fr_sc() rejects
 * any descriptor whose value is above KICONV_SC_MAX_MAGIC_ID.
 */
#define	KICONV_SC_GB18030		(0x01)
#define	KICONV_SC_GBK			(0x02)
#define	KICONV_SC_EUCCN			(0x03)
/* Largest magic number above; keep in step when adding a new one. */
#define	KICONV_SC_MAX_MAGIC_ID		KICONV_SC_EUCCN
6015d9d0b5Syy
6115d9d0b5Syy static void *
open_fr_gb18030()6215d9d0b5Syy open_fr_gb18030()
6315d9d0b5Syy {
6415d9d0b5Syy return ((void *)KICONV_SC_GB18030);
6515d9d0b5Syy }
6615d9d0b5Syy
6715d9d0b5Syy static void *
open_fr_gbk()6815d9d0b5Syy open_fr_gbk()
6915d9d0b5Syy {
7015d9d0b5Syy return ((void *)KICONV_SC_GBK);
7115d9d0b5Syy }
7215d9d0b5Syy
7315d9d0b5Syy static void *
open_fr_euccn()7415d9d0b5Syy open_fr_euccn()
7515d9d0b5Syy {
7615d9d0b5Syy return ((void *)KICONV_SC_EUCCN);
7715d9d0b5Syy }
7815d9d0b5Syy
7915d9d0b5Syy static int
close_fr_sc(void * s)8015d9d0b5Syy close_fr_sc(void *s)
8115d9d0b5Syy {
8215d9d0b5Syy if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
8315d9d0b5Syy return (EBADF);
8415d9d0b5Syy
8515d9d0b5Syy return (0);
8615d9d0b5Syy }
8715d9d0b5Syy
8815d9d0b5Syy /*
8915d9d0b5Syy * Encoding convertor from UTF-8 to GB18030.
9015d9d0b5Syy */
9115d9d0b5Syy size_t
kiconv_to_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)9215d9d0b5Syy kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
93*86ef0a63SRichard Lowe char **outbuf, size_t *outbytesleft, int *errno)
9415d9d0b5Syy {
9515d9d0b5Syy
9615d9d0b5Syy return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
9715d9d0b5Syy outbytesleft, errno, utf8_to_gb18030);
9815d9d0b5Syy }
9915d9d0b5Syy
10015d9d0b5Syy /*
10115d9d0b5Syy * String based encoding convertor from UTF-8 to GB18030.
10215d9d0b5Syy */
10315d9d0b5Syy size_t
kiconvstr_to_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)10415d9d0b5Syy kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
105*86ef0a63SRichard Lowe size_t *outlen, int flag, int *errno)
10615d9d0b5Syy {
10715d9d0b5Syy return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
10815d9d0b5Syy (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
10915d9d0b5Syy }
11015d9d0b5Syy
11115d9d0b5Syy /*
11215d9d0b5Syy * Encoding convertor from GB18030 to UTF-8.
11315d9d0b5Syy */
11415d9d0b5Syy size_t
kiconv_fr_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)11515d9d0b5Syy kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
116*86ef0a63SRichard Lowe char **outbuf, size_t *outbytesleft, int *errno)
11715d9d0b5Syy {
11815d9d0b5Syy uchar_t *ib;
11915d9d0b5Syy uchar_t *ob;
12015d9d0b5Syy uchar_t *ibtail;
12115d9d0b5Syy uchar_t *obtail;
12215d9d0b5Syy size_t ret_val;
12315d9d0b5Syy int8_t sz;
12415d9d0b5Syy uint32_t gb_val;
12515d9d0b5Syy boolean_t isgbk4;
12615d9d0b5Syy
12715d9d0b5Syy /* Check on the kiconv code conversion descriptor. */
12815d9d0b5Syy if (kcd == NULL || kcd == (void *)-1) {
12915d9d0b5Syy *errno = EBADF;
13015d9d0b5Syy return ((size_t)-1);
13115d9d0b5Syy }
13215d9d0b5Syy
13315d9d0b5Syy /* If this is a state reset request, process and return. */
13415d9d0b5Syy if (inbuf == NULL || *inbuf == NULL) {
13515d9d0b5Syy return (0);
13615d9d0b5Syy }
13715d9d0b5Syy
13815d9d0b5Syy ret_val = 0;
13915d9d0b5Syy ib = (uchar_t *)*inbuf;
14015d9d0b5Syy ob = (uchar_t *)*outbuf;
14115d9d0b5Syy ibtail = ib + *inbytesleft;
14215d9d0b5Syy obtail = ob + *outbytesleft;
14315d9d0b5Syy
14415d9d0b5Syy while (ib < ibtail) {
14515d9d0b5Syy if (KICONV_IS_ASCII(*ib)) {
14615d9d0b5Syy if (ob >= obtail) {
14715d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
14815d9d0b5Syy }
14915d9d0b5Syy
15015d9d0b5Syy *ob++ = *ib++;
15115d9d0b5Syy continue;
15215d9d0b5Syy }
15315d9d0b5Syy
15415d9d0b5Syy /*
15515d9d0b5Syy * Issue EILSEQ error if the first byte is not a
15615d9d0b5Syy * valid GB18030 leading byte.
15715d9d0b5Syy */
15815d9d0b5Syy if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
15915d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
16015d9d0b5Syy }
16115d9d0b5Syy
16215d9d0b5Syy isgbk4 = (ibtail - ib < 2) ? B_FALSE :
16315d9d0b5Syy KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
16415d9d0b5Syy
16515d9d0b5Syy if (isgbk4) {
16615d9d0b5Syy if (ibtail - ib < 4) {
16715d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EINVAL);
16815d9d0b5Syy }
16915d9d0b5Syy
17015d9d0b5Syy if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
17115d9d0b5Syy KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
17215d9d0b5Syy KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
17315d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
17415d9d0b5Syy }
17515d9d0b5Syy
17615d9d0b5Syy gb_val = (uint32_t)(*ib) << 24 |
17715d9d0b5Syy (uint32_t)(*(ib + 1)) << 16 |
17815d9d0b5Syy (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
17915d9d0b5Syy } else {
18015d9d0b5Syy if (ibtail - ib < 2) {
18115d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EINVAL);
18215d9d0b5Syy }
18315d9d0b5Syy
18415d9d0b5Syy if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
18515d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
18615d9d0b5Syy }
18715d9d0b5Syy
18815d9d0b5Syy gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
18915d9d0b5Syy }
19015d9d0b5Syy
19115d9d0b5Syy sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
19215d9d0b5Syy if (sz < 0) {
19315d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
19415d9d0b5Syy }
19515d9d0b5Syy
19615d9d0b5Syy ib += isgbk4 ? 4 : 2;
19715d9d0b5Syy ob += sz;
19815d9d0b5Syy }
19915d9d0b5Syy
20015d9d0b5Syy *inbuf = (char *)ib;
20115d9d0b5Syy *inbytesleft = ibtail - ib;
20215d9d0b5Syy *outbuf = (char *)ob;
20315d9d0b5Syy *outbytesleft = obtail - ob;
20415d9d0b5Syy
20515d9d0b5Syy return (ret_val);
20615d9d0b5Syy }
20715d9d0b5Syy
20815d9d0b5Syy /*
20915d9d0b5Syy * String based encoding convertor from GB18030 to UTF-8.
21015d9d0b5Syy */
21115d9d0b5Syy size_t
kiconvstr_fr_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)21215d9d0b5Syy kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
213*86ef0a63SRichard Lowe size_t *outlen, int flag, int *errno)
21415d9d0b5Syy {
21515d9d0b5Syy uchar_t *ib;
21615d9d0b5Syy uchar_t *ob;
21715d9d0b5Syy uchar_t *ibtail;
21815d9d0b5Syy uchar_t *obtail;
21915d9d0b5Syy uchar_t *oldib;
22015d9d0b5Syy size_t ret_val;
22115d9d0b5Syy int8_t sz;
22215d9d0b5Syy uint32_t gb_val;
22315d9d0b5Syy boolean_t isgbk4;
22415d9d0b5Syy boolean_t do_not_ignore_null;
22515d9d0b5Syy
22615d9d0b5Syy ret_val = 0;
22715d9d0b5Syy ib = (uchar_t *)inarray;
22815d9d0b5Syy ob = (uchar_t *)outarray;
22915d9d0b5Syy ibtail = ib + *inlen;
23015d9d0b5Syy obtail = ob + *outlen;
23115d9d0b5Syy do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
23215d9d0b5Syy
23315d9d0b5Syy while (ib < ibtail) {
23415d9d0b5Syy if (*ib == '\0' && do_not_ignore_null)
23515d9d0b5Syy break;
23615d9d0b5Syy
23715d9d0b5Syy if (KICONV_IS_ASCII(*ib)) {
23815d9d0b5Syy if (ob >= obtail) {
23915d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
24015d9d0b5Syy }
24115d9d0b5Syy
24215d9d0b5Syy *ob++ = *ib++;
24315d9d0b5Syy continue;
24415d9d0b5Syy }
24515d9d0b5Syy
24615d9d0b5Syy oldib = ib;
24715d9d0b5Syy
24815d9d0b5Syy if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
24915d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
25015d9d0b5Syy }
25115d9d0b5Syy
25215d9d0b5Syy isgbk4 = (ibtail - ib < 2) ? B_FALSE :
25315d9d0b5Syy KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
25415d9d0b5Syy
25515d9d0b5Syy if (isgbk4) {
25615d9d0b5Syy if (ibtail - ib < 4) {
25715d9d0b5Syy if (flag & KICONV_REPLACE_INVALID) {
25815d9d0b5Syy ib = ibtail;
25915d9d0b5Syy goto REPLACE_INVALID;
26015d9d0b5Syy }
26115d9d0b5Syy
26215d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EINVAL);
26315d9d0b5Syy }
26415d9d0b5Syy
26515d9d0b5Syy if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
26615d9d0b5Syy KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
26715d9d0b5Syy KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
26815d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
26915d9d0b5Syy }
27015d9d0b5Syy
27115d9d0b5Syy gb_val = (uint32_t)(*ib) << 24 |
27215d9d0b5Syy (uint32_t)(*(ib + 1)) << 16 |
27315d9d0b5Syy (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
27415d9d0b5Syy } else {
27515d9d0b5Syy if (ibtail - ib < 2) {
27615d9d0b5Syy if (flag & KICONV_REPLACE_INVALID) {
27715d9d0b5Syy ib = ibtail;
27815d9d0b5Syy goto REPLACE_INVALID;
27915d9d0b5Syy }
28015d9d0b5Syy
28115d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EINVAL);
28215d9d0b5Syy }
28315d9d0b5Syy
28415d9d0b5Syy if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
28515d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
28615d9d0b5Syy }
28715d9d0b5Syy
28815d9d0b5Syy gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
28915d9d0b5Syy }
29015d9d0b5Syy
29115d9d0b5Syy sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
29215d9d0b5Syy if (sz < 0) {
29315d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
29415d9d0b5Syy }
29515d9d0b5Syy
29615d9d0b5Syy ib += isgbk4 ? 4 : 2;
29715d9d0b5Syy ob += sz;
29815d9d0b5Syy continue;
29915d9d0b5Syy
30015d9d0b5Syy REPLACE_INVALID:
30115d9d0b5Syy if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
30215d9d0b5Syy ib = oldib;
30315d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
30415d9d0b5Syy }
30515d9d0b5Syy
30615d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
30715d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
30815d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
30915d9d0b5Syy ret_val++;
31015d9d0b5Syy }
31115d9d0b5Syy
31215d9d0b5Syy *inlen = ibtail - ib;
31315d9d0b5Syy *outlen = obtail - ob;
31415d9d0b5Syy
31515d9d0b5Syy return (ret_val);
31615d9d0b5Syy }
31715d9d0b5Syy
31815d9d0b5Syy /*
31915d9d0b5Syy * Encoding convertor from UTF-8 to GBK.
32015d9d0b5Syy */
32115d9d0b5Syy size_t
kiconv_to_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)32215d9d0b5Syy kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
323*86ef0a63SRichard Lowe char **outbuf, size_t *outbytesleft, int *errno)
32415d9d0b5Syy {
32515d9d0b5Syy
32615d9d0b5Syy return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
32715d9d0b5Syy outbytesleft, errno, utf8_to_gbk);
32815d9d0b5Syy }
32915d9d0b5Syy
33015d9d0b5Syy /*
33115d9d0b5Syy * String based encoding convertor from UTF-8 to GBK.
33215d9d0b5Syy */
33315d9d0b5Syy size_t
kiconvstr_to_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)33415d9d0b5Syy kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
335*86ef0a63SRichard Lowe size_t *outlen, int flag, int *errno)
33615d9d0b5Syy {
33715d9d0b5Syy return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
33815d9d0b5Syy (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
33915d9d0b5Syy }
34015d9d0b5Syy
34115d9d0b5Syy /*
34215d9d0b5Syy * Encoding convertor from GBK to UTF-8.
34315d9d0b5Syy */
34415d9d0b5Syy size_t
kiconv_fr_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)34515d9d0b5Syy kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
346*86ef0a63SRichard Lowe char **outbuf, size_t *outbytesleft, int *errno)
34715d9d0b5Syy {
34815d9d0b5Syy uchar_t *ib;
34915d9d0b5Syy uchar_t *ob;
35015d9d0b5Syy uchar_t *ibtail;
35115d9d0b5Syy uchar_t *obtail;
35215d9d0b5Syy size_t ret_val;
35315d9d0b5Syy int8_t sz;
35415d9d0b5Syy uint32_t gb_val;
35515d9d0b5Syy
35615d9d0b5Syy /* Check on the kiconv code conversion descriptor. */
35715d9d0b5Syy if (kcd == NULL || kcd == (void *)-1) {
35815d9d0b5Syy *errno = EBADF;
35915d9d0b5Syy return ((size_t)-1);
36015d9d0b5Syy }
36115d9d0b5Syy
36215d9d0b5Syy /* If this is a state reset request, process and return. */
36315d9d0b5Syy if (inbuf == NULL || *inbuf == NULL) {
36415d9d0b5Syy return (0);
36515d9d0b5Syy }
36615d9d0b5Syy
36715d9d0b5Syy ret_val = 0;
36815d9d0b5Syy ib = (uchar_t *)*inbuf;
36915d9d0b5Syy ob = (uchar_t *)*outbuf;
37015d9d0b5Syy ibtail = ib + *inbytesleft;
37115d9d0b5Syy obtail = ob + *outbytesleft;
37215d9d0b5Syy
37315d9d0b5Syy while (ib < ibtail) {
37415d9d0b5Syy if (KICONV_IS_ASCII(*ib)) {
37515d9d0b5Syy if (ob >= obtail) {
37615d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
37715d9d0b5Syy }
37815d9d0b5Syy
37915d9d0b5Syy *ob++ = *ib++;
38015d9d0b5Syy continue;
38115d9d0b5Syy }
38215d9d0b5Syy
38315d9d0b5Syy /*
38415d9d0b5Syy * Issue EILSEQ error if the first byte is not a
38515d9d0b5Syy * valid GBK leading byte.
38615d9d0b5Syy */
38715d9d0b5Syy if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
38815d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
38915d9d0b5Syy }
39015d9d0b5Syy
39115d9d0b5Syy /*
39215d9d0b5Syy * Issue EINVAL error if input buffer has an incomplete
39315d9d0b5Syy * character at the end of the buffer.
39415d9d0b5Syy */
39515d9d0b5Syy if (ibtail - ib < 2) {
39615d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EINVAL);
39715d9d0b5Syy }
39815d9d0b5Syy
39915d9d0b5Syy /*
40015d9d0b5Syy * Issue EILSEQ error if the remaining byte is not
40115d9d0b5Syy * a valid GBK byte.
40215d9d0b5Syy */
40315d9d0b5Syy if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
40415d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
40515d9d0b5Syy }
40615d9d0b5Syy
40715d9d0b5Syy /* Now we have a valid GBK character. */
40815d9d0b5Syy gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
40915d9d0b5Syy sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
41015d9d0b5Syy
41115d9d0b5Syy if (sz < 0) {
41215d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
41315d9d0b5Syy }
41415d9d0b5Syy
41515d9d0b5Syy ib += 2;
41615d9d0b5Syy ob += sz;
41715d9d0b5Syy }
41815d9d0b5Syy
41915d9d0b5Syy *inbuf = (char *)ib;
42015d9d0b5Syy *inbytesleft = ibtail - ib;
42115d9d0b5Syy *outbuf = (char *)ob;
42215d9d0b5Syy *outbytesleft = obtail - ob;
42315d9d0b5Syy
42415d9d0b5Syy return (ret_val);
42515d9d0b5Syy }
42615d9d0b5Syy
42715d9d0b5Syy /*
42815d9d0b5Syy * String based encoding convertor from GBK to UTF-8.
42915d9d0b5Syy */
43015d9d0b5Syy size_t
kiconvstr_fr_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)43115d9d0b5Syy kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
432*86ef0a63SRichard Lowe size_t *outlen, int flag, int *errno)
43315d9d0b5Syy {
43415d9d0b5Syy uchar_t *ib;
43515d9d0b5Syy uchar_t *ob;
43615d9d0b5Syy uchar_t *ibtail;
43715d9d0b5Syy uchar_t *obtail;
43815d9d0b5Syy uchar_t *oldib;
43915d9d0b5Syy size_t ret_val;
44015d9d0b5Syy int8_t sz;
44115d9d0b5Syy uint32_t gb_val;
44215d9d0b5Syy boolean_t do_not_ignore_null;
44315d9d0b5Syy
44415d9d0b5Syy ret_val = 0;
44515d9d0b5Syy ib = (uchar_t *)inarray;
44615d9d0b5Syy ob = (uchar_t *)outarray;
44715d9d0b5Syy ibtail = ib + *inlen;
44815d9d0b5Syy obtail = ob + *outlen;
44915d9d0b5Syy do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
45015d9d0b5Syy
45115d9d0b5Syy while (ib < ibtail) {
45215d9d0b5Syy if (*ib == '\0' && do_not_ignore_null)
45315d9d0b5Syy break;
45415d9d0b5Syy
45515d9d0b5Syy if (KICONV_IS_ASCII(*ib)) {
45615d9d0b5Syy if (ob >= obtail) {
45715d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
45815d9d0b5Syy }
45915d9d0b5Syy
46015d9d0b5Syy *ob++ = *ib++;
46115d9d0b5Syy continue;
46215d9d0b5Syy }
46315d9d0b5Syy
46415d9d0b5Syy oldib = ib;
46515d9d0b5Syy
46615d9d0b5Syy if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
46715d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
46815d9d0b5Syy }
46915d9d0b5Syy
47015d9d0b5Syy if (ibtail - ib < 2) {
47115d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
47215d9d0b5Syy }
47315d9d0b5Syy
47415d9d0b5Syy if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
47515d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
47615d9d0b5Syy }
47715d9d0b5Syy
47815d9d0b5Syy gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
47915d9d0b5Syy sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
48015d9d0b5Syy
48115d9d0b5Syy if (sz < 0) {
48215d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
48315d9d0b5Syy }
48415d9d0b5Syy
48515d9d0b5Syy ib += 2;
48615d9d0b5Syy ob += sz;
48715d9d0b5Syy continue;
48815d9d0b5Syy
48915d9d0b5Syy REPLACE_INVALID:
49015d9d0b5Syy if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
49115d9d0b5Syy ib = oldib;
49215d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
49315d9d0b5Syy }
49415d9d0b5Syy
49515d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
49615d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
49715d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
49815d9d0b5Syy ret_val++;
49915d9d0b5Syy }
50015d9d0b5Syy
50115d9d0b5Syy *inlen = ibtail - ib;
50215d9d0b5Syy *outlen = obtail - ob;
50315d9d0b5Syy
50415d9d0b5Syy return (ret_val);
50515d9d0b5Syy }
50615d9d0b5Syy
50715d9d0b5Syy /*
50815d9d0b5Syy * Encoding convertor from UTF-8 to EUC-CN.
50915d9d0b5Syy */
51015d9d0b5Syy size_t
kiconv_to_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)51115d9d0b5Syy kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
512*86ef0a63SRichard Lowe char **outbuf, size_t *outbytesleft, int *errno)
51315d9d0b5Syy {
51415d9d0b5Syy return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
51515d9d0b5Syy outbytesleft, errno, utf8_to_gb2312);
51615d9d0b5Syy }
51715d9d0b5Syy
51815d9d0b5Syy /*
51915d9d0b5Syy * String based encoding convertor from UTF-8 to EUC-CN.
52015d9d0b5Syy */
52115d9d0b5Syy size_t
kiconvstr_to_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)52215d9d0b5Syy kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
523*86ef0a63SRichard Lowe size_t *outlen, int flag, int *errno)
52415d9d0b5Syy {
52515d9d0b5Syy return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
52615d9d0b5Syy (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
52715d9d0b5Syy }
52815d9d0b5Syy
52915d9d0b5Syy /*
53015d9d0b5Syy * Encoding converto from EUC-CN to UTF-8 code.
53115d9d0b5Syy */
53215d9d0b5Syy size_t
kiconv_fr_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)53315d9d0b5Syy kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
534*86ef0a63SRichard Lowe char **outbuf, size_t *outbytesleft, int *errno)
53515d9d0b5Syy {
53615d9d0b5Syy uchar_t *ib;
53715d9d0b5Syy uchar_t *ob;
53815d9d0b5Syy uchar_t *ibtail;
53915d9d0b5Syy uchar_t *obtail;
54015d9d0b5Syy size_t ret_val;
54115d9d0b5Syy int8_t sz;
54215d9d0b5Syy
54315d9d0b5Syy /* Check on the kiconv code conversion descriptor. */
54415d9d0b5Syy if (kcd == NULL || kcd == (void *)-1) {
54515d9d0b5Syy *errno = EBADF;
54615d9d0b5Syy return ((size_t)-1);
54715d9d0b5Syy }
54815d9d0b5Syy
54915d9d0b5Syy /* If this is a state reset request, process and return. */
55015d9d0b5Syy if (inbuf == NULL || *inbuf == NULL) {
55115d9d0b5Syy return (0);
55215d9d0b5Syy }
55315d9d0b5Syy
55415d9d0b5Syy ret_val = 0;
55515d9d0b5Syy ib = (uchar_t *)*inbuf;
55615d9d0b5Syy ob = (uchar_t *)*outbuf;
55715d9d0b5Syy ibtail = ib + *inbytesleft;
55815d9d0b5Syy obtail = ob + *outbytesleft;
55915d9d0b5Syy
56015d9d0b5Syy while (ib < ibtail) {
56115d9d0b5Syy if (KICONV_IS_ASCII(*ib)) {
56215d9d0b5Syy if (ob >= obtail) {
56315d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
56415d9d0b5Syy }
56515d9d0b5Syy
56615d9d0b5Syy *ob++ = *ib++;
56715d9d0b5Syy continue;
56815d9d0b5Syy }
56915d9d0b5Syy
57015d9d0b5Syy /*
57115d9d0b5Syy * Issue EILSEQ error if the first byte is not a
57215d9d0b5Syy * valid GB2312 leading byte.
57315d9d0b5Syy */
57415d9d0b5Syy if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
57515d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
57615d9d0b5Syy }
57715d9d0b5Syy
57815d9d0b5Syy /*
57915d9d0b5Syy * Issue EINVAL error if input buffer has an incomplete
58015d9d0b5Syy * character at the end of the buffer.
58115d9d0b5Syy */
58215d9d0b5Syy if (ibtail - ib < 2) {
58315d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EINVAL);
58415d9d0b5Syy }
58515d9d0b5Syy
58615d9d0b5Syy /*
58715d9d0b5Syy * Issue EILSEQ error if the remaining byte is not
58815d9d0b5Syy * a valid GB2312 byte.
58915d9d0b5Syy */
59015d9d0b5Syy if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
59115d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
59215d9d0b5Syy }
59315d9d0b5Syy
59415d9d0b5Syy /* Now we have a valid GB2312 character */
59515d9d0b5Syy sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
59615d9d0b5Syy if (sz < 0) {
59715d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
59815d9d0b5Syy }
59915d9d0b5Syy
60015d9d0b5Syy ib += 2;
60115d9d0b5Syy ob += sz;
60215d9d0b5Syy }
60315d9d0b5Syy
60415d9d0b5Syy *inbuf = (char *)ib;
60515d9d0b5Syy *inbytesleft = ibtail - ib;
60615d9d0b5Syy *outbuf = (char *)ob;
60715d9d0b5Syy *outbytesleft = obtail - ob;
60815d9d0b5Syy
60915d9d0b5Syy return (ret_val);
61015d9d0b5Syy }
61115d9d0b5Syy
61215d9d0b5Syy /*
61315d9d0b5Syy * String based encoding convertor from EUC-CN to UTF-8.
61415d9d0b5Syy */
61515d9d0b5Syy size_t
kiconvstr_fr_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)61615d9d0b5Syy kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
61715d9d0b5Syy size_t *outlen, int flag, int *errno)
61815d9d0b5Syy {
61915d9d0b5Syy uchar_t *ib;
62015d9d0b5Syy uchar_t *ob;
62115d9d0b5Syy uchar_t *ibtail;
62215d9d0b5Syy uchar_t *obtail;
62315d9d0b5Syy uchar_t *oldib;
62415d9d0b5Syy size_t ret_val;
62515d9d0b5Syy int8_t sz;
62615d9d0b5Syy boolean_t do_not_ignore_null;
62715d9d0b5Syy
62815d9d0b5Syy ret_val = 0;
62915d9d0b5Syy ib = (uchar_t *)inarray;
63015d9d0b5Syy ob = (uchar_t *)outarray;
63115d9d0b5Syy ibtail = ib + *inlen;
63215d9d0b5Syy obtail = ob + *outlen;
63315d9d0b5Syy do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
63415d9d0b5Syy
63515d9d0b5Syy while (ib < ibtail) {
63615d9d0b5Syy if (*ib == '\0' && do_not_ignore_null)
63715d9d0b5Syy break;
63815d9d0b5Syy
63915d9d0b5Syy if (KICONV_IS_ASCII(*ib)) {
64015d9d0b5Syy if (ob >= obtail) {
64115d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
64215d9d0b5Syy }
64315d9d0b5Syy
64415d9d0b5Syy *ob++ = *ib++;
64515d9d0b5Syy continue;
64615d9d0b5Syy }
64715d9d0b5Syy
64815d9d0b5Syy oldib = ib;
64915d9d0b5Syy
65015d9d0b5Syy if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
65115d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
65215d9d0b5Syy }
65315d9d0b5Syy
65415d9d0b5Syy if (ibtail - ib < 2) {
65515d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
65615d9d0b5Syy }
65715d9d0b5Syy
65815d9d0b5Syy if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
65915d9d0b5Syy KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
66015d9d0b5Syy }
66115d9d0b5Syy
66215d9d0b5Syy sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
66315d9d0b5Syy if (sz < 0) {
66415d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
66515d9d0b5Syy }
66615d9d0b5Syy
66715d9d0b5Syy ib += 2;
66815d9d0b5Syy ob += sz;
66915d9d0b5Syy continue;
67015d9d0b5Syy
67115d9d0b5Syy REPLACE_INVALID:
67215d9d0b5Syy if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
67315d9d0b5Syy ib = oldib;
67415d9d0b5Syy KICONV_SET_ERRNO_AND_BREAK(E2BIG);
67515d9d0b5Syy }
67615d9d0b5Syy
67715d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
67815d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
67915d9d0b5Syy *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
68015d9d0b5Syy ret_val++;
68115d9d0b5Syy }
68215d9d0b5Syy
68315d9d0b5Syy *inlen = ibtail - ib;
68415d9d0b5Syy *outlen = obtail - ob;
68515d9d0b5Syy
68615d9d0b5Syy return (ret_val);
68715d9d0b5Syy }
68815d9d0b5Syy
68915d9d0b5Syy /*
69015d9d0b5Syy * Convert single GB2312 character to UTF-8.
69115d9d0b5Syy * Return: > 0 - Converted successfully
69215d9d0b5Syy * = -1 - E2BIG
69315d9d0b5Syy */
69415d9d0b5Syy static int8_t
gb2312_to_utf8(uchar_t b1,uchar_t b2,uchar_t * ob,uchar_t * obtail,size_t * ret_val)69515d9d0b5Syy gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
696*86ef0a63SRichard Lowe size_t *ret_val)
69715d9d0b5Syy {
69815d9d0b5Syy size_t index;
69915d9d0b5Syy int8_t sz;
70015d9d0b5Syy uchar_t *u8;
70115d9d0b5Syy
70215d9d0b5Syy /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
70315d9d0b5Syy index = b1 * 94 + b2 - 0x3BBF;
70415d9d0b5Syy
70515d9d0b5Syy if (index >= KICONV_GB2312_UTF8_MAX)
70615d9d0b5Syy index = KICONV_GB2312_UTF8_MAX - 1; /* Map to 0xEFBFBD */
70715d9d0b5Syy
70815d9d0b5Syy u8 = kiconv_gb2312_utf8[index];
70915d9d0b5Syy sz = u8_number_of_bytes[u8[0]];
71015d9d0b5Syy
71115d9d0b5Syy if (obtail - ob < sz) {
71215d9d0b5Syy *ret_val = (size_t)-1;
71315d9d0b5Syy return (-1);
71415d9d0b5Syy }
71515d9d0b5Syy
71615d9d0b5Syy for (index = 0; index < sz; index++)
71715d9d0b5Syy *ob++ = u8[index];
71815d9d0b5Syy
71915d9d0b5Syy /*
72015d9d0b5Syy * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
72115d9d0b5Syy * elements, so need to ckeck more.
72215d9d0b5Syy */
72315d9d0b5Syy if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
72415d9d0b5Syy u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
72515d9d0b5Syy u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
72615d9d0b5Syy u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
72715d9d0b5Syy (*ret_val)++;
72815d9d0b5Syy
72915d9d0b5Syy return (sz);
73015d9d0b5Syy }
73115d9d0b5Syy
73215d9d0b5Syy /*
73315d9d0b5Syy * Convert single GB18030 or GBK character to UTF-8.
73415d9d0b5Syy * Return: > 0 - Converted successfully
73515d9d0b5Syy * = -1 - E2BIG
73615d9d0b5Syy */
73715d9d0b5Syy static int8_t
gbk_to_utf8(uint32_t gbk_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,boolean_t isgbk4)73815d9d0b5Syy gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
739*86ef0a63SRichard Lowe boolean_t isgbk4)
74015d9d0b5Syy {
74115d9d0b5Syy size_t index;
74215d9d0b5Syy int8_t sz;
74315d9d0b5Syy uchar_t u8array[4];
74415d9d0b5Syy uchar_t *u8;
74515d9d0b5Syy
74615d9d0b5Syy if (isgbk4) {
74715d9d0b5Syy if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
74815d9d0b5Syy uint32_t u32;
74915d9d0b5Syy
75015d9d0b5Syy /*
75115d9d0b5Syy * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
75215d9d0b5Syy * (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
75315d9d0b5Syy * (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
75415d9d0b5Syy * (gbk_val & 0xFF - 0x30)+
75515d9d0b5Syy * KICONV_SC_PLANE1_UCS4_START;
75615d9d0b5Syy */
75715d9d0b5Syy u32 = (gbk_val >> 24) * 12600 +
75815d9d0b5Syy ((gbk_val & 0xFF0000) >> 16) * 1260 +
75915d9d0b5Syy ((gbk_val & 0xFF00) >> 8) * 10 +
76015d9d0b5Syy (gbk_val & 0xFF) - 0x1BA0FA;
76115d9d0b5Syy u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
76215d9d0b5Syy u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
76315d9d0b5Syy u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
76415d9d0b5Syy u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
76515d9d0b5Syy u8 = u8array;
76615d9d0b5Syy index = 1;
76715d9d0b5Syy } else {
76815d9d0b5Syy index = kiconv_binsearch(gbk_val,
76915d9d0b5Syy kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
77015d9d0b5Syy u8 = kiconv_gbk4_utf8[index].u8;
77115d9d0b5Syy }
77215d9d0b5Syy } else {
77315d9d0b5Syy index = kiconv_binsearch(gbk_val,
77415d9d0b5Syy kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
77515d9d0b5Syy u8 = kiconv_gbk_utf8[index].u8;
77615d9d0b5Syy }
77715d9d0b5Syy
77815d9d0b5Syy sz = u8_number_of_bytes[u8[0]];
77915d9d0b5Syy if (obtail - ob < sz) {
78015d9d0b5Syy *ret_val = (size_t)-1;
78115d9d0b5Syy return (-1);
78215d9d0b5Syy }
78315d9d0b5Syy
78415d9d0b5Syy if (index == 0)
78515d9d0b5Syy (*ret_val)++; /* Non-identical conversion */
78615d9d0b5Syy
78715d9d0b5Syy for (index = 0; index < sz; index++)
78815d9d0b5Syy *ob++ = u8[index];
78915d9d0b5Syy
79015d9d0b5Syy return (sz);
79115d9d0b5Syy }
79215d9d0b5Syy
79315d9d0b5Syy /*
79415d9d0b5Syy * Convert single UTF-8 character to GB18030.
79515d9d0b5Syy * Return: > 0 - Converted successfully
79615d9d0b5Syy * = -1 - E2BIG
79715d9d0b5Syy */
79815d9d0b5Syy /* ARGSUSED */
79915d9d0b5Syy static int8_t
utf8_to_gb18030(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)80015d9d0b5Syy utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
801*86ef0a63SRichard Lowe uchar_t *ob, uchar_t *obtail, size_t *ret)
80215d9d0b5Syy {
803*86ef0a63SRichard Lowe size_t index;
80415d9d0b5Syy int8_t gbklen;
80515d9d0b5Syy uint32_t gbkcode;
80615d9d0b5Syy
80715d9d0b5Syy if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
80815d9d0b5Syy /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
80915d9d0b5Syy uint32_t u32;
81015d9d0b5Syy
81115d9d0b5Syy u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
81215d9d0b5Syy ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
81315d9d0b5Syy KICONV_SC_PLANE1_UCS4_START;
81415d9d0b5Syy gbkcode = ((u32 / 12600 + 0x90) << 24) |
81515d9d0b5Syy (((u32 % 12600) / 1260 + 0x30) << 16) |
81615d9d0b5Syy (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
81715d9d0b5Syy gbklen = 4;
81815d9d0b5Syy index = 1;
81915d9d0b5Syy } else {
82015d9d0b5Syy index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
82115d9d0b5Syy KICONV_UTF8_GB18030_MAX);
82215d9d0b5Syy gbkcode = kiconv_utf8_gb18030[index].value;
82315d9d0b5Syy KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
82415d9d0b5Syy }
82515d9d0b5Syy
82615d9d0b5Syy if (obtail - ob < gbklen) {
82715d9d0b5Syy *ret = (size_t)-1;
82815d9d0b5Syy return (-1);
82915d9d0b5Syy }
83015d9d0b5Syy
83115d9d0b5Syy if (index == 0)
83215d9d0b5Syy (*ret)++; /* Non-identical conversion */
83315d9d0b5Syy
83415d9d0b5Syy if (gbklen == 2) {
83515d9d0b5Syy *ob++ = (uchar_t)(gbkcode >> 8);
83615d9d0b5Syy } else if (gbklen == 4) {
83715d9d0b5Syy *ob++ = (uchar_t)(gbkcode >> 24);
83815d9d0b5Syy *ob++ = (uchar_t)(gbkcode >> 16);
83915d9d0b5Syy *ob++ = (uchar_t)(gbkcode >> 8);
84015d9d0b5Syy }
84115d9d0b5Syy *ob = (uchar_t)(gbkcode & 0xFF);
84215d9d0b5Syy
84315d9d0b5Syy return (gbklen);
84415d9d0b5Syy }
84515d9d0b5Syy
84615d9d0b5Syy /*
84715d9d0b5Syy * Convert single UTF-8 character to GBK.
84815d9d0b5Syy * Return: > 0 - Converted successfully
84915d9d0b5Syy * = -1 - E2BIG
85015d9d0b5Syy */
85115d9d0b5Syy /* ARGSUSED */
85215d9d0b5Syy static int8_t
utf8_to_gbk(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)85315d9d0b5Syy utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
854*86ef0a63SRichard Lowe uchar_t *ob, uchar_t *obtail, size_t *ret)
85515d9d0b5Syy {
856*86ef0a63SRichard Lowe size_t index;
85715d9d0b5Syy int8_t gbklen;
85815d9d0b5Syy uint32_t gbkcode;
85915d9d0b5Syy
86015d9d0b5Syy index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
86115d9d0b5Syy KICONV_UTF8_GB18030_MAX);
86215d9d0b5Syy gbkcode = kiconv_utf8_gb18030[index].value;
86315d9d0b5Syy KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
86415d9d0b5Syy
86515d9d0b5Syy /* GBK and GB18030 share the same table, so check the length. */
86615d9d0b5Syy if (gbklen == 4) {
86715d9d0b5Syy index = 0;
86815d9d0b5Syy gbkcode = kiconv_utf8_gb18030[index].value;
86915d9d0b5Syy gbklen = 1;
87015d9d0b5Syy }
87115d9d0b5Syy
87215d9d0b5Syy if (obtail - ob < gbklen) {
87315d9d0b5Syy *ret = (size_t)-1;
87415d9d0b5Syy return (-1);
87515d9d0b5Syy }
87615d9d0b5Syy
87715d9d0b5Syy if (index == 0)
87815d9d0b5Syy (*ret)++; /* Non-identical conversion */
87915d9d0b5Syy
88015d9d0b5Syy if (gbklen > 1)
88115d9d0b5Syy *ob++ = (uchar_t)(gbkcode >> 8);
88215d9d0b5Syy *ob = (uchar_t)(gbkcode & 0xFF);
88315d9d0b5Syy
88415d9d0b5Syy return (gbklen);
88515d9d0b5Syy }
88615d9d0b5Syy
88715d9d0b5Syy /*
88815d9d0b5Syy * Convert single UTF-8 character to GB2312.
88915d9d0b5Syy * Return: > 0 - Converted successfully
89015d9d0b5Syy * = -1 - E2BIG
89115d9d0b5Syy */
89215d9d0b5Syy /* ARGSUSED */
89315d9d0b5Syy static int8_t
utf8_to_gb2312(uint32_t utf8,uchar_t ** inbuf,uchar_t * intail,uchar_t * ob,uchar_t * obtail,size_t * ret)89415d9d0b5Syy utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
895*86ef0a63SRichard Lowe uchar_t *ob, uchar_t *obtail, size_t *ret)
89615d9d0b5Syy {
89715d9d0b5Syy size_t index;
89815d9d0b5Syy int8_t gblen;
89915d9d0b5Syy uint32_t gbcode;
90015d9d0b5Syy
90115d9d0b5Syy index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
90215d9d0b5Syy KICONV_UTF8_GB2312_MAX);
90315d9d0b5Syy gbcode = kiconv_utf8_gb2312[index].value;
90415d9d0b5Syy gblen = (gbcode <= 0xFF) ? 1 : 2;
90515d9d0b5Syy
90615d9d0b5Syy if (obtail - ob < gblen) {
90715d9d0b5Syy *ret = (size_t)-1;
90815d9d0b5Syy return (-1);
90915d9d0b5Syy }
91015d9d0b5Syy
91115d9d0b5Syy if (index == 0)
91215d9d0b5Syy (*ret)++;
91315d9d0b5Syy
91415d9d0b5Syy if (gblen > 1)
91515d9d0b5Syy *ob++ = (uchar_t)(gbcode >> 8);
91615d9d0b5Syy *ob = (uchar_t)(gbcode & 0xFF);
91715d9d0b5Syy
91815d9d0b5Syy return (gblen);
91915d9d0b5Syy }
92015d9d0b5Syy
92115d9d0b5Syy static kiconv_ops_t kiconv_sc_ops_tbl[] = {
92215d9d0b5Syy {
92315d9d0b5Syy "gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
92415d9d0b5Syy kiconv_close_to_cck, kiconvstr_to_gb18030
92515d9d0b5Syy },
92615d9d0b5Syy {
92715d9d0b5Syy "utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
92815d9d0b5Syy close_fr_sc, kiconvstr_fr_gb18030
92915d9d0b5Syy },
93015d9d0b5Syy {
93115d9d0b5Syy "gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
93215d9d0b5Syy kiconv_close_to_cck, kiconvstr_to_gbk
93315d9d0b5Syy },
93415d9d0b5Syy {
93515d9d0b5Syy "utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
93615d9d0b5Syy close_fr_sc, kiconvstr_fr_gbk
93715d9d0b5Syy },
93815d9d0b5Syy {
93915d9d0b5Syy "euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
94015d9d0b5Syy kiconv_close_to_cck, kiconvstr_to_euccn
94115d9d0b5Syy },
94215d9d0b5Syy {
94315d9d0b5Syy "utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
94415d9d0b5Syy close_fr_sc, kiconvstr_fr_euccn
94515d9d0b5Syy },
94615d9d0b5Syy };
94715d9d0b5Syy
94815d9d0b5Syy static kiconv_module_info_t kiconv_sc_info = {
94915d9d0b5Syy "kiconv_sc", /* module name */
95015d9d0b5Syy sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
95115d9d0b5Syy kiconv_sc_ops_tbl,
95215d9d0b5Syy 0,
95315d9d0b5Syy NULL,
95415d9d0b5Syy NULL,
95515d9d0b5Syy 0
95615d9d0b5Syy };
95715d9d0b5Syy
95815d9d0b5Syy static struct modlkiconv modlkiconv_sc = {
95915d9d0b5Syy &mod_kiconvops,
96015d9d0b5Syy "kiconv Simplified Chinese module 1.0",
96115d9d0b5Syy &kiconv_sc_info
96215d9d0b5Syy };
96315d9d0b5Syy
96415d9d0b5Syy static struct modlinkage modlinkage = {
96515d9d0b5Syy MODREV_1,
96615d9d0b5Syy (void *)&modlkiconv_sc,
96715d9d0b5Syy NULL
96815d9d0b5Syy };
96915d9d0b5Syy
97015d9d0b5Syy int
_init(void)97115d9d0b5Syy _init(void)
97215d9d0b5Syy {
97315d9d0b5Syy int err;
97415d9d0b5Syy
97515d9d0b5Syy err = mod_install(&modlinkage);
97615d9d0b5Syy if (err)
97715d9d0b5Syy cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
97815d9d0b5Syy
97915d9d0b5Syy return (err);
98015d9d0b5Syy }
98115d9d0b5Syy
98215d9d0b5Syy int
_fini(void)98315d9d0b5Syy _fini(void)
98415d9d0b5Syy {
98515d9d0b5Syy int err;
98615d9d0b5Syy
98715d9d0b5Syy /*
98815d9d0b5Syy * If this module is being used, then, we cannot remove the module.
98915d9d0b5Syy * The following checking will catch pretty much all usual cases.
99015d9d0b5Syy *
99115d9d0b5Syy * Any remaining will be catached by the kiconv_unregister_module()
99215d9d0b5Syy * during mod_remove() at below.
99315d9d0b5Syy */
99415d9d0b5Syy if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
99515d9d0b5Syy return (EBUSY);
99615d9d0b5Syy
99715d9d0b5Syy err = mod_remove(&modlinkage);
99815d9d0b5Syy if (err)
99915d9d0b5Syy cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
100015d9d0b5Syy
100115d9d0b5Syy return (err);
100215d9d0b5Syy }
100315d9d0b5Syy
100415d9d0b5Syy int
_info(struct modinfo * modinfop)100515d9d0b5Syy _info(struct modinfo *modinfop)
100615d9d0b5Syy {
100715d9d0b5Syy return (mod_info(&modlinkage, modinfop));
100815d9d0b5Syy }
1009