/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Kernel iconv (kiconv) code conversions between UTF-8 and the Korean
 * encodings EUC-KR and Unified Hangul Code (UHC).
 */

/*
 * NOTE(review): every #include directive below has lost its header name
 * in this copy of the file.  They are kept exactly as found; restore the
 * header names from the upstream source before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Forward declarations of the single character converters that are
 * defined further down in this file.
 */
static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail,
	size_t *ret_val, kiconv_table_array_t *table, size_t nitems);

/*
 * Magic values handed out by open_fr_euckr() and open_fr_uhc() in place
 * of a real conversion descriptor.  close_fr_ko() rejects any value
 * greater than KICONV_KO_MAX_MAGIC_ID.
 */
#define KICONV_KO_EUCKR (0x01)
#define KICONV_KO_UHC (0x02)
#define KICONV_KO_MAX_MAGIC_ID (0x02)

/*
 * Open routine for the EUC-KR to UTF-8 conversion.  Returns the constant
 * KICONV_KO_EUCKR cast to a pointer; nothing is allocated.
 */
static void *
open_fr_euckr()
{
	return ((void *)KICONV_KO_EUCKR);
}

/*
 * Open routine for the UHC to UTF-8 conversion.  Returns the constant
 * KICONV_KO_UHC cast to a pointer; nothing is allocated.
 */
static void *
open_fr_uhc()
{
	return ((void *)KICONV_KO_UHC);
}

/*
 * Close routine shared by both "from Korean" conversions.  Returns EBADF
 * when the descriptor value is greater than KICONV_KO_MAX_MAGIC_ID and 0
 * otherwise.  There is nothing to release.  Note that a descriptor value
 * of 0 (NULL) also passes this check.
 */
static int
close_fr_ko(void *s)
{
	if ((uintptr_t)s > KICONV_KO_MAX_MAGIC_ID)
		return (EBADF);

	return (0);
}

/*
 * Encoding convertor from EUC-KR to UTF-8.
*/
/*
 * kcd			conversion descriptor; NULL and (void *)-1 are
 *			rejected with EBADF stored in *errno and a return
 *			value of (size_t)-1.
 * inbuf/inbufleft	input cursor and number of bytes left; both are
 *			updated before returning from the conversion loop.
 * outbuf/outbufleft	output cursor and room left; updated likewise.
 * errno		receives the error code on failure.
 *
 * A NULL inbuf or NULL *inbuf is a state reset request and returns 0
 * without touching anything.
 *
 * ASCII bytes are copied through unchanged.  Any other character must be
 * a pair of bytes that both satisfy KICONV_KO_IS_EUCKR_BYTE(); the pair is
 * converted by ko_to_utf8() using the kiconv_euckr_utf8 table.  The loop
 * stops at the first problem:
 *	E2BIG	the output buffer has no room for the next character,
 *	EILSEQ	the leading or the trailing byte is not an EUC-KR byte,
 *	EINVAL	the input ends in the middle of a two byte character.
 *
 * The return value is ret_val, which ko_to_utf8() increments for every
 * non-identical conversion.  KICONV_SET_ERRNO_AND_BREAK() is defined
 * outside this file; it presumably stores the error in *errno and turns
 * ret_val into the failure value before leaving the loop.  The locals it
 * may refer to by name (ib, ob, ibtail, obtail, ret_val) therefore keep
 * their conventional names.
 */
static size_t
kiconv_fr_euckr(void *kcd, char **inbuf, size_t *inbufleft,
	char **outbuf, size_t *outbufleft, int *errno)
{
	uchar_t		*ib, *ibtail;
	uchar_t		*ob, *obtail;
	size_t		ret_val = 0;
	uint32_t	code;
	int8_t		nbytes;

	/* Refuse an obviously bad conversion descriptor. */
	if (kcd == NULL || kcd == (void *)-1) {
		*errno = EBADF;
		return ((size_t)-1);
	}

	/* State reset request: there is no state, so simply succeed. */
	if (inbuf == NULL || *inbuf == NULL)
		return (0);

	ib = (uchar_t *)*inbuf;
	ibtail = ib + *inbufleft;
	ob = (uchar_t *)*outbuf;
	obtail = ob + *outbufleft;

	while (ib < ibtail) {
		if (KICONV_IS_ASCII(ib[0])) {
			/* Single byte: copy it if there is room. */
			if (!(ob < obtail)) {
				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
			}
			*ob++ = *ib++;
		} else if (! KICONV_KO_IS_EUCKR_BYTE(ib[0])) {
			/* Not a valid EUC-KR leading byte. */
			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
		} else if ((ibtail - ib) < 2) {
			/* Incomplete character at the end of the input. */
			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
		} else if (! KICONV_KO_IS_EUCKR_BYTE(ib[1])) {
			/* Not a valid EUC-KR trailing byte. */
			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
		} else {
			code = ((uint32_t)ib[0] << 8) | ib[1];
			nbytes = ko_to_utf8(code, ob, obtail, &ret_val,
			    kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
			if (nbytes < 0) {
				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
			}

			ob += nbytes;
			ib += 2;
		}
	}

	/* Report how far we got, whether or not an error occurred. */
	*inbuf = (char *)ib;
	*inbufleft = ibtail - ib;
	*outbuf = (char *)ob;
	*outbufleft = obtail - ob;

	return (ret_val);
}

/*
 * String based encoding convertor from EUC-KR to UTF-8.
*/ static size_t kiconvstr_fr_euckr(char *inarray, size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno) { uchar_t *ib; uchar_t *ob; uchar_t *ibtail; uchar_t *obtail; uchar_t *oldib; size_t ret_val; int8_t sz; uint32_t euckr_val; boolean_t do_not_ignore_null; ret_val = 0; ib = (uchar_t *)inarray; ob = (uchar_t *)outarray; ibtail = ib + *inlen; obtail = ob + *outlen; do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); while (ib < ibtail) { if (*ib == '\0' && do_not_ignore_null) break; if (KICONV_IS_ASCII(*ib)) { if (ob >= obtail) { KICONV_SET_ERRNO_AND_BREAK(E2BIG); } *ob++ = *ib++; continue; } oldib = ib; if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) { KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ); } if (ibtail - ib < 2) { KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL); } if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) { KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ); } euckr_val = *ib++; euckr_val = (euckr_val << 8) | *ib++; sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val, kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX); if (sz < 0) { ib = oldib; KICONV_SET_ERRNO_AND_BREAK(E2BIG); } ob += sz; continue; REPLACE_INVALID: if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) { ib = oldib; KICONV_SET_ERRNO_AND_BREAK(E2BIG); } *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1; *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2; *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3; ret_val++; } *inlen = ibtail - ib; *outlen = obtail - ob; return (ret_val); } /* * Encoding convertor from Unified Hangul Code to UTF-8. */ static size_t kiconv_fr_uhc(void *kcd, char **inbuf, size_t *inbufleft, char **outbuf, size_t *outbufleft, int *errno) { uchar_t *ib; uchar_t *ob; uchar_t *ibtail; uchar_t *obtail; size_t ret_val; int8_t sz; uint32_t uhc_val; /* Check on the kiconv code conversion descriptor. */ if (kcd == NULL || kcd == (void *)-1) { *errno = EBADF; return ((size_t)-1); } /* If this is a state reset request, process and return. 
*/ if (inbuf == NULL || *inbuf == NULL) { return (0); } ret_val = 0; ib = (uchar_t *)*inbuf; ob = (uchar_t *)*outbuf; ibtail = ib + *inbufleft; obtail = ob + *outbufleft; while (ib < ibtail) { if (KICONV_IS_ASCII(*ib)) { if (ob >= obtail) { KICONV_SET_ERRNO_AND_BREAK(E2BIG); } *ob++ = *ib++; continue; } /* * Issue EILSEQ error if the first byte is not a * valid UHC leading byte. */ if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) { KICONV_SET_ERRNO_AND_BREAK(EILSEQ); } /* * Issue EINVAL error if input buffer has an incomplete * character at the end of the buffer. */ if (ibtail - ib < 2) { KICONV_SET_ERRNO_AND_BREAK(EINVAL); } /* * Issue EILSEQ error if the remaining byte is not * a valid UHC byte. */ if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) { KICONV_SET_ERRNO_AND_BREAK(EILSEQ); } uhc_val = (uint32_t)(*ib) << 8 | *(ib + 1); sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val, kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX); if (sz < 0) { KICONV_SET_ERRNO_AND_BREAK(E2BIG); } ib += 2; ob += sz; } *inbuf = (char *)ib; *inbufleft = ibtail - ib; *outbuf = (char *)ob; *outbufleft = obtail - ob; return (ret_val); } /* * String based encoding convertor from Unified Hangul Code to UTF-8. */ static size_t kiconvstr_fr_uhc(char *inarray, size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno) { uchar_t *ib; uchar_t *ob; uchar_t *ibtail; uchar_t *obtail; uchar_t *oldib; size_t ret_val; int8_t sz; uint32_t uhc_val; boolean_t do_not_ignore_null; ret_val = 0; ib = (uchar_t *)inarray; ob = (uchar_t *)outarray; ibtail = ib + *inlen; obtail = ob + *outlen; do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0); while (ib < ibtail) { if (*ib == '\0' && do_not_ignore_null) break; if (KICONV_IS_ASCII(*ib)) { if (ob >= obtail) { KICONV_SET_ERRNO_AND_BREAK(E2BIG); } *ob++ = *ib++; continue; } oldib = ib; if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) { KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ); } if (ibtail - ib < 2) { KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL); } if (! 
KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) { KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ); } uhc_val = *ib++; uhc_val = (uhc_val << 8) | *ib++; sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val, kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX); if (sz < 0) { ib = oldib; KICONV_SET_ERRNO_AND_BREAK(E2BIG); } ob += sz; continue; REPLACE_INVALID: if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) { ib = oldib; KICONV_SET_ERRNO_AND_BREAK(E2BIG); } *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1; *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2; *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3; ret_val++; } *inlen = ibtail - ib; *outlen = obtail - ob; return (ret_val); } /* * Encoding convertor from UTF-8 to EUC-KR. */ static size_t kiconv_to_euckr(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int *errno) { return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf, outbytesleft, errno, utf8_to_euckr)); } /* * Encoding convertor from UTF-8 to Unified Hangul Code. */ static size_t kiconv_to_uhc(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int *errno) { return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf, outbytesleft, errno, utf8_to_uhc)); } /* * String based encoding convertor from UTF-8 to EUC-KR. */ static size_t kiconvstr_to_euckr(char *inarray, size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno) { return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen, (uchar_t *)outarray, outlen, flag, errno, utf8_to_euckr); } /* * String based encoding convertor from UTF-8 to Unified Hangul Code. */ static size_t kiconvstr_to_uhc(char *inarray, size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno) { return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen, (uchar_t *)outarray, outlen, flag, errno, utf8_to_uhc); } /* * Convert an UTF-8 character to a character of ko encodings * (EUC-KR or UHC). 
*/ static int8_t utf8_to_ko(uint32_t utf8, uchar_t *ob, uchar_t *obtail, size_t *ret_val, kiconv_table_t *table, size_t nitems) { size_t index; size_t kocode; int8_t kolen; if (KICONV_KO_IS_UDC_IN_UTF8(utf8)) { /* User Definable Area handing. */ kocode = (((utf8 & 0xF0000) >> 4) | ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) - KICONV_KO_UDA_UCS4_START; if (kocode < KICONV_KO_UDA_RANGE) { kocode = (KICONV_KO_UDA_EUC_SEG1 << 8) | (kocode + KICONV_KO_UDA_OFFSET_START); } else { /* 0x43 = 0xA1 - 0x5E */ kocode = (KICONV_KO_UDA_EUC_SEG2 << 8) | (kocode + 0x43); } index = 1; } else { index = kiconv_binsearch(utf8, table, nitems); kocode = table[index].value; } kolen = (kocode <= 0xFF) ? 1 : 2; if (obtail - ob < kolen) { *ret_val = (size_t)-1; return (-1); } if (index == 0) (*ret_val)++; if (kolen > 1) *ob++ = (uchar_t)(kocode >> 8); *ob = (uchar_t)(kocode & 0xFF); return (kolen); } /* * Convert an UTF-8 character to Unified Hangual Code. */ /* ARGSUSED */ static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val) { return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_uhc, KICONV_UTF8_UHC_MAX)); } /* * Convert an UTF-8 character to EUC-KR. */ /* ARGSUSED */ static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val) { return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_euckr, KICONV_UTF8_EUCKR_MAX)); } /* * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8. */ static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val, kiconv_table_array_t *table, size_t nitems) { size_t index; int8_t sz; uchar_t udc[3]; uchar_t *u8; if (KICONV_KO_IS_UDC_IN_EUC(ko_val)) { /* UDA(User Definable Area) handling. */ uint32_t u32; u32 = (ko_val & 0xFF) + (((ko_val & 0xFF00) == 0xC900) ? 
KICONV_KO_UDA_OFFSET_1 : KICONV_KO_UDA_OFFSET_2); udc[0] = 0xEF; udc[1] = (uchar_t)(0x80 | (u32 & 0x00000FC0) >> 6); udc[2] = (uchar_t)(0x80 | (u32 & 0x0000003F)); u8 = udc; index = 1; } else { index = kiconv_binsearch(ko_val, table, nitems); u8 = table[index].u8; } sz = u8_number_of_bytes[u8[0]]; if (obtail - ob < sz) { *ret_val = (size_t)-1; return (-1); } if (index == 0) (*ret_val)++; /* Non-identical conversion */ for (index = 0; index < sz; index++) *ob++ = u8[index]; return (sz); } static kiconv_ops_t kiconv_ko_ops_tbl[] = { { "euc-kr", "utf-8", kiconv_open_to_cck, kiconv_to_euckr, kiconv_close_to_cck, kiconvstr_to_euckr }, { "utf-8", "euc-kr", open_fr_euckr, kiconv_fr_euckr, close_fr_ko, kiconvstr_fr_euckr }, { "unifiedhangul", "utf-8", kiconv_open_to_cck, kiconv_to_uhc, kiconv_close_to_cck, kiconvstr_to_uhc }, { "utf-8", "unifiedhangul", open_fr_uhc, kiconv_fr_uhc, close_fr_ko, kiconvstr_fr_uhc } }; static kiconv_module_info_t kiconv_ko_info = { "kiconv_ko", /* module name */ sizeof (kiconv_ko_ops_tbl) / sizeof (kiconv_ko_ops_tbl[0]), kiconv_ko_ops_tbl, 0, NULL, NULL, 0 }; static struct modlkiconv modlkiconv_ko = { &mod_kiconvops, "kiconv korean module 1.0", &kiconv_ko_info }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modlkiconv_ko, NULL }; int _init(void) { int err; err = mod_install(&modlinkage); if (err) cmn_err(CE_WARN, "kiconv_ko: failed to load kernel module"); return (err); } int _fini(void) { int err; /* * If this module is being used, then, we cannot remove the module. * The following checking will catch pretty much all usual cases. * * Any remaining will be catached by the kiconv_unregister_module() * during mod_remove() at below. */ if (kiconv_module_ref_count(KICONV_MODULE_ID_KO)) return (EBUSY); err = mod_remove(&modlinkage); if (err) cmn_err(CE_WARN, "kiconv_ko: failed to remove kernel module"); return (err); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); }