/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_KICONV_CCK_COMMON_H
#define	_SYS_KICONV_CCK_COMMON_H

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _KERNEL

/* The start value of the leading byte of an EUC encoded character. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Whether v lies in the valid EUC byte range 0xA1 - 0xFE.
 * Note that v is evaluated twice.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	\
	((v) >= KICONV_EUC_START && (v) <= 0xFE)

/* Is ASCII character or not: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) <= 0x7F)

/*
 * UTF-8 replacement character for non-identicals: its three individual
 * bytes, the same bytes packed into one integer, and its length in bytes.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xefbfbd)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)

/*
 * Whether the 2nd byte of a 3 or 4 byte UTF-8 character is invalid or not.
 * The leading byte (first) is used as the index into the
 * u8_valid_min_2nd_byte[] / u8_valid_max_2nd_byte[] tables declared below.
 * Both arguments are evaluated twice.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)	\
	((second) < u8_valid_min_2nd_byte[(first)] ||		\
	(second) > u8_valid_max_2nd_byte[(first)])

/*
 * If we have not yet checked for the UTF-8 signature (BOM) at the
 * beginning of the conversion data stream, check for it now and, if one
 * is found, skip it since we have no use for it.  Either way the state is
 * marked as processed afterwards.
 *
 * Relies on a variable named kcd in the caller's scope that can be cast to
 * kiconv_state_t.  The body is wrapped in do { } while (0) so that the
 * macro behaves as a single statement, e.g. under an unbraced if.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)

/*
 * Check BOM of UTF-8 without state information.
 *
 * This deliberately stays a plain if statement: its expansion ends in a
 * closing brace, so call sites may invoke it with or without a trailing
 * semicolon and both forms must keep compiling.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)		\
	if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&			\
	    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {		\
		(ib) += 3;						\
	}

/*
 * Set errno and break.
 *
 * Relies on errno (a pointer) and ret_val in the caller's scope, and must
 * be used inside a loop or switch.  A do { } while (0) wrapper cannot be
 * used here because it would capture the break; the if (1) ... else form
 * makes the expansion a single statement while leaving the break bound to
 * the caller's enclosing loop or switch.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	if (1) {							\
		*errno = (err);						\
		ret_val = (size_t)-1;					\
		break;							\
	} else								\
		(void) 0

/*
 * Handle the flag: when KICONV_REPLACE_INVALID is set, advance the input
 * buffer and jump to the caller's REPLACE_INVALID label; otherwise set
 * errno and break.
 *
 * Relies on flag, ib, errno (a pointer), ret_val and the REPLACE_INVALID
 * label in the caller's scope.  Uses the same single-statement form as
 * KICONV_SET_ERRNO_AND_BREAK so that the break still leaves the caller's
 * enclosing loop or switch.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (1) {							\
		if (flag & KICONV_REPLACE_INVALID) {			\
			ib += (advance);				\
			goto REPLACE_INVALID;				\
		}							\
		*errno = (err);						\
		ret_val = (size_t)-1;					\
		break;							\
	} else								\
		(void) 0

/* Conversion table for UTF-8 -> CCK encoding. */
typedef struct {
	uint32_t key;
	uint32_t value;
} kiconv_table_t;

/* Conversion table for CCK encoding -> UTF-8. */
typedef struct {
	uint32_t key;
	uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/* Common open and close function for UTF-8 to CCK conversion. */
void *kiconv_open_to_cck(void);
int kiconv_close_to_cck(void *);

/* Binary search function. */
size_t kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
size_t kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 */
size_t kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
	uchar_t *outarray, size_t *outlen, int flag, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables are coming from u8_textprep.c. We use them to
 * check on validity of UTF-8 characters and their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif	/* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_KICONV_CCK_COMMON_H */