1*15d9d0b5Syy /*
2*15d9d0b5Syy  * CDDL HEADER START
3*15d9d0b5Syy  *
4*15d9d0b5Syy  * The contents of this file are subject to the terms of the
5*15d9d0b5Syy  * Common Development and Distribution License (the "License").
6*15d9d0b5Syy  * You may not use this file except in compliance with the License.
7*15d9d0b5Syy  *
8*15d9d0b5Syy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*15d9d0b5Syy  * or http://www.opensolaris.org/os/licensing.
10*15d9d0b5Syy  * See the License for the specific language governing permissions
11*15d9d0b5Syy  * and limitations under the License.
12*15d9d0b5Syy  *
13*15d9d0b5Syy  * When distributing Covered Code, include this CDDL HEADER in each
14*15d9d0b5Syy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*15d9d0b5Syy  * If applicable, add the following below this CDDL HEADER, with the
16*15d9d0b5Syy  * fields enclosed by brackets "[]" replaced with your own identifying
17*15d9d0b5Syy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*15d9d0b5Syy  *
19*15d9d0b5Syy  * CDDL HEADER END
20*15d9d0b5Syy  */
21*15d9d0b5Syy /*
22*15d9d0b5Syy  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23*15d9d0b5Syy  * Use is subject to license terms.
24*15d9d0b5Syy  */
25*15d9d0b5Syy 
26*15d9d0b5Syy #ifndef _SYS_KICONV_CCK_COMMON_H
27*15d9d0b5Syy #define	_SYS_KICONV_CCK_COMMON_H
28*15d9d0b5Syy 
29*15d9d0b5Syy #ifdef __cplusplus
30*15d9d0b5Syy extern "C" {
31*15d9d0b5Syy #endif
32*15d9d0b5Syy 
33*15d9d0b5Syy #ifdef	_KERNEL
34*15d9d0b5Syy 
/* Start value of the leading byte of an EUC encoded character. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Non-zero when v lies in the valid EUC byte range 0xA1 - 0xFE.
 *
 * The two-sided range test is folded into a single unsigned comparison so
 * that the argument is evaluated exactly once; an argument with a side
 * effect, such as *ib++, is therefore safe.  A negative argument wraps to a
 * large unsigned value and is rejected, which is the same answer the
 * two-sided comparison gives.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)					\
	(((unsigned int)(v) - 0xA1U) <= (0xFEU - 0xA1U))

/* Non-zero when c, taken as an unsigned byte, is ASCII: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) <= 0x7F)
43*15d9d0b5Syy 
/*
 * UTF-8 replacement character emitted for non-identical conversions.
 * It is provided one byte at a time (CHAR1..CHAR3), as a single packed
 * integer, and together with its encoded length in bytes.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)	/* first byte */
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)	/* second byte */
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)	/* third byte */
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xEFBFBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)
50*15d9d0b5Syy 
/*
 * Check the 2nd byte of a 3 or 4 byte UTF-8 character against the range
 * allowed for the given first byte.  Evaluates to non-zero when `second'
 * falls outside
 * u8_valid_min_2nd_byte[first] .. u8_valid_max_2nd_byte[first].
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	    (!((second) >= u8_valid_min_2nd_byte[(first)] &&		\
	    (second) <= u8_valid_max_2nd_byte[(first)]))
57*15d9d0b5Syy 
/*
 * If the UTF-8 signature BOM (0xEF 0xBB 0xBF) has not been checked for yet
 * at the beginning of the conversion data stream, check for it now and,
 * when found, skip it since we have no use for it.  Either way the state is
 * marked as processed afterwards.
 *
 * The macro relies on a variable named `kcd' in the caller's scope that can
 * be cast to kiconv_state_t.  `ib' must be a modifiable pointer lvalue and
 * is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the whole macro acts as a
 * single statement; otherwise the trailing bom_processed store would run
 * unconditionally when the macro is the body of an unbraced if/else.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)
70*15d9d0b5Syy 
/*
 * Check for the UTF-8 BOM (0xEF 0xBB 0xBF) without any state information:
 * when at least three bytes are available and they form the BOM, advance
 * `ib' past them.  `ib' must be a modifiable pointer lvalue and is
 * evaluated more than once.
 *
 * Wrapped in do { } while (0) so the macro is a single statement that must
 * be terminated with a semicolon and can safely be followed by `else'.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	do {								\
		if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
	} while (0)
79*15d9d0b5Syy 
/*
 * Set errno and break.
 *
 * Stores `err' through the caller's `errno' pointer, sets the caller's
 * `ret_val' to (size_t)-1 and leaves the enclosing loop or switch.
 *
 * The statements are wrapped in "if (1) { ... } else (void)0" rather than
 * do { } while (0): a do/while wrapper would capture the `break', whereas
 * an `if' does not.  The wrapper makes the macro a single statement, so it
 * stays correct as the body of an unbraced if/else.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	if (1) {							\
		*errno = (err);						\
		ret_val = (size_t)-1;					\
		break;							\
	} else								\
		(void)0

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * When the caller's `flag' has KICONV_REPLACE_INVALID set, advance the
 * caller's `ib' by `advance' and jump to the caller's REPLACE_INVALID
 * label; otherwise behave as KICONV_SET_ERRNO_AND_BREAK(err).
 *
 * The whole expansion is one if/else chain, i.e. a single statement.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (flag & KICONV_REPLACE_INVALID) {				\
		ib += (advance);					\
		goto REPLACE_INVALID;					\
	} else								\
		KICONV_SET_ERRNO_AND_BREAK((err))
97*15d9d0b5Syy 
/*
 * One entry of a conversion table for UTF-8 -> CCK encoding: a pair of
 * 32-bit values.
 */
typedef struct {
	uint32_t	key;	/* presumably the value looked up; confirm */
	uint32_t	value;	/* presumably the conversion result; confirm */
} kiconv_table_t;
103*15d9d0b5Syy 
104*15d9d0b5Syy /* Conversion table for CCK encoding -> utf8. */
105*15d9d0b5Syy typedef struct {
106*15d9d0b5Syy 	uint32_t key;
107*15d9d0b5Syy 	uchar_t u8[4];
108*15d9d0b5Syy } kiconv_table_array_t;
109*15d9d0b5Syy 
110*15d9d0b5Syy /*
111*15d9d0b5Syy  * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
112*15d9d0b5Syy  * Currently parameter ib/ibtail are used by BIG5HKSCS only.
113*15d9d0b5Syy  */
114*15d9d0b5Syy typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
115*15d9d0b5Syy 	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
116*15d9d0b5Syy 
/*
 * Open and close functions shared by the UTF-8 to CCK conversions.
 */
void	*kiconv_open_to_cck(void);
int	kiconv_close_to_cck(void *);
120*15d9d0b5Syy 
/*
 * Binary search function: looks for `key' in the table `tbl' holding
 * `nitems' entries.
 */
size_t	kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);
123*15d9d0b5Syy 
124*15d9d0b5Syy /* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
125*15d9d0b5Syy size_t 	kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
126*15d9d0b5Syy 	char **outbuf, size_t *outbytesleft, int *errno,
127*15d9d0b5Syy 	kiconv_utf8tocck_t ptr_utf8tocck);
128*15d9d0b5Syy 
129*15d9d0b5Syy /*
130*15d9d0b5Syy  * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
131*15d9d0b5Syy  */
132*15d9d0b5Syy size_t 	kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
133*15d9d0b5Syy 	uchar_t *outarray, size_t *outlen, int flag, int *errno,
134*15d9d0b5Syy 	kiconv_utf8tocck_t ptr_utf8tocck);
135*15d9d0b5Syy 
/*
 * Tables that come from u8_textprep.c.  They are used here to check the
 * validity of UTF-8 characters and of their bytes; the min/max tables are
 * indexed by the first byte of a character and bound its second byte.
 */
extern const int8_t	u8_number_of_bytes[];
extern const uint8_t	u8_valid_min_2nd_byte[];
extern const uint8_t	u8_valid_max_2nd_byte[];
143*15d9d0b5Syy 
144*15d9d0b5Syy #endif	/* _KERNEL */
145*15d9d0b5Syy 
146*15d9d0b5Syy #ifdef __cplusplus
147*15d9d0b5Syy }
148*15d9d0b5Syy #endif
149*15d9d0b5Syy 
150*15d9d0b5Syy #endif	/* _SYS_KICONV_CCK_COMMON_H */
151