1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef _SYS_KICONV_CCK_COMMON_H
27#define	_SYS_KICONV_CCK_COMMON_H
28
29#pragma ident	"%Z%%M%	%I%	%E% SMI"
30
31#ifdef __cplusplus
32extern "C" {
33#endif
34
35#ifdef	_KERNEL
36
/* First value a leading byte of an EUC encoded character can take. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Non-zero when v lies in the inclusive EUC byte range 0xA1 - 0xFE,
 * zero otherwise.  The argument may be evaluated twice, so it must not
 * have side effects.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	\
	(KICONV_EUC_START <= (v) && (v) <= 0xFE)
42
/*
 * Non-zero when c, narrowed to an unsigned byte, is a 7-bit ASCII
 * character (0x00 - 0x7F).
 */
#define	KICONV_IS_ASCII(c)		((uchar_t)(c) < 0x80)
45
/*
 * Replacement character emitted for non-identical conversions: the three
 * bytes EF BF BD (the UTF-8 form of U+FFFD).  It is available byte by
 * byte, packed into a single integer, and together with its byte length.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR					\
	((KICONV_UTF8_REPLACEMENT_CHAR1 << 16) |			\
	(KICONV_UTF8_REPLACEMENT_CHAR2 << 8) |				\
	KICONV_UTF8_REPLACEMENT_CHAR3)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)
52
/*
 * Non-zero when `second' is NOT an acceptable 2nd byte for a 3 or 4 byte
 * UTF-8 character whose leading byte is `first'.  The acceptable range is
 * looked up, inclusive on both ends, in u8_valid_min_2nd_byte[] and
 * u8_valid_max_2nd_byte[], both indexed by the leading byte.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	(!(u8_valid_min_2nd_byte[(first)] <= (second) &&		\
	(second) <= u8_valid_max_2nd_byte[(first)]))
59
/*
 * If we haven't checked on the UTF-8 signature BOM character (bytes
 * EF BB BF) in the beginning of the conversion data stream, check for it
 * and, if one is found, skip it since we have no use for it.
 *
 *	ib	input cursor (must be a modifiable lvalue); advanced by 3
 *		when a BOM is skipped.  The bytes are compared against
 *		0xef/0xbb/0xbf, so it should point at unsigned bytes.
 *	ibtail	end of the input; a BOM is only looked for when at least
 *		3 bytes lie between ib and ibtail.
 *
 * The macro relies on a variable named `kcd' being in scope at the point
 * of use; it is cast to kiconv_state_t and its bom_processed member is set
 * to 1 whether or not a BOM was found, so the check runs only once.
 *
 * The body is wrapped in do { } while (0) so that the whole macro acts as
 * a single statement; without it the trailing bom_processed assignment
 * would escape an unbraced if/else at the call site.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)
72
/*
 * Check for and skip a UTF-8 BOM (bytes EF BB BF) without consulting or
 * updating any conversion state.
 *
 *	ib	input cursor (must be a modifiable lvalue); advanced by 3
 *		when a BOM is found.  The bytes are compared against
 *		0xef/0xbb/0xbf, so it should point at unsigned bytes.
 *	ibtail	end of the input; a BOM is only looked for when at least
 *		3 bytes lie between ib and ibtail.
 *
 * Wrapped in do { } while (0) so the macro plus the caller's trailing
 * semicolon form exactly one statement and can be used safely in an
 * unbraced if/else.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	do {								\
		if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
	} while (0)
81
/*
 * Set errno and break.
 *
 * Stores err through the caller's `errno' pointer, sets the caller's
 * `ret_val' to (size_t)-1 and then executes `break', leaving the
 * enclosing loop or switch at the call site.  Both `errno' and `ret_val'
 * must be in scope where the macro is used.
 *
 * Because of the `break' the body cannot be wrapped in do { } while (0)
 * (the break would only leave that wrapper).  The if (1) { } else form
 * makes the macro a single statement instead, so it remains correct when
 * used as the body of an unbraced if; the dangling `else ((void)0)'
 * absorbs the caller's trailing semicolon.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	if (1) {							\
		*errno = (err);						\
		ret_val = (size_t)-1;					\
		break;							\
	} else								\
		((void)0)
89
/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * When KICONV_REPLACE_INVALID is set in the caller's `flag', the caller's
 * input cursor `ib' is advanced by `advance' and control jumps to the
 * caller's REPLACE_INVALID label.  Otherwise the error `err' is reported
 * through KICONV_SET_ERRNO_AND_BREAK(), which also leaves the enclosing
 * loop or switch.
 *
 * `flag', `ib', the REPLACE_INVALID label and everything required by
 * KICONV_SET_ERRNO_AND_BREAK() must be available at the call site.
 *
 * The if (1) { } else wrapper turns the expansion into a single statement
 * so that the errno/break tail cannot escape an unbraced if at the call
 * site; do { } while (0) is not usable here because of the `break'.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (1) {							\
		if (flag & KICONV_REPLACE_INVALID) {			\
			ib += (advance);				\
			goto REPLACE_INVALID;				\
		}							\
		KICONV_SET_ERRNO_AND_BREAK((err));			\
	} else								\
		((void)0)
99
/*
 * One entry of a conversion table for UTF-8 -> CCK encoding: a pair of
 * 32-bit values.
 */
typedef struct {
	uint32_t	key;	/* lookup key of the entry */
	uint32_t	value;	/* value associated with the key */
} kiconv_table_t;
105
106/* Conversion table for CCK encoding -> utf8. */
107typedef struct {
108	uint32_t key;
109	uchar_t u8[4];
110} kiconv_table_array_t;
111
112/*
113 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
114 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
115 */
116typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
117	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
118
/*
 * Open and close functions shared by the UTF-8 to CCK conversions.
 */
void	*kiconv_open_to_cck(void);
int	kiconv_close_to_cck(void *kcd);
122
/*
 * Binary search function: looks for `key' in the table `tbl' holding
 * `nitems' entries.
 */
size_t	kiconv_binsearch(
	uint32_t key,
	void *tbl,
	size_t nitems);
125
126/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
127size_t 	kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
128	char **outbuf, size_t *outbytesleft, int *errno,
129	kiconv_utf8tocck_t ptr_utf8tocck);
130
131/*
132 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
133 */
134size_t 	kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
135	uchar_t *outarray, size_t *outlen, int flag, int *errno,
136	kiconv_utf8tocck_t ptr_utf8tocck);
137
/*
 * Tables defined in u8_textprep.c, declared here so that the validity of
 * UTF-8 characters and of their individual bytes can be checked.
 */

/* NOTE(review): presumably byte count per leading byte - confirm. */
extern const int8_t	u8_number_of_bytes[];

/* Lowest and highest acceptable 2nd byte, indexed by the leading byte. */
extern const uint8_t	u8_valid_min_2nd_byte[];
extern const uint8_t	u8_valid_max_2nd_byte[];
145
146#endif	/* _KERNEL */
147
148#ifdef __cplusplus
149}
150#endif
151
152#endif	/* _SYS_KICONV_CCK_COMMON_H */
153