1*16d86563SAlexander Pyhalov /*
2*16d86563SAlexander Pyhalov  * CDDL HEADER START
3*16d86563SAlexander Pyhalov  *
4*16d86563SAlexander Pyhalov  * The contents of this file are subject to the terms of the
5*16d86563SAlexander Pyhalov  * Common Development and Distribution License (the "License").
6*16d86563SAlexander Pyhalov  * You may not use this file except in compliance with the License.
7*16d86563SAlexander Pyhalov  *
8*16d86563SAlexander Pyhalov  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9*16d86563SAlexander Pyhalov  * or http://www.opensolaris.org/os/licensing.
10*16d86563SAlexander Pyhalov  * See the License for the specific language governing permissions
11*16d86563SAlexander Pyhalov  * and limitations under the License.
12*16d86563SAlexander Pyhalov  *
13*16d86563SAlexander Pyhalov  * When distributing Covered Code, include this CDDL HEADER in each
14*16d86563SAlexander Pyhalov  * file and include the License file at src/OPENSOLARIS.LICENSE.
15*16d86563SAlexander Pyhalov  * If applicable, add the following below this CDDL HEADER, with the
16*16d86563SAlexander Pyhalov  * fields enclosed by brackets "[]" replaced with your own identifying
17*16d86563SAlexander Pyhalov  * information: Portions Copyright [yyyy] [name of copyright owner]
18*16d86563SAlexander Pyhalov  *
19*16d86563SAlexander Pyhalov  * CDDL HEADER END
20*16d86563SAlexander Pyhalov  */
21*16d86563SAlexander Pyhalov /*
22*16d86563SAlexander Pyhalov  * Copyright (c) 1998-1999 by Sun Microsystems, Inc.
23*16d86563SAlexander Pyhalov  * All rights reserved.
24*16d86563SAlexander Pyhalov  */
25*16d86563SAlexander Pyhalov 
26*16d86563SAlexander Pyhalov #ifndef	UTF7_TO_UCS_H
27*16d86563SAlexander Pyhalov #define	UTF7_TO_UCS_H
28*16d86563SAlexander Pyhalov 
29*16d86563SAlexander Pyhalov 
30*16d86563SAlexander Pyhalov #include "common_defs.h"
31*16d86563SAlexander Pyhalov 
32*16d86563SAlexander Pyhalov 
/*
 * Reverse Modified Base64 table: indexed by an input octet, yields the
 * 6-bit value of that octet in the Base64 alphabet (RFC 2045), or -1 when
 * the octet is not a member of the alphabet.
 *
 * The RMB64_BAD* helpers only abbreviate runs of -1 entries and are
 * undefined again right after the table.
 */
#define	RMB64_BAD4	-1, -1, -1, -1
#define	RMB64_BAD16	RMB64_BAD4, RMB64_BAD4, RMB64_BAD4, RMB64_BAD4

static const signed char rmb64[0x100] = {
	/* 0x00 - 0x1f */
	RMB64_BAD16, RMB64_BAD16,

	/* 0x20 - 0x2a */
	RMB64_BAD4, RMB64_BAD4, -1, -1, -1,
	/* 0x2b: '+' */
	62,
	/* 0x2c - 0x2e */
	-1, -1, -1,
	/* 0x2f: '/' */
	63,

	/* 0x30 - 0x39: '0' - '9' */
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
	/* 0x3a - 0x40 */
	RMB64_BAD4, -1, -1, -1,

	/* 0x41 - 0x5a: 'A' - 'Z' */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
	13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
	/* 0x5b - 0x60 */
	RMB64_BAD4, -1, -1,

	/* 0x61 - 0x7a: 'a' - 'z' */
	26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
	39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
	/* 0x7b - 0x7f */
	RMB64_BAD4, -1,

	/* 0x80 - 0xff */
	RMB64_BAD16, RMB64_BAD16, RMB64_BAD16, RMB64_BAD16,
	RMB64_BAD16, RMB64_BAD16, RMB64_BAD16, RMB64_BAD16
};

#undef	RMB64_BAD16
#undef	RMB64_BAD4
52*16d86563SAlexander Pyhalov 
53*16d86563SAlexander Pyhalov /*
54*16d86563SAlexander Pyhalov  * Any UCS-2 character sequences will yield:
55*16d86563SAlexander Pyhalov  *
56*16d86563SAlexander Pyhalov  * +-16 bits (UCS-2)-+  +-16 bits (UCS-2)-+  +-16 bits (UCS-2)-+
57*16d86563SAlexander Pyhalov  * |                 |  |                 |  |                 |
58*16d86563SAlexander Pyhalov  * xxxx xxxx xxxx xxxx  xxxx xxxx xxxx xxxx  xxxx xxxx xxxx xxxx
59*16d86563SAlexander Pyhalov  * |     ||     | |      ||     | |     ||      | |     ||     |
60*16d86563SAlexander Pyhalov  * +--0--++--1--+ +---2--++--3--+ +--4--++---5--+ +--6--++--7--+ MBase64 chars
61*16d86563SAlexander Pyhalov  *                ^                      ^
62*16d86563SAlexander Pyhalov  * initially,     |                      |
63*16d86563SAlexander Pyhalov  *                four remnant bits,     |
64*16d86563SAlexander Pyhalov  *                                       two remnant bits,
65*16d86563SAlexander Pyhalov  *
66*16d86563SAlexander Pyhalov  * and, then no remnant bit for three sequential UCS-2 characters,
67*16d86563SAlexander Pyhalov  * respectively, and repeat these three UCS-2 character sequences. For the
68*16d86563SAlexander Pyhalov  * first UCS-2 character in this sequence, there will be two MBase64
69*16d86563SAlexander Pyhalov  * characters, and for the second and the third UCS-2 characters, there will be
70*16d86563SAlexander Pyhalov  * three MBase64 characters.
71*16d86563SAlexander Pyhalov  *
 * The following action numbers (0, 2, 5, and 7) are assigned to the
 * MBase64 character positions that either start a sequence or complete
 * (yield) a UCS-2 character.
75*16d86563SAlexander Pyhalov  */
/*
 * Positions within the repeating eight-character MBase64 cycle (numbered
 * 0 - 7 in the diagram above) at which a cycle starts or a 16-bit unit is
 * completed.
 */
/* Position 0: first MBase64 character of a cycle. */
#define	ICV_U7_ACTION_START			0
/* Position 2: the MBase64 character that completes the first 16-bit unit. */
#define	ICV_U7_ACTION_HARVEST1			2
/* Position 5: the MBase64 character that completes the second 16-bit unit. */
#define	ICV_U7_ACTION_HARVEST2			5
/* Position 7: the MBase64 character that completes the third 16-bit unit. */
#define	ICV_U7_ACTION_HARVEST3			7

/*
 * A value above U+10FFFF, i.e. outside what UTF-16 can represent.
 * NOTE(review): presumably used as an "invalid / nothing harvested" marker
 * by the converters that include this header -- confirm against its users.
 */
#define	ICV_U7_UCS4_OUTOFUTF16			0xfffefeff
82*16d86563SAlexander Pyhalov 
/*
 * Verify that at least `sz' bytes remain between the output cursor `ob'
 * and the end of the output buffer `obtail'.  When they do not, record
 * E2BIG in errno, set ret_val to (size_t)-1 and `break' out of the
 * enclosing loop or switch.
 *
 * This is deliberately a bare `if' rather than a do { } while (0) wrapper:
 * the `break' has to act on the caller's construct, not on one of ours.
 */
#define OUTBUF_SIZE_CHECK(sz) \
	if ((sz) > (obtail - ob)) { \
		ret_val = (size_t)-1; \
		errno = E2BIG; \
		break; \
	}
89*16d86563SAlexander Pyhalov 
/*
 * For better performance and readability, we prefer to write macros like
 * those below instead of putting the code in functions and calling them.
 */
/*
 * Store the low 16 bits of `u4' at the output cursor `ob' as one two-byte
 * unit, in the byte order selected by cd->little_endian.  If no byte order
 * mark has been produced yet (cd->bom_written is false), a two-byte BOM
 * (U+FEFF) in the same byte order is stored first and cd->bom_written is
 * set.
 *
 * `obsz' is set to the number of bytes required: ICV_FETCH_UCS_SIZE when
 * only the character is written, ICV_FETCH_UCS_SIZE_TWO when the BOM is
 * written as well (both are defined outside this header).  If fewer bytes
 * remain before `obtail', nothing is stored; errno becomes E2BIG, ret_val
 * becomes (size_t)-1 and the enclosing loop or switch is left via `break'.
 */
#define CHECK_OUTBUF_SZ_AND_WRITE_U2 \
	if (cd->bom_written) { \
		obsz = ICV_FETCH_UCS_SIZE; \
	} else { \
		obsz = ICV_FETCH_UCS_SIZE_TWO; \
	} \
	if ((obtail - ob) < obsz) { \
		ret_val = (size_t)-1; \
		errno = E2BIG; \
		break; \
	} \
	if (! cd->bom_written) { \
		ob[0] = (uchar_t)((cd->little_endian) ? 0xff : 0xfe); \
		ob[1] = (uchar_t)((cd->little_endian) ? 0xfe : 0xff); \
		ob += 2; \
		cd->bom_written = true; \
	} \
	if (cd->little_endian) { \
		ob[0] = (uchar_t)(u4 & 0xff); \
		ob[1] = (uchar_t)((u4 >> 8) & 0xff); \
		ob += 2; \
	} else { \
		ob[0] = (uchar_t)((u4 >> 8) & 0xff); \
		ob[1] = (uchar_t)(u4 & 0xff); \
		ob += 2; \
	}
118*16d86563SAlexander Pyhalov 
/*
 * Store `u4' at the output cursor `ob' as one four-byte unit, in the byte
 * order selected by cd->little_endian.  If no byte order mark has been
 * produced yet (cd->bom_written is false), a four-byte BOM (U+0000FEFF) in
 * the same byte order is stored first and cd->bom_written is set.
 *
 * `obsz' is set to the number of bytes required: ICV_FETCH_UCS_SIZE when
 * only the character is written, ICV_FETCH_UCS_SIZE_TWO when the BOM is
 * written as well (both are defined outside this header).  If fewer bytes
 * remain before `obtail', nothing is stored; errno becomes E2BIG, ret_val
 * becomes (size_t)-1 and the enclosing loop or switch is left via `break'.
 *
 * Every store goes through the byte pointer.  The two zero bytes of the
 * BOM in particular must not be written through a (ushort_t *) cast: `ob'
 * can have any alignment, and a 16-bit store to an odd address traps on
 * strict-alignment processors.
 */
#define CHECK_OUTBUF_SZ_AND_WRITE_U4 \
	obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE : ICV_FETCH_UCS_SIZE_TWO;\
	if ((obtail - ob) < obsz) { \
		errno = E2BIG; \
		ret_val = (size_t)-1; \
		break; \
	} \
	if (cd->little_endian) { \
		if (! cd->bom_written) { \
			*ob++ = (uchar_t)0xff; \
			*ob++ = (uchar_t)0xfe; \
			*ob++ = (uchar_t)0; \
			*ob++ = (uchar_t)0; \
			cd->bom_written = true; \
		} \
		*ob++ = (uchar_t)(u4 & 0xff); \
		*ob++ = (uchar_t)((u4 >> 8) & 0xff); \
		*ob++ = (uchar_t)((u4 >> 16) & 0xff); \
		*ob++ = (uchar_t)((u4 >> 24) & 0xff); \
	} else { \
		if (! cd->bom_written) { \
			*ob++ = (uchar_t)0; \
			*ob++ = (uchar_t)0; \
			*ob++ = (uchar_t)0xfe; \
			*ob++ = (uchar_t)0xff; \
			cd->bom_written = true; \
		} \
		*ob++ = (uchar_t)((u4 >> 24) & 0xff); \
		*ob++ = (uchar_t)((u4 >> 16) & 0xff); \
		*ob++ = (uchar_t)((u4 >> 8) & 0xff); \
		*ob++ = (uchar_t)(u4 & 0xff); \
	}
151*16d86563SAlexander Pyhalov 
/*
 * Store the code point held in `u4' at the output cursor `ob' as a one-
 * to four-byte UTF-8 sequence, after OUTBUF_SIZE_CHECK() has confirmed
 * that the sequence fits.
 *
 * UTF-7 covers the same code range as UTF-16, U+0000 through U+10FFFF.
 * A value beyond U+10FFFF is therefore rejected: errno is set to EILSEQ,
 * ret_val to (size_t)-1, and the enclosing loop or switch is left via
 * `break'.
 */
#define	CHECK_OUTBUF_SZ_AND_WRITE_U8_OR_EILSEQ \
	if (u4 <= 0x7f) { \
		OUTBUF_SIZE_CHECK(1); \
		ob[0] = (uchar_t)u4; \
		ob += 1; \
	} else if (u4 <= 0x7ff) { \
		OUTBUF_SIZE_CHECK(2); \
		ob[0] = (uchar_t)(0xc0 | ((u4 >> 6) & 0x1f)); \
		ob[1] = (uchar_t)(0x80 | (u4 & 0x3f)); \
		ob += 2; \
	} else if (u4 <= 0x00ffff) { \
		OUTBUF_SIZE_CHECK(3); \
		ob[0] = (uchar_t)(0xe0 | ((u4 >> 12) & 0x0f)); \
		ob[1] = (uchar_t)(0x80 | ((u4 >> 6) & 0x3f)); \
		ob[2] = (uchar_t)(0x80 | (u4 & 0x3f)); \
		ob += 3; \
	} else if (u4 <= 0x10ffff) { \
		OUTBUF_SIZE_CHECK(4); \
		ob[0] = (uchar_t)(0xf0 | ((u4 >> 18) & 0x07)); \
		ob[1] = (uchar_t)(0x80 | ((u4 >> 12) & 0x3f)); \
		ob[2] = (uchar_t)(0x80 | ((u4 >> 6) & 0x3f)); \
		ob[3] = (uchar_t)(0x80 | (u4 & 0x3f)); \
		ob += 4; \
	} else { \
		ret_val = (size_t)-1; \
		errno = EILSEQ; \
		break; \
	}
180*16d86563SAlexander Pyhalov 
181*16d86563SAlexander Pyhalov 
182*16d86563SAlexander Pyhalov #endif	/* UTF7_TO_UCS_H */
183