1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 by Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <sys/types.h>
30 #include <sys/isa_defs.h>
31 #include "utf8_to_utf_ebcdic.h"
32 
33 void *
_icv_open()34 _icv_open()
35 {
36 	return((void *)MAGIC_NUMBER);
37 }
38 
39 
40 void
_icv_close(int * cd)41 _icv_close(int *cd)
42 {
43 	if (! cd || cd != (int *)MAGIC_NUMBER)
44 		errno = EBADF;
45 }
46 
47 
/*
 * Output-space guard for the conversion loop in _icv_iconv().
 *
 * If fewer than (sz) bytes remain between ob and obtail, the macro
 * rewinds ib to ib_org (the start of the character being converted),
 * records E2BIG in errno, sets ret_val to (size_t)-1 and leaves the
 * enclosing loop with "break".
 *
 * The macro relies on the caller's locals ob, obtail, ib, ib_org and
 * ret_val.  It is deliberately a bare "if" rather than a
 * do { ... } while (0) wrapper: the "break" has to apply to the
 * caller's loop, so the macro must be expanded directly in that loop's
 * body (not inside a nested loop or switch).
 */
#define	OUTBUF_SIZE_CHECK(sz) \
	if ((sz) > (obtail - ob)) { \
		ret_val = (size_t)-1; \
		errno = E2BIG; \
		ib = ib_org; \
		break; \
	}

/*
 * Look up the output byte for the value (i8) in the i8_to_utf_ebcdic
 * table.  The table is not defined in this file; presumably it comes
 * from utf8_to_utf_ebcdic.h.
 */
#define	I8_UTFEBICDIC(i8)	(i8_to_utf_ebcdic[(i8)])
57 
58 size_t
_icv_iconv(int * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)59 _icv_iconv(int *cd, char **inbuf, size_t *inbufleft, char **outbuf,
60                 size_t *outbufleft)
61 {
62 	size_t ret_val = 0;
63 	uchar_t *ib;
64 	uchar_t *ob;
65 	uchar_t *ibtail;
66 	uchar_t *obtail;
67 
68 	if (cd != (int *)MAGIC_NUMBER) {
69 		errno = EBADF;
70 		return((size_t)-1);
71 	}
72 
73 	if (!inbuf || !(*inbuf))
74 		return((size_t)0);
75 
76 	ib = (uchar_t *)*inbuf;
77 	ob = (uchar_t *)*outbuf;
78 	ibtail = ib + *inbufleft;
79 	obtail = ob + *outbufleft;
80 
81 	while (ib < ibtail) {
82 		uchar_t *ib_org;
83 		uint_t u4;
84 		uint_t first_byte;
85 		signed char sz;
86 
87 		sz = number_of_bytes_in_utf8_char[*ib];
88 		if (sz == ICV_TYPE_ILLEGAL_CHAR) {
89 			errno = EILSEQ;
90 			ret_val = (size_t)-1;
91 			break;
92 		}
93 
94 		if ((ibtail - ib) < sz) {
95 			errno = EINVAL;
96 			ret_val = (size_t)-1;
97 			break;
98 		}
99 
100 		ib_org = ib;
101 		first_byte = *ib;
102 		u4 = (uint_t)(*ib++ & masks_tbl[sz]);
103 		for (; sz > 1; sz--) {
104 			if (first_byte) {
105 				if (((uchar_t)*ib) <
106 					valid_min_2nd_byte[first_byte] ||
107 				    ((uchar_t)*ib) >
108 					valid_max_2nd_byte[first_byte]) {
109 					ib = ib_org;
110 					errno = EILSEQ;
111 					ret_val = (size_t)-1;
112 					goto ILLEGAL_CHAR_ERR;
113 				}
114 				first_byte = 0;
115 			} else if (((uint_t)*ib) < 0x80 ||
116 				   ((uint_t)*ib) > 0xbf) {
117 				ib = ib_org;
118 				errno = EILSEQ;
119 				ret_val = (size_t)-1;
120 				goto ILLEGAL_CHAR_ERR;
121 			}
122 			u4 = (u4 << ICV_UTF8_BIT_SHIFT) |
123 				(((uint_t)*ib) & ICV_UTF8_BIT_MASK);
124 			ib++;
125 		}
126 
127 		/* Check against known non-characters. */
128 		if ((u4 & ICV_UTF32_NONCHAR_mask) == ICV_UTF32_NONCHAR_fffe ||
129 		    (u4 & ICV_UTF32_NONCHAR_mask) == ICV_UTF32_NONCHAR_ffff ||
130 		    u4 > ICV_UTF32_LAST_VALID_CHAR ||
131 		    (u4 >= ICV_UTF32_SURROGATE_START_d800 &&
132 		    u4 <= ICV_UTF32_SURROGATE_END_dfff) ||
133 		    (u4 >= ICV_UTF32_ARABIC_NONCHAR_START_fdd0 &&
134 		    u4 <= ICV_UTF32_ARABIC_NONCHAR_END_fdef)) {
135 			ib = ib_org;
136 			errno = EILSEQ;
137 			ret_val = (size_t)-1;
138 			goto ILLEGAL_CHAR_ERR;
139 		}
140 
141 		if (u4 <= 0x7f) {
142 			OUTBUF_SIZE_CHECK(1);
143 			*ob++ = I8_UTFEBICDIC(u4);
144 		} else if (u4 <= 0x9f) {
145 			OUTBUF_SIZE_CHECK(1);
146 			*ob++ = I8_UTFEBICDIC(u4);
147 		} else if (u4 <= 0x3ff) {
148 			OUTBUF_SIZE_CHECK(2);
149 			*ob++ = I8_UTFEBICDIC(0xc0 | ((u4 & 0x03e0) >> 5));
150 			*ob++ = I8_UTFEBICDIC(0xa0 |  (u4 & 0x001f));
151 		} else if (u4 <= 0x3fff) {
152 			OUTBUF_SIZE_CHECK(3);
153 			*ob++ = I8_UTFEBICDIC(0xe0 | ((u4 & 0x3c00) >> 10));
154 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x03e0) >> 5));
155 			*ob++ = I8_UTFEBICDIC(0xa0 |  (u4 & 0x001f));
156 		} else if (u4 <= 0x3ffff) {
157 			OUTBUF_SIZE_CHECK(4);
158 			*ob++ = I8_UTFEBICDIC(0xf0 | ((u4 & 0x38000) >> 15));
159 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x07c00) >> 10));
160 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x003e0) >> 5));
161 			*ob++ = I8_UTFEBICDIC(0xa0 |  (u4 & 0x0001f));
162 		} else if (u4 <= 0x3fffff) {
163 			OUTBUF_SIZE_CHECK(5);
164 			*ob++ = I8_UTFEBICDIC(0xf8 | ((u4 & 0x300000) >> 20));
165 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x0f8000) >> 15));
166 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x007c00) >> 10));
167 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x0003e0) >> 5));
168 			*ob++ = I8_UTFEBICDIC(0xa0 |  (u4 & 0x00001f));
169 		} else if (u4 <= 0x3ffffff) {
170 			OUTBUF_SIZE_CHECK(6);
171 			*ob++ = I8_UTFEBICDIC(0xfc | ((u4 & 0x2000000) >> 25));
172 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x1f00000) >> 20));
173 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x00f8000) >> 15));
174 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x0007c00) >> 10));
175 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x00003e0) >> 5));
176 			*ob++ = I8_UTFEBICDIC(0xa0 |  (u4 & 0x000001f));
177 		} else if (u4 <= 0x7fffffff) {
178 			OUTBUF_SIZE_CHECK(7);
179 			*ob++ = I8_UTFEBICDIC(0xfe | ((u4 & 0x40000000) >> 30));
180 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x3e000000) >> 25));
181 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x01f00000) >> 20));
182 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x000f8000) >> 15));
183 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x00007c00) >> 10));
184 			*ob++ = I8_UTFEBICDIC(0xa0 | ((u4 & 0x000003e0) >> 5));
185 			*ob++ = I8_UTFEBICDIC(0xa0 |  (u4 & 0x0000001f));
186 		} else {
187 			ib = ib_org;
188 			errno = EILSEQ;
189 			ret_val = (size_t)-1;
190 			break;
191 		}
192 	}
193 
194 ILLEGAL_CHAR_ERR:
195 	*inbuf = (char *)ib;
196 	*inbufleft = ibtail - ib;
197 	*outbuf = (char *)ob;
198 	*outbufleft = obtail - ob;
199 
200 	return(ret_val);
201 }
202