1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, by Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #define __NEED_TCVN_2_UNI__
31 #include <unicode_tcvn.h>	/* Unicode to tcvn mapping table */
32 #include <vi_combine.h>
33 #include "common_defs.h"
34 
35 
/*
 * Conversion state handed out by _icv_open(), consulted by _icv_iconv()
 * and released by _icv_close().
 */
struct _icv_state {
    int _errno;             /* internal errno */
    unsigned short last;    /* character held back by _icv_iconv(); 0 = none */
};
typedef struct _icv_state _iconv_st;
40 
41 
42 static int binsearch(unsigned long x, Combine_map v[], int n);
43 
44 /*
45  * Open; called from iconv_open()
46  */
47 void *
_icv_open()48 _icv_open()
49 {
50     _iconv_st *st;
51 
52     if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
53         errno = ENOMEM;
54 	return ((void *) -1);
55     }
56 
57     st->_errno = 0;
58     return ((void *) st);
59 }
60 
61 
62 /*
63  * Close; called from iconv_close()
64  */
65 void
_icv_close(_iconv_st * st)66 _icv_close(_iconv_st *st)
67 {
68     if (!st)
69         errno = EBADF;
70     else
71         free(st);
72 }
73 
74 
75 /*
76  * Actual conversion; called from iconv()
77  */
78 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)79 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
80 				char **outbuf, size_t *outbytesleft)
81 {
82     int             unidx = -1;
83 #ifdef DEBUG
84     fprintf(stderr, "==========     iconv(): TCVN5712 -->UCS-2   ==========\n");
85 #endif
86     if (st == NULL) {
87         errno = EBADF;
88         return ((size_t) -1);
89     }
90 
91     if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
92         st->_errno = 0;
93         return ((size_t) 0);
94     }
95 
96     st->_errno = 0;     /* Reset internal errno */
97     errno = 0;          /* Reset external errno */
98 
99     /* Convert tcvn encoding to UCS-2 */
100     while (*inbytesleft > 0 && *outbytesleft > 0) {
101         unsigned long uni = 0;
102 
103         tcvn_2_uni((unsigned char*)*inbuf, &uni);
104         if (st->last != 0) {
105             if (ISCOMB_UNI(uni)) {
106                 /*
107                  * Composed characters with combine character
108                  */
109                 unsigned int k = 0;
110                 switch (uni) {
111                     case 0x0300: k = 0; break;
112                     case 0x0301: k = 1; break;
113                     case 0x0303: k = 2; break;
114                     case 0x0309: k = 3; break;
115                     case 0x0323: k = 4; break;
116                     default:
117                         break;
118                 }
119                 unidx =  binsearch(st->last, vi_comb_data, VOWEL_NUM);
120                 if (unidx >= 0) {
121                     uni = vi_comb_data[unidx].composed[k];
122                 } else {
123                     errno = EBADF;
124                 }
125                 st->last = 0;
126 
127             } else {
128                 if (st->last < 0x80) {
129                     *(*outbuf)++ = (char)st->last;
130                     (*outbytesleft) -= 1;
131                 } else if (st->last >= 0x0080 && st->last <= 0x07ff) {
132                     if (*outbytesleft < 2) {
133                         errno = E2BIG;
134                         return((size_t)-1);
135                     }
136                     *(*outbuf)++ = (char)((st->last >> 6) & 0x1f) | 0xc0;
137                     *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
138                     (*outbytesleft) -= 2;
139                 } else if (st->last >= 0x0800) {
140                     if (*outbytesleft < 3) {
141                         errno = E2BIG;
142                         return((size_t)-1);
143                     }
144                     *(*outbuf)++ = (char)((st->last >> 12) & 0xf) | 0xe0;
145                     *(*outbuf)++ = (char)((st->last >>6) & 0x3f) | 0x80;
146                     *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
147                     (*outbytesleft) -= 3;
148                 }
149             }
150             st->last = 0;
151         } else {
152             if (uni >= 0x0041 && uni <= 0x01b0
153                 && ((tcvn_comp_bases_mask[(uni-0x0040) >> 5] >> (uni & 0x1f)) & 1)) {
154                 /*
155                  * uni is vowel, it's a possible match with combine character.
156                  * Buffer it.
157                  * */
158                 st->last = uni;
159                 (*inbuf)++;
160                 (*inbytesleft)--;
161                 continue;
162             }
163         }
164 
165         if (uni < 0x80) {
166             *(*outbuf)++ = (char)uni;
167             (*outbytesleft) -= 1;
168         } else if (uni >= 0x0080 && uni <= 0x07ff) {
169             if (*outbytesleft < 2) {
170                 errno = E2BIG;
171                 return((size_t)-1);
172             }
173             *(*outbuf)++ = (char)((uni >> 6) & 0x1f) | 0xc0;
174             *(*outbuf)++ = (char)(uni & 0x3f) | 0x80;
175             (*outbytesleft) -= 2;
176         } else if (uni >= 0x0800 && uni <= 0xffff) {
177             if (*outbytesleft < 3) {
178                 errno = E2BIG;
179                 return((size_t)-1);
180             }
181             *(*outbuf)++ = (char)((uni >> 12) & 0xf) | 0xe0;
182             *(*outbuf)++ = (char)((uni >>6) & 0x3f) | 0x80;
183             *(*outbuf)++ = (char)(uni & 0x3f) | 0x80;
184             (*outbytesleft) -= 3;
185         }
186 
187 	(*inbuf)++;
188         (*inbytesleft)--;
189     }
190 
191     if ( *inbytesleft > 0 && *outbytesleft <= 0 ) {
192         errno = E2BIG;
193         st->last = 0;
194         return ((size_t)-1);
195     }
196 
197     if (st->last !=0 ) {
198         if (st->last < 0x80) {
199             *(*outbuf)++ = (char)st->last;
200             (*outbytesleft) -= 1;
201         } else if (st->last >= 0x0080 && st->last <= 0x07ff) {
202             if (*outbytesleft < 2 ) {
203                 errno = E2BIG;
204                 return((size_t)-1);
205             }
206             *(*outbuf)++ = (char)((st->last >> 6) & 0x1f) | 0xc0;
207             *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
208             (*outbytesleft) -= 2;
209         } else if (st->last >= 0x0800) {
210             if (*outbytesleft < 3) {
211                 errno = E2BIG;
212                 return((size_t)-1);
213             }
214             *(*outbuf)++ = (char)((st->last >> 12) & 0xf) | 0xe0;
215             *(*outbuf)++ = (char)((st->last >>6) & 0x3f) | 0x80;
216             *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
217             (*outbytesleft) -= 3;
218         }
219         st->last = 0;
220     }
221 
222     return ((size_t)(*inbytesleft));
223 
224 }
225 
226 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,Combine_map v[],int n)227 static int binsearch(unsigned long x, Combine_map v[], int n)
228 {
229     int low = 0;
230     int mid = 0;
231     int high = n - 1;
232 
233     low = 0;
234     while (low <= high) {
235         mid = (low + high) / 2;
236         if (x < v[mid].base)
237             high = mid - 1;
238         else if (x > v[mid].base)
239             low = mid + 1;
240         else
241             /* found match */
242             return mid;
243     }
244 
245     /* no match */
246     return (-1);
247 }
248