1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright(c) 2001 Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 #include <stdio.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <strings.h>
29 #include <stdlib.h>
30 #include "ea-iscii.h"
31 
/* Bit tested on every input byte; a byte with it set is rejected. */
#define MSB             0x80
/* Written to the output when no ISCII value was found for a byte. */
#define REPLACE_CHAR    '?'
/* Value subtracted from a byte to index the per-byte lookup tables. */
#define EA_START        0x40

/* Per-byte table lookups; the argument must not be below EA_START. */
#define get_vowel(c)        (EAISCII_vowel_type[(c) - EA_START])
#define get_nukta_value(c)  (EAISCII_nukta_type[(c) - EA_START])

/* Equality tests against the two special byte values. */
#define is_first_vowel(c)   ((c) == FIRST_VOWEL)
#define is_nukta(c)         ((c) == NUKTA_VALUE)
40 
41 typedef enum { SPACE, ASCII, POSSIBLE_ISCII, ISCII } CONTEXT;
42 typedef struct _icv_state {
43     uchar   keepc;    /* if is_vowel is true, store the char following the FIRST_VOWEL */
44     CONTEXT context;
45     int     is_vowel;
46 } _iconv_st;
47 
48 static uchar
traverse_table(Entry * entry,int num,uchar ea_iscii)49 traverse_table(Entry *entry , int num, uchar ea_iscii)
50 {
51     int   i=0;
52     uchar iscii=0;
53 
54     for ( ; i < num; ++i) {
55         Entry en = entry[i];
56 
57         if ( ea_iscii < en.ea_iscii ) break;
58         if ( ea_iscii >= en.ea_iscii && ea_iscii < en.ea_iscii + en.count ) {
59             iscii = (ea_iscii - en.ea_iscii) + en.iscii;
60             break;
61         }
62     }
63 
64     return iscii;
65 }
66 
67 /*
68  * run in ISCII context.
69  * ea_iscii being 0: flush the keepc
70  * flag return 0: don't decide iscii yet, need to advance the next char in outbuf
71  */
72 static uchar
get_iscii(_iconv_st * st,uchar ea_iscii,int * flag)73 get_iscii(_iconv_st *st, uchar ea_iscii, int *flag)
74 {
75     uchar iscii = 0;
76 
77     if ( st->keepc == 0 ) {
78         if ( ea_iscii == 0 ) { *flag = 0; return 0; }
79         if ( ea_iscii < EA_START ) return 0; /* invalid iscii */
80 
81         if ( get_nukta_value(ea_iscii) || is_first_vowel(ea_iscii) ) {
82             /* do nothing except store ea_iscii into st->keepc */
83             *flag = 0;
84             st->keepc = ea_iscii;
85         } else {
86             iscii = traverse_table( eaiscii_isc_tbl,
87                        sizeof(eaiscii_isc_tbl)/sizeof(Entry), ea_iscii);
88         }
89     } else {
90        uchar vowel, nukta_value;
91 
92        if ( st->is_vowel ) {
93            /* need decide whether it is 0xAE or 0xB2 case */
94            if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) {
95                if ( st->keepc == 0x73 ) iscii = 0xAE;
96                if ( st->keepc == 0x76 ) iscii = 0xB2;
97                st->keepc = 0;
98            } else {
99                iscii = get_vowel(st->keepc);
100                st->keepc = ea_iscii;
101            }
102            st->is_vowel = 0;
103            goto end;
104        }
105 
106        if ( is_first_vowel(st->keepc) ) {
107            if ( (ea_iscii >= EA_START) && (vowel = get_vowel(ea_iscii)) ) {
108                 if ( ea_iscii == 0x73 || ea_iscii == 0x76 ) {
109                     st->keepc = ea_iscii;
110                     *flag = 0;
111                     st->is_vowel = 1;
112                 } else {
113                     st->keepc = 0;
114                     iscii = vowel;
115                 }
116            } else {
117                 iscii = traverse_table( eaiscii_isc_tbl,
118                        sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
119                 st->keepc = ea_iscii;
120            }
121        } else if ( (st->keepc >= EA_START) && (nukta_value = get_nukta_value(st->keepc))) {
122            if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) {
123                 st->keepc = 0;
124                 iscii = nukta_value;
125            } else {
126                 iscii = traverse_table( eaiscii_isc_tbl,
127                        sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
128                 st->keepc = ea_iscii;
129            }
130        } else {
131            iscii = traverse_table( eaiscii_isc_tbl,
132                   sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
133            st->keepc = ea_iscii;
134        }
135     }
136 
137 end:
138     return iscii;
139 }
140 
141 void *
_icv_open()142 _icv_open()
143 {
144     _iconv_st *st;
145 
146     if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) {
147         errno = ENOMEM;
148         return ((void*)-1);
149     }
150 
151     bzero(st, sizeof(_iconv_st));
152 
153     return ((void*)st);
154 }
155 
156 /*
157  * Close; called from iconv_close()
158  */
159 void
_icv_close(_iconv_st * st)160 _icv_close(_iconv_st *st)
161 {
162     if (!st)
163         errno = EBADF;
164     else
165         free(st);
166 }
167 
168 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)169 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
170        char **outbuf, size_t *outbytesleft)
171 {
172     if (st == NULL) {
173         errno = EBADF;
174         return ((size_t) -1);
175     }
176 
177     if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
178         return ((size_t)0);
179     }
180 
181     /* a state machine for interpreting ISCII code */
182     while (*inbytesleft > 0 && *outbytesleft > 0) {
183         uchar c = (uchar)**inbuf;
184 
185         if ( c & MSB ) { errno = EILSEQ; return (size_t)-1; }
186 
187         switch (st->context) {
188         case SPACE:
189             if ( c == LEADING_BYTE ) st->context = POSSIBLE_ISCII;
190             else {
191                 if ( !isspace(c) ) st->context = ASCII;
192                 **outbuf = c;
193                 (*outbuf)++;
194                 (*outbytesleft)--;
195             }
196             break;
197         case ASCII:
198             if ( isspace(c) ) st->context = SPACE;
199             **outbuf = c;
200             (*outbuf)++;
201             (*outbytesleft)--;
202             break;
203         case POSSIBLE_ISCII:
204             /* it is impossible to represent with 'xx' one ASCII word that starts with 'x' */
205             if ( !isspace(c) ) { st->context = ISCII; continue; } /* don't advance */
206 
207             **outbuf = LEADING_BYTE;  /* the previous 'x' */
208             (*outbuf)++;
209             (*outbytesleft)--;
210             st->context = ASCII;
211 
212             if (*outbytesleft < 1) {
213                 errno = E2BIG;
214                 return (size_t)-1;
215             }
216 
217             **outbuf = c;
218             (*outbuf)++;
219             (*outbytesleft)--;
220             st->context = SPACE;
221 
222             break;
223         case ISCII:
224             if ( isspace(c) ) {
225                 uchar iscii;
226                 int flag = 1;
227 
228                 /* flush keepc */
229                 iscii = get_iscii(st, 0, &flag);
230                 if (flag) {
231                     if ( iscii ) **outbuf = iscii;
232                     else **outbuf = REPLACE_CHAR;
233 
234                     (*outbuf)++;
235                     (*outbytesleft)--;
236                 }
237 
238                 if ( *outbytesleft < 1 ) {
239                     errno = E2BIG;
240                     return (size_t)-1;
241                 }
242 
243                 **outbuf = c;
244                 (*outbuf)++;
245                 (*outbytesleft)--;
246                 st->context = SPACE;
247             } else {
248                uchar iscii;
249                int   flag = 1;
250 
251                iscii = get_iscii(st, c, &flag);
252                if (flag) {
253                    if ( iscii ) **outbuf = iscii;
254                    else **outbuf = REPLACE_CHAR;
255 
256                    (*outbuf)++;
257                    (*outbytesleft)--;
258                }
259             }
260             break;
261         }
262 
263         (*inbuf)++;
264         (*inbytesleft)--;
265     }
266 
267     if ( *inbytesleft > 0 && *outbytesleft == 0 ) {
268          errno = E2BIG;
269          return ((size_t)-1);
270     }
271 
272     return ((size_t)(*inbytesleft));
273 }
274