/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright(c) 2001 Sun Microsystems, Inc.
 * All rights reserved.
 */
25*16d86563SAlexander Pyhalov #include <stdio.h>
26*16d86563SAlexander Pyhalov #include <ctype.h>
27*16d86563SAlexander Pyhalov #include <errno.h>
28*16d86563SAlexander Pyhalov #include <strings.h>
29*16d86563SAlexander Pyhalov #include <stdlib.h>
30*16d86563SAlexander Pyhalov #include "ea-iscii.h"
31*16d86563SAlexander Pyhalov 
/* High bit of a byte; _icv_iconv() rejects input bytes that have it set. */
#define MSB          0x80
/* Written in place of an ISCII byte when a conversion yields 0. */
#define REPLACE_CHAR '?'
/* Lowest EA-ISCII code; the lookup tables below are indexed relative to it. */
#define EA_START     0x40

/* Table lookups: the argument must not be smaller than EA_START. */
#define get_vowel(a)        (EAISCII_vowel_type[(a) - EA_START])
#define get_nukta_value(a)  (EAISCII_nukta_type[(a) - EA_START])
/* Simple comparisons against the special codes. */
#define is_first_vowel(a)   ((a) == FIRST_VOWEL)
#define is_nukta(a)         ((a) == NUKTA_VALUE)

41*16d86563SAlexander Pyhalov typedef enum { SPACE, ASCII, POSSIBLE_ISCII, ISCII } CONTEXT;
42*16d86563SAlexander Pyhalov typedef struct _icv_state {
43*16d86563SAlexander Pyhalov     uchar   keepc;    /* if is_vowel is true, store the char following the FIRST_VOWEL */
44*16d86563SAlexander Pyhalov     CONTEXT context;
45*16d86563SAlexander Pyhalov     int     is_vowel;
46*16d86563SAlexander Pyhalov } _iconv_st;
47*16d86563SAlexander Pyhalov 
48*16d86563SAlexander Pyhalov static uchar
traverse_table(Entry * entry,int num,uchar ea_iscii)49*16d86563SAlexander Pyhalov traverse_table(Entry *entry , int num, uchar ea_iscii)
50*16d86563SAlexander Pyhalov {
51*16d86563SAlexander Pyhalov     int   i=0;
52*16d86563SAlexander Pyhalov     uchar iscii=0;
53*16d86563SAlexander Pyhalov 
54*16d86563SAlexander Pyhalov     for ( ; i < num; ++i) {
55*16d86563SAlexander Pyhalov         Entry en = entry[i];
56*16d86563SAlexander Pyhalov 
57*16d86563SAlexander Pyhalov         if ( ea_iscii < en.ea_iscii ) break;
58*16d86563SAlexander Pyhalov         if ( ea_iscii >= en.ea_iscii && ea_iscii < en.ea_iscii + en.count ) {
59*16d86563SAlexander Pyhalov             iscii = (ea_iscii - en.ea_iscii) + en.iscii;
60*16d86563SAlexander Pyhalov             break;
61*16d86563SAlexander Pyhalov         }
62*16d86563SAlexander Pyhalov     }
63*16d86563SAlexander Pyhalov 
64*16d86563SAlexander Pyhalov     return iscii;
65*16d86563SAlexander Pyhalov }
66*16d86563SAlexander Pyhalov 
67*16d86563SAlexander Pyhalov /*
68*16d86563SAlexander Pyhalov  * run in ISCII context.
69*16d86563SAlexander Pyhalov  * ea_iscii being 0: flush the keepc
70*16d86563SAlexander Pyhalov  * flag return 0: don't decide iscii yet, need to advance the next char in outbuf
71*16d86563SAlexander Pyhalov  */
72*16d86563SAlexander Pyhalov static uchar
get_iscii(_iconv_st * st,uchar ea_iscii,int * flag)73*16d86563SAlexander Pyhalov get_iscii(_iconv_st *st, uchar ea_iscii, int *flag)
74*16d86563SAlexander Pyhalov {
75*16d86563SAlexander Pyhalov     uchar iscii = 0;
76*16d86563SAlexander Pyhalov 
77*16d86563SAlexander Pyhalov     if ( st->keepc == 0 ) {
78*16d86563SAlexander Pyhalov         if ( ea_iscii == 0 ) { *flag = 0; return 0; }
79*16d86563SAlexander Pyhalov         if ( ea_iscii < EA_START ) return 0; /* invalid iscii */
80*16d86563SAlexander Pyhalov 
81*16d86563SAlexander Pyhalov         if ( get_nukta_value(ea_iscii) || is_first_vowel(ea_iscii) ) {
82*16d86563SAlexander Pyhalov             /* do nothing except store ea_iscii into st->keepc */
83*16d86563SAlexander Pyhalov             *flag = 0;
84*16d86563SAlexander Pyhalov             st->keepc = ea_iscii;
85*16d86563SAlexander Pyhalov         } else {
86*16d86563SAlexander Pyhalov             iscii = traverse_table( eaiscii_isc_tbl,
87*16d86563SAlexander Pyhalov                        sizeof(eaiscii_isc_tbl)/sizeof(Entry), ea_iscii);
88*16d86563SAlexander Pyhalov         }
89*16d86563SAlexander Pyhalov     } else {
90*16d86563SAlexander Pyhalov        uchar vowel, nukta_value;
91*16d86563SAlexander Pyhalov 
92*16d86563SAlexander Pyhalov        if ( st->is_vowel ) {
93*16d86563SAlexander Pyhalov            /* need decide whether it is 0xAE or 0xB2 case */
94*16d86563SAlexander Pyhalov            if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) {
95*16d86563SAlexander Pyhalov                if ( st->keepc == 0x73 ) iscii = 0xAE;
96*16d86563SAlexander Pyhalov                if ( st->keepc == 0x76 ) iscii = 0xB2;
97*16d86563SAlexander Pyhalov                st->keepc = 0;
98*16d86563SAlexander Pyhalov            } else {
99*16d86563SAlexander Pyhalov                iscii = get_vowel(st->keepc);
100*16d86563SAlexander Pyhalov                st->keepc = ea_iscii;
101*16d86563SAlexander Pyhalov            }
102*16d86563SAlexander Pyhalov            st->is_vowel = 0;
103*16d86563SAlexander Pyhalov            goto end;
104*16d86563SAlexander Pyhalov        }
105*16d86563SAlexander Pyhalov 
106*16d86563SAlexander Pyhalov        if ( is_first_vowel(st->keepc) ) {
107*16d86563SAlexander Pyhalov            if ( (ea_iscii >= EA_START) && (vowel = get_vowel(ea_iscii)) ) {
108*16d86563SAlexander Pyhalov                 if ( ea_iscii == 0x73 || ea_iscii == 0x76 ) {
109*16d86563SAlexander Pyhalov                     st->keepc = ea_iscii;
110*16d86563SAlexander Pyhalov                     *flag = 0;
111*16d86563SAlexander Pyhalov                     st->is_vowel = 1;
112*16d86563SAlexander Pyhalov                 } else {
113*16d86563SAlexander Pyhalov                     st->keepc = 0;
114*16d86563SAlexander Pyhalov                     iscii = vowel;
115*16d86563SAlexander Pyhalov                 }
116*16d86563SAlexander Pyhalov            } else {
117*16d86563SAlexander Pyhalov                 iscii = traverse_table( eaiscii_isc_tbl,
118*16d86563SAlexander Pyhalov                        sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
119*16d86563SAlexander Pyhalov                 st->keepc = ea_iscii;
120*16d86563SAlexander Pyhalov            }
121*16d86563SAlexander Pyhalov        } else if ( (st->keepc >= EA_START) && (nukta_value = get_nukta_value(st->keepc))) {
122*16d86563SAlexander Pyhalov            if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) {
123*16d86563SAlexander Pyhalov                 st->keepc = 0;
124*16d86563SAlexander Pyhalov                 iscii = nukta_value;
125*16d86563SAlexander Pyhalov            } else {
126*16d86563SAlexander Pyhalov                 iscii = traverse_table( eaiscii_isc_tbl,
127*16d86563SAlexander Pyhalov                        sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
128*16d86563SAlexander Pyhalov                 st->keepc = ea_iscii;
129*16d86563SAlexander Pyhalov            }
130*16d86563SAlexander Pyhalov        } else {
131*16d86563SAlexander Pyhalov            iscii = traverse_table( eaiscii_isc_tbl,
132*16d86563SAlexander Pyhalov                   sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
133*16d86563SAlexander Pyhalov            st->keepc = ea_iscii;
134*16d86563SAlexander Pyhalov        }
135*16d86563SAlexander Pyhalov     }
136*16d86563SAlexander Pyhalov 
137*16d86563SAlexander Pyhalov end:
138*16d86563SAlexander Pyhalov     return iscii;
139*16d86563SAlexander Pyhalov }
140*16d86563SAlexander Pyhalov 
141*16d86563SAlexander Pyhalov void *
_icv_open()142*16d86563SAlexander Pyhalov _icv_open()
143*16d86563SAlexander Pyhalov {
144*16d86563SAlexander Pyhalov     _iconv_st *st;
145*16d86563SAlexander Pyhalov 
146*16d86563SAlexander Pyhalov     if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) {
147*16d86563SAlexander Pyhalov         errno = ENOMEM;
148*16d86563SAlexander Pyhalov         return ((void*)-1);
149*16d86563SAlexander Pyhalov     }
150*16d86563SAlexander Pyhalov 
151*16d86563SAlexander Pyhalov     bzero(st, sizeof(_iconv_st));
152*16d86563SAlexander Pyhalov 
153*16d86563SAlexander Pyhalov     return ((void*)st);
154*16d86563SAlexander Pyhalov }
155*16d86563SAlexander Pyhalov 
156*16d86563SAlexander Pyhalov /*
157*16d86563SAlexander Pyhalov  * Close; called from iconv_close()
158*16d86563SAlexander Pyhalov  */
159*16d86563SAlexander Pyhalov void
_icv_close(_iconv_st * st)160*16d86563SAlexander Pyhalov _icv_close(_iconv_st *st)
161*16d86563SAlexander Pyhalov {
162*16d86563SAlexander Pyhalov     if (!st)
163*16d86563SAlexander Pyhalov         errno = EBADF;
164*16d86563SAlexander Pyhalov     else
165*16d86563SAlexander Pyhalov         free(st);
166*16d86563SAlexander Pyhalov }
167*16d86563SAlexander Pyhalov 
168*16d86563SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)169*16d86563SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
170*16d86563SAlexander Pyhalov        char **outbuf, size_t *outbytesleft)
171*16d86563SAlexander Pyhalov {
172*16d86563SAlexander Pyhalov     if (st == NULL) {
173*16d86563SAlexander Pyhalov         errno = EBADF;
174*16d86563SAlexander Pyhalov         return ((size_t) -1);
175*16d86563SAlexander Pyhalov     }
176*16d86563SAlexander Pyhalov 
177*16d86563SAlexander Pyhalov     if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
178*16d86563SAlexander Pyhalov         return ((size_t)0);
179*16d86563SAlexander Pyhalov     }
180*16d86563SAlexander Pyhalov 
181*16d86563SAlexander Pyhalov     /* a state machine for interpreting ISCII code */
182*16d86563SAlexander Pyhalov     while (*inbytesleft > 0 && *outbytesleft > 0) {
183*16d86563SAlexander Pyhalov         uchar c = (uchar)**inbuf;
184*16d86563SAlexander Pyhalov 
185*16d86563SAlexander Pyhalov         if ( c & MSB ) { errno = EILSEQ; return (size_t)-1; }
186*16d86563SAlexander Pyhalov 
187*16d86563SAlexander Pyhalov         switch (st->context) {
188*16d86563SAlexander Pyhalov         case SPACE:
189*16d86563SAlexander Pyhalov             if ( c == LEADING_BYTE ) st->context = POSSIBLE_ISCII;
190*16d86563SAlexander Pyhalov             else {
191*16d86563SAlexander Pyhalov                 if ( !isspace(c) ) st->context = ASCII;
192*16d86563SAlexander Pyhalov                 **outbuf = c;
193*16d86563SAlexander Pyhalov                 (*outbuf)++;
194*16d86563SAlexander Pyhalov                 (*outbytesleft)--;
195*16d86563SAlexander Pyhalov             }
196*16d86563SAlexander Pyhalov             break;
197*16d86563SAlexander Pyhalov         case ASCII:
198*16d86563SAlexander Pyhalov             if ( isspace(c) ) st->context = SPACE;
199*16d86563SAlexander Pyhalov             **outbuf = c;
200*16d86563SAlexander Pyhalov             (*outbuf)++;
201*16d86563SAlexander Pyhalov             (*outbytesleft)--;
202*16d86563SAlexander Pyhalov             break;
203*16d86563SAlexander Pyhalov         case POSSIBLE_ISCII:
204*16d86563SAlexander Pyhalov             /* it is impossible to represent with 'xx' one ASCII word that starts with 'x' */
205*16d86563SAlexander Pyhalov             if ( !isspace(c) ) { st->context = ISCII; continue; } /* don't advance */
206*16d86563SAlexander Pyhalov 
207*16d86563SAlexander Pyhalov             **outbuf = LEADING_BYTE;  /* the previous 'x' */
208*16d86563SAlexander Pyhalov             (*outbuf)++;
209*16d86563SAlexander Pyhalov             (*outbytesleft)--;
210*16d86563SAlexander Pyhalov             st->context = ASCII;
211*16d86563SAlexander Pyhalov 
212*16d86563SAlexander Pyhalov             if (*outbytesleft < 1) {
213*16d86563SAlexander Pyhalov                 errno = E2BIG;
214*16d86563SAlexander Pyhalov                 return (size_t)-1;
215*16d86563SAlexander Pyhalov             }
216*16d86563SAlexander Pyhalov 
217*16d86563SAlexander Pyhalov             **outbuf = c;
218*16d86563SAlexander Pyhalov             (*outbuf)++;
219*16d86563SAlexander Pyhalov             (*outbytesleft)--;
220*16d86563SAlexander Pyhalov             st->context = SPACE;
221*16d86563SAlexander Pyhalov 
222*16d86563SAlexander Pyhalov             break;
223*16d86563SAlexander Pyhalov         case ISCII:
224*16d86563SAlexander Pyhalov             if ( isspace(c) ) {
225*16d86563SAlexander Pyhalov                 uchar iscii;
226*16d86563SAlexander Pyhalov                 int flag = 1;
227*16d86563SAlexander Pyhalov 
228*16d86563SAlexander Pyhalov                 /* flush keepc */
229*16d86563SAlexander Pyhalov                 iscii = get_iscii(st, 0, &flag);
230*16d86563SAlexander Pyhalov                 if (flag) {
231*16d86563SAlexander Pyhalov                     if ( iscii ) **outbuf = iscii;
232*16d86563SAlexander Pyhalov                     else **outbuf = REPLACE_CHAR;
233*16d86563SAlexander Pyhalov 
234*16d86563SAlexander Pyhalov                     (*outbuf)++;
235*16d86563SAlexander Pyhalov                     (*outbytesleft)--;
236*16d86563SAlexander Pyhalov                 }
237*16d86563SAlexander Pyhalov 
238*16d86563SAlexander Pyhalov                 if ( *outbytesleft < 1 ) {
239*16d86563SAlexander Pyhalov                     errno = E2BIG;
240*16d86563SAlexander Pyhalov                     return (size_t)-1;
241*16d86563SAlexander Pyhalov                 }
242*16d86563SAlexander Pyhalov 
243*16d86563SAlexander Pyhalov                 **outbuf = c;
244*16d86563SAlexander Pyhalov                 (*outbuf)++;
245*16d86563SAlexander Pyhalov                 (*outbytesleft)--;
246*16d86563SAlexander Pyhalov                 st->context = SPACE;
247*16d86563SAlexander Pyhalov             } else {
248*16d86563SAlexander Pyhalov                uchar iscii;
249*16d86563SAlexander Pyhalov                int   flag = 1;
250*16d86563SAlexander Pyhalov 
251*16d86563SAlexander Pyhalov                iscii = get_iscii(st, c, &flag);
252*16d86563SAlexander Pyhalov                if (flag) {
253*16d86563SAlexander Pyhalov                    if ( iscii ) **outbuf = iscii;
254*16d86563SAlexander Pyhalov                    else **outbuf = REPLACE_CHAR;
255*16d86563SAlexander Pyhalov 
256*16d86563SAlexander Pyhalov                    (*outbuf)++;
257*16d86563SAlexander Pyhalov                    (*outbytesleft)--;
258*16d86563SAlexander Pyhalov                }
259*16d86563SAlexander Pyhalov             }
260*16d86563SAlexander Pyhalov             break;
261*16d86563SAlexander Pyhalov         }
262*16d86563SAlexander Pyhalov 
263*16d86563SAlexander Pyhalov         (*inbuf)++;
264*16d86563SAlexander Pyhalov         (*inbytesleft)--;
265*16d86563SAlexander Pyhalov     }
266*16d86563SAlexander Pyhalov 
267*16d86563SAlexander Pyhalov     if ( *inbytesleft > 0 && *outbytesleft == 0 ) {
268*16d86563SAlexander Pyhalov          errno = E2BIG;
269*16d86563SAlexander Pyhalov          return ((size_t)-1);
270*16d86563SAlexander Pyhalov     }
271*16d86563SAlexander Pyhalov 
272*16d86563SAlexander Pyhalov     return ((size_t)(*inbytesleft));
273*16d86563SAlexander Pyhalov }
274