1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright(c) 2001 Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25 #include <stdio.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <strings.h>
29 #include <stdlib.h>
30 #include "ea-iscii.h"
31
/*
 * Byte-level constants used by the converter.
 *
 * MSB          - high bit; _icv_iconv() rejects any input byte that has it set.
 * REPLACE_CHAR - written to the output when get_iscii() finds no mapping.
 * EA_START     - value subtracted from an EA-ISCII byte to index the
 *                EAISCII_*_type lookup tables.
 */
#define MSB             0x80
#define REPLACE_CHAR    '?'
#define EA_START        0x40

/*
 * Table lookups and classification helpers.
 *
 * get_vowel() and get_nukta_value() index their tables with (a) - EA_START,
 * so the caller must make sure that (a) >= EA_START before using them.
 * A zero result is treated by the callers as "no such mapping".
 */
#define get_vowel(a)        (EAISCII_vowel_type[(a) - EA_START])
#define get_nukta_value(a)  (EAISCII_nukta_type[(a) - EA_START])
#define is_first_vowel(a)   ((a) == FIRST_VOWEL)
#define is_nukta(a)         ((a) == NUKTA_VALUE)
40
41 typedef enum { SPACE, ASCII, POSSIBLE_ISCII, ISCII } CONTEXT;
42 typedef struct _icv_state {
43 uchar keepc; /* if is_vowel is true, store the char following the FIRST_VOWEL */
44 CONTEXT context;
45 int is_vowel;
46 } _iconv_st;
47
48 static uchar
traverse_table(Entry * entry,int num,uchar ea_iscii)49 traverse_table(Entry *entry , int num, uchar ea_iscii)
50 {
51 int i=0;
52 uchar iscii=0;
53
54 for ( ; i < num; ++i) {
55 Entry en = entry[i];
56
57 if ( ea_iscii < en.ea_iscii ) break;
58 if ( ea_iscii >= en.ea_iscii && ea_iscii < en.ea_iscii + en.count ) {
59 iscii = (ea_iscii - en.ea_iscii) + en.iscii;
60 break;
61 }
62 }
63
64 return iscii;
65 }
66
67 /*
68 * run in ISCII context.
69 * ea_iscii being 0: flush the keepc
70 * flag return 0: don't decide iscii yet, need to advance the next char in outbuf
71 */
72 static uchar
get_iscii(_iconv_st * st,uchar ea_iscii,int * flag)73 get_iscii(_iconv_st *st, uchar ea_iscii, int *flag)
74 {
75 uchar iscii = 0;
76
77 if ( st->keepc == 0 ) {
78 if ( ea_iscii == 0 ) { *flag = 0; return 0; }
79 if ( ea_iscii < EA_START ) return 0; /* invalid iscii */
80
81 if ( get_nukta_value(ea_iscii) || is_first_vowel(ea_iscii) ) {
82 /* do nothing except store ea_iscii into st->keepc */
83 *flag = 0;
84 st->keepc = ea_iscii;
85 } else {
86 iscii = traverse_table( eaiscii_isc_tbl,
87 sizeof(eaiscii_isc_tbl)/sizeof(Entry), ea_iscii);
88 }
89 } else {
90 uchar vowel, nukta_value;
91
92 if ( st->is_vowel ) {
93 /* need decide whether it is 0xAE or 0xB2 case */
94 if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) {
95 if ( st->keepc == 0x73 ) iscii = 0xAE;
96 if ( st->keepc == 0x76 ) iscii = 0xB2;
97 st->keepc = 0;
98 } else {
99 iscii = get_vowel(st->keepc);
100 st->keepc = ea_iscii;
101 }
102 st->is_vowel = 0;
103 goto end;
104 }
105
106 if ( is_first_vowel(st->keepc) ) {
107 if ( (ea_iscii >= EA_START) && (vowel = get_vowel(ea_iscii)) ) {
108 if ( ea_iscii == 0x73 || ea_iscii == 0x76 ) {
109 st->keepc = ea_iscii;
110 *flag = 0;
111 st->is_vowel = 1;
112 } else {
113 st->keepc = 0;
114 iscii = vowel;
115 }
116 } else {
117 iscii = traverse_table( eaiscii_isc_tbl,
118 sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
119 st->keepc = ea_iscii;
120 }
121 } else if ( (st->keepc >= EA_START) && (nukta_value = get_nukta_value(st->keepc))) {
122 if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) {
123 st->keepc = 0;
124 iscii = nukta_value;
125 } else {
126 iscii = traverse_table( eaiscii_isc_tbl,
127 sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
128 st->keepc = ea_iscii;
129 }
130 } else {
131 iscii = traverse_table( eaiscii_isc_tbl,
132 sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc);
133 st->keepc = ea_iscii;
134 }
135 }
136
137 end:
138 return iscii;
139 }
140
141 void *
_icv_open()142 _icv_open()
143 {
144 _iconv_st *st;
145
146 if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) {
147 errno = ENOMEM;
148 return ((void*)-1);
149 }
150
151 bzero(st, sizeof(_iconv_st));
152
153 return ((void*)st);
154 }
155
156 /*
157 * Close; called from iconv_close()
158 */
159 void
_icv_close(_iconv_st * st)160 _icv_close(_iconv_st *st)
161 {
162 if (!st)
163 errno = EBADF;
164 else
165 free(st);
166 }
167
168 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)169 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
170 char **outbuf, size_t *outbytesleft)
171 {
172 if (st == NULL) {
173 errno = EBADF;
174 return ((size_t) -1);
175 }
176
177 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
178 return ((size_t)0);
179 }
180
181 /* a state machine for interpreting ISCII code */
182 while (*inbytesleft > 0 && *outbytesleft > 0) {
183 uchar c = (uchar)**inbuf;
184
185 if ( c & MSB ) { errno = EILSEQ; return (size_t)-1; }
186
187 switch (st->context) {
188 case SPACE:
189 if ( c == LEADING_BYTE ) st->context = POSSIBLE_ISCII;
190 else {
191 if ( !isspace(c) ) st->context = ASCII;
192 **outbuf = c;
193 (*outbuf)++;
194 (*outbytesleft)--;
195 }
196 break;
197 case ASCII:
198 if ( isspace(c) ) st->context = SPACE;
199 **outbuf = c;
200 (*outbuf)++;
201 (*outbytesleft)--;
202 break;
203 case POSSIBLE_ISCII:
204 /* it is impossible to represent with 'xx' one ASCII word that starts with 'x' */
205 if ( !isspace(c) ) { st->context = ISCII; continue; } /* don't advance */
206
207 **outbuf = LEADING_BYTE; /* the previous 'x' */
208 (*outbuf)++;
209 (*outbytesleft)--;
210 st->context = ASCII;
211
212 if (*outbytesleft < 1) {
213 errno = E2BIG;
214 return (size_t)-1;
215 }
216
217 **outbuf = c;
218 (*outbuf)++;
219 (*outbytesleft)--;
220 st->context = SPACE;
221
222 break;
223 case ISCII:
224 if ( isspace(c) ) {
225 uchar iscii;
226 int flag = 1;
227
228 /* flush keepc */
229 iscii = get_iscii(st, 0, &flag);
230 if (flag) {
231 if ( iscii ) **outbuf = iscii;
232 else **outbuf = REPLACE_CHAR;
233
234 (*outbuf)++;
235 (*outbytesleft)--;
236 }
237
238 if ( *outbytesleft < 1 ) {
239 errno = E2BIG;
240 return (size_t)-1;
241 }
242
243 **outbuf = c;
244 (*outbuf)++;
245 (*outbytesleft)--;
246 st->context = SPACE;
247 } else {
248 uchar iscii;
249 int flag = 1;
250
251 iscii = get_iscii(st, c, &flag);
252 if (flag) {
253 if ( iscii ) **outbuf = iscii;
254 else **outbuf = REPLACE_CHAR;
255
256 (*outbuf)++;
257 (*outbytesleft)--;
258 }
259 }
260 break;
261 }
262
263 (*inbuf)++;
264 (*inbytesleft)--;
265 }
266
267 if ( *inbytesleft > 0 && *outbytesleft == 0 ) {
268 errno = E2BIG;
269 return ((size_t)-1);
270 }
271
272 return ((size_t)(*inbytesleft));
273 }
274