1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <euc.h>
31 #include "japanese.h"
32
33
/*
 * struct _icv_state: per-descriptor conversion status that is kept
 * between calls to _icv_iconv().
 */
struct _icv_state {
	/* Character set that was in effect when the last call returned. */
	int	_st_cset;
	/*
	 * Character set selected by the most recent escape sequence;
	 * it is restored as the current set when SI is seen.
	 */
	int	_st_cset_sav;
};
41
42 void *
_icv_open()43 _icv_open()
44 {
45 struct _icv_state *st;
46
47 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
48 == NULL)
49 return ((void *)ERR_RETURN);
50
51 st->_st_cset = st->_st_cset_sav = CS_0;
52
53 return (st);
54 }
55
/*
 * Release a conversion state.
 *
 * st is handed straight to free(), so it must be a pointer obtained
 * from malloc() (as _icv_open() does) or NULL.
 */
void
_icv_close(struct _icv_state *st)
{
	free(st);
}
61
62 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)63 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
64 char **outbuf, size_t *outbytesleft)
65 {
66 int cset;
67 int stat = ST_INIT;
68 unsigned char *op;
69 char *ip, ic;
70 size_t ileft, oleft;
71 size_t retval;
72 #ifdef RFC1468_MODE
73 unsigned short zenkaku;
74 #endif
75
76 /*
77 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
78 * and put escape sequence if needed.
79 */
80 if ((inbuf == NULL) || (*inbuf == NULL)) {
81 st->_st_cset_sav = st->_st_cset = CS_0;
82 return ((size_t)0);
83 }
84
85 cset = st->_st_cset;
86
87 ip = *inbuf;
88 op = (unsigned char *)*outbuf;
89 ileft = *inbytesleft;
90 oleft = *outbytesleft;
91
92 /*
93 * Main loop; basically 1 loop per 1 input byte
94 */
95
96 while ((int)ileft > 0) {
97 GET(ic);
98 if (stat == ST_INIT) {
99 goto text;
100 }
101
102 if (stat == ST_ESC) {
103 if (ic == MBTOG0_1) {
104 if ((int)ileft > 0) {
105 stat = ST_MBTOG0_1;
106 continue;
107 } else {
108 UNGET();
109 UNGET();
110 errno = EINVAL;
111 retval = (size_t)ERR_RETURN;
112 goto ret;
113 }
114 } else if (ic == SBTOG0_1) {
115 if ((int)ileft > 0) {
116 stat = ST_SBTOG0;
117 continue;
118 } else {
119 UNGET();
120 UNGET();
121 errno = EINVAL;
122 retval = (size_t)ERR_RETURN;
123 goto ret;
124 }
125 } else if (ic == X208REV_1) {
126 if ((int)ileft > 0) {
127 stat = ST_208REV_1;
128 continue;
129 } else {
130 UNGET();
131 UNGET();
132 errno = EINVAL;
133 retval = (size_t)ERR_RETURN;
134 goto ret;
135 }
136 } else {
137 UNGET();
138 UNGET();
139 errno = EILSEQ;
140 retval = (size_t)ERR_RETURN;
141 goto ret;
142 }
143 } else if (stat == ST_MBTOG0_1) {
144 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
145 stat = ST_INIT;
146 st->_st_cset_sav = cset = CS_1;
147 continue;
148 } else if (ic == MBTOG0_2) {
149 if ((int)ileft > 0) {
150 stat = ST_MBTOG0_2;
151 continue;
152 } else {
153 UNGET();
154 UNGET();
155 UNGET();
156 errno = EINVAL;
157 retval = (size_t)ERR_RETURN;
158 goto ret;
159 }
160 } else if (ic == F_X0212_90) {
161 stat = ST_INIT;
162 st->_st_cset_sav = cset = CS_3;
163 continue;
164 } else {
165 UNGET();
166 UNGET();
167 UNGET();
168 errno = EILSEQ;
169 retval = (size_t)ERR_RETURN;
170 goto ret;
171 }
172 } else if (stat == ST_MBTOG0_2) {
173 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
174 stat = ST_INIT;
175 st->_st_cset_sav = cset = CS_1;
176 continue;
177 } else if (ic == F_X0212_90) {
178 stat = ST_INIT;
179 st->_st_cset_sav = cset = CS_3;
180 continue;
181 } else {
182 UNGET();
183 UNGET();
184 UNGET();
185 UNGET();
186 errno = EILSEQ;
187 retval = (size_t)ERR_RETURN;
188 goto ret;
189 }
190 } else if (stat == ST_SBTOG0) {
191 if ((ic == F_ASCII) ||
192 (ic == F_X0201_RM) ||
193 (ic == F_ISO646)) {
194 stat = ST_INIT;
195 st->_st_cset_sav = cset = CS_0;
196 continue;
197 } else if (ic == F_X0201_KN) {
198 stat = ST_INIT;
199 st->_st_cset_sav = cset = CS_2;
200 continue;
201 } else {
202 UNGET();
203 UNGET();
204 UNGET();
205 errno = EILSEQ;
206 retval = (size_t)ERR_RETURN;
207 goto ret;
208 }
209 } else if (stat == ST_208REV_1) {
210 if (ic == X208REV_2) {
211 if ((int)ileft > 0) {
212 stat = ST_208REV_2;
213 continue;
214 } else {
215 UNGET();
216 UNGET();
217 UNGET();
218 errno = EINVAL;
219 retval = (size_t)ERR_RETURN;
220 goto ret;
221 }
222 } else {
223 UNGET();
224 UNGET();
225 UNGET();
226 errno = EILSEQ;
227 retval = (size_t)ERR_RETURN;
228 goto ret;
229 }
230 } else if (stat == ST_208REV_2) {
231 if (ic == ESC) {
232 if ((int)ileft > 0) {
233 stat = ST_REV_AFT_ESC;
234 continue;
235 } else {
236 UNGET();
237 UNGET();
238 UNGET();
239 UNGET();
240 errno = EINVAL;
241 retval = (size_t)ERR_RETURN;
242 goto ret;
243 }
244 } else {
245 UNGET();
246 UNGET();
247 UNGET();
248 UNGET();
249 errno = EILSEQ;
250 retval = (size_t)ERR_RETURN;
251 goto ret;
252 }
253 } else if (stat == ST_REV_AFT_ESC) {
254 if (ic == MBTOG0_1) {
255 if ((int)ileft > 0) {
256 stat = ST_REV_AFT_MBTOG0_1;
257 continue;
258 } else {
259 UNGET();
260 UNGET();
261 UNGET();
262 UNGET();
263 UNGET();
264 errno = EINVAL;
265 retval = (size_t)ERR_RETURN;
266 goto ret;
267 }
268 } else {
269 UNGET();
270 UNGET();
271 UNGET();
272 UNGET();
273 UNGET();
274 errno = EILSEQ;
275 retval = (size_t)ERR_RETURN;
276 goto ret;
277 }
278 } else if (stat == ST_REV_AFT_MBTOG0_1) {
279 if (ic == F_X0208_83_90) {
280 stat = ST_INIT;
281 st->_st_cset_sav = cset = CS_1;
282 continue;
283 } else if (ic == MBTOG0_2) {
284 if ((int)ileft > 0) {
285 stat = ST_REV_AFT_MBTOG0_2;
286 continue;
287 } else {
288 UNGET();
289 UNGET();
290 UNGET();
291 UNGET();
292 UNGET();
293 UNGET();
294 errno = EINVAL;
295 retval = (size_t)ERR_RETURN;
296 goto ret;
297 }
298 } else {
299 UNGET();
300 UNGET();
301 UNGET();
302 UNGET();
303 UNGET();
304 UNGET();
305 errno = EILSEQ;
306 retval = (size_t)ERR_RETURN;
307 goto ret;
308 }
309 } else if (stat == ST_REV_AFT_MBTOG0_2) {
310 if (ic == F_X0208_83_90) {
311 stat = ST_INIT;
312 st->_st_cset_sav = cset = CS_1;
313 continue;
314 } else {
315 UNGET();
316 UNGET();
317 UNGET();
318 UNGET();
319 UNGET();
320 UNGET();
321 UNGET();
322 errno = EILSEQ;
323 retval = (size_t)ERR_RETURN;
324 goto ret;
325 }
326 }
327 text:
328 /*
329 * Break through chars or ESC sequence
330 */
331 if (ic == ESC) {
332 if ((int)ileft > 0) {
333 stat = ST_ESC;
334 continue;
335 } else {
336 UNGET();
337 errno = EINVAL;
338 retval = (size_t)ERR_RETURN;
339 goto ret;
340 }
341 /*
342 * XXX- Because V3 mailtool uses SI/SO to switch
343 * G0 and G1 sets while it puts "iso2022-7"
344 * as its "X-Sun-Charset" tag. Though it
345 * breaks ISO-2022-JP definition based on
346 * UI-OSF, dtmail have handle them correctly.
347 * Therefore, we have to following a few codes, UGH.
348 */
349 } else if (ic == SO) {
350 cset = CS_2;
351 stat = ST_INIT;
352 continue;
353 } else if (ic == SI) {
354 cset = st->_st_cset_sav;
355 stat = ST_INIT;
356 continue;
357 } else if (!(ic & CMSB)) {
358 if (cset == CS_0) {
359 CHECK2BIG(EUCW0, 1);
360 PUT(ic);
361 continue;
362 } else if (cset == CS_1) {
363 if ((int)ileft > 0) {
364 CHECK2BIG(EUCW1, 1);
365 if ((ic < 0x21) || (ic == 0x7f)) {
366 UNGET();
367 errno = EILSEQ;
368 retval = (size_t)ERR_RETURN;
369 goto ret;
370 } else if ((*ip < 0x21) || (*ip ==
371 0x7f)) {
372 UNGET();
373 errno = EILSEQ;
374 retval = (size_t)ERR_RETURN;
375 goto ret;
376 }
377 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA */
378 if ((ic == 0x2d) || (0x75 <= ic )){
379 PUT((EGETA >> 8) & 0xff);
380 GET(ic); /* Get dummy */
381 PUT(EGETA & 0xff);
382 continue;
383 }
384 #endif /* RFC1468_MODE */
385 PUT(ic | CMSB);
386 GET(ic);
387 PUT(ic | CMSB);
388 stat = ST_INIT;
389 continue;
390 } else {
391 UNGET();
392 errno = EINVAL;
393 retval = (size_t)ERR_RETURN;
394 goto ret;
395 }
396 } else if (cset == CS_2) {
397 if (!ISSJKANA((ic | CMSB))) {
398 UNGET();
399 errno = EILSEQ;
400 retval = (size_t)ERR_RETURN;
401 goto ret;
402 }
403 #ifdef RFC1468_MODE /* Convert JIS X 0201 Kana to JIS X 0208 Kana */
404 CHECK2BIG(EUCW1, 1);
405 zenkaku = halfkana2zenkakue[(ic - 0x21)];
406 ic = (unsigned char)((zenkaku >> 8) & 0xFF);
407 PUT(ic);
408 ic = (unsigned char)(zenkaku & 0xFF);
409 PUT(ic);
410 #else /* ISO-2022-JP.UIOSF */
411 CHECK2BIG(EUCW2 + SEQ_SS, 1);
412 PUT(SS2);
413 PUT(ic | CMSB);
414 #endif /* RFC1468_MODE */
415 continue;
416 } else if (cset == CS_3) {
417 if ((int)ileft > 0) {
418 if ((ic < 0x21) || (ic == 0x7f)) {
419 UNGET();
420 errno = EILSEQ;
421 retval = (size_t)ERR_RETURN;
422 goto ret;
423 } else if ((*ip < 0x21) || (*ip ==
424 0x7f)) {
425 UNGET();
426 errno = EILSEQ;
427 retval = (size_t)ERR_RETURN;
428 goto ret;
429 }
430 #ifdef RFC1468_MODE /* Convert JIS X 0212 to GETA */
431 CHECK2BIG(EUCW1, 1);
432 PUT((EGETA >> 8) | CMSB);
433 GET(ic); /* Get dummy */
434 PUT((EGETA & CMASK) | CMSB);
435 #else /* ISO-2022-JP.UIOSF */
436 CHECK2BIG(EUCW3 + SEQ_SS, 1);
437 PUT(SS3);
438 PUT(ic | CMSB);
439 GET(ic);
440 PUT(ic | CMSB);
441 #endif /* RFC1468_MODE */
442 stat = ST_INIT;
443 continue;
444 } else {
445 UNGET();
446 errno = EINVAL;
447 retval = (size_t)ERR_RETURN;
448 goto ret;
449 }
450 }
451 } else {
452 UNGET();
453 errno = EILSEQ;
454 retval = (size_t)ERR_RETURN;
455 goto ret;
456 }
457 }
458 retval = ileft;
459 ret:
460 *inbuf = ip;
461 *inbytesleft = ileft;
462 *outbuf = (char *)op;
463 *outbytesleft = oleft;
464 st->_st_cset = cset;
465
466 return (retval);
467 }
468