1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31
/*
 * struct _icv_state; keeps the conversion status between calls
 */
struct _icv_state {
	/*
	 * Character set in effect: loaded at the start of _icv_iconv()
	 * and written back when that call returns.
	 */
	int _st_cset;
	/*
	 * Character set last designated by an escape sequence; this is
	 * the value _icv_iconv() switches back to when it reads SI.
	 */
	int _st_cset_sav;
};
39
40 static unsigned short lookuptbl(unsigned short);
41
42 void *
_icv_open()43 _icv_open()
44 {
45 struct _icv_state *st;
46
47 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
48 == NULL)
49 return ((void *)ERR_RETURN);
50
51 st->_st_cset = st->_st_cset_sav = CS_0;
52
53 return (st);
54 }
55
/*
 * _icv_close()
 * Dispose of a conversion state; the only resource it owns is the
 * heap block itself, so a single free() is all that is required.
 */
void
_icv_close(struct _icv_state *st)
{
	free((void *)st);
}
61
62 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)63 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
64 char **outbuf, size_t *outbytesleft)
65 {
66 int cset;
67 int stat = ST_INIT;
68 unsigned char *op, ic;
69 char *ip;
70 size_t ileft, oleft;
71 size_t retval;
72 #ifdef RFC1468_MODE
73 unsigned short zenkaku;
74 #endif
75
76 /*
77 * If inbuf or *inbuf is NULL, reset conversion descriptor
78 * and put escape sequence if needed.
79 */
80 if ((inbuf == NULL) || (*inbuf == NULL)) {
81 st->_st_cset_sav = st->_st_cset = CS_0;
82 return ((size_t)0);
83 }
84
85 cset = st->_st_cset;
86
87 ip = *inbuf;
88 op = (unsigned char *)*outbuf;
89 ileft = *inbytesleft;
90 oleft = *outbytesleft;
91
92 /*
93 * Main loop; basically 1 loop per 1 input byte
94 */
95
96 while ((int)ileft > 0) {
97 GET(ic);
98 if (stat == ST_INIT) {
99 goto text;
100 }
101 /*
102 * Half way of Kanji or ESC sequence
103 */
104 if (stat == ST_ESC) {
105 if (ic == MBTOG0_1) {
106 if ((int)ileft > 0) {
107 stat = ST_MBTOG0_1;
108 continue;
109 } else {
110 UNGET();
111 UNGET();
112 errno = EINVAL;
113 retval = (size_t)ERR_RETURN;
114 goto ret;
115 }
116 } else if (ic == SBTOG0_1) {
117 if ((int)ileft > 0) {
118 stat = ST_SBTOG0;
119 continue;
120 } else {
121 UNGET();
122 UNGET();
123 errno = EINVAL;
124 retval = (size_t)ERR_RETURN;
125 goto ret;
126 }
127 } else if (ic == X208REV_1) {
128 if ((int)ileft > 0) {
129 stat = ST_208REV_1;
130 continue;
131 } else {
132 UNGET();
133 UNGET();
134 errno = EINVAL;
135 retval = (size_t)ERR_RETURN;
136 goto ret;
137 }
138 } else {
139 UNGET();
140 UNGET();
141 errno = EILSEQ;
142 retval = (size_t)ERR_RETURN;
143 goto ret;
144 }
145 } else if (stat == ST_MBTOG0_1) {
146 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
147 stat = ST_INIT;
148 st->_st_cset_sav = cset = CS_1;
149 continue;
150 } else if (ic == MBTOG0_2) {
151 if ((int)ileft > 0) {
152 stat = ST_MBTOG0_2;
153 continue;
154 } else {
155 UNGET();
156 UNGET();
157 UNGET();
158 errno = EINVAL;
159 retval = (size_t)ERR_RETURN;
160 goto ret;
161 }
162 } else if (ic == F_X0212_90) {
163 stat = ST_INIT;
164 st->_st_cset_sav = cset = CS_3;
165 continue;
166 } else {
167 UNGET();
168 UNGET();
169 UNGET();
170 errno = EILSEQ;
171 retval = (size_t)ERR_RETURN;
172 goto ret;
173 }
174 } else if (stat == ST_MBTOG0_2) {
175 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
176 stat = ST_INIT;
177 st->_st_cset_sav = cset = CS_1;
178 continue;
179 } else if (ic == F_X0212_90) {
180 stat = ST_INIT;
181 st->_st_cset_sav = cset = CS_3;
182 continue;
183 } else {
184 UNGET();
185 UNGET();
186 UNGET();
187 UNGET();
188 errno = EILSEQ;
189 retval = (size_t)ERR_RETURN;
190 goto ret;
191 }
192 } else if (stat == ST_SBTOG0) {
193 if ((ic == F_ASCII) ||
194 (ic == F_X0201_RM) ||
195 (ic == F_ISO646)) {
196 stat = ST_INIT;
197 st->_st_cset_sav = cset = CS_0;
198 continue;
199 } if (ic == F_X0201_KN) {
200 st->_st_cset_sav = cset = CS_2;
201 stat = ST_INIT;
202 continue;
203 } else {
204 UNGET();
205 UNGET();
206 UNGET();
207 errno = EILSEQ;
208 retval = (size_t)ERR_RETURN;
209 goto ret;
210 }
211 } else if (stat == ST_208REV_1) {
212 if (ic == X208REV_2) {
213 if ((int)ileft > 0) {
214 stat = ST_208REV_2;
215 continue;
216 } else {
217 UNGET();
218 UNGET();
219 UNGET();
220 errno = EINVAL;
221 retval = (size_t)ERR_RETURN;
222 goto ret;
223 }
224 } else {
225 UNGET();
226 UNGET();
227 UNGET();
228 errno = EILSEQ;
229 retval = (size_t)ERR_RETURN;
230 goto ret;
231 }
232 } else if (stat == ST_208REV_2) {
233 if (ic == ESC) {
234 if ((int)ileft > 0) {
235 stat = ST_REV_AFT_ESC;
236 continue;
237 } else {
238 UNGET();
239 UNGET();
240 UNGET();
241 UNGET();
242 errno = EINVAL;
243 retval = (size_t)ERR_RETURN;
244 goto ret;
245 }
246 } else {
247 UNGET();
248 UNGET();
249 UNGET();
250 UNGET();
251 errno = EILSEQ;
252 retval = (size_t)ERR_RETURN;
253 goto ret;
254 }
255 } else if (stat == ST_REV_AFT_ESC) {
256 if (ic == MBTOG0_1) {
257 if ((int)ileft > 0) {
258 stat = ST_REV_AFT_MBTOG0_1;
259 continue;
260 } else {
261 UNGET();
262 UNGET();
263 UNGET();
264 UNGET();
265 UNGET();
266 errno = EINVAL;
267 retval = (size_t)ERR_RETURN;
268 goto ret;
269 }
270 } else {
271 UNGET();
272 UNGET();
273 UNGET();
274 UNGET();
275 UNGET();
276 errno = EILSEQ;
277 retval = (size_t)ERR_RETURN;
278 goto ret;
279 }
280 } else if (stat == ST_REV_AFT_MBTOG0_1) {
281 if (ic == F_X0208_83_90) {
282 stat = ST_INIT;
283 st->_st_cset_sav = cset = CS_1;
284 continue;
285 } else if (ic == MBTOG0_2) {
286 if ((int)ileft > 0) {
287 stat = ST_REV_AFT_MBTOG0_2;
288 continue;
289 } else {
290 UNGET();
291 UNGET();
292 UNGET();
293 UNGET();
294 UNGET();
295 UNGET();
296 errno = EINVAL;
297 retval = (size_t)ERR_RETURN;
298 goto ret;
299 }
300 } else {
301 UNGET();
302 UNGET();
303 UNGET();
304 UNGET();
305 UNGET();
306 UNGET();
307 errno = EILSEQ;
308 retval = (size_t)ERR_RETURN;
309 goto ret;
310 }
311 } else if (stat == ST_REV_AFT_MBTOG0_2) {
312 if (ic == F_X0208_83_90) {
313 stat = ST_INIT;
314 st->_st_cset_sav = cset = CS_1;
315 continue;
316 } else {
317 UNGET();
318 UNGET();
319 UNGET();
320 UNGET();
321 UNGET();
322 UNGET();
323 UNGET();
324 errno = EILSEQ;
325 retval = (size_t)ERR_RETURN;
326 goto ret;
327 }
328 }
329 text:
330 /*
331 * Break through chars or ESC sequence
332 */
333 if (ic == ESC) {
334 if ((int)ileft > 0) {
335 stat = ST_ESC;
336 continue;
337 } else {
338 UNGET();
339 errno = EINVAL;
340 retval = (size_t)ERR_RETURN;
341 goto ret;
342 }
343 /*
344 * XXX- Because V3 mailtool uses SI/SO to switch
345 * G0 and G1 sets while it puts "iso2022-7"
346 * as its "X-Sun-Charset" tag. Though it
347 * breaks ISO-2022-JP definition based on
348 * UI-OSF, dtmail have handle them correctly.
349 * Therefore, we have to following a few codes, UGH.
350 */
351 } else if (ic == SO) {
352 cset = CS_2;
353 stat = ST_INIT;
354 continue;
355 } else if (ic == SI) {
356 cset = st->_st_cset_sav;
357 stat = ST_INIT;
358 continue;
359 }
360 if (!(ic & CMSB)) {
361 if (cset == CS_0) { /* ASCII or JIS roman */
362 CHECK2BIG(SJISW0, 1);
363 PUT(ic);
364 continue;
365 } else if (cset == CS_1) { /* CS_1 Kanji starts */
366 if ((int)ileft > 0) {
367 int even_ku;
368 CHECK2BIG(SJISW1, 1);
369 if ((ic < 0x21) || (ic == 0x7f)) {
370 UNGET();
371 errno = EILSEQ;
372 retval = (size_t)ERR_RETURN;
373 goto ret;
374 }
375 if ((*ip < 0x21) || (*ip == 0x7f)) {
376 UNGET();
377 errno = EILSEQ;
378 retval = (size_t)ERR_RETURN;
379 goto ret;
380 }
381 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA */
382 if ((ic == 0x2d) || (0x75 <= ic)) {
383 PUT(PGETA >> 8);
384 GET(ic); /* Get dummy */
385 PUT(PGETA & 0xff);
386 continue;
387 }
388 #endif /* RFC1468_MODE */
389 PUT(jis208tosj1[ic]);
390 if ((ic % 2) == 0)
391 even_ku = TRUE;
392 else
393 even_ku = FALSE;
394 GET(ic);
395 if (even_ku)
396 ic += 0x80;
397 PUT(jistosj2[ic]);
398 continue;
399 } else { /* input fragment of Kanji */
400 UNGET();
401 errno = EINVAL;
402 retval = (size_t)ERR_RETURN;
403 goto ret;
404 }
405 } else if (cset == CS_2) { /* Hankaku Katakana */
406 if (!ISSJKANA((ic | CMSB))) {
407 UNGET();
408 errno = EILSEQ;
409 retval = (size_t)ERR_RETURN;
410 goto ret;
411 }
412 #ifdef RFC1468_MODE /* Convert JIS X 0201 kana to PCK zenkaku Kana */
413 CHECK2BIG(SJISW1, 1);
414 zenkaku = halfkana2zenkakus[(ic - 0x21)];
415 ic = (unsigned char)((zenkaku >> 8) & 0xff);
416 PUT(ic);
417 ic = (unsigned char)(zenkaku & 0xff);
418 PUT(ic);
419 #else /* ISO-2022-JP.UIOSF */
420 CHECK2BIG(SJISW2, 1);
421 PUT(ic | CMSB);
422 #endif /* RFC1468_MODE */
423 continue;
424 } else if (cset == CS_3) { /* CS_3 Kanji starts */
425 unsigned short dest;
426 if ((int)ileft > 0) {
427 CHECK2BIG(SJISW1, 1);
428 if ((ic < 0x21) || (ic == 0x7f)) {
429 UNGET();
430 errno = EILSEQ;
431 retval = (size_t)ERR_RETURN;
432 goto ret;
433 }
434 if ((*ip < 0x21) || (*ip == 0x7f)) {
435 UNGET();
436 errno = EILSEQ;
437 retval = (size_t)ERR_RETURN;
438 goto ret;
439 }
440
441 #ifdef RFC1468_MODE /* Convert JIS X 0212 to GETA */
442 PUT(PGETA >> 8);
443 GET(ic); /* Get dummy */
444 PUT(PGETA & 0xff);
445 #else /* ISO-2022-JP.UIOSF */
446 if (ic < 0x75) { /* check IBM area */
447 dest = (ic << 8);
448 GET(ic);
449 dest += ic;
450 dest = lookuptbl(dest);
451 if (dest == 0xffff) {
452 /*
453 * Illegal code points
454 * in G3 plane.
455 */
456 UNGET();
457 UNGET();
458 errno = EILSEQ;
459 retval =
460 (size_t)ERR_RETURN;
461 goto ret;
462 } else {
463 PUT((dest >> 8) & 0xff);
464 PUT(dest & 0xff);
465 }
466 } else {
467 int even_ku;
468
469 if ((ic % 2) == 0)
470 even_ku = TRUE;
471 else
472 even_ku = FALSE;
473 PUT(jis212tosj1[ic]);
474 GET(ic);
475 if (even_ku)
476 ic += 0x80;
477 PUT(jistosj2[ic]);
478 }
479 #endif /* RFC1468_MODE */
480 continue;
481 } else { /* input fragment of Kanji */
482 UNGET();
483 errno = EINVAL;
484 retval = (size_t)ERR_RETURN;
485 goto ret;
486 }
487 }
488 } else {
489 UNGET();
490 errno = EILSEQ;
491 retval = (size_t)ERR_RETURN;
492 goto ret;
493 }
494 }
495 retval = ileft;
496 ret:
497 *inbuf = ip;
498 *inbytesleft = ileft;
499 *outbuf = (char *)op;
500 *outbytesleft = oleft;
501 st->_st_cset = cset;
502
503 return (retval);
504 }
505
506 /*
507 * lookuptbl()
508 * Return the index number if its index-ed number
509 * is the same as dest value.
510 */
511 static unsigned short
lookuptbl(unsigned short dest)512 lookuptbl(unsigned short dest)
513 {
514 unsigned short tmp;
515 int i;
516 int sz = (sizeof (sjtoibmext) / sizeof (sjtoibmext[0]));
517
518 for (i = 0; i < sz; i++) {
519 tmp = (sjtoibmext[i] & 0x7f7f);
520 if (tmp == dest)
521 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
522 }
523 return (PGETA);
524 }
525