1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 1991-2003 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <euc.h>
31 #include "japanese.h"
32
/*
 * struct _icv_state; conversion state kept across _icv_iconv() calls
 * for one conversion descriptor.
 */
struct _icv_state {
	/* Character set in effect; loaded on entry, stored back on return. */
	int _st_cset;
	/*
	 * Character set most recently designated by an escape sequence
	 * (CS_0, CS_1 or CS_3); SI switches back to this value after SO
	 * has temporarily selected CS_2.
	 */
	int _st_cset_sav;
};

/* Reverse lookup used for the CS_3 code points below row 0x75. */
static unsigned short lookuptbl(unsigned short);
42
43 void *
_icv_open()44 _icv_open()
45 {
46 struct _icv_state *st;
47
48 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
49 == NULL)
50 return ((void *)ERR_RETURN);
51
52 st->_st_cset = st->_st_cset_sav = CS_0;
53
54 return (st);
55 }
56
/*
 * _icv_close()
 *	Release a conversion state obtained from _icv_open().  The state
 *	owns no other resources, so a single free() is sufficient.
 */
void
_icv_close(struct _icv_state *st)
{
	free(st);
}
62
63 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)64 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
65 char **outbuf, size_t *outbytesleft)
66 {
67 int cset;
68 int stat = ST_INIT;
69 unsigned char *op, ic;
70 char *ip;
71 size_t ileft, oleft;
72 size_t retval;
73
74 /*
75 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
76 * and put escape sequence if needed.
77 */
78 if ((inbuf == NULL) || (*inbuf == NULL)) {
79 st->_st_cset_sav = st->_st_cset = CS_0;
80 return ((size_t)0);
81 }
82
83 cset = st->_st_cset;
84
85 ip = *inbuf;
86 op = (unsigned char *)*outbuf;
87 ileft = *inbytesleft;
88 oleft = *outbytesleft;
89
90 /*
91 * Main loop; basically 1 loop per 1 input byte
92 */
93
94 while ((int)ileft > 0) {
95 GET(ic);
96 if (stat == ST_INIT) {
97 goto text;
98 }
99 if (stat == ST_ESC) {
100 if (ic == MBTOG0_1) {
101 if ((int)ileft > 0) {
102 stat = ST_MBTOG0_1;
103 continue;
104 } else {
105 UNGET();
106 UNGET();
107 errno = EINVAL;
108 retval = (size_t)ERR_RETURN;
109 goto ret;
110 }
111 } else if (ic == SBTOG0_1) {
112 if ((int)ileft > 0) {
113 stat = ST_SBTOG0;
114 continue;
115 } else {
116 UNGET();
117 UNGET();
118 errno = EINVAL;
119 retval = (size_t)ERR_RETURN;
120 goto ret;
121 }
122 } else if (ic == X208REV_1) {
123 if ((int)ileft > 0) {
124 stat = ST_208REV_1;
125 continue;
126 } else {
127 UNGET();
128 UNGET();
129 errno = EINVAL;
130 retval = (size_t)ERR_RETURN;
131 goto ret;
132 }
133 } else {
134 UNGET();
135 UNGET();
136 errno = EILSEQ;
137 retval = (size_t)ERR_RETURN;
138 goto ret;
139 }
140 } else if (stat == ST_MBTOG0_1) {
141 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
142 stat = ST_INIT;
143 st->_st_cset_sav = cset = CS_1;
144 continue;
145 } else if (ic == MBTOG0_2) {
146 if ((int)ileft > 0) {
147 stat = ST_MBTOG0_2;
148 continue;
149 } else {
150 UNGET();
151 UNGET();
152 UNGET();
153 errno = EINVAL;
154 retval = (size_t)ERR_RETURN;
155 goto ret;
156 }
157 } else if (ic == F_X0212_90) {
158 stat = ST_INIT;
159 st->_st_cset_sav = cset = CS_3;
160 continue;
161 } else {
162 UNGET();
163 UNGET();
164 UNGET();
165 errno = EILSEQ;
166 retval = (size_t)ERR_RETURN;
167 goto ret;
168 }
169 } else if (stat == ST_MBTOG0_2) {
170 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
171 stat = ST_INIT;
172 st->_st_cset_sav = cset = CS_1;
173 continue;
174 } else if (ic == F_X0212_90) {
175 stat = ST_INIT;
176 st->_st_cset_sav = cset = CS_3;
177 continue;
178 } else {
179 UNGET();
180 UNGET();
181 UNGET();
182 UNGET();
183 errno = EILSEQ;
184 retval = (size_t)ERR_RETURN;
185 goto ret;
186 }
187 } else if (stat == ST_SBTOG0) {
188 if ((ic == F_ASCII) ||
189 (ic == F_X0201_RM) ||
190 (ic == F_ISO646)) {
191 stat = ST_INIT;
192 st->_st_cset_sav = cset = CS_0;
193 continue;
194 } else if (ic == F_X0201_KN) {
195 cset = CS_2;
196 stat = ST_INIT;
197 continue;
198 } else {
199 UNGET();
200 UNGET();
201 UNGET();
202 errno = EILSEQ;
203 retval = (size_t)ERR_RETURN;
204 goto ret;
205 }
206 } else if (stat == ST_208REV_1) {
207 if (ic == X208REV_2) {
208 if ((int)ileft > 0) {
209 stat = ST_208REV_2;
210 continue;
211 } else {
212 UNGET();
213 UNGET();
214 UNGET();
215 errno = EINVAL;
216 retval = (size_t)ERR_RETURN;
217 goto ret;
218 }
219 } else {
220 UNGET();
221 UNGET();
222 UNGET();
223 errno = EILSEQ;
224 retval = (size_t)ERR_RETURN;
225 goto ret;
226 }
227 } else if (stat == ST_208REV_2) {
228 if (ic == ESC) {
229 if ((int)ileft > 0) {
230 stat = ST_REV_AFT_ESC;
231 continue;
232 } else {
233 UNGET();
234 UNGET();
235 UNGET();
236 UNGET();
237 errno = EINVAL;
238 retval = (size_t)ERR_RETURN;
239 goto ret;
240 }
241 } else {
242 UNGET();
243 UNGET();
244 UNGET();
245 UNGET();
246 errno = EILSEQ;
247 retval = (size_t)ERR_RETURN;
248 goto ret;
249 }
250 } else if (stat == ST_REV_AFT_ESC) {
251 if (ic == MBTOG0_1) {
252 if ((int)ileft > 0) {
253 stat = ST_REV_AFT_MBTOG0_1;
254 continue;
255 } else {
256 UNGET();
257 UNGET();
258 UNGET();
259 UNGET();
260 UNGET();
261 errno = EINVAL;
262 retval = (size_t)ERR_RETURN;
263 goto ret;
264 }
265 } else {
266 UNGET();
267 UNGET();
268 UNGET();
269 UNGET();
270 UNGET();
271 errno = EILSEQ;
272 retval = (size_t)ERR_RETURN;
273 goto ret;
274 }
275 } else if (stat == ST_REV_AFT_MBTOG0_1) {
276 if (ic == F_X0208_83_90) {
277 stat = ST_INIT;
278 st->_st_cset_sav = cset = CS_1;
279 continue;
280 } else if (ic == MBTOG0_2) {
281 if ((int)ileft > 0) {
282 stat = ST_REV_AFT_MBTOG0_2;
283 continue;
284 } else {
285 UNGET();
286 UNGET();
287 UNGET();
288 UNGET();
289 UNGET();
290 UNGET();
291 errno = EINVAL;
292 retval = (size_t)ERR_RETURN;
293 goto ret;
294 }
295 } else {
296 UNGET();
297 UNGET();
298 UNGET();
299 UNGET();
300 UNGET();
301 UNGET();
302 errno = EILSEQ;
303 retval = (size_t)ERR_RETURN;
304 goto ret;
305 }
306 } else if (stat == ST_REV_AFT_MBTOG0_2) {
307 if (ic == F_X0208_83_90) {
308 stat = ST_INIT;
309 st->_st_cset_sav = cset = CS_1;
310 continue;
311 } else {
312 UNGET();
313 UNGET();
314 UNGET();
315 UNGET();
316 UNGET();
317 UNGET();
318 UNGET();
319 errno = EILSEQ;
320 retval = (size_t)ERR_RETURN;
321 goto ret;
322 }
323 }
324 text:
325 /*
326 * Break through chars or ESC sequence
327 */
328 if (ic == ESC) {
329 if ((int)ileft > 0) {
330 stat = ST_ESC;
331 continue;
332 } else {
333 UNGET();
334 errno = EINVAL;
335 retval = (size_t)ERR_RETURN;
336 goto ret;
337 }
338 } else if (ic == SO) {
339 cset = CS_2;
340 stat = ST_INIT;
341 continue;
342 } else if (ic == SI) {
343 cset = st->_st_cset_sav;
344 stat = ST_INIT;
345 continue;
346 }
347 if (!(ic & CMSB)) {
348 if (cset == CS_0) {
349 /* ASCII or JIS roman : may be 8bit chars */
350 if (oleft < SJISW0) {
351 UNGET();
352 errno = E2BIG;
353 retval = (size_t)ERR_RETURN;
354 goto ret;
355 }
356 PUT(ic);
357 continue;
358 } else if (cset == CS_1) { /* CS_1 Kanji starts */
359 if ((int)ileft > 0) {
360 int even_ku;
361 if (oleft < SJISW1) {
362 UNGET();
363 errno = E2BIG;
364 retval = (size_t)ERR_RETURN;
365 goto ret;
366 }
367 if ((ic < 0x21) || (ic == 0x7f)) {
368 UNGET();
369 errno = EILSEQ;
370 retval = (size_t)ERR_RETURN;
371 goto ret;
372 }
373 if ((*ip < 0x21) || (*ip == 0x7f)) {
374 UNGET();
375 errno = EILSEQ;
376 retval = (size_t)ERR_RETURN;
377 goto ret;
378 }
379 PUT(jis208tosj1[ic]);
380 if ((ic % 2) == 0)
381 even_ku = TRUE;
382 else
383 even_ku = FALSE;
384 GET(ic);
385 if (even_ku)
386 ic += 0x80;
387 PUT(jistosj2[ic]);
388 continue;
389 } else { /* input fragment of Kanji */
390 UNGET();
391 errno = EINVAL;
392 retval = (size_t)ERR_RETURN;
393 goto ret;
394 }
395 } else if (cset == CS_2) { /* Hankaku Katakana */
396 if (oleft < SJISW2) {
397 UNGET();
398 errno = E2BIG;
399 retval = (size_t)ERR_RETURN;
400 goto ret;
401 }
402 PUT(ic | CMSB);
403 continue;
404 } else if (cset == CS_3) { /* CS_3 Kanji starts */
405 unsigned short dest;
406 if ((int)ileft > 0) {
407 if (oleft < SJISW1) {
408 UNGET();
409 errno = E2BIG;
410 retval = (size_t)ERR_RETURN;
411 goto ret;
412 }
413 if ((ic < 0x21) || (ic == 0x7f)) {
414 UNGET();
415 errno = EILSEQ;
416 retval = (size_t)ERR_RETURN;
417 goto ret;
418 }
419 if ((*ip < 0x21) || (*ip == 0x7f)) {
420 UNGET();
421 errno = EILSEQ;
422 retval = (size_t)ERR_RETURN;
423 goto ret;
424 }
425 if (ic < 0x75) { /* check IBM area */
426 dest = (ic << 8);
427 GET(ic);
428 dest += ic;
429 dest = lookuptbl(dest);
430 if (dest == 0xffff) {
431 /*
432 * Illegal code points
433 * in G3 plane.
434 */
435 UNGET();
436 UNGET();
437 errno = EILSEQ;
438 retval =
439 (size_t)ERR_RETURN;
440 goto ret;
441 } else {
442 PUT((dest >> 8) &
443 0xff);
444 PUT(dest & 0xff);
445 }
446 continue;
447 } else {
448 int even_ku;
449
450 if ((ic % 2) == 0)
451 even_ku = TRUE;
452 else
453 even_ku = FALSE;
454 PUT(jis212tosj1[ic]);
455 GET(ic);
456 if (even_ku)
457 ic += 0x80;
458 PUT(jistosj2[ic]);
459 continue;
460 }
461 } else { /* input fragment of Kanji */
462 UNGET();
463 errno = EINVAL;
464 retval = (size_t)ERR_RETURN;
465 goto ret;
466 }
467 }
468 } else {
469 if (oleft < UNKNOWNW) {
470 UNGET();
471 errno = E2BIG;
472 retval = (size_t)ERR_RETURN;
473 goto ret;
474 }
475 PUT(ic);
476 continue;
477 }
478 }
479 retval = ileft;
480 ret:
481 *inbuf = ip;
482 *inbytesleft = ileft;
483 *outbuf = (char *)op;
484 *outbytesleft = oleft;
485 st->_st_cset = cset;
486
487 return (retval);
488 }
489
490 /*
491 * lookuptbl()
492 * Return the index number if its index-ed number
493 * is the same as dest value.
494 */
495 static unsigned short
lookuptbl(unsigned short dest)496 lookuptbl(unsigned short dest)
497 {
498 unsigned short tmp;
499 int i;
500 int sz = (sizeof (sjtoibmext) / sizeof (sjtoibmext[0]));
501
502 for (i = 0; i < sz; i++) {
503 tmp = (sjtoibmext[i] & 0x7f7f);
504 if (tmp == dest)
505 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
506 }
507 return (PGETA);
508 }
509