1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <libintl.h>
31 
#define	MSB	0x80	/* most significant bit; input bytes without it are ASCII */
#define	MBYTE	0x8e	/* multi-byte (4 byte character) lead byte */
#define	PMASK	0xa0	/* subtracted from the plane byte to get the plane number */
#define ONEBYTE	0xff	/* right most byte */
#define MSB_OFF	0x7f	/* mask off MSB */

#define SI      0x0f    /* shift in */
#define SO      0x0e    /* shift out */
#define ESC     0x1b    /* escape */

/*
 * Final byte of the "ESC $ )" designation sequence, indexed by plane
 * number (valid indices are 0 through 16).  The commented-out table
 * below is an alternative set of final bytes that is not in use.
 */
/* static const char plane_char[] = "0GH23456789:;<=>?"; */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

/* Look up the designation final byte for plane i; i is not range checked. */
#define GET_PLANEC(i)   (plane_char[i])

#define NON_ID_CHAR '_'	/* non-identified character */
48 
/*
 * Per-descriptor conversion state used by _icv_open(), _icv_iconv() and
 * _icv_close().
 */
typedef struct _icv_state {
	char	keepc[4];	/* bytes of the CNS11643 character being assembled (maximum 4) */
	short	cstate;		/* state machine id (CNS); holds an enum _CSTATE value */
	short	istate;		/* state machine id (ISO); IN or OUT from enum _ISTATE */
	int	_errno;		/* internal errno; cleared at the start of each conversion call */
} _iconv_st;
55 
/*
 * Input (CNS 11643) parser states:
 *   C0 - expecting the first byte of a character (ASCII or lead byte)
 *   C1 - first byte stored, expecting the second byte
 *   C2 - 4-byte character, expecting the third byte
 *   C3 - 4-byte character, expecting the fourth byte
 *   C4 - character complete, ready to be written to the output
 */
enum _CSTATE	{ C0, C1, C2, C3, C4 };
/* Output shift state: IN after SI (ASCII), OUT after SO (Chinese). */
enum _ISTATE    { IN, OUT };
58 
59 
/* Helpers used by _icv_iconv(). */
static int get_plane_no_by_char(const char);
static int cns_to_iso(int, char[], char*, size_t);

/* Helper used by _cv_enconv(). */
static int get_plane_no_by_str(const char *);
/*
 * Conversion state used by _cv_open(), _cv_enconv() and _cv_close().
 */
struct _cv_state {
	int	plane_no;	/* plane of the current character; 0 means ASCII */
	int	get_a_mbchar;	/* nonzero: the next character must be read from the input */
	int	more_bytes;	/* bytes still to be read for the character in keepc */
	int	first_byte;	/* nonzero: the next input byte starts a new character */
	int	plane_changed;	/* nonzero: current character's plane differs from the previous one */
	char	planec;		/* designation final byte for the current plane */
	char	*p;		/* where the next input byte is stored in keepc */
	char	keepc[4];	/* bytes of the character being assembled */
};
74 
75 /*
76  * Open; called from iconv_open()
77  */
78 void *
_icv_open()79 _icv_open()
80 {
81 	_iconv_st *st;
82 
83 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
84 		errno = ENOMEM;
85 		return ((void *) -1);
86 	}
87 
88 	st->cstate = C0;
89 	st->istate = IN;
90 	st->_errno = 0;
91 
92 #ifdef DEBUG
93     fprintf(stderr, "==========     iconv(): CNS11643 --> ISO 2022-7     ==========\n");
94 #endif
95 
96 	return ((void *) st);
97 }
98 
99 
100 /*
101  * Close; called from iconv_close()
102  */
103 void
_icv_close(_iconv_st * st)104 _icv_close(_iconv_st *st)
105 {
106 	if (!st)
107 		errno = EBADF;
108 	else
109 		free(st);
110 }
111 
112 
113 /*
114  * Actual conversion; called from iconv()
115  */
116 /*=======================================================
117  *
118  *   State Machine for interpreting CNS 11643 code
119  *
120  *=======================================================
121  *
122  *               (ESC,SO)   plane 2 - 16
123  *                1st C         2nd C       3rd C
124  *    +------> C0 -----> C1 -----------> C2 -----> C3
125  *    |  ascii |  plane 1 |                   4th C |
126  *    ^        |  2nd C   v                         v
127  *    |        |         C4 <------<--------<-------+
128  *    |        v          | (SI)
129  *    +----<---+-----<----v
130  *
131  *=======================================================*/
132 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)133 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
134 				char **outbuf, size_t *outbytesleft)
135 {
136 	int plane_no = -1, n;
137 	/* pre_plane_no: need to be static when re-entry occurs on errno set */
138 	static int	pre_plane_no = -1;	/* previous plane number */
139 
140 	if (st == NULL) {
141 		errno = EBADF;
142 		return ((size_t) -1);
143 	}
144 
145 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
146 		if (st->cstate == C1) {
147 			if (outbytesleft && *outbytesleft >= 1
148 				&& outbuf && *outbuf) {
149 				**outbuf = SI;
150 				(*outbuf)++;
151 				(*outbytesleft)--;
152 			} else {
153 				errno = E2BIG;
154 				return((size_t) -1);
155 			}
156 		}
157 		st->cstate = C0;
158 		st->istate = IN;
159 		st->_errno = 0;
160 		return ((size_t) 0);
161 	}
162 
163 #ifdef DEBUG
164     fprintf(stderr, "=== (Re-entry)     iconv(): CNS11643 --> ISO 2022-7     ===\n");
165     fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
166 	st->cstate, st->istate, st->_errno, plane_no);
167 #endif
168 	st->_errno = 0;         /* reset internal errno */
169 	errno = 0;		/* reset external errno */
170 
171 	/* a state machine for interpreting CNS 11643 code */
172 	while (*inbytesleft > 0 && *outbytesleft > 0) {
173 		switch (st->cstate) {
174 		case C0:		/* assuming ASCII in the beginning */
175 			if (**inbuf & MSB) {
176 				st->keepc[0] = (**inbuf);
177 				st->cstate = C1;
178 			} else {	/* real ASCII */
179 				if (st->istate == OUT) {
180 					st->cstate = C0;
181 					st->istate = IN;
182 					**outbuf = SI;
183 					(*outbuf)++;
184 					(*outbytesleft)--;
185 					if (*outbytesleft <= 0) {
186 						errno = E2BIG;
187 						return((size_t)-1);
188 					}
189 				}
190 				**outbuf = **inbuf;
191 				(*outbuf)++;
192 				(*outbytesleft)--;
193 			}
194 			break;
195 		case C1:		/* Chinese characters: 2nd byte */
196 			if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
197 				plane_no = get_plane_no_by_char(**inbuf);
198 				if (plane_no == -1) {	/* illegal plane */
199 					st->cstate = C0;
200 					st->istate = IN;
201 					st->_errno = errno = EILSEQ;
202 				} else {	/* 4-byte Chinese character */
203 					st->keepc[1] = (**inbuf);
204 					st->cstate = C2;
205 				}
206 			} else {	/* 2-byte Chinese character - plane #1 */
207 				if (**inbuf & MSB) {	/* plane #1 */
208 					st->cstate = C4;
209 					st->keepc[1] = (**inbuf);
210 					st->keepc[2] = st->keepc[3] = '\0';
211 					plane_no = 1;
212 					continue;       /* should not advance *inbuf */
213 				} else {	/* input char doesn't belong
214 						 * to the input code set
215 						 */
216 					st->cstate = C0;
217 					st->istate = IN;
218 					st->_errno = errno = EINVAL;
219 				}
220 			}
221 			break;
222 		case C2:	/* plane #2 - #16 (4 bytes): get 3nd byte */
223 			if (**inbuf & MSB) {	/* 3rd byte */
224 				st->keepc[2] = (**inbuf);
225 				st->cstate = C3;
226 			} else {
227 				st->_errno = errno = EINVAL;
228 				st->cstate = C0;
229 			}
230 			break;
231 		case C3:	/* plane #2 - #16 (4 bytes): get 4th byte */
232 			if (**inbuf & MSB) {	/* 4th byte */
233 				st->cstate = C4;
234 				st->keepc[3] = (**inbuf);
235 				continue;       /* should not advance *inbuf */
236 			} else {
237 				st->_errno = errno = EINVAL;
238 				st->cstate = C0;
239 			}
240 			break;
241 		case C4:	/* Convert code from CNS 11643 to ISO 2022-7 */
242 			if ((st->istate == IN) || (pre_plane_no != plane_no)) {
243 				/* change plane # in Chinese mode */
244 				if (st->istate == OUT) {
245 					**outbuf = SI;
246 					(*outbuf)++;
247 					(*outbytesleft)--;
248 #ifdef DEBUG
249 fprintf(stderr, "(plane #=%d\tpre_plane #=%d)\t", plane_no, pre_plane_no);
250 #endif
251 				}
252 				if (*outbytesleft < 4) {
253 					st->_errno = errno = E2BIG;
254 					return((size_t)-1);
255 				}
256 				pre_plane_no = plane_no;
257 				st->istate = OUT;	/* shift out */
258 				**outbuf = ESC;
259 				*(*outbuf+1) = '$';
260 				*(*outbuf+2) = ')';
261 				*(*outbuf+3) = GET_PLANEC(plane_no);
262 #ifdef DEBUG
263 fprintf(stderr, "ESC $ ) %c\n", *(*outbuf+3));
264 #endif
265 				(*outbuf) += 4;
266 				(*outbytesleft) -= 4;
267 				if (*outbytesleft <= 0) {
268 					st->_errno = errno = E2BIG;
269 					return((size_t)-1);
270 				}
271 				**outbuf = SO;
272 				(*outbuf)++;
273 				(*outbytesleft)--;
274 			}
275 			n = cns_to_iso(plane_no, st->keepc, *outbuf, *outbytesleft);
276 			if (n > 0) {
277 				(*outbuf) += n;
278 				(*outbytesleft) -= n;
279 			} else {
280 				st->_errno = errno;
281 				return((size_t)-1);
282 			}
283 			st->cstate = C0;
284 			break;
285 		default:			/* should never come here */
286 			st->_errno = errno = EILSEQ;
287 			st->cstate = C0;	/* reset state */
288 			break;
289 		}
290 
291 		(*inbuf)++;
292 		(*inbytesleft)--;
293 
294 		if (st->_errno) {
295 #ifdef DEBUG
296     fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
297 		st->_errno, st->cstate);
298 #endif
299 			break;
300 		}
301 		if (errno)
302 			return((size_t)-1);
303 	}
304 
305 	if (*inbytesleft > 0 && *outbytesleft == 0) {
306 		errno = E2BIG;
307 		return((size_t)-1);
308 	}
309 	return (*inbytesleft);
310 }
311 
312 
313 /*
314  * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
315  * Returns -1 on error conditions
316  */
get_plane_no_by_char(const char inbuf)317 static int get_plane_no_by_char(const char inbuf)
318 {
319 	int ret;
320 	unsigned char uc = (unsigned char) inbuf;
321 
322 	ret = uc - PMASK;
323 	switch (ret) {
324 	case 1:		/* 0x8EA1 */
325 	case 2:		/* 0x8EA2 */
326 	case 3:		/* 0x8EA3 */
327 	case 4:		/* 0x8EA4 */
328 	case 5:		/* 0x8EA5 */
329 	case 6:		/* 0x8EA6 */
330 	case 7:		/* 0x8EA7 */
331 	case 12:	/* 0x8EAC */
332 	case 14:	/* 0x8EAE */
333 	case 15:	/* 0x8EAF */
334 	case 16:	/* 0x8EB0 */
335 		return (ret);
336 	default:
337 		return (-1);
338 	}
339 }
340 
341 
342 /*
343  * CNS 11643 code --> ISO 2022-7
344  * Return: > 0 - converted with enough space in output buffer
345  *         = 0 - no space in outbuf
346  */
cns_to_iso(int plane_no,char keepc[],char * buf,size_t buflen)347 static int cns_to_iso(int plane_no, char keepc[], char *buf, size_t buflen)
348 {
349 	char		cns_str[3];
350 	unsigned long	cns_val;	/* MSB mask off CNS 11643 value */
351 
352 #ifdef DEBUG
353     fprintf(stderr, "%s %d ", keepc, plane_no);
354 #endif
355         if (buflen < 2) {
356                 errno = E2BIG;
357                 return(0);
358         }
359 
360 	if (plane_no == 1) {
361 		cns_str[0] = keepc[0] & MSB_OFF;
362 		cns_str[1] = keepc[1] & MSB_OFF;
363 	} else {
364 		cns_str[0] = keepc[2] & MSB_OFF;
365 		cns_str[1] = keepc[3] & MSB_OFF;
366 	}
367 	cns_val = (cns_str[0] << 8) + cns_str[1];
368 #ifdef DEBUG
369     fprintf(stderr, "%x\t", cns_val);
370 #endif
371 
372 	*buf = (cns_val & 0xff00) >> 8;
373 	*(buf+1) = cns_val & 0xff;
374 
375 #ifdef DEBUG
376     fprintf(stderr, "->%x %x<-\t->%c %c<-\n", *buf, *(buf+1), *buf, *(buf+1));
377 #endif
378 	return(2);
379 }
380 void *
_cv_open()381 _cv_open()
382 {
383 	struct _cv_state *st;
384 
385 	if ((st = (struct _cv_state *)malloc(sizeof(struct _cv_state))) == NULL)
386 		return ((void *)-1);
387 
388 	st->plane_no = 0;
389 	st->get_a_mbchar = 1;
390 	st->first_byte = 1;
391 
392 	return (st);
393 }
394 
/*
 * Release the state allocated by _cv_open().
 */
void
_cv_close(struct _cv_state *st)
{
	void *state = st;

	free(state);
}
400 
401 
402 size_t
_cv_enconv(struct _cv_state * st,char ** cvinbuf,size_t * cvinbytesleft,char ** cvoutbuf,size_t * cvoutbytesleft)403 _cv_enconv(struct _cv_state *st, char **cvinbuf, size_t *cvinbytesleft,
404 				char **cvoutbuf, size_t *cvoutbytesleft)
405 {
406 	char	*inbuf;
407 	char	*outbuf;
408 	size_t insize;
409 	size_t outsize;
410 
411 	unsigned char	uc;
412 	int		i;
413 
414 	if (cvinbuf == NULL || *cvinbuf == NULL) { /* Reset request. */
415 		if (cvoutbuf && *cvoutbuf != NULL &&
416 		*cvoutbytesleft > 0 && st->plane_no != 0) {
417 			**cvoutbuf = SI;
418 			(*cvoutbytesleft)--;
419 			(*cvoutbuf)++;
420 		}
421 		st->plane_no = 0;
422 		st->get_a_mbchar = 1;
423 		st->first_byte = 1;
424 
425 		return (0);
426 	}
427 
428 
429 	inbuf = *cvinbuf;
430 	outbuf = *cvoutbuf;
431 	insize = *cvinbytesleft;
432 	outsize = *cvoutbytesleft;
433 
434 	while ((int) insize > 0 && (int) outsize > 0) {
435 
436 		if (st->get_a_mbchar) {
437 			if (st->plane_no == 0) { /* short cut */
438 				do {
439 					uc = *inbuf;
440 					if ((uc & MSB) == 0) {
441 						*outbuf++ = uc;
442 						outsize--;
443 						inbuf++;
444 						insize--;
445 					} else
446 						goto non_plane_0;
447 				} while ((int) insize > 0 && (int) outsize > 0);
448 				goto success;
449 			}
450 
451 non_plane_0:
452 			if (st->first_byte) {
453 				st->first_byte = 0;
454 				st->keepc[0] = uc = *inbuf++;
455 				insize--;
456 				if (uc & MSB) {
457 					if (uc == 0x8e)
458 						st->more_bytes = 3;
459 					else
460 						st->more_bytes = 1;
461 					st->p = st->keepc + 1;
462 				} else
463 					st->more_bytes = 0;
464 			}
465 			while (st->more_bytes > 0 && (int) insize > 0) {
466 				*st->p++ = *inbuf++;
467 				st->more_bytes--;
468 				insize--;
469 			}
470 			if (st->more_bytes == 0)
471 				st->get_a_mbchar = 0;
472 
473 		/* up to this point, st->keepc contains a complete mb char */
474 
475 			i = get_plane_no_by_str(st->keepc);
476 			st->plane_changed = (st->plane_no != i);
477 			if (st->plane_changed) { /* generate SI */
478 				st->planec = GET_PLANEC(i);
479 				if (st->plane_no != 0) {
480 					*outbuf++ = SI;
481 					outsize--;
482 					st->plane_no = i;
483 					if ((int) outsize <= 0)
484 						goto success;
485 				} else
486 					st->plane_no = i;
487 			}
488 		}
489 
490 		/*
491 		 * up to this point, st->keepc contains a complete mb char and
492 		 * we know the plane_no
493 		 */
494 
495 		switch (st->plane_no) {
496 		case 0:
497 			*outbuf++ = st->keepc[0];
498 			outsize--;
499 			break;
500 		case 1:
501 			if (st->plane_changed) {
502 				if (outsize < 7)
503 					goto success;
504 				*outbuf++ = ESC;
505 				*outbuf++ = '$';
506 				*outbuf++ = ')';
507 				*outbuf++ = 'G';
508 				*outbuf++ = SO;
509 				*outbuf++ = st->keepc[0] & MSB_OFF;
510 				*outbuf++ = st->keepc[1] & MSB_OFF;
511 				outsize -= 7;
512 			} else { /* don't need the escape sequence */
513 				if (outsize < 2)
514 					goto success;
515 				*outbuf++ = st->keepc[0] & MSB_OFF;
516 				*outbuf++ = st->keepc[1] & MSB_OFF;
517 				outsize -= 2;
518 			}
519 			break;
520 		default:
521 			if (st->plane_changed) {
522 				if (outsize < 7)
523 					goto success;
524 				*outbuf++ = ESC;
525 				*outbuf++ = '$';
526 				*outbuf++ = ')';
527 				*outbuf++ = st->planec;
528 				*outbuf++ = SO;
529 				*outbuf++ = st->keepc[2] & MSB_OFF;
530 				*outbuf++ = st->keepc[3] & MSB_OFF;
531 				outsize -= 7;
532 			} else { /* don't need the escape sequence */
533 				if (outsize < 2)
534 					goto success;
535 				*outbuf++ = st->keepc[2] & MSB_OFF;
536 				*outbuf++ = st->keepc[3] & MSB_OFF;
537 				outsize -= 2;
538 			}
539 			break;
540 		}
541 		/*
542 		 * up to this point, a complete multibyte character has been
543 		 * converted and written to outbuf, so need to grab the next
544 		 * mb char from inbuf
545 		 */
546 		st->get_a_mbchar = 1;
547 		st->first_byte = 1;
548 	}
549 
550 success:
551 	*cvinbytesleft = insize;
552 	*cvoutbytesleft = outsize;
553 	*cvinbuf = inbuf;
554 	*cvoutbuf = outbuf;
555 
556 	return (insize);
557 }
558 
get_plane_no_by_str(const char * inbuf)559 static int get_plane_no_by_str(const char *inbuf) {
560 	unsigned char uc = (unsigned char) *inbuf;
561 
562 	if (uc & MSB) {
563 		if (uc != 0x8e)
564 			return (1);
565 		uc = *(++inbuf);
566 		return (uc - 0xa0);
567 	} else
568 		return (0);
569 }
570