1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1997, by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <errno.h>
30#include <sys/types.h>
31
32#include "tab_lookup.h"   	/* table lookup data types */
33
34int bisearch(unsigned long val, _icv_state *st, int n);
35
36#define MSB     0x80    /* most significant bit */
37#define MBYTE   0x8e    /* multi-byte (4 byte character) */
38#define PMASK   0xa0    /* plane number mask */
39#define ONEBYTE 0xff    /* right most byte */
40
41/* non-identified character */
42#define UTF8_NON_ID_CHAR1 0xEF
43#define UTF8_NON_ID_CHAR2 0xBF
44#define UTF8_NON_ID_CHAR3 0xBD
45
46enum _USTATE    { C0, C1, C2 };
47
48
49int ibm_to_utf8(_icv_state *st, char    *buf, size_t  buflen);
50
51
52/*
53 * Actual conversion; called from iconv()
54 * Input is UTF-8 data.
55 * first convert to UCS2
56 */
57size_t
58_icv_iconv(_icv_state *st, char **inbuf, size_t *inbytesleft,
59                        char **outbuf, size_t *outbytesleft)
60{
61/*
62 * Actual conversion; called from iconv()
63 */
64
65        int             n;
66
67#ifdef DEBUG
68    fprintf(stderr, "==========     iconv(): IBM --> UTF8     ==========\n");
69#endif
70
71        if (st == NULL) {
72                errno = EBADF;
73                return ((size_t) -1);
74        }
75
76        if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
77                st->ustate = C0;
78                st->_errno = 0;
79		st->shift = SHIFT_IN;
80                return ((size_t) 0);
81        }
82
83        st->_errno = 0;         /* reset internal errno */
84        errno = 0;              /* reset external errno */
85
86        /* a state machine for interpreting UTF8 code */
87        while (*inbytesleft > 0 && *outbytesleft > 0) {
88		switch (**inbuf) {
89		case SHIFT_OUT :
90			if (st->shift == SHIFT_IN) {
91				st->shift = SHIFT_OUT;
92				(*inbuf)++;
93				(*inbytesleft)--;
94				continue;
95			}
96			break;
97		case SHIFT_IN :
98			if (st->shift == SHIFT_OUT) {
99				st->shift = SHIFT_IN;
100				(*inbuf)++;
101				(*inbytesleft)--;
102				continue;
103			}
104			break;
105		}
106
107                switch (st->ustate) {
108                case C0 :
109			/* the input is ascii, single byte, convert it */
110			if (st->shift == SHIFT_IN) {
111				st->keepc[0] = 0x0;
112				st->keepc[1] = **inbuf;
113				st->ustate = C2;
114				continue;
115			 }
116
117			/* two bytes character */
118		        st->keepc[0] = (**inbuf);
119			st->ustate = C1;
120		        break;
121                case C1 :
122		        st->keepc[1] = (**inbuf);
123			st->ustate = C2;
124			continue;
125		case C2 :
126                        n = ibm_to_utf8(st, *outbuf, *outbytesleft);
127                        if (n > 0) {
128                                (*outbuf) += n;
129                                (*outbytesleft) -= n;
130                        } else {
131                                st->_errno = errno;
132                                return((size_t)-1);
133                        }
134                        st->ustate = C0;
135                        st->_errno = 0;
136                        break;
137
138                default:                        /* should never come here */
139                        st->_errno = errno = EILSEQ;
140                        st->ustate = C0;        /* reset state */
141                        break;
142                }
143
144
145                (*inbuf)++;
146                (*inbytesleft)--;
147
148                if (st->_errno) {
149#ifdef DEBUG
150    fprintf(stderr, "!!!!!\tst->_errno = %d\tst->ustate = %d\n",
151                st->_errno, st->ustate);
152#endif
153                        break;
154                }
155
156                if (errno)
157                        return((size_t)-1);
158        }
159
160        if (*outbytesleft == 0) {
161                errno = E2BIG;
162                return((size_t)-1);
163        }
164        return (*inbytesleft);
165}
166
167/*
168 * IBM code --> (Unicode)
169 * Unicode --> UTF8 (FSS-UTF)
170 *             (File System Safe Universal Character Set Transformation Format)
171 * Return: > 0 - converted with enough space in output buffer
172 *         = 0 - no space in outbuf
173 */
174int ibm_to_utf8(st, buf, buflen)
175_icv_state *st;
176char    *buf;
177size_t  buflen;
178{
179        unsigned long   ibm_val;       /* Big-5 value */
180        int             unidx;          /* Unicode index */
181        unsigned long   uni_val;        /* Unicode */
182
183        ibm_val = ((st->keepc[0]&ONEBYTE) << 8) + (st->keepc[1]&ONEBYTE);
184#ifdef DEBUG
185    fprintf(stderr, "%x\t", ibm_val);
186#endif
187
188
189        unidx = bisearch(ibm_val, st, st->table_size);
190
191        if (unidx >= 0)
192	{
193            if ( st->left_to_right )
194                uni_val = st->table[unidx].right_code;
195            else
196                uni_val = st->table[unidx].left_code;
197        }
198
199#ifdef DEBUG
200    fprintf(stderr, "unidx = %d, unicode = %x\t", unidx, uni_val);
201#endif
202
203        if (unidx >= 0) {       /* do Unicode to UTF8 conversion */
204		if (uni_val <= 0x07f) {
205			if (buflen < 1) {
206				errno = E2BIG;
207				return 0;
208			}
209			*buf = uni_val;
210			return 1;
211		}
212                if (uni_val >= 0x0080 && uni_val <= 0x07ff) {
213                        if (buflen < 2) {
214#ifdef DEBUG
215    fprintf(stderr, "outbuf overflow in ibm_to_utf8()!!\n");
216#endif
217                                errno = E2BIG;
218                                return(0);
219                        }
220                        *buf = (char)((uni_val >> 6) & 0x1f) | 0xc0;
221                        *(buf+1) = (char)(uni_val & 0x3f) | 0x80;
222#ifdef DEBUG
223    fprintf(stderr, "%x %x\n", *buf&ONEBYTE, *(buf+1)&ONEBYTE);
224#endif
225                        return(2);
226                }
227                if (uni_val >= 0x0800 && uni_val <= 0xffff) {
228                        if (buflen < 3) {
229#ifdef DEBUG
230    fprintf(stderr, "outbuf overflow in ibm_to_utf8()!!\n");
231#endif
232                                errno = E2BIG;
233                                return(0);
234                        }
235                        *buf = (char)((uni_val >> 12) & 0xf) | 0xe0;
236                        *(buf+1) = (char)((uni_val >>6) & 0x3f) | 0x80;
237                        *(buf+2) = (char)(uni_val & 0x3f) | 0x80;
238#ifdef DEBUG
239    fprintf(stderr, "%x %x %x\n", *buf&ONEBYTE, *(buf+1)&ONEBYTE, *(buf+2)&ONEBYTE);
240#endif
241                        return(3);
242                }
243        }
244
245        /* can't find a match in IBM --> UTF8 table or illegal UTF8 code */
246        if (buflen < 3) {
247#ifdef DEBUG
248    fprintf(stderr, "outbuf overflow in ibm_to_utf8()!!\n");
249#endif
250                errno = E2BIG;
251                return(0);
252        }
253
254        *buf     = (char)UTF8_NON_ID_CHAR1;
255        *(buf+1) = (char)UTF8_NON_ID_CHAR2;
256        *(buf+2) = (char)UTF8_NON_ID_CHAR3;
257
258#ifdef DEBUG
259    fprintf(stderr, "%c %c %c\n", *buf, *(buf+1), *(buf+2));
260#endif
261        return(3);
262}
263