/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1995 by Sun Microsystems, Inc. * All Rights Reserved. */ #include #include #include #include #include "kctype.h" #include "kdefs.h" #include #include "ktable.h" #include "hangulcode.h" KCHAR c2p(); struct _cv_state { char **my_outbuf; size_t *my_outbytesleft; int invalid; int flush_obuf; char temp_obuf[5]; int start_cnt; int end_cnt; char temp_ibuf[1]; int ibuf_left; }; void AddChar (char Char, struct _cv_state *st); int write_21(KCHAR code_2, struct _cv_state *st); static void echo_vowel(char*, int*); static void echo_consonant(char*, int*); static int _wansung_to_cvc(unsigned short code, unsigned char* ci_ret, unsigned char* v_ret, unsigned char* cf_ret); typedef enum { ASCII, WANSUNG } _conv_desc; /**** _ I C V _ O P E N ****/ void* _icv_open() { _conv_desc* cd = (_conv_desc*)malloc(sizeof(_conv_desc)); if (cd == (_conv_desc*)NULL) { errno = ENOMEM; return((void*)-1); } *cd = ASCII; return((void*)cd); } /* end of int _icv_open(). */ /**** _ I C V _ C L O S E ****/ void _icv_close(_conv_desc* cd) { if (!cd) errno = EBADF; else free((void*)cd); } /* end of void _icv_close(_conv_desc*). 
*/


/****  _ I C V _ I C O N V  ****/
/*
 * Convert the bytes at *inbuf into *outbuf.
 *
 * - A byte with the high bit clear is copied unchanged; SI is written first
 *   when the descriptor is in the WANSUNG state.
 * - Any other byte is taken as the first of a two-byte code and handed to
 *   _wansung_to_cvc().  A decodable code is written as one to five letter
 *   bytes, preceded by SO when the descriptor is in the ASCII state.
 * - A code that cannot be decoded is replaced by two NON_IDENTICAL bytes
 *   and adds 2 to the return value.
 *
 * A NULL inbuf or *inbuf is a reset request: SI is written if the descriptor
 * is in the WANSUNG state and the descriptor returns to ASCII.
 *
 * On return the four in/out arguments describe the unconverted input and the
 * unused output.  Returns the accumulated non-identical count, or (size_t)-1
 * with errno set to:
 *   EBADF   state is NULL
 *   E2BIG   not enough room in the output buffer
 *   EINVAL  the input ends after the first byte of a two-byte code
 */
size_t
_icv_iconv(_conv_desc* state, char** inbuf, size_t* inbufleft,
	char** outbuf, size_t* outbufleft)
{
	size_t		result = 0;
	unsigned char*	src;
	unsigned char*	dst;
	unsigned char*	src_end;
	unsigned char*	dst_end;

	if (state == NULL)
	{
		errno = EBADF;
		return((size_t)-1);
	}

	if (inbuf == NULL || *inbuf == NULL)
	{
		/* Reset request: leave the shifted state if we are in it. */
		if (*state == WANSUNG)
		{
			if (outbufleft == NULL || *outbufleft < 1 ||
			    outbuf == NULL || *outbuf == NULL)
			{
				errno = E2BIG;
				return((size_t)-1);
			}
			**outbuf = SI;
			(*outbuf)++;
			(*outbufleft)--;
		}
		*state = ASCII;
		return((size_t)0);
	}

	src = (unsigned char*)*inbuf;
	dst = (unsigned char*)*outbuf;
	src_end = src + *inbufleft;
	dst_end = dst + *outbufleft;

	while (src < src_end)
	{
		unsigned char	ci, v, cf;
		char		letters[5];
		int		nletters;
		int		need;
		int		rc;
		int		k;

		if ((*src & 0x80) == 0)		/* 7 bits */
		{
			need = (*state == WANSUNG) ? 2 : 1;
			if ((dst_end - dst) < need)
			{
				errno = E2BIG;
				result = (size_t)-1;
				break;
			}
			if (*state == WANSUNG)
			{
				*dst++ = SI;
				*state = ASCII;
			}
			*dst++ = *src++;
			continue;
		}

		/* A two-byte code needs both bytes to be present. */
		if ((src_end - src) < 2)
		{
			errno = EINVAL;
			result = (size_t)-1;
			break;
		}

		rc = _wansung_to_cvc((unsigned short)(*src) << 8 |
				(unsigned short)(*(src + 1)), &ci, &v, &cf);

		if (rc == ILLEGAL_SEQ || rc == FAILED)
		{
			/* Let's assume the code is non-identical. */
			need = (*state == WANSUNG) ? 3 : 2;
			if ((dst_end - dst) < need)
			{
				errno = E2BIG;
				result = (size_t)-1;
				break;
			}
			if (*state == WANSUNG)
			{
				*dst++ = SI;
				*state = ASCII;
			}
			*dst++ = NON_IDENTICAL;
			*dst++ = NON_IDENTICAL;
			result += 2;
			src += 2;
			continue;
		}

		/*
		 * Build the letter sequence.  Each slot is written
		 * tentatively and only kept (index advanced) when the table
		 * produced something above the base character.
		 */
		nletters = 0;

		letters[nletters] =
			(char)Y19_32[ci != CVC_FILL ? ci + 1 : 0] + '@';
		if (letters[nletters] > '@')
			nletters++;

		letters[nletters] =
			(char)Y21_32[v != CVC_FILL ?
				v + (short)(v + 1) / 3 + 2 : 1] + '`';
		if (letters[nletters] > 'a')
			echo_vowel(letters, &nletters);

		letters[nletters] =
			(char)Y28_32[cf != CVC_FILL ? cf - 1 : 0] + '@';
		if (letters[nletters] > '@')
			echo_consonant(letters, &nletters);

		/* One extra byte for SO when we are still in ASCII. */
		need = nletters + ((*state == ASCII) ? 1 : 0);
		if ((dst_end - dst) < need)
		{
			errno = E2BIG;
			result = (size_t)-1;
			break;
		}

		if (*state == ASCII)
		{
			*dst++ = SO;
			*state = WANSUNG;
		}

		for (k = 0; k < nletters; k++)
			*dst++ = letters[k];

		src += 2;
	}

	*inbuf = (char*)src;
	*inbufleft = src_end - src;
	*outbuf = (char*)dst;
	*outbufleft = dst_end - dst;

	return(result);
}  /* end of size_t _icv_iconv(_conv_desc*, char**, size_t*, char**, size_t*).*/


/****  E C H O _ V O W E L  ****/
/*
 * Store the vowel letter found at c[*i] and advance *i past it.
 * A compound vowel is replaced by its two component letters (*i advances
 * by two); any other letter is left as it is (*i advances by one).
 */
static void
echo_vowel(char* c, int* i)
{
	char	first;
	char	second;

	switch (c[*i])
	{
	case 'm':			/* _|_|- */
		first = 'l';		/* _|_ */
		second = 'b';		/* |- */
		break;
	case 'n':			/* _|_H */
		first = 'l';		/* _|_ */
		second = 'c';		/* H */
		break;
	case 'o':			/* _|_| */
		first = 'l';		/* _|_ */
		second = '|';		/* | */
		break;
	case 't':			/* T-| */
		first = 's';		/* T */
		second = 'f';		/* -| */
		break;
	case 'u':			/* T-|| */
		first = 's';		/* T */
		second = 'g';		/* -|| */
		break;
	case 'v':			/* T| */
		first = 's';		/* T */
		second = '|';		/* | */
		break;
	case '{':			/* _| */
		first = 'z';		/* __ */
		second = '|';		/* | */
		break;
	default:
		/* Simple vowel: keep the letter already in place. */
		(*i)++;
		return;
	}

	c[(*i)++] = first;
	c[(*i)++] = second;
}  /* end of static void echo_vowel(char*, int*).
*/


/****  E C H O _ C O N S O N A N T  ****/
/*
 * Store the final-consonant letter found at c[*i] and advance *i past it.
 * A consonant cluster is replaced by its two component letters (*i advances
 * by two); any other letter is left as it is (*i advances by one).
 */
static void
echo_consonant(char* c, int* i)
{
	char	first;
	char	second;

	switch (c[*i])
	{
	case 'C':			/* gs */
		first = 'A';		/* g */
		second = 'U';		/* s */
		break;
	case 'E':			/* nj */
		first = 'D';		/* n */
		second = 'X';		/* j */
		break;
	case 'F':			/* nh */
		first = 'D';		/* n */
		second = '^';		/* h */
		break;
	case 'J':			/* rg */
		first = 'I';		/* r */
		second = 'A';		/* g */
		break;
	case 'K':			/* rm */
		first = 'I';		/* r */
		second = 'Q';		/* m */
		break;
	case 'L':			/* rb */
		first = 'I';		/* r */
		second = 'R';		/* b */
		break;
	case 'M':			/* rs */
		first = 'I';		/* r */
		second = 'U';		/* s */
		break;
	case 'N':			/* rt */
		first = 'I';		/* r */
		second = '\\';		/* t */
		break;
	case 'O':			/* rp */
		first = 'I';		/* r */
		second = ']';		/* p */
		break;
	case 'P':			/* rh */
		first = 'I';		/* r */
		second = '^';		/* h */
		break;
	case 'T':			/* bs */
		first = 'R';		/* b */
		second = 'U';		/* s */
		break;
	default:
		/* Single consonant: keep the letter already in place. */
		(*i)++;
		return;
	}

	c[(*i)++] = first;
	c[(*i)++] = second;
}  /* end of static void echo_consonant(char*, int*).
*/


/****  _ W A N S U N G _ T O _ C V C  ****/
/*
 * Split a two-byte code into initial consonant, vowel and final consonant
 * indices.
 *
 * The three outputs are first set to CVC_FILL; only the components that
 * were found are overwritten.
 *
 * Returns:
 *   HANGUL           code is in 0xB0A1..0xC8FE and was decoded
 *   HANJA_OR_SYMBOL  code is a lone initial consonant (0xA4A1..0xA4BE with
 *                    an X32_19 entry) or a lone vowel (0xA4BF..0xA4D3)
 *   FAILED           no final-consonant slot was found in cmp_bitmap
 *   ILLEGAL_SEQ      anything else, including a second byte outside
 *                    0xA1..0xFE inside the syllable range
 */
static int
_wansung_to_cvc(unsigned short code,
	unsigned char* ci_ret, unsigned char* v_ret, unsigned char* cf_ret)
{
	register short		h, i, l;
	short			ci, v, cf;
	short			disp;
	long			cfbit;
	unsigned short		trail;

	*ci_ret = *v_ret = *cf_ret = CVC_FILL;

	if (code >= 0xB0A1 && code <= 0xC8FE)
	{
		/*
		 * The second byte must lie in 0xA1..0xFE.  The upper bound
		 * matters: the bitmap scan below is bounded only by
		 * `i < disp', so a code that is not in the tables (second
		 * byte 0xFF) must not get that far.
		 */
		trail = (unsigned short)(code & 0xFF);
		if (trail < 0xA1 || trail > 0xFE)
			return(ILLEGAL_SEQ);

		/* Bisect column 0 of cmp_srchtbl for the row holding code. */
		for (h = CI_CNT, l = 0; ; )
		{
			ci = (l + h) / 2;
			if (l >= h)
				break;
			if (code < cmp_srchtbl[ci][0])
				h = ci - 1;
			else if (code < cmp_srchtbl[ci + 1][0])
				break;
			else
				l = ci + 1;
		}

		/*
		 * Walk the row for the last non-zero entry that is not
		 * above code; zero entries are skipped when stepping back.
		 */
		for (v = 1; ; )
		{
			if (code < cmp_srchtbl[ci][v])
			{
				while (!cmp_srchtbl[ci][--v])
					;
				break;
			}
			else if (v == V_CNT)
				break;
			v++;
		}

		/*
		 * Offset of code from the start of its (ci, v) group.  When
		 * the group runs past second byte 0xFE, SKIP is subtracted to
		 * compensate.
		 */
		disp = code - cmp_srchtbl[ci][v];
		if (((short)(cmp_srchtbl[ci][v] & 0xFF) + disp) > 0xFE)
			disp -= SKIP;

		/* Find the bit position of the (disp + 1)-th set bit. */
		for (cfbit = cmp_bitmap[ci][v], i = -1, cf = -1; i < disp; cf++)
		{
			if (cfbit & 0x01)
				i++;
			cfbit >>= 1;
		}

		if (cf == -1)
			return(FAILED);

		*ci_ret = (unsigned char)ci;
		*v_ret = (unsigned char)v;
		/* Positions below 2 leave the final consonant at CVC_FILL. */
		if (cf >= 2)
			*cf_ret = (unsigned char)cf;
		return(HANGUL);
	}

	/* Chosung-only */
	if ((code >= 0xA4A1 && code <= 0xA4BE) &&
	    (X32_19[code - 0xA4A0] != -1))
	{
		*ci_ret = (unsigned char)((X32_19[code - 0xA4A0] << 2) - 0xA0);
		return(HANJA_OR_SYMBOL);
	}
	else if (code >= 0xA4BF && code <= 0xA4D3)  /* Joongsung-only */
	{
		*v_ret = (unsigned char)(code - 0xA4BE);
		return(HANJA_OR_SYMBOL);
	}

	return(ILLEGAL_SEQ);
}  /* end of static int _wansung_to_cvc(unsigned short, unsigned char*, unsigned char*, unsigned char*).
*/ struct _cv_state * _cv_open() { struct _cv_state *st; if ((st = (struct _cv_state *)malloc(sizeof(struct _cv_state))) == NULL) return ((void *)-1); st->invalid = 0; st->flush_obuf = 0; st->ibuf_left = 0; st->start_cnt = 0; st->end_cnt = 0; return (st); } void _cv_close(st) struct _cv_state *st; { free(st); } size_t _cv_enconv(st, inbuf, inbytesleft, outbuf, outbytesleft) struct _cv_state *st; char **inbuf; size_t*inbytesleft; char **outbuf; size_t*outbytesleft; { int c, d; KCHAR code; if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ st->invalid = 0; st->flush_obuf = 0; st->ibuf_left = 0; st->start_cnt = 0; st->end_cnt = 0; return (0); } if (st->flush_obuf) { while ((*outbytesleft > 0) && (st->start_cnt < st->end_cnt)) { **outbuf = st->temp_obuf[st->start_cnt]; (*outbuf)++; (*outbytesleft)--; (st->start_cnt)++; } if (st->start_cnt < st->end_cnt) { return(*inbytesleft); } else { st->flush_obuf = 0; st->start_cnt = 0; st->end_cnt = 0; } } st->my_outbuf = outbuf; st->my_outbytesleft = outbytesleft; while (*inbytesleft > 0 && *(st->my_outbytesleft) > 0) { if (st->ibuf_left) { c = st->temp_ibuf[0]; st->ibuf_left = 0; } else { c = (**inbuf)&BYTE_MASK; (*inbuf)++, (*inbytesleft)--; } if (iskorea1(c)) {/* Completion Code */ if ( *inbytesleft <= 0) { st->ibuf_left = 1; st->temp_ibuf[0] = c; return(*inbytesleft); } d = (**inbuf)&BYTE_MASK; (*inbuf)++, (*inbytesleft)--; code = c<<8|d; /* output hangul character */ if (iskorea2(code&BYTE_MASK) && !ishanja(c)) { if (ishangul(c)) { code = c2p(code); AddChar (0x0e, st); write_21(code, st); AddChar (0x0f, st); } else if (ishaninit(code) || ishanmid(code)) { AddChar (0x0e, st); if (ishaninit(code)) { AddChar(code - 0xa4a0 + 0x40, st); } else { code -= 0xa4bf; code += (code/6) * 2 + 1; AddChar(code + 0x61, st); } AddChar (0x0f, st); /* other case */ } else { AddChar ('?', st); AddChar ('?', st); /*AddChar (c); AddChar (code&BYTE_MASK);*/ } } else { AddChar ('?', st); AddChar ('?', st); } if (st->invalid) { /* ran out of 
outbuf space */ st->invalid = 0; return(*inbytesleft); } } else { /* output normal Ascii code */ AddChar (c, st); if (st->invalid) { st->invalid = 0; /*(*outbuf)--; (*outbytesleft)++; (*inbuf)--; (*inbytesleft)++;*/ return(*inbytesleft); } } } return (*inbytesleft); } void AddChar (Char, st) char Char; struct _cv_state *st; { /* no more outbuf space */ if (*(st->my_outbytesleft) <= 0) { st->invalid = 1; st->temp_obuf[st->end_cnt] = Char; st->end_cnt++; st->flush_obuf = 1; } else { **(st->my_outbuf) = Char; (*(st->my_outbuf))++, (*(st->my_outbytesleft))--; } } #ifdef TEST /* test case 1 */ char ibuf1[] = {0xbf, 0xc0, 0xbc, 0xbc, 0xc3, 0xa2 , 0x41}; char obuf1[20]; /* test case 2 */ char ibuf2[] = {0xbf, 0xc0, 0xbc, 0xbc, 0xc3}; char ibuf21[] = {0xa2 , 0x41}; char obuf2[20]; /* test case 3 */ char ibuf3[] = {0xbf, 0xc0, 0xbc, 0xbc, 0xc3, 0xa2 , 0x41}; char obuf3[5]; main() { int i; struct _cv_state *st; size_t oleft, ileft; char *ip1 = &ibuf1[0], *op1 = &obuf1[0], *ip2 = &ibuf2[0], *ip21 = &ibuf21[0], *op2 = &obuf2[0], *ip3 = &ibuf3[0], *op3 = &obuf3[0]; /****************************** test case 1 *************************/ ileft = sizeof(ibuf1); oleft = sizeof(obuf1); st = _cv_open(); printf("TEST 1\n INPUT BUFFER: "); for (i = 0; i < ileft ; i++) { printf("%x ", 0xff&ibuf1[i]); } printf("\n"); printf("OUTPUT: return value %d ", _cv_enconv(st, &ip1, &ileft, &op1, &oleft)); printf("ileft %d oleft %d\n", ileft, oleft); printf(" flush_obuf %d ibuf_left %d\n", st->flush_obuf, st->ibuf_left); printf("OUTPUT BUFFER: "); for (i = 0; i < (sizeof(obuf1) - oleft) ; i++) { printf("%x ", obuf1[i]); } printf("\n\n\n"); _cv_close(st); /************************ test case 2 ******************************/ ileft = sizeof(ibuf2); oleft = sizeof(obuf2); st = _cv_open(); printf("TEST 2\nINPUT BUFFER: "); for (i = 0; i < ileft ; i++) { printf("%x ", 0xff&ibuf2[i]); } printf("\n"); printf("OUTPUT: return value %d ", _cv_enconv(st, &ip2, &ileft, &op2, &oleft)); printf("ileft %d oleft 
%d\n", ileft, oleft); printf(" flush_obuf %d ibuf_left %d\n", st->flush_obuf, st->ibuf_left); printf("OUTPUT BUFFER: "); for (i = 0; i < (sizeof(obuf2) - oleft) ; i++) { printf("%x ", obuf2[i]); } printf("\n\n"); ileft = sizeof(ibuf21); oleft = sizeof(obuf2); op2 = &obuf2[0]; printf("INPUT BUFFER: "); for (i = 0; i < ileft ; i++) { printf("%x ", 0xff&ibuf21[i]); } printf("\n"); printf("OUTPUT: return value %d ", _cv_enconv(st, &ip21, &ileft, &op2, &oleft)); printf("ileft %d oleft %d\n", ileft, oleft); printf(" flush_obuf %d ibuf_left %d\n", st->flush_obuf, st->ibuf_left); printf("OUTPUT BUFFER: "); for (i = 0; i < (sizeof(obuf2) - oleft) ; i++) { printf("%x ", obuf2[i]); } printf("\n\n\n"); _cv_close(st); /************************ test case 3 ******************************/ ileft = sizeof(ibuf3); oleft = sizeof(obuf3); st = _cv_open(); printf("TEST 3\nINPUT BUFFER: "); for (i = 0; i < ileft ; i++) { printf("%x ", 0xff&ibuf3[i]); } printf("\n"); printf("OUTPUT: return value %d ", _cv_enconv(st, &ip3, &ileft, &op3, &oleft)); printf("ileft %d oleft %d\n", ileft, oleft); printf(" flush_obuf %d ibuf_left %d\n", st->flush_obuf, st->ibuf_left); printf(" strat_cnt %d end_cnt %d\n", st->start_cnt, st->end_cnt); printf("OUTPUT BUFFER: "); for (i = 0; i < (sizeof(obuf3) - oleft) ; i++) { printf("%x ", obuf3[i]); } printf("\n\n"); op3 = &obuf3[0]; oleft = sizeof(obuf3); printf("OUTPUT: return value %d ", _cv_enconv(st, &ip3, &ileft, &op3, &oleft)); printf("ileft %d oleft %d\n", ileft, oleft); printf(" flush_obuf %d ibuf_left %d\n", st->flush_obuf, st->ibuf_left); printf(" strat_cnt %d end_cnt %d\n", st->start_cnt, st->end_cnt); printf("OUTPUT BUFFER: "); for (i = 0; i < (sizeof(obuf3) - oleft) ; i++) { printf("%x ", obuf3[i]); } printf("\n\n"); op3 = &obuf3[0]; oleft = sizeof(obuf3); printf("OUTPUT: return value %d ", _cv_enconv(st, &ip3, &ileft, &op3, &oleft)); printf("ileft %d oleft %d\n", ileft, oleft); printf(" flush_obuf %d ibuf_left %d\n", st->flush_obuf, st->ibuf_left); 
printf(" strat_cnt %d end_cnt %d\n", st->start_cnt, st->end_cnt); printf("OUTPUT BUFFER: "); for (i = 0; i < (sizeof(obuf3) - oleft) ; i++) { printf("%x ", obuf3[i]); } printf("\n\n\n"); _cv_close(st); } /* expected output TEST 1 INPUT BUFFER: bf c0 bc bc c3 a2 41 OUTPUT: return value 0 ileft 0 oleft 6 flush_obuf 0 ibuf_left 0 OUTPUT BUFFER: e 57 6c f e 55 67 f e 5a 62 57 f 41 TEST 2 INPUT BUFFER: bf c0 bc bc c3 OUTPUT: return value 0 ileft 0 oleft 12 flush_obuf 0 ibuf_left 1 OUTPUT BUFFER: e 57 6c f e 55 67 f INPUT BUFFER: a2 41 OUTPUT: return value 0 ileft 0 oleft 14 flush_obuf 0 ibuf_left 0 OUTPUT BUFFER: e 5a 62 57 f 41 TEST 3 INPUT BUFFER: bf c0 bc bc c3 a2 41 OUTPUT: return value 3 ileft 3 oleft 0 flush_obuf 1 ibuf_left 0 strat_cnt 0 end_cnt 3 OUTPUT BUFFER: e 57 6c f e OUTPUT: return value 1 ileft 1 oleft 0 flush_obuf 1 ibuf_left 0 strat_cnt 0 end_cnt 3 OUTPUT BUFFER: 55 67 f e 5a OUTPUT: return value 0 ileft 0 oleft 1 flush_obuf 0 ibuf_left 0 strat_cnt 0 end_cnt 0 OUTPUT BUFFER: 62 57 f 41 */ #endif /* TEST */