/* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is Mozilla Communicator client code, released * March 31, 1998. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998-1999 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): */ #include #include #include #include #include #ifndef HAVE_LIBICU #ifdef SOLARIS_LDAP_CMD #include #include #include #endif #ifdef __cplusplus extern "C" { #endif extern char *ldaptool_charset; char *ldaptool_convdir = NULL; static int charsetset = 0; char *ldaptool_local2UTF8( const char *src ); #ifdef SOLARIS_LDAP_CMD static char *ldaptool_convert( const char *src, const char *fcode, const char *tcode); char *ldaptool_UTF82local( const char *src ); #endif /* SOLARIS_LDAP_CMD */ #ifdef SOLARIS_LDAP_CMD /* * ICU version always returns string, unless strdup fails. * As in ICU version, in case of error strdup(src) * Usually strdup(src) will be ASCII and legal anyways. */ static char * ldaptool_convert( const char *src, const char *fcode, const char *tcode) { char *dest, *tptr, *tmp; const char *fptr; iconv_t cd; size_t ileft, oleft, ret, size; if (src == NULL) return (NULL); if (fcode == NULL || tcode == NULL) return (strdup(src)); if (strcasecmp(fcode, tcode) == 0) return (strdup(src)); if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) { /* conversion table not available */ return (strdup(src)); } ileft = strlen(src); oleft = 2 * ileft; size = oleft; ret = -1; if ((dest = (char *)malloc(size)) == NULL) { (void) iconv_close(cd); /* maybe sizeof strlen(src) memory still exists */ return (strdup(src)); } tptr = dest; fptr = src; for (;;) { ret = iconv(cd, &fptr, &ileft, &tptr, &oleft); if (ret != (size_t)-1) { /* * Success. Place 'cd' into its initial shift * state before returning. */ if (fptr == NULL) /* already in initial state */ break; fptr = NULL; ileft = 0; continue; } if (errno == E2BIG) { /* * Lack of space in output buffer. * Hence double the size and retry. * But before calling iconv(), oleft * and tptr have to re-adjusted, so that * iconv() doesn't overwrite the data * which has already been converted. */ oleft += size; size *= 2; if ((tmp = (char *) realloc(dest, size)) == NULL) break; tptr = tmp + (tptr - dest); dest = tmp; continue; } else { /* Other errors */ break; } } if (dest != NULL) { if (ret == -1) { /* Free malloc'ed memory on failure */ free(dest); dest = NULL; } else if (oleft > 0) { /* NULL terminate the return value */ *(dest + (size - oleft)) = '\0'; } else { /* realloc one more byte and NULL terminate */ if ((tmp = (char *) realloc(dest, size + 1)) == NULL) { free(dest); dest = NULL; } else { *(dest + size) = '\0'; } } } (void) iconv_close(cd); if (dest == NULL) { /* last chance in case some other failure along the way occurs */ return (strdup(src)); } return (dest); } char * ldaptool_UTF82local( const char *src ) { char *to_code; if ((to_code = nl_langinfo(CODESET)) == NULL) return (strdup(src)); return (ldaptool_convert(src, "UTF-8", (const char *)to_code)); } #endif /* SOLARIS_LDAP_CMD */ char * ldaptool_local2UTF8( const char *src ) { #ifdef SOLARIS_LDAP_CMD char *from_code; if ((from_code = nl_langinfo(CODESET)) == NULL) return (strdup(src)); return (ldaptool_convert(src, (const char *)from_code, "UTF-8")); #else char *utf8; charsetset = 0; if (src == NULL) { return NULL; } utf8 = strdup(src); return ( utf8 ); #endif /* SOLARIS_LDAP_CMD */ } #else /* HAVE_LIBICU */ #include "unicode/utypes.h" #include "unicode/ucnv.h" #define NSPR20 #ifdef XP_WIN32 #define VC_EXTRALEAN #include #include #endif extern char *ldaptool_charset; static int charsetset = 0; extern "C" { char *ldaptool_convdir = NULL; char *ldaptool_local2UTF8( const char * ); } #ifndef XP_WIN32 char * GetNormalizedLocaleName(void); char * GetNormalizedLocaleName(void) { #ifdef _HPUX_SOURCE int len; char *locale; locale = setlocale(LC_CTYPE, ""); if (locale && *locale) { len = strlen(locale); } else { locale = "C"; len = 1; } if ((!strncmp(locale, "/\x03:", 3)) && (!strcmp(&locale[len - 2], ";/"))) { locale += 3; len -= 5; } locale = strdup(locale); if (locale) { locale[len] = 0; } return locale; #else char *locale; locale = setlocale(LC_CTYPE, ""); if (locale && *locale) { return strdup(locale); } return strdup("C"); #endif } #if defined(IRIX) const char *CHARCONVTABLE[] = { "! This table maps the host's locale names to IANA charsets", "!", "C: ISO_8859-1:1987", "cs: ISO_8859-2:1987", "da: ISO_8859-1:1987", "de: ISO_8859-1:1987", "de_AT: ISO_8859-1:1987", "de_CH: ISO_8859-1:1987", "en: ISO_8859-1:1987", "en_AU: ISO_8859-1:1987", "en_CA: ISO_8859-1:1987", "en_TH: ISO_8859-1:1987", "en_US: ISO_8859-1:1987", "es: ISO_8859-1:1987", "fi: ISO_8859-1:1987", "fr: ISO_8859-1:1987", "fr_BE: ISO_8859-1:1987", "fr_CA: ISO_8859-1:1987", "fr_CH: ISO_8859-1:1987", "is: ISO_8859-1:1987", "it: ISO_8859-1:1987", "it_CH: ISO_8859-1:1987", "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese", "ko_KR.euc: EUC-KR", "nl: ISO_8859-1:1987", "nl_BE: ISO_8859-1:1987", "no: ISO_8859-1:1987", "pl: ISO_8859-2:1987", "pt: ISO_8859-1:1987", "sh: ISO_8859-2:1987", "sk: ISO_8859-2:1987", "sv: ISO_8859-1:1987", "zh_CN.ugb: GB2312", "zh_TW.ucns: cns11643_1", NULL }; #elif defined(SOLARIS) const char *CHARCONVTABLE[] = { "! This table maps the host's locale names to IANA charsets", "!", "C: ISO_8859-1:1987", "ja: Extended_UNIX_Code_Packed_Format_for_Japanese", "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese", "ja_JP.PCK: Shift_JIS", "en: ISO_8859-1:1987", "en_AU: ISO_8859-1:1987", "en_CA: ISO_8859-1:1987", "en_UK: ISO_8859-1:1987", "en_US: ISO_8859-1:1987", "es: ISO_8859-1:1987", "es_AR: ISO_8859-1:1987", "es_BO: ISO_8859-1:1987", "es_CL: ISO_8859-1:1987", "es_CO: ISO_8859-1:1987", "es_CR: ISO_8859-1:1987", "es_EC: ISO_8859-1:1987", "es_GT: ISO_8859-1:1987", "es_MX: ISO_8859-1:1987", "es_NI: ISO_8859-1:1987", "es_PA: ISO_8859-1:1987", "es_PE: ISO_8859-1:1987", "es_PY: ISO_8859-1:1987", "es_SV: ISO_8859-1:1987", "es_UY: ISO_8859-1:1987", "es_VE: ISO_8859-1:1987", "fr: ISO_8859-1:1987", "fr_BE: ISO_8859-1:1987", "fr_CA: ISO_8859-1:1987", "fr_CH: ISO_8859-1:1987", "de: ISO_8859-1:1987", "de_AT: ISO_8859-1:1987", "de_CH: ISO_8859-1:1987", "nl: ISO_8859-1:1987", "nl_BE: ISO_8859-1:1987", "it: ISO_8859-1:1987", "sv: ISO_8859-1:1987", "no: ISO_8859-1:1987", "da: ISO_8859-1:1987", "iso_8859_1: ISO_8859-1:1987", "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese", "ko: EUC-KR", "zh: GB2312", "zh_TW: cns11643_1", NULL }; #elif defined(OSF1) const char *CHARCONVTABLE[] = { "! This table maps the host's locale names to IANA charsets", "!", "C: ISO_8859-1:1987", "cs_CZ.ISO8859-2: ISO_8859-2:1987", "cs_CZ: ISO_8859-2:1987", "da_DK.ISO8859-1: ISO_8859-1:1987", "de_CH.ISO8859-1: ISO_8859-1:1987", "de_DE.ISO8859-1: ISO_8859-1:1987", "en_GB.ISO8859-1: ISO_8859-1:1987", "en_US.ISO8859-1: ISO_8859-1:1987", "es_ES.ISO8859-1: ISO_8859-1:1987", "fi_FI.ISO8859-1: ISO_8859-1:1987", "fr_BE.ISO8859-1: ISO_8859-1:1987", "fr_CA.ISO8859-1: ISO_8859-1:1987", "fr_CH.ISO8859-1: ISO_8859-1:1987", "fr_FR.ISO8859-1: ISO_8859-1:1987", "hu_HU.ISO8859-2: ISO_8859-2:1987", "hu_HU: ISO_8859-2:1987", "is_IS.ISO8859-1: ISO_8859-1:1987", "it_IT.ISO8859-1: ISO_8859-1:1987", "ja_JP.SJIS: Shift_JIS", "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", "ko_KR.eucKR: EUC-KR", "ko_KR: EUC-KR", "nl_BE.ISO8859-1: ISO_8859-1:1987", "nl_NL.ISO8859-1: ISO_8859-1:1987", "no_NO.ISO8859-1: ISO_8859-1:1987", "pl_PL.ISO8859-2: ISO_8859-2:1987", "pl_PL: ISO_8859-2:1987", "pt_PT.ISO8859-1: ISO_8859-1:1987", "sk_SK.ISO8859-2: ISO_8859-2:1987", "sk_SK: ISO_8859-2:1987", "sv_SE.ISO8859-1: ISO_8859-1:1987", "zh_CN: GB2312", "zh_HK.big5: Big5", "zh_HK.eucTW: cns11643_1", "zh_TW.big5: Big5", "zh_TW.big5@chuyin: Big5", "zh_TW.big5@radical: Big5", "zh_TW.big5@stroke: Big5", "zh_TW.eucTW: cns11643_1", "zh_TW.eucTW@chuyin: cns11643_1", "zh_TW.eucTW@radical: cns11643_1", "zh_TW.eucTW@stroke: cns11643_1", "zh_TW: cns11643_1", NULL }; #elif defined(HPUX) const char *CHARCONVTABLE[] = { "! This table maps the host's locale names to IANA charsets", "!", "C: ISO_8859-1:1987", "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", "ja_JP.SJIS: Shift_JIS", "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", "es_ES: ISO_8859-1:1987", "es_ES.iso88591: ISO_8859-1:1987", "sv_SE: ISO_8859-1:1987", "sv_SE.iso88591: ISO_8859-1:1987", "da_DK: ISO_8859-1:1987", "da_DK.iso88591: ISO_8859-1:1987", "nl_NL: ISO_8859-1:1987", "nl_NL.iso88591: ISO_8859-1:1987", "en: ISO_8859-1:1987", "en_GB: ISO_8859-1:1987", "en_GB.iso88591: ISO_8859-1:1987", "en_US: ISO_8859-1:1987", "en_US.iso88591: ISO_8859-1:1987", "fi_FI: ISO_8859-1:1987", "fi_FI.iso88591: ISO_8859-1:1987", "fr_CA: ISO_8859-1:1987", "fr_CA.iso88591: ISO_8859-1:1987", "fr_FR: ISO_8859-1:1987", "fr_FR.iso88591: ISO_8859-1:1987", "de_DE: ISO_8859-1:1987", "de_DE.iso88591: ISO_8859-1:1987", "is_IS: ISO_8859-1:1987", "is_IS.iso88591: ISO_8859-1:1987", "it_IT: ISO_8859-1:1987", "it_IT.iso88591: ISO_8859-1:1987", "no_NO: ISO_8859-1:1987", "no_NO.iso88591: ISO_8859-1:1987", "pt_PT: ISO_8859-1:1987", "pt_PT.iso88591: ISO_8859-1:1987", "hu_HU: ISO_8859-2:1987", "hu_HU.iso88592: ISO_8859-2:1987", "cs_CZ: ISO_8859-2:1987", "cs_CZ.iso88592: ISO_8859-2:1987", "pl_PL: ISO_8859-2:1987", "pl_PL.iso88592: ISO_8859-2:1987", "ro_RO: ISO_8859-2:1987", "ro_RO.iso88592: ISO_8859-2:1987", "hr_HR: ISO_8859-2:1987", "hr_HR.iso88592: ISO_8859-2:1987", "sk_SK: ISO_8859-2:1987", "sk_SK.iso88592: ISO_8859-2:1987", "sl_SI: ISO_8859-2:1987", "sl_SI.iso88592: ISO_8859-2:1987", "american.iso88591: ISO_8859-1:1987", "bulgarian: ISO_8859-2:1987", "c-french.iso88591: ISO_8859-1:1987", "chinese-s: GB2312", "chinese-t.big5: Big5", "czech: ISO_8859-2:1987", "danish.iso88591: ISO_8859-1:1987", "dutch.iso88591: ISO_8859-1:1987", "english.iso88591: ISO_8859-1:1987", "finnish.iso88591: ISO_8859-1:1987", "french.iso88591: ISO_8859-1:1987", "german.iso88591: ISO_8859-1:1987", "hungarian: ISO_8859-2:1987", "icelandic.iso88591: ISO_8859-1:1987", "italian.iso88591: ISO_8859-1:1987", "japanese.euc: Extended_UNIX_Code_Packed_Format_for_Japanese", "japanese: Shift_JIS", "katakana: Shift_JIS", "korean: EUC-KR", "norwegian.iso88591: ISO_8859-1:1987", "polish: ISO_8859-2:1987", "portuguese.iso88591: ISO_8859-1:1987", "rumanian: ISO_8859-2:1987", "serbocroatian: ISO_8859-2:1987", "slovene: ISO_8859-2:1987", "spanish.iso88591: ISO_8859-1:1987", "swedish.iso88591: ISO_8859-1:1987", NULL }; #elif defined(AIX) const char *CHARCONVTABLE[] = { "! This table maps the host's locale names to IANA charsets", "!", "C: ISO_8859-1:1987", "En_JP.IBM-932: Shift_JIS", "En_JP: Shift_JIS", "Ja_JP.IBM-932: Shift_JIS", "Ja_JP: Shift_JIS", "da_DK.ISO8859-1: ISO_8859-1:1987", "da_DK: ISO_8859-1:1987", "de_CH.ISO8859-1: ISO_8859-1:1987", "de_CH: ISO_8859-1:1987", "de_DE.ISO8859-1: ISO_8859-1:1987", "de_DE: ISO_8859-1:1987", "en_GB.ISO8859-1: ISO_8859-1:1987", "en_GB: ISO_8859-1:1987", "en_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", "en_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", "en_KR.IBM-eucKR: EUC-KR", "en_KR: EUC-KR", "en_TW.IBM-eucTW: cns11643_1", "en_TW: cns11643_1", "en_US.ISO8859-1: ISO_8859-1:1987", "en_US: ISO_8859-1:1987", "es_ES.ISO8859-1: ISO_8859-1:1987", "es_ES: ISO_8859-1:1987", "fi_FI.ISO8859-1: ISO_8859-1:1987", "fi_FI: ISO_8859-1:1987", "fr_BE.ISO8859-1: ISO_8859-1:1987", "fr_BE: ISO_8859-1:1987", "fr_CA.ISO8859-1: ISO_8859-1:1987", "fr_CA: ISO_8859-1:1987", "fr_CH.ISO8859-1: ISO_8859-1:1987", "fr_CH: ISO_8859-1:1987", "fr_FR.ISO8859-1: ISO_8859-1:1987", "fr_FR: ISO_8859-1:1987", "is_IS.ISO8859-1: ISO_8859-1:1987", "is_IS: ISO_8859-1:1987", "it_IT.ISO8859-1: ISO_8859-1:1987", "it_IT: ISO_8859-1:1987", "ja_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", "ko_KR.IBM-eucKR: EUC-KR", "ko_KR: EUC-KR", "nl_BE.ISO8859-1: ISO_8859-1:1987", "nl_BE: ISO_8859-1:1987", "nl_NL.ISO8859-1: ISO_8859-1:1987", "nl_NL: ISO_8859-1:1987", "no_NO.ISO8859-1: ISO_8859-1:1987", "no_NO: ISO_8859-1:1987", "pt_PT.ISO8859-1: ISO_8859-1:1987", "pt_PT: ISO_8859-1:1987", "sv_SE.ISO8859-1: ISO_8859-1:1987", "sv_SE: ISO_8859-1:1987", "zh_TW.IBM-eucTW: cns11643_1", "zh_TW: cns11643_1", NULL }; #else // sunos by default const char *CHARCONVTABLE[] = { "! This table maps the host's locale names to IANA charsets", "!", "C: ISO_8859-1:1987", "de: ISO_8859-1:1987", "en_US: ISO_8859-1:1987", "es: ISO_8859-1:1987", "fr: ISO_8859-1:1987", "iso_8859_1: ISO_8859-1:1987", "it: ISO_8859-1:1987", "ja: Extended_UNIX_Code_Packed_Format_for_Japanese", "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese", "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese", "ko: EUC-KR", "sv: ISO_8859-1:1987", "zh: GB2312", "zh_TW: cns11643_1", NULL }; #endif #define BSZ 256 char * GetCharsetFromLocale(char *locale) { char *tmpcharset = NULL; char buf[BSZ]; char *p; const char *line; int i=0; line = CHARCONVTABLE[i]; while (line != NULL) { if (*line == 0) { break; } strcpy(buf, line); line = CHARCONVTABLE[++i]; if (strlen(buf) == 0 || buf[0] == '!') { continue; } p = strchr(buf, ':'); if (p == NULL) { tmpcharset = NULL; break; } *p = 0; if (strcmp(buf, locale) == 0) { while (*++p == ' ' || *p == '\t') ; if (isalpha(*p)) { tmpcharset = strdup(p); } else tmpcharset = NULL; break; } } return tmpcharset; } #endif /* Not defined XP_WIN32 */ #ifdef XP_WIN32 char *_convertor(const char *instr, int bFromUTF8) { char *outstr = NULL; int inlen, wclen, outlen; LPWSTR wcstr; if (instr == NULL) return NULL; if ((inlen = strlen(instr)) <= 0) return NULL; /* output never becomes longer than input, * thus we don't have to ask for the length */ wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) ); if (!wcstr) return NULL; wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr, inlen, wcstr, inlen); outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr, wclen, NULL, 0, NULL, NULL); if (outlen > 0) { outstr = (char *) malloc(outlen + 2); outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr, wclen, outstr, outlen, NULL, NULL); if (outlen > 0) *(outstr+outlen) = _T('\0'); else return NULL; } free( wcstr ); return outstr; } #endif char * ldaptool_local2UTF8( const char *src ) { char *utf8; #ifndef XP_WIN32 char *locale, *newcharset; size_t outLen, resultLen; UErrorCode err = U_ZERO_ERROR; UConverter *cnv; if (src == NULL) { return NULL; } else if (*src == 0 || (ldaptool_charset == NULL) || (!strcmp( ldaptool_charset, "" ))) { /* no option specified, so assume it's already in utf-8 */ utf8 = strdup(src); return utf8; } if( !strcmp( ldaptool_charset, "0" ) && (!charsetset) ) { /* zero option specified, so try to get default codepage this sucker is strdup'd immediately so it's OK to cast */ newcharset = (char *)ucnv_getDefaultName(); if (newcharset != NULL) { free( ldaptool_charset ); /* the default codepage lives in ICU */ ldaptool_charset = strdup(newcharset); if (ldaptool_charset == NULL) { return strdup(src); } } charsetset = 1; } else if( strcmp( ldaptool_charset, "" ) && (!charsetset) ) { /* -i option specified with charset name */ charsetset = 1; } /* do the preflight - get the size needed for the target buffer */ outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src, strlen( src ) * sizeof(char), &err); if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) { /* default to just a copy of the string - this covers the case of an illegal charset also */ return strdup(src); } utf8 = (char *) malloc( outLen + 1); if( utf8 == NULL ) { /* if we're already out of memory, does strdup just return NULL? */ return strdup(src); } /* do the actual conversion this time */ err = U_ZERO_ERROR; resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src, strlen(src) * sizeof(char), &err ); if (!U_SUCCESS(err)) { free(utf8); return strdup(src); } #else utf8 = _convertor(src, FALSE); if( utf8 == NULL ) utf8 = strdup(src); #endif return utf8; } #endif /* HAVE_LIBICU */ #ifndef HAVE_LIBICU #ifdef __cplusplus } #endif #endif