12d08521bSGarrett D'Amore /*
22d08521bSGarrett D'Amore * This file and its contents are supplied under the terms of the
32d08521bSGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0.
42d08521bSGarrett D'Amore * You may only use this file in accordance with the terms of version
52d08521bSGarrett D'Amore * 1.0 of the CDDL.
62d08521bSGarrett D'Amore *
72d08521bSGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this
82d08521bSGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at
92d08521bSGarrett D'Amore * http://www.illumos.org/license/CDDL.
102d08521bSGarrett D'Amore */
112d08521bSGarrett D'Amore
122d08521bSGarrett D'Amore /*
132d08521bSGarrett D'Amore * Copyright 2014 Garrett D'Amore <garrett@damore.org>
142d08521bSGarrett D'Amore */
152d08521bSGarrett D'Amore
162d08521bSGarrett D'Amore /*
172d08521bSGarrett D'Amore * This file implements the 2008 newlocale and friends handling.
182d08521bSGarrett D'Amore */
192d08521bSGarrett D'Amore
202d08521bSGarrett D'Amore #ifndef _LCONV_C99
212d08521bSGarrett D'Amore #define _LCONV_C99
222d08521bSGarrett D'Amore #endif
232d08521bSGarrett D'Amore
242d08521bSGarrett D'Amore #include "lint.h"
252d08521bSGarrett D'Amore #include <atomic.h>
262d08521bSGarrett D'Amore #include <locale.h>
272d08521bSGarrett D'Amore #include <sys/types.h>
282d08521bSGarrett D'Amore #include <sys/mman.h>
292d08521bSGarrett D'Amore #include <errno.h>
302d08521bSGarrett D'Amore #include <string.h>
312d08521bSGarrett D'Amore #include "libc.h"
322d08521bSGarrett D'Amore #include "mtlib.h"
332d08521bSGarrett D'Amore #include "tsd.h"
342d08521bSGarrett D'Amore #include "localeimpl.h"
352d08521bSGarrett D'Amore #include "lctype.h"
362d08521bSGarrett D'Amore
/*
 * Big Theory of Locales:
 *
 * (It is recommended that readers familiarize themselves with the POSIX
 * 2008 (XPG Issue 7) specifications for locales, first.)
 *
 * Historically, we had a bunch of global variables that stored locale
 * data.  While this worked well, it limited applications to a single locale
 * at a time.  This doesn't work well in certain server applications.
 *
 * In Issue 7, X/Open introduced the concept of a locale_t object, along with
 * versions of functions that can take this object as a parameter, along
 * with functions to clone and manipulate these locale objects.  The new
 * functions are named with a _l() suffix.
 *
 * Additionally uselocale() is introduced which can change the locale of
 * a single thread.  However, setlocale() can still be used to change
 * the global locale.
 *
 * In our implementation, we use libc's TSD to store the locale data that
 * was previously global.  We still have global data because some applications
 * have had those global objects compiled into them.  (Such applications will
 * be unable to benefit from uselocale(), btw.)  The legacy routines are
 * reimplemented as wrappers that use the appropriate locale object by
 * calling uselocale().  uselocale() when passed a NULL pointer returns the
 * thread-specific locale object if one is present, or the global locale
 * object otherwise.  Note that once the TSD data is set, the only way
 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
 * to uselocale().
 *
 * We are careful to minimize performance impact of multiple calls to
 * uselocale() or setlocale() by using a cache of locale data whenever possible.
 * As a consequence of this, applications that iterate over all possible
 * locales will burn through a lot of virtual memory, but we find such
 * applications rare.  (locale -a might be an exception, but it is short lived.)
 *
 * Category data is never released (although enclosing locale objects might be),
 * in order to guarantee thread-safety.  Calling freelocale() on an object
 * while it is in use by another thread is a programmer error (use-after-free)
 * and we don't bother to note it further.
 *
 * Locale objects (global locales) established by setlocale() are also
 * never freed (for MT safety), but we will save previous locale objects
 * and reuse them when we can.
 */
822d08521bSGarrett D'Amore
832d08521bSGarrett D'Amore typedef struct locdata *(*loadfn_t)(const char *);
842d08521bSGarrett D'Amore
852d08521bSGarrett D'Amore static const loadfn_t loaders[LC_ALL] = {
862d08521bSGarrett D'Amore __lc_ctype_load,
872d08521bSGarrett D'Amore __lc_numeric_load,
882d08521bSGarrett D'Amore __lc_time_load,
892d08521bSGarrett D'Amore __lc_collate_load,
902d08521bSGarrett D'Amore __lc_monetary_load,
912d08521bSGarrett D'Amore __lc_messages_load,
922d08521bSGarrett D'Amore };
932d08521bSGarrett D'Amore
/*
 * Built-in POSIX category data, defined in other translation units.
 * These back the static "C" locale and the non-CTYPE categories of the
 * C.UTF-8 locale below.
 */
extern struct lc_ctype		lc_ctype_posix;
extern struct lc_numeric	lc_numeric_posix;
extern struct lc_time		lc_time_posix;
extern struct lc_collate	lc_collate_posix;
extern struct lc_monetary	lc_monetary_posix;
extern struct lc_messages	lc_messages_posix;

/* Rune table used by the static "C" locale. */
extern struct _RuneLocale	_DefaultRuneLocale;
1012d08521bSGarrett D'Amore
102732efd55SDan McDonald static struct _locale posix_locale = {
1032d08521bSGarrett D'Amore /* locdata */
1042d08521bSGarrett D'Amore .locdata = {
1052d08521bSGarrett D'Amore &__posix_ctype_locdata,
1062d08521bSGarrett D'Amore &__posix_numeric_locdata,
1072d08521bSGarrett D'Amore &__posix_time_locdata,
1082d08521bSGarrett D'Amore &__posix_collate_locdata,
1092d08521bSGarrett D'Amore &__posix_monetary_locdata,
1102d08521bSGarrett D'Amore &__posix_messages_locdata,
1112d08521bSGarrett D'Amore },
1122d08521bSGarrett D'Amore .locname = "C",
1132d08521bSGarrett D'Amore .ctype = &lc_ctype_posix,
1142d08521bSGarrett D'Amore .numeric = &lc_numeric_posix,
1152d08521bSGarrett D'Amore .collate = &lc_collate_posix,
1162d08521bSGarrett D'Amore .monetary = &lc_monetary_posix,
1172d08521bSGarrett D'Amore .messages = &lc_messages_posix,
1182d08521bSGarrett D'Amore .time = &lc_time_posix,
1192d08521bSGarrett D'Amore .runelocale = &_DefaultRuneLocale,
1202d08521bSGarrett D'Amore };
1212d08521bSGarrett D'Amore
1222d08521bSGarrett D'Amore locale_t ___global_locale = &posix_locale;
1232d08521bSGarrett D'Amore
1242d08521bSGarrett D'Amore locale_t
__global_locale(void)1252d08521bSGarrett D'Amore __global_locale(void)
1262d08521bSGarrett D'Amore {
1272d08521bSGarrett D'Amore return (___global_locale);
1282d08521bSGarrett D'Amore }
1292d08521bSGarrett D'Amore
1303125066dSYuri Pankov /*
1313125066dSYuri Pankov * Locale data for hybrid C.UTF-8 locale having all the characteristics of
1323125066dSYuri Pankov * default C/POSIX locale, except for LC_CTYPE data which is retrieved from
1333125066dSYuri Pankov * cache/file as for other UTF-8 locales.
1343125066dSYuri Pankov */
1353125066dSYuri Pankov static struct locdata cutf_locdata[LC_ALL] = {
1363125066dSYuri Pankov { "C.UTF-8", NULL }, /* unused */
1373125066dSYuri Pankov { "C.UTF-8", &lc_numeric_posix },
1383125066dSYuri Pankov { "C.UTF-8", &lc_time_posix },
1393125066dSYuri Pankov { "C.UTF-8", &lc_collate_posix },
1403125066dSYuri Pankov { "C.UTF-8", &lc_monetary_posix },
1413125066dSYuri Pankov { "C.UTF-8", &lc_messages_posix },
1423125066dSYuri Pankov };
1433125066dSYuri Pankov
/*
 * Category names for getenv(), indexed by category number (LC_ALL is the
 * last entry).  Note that this was modified for Solaris.  See
 * <iso/locale_iso.h>.
 *
 * The table is read-only: both the pointers and the strings they refer to
 * are const, and the array is sized by NUM_CATS rather than a repeated
 * literal so the two cannot drift apart.
 */
#define	NUM_CATS	7
static const char *const categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
1582d08521bSGarrett D'Amore
1592d08521bSGarrett D'Amore /*
1602d08521bSGarrett D'Amore * Prototypes.
1612d08521bSGarrett D'Amore */
1622d08521bSGarrett D'Amore static const char *get_locale_env(int);
163815e3086SToomas Soome static struct locdata *locdata_get(int, const char *);
1642d08521bSGarrett D'Amore static struct locdata *locdata_get_cache(int, const char *);
1652d08521bSGarrett D'Amore static locale_t mklocname(locale_t);
1662d08521bSGarrett D'Amore
1672d08521bSGarrett D'Amore /*
1682d08521bSGarrett D'Amore * Some utility routines.
1692d08521bSGarrett D'Amore */
1702d08521bSGarrett D'Amore
1712d08521bSGarrett D'Amore struct locdata *
__locdata_alloc(const char * name,size_t memsz)1722d08521bSGarrett D'Amore __locdata_alloc(const char *name, size_t memsz)
1732d08521bSGarrett D'Amore {
1742d08521bSGarrett D'Amore struct locdata *ldata;
1752d08521bSGarrett D'Amore
1762d08521bSGarrett D'Amore if ((ldata = lmalloc(sizeof (*ldata))) == NULL) {
1772d08521bSGarrett D'Amore return (NULL);
1782d08521bSGarrett D'Amore }
1792d08521bSGarrett D'Amore if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) {
1802d08521bSGarrett D'Amore lfree(ldata, sizeof (*ldata));
1812d08521bSGarrett D'Amore errno = ENOMEM;
1822d08521bSGarrett D'Amore return (NULL);
1832d08521bSGarrett D'Amore }
1842d08521bSGarrett D'Amore (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
1852d08521bSGarrett D'Amore
1862d08521bSGarrett D'Amore return (ldata);
1872d08521bSGarrett D'Amore }
1882d08521bSGarrett D'Amore
1892d08521bSGarrett D'Amore /*
1902d08521bSGarrett D'Amore * Normally we never free locale data truly, but if we failed to load it
1912d08521bSGarrett D'Amore * for some reason, this routine is used to cleanup the partial mess.
1922d08521bSGarrett D'Amore */
1932d08521bSGarrett D'Amore void
__locdata_free(struct locdata * ldata)1942d08521bSGarrett D'Amore __locdata_free(struct locdata *ldata)
1952d08521bSGarrett D'Amore {
1962d08521bSGarrett D'Amore for (int i = 0; i < NLOCDATA; i++)
1972d08521bSGarrett D'Amore libc_free(ldata->l_data[i]);
1982d08521bSGarrett D'Amore if (ldata->l_map != NULL && ldata->l_map_len)
1992d08521bSGarrett D'Amore (void) munmap(ldata->l_map, ldata->l_map_len);
2002d08521bSGarrett D'Amore lfree(ldata, sizeof (*ldata));
2012d08521bSGarrett D'Amore }
2022d08521bSGarrett D'Amore
2032d08521bSGarrett D'Amore /*
2042d08521bSGarrett D'Amore * It turns out that for performance reasons we would really like to
2052d08521bSGarrett D'Amore * cache the most recently referenced locale data to avoid wasteful
2062d08521bSGarrett D'Amore * loading from files.
2072d08521bSGarrett D'Amore */
2082d08521bSGarrett D'Amore
2092d08521bSGarrett D'Amore static struct locdata *cache_data[LC_ALL];
2102d08521bSGarrett D'Amore static struct locdata *cat_data[LC_ALL];
2112d08521bSGarrett D'Amore static mutex_t cache_lock = DEFAULTMUTEX;
2122d08521bSGarrett D'Amore
2132d08521bSGarrett D'Amore /*
2142d08521bSGarrett D'Amore * Returns the cached data if the locale name is the same. If not,
2152d08521bSGarrett D'Amore * returns NULL (cache miss). The locdata is returned with a hold on
2162d08521bSGarrett D'Amore * it, taken on behalf of the caller. The caller should drop the hold
2172d08521bSGarrett D'Amore * when it is finished.
2182d08521bSGarrett D'Amore */
2192d08521bSGarrett D'Amore static struct locdata *
locdata_get_cache(int category,const char * locname)2202d08521bSGarrett D'Amore locdata_get_cache(int category, const char *locname)
2212d08521bSGarrett D'Amore {
2222d08521bSGarrett D'Amore struct locdata *loc;
2232d08521bSGarrett D'Amore
2242d08521bSGarrett D'Amore if (category < 0 || category >= LC_ALL)
2252d08521bSGarrett D'Amore return (NULL);
2262d08521bSGarrett D'Amore
2272d08521bSGarrett D'Amore /* Try cache first. */
2282d08521bSGarrett D'Amore lmutex_lock(&cache_lock);
2292d08521bSGarrett D'Amore loc = cache_data[category];
2302d08521bSGarrett D'Amore
2312d08521bSGarrett D'Amore if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
2322d08521bSGarrett D'Amore lmutex_unlock(&cache_lock);
2332d08521bSGarrett D'Amore return (loc);
2342d08521bSGarrett D'Amore }
2352d08521bSGarrett D'Amore
2362d08521bSGarrett D'Amore /*
2372d08521bSGarrett D'Amore * Failing that try previously loaded locales (linear search) --
2382d08521bSGarrett D'Amore * this could be optimized to a hash, but its unlikely that a single
2392d08521bSGarrett D'Amore * application will ever need to work with more than a few locales.
2402d08521bSGarrett D'Amore */
2412d08521bSGarrett D'Amore for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
2422d08521bSGarrett D'Amore if (strcmp(locname, loc->l_lname) == 0) {
2432d08521bSGarrett D'Amore break;
2442d08521bSGarrett D'Amore }
2452d08521bSGarrett D'Amore }
2462d08521bSGarrett D'Amore
2472d08521bSGarrett D'Amore /*
2482d08521bSGarrett D'Amore * Finally, if we still don't have one, try loading the locale
2492d08521bSGarrett D'Amore * data from the actual on-disk data.
2502d08521bSGarrett D'Amore *
2512d08521bSGarrett D'Amore * We drop the lock (libc wants to ensure no internal locks
2522d08521bSGarrett D'Amore * are held when we call other routines required to read from
2532d08521bSGarrett D'Amore * files, allocate memory, etc.) There is a small race here,
2542d08521bSGarrett D'Amore * but the consequences of the race are benign -- if multiple
2552d08521bSGarrett D'Amore * threads hit this at precisely the same point, we could
2562d08521bSGarrett D'Amore * wind up with duplicates of the locale data in the cache.
2572d08521bSGarrett D'Amore *
2582d08521bSGarrett D'Amore * This wastes the memory for an extra copy of the locale
2592d08521bSGarrett D'Amore * data, but there is no further harm beyond that. Its not
2602d08521bSGarrett D'Amore * worth the effort to recode this to something "safe"
2612d08521bSGarrett D'Amore * (which would require rescanning the list, etc.), given
2622d08521bSGarrett D'Amore * that this race will probably never actually occur.
2632d08521bSGarrett D'Amore */
2642d08521bSGarrett D'Amore if (loc == NULL) {
2652d08521bSGarrett D'Amore lmutex_unlock(&cache_lock);
2662d08521bSGarrett D'Amore loc = (*loaders[category])(locname);
2672d08521bSGarrett D'Amore lmutex_lock(&cache_lock);
2682d08521bSGarrett D'Amore if (loc != NULL)
2692d08521bSGarrett D'Amore (void) strlcpy(loc->l_lname, locname,
2702d08521bSGarrett D'Amore sizeof (loc->l_lname));
2712d08521bSGarrett D'Amore }
2722d08521bSGarrett D'Amore
2732d08521bSGarrett D'Amore /*
2742d08521bSGarrett D'Amore * Assuming we got one, update the cache, and stick us on the list
2752d08521bSGarrett D'Amore * of loaded locale data. We insert into the head (more recent
2762d08521bSGarrett D'Amore * use is likely to win.)
2772d08521bSGarrett D'Amore */
2782d08521bSGarrett D'Amore if (loc != NULL) {
2792d08521bSGarrett D'Amore cache_data[category] = loc;
2802d08521bSGarrett D'Amore if (!loc->l_cached) {
2812d08521bSGarrett D'Amore loc->l_cached = 1;
2822d08521bSGarrett D'Amore loc->l_next = cat_data[category];
2832d08521bSGarrett D'Amore cat_data[category] = loc;
2842d08521bSGarrett D'Amore }
2852d08521bSGarrett D'Amore }
2862d08521bSGarrett D'Amore
2872d08521bSGarrett D'Amore lmutex_unlock(&cache_lock);
2882d08521bSGarrett D'Amore return (loc);
2892d08521bSGarrett D'Amore }
2902d08521bSGarrett D'Amore
291*13027a8eSYuri Pankov /* Charmap aliases, mostly found in Linux */
292*13027a8eSYuri Pankov static const struct {
293*13027a8eSYuri Pankov const char *alias;
294*13027a8eSYuri Pankov const char *name;
295*13027a8eSYuri Pankov } cmalias[] = {
296*13027a8eSYuri Pankov { "utf8", "UTF-8" },
297*13027a8eSYuri Pankov { "iso88591", "ISO8859-1" },
298*13027a8eSYuri Pankov { "iso885915", "ISO8859-15" },
299*13027a8eSYuri Pankov { "gb18030", "GB18030" },
300*13027a8eSYuri Pankov { "koi8r", "KOI8-R" },
301*13027a8eSYuri Pankov { NULL, NULL }
302*13027a8eSYuri Pankov };
303*13027a8eSYuri Pankov
3042d08521bSGarrett D'Amore /*
3052d08521bSGarrett D'Amore * Routine to get the locdata for a given category and locale.
3062d08521bSGarrett D'Amore * This includes retrieving it from cache, retrieving it from
3072d08521bSGarrett D'Amore * a file, etc.
3082d08521bSGarrett D'Amore */
3092d08521bSGarrett D'Amore static struct locdata *
locdata_get(int category,const char * locname)3102d08521bSGarrett D'Amore locdata_get(int category, const char *locname)
3112d08521bSGarrett D'Amore {
3122d08521bSGarrett D'Amore char scratch[ENCODING_LEN + 1];
313*13027a8eSYuri Pankov char scratch2[ENCODING_LEN + 1];
314*13027a8eSYuri Pankov char *slash, *cm;
3152d08521bSGarrett D'Amore int cnt;
3162d08521bSGarrett D'Amore int len;
317*13027a8eSYuri Pankov int i;
3182d08521bSGarrett D'Amore
3192d08521bSGarrett D'Amore if (locname == NULL || *locname == 0) {
3202d08521bSGarrett D'Amore locname = get_locale_env(category);
3212d08521bSGarrett D'Amore }
3222d08521bSGarrett D'Amore
3232d08521bSGarrett D'Amore /*
3242d08521bSGarrett D'Amore * Extract the locale name for the category if it is a composite
3252d08521bSGarrett D'Amore * locale.
3262d08521bSGarrett D'Amore */
3272d08521bSGarrett D'Amore if ((slash = strchr(locname, '/')) != NULL) {
3282d08521bSGarrett D'Amore for (cnt = category; cnt && slash != NULL; cnt--) {
3292d08521bSGarrett D'Amore locname = slash + 1;
3302d08521bSGarrett D'Amore slash = strchr(locname, '/');
3312d08521bSGarrett D'Amore }
3322d08521bSGarrett D'Amore if (slash) {
3332d08521bSGarrett D'Amore len = slash - locname + 1;
3342d08521bSGarrett D'Amore if (len >= sizeof (scratch)) {
3352d08521bSGarrett D'Amore len = sizeof (scratch);
3362d08521bSGarrett D'Amore }
3372d08521bSGarrett D'Amore } else {
3382d08521bSGarrett D'Amore len = sizeof (scratch);
3392d08521bSGarrett D'Amore }
3402d08521bSGarrett D'Amore (void) strlcpy(scratch, locname, len);
3412d08521bSGarrett D'Amore locname = scratch;
3422d08521bSGarrett D'Amore }
3432d08521bSGarrett D'Amore
3442d08521bSGarrett D'Amore if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
3452d08521bSGarrett D'Amore return (posix_locale.locdata[category]);
3462d08521bSGarrett D'Amore
347*13027a8eSYuri Pankov /* Handle charmap aliases */
348*13027a8eSYuri Pankov for (i = 0; cmalias[i].alias != NULL; i++) {
349*13027a8eSYuri Pankov if ((cm = strstr(locname, cmalias[i].alias)) != NULL &&
350*13027a8eSYuri Pankov strlen(cm) == strlen(cmalias[i].alias)) {
351*13027a8eSYuri Pankov len = cm - locname + 1;
352*13027a8eSYuri Pankov if (len + strlen(cmalias[i].name) >= sizeof (scratch2))
353*13027a8eSYuri Pankov break;
354*13027a8eSYuri Pankov (void) strlcpy(scratch2, locname, len);
355*13027a8eSYuri Pankov (void) strlcat(scratch2, cmalias[i].name,
356*13027a8eSYuri Pankov sizeof (scratch2));
357*13027a8eSYuri Pankov locname = scratch2;
358*13027a8eSYuri Pankov break;
359*13027a8eSYuri Pankov }
360*13027a8eSYuri Pankov }
361*13027a8eSYuri Pankov
3623125066dSYuri Pankov if ((strcmp(locname, "C.UTF-8") == 0) && (category != LC_CTYPE))
3633125066dSYuri Pankov return (&cutf_locdata[category]);
3643125066dSYuri Pankov
3652d08521bSGarrett D'Amore return (locdata_get_cache(category, locname));
3662d08521bSGarrett D'Amore }
3672d08521bSGarrett D'Amore
3682d08521bSGarrett D'Amore /* tsd destructor */
3692d08521bSGarrett D'Amore static void
freelocptr(void * arg)3702d08521bSGarrett D'Amore freelocptr(void *arg)
3712d08521bSGarrett D'Amore {
3722d08521bSGarrett D'Amore locale_t *locptr = arg;
3732d08521bSGarrett D'Amore if (*locptr != NULL)
3742d08521bSGarrett D'Amore freelocale(*locptr);
3752d08521bSGarrett D'Amore }
3762d08521bSGarrett D'Amore
3772d08521bSGarrett D'Amore static const char *
get_locale_env(int category)3782d08521bSGarrett D'Amore get_locale_env(int category)
3792d08521bSGarrett D'Amore {
3802d08521bSGarrett D'Amore const char *env;
3812d08521bSGarrett D'Amore
3822d08521bSGarrett D'Amore /* 1. check LC_ALL. */
3832d08521bSGarrett D'Amore env = getenv(categories[LC_ALL]);
3842d08521bSGarrett D'Amore
3852d08521bSGarrett D'Amore /* 2. check LC_* */
3862d08521bSGarrett D'Amore if (env == NULL || *env == '\0')
3872d08521bSGarrett D'Amore env = getenv(categories[category]);
3882d08521bSGarrett D'Amore
3892d08521bSGarrett D'Amore /* 3. check LANG */
3902d08521bSGarrett D'Amore if (env == NULL || *env == '\0')
3912d08521bSGarrett D'Amore env = getenv("LANG");
3922d08521bSGarrett D'Amore
3932d08521bSGarrett D'Amore /* 4. if none is set, fall to "C" */
3942d08521bSGarrett D'Amore if (env == NULL || *env == '\0')
3952d08521bSGarrett D'Amore env = "C";
3962d08521bSGarrett D'Amore
3972d08521bSGarrett D'Amore return (env);
3982d08521bSGarrett D'Amore }
3992d08521bSGarrett D'Amore
4002d08521bSGarrett D'Amore
4012d08521bSGarrett D'Amore /*
4022d08521bSGarrett D'Amore * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
4032d08521bSGarrett D'Amore * code will continue to use _ctype[520], but we prefer this function as
4042d08521bSGarrett D'Amore * it is the only way to get thread-specific information.
4052d08521bSGarrett D'Amore */
4062d08521bSGarrett D'Amore unsigned char
__mb_cur_max_l(locale_t loc)4072d08521bSGarrett D'Amore __mb_cur_max_l(locale_t loc)
4082d08521bSGarrett D'Amore {
4092d08521bSGarrett D'Amore return (loc->ctype->lc_max_mblen);
4102d08521bSGarrett D'Amore }
4112d08521bSGarrett D'Amore
4122d08521bSGarrett D'Amore unsigned char
__mb_cur_max(void)4132d08521bSGarrett D'Amore __mb_cur_max(void)
4142d08521bSGarrett D'Amore {
4152d08521bSGarrett D'Amore return (__mb_cur_max_l(uselocale(NULL)));
4162d08521bSGarrett D'Amore }
4172d08521bSGarrett D'Amore
4182d08521bSGarrett D'Amore /*
4192d08521bSGarrett D'Amore * Public interfaces.
4202d08521bSGarrett D'Amore */
4212d08521bSGarrett D'Amore
4222d08521bSGarrett D'Amore locale_t
duplocale(locale_t src)4232d08521bSGarrett D'Amore duplocale(locale_t src)
4242d08521bSGarrett D'Amore {
4252d08521bSGarrett D'Amore locale_t loc;
4262d08521bSGarrett D'Amore int i;
4272d08521bSGarrett D'Amore
4282d08521bSGarrett D'Amore loc = lmalloc(sizeof (*loc));
4292d08521bSGarrett D'Amore if (loc == NULL) {
4302d08521bSGarrett D'Amore return (NULL);
4312d08521bSGarrett D'Amore }
4322d08521bSGarrett D'Amore if (src == NULL) {
4332d08521bSGarrett D'Amore /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
4342d08521bSGarrett D'Amore src = ___global_locale;
4352d08521bSGarrett D'Amore }
4362d08521bSGarrett D'Amore for (i = 0; i < LC_ALL; i++) {
4372d08521bSGarrett D'Amore loc->locdata[i] = src->locdata[i];
4382d08521bSGarrett D'Amore loc->loaded[i] = 0;
4392d08521bSGarrett D'Amore }
4402d08521bSGarrett D'Amore loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
4412d08521bSGarrett D'Amore loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
4422d08521bSGarrett D'Amore loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
4432d08521bSGarrett D'Amore loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
4442d08521bSGarrett D'Amore loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
4452d08521bSGarrett D'Amore loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
4462d08521bSGarrett D'Amore loc->time = loc->locdata[LC_TIME]->l_data[0];
4472d08521bSGarrett D'Amore return (loc);
4482d08521bSGarrett D'Amore }
4492d08521bSGarrett D'Amore
4502d08521bSGarrett D'Amore void
freelocale(locale_t loc)4512d08521bSGarrett D'Amore freelocale(locale_t loc)
4522d08521bSGarrett D'Amore {
4532d08521bSGarrett D'Amore /*
4542d08521bSGarrett D'Amore * We take extra care never to free a saved locale created by
4552d08521bSGarrett D'Amore * setlocale(). This shouldn't be strictly necessary, but a little
4562d08521bSGarrett D'Amore * extra safety doesn't hurt here.
4572d08521bSGarrett D'Amore */
4582d08521bSGarrett D'Amore if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list))
4592d08521bSGarrett D'Amore lfree(loc, sizeof (*loc));
4602d08521bSGarrett D'Amore }
4612d08521bSGarrett D'Amore
4622d08521bSGarrett D'Amore locale_t
newlocale(int catmask,const char * locname,locale_t base)4632d08521bSGarrett D'Amore newlocale(int catmask, const char *locname, locale_t base)
4642d08521bSGarrett D'Amore {
4652d08521bSGarrett D'Amore locale_t loc;
4662d08521bSGarrett D'Amore int i, e;
4672d08521bSGarrett D'Amore
4682d08521bSGarrett D'Amore if (catmask & ~(LC_ALL_MASK)) {
4692d08521bSGarrett D'Amore errno = EINVAL;
4702d08521bSGarrett D'Amore return (NULL);
4712d08521bSGarrett D'Amore }
4722d08521bSGarrett D'Amore
4732d08521bSGarrett D'Amore /*
4742d08521bSGarrett D'Amore * Technically passing LC_GLOBAL_LOCALE here is illegal,
4752d08521bSGarrett D'Amore * but we allow it.
4762d08521bSGarrett D'Amore */
4772d08521bSGarrett D'Amore if (base == NULL || base == ___global_locale) {
4782d08521bSGarrett D'Amore loc = duplocale(___global_locale);
4792d08521bSGarrett D'Amore } else {
4802d08521bSGarrett D'Amore loc = duplocale(base);
4812d08521bSGarrett D'Amore }
4822d08521bSGarrett D'Amore if (loc == NULL) {
4832d08521bSGarrett D'Amore return (NULL);
4842d08521bSGarrett D'Amore }
4852d08521bSGarrett D'Amore
4862d08521bSGarrett D'Amore for (i = 0; i < LC_ALL; i++) {
4872d08521bSGarrett D'Amore struct locdata *ldata;
4882d08521bSGarrett D'Amore loc->loaded[i] = 0;
4892d08521bSGarrett D'Amore if (((1 << i) & catmask) == 0) {
4902d08521bSGarrett D'Amore /* Default to base locale if not overriding */
4912d08521bSGarrett D'Amore continue;
4922d08521bSGarrett D'Amore }
4932d08521bSGarrett D'Amore ldata = locdata_get(i, locname);
4942d08521bSGarrett D'Amore if (ldata == NULL) {
4952d08521bSGarrett D'Amore e = errno;
4962d08521bSGarrett D'Amore freelocale(loc);
4972d08521bSGarrett D'Amore errno = e;
4982d08521bSGarrett D'Amore return (NULL);
4992d08521bSGarrett D'Amore }
5002d08521bSGarrett D'Amore loc->locdata[i] = ldata;
5012d08521bSGarrett D'Amore }
5022d08521bSGarrett D'Amore loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
5032d08521bSGarrett D'Amore loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
5042d08521bSGarrett D'Amore loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
5052d08521bSGarrett D'Amore loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
5062d08521bSGarrett D'Amore loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
5072d08521bSGarrett D'Amore loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
5082d08521bSGarrett D'Amore loc->time = loc->locdata[LC_TIME]->l_data[0];
5092d08521bSGarrett D'Amore freelocale(base);
5102d08521bSGarrett D'Amore
5112d08521bSGarrett D'Amore return (mklocname(loc));
5122d08521bSGarrett D'Amore }
5132d08521bSGarrett D'Amore
5142d08521bSGarrett D'Amore locale_t
uselocale(locale_t loc)5152d08521bSGarrett D'Amore uselocale(locale_t loc)
5162d08521bSGarrett D'Amore {
5172d08521bSGarrett D'Amore locale_t lastloc = ___global_locale;
5182d08521bSGarrett D'Amore locale_t *locptr;
5192d08521bSGarrett D'Amore
5202d08521bSGarrett D'Amore locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
5212d08521bSGarrett D'Amore /* Should never occur */
5222d08521bSGarrett D'Amore if (locptr == NULL) {
5232d08521bSGarrett D'Amore errno = EINVAL;
5242d08521bSGarrett D'Amore return (NULL);
5252d08521bSGarrett D'Amore }
5262d08521bSGarrett D'Amore
5272d08521bSGarrett D'Amore if (*locptr != NULL)
5282d08521bSGarrett D'Amore lastloc = *locptr;
5292d08521bSGarrett D'Amore
5302d08521bSGarrett D'Amore /* Argument loc is NULL if we are just querying. */
5312d08521bSGarrett D'Amore if (loc != NULL) {
5322d08521bSGarrett D'Amore /*
5332d08521bSGarrett D'Amore * Set it to LC_GLOBAL_LOCAL to return to using
5342d08521bSGarrett D'Amore * the global locale (setlocale).
5352d08521bSGarrett D'Amore */
5362d08521bSGarrett D'Amore if (loc == ___global_locale) {
5372d08521bSGarrett D'Amore *locptr = NULL;
5382d08521bSGarrett D'Amore } else {
5392d08521bSGarrett D'Amore /* No validation of the provided locale at present */
5402d08521bSGarrett D'Amore *locptr = loc;
5412d08521bSGarrett D'Amore }
5422d08521bSGarrett D'Amore }
5432d08521bSGarrett D'Amore
5442d08521bSGarrett D'Amore /*
5452d08521bSGarrett D'Amore * The caller is responsible for freeing, of course it would be
5462d08521bSGarrett D'Amore * gross error to call freelocale() on a locale object that is still
5472d08521bSGarrett D'Amore * in use.
5482d08521bSGarrett D'Amore */
5492d08521bSGarrett D'Amore return (lastloc);
5502d08521bSGarrett D'Amore }
5512d08521bSGarrett D'Amore
5522d08521bSGarrett D'Amore static locale_t
mklocname(locale_t loc)5532d08521bSGarrett D'Amore mklocname(locale_t loc)
5542d08521bSGarrett D'Amore {
5552d08521bSGarrett D'Amore int composite = 0;
5562d08521bSGarrett D'Amore
5572d08521bSGarrett D'Amore /* Look to see if any category is different */
5582d08521bSGarrett D'Amore for (int i = 1; i < LC_ALL; ++i) {
5592d08521bSGarrett D'Amore if (strcmp(loc->locdata[0]->l_lname,
5602d08521bSGarrett D'Amore loc->locdata[i]->l_lname) != 0) {
5612d08521bSGarrett D'Amore composite = 1;
5622d08521bSGarrett D'Amore break;
5632d08521bSGarrett D'Amore }
5642d08521bSGarrett D'Amore }
5652d08521bSGarrett D'Amore
5662d08521bSGarrett D'Amore if (composite) {
5672d08521bSGarrett D'Amore /*
5682d08521bSGarrett D'Amore * Note ordering of these follows the numeric order,
5692d08521bSGarrett D'Amore * if the order is changed, then setlocale() will need
5702d08521bSGarrett D'Amore * to be changed as well.
5712d08521bSGarrett D'Amore */
5722d08521bSGarrett D'Amore (void) snprintf(loc->locname, sizeof (loc->locname),
5732d08521bSGarrett D'Amore "%s/%s/%s/%s/%s/%s",
5742d08521bSGarrett D'Amore loc->locdata[LC_CTYPE]->l_lname,
5752d08521bSGarrett D'Amore loc->locdata[LC_NUMERIC]->l_lname,
5762d08521bSGarrett D'Amore loc->locdata[LC_TIME]->l_lname,
5772d08521bSGarrett D'Amore loc->locdata[LC_COLLATE]->l_lname,
5782d08521bSGarrett D'Amore loc->locdata[LC_MONETARY]->l_lname,
5792d08521bSGarrett D'Amore loc->locdata[LC_MESSAGES]->l_lname);
5802d08521bSGarrett D'Amore } else {
5812d08521bSGarrett D'Amore (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
5822d08521bSGarrett D'Amore sizeof (loc->locname));
5832d08521bSGarrett D'Amore }
5842d08521bSGarrett D'Amore return (loc);
5852d08521bSGarrett D'Amore }
586