12d08521bSGarrett D'Amore /*
22d08521bSGarrett D'Amore  * This file and its contents are supplied under the terms of the
32d08521bSGarrett D'Amore  * Common Development and Distribution License ("CDDL"), version 1.0.
42d08521bSGarrett D'Amore  * You may only use this file in accordance with the terms of version
52d08521bSGarrett D'Amore  * 1.0 of the CDDL.
62d08521bSGarrett D'Amore  *
72d08521bSGarrett D'Amore  * A full copy of the text of the CDDL should have accompanied this
82d08521bSGarrett D'Amore  * source.  A copy of the CDDL is also available via the Internet at
92d08521bSGarrett D'Amore  * http://www.illumos.org/license/CDDL.
102d08521bSGarrett D'Amore  */
112d08521bSGarrett D'Amore 
122d08521bSGarrett D'Amore /*
132d08521bSGarrett D'Amore  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
142d08521bSGarrett D'Amore  */
152d08521bSGarrett D'Amore 
162d08521bSGarrett D'Amore /*
172d08521bSGarrett D'Amore  * This file implements the 2008 newlocale and friends handling.
182d08521bSGarrett D'Amore  */
192d08521bSGarrett D'Amore 
202d08521bSGarrett D'Amore #ifndef	_LCONV_C99
212d08521bSGarrett D'Amore #define	_LCONV_C99
222d08521bSGarrett D'Amore #endif
232d08521bSGarrett D'Amore 
242d08521bSGarrett D'Amore #include "lint.h"
252d08521bSGarrett D'Amore #include <atomic.h>
262d08521bSGarrett D'Amore #include <locale.h>
272d08521bSGarrett D'Amore #include <sys/types.h>
282d08521bSGarrett D'Amore #include <sys/mman.h>
292d08521bSGarrett D'Amore #include <errno.h>
302d08521bSGarrett D'Amore #include <string.h>
312d08521bSGarrett D'Amore #include "libc.h"
322d08521bSGarrett D'Amore #include "mtlib.h"
332d08521bSGarrett D'Amore #include "tsd.h"
342d08521bSGarrett D'Amore #include "localeimpl.h"
352d08521bSGarrett D'Amore #include "lctype.h"
362d08521bSGarrett D'Amore 
372d08521bSGarrett D'Amore /*
382d08521bSGarrett D'Amore  * Big Theory of Locales:
392d08521bSGarrett D'Amore  *
402d08521bSGarrett D'Amore  * (It is recommended that readers familiarize themselves with the POSIX
412d08521bSGarrett D'Amore  * 2008 (XPG Issue 7) specifications for locales, first.)
422d08521bSGarrett D'Amore  *
432d08521bSGarrett D'Amore  * Historically, we had a bunch of global variables that stored locale
442d08521bSGarrett D'Amore  * data.  While this worked well, it limited applications to a single locale
452d08521bSGarrett D'Amore  * at a time.  This doesn't work well in certain server applications.
462d08521bSGarrett D'Amore  *
472d08521bSGarrett D'Amore  * Issue 7, X/Open introduced the concept of a locale_t object, along with
482d08521bSGarrett D'Amore  * versions of functions that can take this object as a parameter, along
492d08521bSGarrett D'Amore  * with functions to clone and manipulate these locale objects.  The new
502d08521bSGarrett D'Amore  * functions are named with a _l() suffix.
512d08521bSGarrett D'Amore  *
 * Additionally, uselocale() is introduced, which can change the locale
 * of a single thread.  However, setlocale() can still be used to change
 * the global locale.
552d08521bSGarrett D'Amore  *
562d08521bSGarrett D'Amore  * In our implementation, we use libc's TSD to store the locale data that
572d08521bSGarrett D'Amore  * was previously global.  We still have global data because some applications
582d08521bSGarrett D'Amore  * have had those global objects compiled into them.  (Such applications will
592d08521bSGarrett D'Amore  * be unable to benefit from uselocale(), btw.)  The legacy routines are
602d08521bSGarrett D'Amore  * reimplemented as wrappers that use the appropriate locale object by
612d08521bSGarrett D'Amore  * calling uselocale().  uselocale() when passed a NULL pointer returns the
622d08521bSGarrett D'Amore  * thread-specific locale object if one is present, or the global locale
632d08521bSGarrett D'Amore  * object otherwise.  Note that once the TSD data is set, the only way
642d08521bSGarrett D'Amore  * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
652d08521bSGarrett D'Amore  * to uselocale().
662d08521bSGarrett D'Amore  *
672d08521bSGarrett D'Amore  * We are careful to minimize performance impact of multiple calls to
682d08521bSGarrett D'Amore  * uselocale() or setlocale() by using a cache of locale data whenever possible.
692d08521bSGarrett D'Amore  * As a consequence of this, applications that iterate over all possible
702d08521bSGarrett D'Amore  * locales will burn through a lot of virtual memory, but we find such
712d08521bSGarrett D'Amore  * applications rare.  (locale -a might be an exception, but it is short lived.)
722d08521bSGarrett D'Amore  *
732d08521bSGarrett D'Amore  * Category data is never released (although enclosing locale objects might be),
742d08521bSGarrett D'Amore  * in order to guarantee thread-safety.  Calling freelocale() on an object
752d08521bSGarrett D'Amore  * while it is in use by another thread is a programmer error (use-after-free)
762d08521bSGarrett D'Amore  * and we don't bother to note it further.
772d08521bSGarrett D'Amore  *
782d08521bSGarrett D'Amore  * Locale objects (global locales) established by setlocale() are also
792d08521bSGarrett D'Amore  * never freed (for MT safety), but we will save previous locale objects
802d08521bSGarrett D'Amore  * and reuse them when we can.
812d08521bSGarrett D'Amore  */
822d08521bSGarrett D'Amore 
832d08521bSGarrett D'Amore typedef struct locdata *(*loadfn_t)(const char *);
842d08521bSGarrett D'Amore 
852d08521bSGarrett D'Amore static const loadfn_t loaders[LC_ALL] = {
862d08521bSGarrett D'Amore 	__lc_ctype_load,
872d08521bSGarrett D'Amore 	__lc_numeric_load,
882d08521bSGarrett D'Amore 	__lc_time_load,
892d08521bSGarrett D'Amore 	__lc_collate_load,
902d08521bSGarrett D'Amore 	__lc_monetary_load,
912d08521bSGarrett D'Amore 	__lc_messages_load,
922d08521bSGarrett D'Amore };
932d08521bSGarrett D'Amore 
942d08521bSGarrett D'Amore extern struct lc_monetary lc_monetary_posix;
952d08521bSGarrett D'Amore extern struct lc_numeric lc_numeric_posix;
962d08521bSGarrett D'Amore extern struct lc_messages lc_messages_posix;
972d08521bSGarrett D'Amore extern struct lc_time lc_time_posix;
982d08521bSGarrett D'Amore extern struct lc_ctype lc_ctype_posix;
992d08521bSGarrett D'Amore extern struct lc_collate lc_collate_posix;
100bc09504fSGordon Ross extern struct _RuneLocale _DefaultRuneLocale;
1012d08521bSGarrett D'Amore 
102732efd55SDan McDonald static struct _locale posix_locale = {
1032d08521bSGarrett D'Amore 	/* locdata */
1042d08521bSGarrett D'Amore 	.locdata = {
1052d08521bSGarrett D'Amore 		&__posix_ctype_locdata,
1062d08521bSGarrett D'Amore 		&__posix_numeric_locdata,
1072d08521bSGarrett D'Amore 		&__posix_time_locdata,
1082d08521bSGarrett D'Amore 		&__posix_collate_locdata,
1092d08521bSGarrett D'Amore 		&__posix_monetary_locdata,
1102d08521bSGarrett D'Amore 		&__posix_messages_locdata,
1112d08521bSGarrett D'Amore 	},
1122d08521bSGarrett D'Amore 	.locname = "C",
1132d08521bSGarrett D'Amore 	.ctype = &lc_ctype_posix,
1142d08521bSGarrett D'Amore 	.numeric = &lc_numeric_posix,
1152d08521bSGarrett D'Amore 	.collate = &lc_collate_posix,
1162d08521bSGarrett D'Amore 	.monetary = &lc_monetary_posix,
1172d08521bSGarrett D'Amore 	.messages = &lc_messages_posix,
1182d08521bSGarrett D'Amore 	.time = &lc_time_posix,
1192d08521bSGarrett D'Amore 	.runelocale = &_DefaultRuneLocale,
1202d08521bSGarrett D'Amore };
1212d08521bSGarrett D'Amore 
1222d08521bSGarrett D'Amore locale_t ___global_locale = &posix_locale;
1232d08521bSGarrett D'Amore 
1242d08521bSGarrett D'Amore locale_t
__global_locale(void)1252d08521bSGarrett D'Amore __global_locale(void)
1262d08521bSGarrett D'Amore {
1272d08521bSGarrett D'Amore 	return (___global_locale);
1282d08521bSGarrett D'Amore }
1292d08521bSGarrett D'Amore 
1303125066dSYuri Pankov /*
1313125066dSYuri Pankov  * Locale data for hybrid C.UTF-8 locale having all the characteristics of
1323125066dSYuri Pankov  * default C/POSIX locale, except for LC_CTYPE data which is retrieved from
1333125066dSYuri Pankov  * cache/file as for other UTF-8 locales.
1343125066dSYuri Pankov  */
1353125066dSYuri Pankov static struct locdata cutf_locdata[LC_ALL] = {
1363125066dSYuri Pankov 	{ "C.UTF-8", NULL }, /* unused */
1373125066dSYuri Pankov 	{ "C.UTF-8", &lc_numeric_posix },
1383125066dSYuri Pankov 	{ "C.UTF-8", &lc_time_posix },
1393125066dSYuri Pankov 	{ "C.UTF-8", &lc_collate_posix },
1403125066dSYuri Pankov 	{ "C.UTF-8", &lc_monetary_posix },
1413125066dSYuri Pankov 	{ "C.UTF-8", &lc_messages_posix },
1423125066dSYuri Pankov };
1433125066dSYuri Pankov 
/*
 * Environment variable names used with getenv() for each locale category,
 * indexed by LC_* category number (including LC_ALL).  Note that this was
 * modified for Solaris.  See <iso/locale_iso.h>.
 *
 * The table is read-only, and its size is tied to NUM_CATS so the two
 * cannot drift apart.
 */
#define	NUM_CATS	7
static const char *const categories[NUM_CATS] = {
	[LC_CTYPE]	= "LC_CTYPE",
	[LC_NUMERIC]	= "LC_NUMERIC",
	[LC_TIME]	= "LC_TIME",
	[LC_COLLATE]	= "LC_COLLATE",
	[LC_MONETARY]	= "LC_MONETARY",
	[LC_MESSAGES]	= "LC_MESSAGES",
	[LC_ALL]	= "LC_ALL",
};
1582d08521bSGarrett D'Amore 
1592d08521bSGarrett D'Amore /*
1602d08521bSGarrett D'Amore  * Prototypes.
1612d08521bSGarrett D'Amore  */
1622d08521bSGarrett D'Amore static const char *get_locale_env(int);
163815e3086SToomas Soome static struct locdata *locdata_get(int, const char *);
1642d08521bSGarrett D'Amore static struct locdata *locdata_get_cache(int, const char *);
1652d08521bSGarrett D'Amore static locale_t mklocname(locale_t);
1662d08521bSGarrett D'Amore 
1672d08521bSGarrett D'Amore /*
1682d08521bSGarrett D'Amore  * Some utility routines.
1692d08521bSGarrett D'Amore  */
1702d08521bSGarrett D'Amore 
1712d08521bSGarrett D'Amore struct locdata *
__locdata_alloc(const char * name,size_t memsz)1722d08521bSGarrett D'Amore __locdata_alloc(const char *name, size_t memsz)
1732d08521bSGarrett D'Amore {
1742d08521bSGarrett D'Amore 	struct locdata *ldata;
1752d08521bSGarrett D'Amore 
1762d08521bSGarrett D'Amore 	if ((ldata = lmalloc(sizeof (*ldata))) == NULL) {
1772d08521bSGarrett D'Amore 		return (NULL);
1782d08521bSGarrett D'Amore 	}
1792d08521bSGarrett D'Amore 	if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) {
1802d08521bSGarrett D'Amore 		lfree(ldata, sizeof (*ldata));
1812d08521bSGarrett D'Amore 		errno = ENOMEM;
1822d08521bSGarrett D'Amore 		return (NULL);
1832d08521bSGarrett D'Amore 	}
1842d08521bSGarrett D'Amore 	(void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
1852d08521bSGarrett D'Amore 
1862d08521bSGarrett D'Amore 	return (ldata);
1872d08521bSGarrett D'Amore }
1882d08521bSGarrett D'Amore 
1892d08521bSGarrett D'Amore /*
1902d08521bSGarrett D'Amore  * Normally we never free locale data truly, but if we failed to load it
1912d08521bSGarrett D'Amore  * for some reason, this routine is used to cleanup the partial mess.
1922d08521bSGarrett D'Amore  */
1932d08521bSGarrett D'Amore void
__locdata_free(struct locdata * ldata)1942d08521bSGarrett D'Amore __locdata_free(struct locdata *ldata)
1952d08521bSGarrett D'Amore {
1962d08521bSGarrett D'Amore 	for (int i = 0; i < NLOCDATA; i++)
1972d08521bSGarrett D'Amore 		libc_free(ldata->l_data[i]);
1982d08521bSGarrett D'Amore 	if (ldata->l_map != NULL && ldata->l_map_len)
1992d08521bSGarrett D'Amore 		(void) munmap(ldata->l_map, ldata->l_map_len);
2002d08521bSGarrett D'Amore 	lfree(ldata, sizeof (*ldata));
2012d08521bSGarrett D'Amore }
2022d08521bSGarrett D'Amore 
2032d08521bSGarrett D'Amore /*
2042d08521bSGarrett D'Amore  * It turns out that for performance reasons we would really like to
2052d08521bSGarrett D'Amore  * cache the most recently referenced locale data to avoid wasteful
2062d08521bSGarrett D'Amore  * loading from files.
2072d08521bSGarrett D'Amore  */
2082d08521bSGarrett D'Amore 
2092d08521bSGarrett D'Amore static struct locdata *cache_data[LC_ALL];
2102d08521bSGarrett D'Amore static struct locdata *cat_data[LC_ALL];
2112d08521bSGarrett D'Amore static mutex_t cache_lock = DEFAULTMUTEX;
2122d08521bSGarrett D'Amore 
2132d08521bSGarrett D'Amore /*
2142d08521bSGarrett D'Amore  * Returns the cached data if the locale name is the same.  If not,
2152d08521bSGarrett D'Amore  * returns NULL (cache miss).  The locdata is returned with a hold on
2162d08521bSGarrett D'Amore  * it, taken on behalf of the caller.  The caller should drop the hold
2172d08521bSGarrett D'Amore  * when it is finished.
2182d08521bSGarrett D'Amore  */
2192d08521bSGarrett D'Amore static struct locdata *
locdata_get_cache(int category,const char * locname)2202d08521bSGarrett D'Amore locdata_get_cache(int category, const char *locname)
2212d08521bSGarrett D'Amore {
2222d08521bSGarrett D'Amore 	struct locdata *loc;
2232d08521bSGarrett D'Amore 
2242d08521bSGarrett D'Amore 	if (category < 0 || category >= LC_ALL)
2252d08521bSGarrett D'Amore 		return (NULL);
2262d08521bSGarrett D'Amore 
2272d08521bSGarrett D'Amore 	/* Try cache first. */
2282d08521bSGarrett D'Amore 	lmutex_lock(&cache_lock);
2292d08521bSGarrett D'Amore 	loc = cache_data[category];
2302d08521bSGarrett D'Amore 
2312d08521bSGarrett D'Amore 	if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
2322d08521bSGarrett D'Amore 		lmutex_unlock(&cache_lock);
2332d08521bSGarrett D'Amore 		return (loc);
2342d08521bSGarrett D'Amore 	}
2352d08521bSGarrett D'Amore 
2362d08521bSGarrett D'Amore 	/*
2372d08521bSGarrett D'Amore 	 * Failing that try previously loaded locales (linear search) --
2382d08521bSGarrett D'Amore 	 * this could be optimized to a hash, but its unlikely that a single
2392d08521bSGarrett D'Amore 	 * application will ever need to work with more than a few locales.
2402d08521bSGarrett D'Amore 	 */
2412d08521bSGarrett D'Amore 	for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
2422d08521bSGarrett D'Amore 		if (strcmp(locname, loc->l_lname) == 0) {
2432d08521bSGarrett D'Amore 			break;
2442d08521bSGarrett D'Amore 		}
2452d08521bSGarrett D'Amore 	}
2462d08521bSGarrett D'Amore 
2472d08521bSGarrett D'Amore 	/*
2482d08521bSGarrett D'Amore 	 * Finally, if we still don't have one, try loading the locale
2492d08521bSGarrett D'Amore 	 * data from the actual on-disk data.
2502d08521bSGarrett D'Amore 	 *
2512d08521bSGarrett D'Amore 	 * We drop the lock (libc wants to ensure no internal locks
2522d08521bSGarrett D'Amore 	 * are held when we call other routines required to read from
2532d08521bSGarrett D'Amore 	 * files, allocate memory, etc.)  There is a small race here,
2542d08521bSGarrett D'Amore 	 * but the consequences of the race are benign -- if multiple
2552d08521bSGarrett D'Amore 	 * threads hit this at precisely the same point, we could
2562d08521bSGarrett D'Amore 	 * wind up with duplicates of the locale data in the cache.
2572d08521bSGarrett D'Amore 	 *
2582d08521bSGarrett D'Amore 	 * This wastes the memory for an extra copy of the locale
2592d08521bSGarrett D'Amore 	 * data, but there is no further harm beyond that.  Its not
2602d08521bSGarrett D'Amore 	 * worth the effort to recode this to something "safe"
2612d08521bSGarrett D'Amore 	 * (which would require rescanning the list, etc.), given
2622d08521bSGarrett D'Amore 	 * that this race will probably never actually occur.
2632d08521bSGarrett D'Amore 	 */
2642d08521bSGarrett D'Amore 	if (loc == NULL) {
2652d08521bSGarrett D'Amore 		lmutex_unlock(&cache_lock);
2662d08521bSGarrett D'Amore 		loc = (*loaders[category])(locname);
2672d08521bSGarrett D'Amore 		lmutex_lock(&cache_lock);
2682d08521bSGarrett D'Amore 		if (loc != NULL)
2692d08521bSGarrett D'Amore 			(void) strlcpy(loc->l_lname, locname,
2702d08521bSGarrett D'Amore 			    sizeof (loc->l_lname));
2712d08521bSGarrett D'Amore 	}
2722d08521bSGarrett D'Amore 
2732d08521bSGarrett D'Amore 	/*
2742d08521bSGarrett D'Amore 	 * Assuming we got one, update the cache, and stick us on the list
2752d08521bSGarrett D'Amore 	 * of loaded locale data.  We insert into the head (more recent
2762d08521bSGarrett D'Amore 	 * use is likely to win.)
2772d08521bSGarrett D'Amore 	 */
2782d08521bSGarrett D'Amore 	if (loc != NULL) {
2792d08521bSGarrett D'Amore 		cache_data[category] = loc;
2802d08521bSGarrett D'Amore 		if (!loc->l_cached) {
2812d08521bSGarrett D'Amore 			loc->l_cached = 1;
2822d08521bSGarrett D'Amore 			loc->l_next = cat_data[category];
2832d08521bSGarrett D'Amore 			cat_data[category] = loc;
2842d08521bSGarrett D'Amore 		}
2852d08521bSGarrett D'Amore 	}
2862d08521bSGarrett D'Amore 
2872d08521bSGarrett D'Amore 	lmutex_unlock(&cache_lock);
2882d08521bSGarrett D'Amore 	return (loc);
2892d08521bSGarrett D'Amore }
2902d08521bSGarrett D'Amore 
/*
 * Charmap aliases, mostly found in Linux.  Each entry maps an alternate
 * spelling of a charmap (as it appears at the end of a locale name) to
 * the spelling used here.  The table ends with a NULL alias.
 */
static const struct {
	const char *alias;	/* spelling to recognize */
	const char *name;	/* spelling to substitute */
} cmalias[] = {
	{ "utf8",	"UTF-8" },
	{ "iso88591",	"ISO8859-1" },
	{ "iso885915",	"ISO8859-15" },
	{ "gb18030",	"GB18030" },
	{ "koi8r",	"KOI8-R" },
	{ NULL,		NULL }
};
303*13027a8eSYuri Pankov 
3042d08521bSGarrett D'Amore /*
3052d08521bSGarrett D'Amore  * Routine to get the locdata for a given category and locale.
3062d08521bSGarrett D'Amore  * This includes retrieving it from cache, retrieving it from
3072d08521bSGarrett D'Amore  * a file, etc.
3082d08521bSGarrett D'Amore  */
3092d08521bSGarrett D'Amore static struct locdata *
locdata_get(int category,const char * locname)3102d08521bSGarrett D'Amore locdata_get(int category, const char *locname)
3112d08521bSGarrett D'Amore {
3122d08521bSGarrett D'Amore 	char scratch[ENCODING_LEN + 1];
313*13027a8eSYuri Pankov 	char scratch2[ENCODING_LEN + 1];
314*13027a8eSYuri Pankov 	char *slash, *cm;
3152d08521bSGarrett D'Amore 	int cnt;
3162d08521bSGarrett D'Amore 	int len;
317*13027a8eSYuri Pankov 	int i;
3182d08521bSGarrett D'Amore 
3192d08521bSGarrett D'Amore 	if (locname == NULL || *locname == 0) {
3202d08521bSGarrett D'Amore 		locname = get_locale_env(category);
3212d08521bSGarrett D'Amore 	}
3222d08521bSGarrett D'Amore 
3232d08521bSGarrett D'Amore 	/*
3242d08521bSGarrett D'Amore 	 * Extract the locale name for the category if it is a composite
3252d08521bSGarrett D'Amore 	 * locale.
3262d08521bSGarrett D'Amore 	 */
3272d08521bSGarrett D'Amore 	if ((slash = strchr(locname, '/')) != NULL) {
3282d08521bSGarrett D'Amore 		for (cnt = category; cnt && slash != NULL; cnt--) {
3292d08521bSGarrett D'Amore 			locname = slash + 1;
3302d08521bSGarrett D'Amore 			slash = strchr(locname, '/');
3312d08521bSGarrett D'Amore 		}
3322d08521bSGarrett D'Amore 		if (slash) {
3332d08521bSGarrett D'Amore 			len = slash - locname + 1;
3342d08521bSGarrett D'Amore 			if (len >= sizeof (scratch)) {
3352d08521bSGarrett D'Amore 				len = sizeof (scratch);
3362d08521bSGarrett D'Amore 			}
3372d08521bSGarrett D'Amore 		} else {
3382d08521bSGarrett D'Amore 			len = sizeof (scratch);
3392d08521bSGarrett D'Amore 		}
3402d08521bSGarrett D'Amore 		(void) strlcpy(scratch, locname, len);
3412d08521bSGarrett D'Amore 		locname = scratch;
3422d08521bSGarrett D'Amore 	}
3432d08521bSGarrett D'Amore 
3442d08521bSGarrett D'Amore 	if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
3452d08521bSGarrett D'Amore 		return (posix_locale.locdata[category]);
3462d08521bSGarrett D'Amore 
347*13027a8eSYuri Pankov 	/* Handle charmap aliases */
348*13027a8eSYuri Pankov 	for (i = 0; cmalias[i].alias != NULL; i++) {
349*13027a8eSYuri Pankov 		if ((cm = strstr(locname, cmalias[i].alias)) != NULL &&
350*13027a8eSYuri Pankov 		    strlen(cm) == strlen(cmalias[i].alias)) {
351*13027a8eSYuri Pankov 			len = cm - locname + 1;
352*13027a8eSYuri Pankov 			if (len + strlen(cmalias[i].name) >= sizeof (scratch2))
353*13027a8eSYuri Pankov 				break;
354*13027a8eSYuri Pankov 			(void) strlcpy(scratch2, locname, len);
355*13027a8eSYuri Pankov 			(void) strlcat(scratch2, cmalias[i].name,
356*13027a8eSYuri Pankov 			    sizeof (scratch2));
357*13027a8eSYuri Pankov 			locname = scratch2;
358*13027a8eSYuri Pankov 			break;
359*13027a8eSYuri Pankov 		}
360*13027a8eSYuri Pankov 	}
361*13027a8eSYuri Pankov 
3623125066dSYuri Pankov 	if ((strcmp(locname, "C.UTF-8") == 0) && (category != LC_CTYPE))
3633125066dSYuri Pankov 		return (&cutf_locdata[category]);
3643125066dSYuri Pankov 
3652d08521bSGarrett D'Amore 	return (locdata_get_cache(category, locname));
3662d08521bSGarrett D'Amore }
3672d08521bSGarrett D'Amore 
3682d08521bSGarrett D'Amore /* tsd destructor */
3692d08521bSGarrett D'Amore static void
freelocptr(void * arg)3702d08521bSGarrett D'Amore freelocptr(void *arg)
3712d08521bSGarrett D'Amore {
3722d08521bSGarrett D'Amore 	locale_t *locptr = arg;
3732d08521bSGarrett D'Amore 	if (*locptr != NULL)
3742d08521bSGarrett D'Amore 		freelocale(*locptr);
3752d08521bSGarrett D'Amore }
3762d08521bSGarrett D'Amore 
3772d08521bSGarrett D'Amore static const char *
get_locale_env(int category)3782d08521bSGarrett D'Amore get_locale_env(int category)
3792d08521bSGarrett D'Amore {
3802d08521bSGarrett D'Amore 	const char *env;
3812d08521bSGarrett D'Amore 
3822d08521bSGarrett D'Amore 	/* 1. check LC_ALL. */
3832d08521bSGarrett D'Amore 	env = getenv(categories[LC_ALL]);
3842d08521bSGarrett D'Amore 
3852d08521bSGarrett D'Amore 	/* 2. check LC_* */
3862d08521bSGarrett D'Amore 	if (env == NULL || *env == '\0')
3872d08521bSGarrett D'Amore 		env = getenv(categories[category]);
3882d08521bSGarrett D'Amore 
3892d08521bSGarrett D'Amore 	/* 3. check LANG */
3902d08521bSGarrett D'Amore 	if (env == NULL || *env == '\0')
3912d08521bSGarrett D'Amore 		env = getenv("LANG");
3922d08521bSGarrett D'Amore 
3932d08521bSGarrett D'Amore 	/* 4. if none is set, fall to "C" */
3942d08521bSGarrett D'Amore 	if (env == NULL || *env == '\0')
3952d08521bSGarrett D'Amore 		env = "C";
3962d08521bSGarrett D'Amore 
3972d08521bSGarrett D'Amore 	return (env);
3982d08521bSGarrett D'Amore }
3992d08521bSGarrett D'Amore 
4002d08521bSGarrett D'Amore 
4012d08521bSGarrett D'Amore /*
4022d08521bSGarrett D'Amore  * This routine is exposed via the MB_CUR_MAX macro.  Note that legacy
4032d08521bSGarrett D'Amore  * code will continue to use _ctype[520], but we prefer this function as
4042d08521bSGarrett D'Amore  * it is the only way to get thread-specific information.
4052d08521bSGarrett D'Amore  */
4062d08521bSGarrett D'Amore unsigned char
__mb_cur_max_l(locale_t loc)4072d08521bSGarrett D'Amore __mb_cur_max_l(locale_t loc)
4082d08521bSGarrett D'Amore {
4092d08521bSGarrett D'Amore 	return (loc->ctype->lc_max_mblen);
4102d08521bSGarrett D'Amore }
4112d08521bSGarrett D'Amore 
4122d08521bSGarrett D'Amore unsigned char
__mb_cur_max(void)4132d08521bSGarrett D'Amore __mb_cur_max(void)
4142d08521bSGarrett D'Amore {
4152d08521bSGarrett D'Amore 	return (__mb_cur_max_l(uselocale(NULL)));
4162d08521bSGarrett D'Amore }
4172d08521bSGarrett D'Amore 
4182d08521bSGarrett D'Amore /*
4192d08521bSGarrett D'Amore  * Public interfaces.
4202d08521bSGarrett D'Amore  */
4212d08521bSGarrett D'Amore 
4222d08521bSGarrett D'Amore locale_t
duplocale(locale_t src)4232d08521bSGarrett D'Amore duplocale(locale_t src)
4242d08521bSGarrett D'Amore {
4252d08521bSGarrett D'Amore 	locale_t	loc;
4262d08521bSGarrett D'Amore 	int		i;
4272d08521bSGarrett D'Amore 
4282d08521bSGarrett D'Amore 	loc = lmalloc(sizeof (*loc));
4292d08521bSGarrett D'Amore 	if (loc == NULL) {
4302d08521bSGarrett D'Amore 		return (NULL);
4312d08521bSGarrett D'Amore 	}
4322d08521bSGarrett D'Amore 	if (src == NULL) {
4332d08521bSGarrett D'Amore 		/* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
4342d08521bSGarrett D'Amore 		src = ___global_locale;
4352d08521bSGarrett D'Amore 	}
4362d08521bSGarrett D'Amore 	for (i = 0; i < LC_ALL; i++) {
4372d08521bSGarrett D'Amore 		loc->locdata[i] = src->locdata[i];
4382d08521bSGarrett D'Amore 		loc->loaded[i] = 0;
4392d08521bSGarrett D'Amore 	}
4402d08521bSGarrett D'Amore 	loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
4412d08521bSGarrett D'Amore 	loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
4422d08521bSGarrett D'Amore 	loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
4432d08521bSGarrett D'Amore 	loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
4442d08521bSGarrett D'Amore 	loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
4452d08521bSGarrett D'Amore 	loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
4462d08521bSGarrett D'Amore 	loc->time = loc->locdata[LC_TIME]->l_data[0];
4472d08521bSGarrett D'Amore 	return (loc);
4482d08521bSGarrett D'Amore }
4492d08521bSGarrett D'Amore 
4502d08521bSGarrett D'Amore void
freelocale(locale_t loc)4512d08521bSGarrett D'Amore freelocale(locale_t loc)
4522d08521bSGarrett D'Amore {
4532d08521bSGarrett D'Amore 	/*
4542d08521bSGarrett D'Amore 	 * We take extra care never to free a saved locale created by
4552d08521bSGarrett D'Amore 	 * setlocale().  This shouldn't be strictly necessary, but a little
4562d08521bSGarrett D'Amore 	 * extra safety doesn't hurt here.
4572d08521bSGarrett D'Amore 	 */
4582d08521bSGarrett D'Amore 	if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list))
4592d08521bSGarrett D'Amore 		lfree(loc, sizeof (*loc));
4602d08521bSGarrett D'Amore }
4612d08521bSGarrett D'Amore 
4622d08521bSGarrett D'Amore locale_t
newlocale(int catmask,const char * locname,locale_t base)4632d08521bSGarrett D'Amore newlocale(int catmask, const char *locname, locale_t base)
4642d08521bSGarrett D'Amore {
4652d08521bSGarrett D'Amore 	locale_t loc;
4662d08521bSGarrett D'Amore 	int i, e;
4672d08521bSGarrett D'Amore 
4682d08521bSGarrett D'Amore 	if (catmask & ~(LC_ALL_MASK)) {
4692d08521bSGarrett D'Amore 		errno = EINVAL;
4702d08521bSGarrett D'Amore 		return (NULL);
4712d08521bSGarrett D'Amore 	}
4722d08521bSGarrett D'Amore 
4732d08521bSGarrett D'Amore 	/*
4742d08521bSGarrett D'Amore 	 * Technically passing LC_GLOBAL_LOCALE here is illegal,
4752d08521bSGarrett D'Amore 	 * but we allow it.
4762d08521bSGarrett D'Amore 	 */
4772d08521bSGarrett D'Amore 	if (base == NULL || base == ___global_locale) {
4782d08521bSGarrett D'Amore 		loc = duplocale(___global_locale);
4792d08521bSGarrett D'Amore 	} else {
4802d08521bSGarrett D'Amore 		loc = duplocale(base);
4812d08521bSGarrett D'Amore 	}
4822d08521bSGarrett D'Amore 	if (loc == NULL) {
4832d08521bSGarrett D'Amore 		return (NULL);
4842d08521bSGarrett D'Amore 	}
4852d08521bSGarrett D'Amore 
4862d08521bSGarrett D'Amore 	for (i = 0; i < LC_ALL; i++) {
4872d08521bSGarrett D'Amore 		struct locdata *ldata;
4882d08521bSGarrett D'Amore 		loc->loaded[i] = 0;
4892d08521bSGarrett D'Amore 		if (((1 << i) & catmask) == 0) {
4902d08521bSGarrett D'Amore 			/* Default to base locale if not overriding */
4912d08521bSGarrett D'Amore 			continue;
4922d08521bSGarrett D'Amore 		}
4932d08521bSGarrett D'Amore 		ldata = locdata_get(i, locname);
4942d08521bSGarrett D'Amore 		if (ldata == NULL) {
4952d08521bSGarrett D'Amore 			e = errno;
4962d08521bSGarrett D'Amore 			freelocale(loc);
4972d08521bSGarrett D'Amore 			errno = e;
4982d08521bSGarrett D'Amore 			return (NULL);
4992d08521bSGarrett D'Amore 		}
5002d08521bSGarrett D'Amore 		loc->locdata[i] = ldata;
5012d08521bSGarrett D'Amore 	}
5022d08521bSGarrett D'Amore 	loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
5032d08521bSGarrett D'Amore 	loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
5042d08521bSGarrett D'Amore 	loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
5052d08521bSGarrett D'Amore 	loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
5062d08521bSGarrett D'Amore 	loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
5072d08521bSGarrett D'Amore 	loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
5082d08521bSGarrett D'Amore 	loc->time = loc->locdata[LC_TIME]->l_data[0];
5092d08521bSGarrett D'Amore 	freelocale(base);
5102d08521bSGarrett D'Amore 
5112d08521bSGarrett D'Amore 	return (mklocname(loc));
5122d08521bSGarrett D'Amore }
5132d08521bSGarrett D'Amore 
5142d08521bSGarrett D'Amore locale_t
uselocale(locale_t loc)5152d08521bSGarrett D'Amore uselocale(locale_t loc)
5162d08521bSGarrett D'Amore {
5172d08521bSGarrett D'Amore 	locale_t lastloc = ___global_locale;
5182d08521bSGarrett D'Amore 	locale_t *locptr;
5192d08521bSGarrett D'Amore 
5202d08521bSGarrett D'Amore 	locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
5212d08521bSGarrett D'Amore 	/* Should never occur */
5222d08521bSGarrett D'Amore 	if (locptr == NULL) {
5232d08521bSGarrett D'Amore 		errno = EINVAL;
5242d08521bSGarrett D'Amore 		return (NULL);
5252d08521bSGarrett D'Amore 	}
5262d08521bSGarrett D'Amore 
5272d08521bSGarrett D'Amore 	if (*locptr != NULL)
5282d08521bSGarrett D'Amore 		lastloc = *locptr;
5292d08521bSGarrett D'Amore 
5302d08521bSGarrett D'Amore 	/* Argument loc is NULL if we are just querying. */
5312d08521bSGarrett D'Amore 	if (loc != NULL) {
5322d08521bSGarrett D'Amore 		/*
5332d08521bSGarrett D'Amore 		 * Set it to LC_GLOBAL_LOCAL to return to using
5342d08521bSGarrett D'Amore 		 * the global locale (setlocale).
5352d08521bSGarrett D'Amore 		 */
5362d08521bSGarrett D'Amore 		if (loc == ___global_locale) {
5372d08521bSGarrett D'Amore 			*locptr = NULL;
5382d08521bSGarrett D'Amore 		} else {
5392d08521bSGarrett D'Amore 			/* No validation of the provided locale at present */
5402d08521bSGarrett D'Amore 			*locptr = loc;
5412d08521bSGarrett D'Amore 		}
5422d08521bSGarrett D'Amore 	}
5432d08521bSGarrett D'Amore 
5442d08521bSGarrett D'Amore 	/*
5452d08521bSGarrett D'Amore 	 * The caller is responsible for freeing, of course it would be
5462d08521bSGarrett D'Amore 	 * gross error to call freelocale() on a locale object that is still
5472d08521bSGarrett D'Amore 	 * in use.
5482d08521bSGarrett D'Amore 	 */
5492d08521bSGarrett D'Amore 	return (lastloc);
5502d08521bSGarrett D'Amore }
5512d08521bSGarrett D'Amore 
5522d08521bSGarrett D'Amore static locale_t
mklocname(locale_t loc)5532d08521bSGarrett D'Amore mklocname(locale_t loc)
5542d08521bSGarrett D'Amore {
5552d08521bSGarrett D'Amore 	int composite = 0;
5562d08521bSGarrett D'Amore 
5572d08521bSGarrett D'Amore 	/* Look to see if any category is different */
5582d08521bSGarrett D'Amore 	for (int i = 1; i < LC_ALL; ++i) {
5592d08521bSGarrett D'Amore 		if (strcmp(loc->locdata[0]->l_lname,
5602d08521bSGarrett D'Amore 		    loc->locdata[i]->l_lname) != 0) {
5612d08521bSGarrett D'Amore 			composite = 1;
5622d08521bSGarrett D'Amore 			break;
5632d08521bSGarrett D'Amore 		}
5642d08521bSGarrett D'Amore 	}
5652d08521bSGarrett D'Amore 
5662d08521bSGarrett D'Amore 	if (composite) {
5672d08521bSGarrett D'Amore 		/*
5682d08521bSGarrett D'Amore 		 * Note ordering of these follows the numeric order,
5692d08521bSGarrett D'Amore 		 * if the order is changed, then setlocale() will need
5702d08521bSGarrett D'Amore 		 * to be changed as well.
5712d08521bSGarrett D'Amore 		 */
5722d08521bSGarrett D'Amore 		(void) snprintf(loc->locname, sizeof (loc->locname),
5732d08521bSGarrett D'Amore 		    "%s/%s/%s/%s/%s/%s",
5742d08521bSGarrett D'Amore 		    loc->locdata[LC_CTYPE]->l_lname,
5752d08521bSGarrett D'Amore 		    loc->locdata[LC_NUMERIC]->l_lname,
5762d08521bSGarrett D'Amore 		    loc->locdata[LC_TIME]->l_lname,
5772d08521bSGarrett D'Amore 		    loc->locdata[LC_COLLATE]->l_lname,
5782d08521bSGarrett D'Amore 		    loc->locdata[LC_MONETARY]->l_lname,
5792d08521bSGarrett D'Amore 		    loc->locdata[LC_MESSAGES]->l_lname);
5802d08521bSGarrett D'Amore 	} else {
5812d08521bSGarrett D'Amore 		(void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
5822d08521bSGarrett D'Amore 		    sizeof (loc->locname));
5832d08521bSGarrett D'Amore 	}
5842d08521bSGarrett D'Amore 	return (loc);
5852d08521bSGarrett D'Amore }
586