xref: /illumos-gate/usr/src/cmd/localedef/ctype.c (revision 5080145b)
16b5e5868SGarrett D'Amore /*
26b5e5868SGarrett D'Amore  * This file and its contents are supplied under the terms of the
36b5e5868SGarrett D'Amore  * Common Development and Distribution License ("CDDL"), version 1.0.
45aec55ebSGarrett D'Amore  * You may only use this file in accordance with the terms of version
55aec55ebSGarrett D'Amore  * 1.0 of the CDDL.
66b5e5868SGarrett D'Amore  *
76b5e5868SGarrett D'Amore  * A full copy of the text of the CDDL should have accompanied this
86b5e5868SGarrett D'Amore  * source.  A copy of the CDDL is also available via the Internet at
96b5e5868SGarrett D'Amore  * http://www.illumos.org/license/CDDL.
106b5e5868SGarrett D'Amore  */
116b5e5868SGarrett D'Amore 
126b5e5868SGarrett D'Amore /*
13017c01f8SYuri Pankov  * Copyright 2010,2011 Nexenta Systems, Inc.  All rights reserved.
14*5080145bSGarrett D'Amore  * Copyright 2012 Garrett D'Amore <garrett@damore.org>  All rights reserved.
156b5e5868SGarrett D'Amore  */
166b5e5868SGarrett D'Amore 
176b5e5868SGarrett D'Amore /*
186b5e5868SGarrett D'Amore  * LC_CTYPE database generation routines for localedef.
196b5e5868SGarrett D'Amore  */
206b5e5868SGarrett D'Amore 
216b5e5868SGarrett D'Amore #include <stdio.h>
226b5e5868SGarrett D'Amore #include <stdlib.h>
236b5e5868SGarrett D'Amore #include <string.h>
246b5e5868SGarrett D'Amore #include <sys/types.h>
256b5e5868SGarrett D'Amore #include <sys/avl.h>
266b5e5868SGarrett D'Amore #include <wchar.h>
276b5e5868SGarrett D'Amore #include <ctype.h>
286b5e5868SGarrett D'Amore #include <wctype.h>
296b5e5868SGarrett D'Amore #include <unistd.h>
306b5e5868SGarrett D'Amore #include "localedef.h"
316b5e5868SGarrett D'Amore #include "parser.tab.h"
326b5e5868SGarrett D'Amore #include "runefile.h"
336b5e5868SGarrett D'Amore 
346b5e5868SGarrett D'Amore static avl_tree_t	ctypes;
356b5e5868SGarrett D'Amore 
366b5e5868SGarrett D'Amore static wchar_t		last_ctype;
376b5e5868SGarrett D'Amore 
386b5e5868SGarrett D'Amore typedef struct ctype_node {
396b5e5868SGarrett D'Amore 	wchar_t wc;
406b5e5868SGarrett D'Amore 	int32_t	ctype;
416b5e5868SGarrett D'Amore 	int32_t	toupper;
426b5e5868SGarrett D'Amore 	int32_t	tolower;
436b5e5868SGarrett D'Amore 	avl_node_t avl;
446b5e5868SGarrett D'Amore } ctype_node_t;
456b5e5868SGarrett D'Amore 
466b5e5868SGarrett D'Amore static int
476b5e5868SGarrett D'Amore ctype_compare(const void *n1, const void *n2)
486b5e5868SGarrett D'Amore {
496b5e5868SGarrett D'Amore 	const ctype_node_t *c1 = n1;
506b5e5868SGarrett D'Amore 	const ctype_node_t *c2 = n2;
516b5e5868SGarrett D'Amore 
526b5e5868SGarrett D'Amore 	return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0);
536b5e5868SGarrett D'Amore }
546b5e5868SGarrett D'Amore 
556b5e5868SGarrett D'Amore void
566b5e5868SGarrett D'Amore init_ctype(void)
576b5e5868SGarrett D'Amore {
586b5e5868SGarrett D'Amore 	avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t),
596b5e5868SGarrett D'Amore 	    offsetof(ctype_node_t, avl));
606b5e5868SGarrett D'Amore }
616b5e5868SGarrett D'Amore 
626b5e5868SGarrett D'Amore 
636b5e5868SGarrett D'Amore static void
646b5e5868SGarrett D'Amore add_ctype_impl(ctype_node_t *ctn)
656b5e5868SGarrett D'Amore {
666b5e5868SGarrett D'Amore 	switch (last_kw) {
676b5e5868SGarrett D'Amore 	case T_ISUPPER:
686b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
696b5e5868SGarrett D'Amore 		break;
706b5e5868SGarrett D'Amore 	case T_ISLOWER:
716b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
726b5e5868SGarrett D'Amore 		break;
736b5e5868SGarrett D'Amore 	case T_ISALPHA:
746b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT);
756b5e5868SGarrett D'Amore 		break;
766b5e5868SGarrett D'Amore 	case T_ISDIGIT:
776b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT);
786b5e5868SGarrett D'Amore 		break;
796b5e5868SGarrett D'Amore 	case T_ISSPACE:
806b5e5868SGarrett D'Amore 		ctn->ctype |= _ISSPACE;
816b5e5868SGarrett D'Amore 		break;
826b5e5868SGarrett D'Amore 	case T_ISCNTRL:
836b5e5868SGarrett D'Amore 		ctn->ctype |= _ISCNTRL;
846b5e5868SGarrett D'Amore 		break;
856b5e5868SGarrett D'Amore 	case T_ISGRAPH:
866b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISGRAPH | _ISPRINT);
876b5e5868SGarrett D'Amore 		break;
886b5e5868SGarrett D'Amore 	case T_ISPRINT:
896b5e5868SGarrett D'Amore 		ctn->ctype |= _ISPRINT;
906b5e5868SGarrett D'Amore 		break;
916b5e5868SGarrett D'Amore 	case T_ISPUNCT:
926b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT);
936b5e5868SGarrett D'Amore 		break;
946b5e5868SGarrett D'Amore 	case T_ISXDIGIT:
956b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISXDIGIT | _ISPRINT);
966b5e5868SGarrett D'Amore 		break;
976b5e5868SGarrett D'Amore 	case T_ISBLANK:
986b5e5868SGarrett D'Amore 		ctn->ctype |= (_ISBLANK | _ISSPACE);
996b5e5868SGarrett D'Amore 		break;
1006b5e5868SGarrett D'Amore 	case T_ISPHONOGRAM:
1016b5e5868SGarrett D'Amore 		ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH);
1026b5e5868SGarrett D'Amore 		break;
1036b5e5868SGarrett D'Amore 	case T_ISIDEOGRAM:
1046b5e5868SGarrett D'Amore 		ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH);
1056b5e5868SGarrett D'Amore 		break;
1066b5e5868SGarrett D'Amore 	case T_ISENGLISH:
1076b5e5868SGarrett D'Amore 		ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH);
1086b5e5868SGarrett D'Amore 		break;
1096b5e5868SGarrett D'Amore 	case T_ISNUMBER:
1106b5e5868SGarrett D'Amore 		ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH);
1116b5e5868SGarrett D'Amore 		break;
1126b5e5868SGarrett D'Amore 	case T_ISSPECIAL:
1136b5e5868SGarrett D'Amore 		ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH);
1146b5e5868SGarrett D'Amore 		break;
1156b5e5868SGarrett D'Amore 	case T_ISALNUM:
1166b5e5868SGarrett D'Amore 		/*
1176b5e5868SGarrett D'Amore 		 * We can't do anything with this.  The character
1186b5e5868SGarrett D'Amore 		 * should already be specified as a digit or alpha.
1196b5e5868SGarrett D'Amore 		 */
1206b5e5868SGarrett D'Amore 		break;
1216b5e5868SGarrett D'Amore 	default:
1226b5e5868SGarrett D'Amore 		errf(_("not a valid character class"));
1236b5e5868SGarrett D'Amore 	}
1246b5e5868SGarrett D'Amore }
1256b5e5868SGarrett D'Amore 
1266b5e5868SGarrett D'Amore static ctype_node_t *
1276b5e5868SGarrett D'Amore get_ctype(wchar_t wc)
1286b5e5868SGarrett D'Amore {
1296b5e5868SGarrett D'Amore 	ctype_node_t	srch;
1306b5e5868SGarrett D'Amore 	ctype_node_t	*ctn;
1316b5e5868SGarrett D'Amore 	avl_index_t	where;
1326b5e5868SGarrett D'Amore 
1336b5e5868SGarrett D'Amore 	srch.wc = wc;
1346b5e5868SGarrett D'Amore 	if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) {
1356b5e5868SGarrett D'Amore 		if ((ctn = calloc(1, sizeof (*ctn))) == NULL) {
1366b5e5868SGarrett D'Amore 			errf(_("out of memory"));
1376b5e5868SGarrett D'Amore 			return (NULL);
1386b5e5868SGarrett D'Amore 		}
1396b5e5868SGarrett D'Amore 		ctn->wc = wc;
1406b5e5868SGarrett D'Amore 
1416b5e5868SGarrett D'Amore 		avl_insert(&ctypes, ctn, where);
1426b5e5868SGarrett D'Amore 	}
1436b5e5868SGarrett D'Amore 	return (ctn);
1446b5e5868SGarrett D'Amore }
1456b5e5868SGarrett D'Amore 
1466b5e5868SGarrett D'Amore void
1476b5e5868SGarrett D'Amore add_ctype(int val)
1486b5e5868SGarrett D'Amore {
1496b5e5868SGarrett D'Amore 	ctype_node_t	*ctn;
1506b5e5868SGarrett D'Amore 
1516b5e5868SGarrett D'Amore 	if ((ctn = get_ctype(val)) == NULL) {
1526b5e5868SGarrett D'Amore 		INTERR;
1536b5e5868SGarrett D'Amore 		return;
1546b5e5868SGarrett D'Amore 	}
1556b5e5868SGarrett D'Amore 	add_ctype_impl(ctn);
1566b5e5868SGarrett D'Amore 	last_ctype = ctn->wc;
1576b5e5868SGarrett D'Amore }
1586b5e5868SGarrett D'Amore 
1596b5e5868SGarrett D'Amore void
1606b5e5868SGarrett D'Amore add_ctype_range(int end)
1616b5e5868SGarrett D'Amore {
1626b5e5868SGarrett D'Amore 	ctype_node_t	*ctn;
1636b5e5868SGarrett D'Amore 	wchar_t		cur;
1646b5e5868SGarrett D'Amore 
1656b5e5868SGarrett D'Amore 	if (end < last_ctype) {
1666b5e5868SGarrett D'Amore 		errf(_("malformed character range (%u ... %u))"),
1676b5e5868SGarrett D'Amore 		    last_ctype, end);
1686b5e5868SGarrett D'Amore 		return;
1696b5e5868SGarrett D'Amore 	}
1706b5e5868SGarrett D'Amore 	for (cur = last_ctype + 1; cur <= end; cur++) {
1716b5e5868SGarrett D'Amore 		if ((ctn = get_ctype(cur)) == NULL) {
1726b5e5868SGarrett D'Amore 			INTERR;
1736b5e5868SGarrett D'Amore 			return;
1746b5e5868SGarrett D'Amore 		}
1756b5e5868SGarrett D'Amore 		add_ctype_impl(ctn);
1766b5e5868SGarrett D'Amore 	}
1776b5e5868SGarrett D'Amore 	last_ctype = end;
1786b5e5868SGarrett D'Amore 
1796b5e5868SGarrett D'Amore }
1806b5e5868SGarrett D'Amore 
1816b5e5868SGarrett D'Amore void
1826b5e5868SGarrett D'Amore add_caseconv(int val, int wc)
1836b5e5868SGarrett D'Amore {
1846b5e5868SGarrett D'Amore 	ctype_node_t	*ctn;
1856b5e5868SGarrett D'Amore 
1866b5e5868SGarrett D'Amore 	ctn = get_ctype(val);
1876b5e5868SGarrett D'Amore 	if (ctn == NULL) {
1886b5e5868SGarrett D'Amore 		INTERR;
1896b5e5868SGarrett D'Amore 		return;
1906b5e5868SGarrett D'Amore 	}
1916b5e5868SGarrett D'Amore 
1926b5e5868SGarrett D'Amore 	switch (last_kw) {
1936b5e5868SGarrett D'Amore 	case T_TOUPPER:
1946b5e5868SGarrett D'Amore 		ctn->toupper = wc;
1956b5e5868SGarrett D'Amore 		break;
1966b5e5868SGarrett D'Amore 	case T_TOLOWER:
1976b5e5868SGarrett D'Amore 		ctn->tolower = wc;
1986b5e5868SGarrett D'Amore 		break;
1996b5e5868SGarrett D'Amore 	default:
2006b5e5868SGarrett D'Amore 		INTERR;
2016b5e5868SGarrett D'Amore 		break;
2026b5e5868SGarrett D'Amore 	}
2036b5e5868SGarrett D'Amore }
2046b5e5868SGarrett D'Amore 
2056b5e5868SGarrett D'Amore void
2066b5e5868SGarrett D'Amore dump_ctype(void)
2076b5e5868SGarrett D'Amore {
2086b5e5868SGarrett D'Amore 	FILE		*f;
2096b5e5868SGarrett D'Amore 	_FileRuneLocale	rl;
2106b5e5868SGarrett D'Amore 	ctype_node_t	*ctn, *last_ct, *last_lo, *last_up;
2116b5e5868SGarrett D'Amore 	_FileRuneEntry	*ct = NULL;
2126b5e5868SGarrett D'Amore 	_FileRuneEntry	*lo = NULL;
2136b5e5868SGarrett D'Amore 	_FileRuneEntry	*up = NULL;
2146b5e5868SGarrett D'Amore 
2156b5e5868SGarrett D'Amore 	(void) memset(&rl, 0, sizeof (rl));
2166b5e5868SGarrett D'Amore 	last_ct = NULL;
2176b5e5868SGarrett D'Amore 	last_lo = NULL;
2186b5e5868SGarrett D'Amore 	last_up = NULL;
2196b5e5868SGarrett D'Amore 
2206b5e5868SGarrett D'Amore 	if ((f = open_category()) == NULL)
2216b5e5868SGarrett D'Amore 		return;
2226b5e5868SGarrett D'Amore 
2236b5e5868SGarrett D'Amore 	(void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
2246b5e5868SGarrett D'Amore 	(void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
2256b5e5868SGarrett D'Amore 
2266b5e5868SGarrett D'Amore 	for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {
2276b5e5868SGarrett D'Amore 
2286b5e5868SGarrett D'Amore 		wchar_t	wc = ctn->wc;
2296b5e5868SGarrett D'Amore 		int conflict = 0;
2306b5e5868SGarrett D'Amore 
2316b5e5868SGarrett D'Amore 		/*
2326b5e5868SGarrett D'Amore 		 * POSIX requires certain portable characters have
2336b5e5868SGarrett D'Amore 		 * certain types.  Add them if they are missing.
2346b5e5868SGarrett D'Amore 		 */
2356b5e5868SGarrett D'Amore 		if ((wc >= 1) && (wc <= 127)) {
2366b5e5868SGarrett D'Amore 			if ((wc >= 'A') && (wc <= 'Z'))
2376b5e5868SGarrett D'Amore 				ctn->ctype |= _ISUPPER;
2386b5e5868SGarrett D'Amore 			if ((wc >= 'a') && (wc <= 'z'))
2396b5e5868SGarrett D'Amore 				ctn->ctype |= _ISLOWER;
2406b5e5868SGarrett D'Amore 			if ((wc >= '0') && (wc <= '9'))
2416b5e5868SGarrett D'Amore 				ctn->ctype |= _ISDIGIT;
2426b5e5868SGarrett D'Amore 			if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
2436b5e5868SGarrett D'Amore 				ctn->ctype |= _ISSPACE;
2446b5e5868SGarrett D'Amore 			if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
2456b5e5868SGarrett D'Amore 				ctn->ctype |= _ISXDIGIT;
2466b5e5868SGarrett D'Amore 			if (strchr(" \t", (char)wc))
2476b5e5868SGarrett D'Amore 				ctn->ctype |= _ISBLANK;
248723fee08SGarrett D'Amore 
249723fee08SGarrett D'Amore 			/*
250723fee08SGarrett D'Amore 			 * Technically these settings are only
251723fee08SGarrett D'Amore 			 * required for the C locale.  However, it
252723fee08SGarrett D'Amore 			 * turns out that because of the historical
253723fee08SGarrett D'Amore 			 * version of isprint(), we need them for all
254723fee08SGarrett D'Amore 			 * locales as well.  Note that these are not
255723fee08SGarrett D'Amore 			 * necessarily valid punctation characters in
256723fee08SGarrett D'Amore 			 * the current language, but ispunct() needs
257723fee08SGarrett D'Amore 			 * to return TRUE for them.
258723fee08SGarrett D'Amore 			 */
259723fee08SGarrett D'Amore 			if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
260723fee08SGarrett D'Amore 			    (char)wc))
261723fee08SGarrett D'Amore 				ctn->ctype |= _ISPUNCT;
2626b5e5868SGarrett D'Amore 		}
2636b5e5868SGarrett D'Amore 
2646b5e5868SGarrett D'Amore 		/*
2656b5e5868SGarrett D'Amore 		 * POSIX also requires that certain types imply
2666b5e5868SGarrett D'Amore 		 * others.  Add any inferred types here.
2676b5e5868SGarrett D'Amore 		 */
2686b5e5868SGarrett D'Amore 		if (ctn->ctype & (_ISUPPER |_ISLOWER))
2696b5e5868SGarrett D'Amore 			ctn->ctype |= _ISALPHA;
2706b5e5868SGarrett D'Amore 		if (ctn->ctype & _ISDIGIT)
2716b5e5868SGarrett D'Amore 			ctn->ctype |= _ISXDIGIT;
2726b5e5868SGarrett D'Amore 		if (ctn->ctype & _ISBLANK)
2736b5e5868SGarrett D'Amore 			ctn->ctype |= _ISSPACE;
2746b5e5868SGarrett D'Amore 		if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
2756b5e5868SGarrett D'Amore 			ctn->ctype |= _ISGRAPH;
2766b5e5868SGarrett D'Amore 		if (ctn->ctype & _ISGRAPH)
2776b5e5868SGarrett D'Amore 			ctn->ctype |= _ISPRINT;
2786b5e5868SGarrett D'Amore 
2796b5e5868SGarrett D'Amore 		/*
2806b5e5868SGarrett D'Amore 		 * Finally, POSIX requires that certain combinations
2816b5e5868SGarrett D'Amore 		 * are invalid.  We don't flag this as a fatal error,
2826b5e5868SGarrett D'Amore 		 * but we will warn about.
2836b5e5868SGarrett D'Amore 		 */
2846b5e5868SGarrett D'Amore 		if ((ctn->ctype & _ISALPHA) &&
2856b5e5868SGarrett D'Amore 		    (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
2866b5e5868SGarrett D'Amore 			conflict++;
2876b5e5868SGarrett D'Amore 		if ((ctn->ctype & _ISPUNCT) &
2886b5e5868SGarrett D'Amore 		    (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
2896b5e5868SGarrett D'Amore 			conflict++;
2906b5e5868SGarrett D'Amore 		if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
2916b5e5868SGarrett D'Amore 			conflict++;
2926b5e5868SGarrett D'Amore 		if ((ctn->ctype & _ISCNTRL) & _ISPRINT)
2936b5e5868SGarrett D'Amore 			conflict++;
2946b5e5868SGarrett D'Amore 		if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
2956b5e5868SGarrett D'Amore 			conflict++;
2966b5e5868SGarrett D'Amore 
2976b5e5868SGarrett D'Amore 		if (conflict) {
2986b5e5868SGarrett D'Amore 			warn("conflicting classes for character 0x%x (%x)",
2996b5e5868SGarrett D'Amore 			    wc, ctn->ctype);
3006b5e5868SGarrett D'Amore 		}
3016b5e5868SGarrett D'Amore 		/*
3026b5e5868SGarrett D'Amore 		 * Handle the lower 256 characters using the simple
3036b5e5868SGarrett D'Amore 		 * optimization.  Note that if we have not defined the
3046b5e5868SGarrett D'Amore 		 * upper/lower case, then we identity map it.
3056b5e5868SGarrett D'Amore 		 */
306*5080145bSGarrett D'Amore 		if ((unsigned)wc < _CACHED_RUNES) {
3076b5e5868SGarrett D'Amore 			rl.runetype[wc] = ctn->ctype;
3086b5e5868SGarrett D'Amore 			rl.maplower[wc] = ctn->tolower ? ctn->tolower : wc;
3096b5e5868SGarrett D'Amore 			rl.mapupper[wc] = ctn->toupper ? ctn->toupper : wc;
3106b5e5868SGarrett D'Amore 			continue;
3116b5e5868SGarrett D'Amore 		}
3126b5e5868SGarrett D'Amore 
3136b5e5868SGarrett D'Amore 		if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) {
3146b5e5868SGarrett D'Amore 			ct[rl.runetype_ext_nranges-1].max = wc;
3156b5e5868SGarrett D'Amore 			last_ct = ctn;
3166b5e5868SGarrett D'Amore 		} else {
3176b5e5868SGarrett D'Amore 			rl.runetype_ext_nranges++;
3186b5e5868SGarrett D'Amore 			ct = realloc(ct,
3196b5e5868SGarrett D'Amore 			    sizeof (*ct) * rl.runetype_ext_nranges);
3206b5e5868SGarrett D'Amore 			ct[rl.runetype_ext_nranges - 1].min = wc;
3216b5e5868SGarrett D'Amore 			ct[rl.runetype_ext_nranges - 1].max = wc;
3226b5e5868SGarrett D'Amore 			ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
3236b5e5868SGarrett D'Amore 			last_ct = ctn;
3246b5e5868SGarrett D'Amore 		}
325017c01f8SYuri Pankov 		if (ctn->tolower == 0) {
326017c01f8SYuri Pankov 			last_lo = NULL;
3276b5e5868SGarrett D'Amore 		} else if ((last_lo != NULL) &&
3286b5e5868SGarrett D'Amore 		    (last_lo->tolower + 1 == ctn->tolower)) {
3296b5e5868SGarrett D'Amore 			lo[rl.maplower_ext_nranges-1].max = wc;
3306b5e5868SGarrett D'Amore 			last_lo = ctn;
3316b5e5868SGarrett D'Amore 		} else {
3326b5e5868SGarrett D'Amore 			rl.maplower_ext_nranges++;
3336b5e5868SGarrett D'Amore 			lo = realloc(lo,
3346b5e5868SGarrett D'Amore 			    sizeof (*lo) * rl.maplower_ext_nranges);
3356b5e5868SGarrett D'Amore 			lo[rl.maplower_ext_nranges - 1].min = wc;
3366b5e5868SGarrett D'Amore 			lo[rl.maplower_ext_nranges - 1].max = wc;
3376b5e5868SGarrett D'Amore 			lo[rl.maplower_ext_nranges - 1].map = ctn->tolower;
3386b5e5868SGarrett D'Amore 			last_lo = ctn;
3396b5e5868SGarrett D'Amore 		}
3406b5e5868SGarrett D'Amore 
3416b5e5868SGarrett D'Amore 		if (ctn->toupper == 0) {
3426b5e5868SGarrett D'Amore 			last_up = NULL;
3436b5e5868SGarrett D'Amore 		} else if ((last_up != NULL) &&
3446b5e5868SGarrett D'Amore 		    (last_up->toupper + 1 == ctn->toupper)) {
3456b5e5868SGarrett D'Amore 			up[rl.mapupper_ext_nranges-1].max = wc;
3466b5e5868SGarrett D'Amore 			last_up = ctn;
3476b5e5868SGarrett D'Amore 		} else {
3486b5e5868SGarrett D'Amore 			rl.mapupper_ext_nranges++;
3496b5e5868SGarrett D'Amore 			up = realloc(up,
3506b5e5868SGarrett D'Amore 			    sizeof (*up) * rl.mapupper_ext_nranges);
3516b5e5868SGarrett D'Amore 			up[rl.mapupper_ext_nranges - 1].min = wc;
3526b5e5868SGarrett D'Amore 			up[rl.mapupper_ext_nranges - 1].max = wc;
3536b5e5868SGarrett D'Amore 			up[rl.mapupper_ext_nranges - 1].map = ctn->toupper;
3546b5e5868SGarrett D'Amore 			last_up = ctn;
3556b5e5868SGarrett D'Amore 		}
3566b5e5868SGarrett D'Amore 	}
3576b5e5868SGarrett D'Amore 
3586b5e5868SGarrett D'Amore 	if ((wr_category(&rl, sizeof (rl), f) < 0) ||
3596b5e5868SGarrett D'Amore 	    (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) ||
3606b5e5868SGarrett D'Amore 	    (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) ||
3616b5e5868SGarrett D'Amore 	    (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) {
3626b5e5868SGarrett D'Amore 		return;
3636b5e5868SGarrett D'Amore 	}
3646b5e5868SGarrett D'Amore 
3656b5e5868SGarrett D'Amore 	close_category(f);
3666b5e5868SGarrett D'Amore }
367