16b5e5868SGarrett D'Amore /*
26b5e5868SGarrett D'Amore * This file and its contents are supplied under the terms of the
36b5e5868SGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0.
45aec55ebSGarrett D'Amore * You may only use this file in accordance with the terms of version
55aec55ebSGarrett D'Amore * 1.0 of the CDDL.
66b5e5868SGarrett D'Amore *
76b5e5868SGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this
86b5e5868SGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at
96b5e5868SGarrett D'Amore * http://www.illumos.org/license/CDDL.
106b5e5868SGarrett D'Amore */
116b5e5868SGarrett D'Amore
126b5e5868SGarrett D'Amore /*
13e1508819SYuri Pankov * Copyright 2017 Nexenta Systems, Inc.
142da1cd3aSGarrett D'Amore * Copyright 2012 Garrett D'Amore <garrett@damore.org>
152da1cd3aSGarrett D'Amore * Copyright 2013 DEY Storage Systems, Inc.
166b5e5868SGarrett D'Amore */
176b5e5868SGarrett D'Amore
186b5e5868SGarrett D'Amore /*
196b5e5868SGarrett D'Amore * LC_CTYPE database generation routines for localedef.
206b5e5868SGarrett D'Amore */
216b5e5868SGarrett D'Amore
226b5e5868SGarrett D'Amore #include <stdio.h>
236b5e5868SGarrett D'Amore #include <stdlib.h>
246b5e5868SGarrett D'Amore #include <string.h>
256b5e5868SGarrett D'Amore #include <sys/types.h>
266b5e5868SGarrett D'Amore #include <sys/avl.h>
276b5e5868SGarrett D'Amore #include <wchar.h>
286b5e5868SGarrett D'Amore #include <ctype.h>
296b5e5868SGarrett D'Amore #include <wctype.h>
306b5e5868SGarrett D'Amore #include <unistd.h>
312da1cd3aSGarrett D'Amore #include "_ctype.h"
326b5e5868SGarrett D'Amore #include "localedef.h"
336b5e5868SGarrett D'Amore #include "parser.tab.h"
346b5e5868SGarrett D'Amore #include "runefile.h"
356b5e5868SGarrett D'Amore
366b5e5868SGarrett D'Amore static avl_tree_t ctypes;
376b5e5868SGarrett D'Amore
386b5e5868SGarrett D'Amore static wchar_t last_ctype;
396b5e5868SGarrett D'Amore
406b5e5868SGarrett D'Amore typedef struct ctype_node {
416b5e5868SGarrett D'Amore wchar_t wc;
426b5e5868SGarrett D'Amore int32_t ctype;
436b5e5868SGarrett D'Amore int32_t toupper;
446b5e5868SGarrett D'Amore int32_t tolower;
456b5e5868SGarrett D'Amore avl_node_t avl;
466b5e5868SGarrett D'Amore } ctype_node_t;
476b5e5868SGarrett D'Amore
482da1cd3aSGarrett D'Amore typedef struct width_node {
492da1cd3aSGarrett D'Amore wchar_t start;
502da1cd3aSGarrett D'Amore wchar_t end;
512da1cd3aSGarrett D'Amore int8_t width;
522da1cd3aSGarrett D'Amore avl_node_t avl;
532da1cd3aSGarrett D'Amore } width_node_t;
542da1cd3aSGarrett D'Amore
556b5e5868SGarrett D'Amore static int
ctype_compare(const void * n1,const void * n2)566b5e5868SGarrett D'Amore ctype_compare(const void *n1, const void *n2)
576b5e5868SGarrett D'Amore {
586b5e5868SGarrett D'Amore const ctype_node_t *c1 = n1;
596b5e5868SGarrett D'Amore const ctype_node_t *c2 = n2;
606b5e5868SGarrett D'Amore
616b5e5868SGarrett D'Amore return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0);
626b5e5868SGarrett D'Amore }
636b5e5868SGarrett D'Amore
646b5e5868SGarrett D'Amore void
init_ctype(void)656b5e5868SGarrett D'Amore init_ctype(void)
666b5e5868SGarrett D'Amore {
676b5e5868SGarrett D'Amore avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t),
686b5e5868SGarrett D'Amore offsetof(ctype_node_t, avl));
696b5e5868SGarrett D'Amore }
706b5e5868SGarrett D'Amore
716b5e5868SGarrett D'Amore
726b5e5868SGarrett D'Amore static void
add_ctype_impl(ctype_node_t * ctn)736b5e5868SGarrett D'Amore add_ctype_impl(ctype_node_t *ctn)
746b5e5868SGarrett D'Amore {
756b5e5868SGarrett D'Amore switch (last_kw) {
766b5e5868SGarrett D'Amore case T_ISUPPER:
776b5e5868SGarrett D'Amore ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
786b5e5868SGarrett D'Amore break;
796b5e5868SGarrett D'Amore case T_ISLOWER:
806b5e5868SGarrett D'Amore ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
816b5e5868SGarrett D'Amore break;
826b5e5868SGarrett D'Amore case T_ISALPHA:
836b5e5868SGarrett D'Amore ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT);
846b5e5868SGarrett D'Amore break;
856b5e5868SGarrett D'Amore case T_ISDIGIT:
866b5e5868SGarrett D'Amore ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT);
876b5e5868SGarrett D'Amore break;
886b5e5868SGarrett D'Amore case T_ISSPACE:
896b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE;
906b5e5868SGarrett D'Amore break;
916b5e5868SGarrett D'Amore case T_ISCNTRL:
926b5e5868SGarrett D'Amore ctn->ctype |= _ISCNTRL;
936b5e5868SGarrett D'Amore break;
946b5e5868SGarrett D'Amore case T_ISGRAPH:
956b5e5868SGarrett D'Amore ctn->ctype |= (_ISGRAPH | _ISPRINT);
966b5e5868SGarrett D'Amore break;
976b5e5868SGarrett D'Amore case T_ISPRINT:
986b5e5868SGarrett D'Amore ctn->ctype |= _ISPRINT;
996b5e5868SGarrett D'Amore break;
1006b5e5868SGarrett D'Amore case T_ISPUNCT:
1016b5e5868SGarrett D'Amore ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT);
1026b5e5868SGarrett D'Amore break;
1036b5e5868SGarrett D'Amore case T_ISXDIGIT:
1046b5e5868SGarrett D'Amore ctn->ctype |= (_ISXDIGIT | _ISPRINT);
1056b5e5868SGarrett D'Amore break;
1066b5e5868SGarrett D'Amore case T_ISBLANK:
1076b5e5868SGarrett D'Amore ctn->ctype |= (_ISBLANK | _ISSPACE);
1086b5e5868SGarrett D'Amore break;
1096b5e5868SGarrett D'Amore case T_ISPHONOGRAM:
1106b5e5868SGarrett D'Amore ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH);
1116b5e5868SGarrett D'Amore break;
1126b5e5868SGarrett D'Amore case T_ISIDEOGRAM:
1136b5e5868SGarrett D'Amore ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH);
1146b5e5868SGarrett D'Amore break;
1156b5e5868SGarrett D'Amore case T_ISENGLISH:
1166b5e5868SGarrett D'Amore ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH);
1176b5e5868SGarrett D'Amore break;
1186b5e5868SGarrett D'Amore case T_ISNUMBER:
1196b5e5868SGarrett D'Amore ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH);
1206b5e5868SGarrett D'Amore break;
1216b5e5868SGarrett D'Amore case T_ISSPECIAL:
1226b5e5868SGarrett D'Amore ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH);
1236b5e5868SGarrett D'Amore break;
1246b5e5868SGarrett D'Amore case T_ISALNUM:
1256b5e5868SGarrett D'Amore /*
1266b5e5868SGarrett D'Amore * We can't do anything with this. The character
1276b5e5868SGarrett D'Amore * should already be specified as a digit or alpha.
1286b5e5868SGarrett D'Amore */
1296b5e5868SGarrett D'Amore break;
1306b5e5868SGarrett D'Amore default:
1316b5e5868SGarrett D'Amore errf(_("not a valid character class"));
1326b5e5868SGarrett D'Amore }
1336b5e5868SGarrett D'Amore }
1346b5e5868SGarrett D'Amore
1356b5e5868SGarrett D'Amore static ctype_node_t *
get_ctype(wchar_t wc)1366b5e5868SGarrett D'Amore get_ctype(wchar_t wc)
1376b5e5868SGarrett D'Amore {
1386b5e5868SGarrett D'Amore ctype_node_t srch;
1396b5e5868SGarrett D'Amore ctype_node_t *ctn;
1406b5e5868SGarrett D'Amore avl_index_t where;
1416b5e5868SGarrett D'Amore
1426b5e5868SGarrett D'Amore srch.wc = wc;
1436b5e5868SGarrett D'Amore if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) {
1446b5e5868SGarrett D'Amore if ((ctn = calloc(1, sizeof (*ctn))) == NULL) {
1456b5e5868SGarrett D'Amore errf(_("out of memory"));
1466b5e5868SGarrett D'Amore return (NULL);
1476b5e5868SGarrett D'Amore }
1486b5e5868SGarrett D'Amore ctn->wc = wc;
1496b5e5868SGarrett D'Amore
1506b5e5868SGarrett D'Amore avl_insert(&ctypes, ctn, where);
1516b5e5868SGarrett D'Amore }
1526b5e5868SGarrett D'Amore return (ctn);
1536b5e5868SGarrett D'Amore }
1546b5e5868SGarrett D'Amore
1556b5e5868SGarrett D'Amore void
add_ctype(int val)1566b5e5868SGarrett D'Amore add_ctype(int val)
1576b5e5868SGarrett D'Amore {
1586b5e5868SGarrett D'Amore ctype_node_t *ctn;
1596b5e5868SGarrett D'Amore
1606b5e5868SGarrett D'Amore if ((ctn = get_ctype(val)) == NULL) {
1616b5e5868SGarrett D'Amore INTERR;
1626b5e5868SGarrett D'Amore return;
1636b5e5868SGarrett D'Amore }
1646b5e5868SGarrett D'Amore add_ctype_impl(ctn);
1656b5e5868SGarrett D'Amore last_ctype = ctn->wc;
1666b5e5868SGarrett D'Amore }
1676b5e5868SGarrett D'Amore
1686b5e5868SGarrett D'Amore void
add_ctype_range(wchar_t end)1697262c8a6SYuri Pankov add_ctype_range(wchar_t end)
1706b5e5868SGarrett D'Amore {
1716b5e5868SGarrett D'Amore ctype_node_t *ctn;
1726b5e5868SGarrett D'Amore wchar_t cur;
1736b5e5868SGarrett D'Amore
1746b5e5868SGarrett D'Amore if (end < last_ctype) {
1756b5e5868SGarrett D'Amore errf(_("malformed character range (%u ... %u))"),
1766b5e5868SGarrett D'Amore last_ctype, end);
1776b5e5868SGarrett D'Amore return;
1786b5e5868SGarrett D'Amore }
1796b5e5868SGarrett D'Amore for (cur = last_ctype + 1; cur <= end; cur++) {
1806b5e5868SGarrett D'Amore if ((ctn = get_ctype(cur)) == NULL) {
1816b5e5868SGarrett D'Amore INTERR;
1826b5e5868SGarrett D'Amore return;
1836b5e5868SGarrett D'Amore }
1846b5e5868SGarrett D'Amore add_ctype_impl(ctn);
1856b5e5868SGarrett D'Amore }
1866b5e5868SGarrett D'Amore last_ctype = end;
1876b5e5868SGarrett D'Amore
1886b5e5868SGarrett D'Amore }
1896b5e5868SGarrett D'Amore
1902da1cd3aSGarrett D'Amore /*
1912da1cd3aSGarrett D'Amore * A word about widths: if the width mask is specified, then libc
1922da1cd3aSGarrett D'Amore * unconditionally honors it. Otherwise, it assumes printable
1932da1cd3aSGarrett D'Amore * characters have width 1, and non-printable characters have width
1942da1cd3aSGarrett D'Amore * -1 (except for NULL which is special with with 0). Hence, we have
1952da1cd3aSGarrett D'Amore * no need to inject defaults here -- the "default" unset value of 0
1962da1cd3aSGarrett D'Amore * indicates that libc should use its own logic in wcwidth as described.
1972da1cd3aSGarrett D'Amore */
1982da1cd3aSGarrett D'Amore void
add_width(int wc,int width)1992da1cd3aSGarrett D'Amore add_width(int wc, int width)
2002da1cd3aSGarrett D'Amore {
2012da1cd3aSGarrett D'Amore ctype_node_t *ctn;
2022da1cd3aSGarrett D'Amore
2032da1cd3aSGarrett D'Amore if ((ctn = get_ctype(wc)) == NULL) {
2042da1cd3aSGarrett D'Amore INTERR;
2052da1cd3aSGarrett D'Amore return;
2062da1cd3aSGarrett D'Amore }
2072da1cd3aSGarrett D'Amore ctn->ctype &= ~(_CTYPE_SWM);
2082da1cd3aSGarrett D'Amore switch (width) {
2092da1cd3aSGarrett D'Amore case 0:
2102da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW0;
2112da1cd3aSGarrett D'Amore break;
2122da1cd3aSGarrett D'Amore case 1:
2132da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW1;
2142da1cd3aSGarrett D'Amore break;
2152da1cd3aSGarrett D'Amore case 2:
2162da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW2;
2172da1cd3aSGarrett D'Amore break;
2182da1cd3aSGarrett D'Amore case 3:
2192da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW3;
2202da1cd3aSGarrett D'Amore break;
2212da1cd3aSGarrett D'Amore }
2222da1cd3aSGarrett D'Amore }
2232da1cd3aSGarrett D'Amore
/*
 * Record the same display width for every character from start through
 * end, inclusive.  Does nothing when start is greater than end.
 */
void
add_width_range(int start, int end, int width)
{
	int wc = start;

	while (wc <= end) {
		add_width(wc, width);
		wc++;
	}
}
2312da1cd3aSGarrett D'Amore
2326b5e5868SGarrett D'Amore void
add_caseconv(int val,int wc)2336b5e5868SGarrett D'Amore add_caseconv(int val, int wc)
2346b5e5868SGarrett D'Amore {
2356b5e5868SGarrett D'Amore ctype_node_t *ctn;
2366b5e5868SGarrett D'Amore
2376b5e5868SGarrett D'Amore ctn = get_ctype(val);
2386b5e5868SGarrett D'Amore if (ctn == NULL) {
2396b5e5868SGarrett D'Amore INTERR;
2406b5e5868SGarrett D'Amore return;
2416b5e5868SGarrett D'Amore }
2426b5e5868SGarrett D'Amore
2436b5e5868SGarrett D'Amore switch (last_kw) {
2446b5e5868SGarrett D'Amore case T_TOUPPER:
2456b5e5868SGarrett D'Amore ctn->toupper = wc;
2466b5e5868SGarrett D'Amore break;
2476b5e5868SGarrett D'Amore case T_TOLOWER:
2486b5e5868SGarrett D'Amore ctn->tolower = wc;
2496b5e5868SGarrett D'Amore break;
2506b5e5868SGarrett D'Amore default:
2516b5e5868SGarrett D'Amore INTERR;
2526b5e5868SGarrett D'Amore break;
2536b5e5868SGarrett D'Amore }
2546b5e5868SGarrett D'Amore }
2556b5e5868SGarrett D'Amore
2566b5e5868SGarrett D'Amore void
dump_ctype(void)2576b5e5868SGarrett D'Amore dump_ctype(void)
2586b5e5868SGarrett D'Amore {
2596b5e5868SGarrett D'Amore FILE *f;
2606b5e5868SGarrett D'Amore _FileRuneLocale rl;
2616b5e5868SGarrett D'Amore ctype_node_t *ctn, *last_ct, *last_lo, *last_up;
2626b5e5868SGarrett D'Amore _FileRuneEntry *ct = NULL;
2636b5e5868SGarrett D'Amore _FileRuneEntry *lo = NULL;
2646b5e5868SGarrett D'Amore _FileRuneEntry *up = NULL;
2656125cca6SDavid Höppner wchar_t wc;
2666b5e5868SGarrett D'Amore
2676b5e5868SGarrett D'Amore (void) memset(&rl, 0, sizeof (rl));
2686b5e5868SGarrett D'Amore last_ct = NULL;
2696b5e5868SGarrett D'Amore last_lo = NULL;
2706b5e5868SGarrett D'Amore last_up = NULL;
2716b5e5868SGarrett D'Amore
2726b5e5868SGarrett D'Amore if ((f = open_category()) == NULL)
2736b5e5868SGarrett D'Amore return;
2746b5e5868SGarrett D'Amore
2756b5e5868SGarrett D'Amore (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
276e1508819SYuri Pankov (void) strlcpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
2776b5e5868SGarrett D'Amore
2786125cca6SDavid Höppner /*
2796125cca6SDavid Höppner * Initialize the identity map.
2806125cca6SDavid Höppner */
2816125cca6SDavid Höppner for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
2826125cca6SDavid Höppner rl.maplower[wc] = wc;
2836125cca6SDavid Höppner rl.mapupper[wc] = wc;
2846125cca6SDavid Höppner }
2856b5e5868SGarrett D'Amore
2866125cca6SDavid Höppner for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {
2876b5e5868SGarrett D'Amore int conflict = 0;
2886b5e5868SGarrett D'Amore
2892da1cd3aSGarrett D'Amore
2906125cca6SDavid Höppner wc = ctn->wc;
2916125cca6SDavid Höppner
2926b5e5868SGarrett D'Amore /*
2936b5e5868SGarrett D'Amore * POSIX requires certain portable characters have
2946b5e5868SGarrett D'Amore * certain types. Add them if they are missing.
2956b5e5868SGarrett D'Amore */
2966b5e5868SGarrett D'Amore if ((wc >= 1) && (wc <= 127)) {
2976b5e5868SGarrett D'Amore if ((wc >= 'A') && (wc <= 'Z'))
2986b5e5868SGarrett D'Amore ctn->ctype |= _ISUPPER;
2996b5e5868SGarrett D'Amore if ((wc >= 'a') && (wc <= 'z'))
3006b5e5868SGarrett D'Amore ctn->ctype |= _ISLOWER;
3016b5e5868SGarrett D'Amore if ((wc >= '0') && (wc <= '9'))
3026b5e5868SGarrett D'Amore ctn->ctype |= _ISDIGIT;
3035a4ef21aSLauri Tirkkonen if (wc == ' ')
3045a4ef21aSLauri Tirkkonen ctn->ctype |= _ISPRINT;
3056b5e5868SGarrett D'Amore if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
3066b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE;
3076b5e5868SGarrett D'Amore if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
3086b5e5868SGarrett D'Amore ctn->ctype |= _ISXDIGIT;
3096b5e5868SGarrett D'Amore if (strchr(" \t", (char)wc))
3106b5e5868SGarrett D'Amore ctn->ctype |= _ISBLANK;
311723fee08SGarrett D'Amore
312723fee08SGarrett D'Amore /*
313723fee08SGarrett D'Amore * Technically these settings are only
314723fee08SGarrett D'Amore * required for the C locale. However, it
315723fee08SGarrett D'Amore * turns out that because of the historical
316723fee08SGarrett D'Amore * version of isprint(), we need them for all
317723fee08SGarrett D'Amore * locales as well. Note that these are not
318723fee08SGarrett D'Amore * necessarily valid punctation characters in
319723fee08SGarrett D'Amore * the current language, but ispunct() needs
320723fee08SGarrett D'Amore * to return TRUE for them.
321723fee08SGarrett D'Amore */
322723fee08SGarrett D'Amore if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
323723fee08SGarrett D'Amore (char)wc))
324723fee08SGarrett D'Amore ctn->ctype |= _ISPUNCT;
3256b5e5868SGarrett D'Amore }
3266b5e5868SGarrett D'Amore
3276b5e5868SGarrett D'Amore /*
3286b5e5868SGarrett D'Amore * POSIX also requires that certain types imply
3296b5e5868SGarrett D'Amore * others. Add any inferred types here.
3306b5e5868SGarrett D'Amore */
3316b5e5868SGarrett D'Amore if (ctn->ctype & (_ISUPPER |_ISLOWER))
3326b5e5868SGarrett D'Amore ctn->ctype |= _ISALPHA;
3336b5e5868SGarrett D'Amore if (ctn->ctype & _ISDIGIT)
3346b5e5868SGarrett D'Amore ctn->ctype |= _ISXDIGIT;
3356b5e5868SGarrett D'Amore if (ctn->ctype & _ISBLANK)
3366b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE;
3376b5e5868SGarrett D'Amore if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
3386b5e5868SGarrett D'Amore ctn->ctype |= _ISGRAPH;
3396b5e5868SGarrett D'Amore if (ctn->ctype & _ISGRAPH)
3406b5e5868SGarrett D'Amore ctn->ctype |= _ISPRINT;
3416b5e5868SGarrett D'Amore
3426b5e5868SGarrett D'Amore /*
3436b5e5868SGarrett D'Amore * Finally, POSIX requires that certain combinations
3446b5e5868SGarrett D'Amore * are invalid. We don't flag this as a fatal error,
3456b5e5868SGarrett D'Amore * but we will warn about.
3466b5e5868SGarrett D'Amore */
3476b5e5868SGarrett D'Amore if ((ctn->ctype & _ISALPHA) &&
3486b5e5868SGarrett D'Amore (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
3496b5e5868SGarrett D'Amore conflict++;
350e1508819SYuri Pankov if ((ctn->ctype & _ISPUNCT) &&
3516b5e5868SGarrett D'Amore (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
3526b5e5868SGarrett D'Amore conflict++;
3536b5e5868SGarrett D'Amore if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
3546b5e5868SGarrett D'Amore conflict++;
355e1508819SYuri Pankov if ((ctn->ctype & _ISCNTRL) && (ctn->ctype & _ISPRINT))
3566b5e5868SGarrett D'Amore conflict++;
3576b5e5868SGarrett D'Amore if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
3586b5e5868SGarrett D'Amore conflict++;
3596b5e5868SGarrett D'Amore
3606b5e5868SGarrett D'Amore if (conflict) {
3616b5e5868SGarrett D'Amore warn("conflicting classes for character 0x%x (%x)",
3626b5e5868SGarrett D'Amore wc, ctn->ctype);
3636b5e5868SGarrett D'Amore }
364*6cf13876SYuri Pankov
3656b5e5868SGarrett D'Amore /*
3666b5e5868SGarrett D'Amore * Handle the lower 256 characters using the simple
3676b5e5868SGarrett D'Amore * optimization. Note that if we have not defined the
3686b5e5868SGarrett D'Amore * upper/lower case, then we identity map it.
3696b5e5868SGarrett D'Amore */
3705080145bSGarrett D'Amore if ((unsigned)wc < _CACHED_RUNES) {
3716b5e5868SGarrett D'Amore rl.runetype[wc] = ctn->ctype;
3726125cca6SDavid Höppner if (ctn->tolower)
3736125cca6SDavid Höppner rl.maplower[wc] = ctn->tolower;
3746125cca6SDavid Höppner if (ctn->toupper)
3756125cca6SDavid Höppner rl.mapupper[wc] = ctn->toupper;
3766b5e5868SGarrett D'Amore continue;
3776b5e5868SGarrett D'Amore }
3786b5e5868SGarrett D'Amore
3797262c8a6SYuri Pankov if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) &&
3807262c8a6SYuri Pankov (last_ct->wc + 1 == wc)) {
3816b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges-1].max = wc;
3826b5e5868SGarrett D'Amore } else {
3836b5e5868SGarrett D'Amore rl.runetype_ext_nranges++;
3846b5e5868SGarrett D'Amore ct = realloc(ct,
3856b5e5868SGarrett D'Amore sizeof (*ct) * rl.runetype_ext_nranges);
386e1508819SYuri Pankov if (ct == NULL)
387e1508819SYuri Pankov goto fail;
3886b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].min = wc;
3896b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].max = wc;
3906b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
3916b5e5868SGarrett D'Amore }
3927262c8a6SYuri Pankov last_ct = ctn;
393017c01f8SYuri Pankov if (ctn->tolower == 0) {
394017c01f8SYuri Pankov last_lo = NULL;
3956b5e5868SGarrett D'Amore } else if ((last_lo != NULL) &&
3966b5e5868SGarrett D'Amore (last_lo->tolower + 1 == ctn->tolower)) {
3976b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges-1].max = wc;
3986b5e5868SGarrett D'Amore last_lo = ctn;
3996b5e5868SGarrett D'Amore } else {
4006b5e5868SGarrett D'Amore rl.maplower_ext_nranges++;
4016b5e5868SGarrett D'Amore lo = realloc(lo,
4026b5e5868SGarrett D'Amore sizeof (*lo) * rl.maplower_ext_nranges);
403e1508819SYuri Pankov if (lo == NULL)
404e1508819SYuri Pankov goto fail;
4056b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].min = wc;
4066b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].max = wc;
4076b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].map = ctn->tolower;
4086b5e5868SGarrett D'Amore last_lo = ctn;
4096b5e5868SGarrett D'Amore }
4106b5e5868SGarrett D'Amore
4116b5e5868SGarrett D'Amore if (ctn->toupper == 0) {
4126b5e5868SGarrett D'Amore last_up = NULL;
4136b5e5868SGarrett D'Amore } else if ((last_up != NULL) &&
4146b5e5868SGarrett D'Amore (last_up->toupper + 1 == ctn->toupper)) {
4156b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges-1].max = wc;
4166b5e5868SGarrett D'Amore last_up = ctn;
4176b5e5868SGarrett D'Amore } else {
4186b5e5868SGarrett D'Amore rl.mapupper_ext_nranges++;
4196b5e5868SGarrett D'Amore up = realloc(up,
4206b5e5868SGarrett D'Amore sizeof (*up) * rl.mapupper_ext_nranges);
421e1508819SYuri Pankov if (up == NULL)
422e1508819SYuri Pankov goto fail;
4236b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].min = wc;
4246b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].max = wc;
4256b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].map = ctn->toupper;
4266b5e5868SGarrett D'Amore last_up = ctn;
4276b5e5868SGarrett D'Amore }
4286b5e5868SGarrett D'Amore }
4296b5e5868SGarrett D'Amore
430e1508819SYuri Pankov if ((wr_category(&rl, sizeof (rl), f) == 0) &&
431e1508819SYuri Pankov (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) == 0) &&
432e1508819SYuri Pankov (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) == 0) &&
433e1508819SYuri Pankov (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) == 0)) {
434e1508819SYuri Pankov close_category(f);
435e1508819SYuri Pankov goto out;
4366b5e5868SGarrett D'Amore }
4376b5e5868SGarrett D'Amore
438e1508819SYuri Pankov fail:
439e1508819SYuri Pankov delete_category(f);
440e1508819SYuri Pankov out:
441e1508819SYuri Pankov free(ct);
442e1508819SYuri Pankov free(lo);
443e1508819SYuri Pankov free(up);
4446b5e5868SGarrett D'Amore }
445