16b5e5868SGarrett D'Amore /* 26b5e5868SGarrett D'Amore * This file and its contents are supplied under the terms of the 36b5e5868SGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0. 45aec55ebSGarrett D'Amore * You may only use this file in accordance with the terms of version 55aec55ebSGarrett D'Amore * 1.0 of the CDDL. 66b5e5868SGarrett D'Amore * 76b5e5868SGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this 86b5e5868SGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at 96b5e5868SGarrett D'Amore * http://www.illumos.org/license/CDDL. 106b5e5868SGarrett D'Amore */ 116b5e5868SGarrett D'Amore 126b5e5868SGarrett D'Amore /* 13e1508819SYuri Pankov * Copyright 2017 Nexenta Systems, Inc. 142da1cd3aSGarrett D'Amore * Copyright 2012 Garrett D'Amore <garrett@damore.org> 152da1cd3aSGarrett D'Amore * Copyright 2013 DEY Storage Systems, Inc. 166b5e5868SGarrett D'Amore */ 176b5e5868SGarrett D'Amore 186b5e5868SGarrett D'Amore /* 196b5e5868SGarrett D'Amore * LC_CTYPE database generation routines for localedef. 206b5e5868SGarrett D'Amore */ 216b5e5868SGarrett D'Amore 226b5e5868SGarrett D'Amore #include <stdio.h> 236b5e5868SGarrett D'Amore #include <stdlib.h> 246b5e5868SGarrett D'Amore #include <string.h> 256b5e5868SGarrett D'Amore #include <sys/types.h> 266b5e5868SGarrett D'Amore #include <sys/avl.h> 276b5e5868SGarrett D'Amore #include <wchar.h> 286b5e5868SGarrett D'Amore #include <ctype.h> 296b5e5868SGarrett D'Amore #include <wctype.h> 306b5e5868SGarrett D'Amore #include <unistd.h> 312da1cd3aSGarrett D'Amore #include "_ctype.h" 326b5e5868SGarrett D'Amore #include "localedef.h" 336b5e5868SGarrett D'Amore #include "parser.tab.h" 346b5e5868SGarrett D'Amore #include "runefile.h" 356b5e5868SGarrett D'Amore 366b5e5868SGarrett D'Amore static avl_tree_t ctypes; 376b5e5868SGarrett D'Amore 386b5e5868SGarrett D'Amore static wchar_t last_ctype; 396b5e5868SGarrett D'Amore 406b5e5868SGarrett D'Amore typedef struct ctype_node { 416b5e5868SGarrett D'Amore wchar_t wc; 426b5e5868SGarrett D'Amore int32_t ctype; 436b5e5868SGarrett D'Amore int32_t toupper; 446b5e5868SGarrett D'Amore int32_t tolower; 456b5e5868SGarrett D'Amore avl_node_t avl; 466b5e5868SGarrett D'Amore } ctype_node_t; 476b5e5868SGarrett D'Amore 482da1cd3aSGarrett D'Amore typedef struct width_node { 492da1cd3aSGarrett D'Amore wchar_t start; 502da1cd3aSGarrett D'Amore wchar_t end; 512da1cd3aSGarrett D'Amore int8_t width; 522da1cd3aSGarrett D'Amore avl_node_t avl; 532da1cd3aSGarrett D'Amore } width_node_t; 542da1cd3aSGarrett D'Amore 556b5e5868SGarrett D'Amore static int 566b5e5868SGarrett D'Amore ctype_compare(const void *n1, const void *n2) 576b5e5868SGarrett D'Amore { 586b5e5868SGarrett D'Amore const ctype_node_t *c1 = n1; 596b5e5868SGarrett D'Amore const ctype_node_t *c2 = n2; 606b5e5868SGarrett D'Amore 616b5e5868SGarrett D'Amore return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0); 626b5e5868SGarrett D'Amore } 636b5e5868SGarrett D'Amore 646b5e5868SGarrett D'Amore void 656b5e5868SGarrett D'Amore init_ctype(void) 666b5e5868SGarrett D'Amore { 676b5e5868SGarrett D'Amore avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t), 686b5e5868SGarrett D'Amore offsetof(ctype_node_t, avl)); 696b5e5868SGarrett D'Amore } 706b5e5868SGarrett D'Amore 716b5e5868SGarrett D'Amore 726b5e5868SGarrett D'Amore static void 736b5e5868SGarrett D'Amore add_ctype_impl(ctype_node_t *ctn) 746b5e5868SGarrett D'Amore { 756b5e5868SGarrett D'Amore switch (last_kw) { 766b5e5868SGarrett D'Amore case T_ISUPPER: 776b5e5868SGarrett D'Amore ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT); 786b5e5868SGarrett D'Amore break; 796b5e5868SGarrett D'Amore case T_ISLOWER: 806b5e5868SGarrett D'Amore ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT); 816b5e5868SGarrett D'Amore break; 826b5e5868SGarrett D'Amore case T_ISALPHA: 836b5e5868SGarrett D'Amore ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT); 846b5e5868SGarrett D'Amore break; 856b5e5868SGarrett D'Amore case T_ISDIGIT: 866b5e5868SGarrett D'Amore ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT); 876b5e5868SGarrett D'Amore break; 886b5e5868SGarrett D'Amore case T_ISSPACE: 896b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE; 906b5e5868SGarrett D'Amore break; 916b5e5868SGarrett D'Amore case T_ISCNTRL: 926b5e5868SGarrett D'Amore ctn->ctype |= _ISCNTRL; 936b5e5868SGarrett D'Amore break; 946b5e5868SGarrett D'Amore case T_ISGRAPH: 956b5e5868SGarrett D'Amore ctn->ctype |= (_ISGRAPH | _ISPRINT); 966b5e5868SGarrett D'Amore break; 976b5e5868SGarrett D'Amore case T_ISPRINT: 986b5e5868SGarrett D'Amore ctn->ctype |= _ISPRINT; 996b5e5868SGarrett D'Amore break; 1006b5e5868SGarrett D'Amore case T_ISPUNCT: 1016b5e5868SGarrett D'Amore ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT); 1026b5e5868SGarrett D'Amore break; 1036b5e5868SGarrett D'Amore case T_ISXDIGIT: 1046b5e5868SGarrett D'Amore ctn->ctype |= (_ISXDIGIT | _ISPRINT); 1056b5e5868SGarrett D'Amore break; 1066b5e5868SGarrett D'Amore case T_ISBLANK: 1076b5e5868SGarrett D'Amore ctn->ctype |= (_ISBLANK | _ISSPACE); 1086b5e5868SGarrett D'Amore break; 1096b5e5868SGarrett D'Amore case T_ISPHONOGRAM: 1106b5e5868SGarrett D'Amore ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH); 1116b5e5868SGarrett D'Amore break; 1126b5e5868SGarrett D'Amore case T_ISIDEOGRAM: 1136b5e5868SGarrett D'Amore ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH); 1146b5e5868SGarrett D'Amore break; 1156b5e5868SGarrett D'Amore case T_ISENGLISH: 1166b5e5868SGarrett D'Amore ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH); 1176b5e5868SGarrett D'Amore break; 1186b5e5868SGarrett D'Amore case T_ISNUMBER: 1196b5e5868SGarrett D'Amore ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH); 1206b5e5868SGarrett D'Amore break; 1216b5e5868SGarrett D'Amore case T_ISSPECIAL: 1226b5e5868SGarrett D'Amore ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH); 1236b5e5868SGarrett D'Amore break; 1246b5e5868SGarrett D'Amore case T_ISALNUM: 1256b5e5868SGarrett D'Amore /* 1266b5e5868SGarrett D'Amore * We can't do anything with this. The character 1276b5e5868SGarrett D'Amore * should already be specified as a digit or alpha. 1286b5e5868SGarrett D'Amore */ 1296b5e5868SGarrett D'Amore break; 1306b5e5868SGarrett D'Amore default: 1316b5e5868SGarrett D'Amore errf(_("not a valid character class")); 1326b5e5868SGarrett D'Amore } 1336b5e5868SGarrett D'Amore } 1346b5e5868SGarrett D'Amore 1356b5e5868SGarrett D'Amore static ctype_node_t * 1366b5e5868SGarrett D'Amore get_ctype(wchar_t wc) 1376b5e5868SGarrett D'Amore { 1386b5e5868SGarrett D'Amore ctype_node_t srch; 1396b5e5868SGarrett D'Amore ctype_node_t *ctn; 1406b5e5868SGarrett D'Amore avl_index_t where; 1416b5e5868SGarrett D'Amore 1426b5e5868SGarrett D'Amore srch.wc = wc; 1436b5e5868SGarrett D'Amore if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) { 1446b5e5868SGarrett D'Amore if ((ctn = calloc(1, sizeof (*ctn))) == NULL) { 1456b5e5868SGarrett D'Amore errf(_("out of memory")); 1466b5e5868SGarrett D'Amore return (NULL); 1476b5e5868SGarrett D'Amore } 1486b5e5868SGarrett D'Amore ctn->wc = wc; 1496b5e5868SGarrett D'Amore 1506b5e5868SGarrett D'Amore avl_insert(&ctypes, ctn, where); 1516b5e5868SGarrett D'Amore } 1526b5e5868SGarrett D'Amore return (ctn); 1536b5e5868SGarrett D'Amore } 1546b5e5868SGarrett D'Amore 1556b5e5868SGarrett D'Amore void 1566b5e5868SGarrett D'Amore add_ctype(int val) 1576b5e5868SGarrett D'Amore { 1586b5e5868SGarrett D'Amore ctype_node_t *ctn; 1596b5e5868SGarrett D'Amore 1606b5e5868SGarrett D'Amore if ((ctn = get_ctype(val)) == NULL) { 1616b5e5868SGarrett D'Amore INTERR; 1626b5e5868SGarrett D'Amore return; 1636b5e5868SGarrett D'Amore } 1646b5e5868SGarrett D'Amore add_ctype_impl(ctn); 1656b5e5868SGarrett D'Amore last_ctype = ctn->wc; 1666b5e5868SGarrett D'Amore } 1676b5e5868SGarrett D'Amore 1686b5e5868SGarrett D'Amore void 169*7262c8a6SYuri Pankov add_ctype_range(wchar_t end) 1706b5e5868SGarrett D'Amore { 1716b5e5868SGarrett D'Amore ctype_node_t *ctn; 1726b5e5868SGarrett D'Amore wchar_t cur; 1736b5e5868SGarrett D'Amore 1746b5e5868SGarrett D'Amore if (end < last_ctype) { 1756b5e5868SGarrett D'Amore errf(_("malformed character range (%u ... %u))"), 1766b5e5868SGarrett D'Amore last_ctype, end); 1776b5e5868SGarrett D'Amore return; 1786b5e5868SGarrett D'Amore } 1796b5e5868SGarrett D'Amore for (cur = last_ctype + 1; cur <= end; cur++) { 1806b5e5868SGarrett D'Amore if ((ctn = get_ctype(cur)) == NULL) { 1816b5e5868SGarrett D'Amore INTERR; 1826b5e5868SGarrett D'Amore return; 1836b5e5868SGarrett D'Amore } 1846b5e5868SGarrett D'Amore add_ctype_impl(ctn); 1856b5e5868SGarrett D'Amore } 1866b5e5868SGarrett D'Amore last_ctype = end; 1876b5e5868SGarrett D'Amore 1886b5e5868SGarrett D'Amore } 1896b5e5868SGarrett D'Amore 1902da1cd3aSGarrett D'Amore /* 1912da1cd3aSGarrett D'Amore * A word about widths: if the width mask is specified, then libc 1922da1cd3aSGarrett D'Amore * unconditionally honors it. Otherwise, it assumes printable 1932da1cd3aSGarrett D'Amore * characters have width 1, and non-printable characters have width 1942da1cd3aSGarrett D'Amore * -1 (except for NULL which is special with with 0). Hence, we have 1952da1cd3aSGarrett D'Amore * no need to inject defaults here -- the "default" unset value of 0 1962da1cd3aSGarrett D'Amore * indicates that libc should use its own logic in wcwidth as described. 1972da1cd3aSGarrett D'Amore */ 1982da1cd3aSGarrett D'Amore void 1992da1cd3aSGarrett D'Amore add_width(int wc, int width) 2002da1cd3aSGarrett D'Amore { 2012da1cd3aSGarrett D'Amore ctype_node_t *ctn; 2022da1cd3aSGarrett D'Amore 2032da1cd3aSGarrett D'Amore if ((ctn = get_ctype(wc)) == NULL) { 2042da1cd3aSGarrett D'Amore INTERR; 2052da1cd3aSGarrett D'Amore return; 2062da1cd3aSGarrett D'Amore } 2072da1cd3aSGarrett D'Amore ctn->ctype &= ~(_CTYPE_SWM); 2082da1cd3aSGarrett D'Amore switch (width) { 2092da1cd3aSGarrett D'Amore case 0: 2102da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW0; 2112da1cd3aSGarrett D'Amore break; 2122da1cd3aSGarrett D'Amore case 1: 2132da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW1; 2142da1cd3aSGarrett D'Amore break; 2152da1cd3aSGarrett D'Amore case 2: 2162da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW2; 2172da1cd3aSGarrett D'Amore break; 2182da1cd3aSGarrett D'Amore case 3: 2192da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW3; 2202da1cd3aSGarrett D'Amore break; 2212da1cd3aSGarrett D'Amore } 2222da1cd3aSGarrett D'Amore } 2232da1cd3aSGarrett D'Amore 2242da1cd3aSGarrett D'Amore void 2252da1cd3aSGarrett D'Amore add_width_range(int start, int end, int width) 2262da1cd3aSGarrett D'Amore { 2272da1cd3aSGarrett D'Amore for (; start <= end; start++) { 2282da1cd3aSGarrett D'Amore add_width(start, width); 2292da1cd3aSGarrett D'Amore } 2302da1cd3aSGarrett D'Amore } 2312da1cd3aSGarrett D'Amore 2326b5e5868SGarrett D'Amore void 2336b5e5868SGarrett D'Amore add_caseconv(int val, int wc) 2346b5e5868SGarrett D'Amore { 2356b5e5868SGarrett D'Amore ctype_node_t *ctn; 2366b5e5868SGarrett D'Amore 2376b5e5868SGarrett D'Amore ctn = get_ctype(val); 2386b5e5868SGarrett D'Amore if (ctn == NULL) { 2396b5e5868SGarrett D'Amore INTERR; 2406b5e5868SGarrett D'Amore return; 2416b5e5868SGarrett D'Amore } 2426b5e5868SGarrett D'Amore 2436b5e5868SGarrett D'Amore switch (last_kw) { 2446b5e5868SGarrett D'Amore case T_TOUPPER: 2456b5e5868SGarrett D'Amore ctn->toupper = wc; 2466b5e5868SGarrett D'Amore break; 2476b5e5868SGarrett D'Amore case T_TOLOWER: 2486b5e5868SGarrett D'Amore ctn->tolower = wc; 2496b5e5868SGarrett D'Amore break; 2506b5e5868SGarrett D'Amore default: 2516b5e5868SGarrett D'Amore INTERR; 2526b5e5868SGarrett D'Amore break; 2536b5e5868SGarrett D'Amore } 2546b5e5868SGarrett D'Amore } 2556b5e5868SGarrett D'Amore 2566b5e5868SGarrett D'Amore void 2576b5e5868SGarrett D'Amore dump_ctype(void) 2586b5e5868SGarrett D'Amore { 2596b5e5868SGarrett D'Amore FILE *f; 2606b5e5868SGarrett D'Amore _FileRuneLocale rl; 2616b5e5868SGarrett D'Amore ctype_node_t *ctn, *last_ct, *last_lo, *last_up; 2626b5e5868SGarrett D'Amore _FileRuneEntry *ct = NULL; 2636b5e5868SGarrett D'Amore _FileRuneEntry *lo = NULL; 2646b5e5868SGarrett D'Amore _FileRuneEntry *up = NULL; 2656125cca6SDavid Höppner wchar_t wc; 2666b5e5868SGarrett D'Amore 2676b5e5868SGarrett D'Amore (void) memset(&rl, 0, sizeof (rl)); 2686b5e5868SGarrett D'Amore last_ct = NULL; 2696b5e5868SGarrett D'Amore last_lo = NULL; 2706b5e5868SGarrett D'Amore last_up = NULL; 2716b5e5868SGarrett D'Amore 2726b5e5868SGarrett D'Amore if ((f = open_category()) == NULL) 2736b5e5868SGarrett D'Amore return; 2746b5e5868SGarrett D'Amore 2756b5e5868SGarrett D'Amore (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8); 276e1508819SYuri Pankov (void) strlcpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding)); 2776b5e5868SGarrett D'Amore 2786125cca6SDavid Höppner /* 2796125cca6SDavid Höppner * Initialize the identity map. 2806125cca6SDavid Höppner */ 2816125cca6SDavid Höppner for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) { 2826125cca6SDavid Höppner rl.maplower[wc] = wc; 2836125cca6SDavid Höppner rl.mapupper[wc] = wc; 2846125cca6SDavid Höppner } 2856b5e5868SGarrett D'Amore 2866125cca6SDavid Höppner for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) { 2876b5e5868SGarrett D'Amore int conflict = 0; 2886b5e5868SGarrett D'Amore 2892da1cd3aSGarrett D'Amore 2906125cca6SDavid Höppner wc = ctn->wc; 2916125cca6SDavid Höppner 2926b5e5868SGarrett D'Amore /* 2936b5e5868SGarrett D'Amore * POSIX requires certain portable characters have 2946b5e5868SGarrett D'Amore * certain types. Add them if they are missing. 2956b5e5868SGarrett D'Amore */ 2966b5e5868SGarrett D'Amore if ((wc >= 1) && (wc <= 127)) { 2976b5e5868SGarrett D'Amore if ((wc >= 'A') && (wc <= 'Z')) 2986b5e5868SGarrett D'Amore ctn->ctype |= _ISUPPER; 2996b5e5868SGarrett D'Amore if ((wc >= 'a') && (wc <= 'z')) 3006b5e5868SGarrett D'Amore ctn->ctype |= _ISLOWER; 3016b5e5868SGarrett D'Amore if ((wc >= '0') && (wc <= '9')) 3026b5e5868SGarrett D'Amore ctn->ctype |= _ISDIGIT; 3035a4ef21aSLauri Tirkkonen if (wc == ' ') 3045a4ef21aSLauri Tirkkonen ctn->ctype |= _ISPRINT; 3056b5e5868SGarrett D'Amore if (strchr(" \f\n\r\t\v", (char)wc) != NULL) 3066b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE; 3076b5e5868SGarrett D'Amore if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL) 3086b5e5868SGarrett D'Amore ctn->ctype |= _ISXDIGIT; 3096b5e5868SGarrett D'Amore if (strchr(" \t", (char)wc)) 3106b5e5868SGarrett D'Amore ctn->ctype |= _ISBLANK; 311723fee08SGarrett D'Amore 312723fee08SGarrett D'Amore /* 313723fee08SGarrett D'Amore * Technically these settings are only 314723fee08SGarrett D'Amore * required for the C locale. However, it 315723fee08SGarrett D'Amore * turns out that because of the historical 316723fee08SGarrett D'Amore * version of isprint(), we need them for all 317723fee08SGarrett D'Amore * locales as well. Note that these are not 318723fee08SGarrett D'Amore * necessarily valid punctation characters in 319723fee08SGarrett D'Amore * the current language, but ispunct() needs 320723fee08SGarrett D'Amore * to return TRUE for them. 321723fee08SGarrett D'Amore */ 322723fee08SGarrett D'Amore if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~", 323723fee08SGarrett D'Amore (char)wc)) 324723fee08SGarrett D'Amore ctn->ctype |= _ISPUNCT; 3256b5e5868SGarrett D'Amore } 3266b5e5868SGarrett D'Amore 3276b5e5868SGarrett D'Amore /* 3286b5e5868SGarrett D'Amore * POSIX also requires that certain types imply 3296b5e5868SGarrett D'Amore * others. Add any inferred types here. 3306b5e5868SGarrett D'Amore */ 3316b5e5868SGarrett D'Amore if (ctn->ctype & (_ISUPPER |_ISLOWER)) 3326b5e5868SGarrett D'Amore ctn->ctype |= _ISALPHA; 3336b5e5868SGarrett D'Amore if (ctn->ctype & _ISDIGIT) 3346b5e5868SGarrett D'Amore ctn->ctype |= _ISXDIGIT; 3356b5e5868SGarrett D'Amore if (ctn->ctype & _ISBLANK) 3366b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE; 3376b5e5868SGarrett D'Amore if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT)) 3386b5e5868SGarrett D'Amore ctn->ctype |= _ISGRAPH; 3396b5e5868SGarrett D'Amore if (ctn->ctype & _ISGRAPH) 3406b5e5868SGarrett D'Amore ctn->ctype |= _ISPRINT; 3416b5e5868SGarrett D'Amore 3426b5e5868SGarrett D'Amore /* 3436b5e5868SGarrett D'Amore * Finally, POSIX requires that certain combinations 3446b5e5868SGarrett D'Amore * are invalid. We don't flag this as a fatal error, 3456b5e5868SGarrett D'Amore * but we will warn about. 3466b5e5868SGarrett D'Amore */ 3476b5e5868SGarrett D'Amore if ((ctn->ctype & _ISALPHA) && 3486b5e5868SGarrett D'Amore (ctn->ctype & (_ISPUNCT|_ISDIGIT))) 3496b5e5868SGarrett D'Amore conflict++; 350e1508819SYuri Pankov if ((ctn->ctype & _ISPUNCT) && 3516b5e5868SGarrett D'Amore (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT))) 3526b5e5868SGarrett D'Amore conflict++; 3536b5e5868SGarrett D'Amore if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH)) 3546b5e5868SGarrett D'Amore conflict++; 355e1508819SYuri Pankov if ((ctn->ctype & _ISCNTRL) && (ctn->ctype & _ISPRINT)) 3566b5e5868SGarrett D'Amore conflict++; 3576b5e5868SGarrett D'Amore if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH))) 3586b5e5868SGarrett D'Amore conflict++; 3596b5e5868SGarrett D'Amore 3603e6960d7SYuri Pankov #ifndef NATIVE 3616b5e5868SGarrett D'Amore if (conflict) { 3626b5e5868SGarrett D'Amore warn("conflicting classes for character 0x%x (%x)", 3636b5e5868SGarrett D'Amore wc, ctn->ctype); 3646b5e5868SGarrett D'Amore } 3653e6960d7SYuri Pankov #endif 3666b5e5868SGarrett D'Amore /* 3676b5e5868SGarrett D'Amore * Handle the lower 256 characters using the simple 3686b5e5868SGarrett D'Amore * optimization. Note that if we have not defined the 3696b5e5868SGarrett D'Amore * upper/lower case, then we identity map it. 3706b5e5868SGarrett D'Amore */ 3715080145bSGarrett D'Amore if ((unsigned)wc < _CACHED_RUNES) { 3726b5e5868SGarrett D'Amore rl.runetype[wc] = ctn->ctype; 3736125cca6SDavid Höppner if (ctn->tolower) 3746125cca6SDavid Höppner rl.maplower[wc] = ctn->tolower; 3756125cca6SDavid Höppner if (ctn->toupper) 3766125cca6SDavid Höppner rl.mapupper[wc] = ctn->toupper; 3776b5e5868SGarrett D'Amore continue; 3786b5e5868SGarrett D'Amore } 3796b5e5868SGarrett D'Amore 380*7262c8a6SYuri Pankov if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) && 381*7262c8a6SYuri Pankov (last_ct->wc + 1 == wc)) { 3826b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges-1].max = wc; 3836b5e5868SGarrett D'Amore } else { 3846b5e5868SGarrett D'Amore rl.runetype_ext_nranges++; 3856b5e5868SGarrett D'Amore ct = realloc(ct, 3866b5e5868SGarrett D'Amore sizeof (*ct) * rl.runetype_ext_nranges); 387e1508819SYuri Pankov if (ct == NULL) 388e1508819SYuri Pankov goto fail; 3896b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].min = wc; 3906b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].max = wc; 3916b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].map = ctn->ctype; 3926b5e5868SGarrett D'Amore } 393*7262c8a6SYuri Pankov last_ct = ctn; 394017c01f8SYuri Pankov if (ctn->tolower == 0) { 395017c01f8SYuri Pankov last_lo = NULL; 3966b5e5868SGarrett D'Amore } else if ((last_lo != NULL) && 3976b5e5868SGarrett D'Amore (last_lo->tolower + 1 == ctn->tolower)) { 3986b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges-1].max = wc; 3996b5e5868SGarrett D'Amore last_lo = ctn; 4006b5e5868SGarrett D'Amore } else { 4016b5e5868SGarrett D'Amore rl.maplower_ext_nranges++; 4026b5e5868SGarrett D'Amore lo = realloc(lo, 4036b5e5868SGarrett D'Amore sizeof (*lo) * rl.maplower_ext_nranges); 404e1508819SYuri Pankov if (lo == NULL) 405e1508819SYuri Pankov goto fail; 4066b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].min = wc; 4076b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].max = wc; 4086b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].map = ctn->tolower; 4096b5e5868SGarrett D'Amore last_lo = ctn; 4106b5e5868SGarrett D'Amore } 4116b5e5868SGarrett D'Amore 4126b5e5868SGarrett D'Amore if (ctn->toupper == 0) { 4136b5e5868SGarrett D'Amore last_up = NULL; 4146b5e5868SGarrett D'Amore } else if ((last_up != NULL) && 4156b5e5868SGarrett D'Amore (last_up->toupper + 1 == ctn->toupper)) { 4166b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges-1].max = wc; 4176b5e5868SGarrett D'Amore last_up = ctn; 4186b5e5868SGarrett D'Amore } else { 4196b5e5868SGarrett D'Amore rl.mapupper_ext_nranges++; 4206b5e5868SGarrett D'Amore up = realloc(up, 4216b5e5868SGarrett D'Amore sizeof (*up) * rl.mapupper_ext_nranges); 422e1508819SYuri Pankov if (up == NULL) 423e1508819SYuri Pankov goto fail; 4246b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].min = wc; 4256b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].max = wc; 4266b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].map = ctn->toupper; 4276b5e5868SGarrett D'Amore last_up = ctn; 4286b5e5868SGarrett D'Amore } 4296b5e5868SGarrett D'Amore } 4306b5e5868SGarrett D'Amore 431e1508819SYuri Pankov if ((wr_category(&rl, sizeof (rl), f) == 0) && 432e1508819SYuri Pankov (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) == 0) && 433e1508819SYuri Pankov (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) == 0) && 434e1508819SYuri Pankov (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) == 0)) { 435e1508819SYuri Pankov close_category(f); 436e1508819SYuri Pankov goto out; 4376b5e5868SGarrett D'Amore } 4386b5e5868SGarrett D'Amore 439e1508819SYuri Pankov fail: 440e1508819SYuri Pankov delete_category(f); 441e1508819SYuri Pankov out: 442e1508819SYuri Pankov free(ct); 443e1508819SYuri Pankov free(lo); 444e1508819SYuri Pankov free(up); 4456b5e5868SGarrett D'Amore } 446