16b5e5868SGarrett D'Amore /* 26b5e5868SGarrett D'Amore * This file and its contents are supplied under the terms of the 36b5e5868SGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0. 45aec55ebSGarrett D'Amore * You may only use this file in accordance with the terms of version 55aec55ebSGarrett D'Amore * 1.0 of the CDDL. 66b5e5868SGarrett D'Amore * 76b5e5868SGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this 86b5e5868SGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at 96b5e5868SGarrett D'Amore * http://www.illumos.org/license/CDDL. 106b5e5868SGarrett D'Amore */ 116b5e5868SGarrett D'Amore 126b5e5868SGarrett D'Amore /* 13017c01f8SYuri Pankov * Copyright 2010,2011 Nexenta Systems, Inc. All rights reserved. 14*2da1cd3aSGarrett D'Amore * Copyright 2012 Garrett D'Amore <garrett@damore.org> 15*2da1cd3aSGarrett D'Amore * Copyright 2013 DEY Storage Systems, Inc. 166b5e5868SGarrett D'Amore */ 176b5e5868SGarrett D'Amore 186b5e5868SGarrett D'Amore /* 196b5e5868SGarrett D'Amore * LC_CTYPE database generation routines for localedef. 206b5e5868SGarrett D'Amore */ 216b5e5868SGarrett D'Amore 226b5e5868SGarrett D'Amore #include <stdio.h> 236b5e5868SGarrett D'Amore #include <stdlib.h> 246b5e5868SGarrett D'Amore #include <string.h> 256b5e5868SGarrett D'Amore #include <sys/types.h> 266b5e5868SGarrett D'Amore #include <sys/avl.h> 276b5e5868SGarrett D'Amore #include <wchar.h> 286b5e5868SGarrett D'Amore #include <ctype.h> 296b5e5868SGarrett D'Amore #include <wctype.h> 306b5e5868SGarrett D'Amore #include <unistd.h> 31*2da1cd3aSGarrett D'Amore #include "_ctype.h" 326b5e5868SGarrett D'Amore #include "localedef.h" 336b5e5868SGarrett D'Amore #include "parser.tab.h" 346b5e5868SGarrett D'Amore #include "runefile.h" 356b5e5868SGarrett D'Amore 366b5e5868SGarrett D'Amore static avl_tree_t ctypes; 376b5e5868SGarrett D'Amore 386b5e5868SGarrett D'Amore static wchar_t last_ctype; 396b5e5868SGarrett D'Amore 406b5e5868SGarrett D'Amore typedef struct ctype_node { 416b5e5868SGarrett D'Amore wchar_t wc; 426b5e5868SGarrett D'Amore int32_t ctype; 436b5e5868SGarrett D'Amore int32_t toupper; 446b5e5868SGarrett D'Amore int32_t tolower; 456b5e5868SGarrett D'Amore avl_node_t avl; 466b5e5868SGarrett D'Amore } ctype_node_t; 476b5e5868SGarrett D'Amore 48*2da1cd3aSGarrett D'Amore typedef struct width_node { 49*2da1cd3aSGarrett D'Amore wchar_t start; 50*2da1cd3aSGarrett D'Amore wchar_t end; 51*2da1cd3aSGarrett D'Amore int8_t width; 52*2da1cd3aSGarrett D'Amore avl_node_t avl; 53*2da1cd3aSGarrett D'Amore } width_node_t; 54*2da1cd3aSGarrett D'Amore 556b5e5868SGarrett D'Amore static int 566b5e5868SGarrett D'Amore ctype_compare(const void *n1, const void *n2) 576b5e5868SGarrett D'Amore { 586b5e5868SGarrett D'Amore const ctype_node_t *c1 = n1; 596b5e5868SGarrett D'Amore const ctype_node_t *c2 = n2; 606b5e5868SGarrett D'Amore 616b5e5868SGarrett D'Amore return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0); 626b5e5868SGarrett D'Amore } 636b5e5868SGarrett D'Amore 646b5e5868SGarrett D'Amore void 656b5e5868SGarrett D'Amore init_ctype(void) 666b5e5868SGarrett D'Amore { 676b5e5868SGarrett D'Amore avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t), 686b5e5868SGarrett D'Amore offsetof(ctype_node_t, avl)); 696b5e5868SGarrett D'Amore } 706b5e5868SGarrett D'Amore 716b5e5868SGarrett D'Amore 726b5e5868SGarrett D'Amore static void 736b5e5868SGarrett D'Amore add_ctype_impl(ctype_node_t *ctn) 746b5e5868SGarrett D'Amore { 756b5e5868SGarrett D'Amore switch (last_kw) { 766b5e5868SGarrett D'Amore case T_ISUPPER: 776b5e5868SGarrett D'Amore ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT); 786b5e5868SGarrett D'Amore break; 796b5e5868SGarrett D'Amore case T_ISLOWER: 806b5e5868SGarrett D'Amore ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT); 816b5e5868SGarrett D'Amore break; 826b5e5868SGarrett D'Amore case T_ISALPHA: 836b5e5868SGarrett D'Amore ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT); 846b5e5868SGarrett D'Amore break; 856b5e5868SGarrett D'Amore case T_ISDIGIT: 866b5e5868SGarrett D'Amore ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT); 876b5e5868SGarrett D'Amore break; 886b5e5868SGarrett D'Amore case T_ISSPACE: 896b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE; 906b5e5868SGarrett D'Amore break; 916b5e5868SGarrett D'Amore case T_ISCNTRL: 926b5e5868SGarrett D'Amore ctn->ctype |= _ISCNTRL; 936b5e5868SGarrett D'Amore break; 946b5e5868SGarrett D'Amore case T_ISGRAPH: 956b5e5868SGarrett D'Amore ctn->ctype |= (_ISGRAPH | _ISPRINT); 966b5e5868SGarrett D'Amore break; 976b5e5868SGarrett D'Amore case T_ISPRINT: 986b5e5868SGarrett D'Amore ctn->ctype |= _ISPRINT; 996b5e5868SGarrett D'Amore break; 1006b5e5868SGarrett D'Amore case T_ISPUNCT: 1016b5e5868SGarrett D'Amore ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT); 1026b5e5868SGarrett D'Amore break; 1036b5e5868SGarrett D'Amore case T_ISXDIGIT: 1046b5e5868SGarrett D'Amore ctn->ctype |= (_ISXDIGIT | _ISPRINT); 1056b5e5868SGarrett D'Amore break; 1066b5e5868SGarrett D'Amore case T_ISBLANK: 1076b5e5868SGarrett D'Amore ctn->ctype |= (_ISBLANK | _ISSPACE); 1086b5e5868SGarrett D'Amore break; 1096b5e5868SGarrett D'Amore case T_ISPHONOGRAM: 1106b5e5868SGarrett D'Amore ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH); 1116b5e5868SGarrett D'Amore break; 1126b5e5868SGarrett D'Amore case T_ISIDEOGRAM: 1136b5e5868SGarrett D'Amore ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH); 1146b5e5868SGarrett D'Amore break; 1156b5e5868SGarrett D'Amore case T_ISENGLISH: 1166b5e5868SGarrett D'Amore ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH); 1176b5e5868SGarrett D'Amore break; 1186b5e5868SGarrett D'Amore case T_ISNUMBER: 1196b5e5868SGarrett D'Amore ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH); 1206b5e5868SGarrett D'Amore break; 1216b5e5868SGarrett D'Amore case T_ISSPECIAL: 1226b5e5868SGarrett D'Amore ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH); 1236b5e5868SGarrett D'Amore break; 1246b5e5868SGarrett D'Amore case T_ISALNUM: 1256b5e5868SGarrett D'Amore /* 1266b5e5868SGarrett D'Amore * We can't do anything with this. The character 1276b5e5868SGarrett D'Amore * should already be specified as a digit or alpha. 1286b5e5868SGarrett D'Amore */ 1296b5e5868SGarrett D'Amore break; 1306b5e5868SGarrett D'Amore default: 1316b5e5868SGarrett D'Amore errf(_("not a valid character class")); 1326b5e5868SGarrett D'Amore } 1336b5e5868SGarrett D'Amore } 1346b5e5868SGarrett D'Amore 1356b5e5868SGarrett D'Amore static ctype_node_t * 1366b5e5868SGarrett D'Amore get_ctype(wchar_t wc) 1376b5e5868SGarrett D'Amore { 1386b5e5868SGarrett D'Amore ctype_node_t srch; 1396b5e5868SGarrett D'Amore ctype_node_t *ctn; 1406b5e5868SGarrett D'Amore avl_index_t where; 1416b5e5868SGarrett D'Amore 1426b5e5868SGarrett D'Amore srch.wc = wc; 1436b5e5868SGarrett D'Amore if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) { 1446b5e5868SGarrett D'Amore if ((ctn = calloc(1, sizeof (*ctn))) == NULL) { 1456b5e5868SGarrett D'Amore errf(_("out of memory")); 1466b5e5868SGarrett D'Amore return (NULL); 1476b5e5868SGarrett D'Amore } 1486b5e5868SGarrett D'Amore ctn->wc = wc; 1496b5e5868SGarrett D'Amore 1506b5e5868SGarrett D'Amore avl_insert(&ctypes, ctn, where); 1516b5e5868SGarrett D'Amore } 1526b5e5868SGarrett D'Amore return (ctn); 1536b5e5868SGarrett D'Amore } 1546b5e5868SGarrett D'Amore 1556b5e5868SGarrett D'Amore void 1566b5e5868SGarrett D'Amore add_ctype(int val) 1576b5e5868SGarrett D'Amore { 1586b5e5868SGarrett D'Amore ctype_node_t *ctn; 1596b5e5868SGarrett D'Amore 1606b5e5868SGarrett D'Amore if ((ctn = get_ctype(val)) == NULL) { 1616b5e5868SGarrett D'Amore INTERR; 1626b5e5868SGarrett D'Amore return; 1636b5e5868SGarrett D'Amore } 1646b5e5868SGarrett D'Amore add_ctype_impl(ctn); 1656b5e5868SGarrett D'Amore last_ctype = ctn->wc; 1666b5e5868SGarrett D'Amore } 1676b5e5868SGarrett D'Amore 1686b5e5868SGarrett D'Amore void 1696b5e5868SGarrett D'Amore add_ctype_range(int end) 1706b5e5868SGarrett D'Amore { 1716b5e5868SGarrett D'Amore ctype_node_t *ctn; 1726b5e5868SGarrett D'Amore wchar_t cur; 1736b5e5868SGarrett D'Amore 1746b5e5868SGarrett D'Amore if (end < last_ctype) { 1756b5e5868SGarrett D'Amore errf(_("malformed character range (%u ... %u))"), 1766b5e5868SGarrett D'Amore last_ctype, end); 1776b5e5868SGarrett D'Amore return; 1786b5e5868SGarrett D'Amore } 1796b5e5868SGarrett D'Amore for (cur = last_ctype + 1; cur <= end; cur++) { 1806b5e5868SGarrett D'Amore if ((ctn = get_ctype(cur)) == NULL) { 1816b5e5868SGarrett D'Amore INTERR; 1826b5e5868SGarrett D'Amore return; 1836b5e5868SGarrett D'Amore } 1846b5e5868SGarrett D'Amore add_ctype_impl(ctn); 1856b5e5868SGarrett D'Amore } 1866b5e5868SGarrett D'Amore last_ctype = end; 1876b5e5868SGarrett D'Amore 1886b5e5868SGarrett D'Amore } 1896b5e5868SGarrett D'Amore 190*2da1cd3aSGarrett D'Amore /* 191*2da1cd3aSGarrett D'Amore * A word about widths: if the width mask is specified, then libc 192*2da1cd3aSGarrett D'Amore * unconditionally honors it. Otherwise, it assumes printable 193*2da1cd3aSGarrett D'Amore * characters have width 1, and non-printable characters have width 194*2da1cd3aSGarrett D'Amore * -1 (except for NULL which is special with with 0). Hence, we have 195*2da1cd3aSGarrett D'Amore * no need to inject defaults here -- the "default" unset value of 0 196*2da1cd3aSGarrett D'Amore * indicates that libc should use its own logic in wcwidth as described. 197*2da1cd3aSGarrett D'Amore */ 198*2da1cd3aSGarrett D'Amore void 199*2da1cd3aSGarrett D'Amore add_width(int wc, int width) 200*2da1cd3aSGarrett D'Amore { 201*2da1cd3aSGarrett D'Amore ctype_node_t *ctn; 202*2da1cd3aSGarrett D'Amore 203*2da1cd3aSGarrett D'Amore if ((ctn = get_ctype(wc)) == NULL) { 204*2da1cd3aSGarrett D'Amore INTERR; 205*2da1cd3aSGarrett D'Amore return; 206*2da1cd3aSGarrett D'Amore } 207*2da1cd3aSGarrett D'Amore ctn->ctype &= ~(_CTYPE_SWM); 208*2da1cd3aSGarrett D'Amore switch (width) { 209*2da1cd3aSGarrett D'Amore case 0: 210*2da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW0; 211*2da1cd3aSGarrett D'Amore break; 212*2da1cd3aSGarrett D'Amore case 1: 213*2da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW1; 214*2da1cd3aSGarrett D'Amore break; 215*2da1cd3aSGarrett D'Amore case 2: 216*2da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW2; 217*2da1cd3aSGarrett D'Amore break; 218*2da1cd3aSGarrett D'Amore case 3: 219*2da1cd3aSGarrett D'Amore ctn->ctype |= _CTYPE_SW3; 220*2da1cd3aSGarrett D'Amore break; 221*2da1cd3aSGarrett D'Amore } 222*2da1cd3aSGarrett D'Amore } 223*2da1cd3aSGarrett D'Amore 224*2da1cd3aSGarrett D'Amore void 225*2da1cd3aSGarrett D'Amore add_width_range(int start, int end, int width) 226*2da1cd3aSGarrett D'Amore { 227*2da1cd3aSGarrett D'Amore for (; start <= end; start++) { 228*2da1cd3aSGarrett D'Amore add_width(start, width); 229*2da1cd3aSGarrett D'Amore } 230*2da1cd3aSGarrett D'Amore } 231*2da1cd3aSGarrett D'Amore 2326b5e5868SGarrett D'Amore void 2336b5e5868SGarrett D'Amore add_caseconv(int val, int wc) 2346b5e5868SGarrett D'Amore { 2356b5e5868SGarrett D'Amore ctype_node_t *ctn; 2366b5e5868SGarrett D'Amore 2376b5e5868SGarrett D'Amore ctn = get_ctype(val); 2386b5e5868SGarrett D'Amore if (ctn == NULL) { 2396b5e5868SGarrett D'Amore INTERR; 2406b5e5868SGarrett D'Amore return; 2416b5e5868SGarrett D'Amore } 2426b5e5868SGarrett D'Amore 2436b5e5868SGarrett D'Amore switch (last_kw) { 2446b5e5868SGarrett D'Amore case T_TOUPPER: 2456b5e5868SGarrett D'Amore ctn->toupper = wc; 2466b5e5868SGarrett D'Amore break; 2476b5e5868SGarrett D'Amore case T_TOLOWER: 2486b5e5868SGarrett D'Amore ctn->tolower = wc; 2496b5e5868SGarrett D'Amore break; 2506b5e5868SGarrett D'Amore default: 2516b5e5868SGarrett D'Amore INTERR; 2526b5e5868SGarrett D'Amore break; 2536b5e5868SGarrett D'Amore } 2546b5e5868SGarrett D'Amore } 2556b5e5868SGarrett D'Amore 2566b5e5868SGarrett D'Amore void 2576b5e5868SGarrett D'Amore dump_ctype(void) 2586b5e5868SGarrett D'Amore { 2596b5e5868SGarrett D'Amore FILE *f; 2606b5e5868SGarrett D'Amore _FileRuneLocale rl; 2616b5e5868SGarrett D'Amore ctype_node_t *ctn, *last_ct, *last_lo, *last_up; 2626b5e5868SGarrett D'Amore _FileRuneEntry *ct = NULL; 2636b5e5868SGarrett D'Amore _FileRuneEntry *lo = NULL; 2646b5e5868SGarrett D'Amore _FileRuneEntry *up = NULL; 2656125cca6SDavid Höppner wchar_t wc; 2666b5e5868SGarrett D'Amore 2676b5e5868SGarrett D'Amore (void) memset(&rl, 0, sizeof (rl)); 2686b5e5868SGarrett D'Amore last_ct = NULL; 2696b5e5868SGarrett D'Amore last_lo = NULL; 2706b5e5868SGarrett D'Amore last_up = NULL; 2716b5e5868SGarrett D'Amore 2726b5e5868SGarrett D'Amore if ((f = open_category()) == NULL) 2736b5e5868SGarrett D'Amore return; 2746b5e5868SGarrett D'Amore 2756b5e5868SGarrett D'Amore (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8); 2766b5e5868SGarrett D'Amore (void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding)); 2776b5e5868SGarrett D'Amore 2786125cca6SDavid Höppner /* 2796125cca6SDavid Höppner * Initialize the identity map. 2806125cca6SDavid Höppner */ 2816125cca6SDavid Höppner for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) { 2826125cca6SDavid Höppner rl.maplower[wc] = wc; 2836125cca6SDavid Höppner rl.mapupper[wc] = wc; 2846125cca6SDavid Höppner } 2856b5e5868SGarrett D'Amore 2866125cca6SDavid Höppner for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) { 2876b5e5868SGarrett D'Amore int conflict = 0; 2886b5e5868SGarrett D'Amore 289*2da1cd3aSGarrett D'Amore 2906125cca6SDavid Höppner wc = ctn->wc; 2916125cca6SDavid Höppner 2926b5e5868SGarrett D'Amore /* 2936b5e5868SGarrett D'Amore * POSIX requires certain portable characters have 2946b5e5868SGarrett D'Amore * certain types. Add them if they are missing. 2956b5e5868SGarrett D'Amore */ 2966b5e5868SGarrett D'Amore if ((wc >= 1) && (wc <= 127)) { 2976b5e5868SGarrett D'Amore if ((wc >= 'A') && (wc <= 'Z')) 2986b5e5868SGarrett D'Amore ctn->ctype |= _ISUPPER; 2996b5e5868SGarrett D'Amore if ((wc >= 'a') && (wc <= 'z')) 3006b5e5868SGarrett D'Amore ctn->ctype |= _ISLOWER; 3016b5e5868SGarrett D'Amore if ((wc >= '0') && (wc <= '9')) 3026b5e5868SGarrett D'Amore ctn->ctype |= _ISDIGIT; 3036b5e5868SGarrett D'Amore if (strchr(" \f\n\r\t\v", (char)wc) != NULL) 3046b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE; 3056b5e5868SGarrett D'Amore if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL) 3066b5e5868SGarrett D'Amore ctn->ctype |= _ISXDIGIT; 3076b5e5868SGarrett D'Amore if (strchr(" \t", (char)wc)) 3086b5e5868SGarrett D'Amore ctn->ctype |= _ISBLANK; 309723fee08SGarrett D'Amore 310723fee08SGarrett D'Amore /* 311723fee08SGarrett D'Amore * Technically these settings are only 312723fee08SGarrett D'Amore * required for the C locale. However, it 313723fee08SGarrett D'Amore * turns out that because of the historical 314723fee08SGarrett D'Amore * version of isprint(), we need them for all 315723fee08SGarrett D'Amore * locales as well. Note that these are not 316723fee08SGarrett D'Amore * necessarily valid punctation characters in 317723fee08SGarrett D'Amore * the current language, but ispunct() needs 318723fee08SGarrett D'Amore * to return TRUE for them. 319723fee08SGarrett D'Amore */ 320723fee08SGarrett D'Amore if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~", 321723fee08SGarrett D'Amore (char)wc)) 322723fee08SGarrett D'Amore ctn->ctype |= _ISPUNCT; 3236b5e5868SGarrett D'Amore } 3246b5e5868SGarrett D'Amore 3256b5e5868SGarrett D'Amore /* 3266b5e5868SGarrett D'Amore * POSIX also requires that certain types imply 3276b5e5868SGarrett D'Amore * others. Add any inferred types here. 3286b5e5868SGarrett D'Amore */ 3296b5e5868SGarrett D'Amore if (ctn->ctype & (_ISUPPER |_ISLOWER)) 3306b5e5868SGarrett D'Amore ctn->ctype |= _ISALPHA; 3316b5e5868SGarrett D'Amore if (ctn->ctype & _ISDIGIT) 3326b5e5868SGarrett D'Amore ctn->ctype |= _ISXDIGIT; 3336b5e5868SGarrett D'Amore if (ctn->ctype & _ISBLANK) 3346b5e5868SGarrett D'Amore ctn->ctype |= _ISSPACE; 3356b5e5868SGarrett D'Amore if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT)) 3366b5e5868SGarrett D'Amore ctn->ctype |= _ISGRAPH; 3376b5e5868SGarrett D'Amore if (ctn->ctype & _ISGRAPH) 3386b5e5868SGarrett D'Amore ctn->ctype |= _ISPRINT; 3396b5e5868SGarrett D'Amore 3406b5e5868SGarrett D'Amore /* 3416b5e5868SGarrett D'Amore * Finally, POSIX requires that certain combinations 3426b5e5868SGarrett D'Amore * are invalid. We don't flag this as a fatal error, 3436b5e5868SGarrett D'Amore * but we will warn about. 3446b5e5868SGarrett D'Amore */ 3456b5e5868SGarrett D'Amore if ((ctn->ctype & _ISALPHA) && 3466b5e5868SGarrett D'Amore (ctn->ctype & (_ISPUNCT|_ISDIGIT))) 3476b5e5868SGarrett D'Amore conflict++; 3486b5e5868SGarrett D'Amore if ((ctn->ctype & _ISPUNCT) & 3496b5e5868SGarrett D'Amore (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT))) 3506b5e5868SGarrett D'Amore conflict++; 3516b5e5868SGarrett D'Amore if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH)) 3526b5e5868SGarrett D'Amore conflict++; 3536b5e5868SGarrett D'Amore if ((ctn->ctype & _ISCNTRL) & _ISPRINT) 3546b5e5868SGarrett D'Amore conflict++; 3556b5e5868SGarrett D'Amore if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH))) 3566b5e5868SGarrett D'Amore conflict++; 3576b5e5868SGarrett D'Amore 3586b5e5868SGarrett D'Amore if (conflict) { 3596b5e5868SGarrett D'Amore warn("conflicting classes for character 0x%x (%x)", 3606b5e5868SGarrett D'Amore wc, ctn->ctype); 3616b5e5868SGarrett D'Amore } 3626b5e5868SGarrett D'Amore /* 3636b5e5868SGarrett D'Amore * Handle the lower 256 characters using the simple 3646b5e5868SGarrett D'Amore * optimization. Note that if we have not defined the 3656b5e5868SGarrett D'Amore * upper/lower case, then we identity map it. 3666b5e5868SGarrett D'Amore */ 3675080145bSGarrett D'Amore if ((unsigned)wc < _CACHED_RUNES) { 3686b5e5868SGarrett D'Amore rl.runetype[wc] = ctn->ctype; 3696125cca6SDavid Höppner if (ctn->tolower) 3706125cca6SDavid Höppner rl.maplower[wc] = ctn->tolower; 3716125cca6SDavid Höppner if (ctn->toupper) 3726125cca6SDavid Höppner rl.mapupper[wc] = ctn->toupper; 3736b5e5868SGarrett D'Amore continue; 3746b5e5868SGarrett D'Amore } 3756b5e5868SGarrett D'Amore 3766b5e5868SGarrett D'Amore if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) { 3776b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges-1].max = wc; 3786b5e5868SGarrett D'Amore last_ct = ctn; 3796b5e5868SGarrett D'Amore } else { 3806b5e5868SGarrett D'Amore rl.runetype_ext_nranges++; 3816b5e5868SGarrett D'Amore ct = realloc(ct, 3826b5e5868SGarrett D'Amore sizeof (*ct) * rl.runetype_ext_nranges); 3836b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].min = wc; 3846b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].max = wc; 3856b5e5868SGarrett D'Amore ct[rl.runetype_ext_nranges - 1].map = ctn->ctype; 3866b5e5868SGarrett D'Amore last_ct = ctn; 3876b5e5868SGarrett D'Amore } 388017c01f8SYuri Pankov if (ctn->tolower == 0) { 389017c01f8SYuri Pankov last_lo = NULL; 3906b5e5868SGarrett D'Amore } else if ((last_lo != NULL) && 3916b5e5868SGarrett D'Amore (last_lo->tolower + 1 == ctn->tolower)) { 3926b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges-1].max = wc; 3936b5e5868SGarrett D'Amore last_lo = ctn; 3946b5e5868SGarrett D'Amore } else { 3956b5e5868SGarrett D'Amore rl.maplower_ext_nranges++; 3966b5e5868SGarrett D'Amore lo = realloc(lo, 3976b5e5868SGarrett D'Amore sizeof (*lo) * rl.maplower_ext_nranges); 3986b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].min = wc; 3996b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].max = wc; 4006b5e5868SGarrett D'Amore lo[rl.maplower_ext_nranges - 1].map = ctn->tolower; 4016b5e5868SGarrett D'Amore last_lo = ctn; 4026b5e5868SGarrett D'Amore } 4036b5e5868SGarrett D'Amore 4046b5e5868SGarrett D'Amore if (ctn->toupper == 0) { 4056b5e5868SGarrett D'Amore last_up = NULL; 4066b5e5868SGarrett D'Amore } else if ((last_up != NULL) && 4076b5e5868SGarrett D'Amore (last_up->toupper + 1 == ctn->toupper)) { 4086b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges-1].max = wc; 4096b5e5868SGarrett D'Amore last_up = ctn; 4106b5e5868SGarrett D'Amore } else { 4116b5e5868SGarrett D'Amore rl.mapupper_ext_nranges++; 4126b5e5868SGarrett D'Amore up = realloc(up, 4136b5e5868SGarrett D'Amore sizeof (*up) * rl.mapupper_ext_nranges); 4146b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].min = wc; 4156b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].max = wc; 4166b5e5868SGarrett D'Amore up[rl.mapupper_ext_nranges - 1].map = ctn->toupper; 4176b5e5868SGarrett D'Amore last_up = ctn; 4186b5e5868SGarrett D'Amore } 4196b5e5868SGarrett D'Amore } 4206b5e5868SGarrett D'Amore 4216b5e5868SGarrett D'Amore if ((wr_category(&rl, sizeof (rl), f) < 0) || 4226b5e5868SGarrett D'Amore (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) || 4236b5e5868SGarrett D'Amore (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) || 4246b5e5868SGarrett D'Amore (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) { 4256b5e5868SGarrett D'Amore return; 4266b5e5868SGarrett D'Amore } 4276b5e5868SGarrett D'Amore 4286b5e5868SGarrett D'Amore close_category(f); 4296b5e5868SGarrett D'Amore } 430