/*
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * This code is derived from software contributed to Berkeley by
 * Paul Borman at Krystal Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
394297a3b0SGarrett D'Amore 
404297a3b0SGarrett D'Amore #include "lint.h"
414297a3b0SGarrett D'Amore #include <wctype.h>
422d08521bSGarrett D'Amore #include <locale.h>
434297a3b0SGarrett D'Amore #include "runefile.h"
444297a3b0SGarrett D'Amore #include "runetype.h"
452d08521bSGarrett D'Amore #include "localeimpl.h"
464297a3b0SGarrett D'Amore #include "_ctype.h"
474297a3b0SGarrett D'Amore 
/*
 * Note that the standard requires iswascii to be a macro, so it is defined
 * in our headers.
 *
 * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial, and
 * iswnumber to the equivalent names without "w".  The Solaris specific
 * function isenglish() is here, but does not get an isw* equivalent.
 *
 * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
 * only return true for characters in the portable set.  While the assumption
 * is not technically correct, it turns out that for all of our locales this
 * is true.  iswhexnumber is aliased to iswxdigit.
 */
614297a3b0SGarrett D'Amore 
624297a3b0SGarrett D'Amore static int
__istype_l(locale_t loc,wint_t c,unsigned int f)632d08521bSGarrett D'Amore __istype_l(locale_t loc, wint_t c, unsigned int f)
644297a3b0SGarrett D'Amore {
65eda71b4aSGarrett D'Amore 	unsigned int rt;
664297a3b0SGarrett D'Amore 
674297a3b0SGarrett D'Amore 	if (c < 0 || c >= _CACHED_RUNES)
682d08521bSGarrett D'Amore 		rt = __runetype(loc->runelocale, c);
694297a3b0SGarrett D'Amore 	else
702d08521bSGarrett D'Amore 		rt = loc->runelocale->__runetype[c];
715ffb5900SGarrett D'Amore 	return (rt & f);
724297a3b0SGarrett D'Amore }
734297a3b0SGarrett D'Amore 
744297a3b0SGarrett D'Amore static int
__istype(wint_t c,unsigned int f)752d08521bSGarrett D'Amore __istype(wint_t c, unsigned int f)
764297a3b0SGarrett D'Amore {
772d08521bSGarrett D'Amore 	return (__istype_l(uselocale(NULL), c, f));
782d08521bSGarrett D'Amore }
794297a3b0SGarrett D'Amore 
802d08521bSGarrett D'Amore int
iswctype_l(wint_t wc,wctype_t class,locale_t loc)812d08521bSGarrett D'Amore iswctype_l(wint_t wc, wctype_t class, locale_t loc)
822d08521bSGarrett D'Amore {
832d08521bSGarrett D'Amore 	if (iswascii(wc))
842d08521bSGarrett D'Amore 		return (__ctype_mask[wc] & class);
852d08521bSGarrett D'Amore 	return (__istype_l(loc, wc, class));
864297a3b0SGarrett D'Amore }
874297a3b0SGarrett D'Amore 
884297a3b0SGarrett D'Amore #undef iswctype
894297a3b0SGarrett D'Amore int
iswctype(wint_t wc,wctype_t class)904297a3b0SGarrett D'Amore iswctype(wint_t wc, wctype_t class)
914297a3b0SGarrett D'Amore {
922d08521bSGarrett D'Amore 	/*
932d08521bSGarrett D'Amore 	 * Note that we don't just call iswctype_l because we optimize for
942d08521bSGarrett D'Amore 	 * the iswascii() case, so that most of the time we have no need to
952d08521bSGarrett D'Amore 	 * call uselocale().
962d08521bSGarrett D'Amore 	 */
972d08521bSGarrett D'Amore 	if (iswascii(wc))
982d08521bSGarrett D'Amore 		return (__ctype_mask[wc] & class);
99eda71b4aSGarrett D'Amore 	return (__istype(wc, class));
1004297a3b0SGarrett D'Amore }
1014297a3b0SGarrett D'Amore 
1022d08521bSGarrett D'Amore /*
1032d08521bSGarrett D'Amore  * This is a legacy version, baked into binaries.
1042d08521bSGarrett D'Amore  */
1054297a3b0SGarrett D'Amore #undef _iswctype
1064297a3b0SGarrett D'Amore unsigned
_iswctype(wchar_t wc,int class)1074297a3b0SGarrett D'Amore _iswctype(wchar_t wc, int class)
1084297a3b0SGarrett D'Amore {
1092d08521bSGarrett D'Amore 	if (iswascii(wc))
1102d08521bSGarrett D'Amore 		return (__ctype_mask[wc] & class);
111eda71b4aSGarrett D'Amore 	return (__istype((wint_t)wc, (unsigned int)class));
1124297a3b0SGarrett D'Amore }
1134297a3b0SGarrett D'Amore 
/*
 * DEFN_ISWTYPE(type, mask) expands to two functions:
 *
 *	int isw<type>_l(wint_t wc, locale_t loc)
 *	int isw<type>(wint_t wc)
 *
 * Both answer ASCII characters from __ctype_mask[] and hand everything
 * else to __istype_l() (explicit locale) or __istype() (locale obtained
 * via uselocale()), testing against the given ctype mask.
 */
#define	DEFN_ISWTYPE(type, mask)			\
int							\
isw##type##_l(wint_t wc, locale_t loc)			\
{							\
	if (iswascii(wc))				\
		return (__ctype_mask[wc] & (mask));	\
	return (__istype_l(loc, wc, (mask)));		\
}							\
							\
int							\
isw##type(wint_t wc)					\
{							\
	if (iswascii(wc))				\
		return (__ctype_mask[wc] & (mask));	\
	return (__istype(wc, (mask)));			\
}
1302d08521bSGarrett D'Amore 
1312d08521bSGarrett D'Amore /* kill off any macros */
1322d08521bSGarrett D'Amore #undef	iswalnum
1332d08521bSGarrett D'Amore #undef	iswalpha
1342d08521bSGarrett D'Amore #undef	iswblank
1352d08521bSGarrett D'Amore 
1362d08521bSGarrett D'Amore DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
DEFN_ISWTYPE(alpha,_CTYPE_A)1372d08521bSGarrett D'Amore DEFN_ISWTYPE(alpha, _CTYPE_A)
1382d08521bSGarrett D'Amore DEFN_ISWTYPE(blank, _CTYPE_B)
1392d08521bSGarrett D'Amore DEFN_ISWTYPE(cntrl, _CTYPE_C)
1402d08521bSGarrett D'Amore DEFN_ISWTYPE(digit, _CTYPE_D)
141*81cc9994SLauri Tirkkonen DEFN_ISWTYPE(graph, _CTYPE_G)
1422d08521bSGarrett D'Amore DEFN_ISWTYPE(lower, _CTYPE_L)
1432d08521bSGarrett D'Amore DEFN_ISWTYPE(upper, _CTYPE_U)
1442d08521bSGarrett D'Amore DEFN_ISWTYPE(print, _CTYPE_R)
1452d08521bSGarrett D'Amore DEFN_ISWTYPE(punct, _CTYPE_P)
1462d08521bSGarrett D'Amore DEFN_ISWTYPE(space, _CTYPE_S)
1472d08521bSGarrett D'Amore DEFN_ISWTYPE(xdigit, _CTYPE_X)
1482d08521bSGarrett D'Amore DEFN_ISWTYPE(ideogram, _CTYPE_I)
1492d08521bSGarrett D'Amore DEFN_ISWTYPE(phonogram, _CTYPE_Q)
1502d08521bSGarrett D'Amore DEFN_ISWTYPE(special, _CTYPE_T)
1512d08521bSGarrett D'Amore DEFN_ISWTYPE(number, _CTYPE_N)
1522d08521bSGarrett D'Amore 
1532d08521bSGarrett D'Amore 
1542d08521bSGarrett D'Amore #undef iswhexnumber
1552d08521bSGarrett D'Amore #pragma weak iswhexnumber = iswxdigit
1562d08521bSGarrett D'Amore #pragma weak iswhexnumber_l = iswxdigit_l
1574297a3b0SGarrett D'Amore 
1584297a3b0SGarrett D'Amore #undef isideogram
1592d08521bSGarrett D'Amore #pragma weak isideogram = iswideogram
1604297a3b0SGarrett D'Amore 
1614297a3b0SGarrett D'Amore #undef isphonogram
1622d08521bSGarrett D'Amore #pragma weak isphonogram = iswphonogram
1634297a3b0SGarrett D'Amore 
1644297a3b0SGarrett D'Amore #undef isspecial
1652d08521bSGarrett D'Amore #pragma weak isspecial = iswspecial
1664297a3b0SGarrett D'Amore 
1674297a3b0SGarrett D'Amore #undef isnumber
1682d08521bSGarrett D'Amore #pragma weak isnumber = iswnumber
169163bd69bSGarrett D'Amore 
/*
 * FreeBSD exposes iswrune() to external programs, and "tr" uses it.  Since
 * that program is part of our consolidation, we supply an _ILLUMOS_PRIVATE
 * version for it.
 *
 * Nothing outside the illumos stack itself should call this function;
 * programs that do will not be portable to other versions of SunOS or
 * Solaris.
 */
int
__iswrune(wint_t wc)
{
	/*
	 * FreeBSD ignores the low order byte because its ctype values are
	 * encoded differently.  We cannot do that (ctype is baked into
	 * applications), so instead we treat a character as a valid rune
	 * if *any* bit is set in its ctype.
	 *
	 * NB: In ASCII every position except NUL is a rune, so those two
	 * cases are answered without a table lookup.
	 */
	if (wc == 0)
		return (0);
	if (iswascii(wc))
		return (1);

	return (__istype(wc, 0xffffffffU));
}
1922d08521bSGarrett D'Amore 
1932d08521bSGarrett D'Amore /*
1942d08521bSGarrett D'Amore  * isenglish is a Solaris legacy.  No isw* equivalent.  Note that this most
1952d08521bSGarrett D'Amore  * likely doesn't work, as the locale data we have doesn't include it.  It
1962d08521bSGarrett D'Amore  * specifically is only valid for non-ASCII characters.  We're not sure this
1972d08521bSGarrett D'Amore  * is in actual use in the wild.
1982d08521bSGarrett D'Amore  */
1992d08521bSGarrett D'Amore #undef isenglish
2002d08521bSGarrett D'Amore int
isenglish(wint_t wc)2012d08521bSGarrett D'Amore isenglish(wint_t wc)
2022d08521bSGarrett D'Amore {
2032d08521bSGarrett D'Amore 	return (__istype(wc, _CTYPE_E));
204163bd69bSGarrett D'Amore }
205