1 /*
2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * This code is derived from software contributed to Berkeley by
13  * Paul Borman at Krystal Technologies.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #include "lint.h"
41 #include <wctype.h>
42 #include <locale.h>
43 #include "runefile.h"
44 #include "runetype.h"
45 #include "localeimpl.h"
46 #include "_ctype.h"
47 
48 /*
49  * Note that the standard requires iswascii to be a macro, so it is defined
50  * in our headers.
51  *
 * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial, and
 * iswnumber to the equivalent names without "w".  The Solaris specific
 * function isenglish() is here, but does not get an isw* equivalent.
55  *
56  * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
57  * only return true for characters in the portable set.  While the assumption
58  * is not technically correct, it turns out that for all of our locales this
59  * is true.  iswhexnumber is aliased to iswxdigit.
60  */
61 
62 static int
__istype_l(locale_t loc,wint_t c,unsigned int f)63 __istype_l(locale_t loc, wint_t c, unsigned int f)
64 {
65 	unsigned int rt;
66 
67 	if (c < 0 || c >= _CACHED_RUNES)
68 		rt = __runetype(loc->runelocale, c);
69 	else
70 		rt = loc->runelocale->__runetype[c];
71 	return (rt & f);
72 }
73 
74 static int
__istype(wint_t c,unsigned int f)75 __istype(wint_t c, unsigned int f)
76 {
77 	return (__istype_l(uselocale(NULL), c, f));
78 }
79 
80 int
iswctype_l(wint_t wc,wctype_t class,locale_t loc)81 iswctype_l(wint_t wc, wctype_t class, locale_t loc)
82 {
83 	if (iswascii(wc))
84 		return (__ctype_mask[wc] & class);
85 	return (__istype_l(loc, wc, class));
86 }
87 
88 #undef iswctype
89 int
iswctype(wint_t wc,wctype_t class)90 iswctype(wint_t wc, wctype_t class)
91 {
92 	/*
93 	 * Note that we don't just call iswctype_l because we optimize for
94 	 * the iswascii() case, so that most of the time we have no need to
95 	 * call uselocale().
96 	 */
97 	if (iswascii(wc))
98 		return (__ctype_mask[wc] & class);
99 	return (__istype(wc, class));
100 }
101 
102 /*
103  * This is a legacy version, baked into binaries.
104  */
105 #undef _iswctype
106 unsigned
_iswctype(wchar_t wc,int class)107 _iswctype(wchar_t wc, int class)
108 {
109 	if (iswascii(wc))
110 		return (__ctype_mask[wc] & class);
111 	return (__istype((wint_t)wc, (unsigned int)class));
112 }
113 
/*
 * DEFN_ISWTYPE(type, mask) emits a pair of classification functions,
 * isw<type>_l(wc, loc) and isw<type>(wc).  Each returns non-zero when wc
 * carries any of the class bits in mask.  ASCII values are looked up
 * directly in __ctype_mask[]; other values are resolved via __istype_l()
 * with the supplied locale, or via __istype() for the current locale.
 */
#define	DEFN_ISWTYPE(type, mask)			\
int							\
isw##type##_l(wint_t wc, locale_t loc)			\
{							\
	if (iswascii(wc))				\
		return (__ctype_mask[wc] & (mask));	\
	return (__istype_l(loc, wc, (mask)));		\
}							\
							\
int							\
isw##type(wint_t wc)					\
{							\
	if (iswascii(wc))				\
		return (__ctype_mask[wc] & (mask));	\
	return (__istype(wc, (mask)));			\
}
130 
131 /* kill off any macros */
132 #undef	iswalnum
133 #undef	iswalpha
134 #undef	iswblank
135 
136 DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
DEFN_ISWTYPE(alpha,_CTYPE_A)137 DEFN_ISWTYPE(alpha, _CTYPE_A)
138 DEFN_ISWTYPE(blank, _CTYPE_B)
139 DEFN_ISWTYPE(cntrl, _CTYPE_C)
140 DEFN_ISWTYPE(digit, _CTYPE_D)
141 DEFN_ISWTYPE(graph, _CTYPE_G)
142 DEFN_ISWTYPE(lower, _CTYPE_L)
143 DEFN_ISWTYPE(upper, _CTYPE_U)
144 DEFN_ISWTYPE(print, _CTYPE_R)
145 DEFN_ISWTYPE(punct, _CTYPE_P)
146 DEFN_ISWTYPE(space, _CTYPE_S)
147 DEFN_ISWTYPE(xdigit, _CTYPE_X)
148 DEFN_ISWTYPE(ideogram, _CTYPE_I)
149 DEFN_ISWTYPE(phonogram, _CTYPE_Q)
150 DEFN_ISWTYPE(special, _CTYPE_T)
151 DEFN_ISWTYPE(number, _CTYPE_N)
152 
153 
154 #undef iswhexnumber
155 #pragma weak iswhexnumber = iswxdigit
156 #pragma weak iswhexnumber_l = iswxdigit_l
157 
158 #undef isideogram
159 #pragma weak isideogram = iswideogram
160 
161 #undef isphonogram
162 #pragma weak isphonogram = iswphonogram
163 
164 #undef isspecial
165 #pragma weak isspecial = iswspecial
166 
167 #undef isnumber
168 #pragma weak isnumber = iswnumber
169 
170 /*
171  * FreeBSD has iswrune() for use by external programs, and this is used by
172  * the "tr" program.  As that program is part of our consolidation, we
173  * provide an _ILLUMOS_PRIVATE version of this function that we can use.
174  *
 * No programs that are not part of the illumos stack itself should use
 * this function -- programs that do reference it will not be portable to
 * other versions of SunOS or Solaris.
178  */
int
__iswrune(wint_t wc)
{
	/*
	 * FreeBSD ignores the low order byte here, because it encodes its
	 * ctype values differently.  We can't do that (ctype is baked into
	 * applications); instead we just check whether *any* bit is set in
	 * the ctype.  Any bit being set indicates it's a valid rune.
	 *
	 * NB: For ASCII every position except NUL is a rune.
	 */
	if (wc == 0)
		return (0);
	if (iswascii(wc))
		return (1);

	return (__istype(wc, 0xffffffffU));
}
192 
193 /*
194  * isenglish is a Solaris legacy.  No isw* equivalent.  Note that this most
195  * likely doesn't work, as the locale data we have doesn't include it.  It
196  * specifically is only valid for non-ASCII characters.  We're not sure this
197  * is in actual use in the wild.
198  */
199 #undef isenglish
200 int
isenglish(wint_t wc)201 isenglish(wint_t wc)
202 {
203 	return (__istype(wc, _CTYPE_E));
204 }
205