1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin * *
3da2e3ebdSchin * This software is part of the ast package *
4*b30d1939SAndy Fiddaman * Copyright (c) 1985-2011 AT&T Intellectual Property *
5da2e3ebdSchin * and is licensed under the *
6*b30d1939SAndy Fiddaman * Eclipse Public License, Version 1.0 *
77c2fbfb3SApril Chin * by AT&T Intellectual Property *
8da2e3ebdSchin * *
9da2e3ebdSchin * A copy of the License is available at *
10*b30d1939SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html *
11*b30d1939SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12da2e3ebdSchin * *
13da2e3ebdSchin * Information and Software Systems Research *
14da2e3ebdSchin * AT&T Research *
15da2e3ebdSchin * Florham Park NJ *
16da2e3ebdSchin * *
17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> *
18da2e3ebdSchin * David Korn <dgk@research.att.com> *
19da2e3ebdSchin * Phong Vo <kpv@research.att.com> *
20da2e3ebdSchin * *
21da2e3ebdSchin ***********************************************************************/
22da2e3ebdSchin #pragma prototyped
23da2e3ebdSchin /*
24da2e3ebdSchin * RE character class support
25da2e3ebdSchin */
26da2e3ebdSchin
27da2e3ebdSchin #include "reglib.h"
28da2e3ebdSchin
29da2e3ebdSchin struct Ctype_s; typedef struct Ctype_s Ctype_t;
30da2e3ebdSchin
31da2e3ebdSchin struct Ctype_s
32da2e3ebdSchin {
33da2e3ebdSchin const char* name;
34da2e3ebdSchin size_t size;
35da2e3ebdSchin regclass_t ctype;
36da2e3ebdSchin Ctype_t* next;
37da2e3ebdSchin #if _lib_wctype
38da2e3ebdSchin wctype_t wtype;
39da2e3ebdSchin #endif
40da2e3ebdSchin };
41da2e3ebdSchin
42da2e3ebdSchin static Ctype_t* ctypes;
43da2e3ebdSchin
44da2e3ebdSchin /*
45da2e3ebdSchin * this stuff gets around posix failure to define isblank,
46da2e3ebdSchin * and the fact that ctype functions are macros
47da2e3ebdSchin * and any local extensions that may not even have functions or macros
48da2e3ebdSchin */
49da2e3ebdSchin
50da2e3ebdSchin #if _need_iswblank
51da2e3ebdSchin
/*
 * iswblank() replacement, compiled only when _need_iswblank is set
 *
 * returns non-zero if wc is in the "blank" class of the current locale
 *
 * the wctype() descriptor is looked up on every call instead of being
 * cached in a static: a descriptor is only valid for the LC_CTYPE
 * setting that was in effect when wctype() returned it, so a cached
 * value would go stale after a later setlocale()
 */

int
_reg_iswblank(wint_t wc)
{
	return iswctype(wc, wctype("blank"));
}
65da2e3ebdSchin
66da2e3ebdSchin #endif
67da2e3ebdSchin
/*
 * function wrappers around the isw*() classifiers so that each class
 * has an address that can be stored in a regclass_t
 * the Not*() variants are the complements used by classfun()
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return !iswdigit(c);
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return !iswspace(c);
}

static int
Isupper(int c)
{
	return iswupper(c);
}

/* word characters are the alphanumerics plus underscore */

static int
Isword(int c)
{
	return c == '_' || iswalnum(c);
}

static int
Notword(int c)
{
	return !(c == '_' || iswalnum(c));
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
84da2e3ebdSchin
#if _lib_wctype

/*
 * one predicate per dynamic wctype() slot of the ctype[] table
 * declared here because the table initializer takes their addresses
 * before they are defined
 */

static int	Is_wc_1(int c);
static int	Is_wc_2(int c);
static int	Is_wc_3(int c);
static int	Is_wc_4(int c);
static int	Is_wc_5(int c);
static int	Is_wc_6(int c);
static int	Is_wc_7(int c);
static int	Is_wc_8(int c);
static int	Is_wc_9(int c);
static int	Is_wc_10(int c);
static int	Is_wc_11(int c);
static int	Is_wc_12(int c);
static int	Is_wc_13(int c);
static int	Is_wc_14(int c);
static int	Is_wc_15(int c);
static int	Is_wc_16(int c);

#endif
105da2e3ebdSchin
106da2e3ebdSchin #define SZ(s) s,(sizeof(s)-1)
107da2e3ebdSchin
108da2e3ebdSchin static Ctype_t ctype[] =
109da2e3ebdSchin {
110da2e3ebdSchin { SZ("alnum"), Isalnum },
111da2e3ebdSchin { SZ("alpha"), Isalpha },
112da2e3ebdSchin { SZ("blank"), Isblank },
113da2e3ebdSchin { SZ("cntrl"), Iscntrl },
114da2e3ebdSchin { SZ("digit"), Isdigit },
115da2e3ebdSchin { SZ("graph"), Isgraph },
116da2e3ebdSchin { SZ("lower"), Islower },
117da2e3ebdSchin { SZ("print"), Isprint },
118da2e3ebdSchin { SZ("punct"), Ispunct },
119da2e3ebdSchin { SZ("space"), Isspace },
120da2e3ebdSchin { SZ("upper"), Isupper },
121da2e3ebdSchin { SZ("word"), Isword },
122da2e3ebdSchin { SZ("xdigit"),Isxdigit},
123*b30d1939SAndy Fiddaman
124*b30d1939SAndy Fiddaman #define CTYPES 13
125*b30d1939SAndy Fiddaman
126da2e3ebdSchin #if _lib_wctype
127da2e3ebdSchin { 0, 0, Is_wc_1 },
128da2e3ebdSchin { 0, 0, Is_wc_2 },
129da2e3ebdSchin { 0, 0, Is_wc_3 },
130da2e3ebdSchin { 0, 0, Is_wc_4 },
131da2e3ebdSchin { 0, 0, Is_wc_5 },
132da2e3ebdSchin { 0, 0, Is_wc_6 },
133da2e3ebdSchin { 0, 0, Is_wc_7 },
134da2e3ebdSchin { 0, 0, Is_wc_8 },
135*b30d1939SAndy Fiddaman { 0, 0, Is_wc_9 },
136*b30d1939SAndy Fiddaman { 0, 0, Is_wc_10 },
137*b30d1939SAndy Fiddaman { 0, 0, Is_wc_11 },
138*b30d1939SAndy Fiddaman { 0, 0, Is_wc_12 },
139*b30d1939SAndy Fiddaman { 0, 0, Is_wc_13 },
140*b30d1939SAndy Fiddaman { 0, 0, Is_wc_14 },
141*b30d1939SAndy Fiddaman { 0, 0, Is_wc_15 },
142*b30d1939SAndy Fiddaman { 0, 0, Is_wc_16 },
143*b30d1939SAndy Fiddaman
144*b30d1939SAndy Fiddaman #define WTYPES 16
145*b30d1939SAndy Fiddaman
146*b30d1939SAndy Fiddaman #else
147*b30d1939SAndy Fiddaman
148*b30d1939SAndy Fiddaman #define WTYPES 0
149*b30d1939SAndy Fiddaman
150da2e3ebdSchin #endif
151da2e3ebdSchin };
152da2e3ebdSchin
#if _lib_wctype

/* wctype() handle stored in the i'th dynamic slot of ctype[] */

#define WC_SLOT(i)	(ctype[CTYPES+(i)].wtype)

/*
 * Is_wc_N() tests c against the handle in dynamic slot N-1
 * a separate function per slot is needed because a regclass_t
 * receives only the character
 */

static int Is_wc_1(int c)	{ return iswctype(c, WC_SLOT(0)); }
static int Is_wc_2(int c)	{ return iswctype(c, WC_SLOT(1)); }
static int Is_wc_3(int c)	{ return iswctype(c, WC_SLOT(2)); }
static int Is_wc_4(int c)	{ return iswctype(c, WC_SLOT(3)); }
static int Is_wc_5(int c)	{ return iswctype(c, WC_SLOT(4)); }
static int Is_wc_6(int c)	{ return iswctype(c, WC_SLOT(5)); }
static int Is_wc_7(int c)	{ return iswctype(c, WC_SLOT(6)); }
static int Is_wc_8(int c)	{ return iswctype(c, WC_SLOT(7)); }
static int Is_wc_9(int c)	{ return iswctype(c, WC_SLOT(8)); }
static int Is_wc_10(int c)	{ return iswctype(c, WC_SLOT(9)); }
static int Is_wc_11(int c)	{ return iswctype(c, WC_SLOT(10)); }
static int Is_wc_12(int c)	{ return iswctype(c, WC_SLOT(11)); }
static int Is_wc_13(int c)	{ return iswctype(c, WC_SLOT(12)); }
static int Is_wc_14(int c)	{ return iswctype(c, WC_SLOT(13)); }
static int Is_wc_15(int c)	{ return iswctype(c, WC_SLOT(14)); }
static int Is_wc_16(int c)	{ return iswctype(c, WC_SLOT(15)); }

#endif
173da2e3ebdSchin
174da2e3ebdSchin /*
175da2e3ebdSchin * return pointer to ctype function for :class:] in s
176da2e3ebdSchin * s points to the first char after the initial [
177*b30d1939SAndy Fiddaman * dynamic wctype classes are locale-specific
178*b30d1939SAndy Fiddaman * dynamic entry locale is punned in Ctype_t.next
179*b30d1939SAndy Fiddaman * the search does a lazy (one entry at a time) flush on locale mismatch
180da2e3ebdSchin * if e!=0 it points to next char in s
181da2e3ebdSchin * 0 returned on error
182da2e3ebdSchin */
183da2e3ebdSchin
184da2e3ebdSchin regclass_t
regclass(const char * s,char ** e)185da2e3ebdSchin regclass(const char* s, char** e)
186da2e3ebdSchin {
187da2e3ebdSchin register Ctype_t* cp;
188da2e3ebdSchin register int c;
189da2e3ebdSchin register size_t n;
190da2e3ebdSchin register const char* t;
191*b30d1939SAndy Fiddaman Ctype_t* lc;
192*b30d1939SAndy Fiddaman Ctype_t* xp;
193*b30d1939SAndy Fiddaman Ctype_t* zp;
194da2e3ebdSchin
195*b30d1939SAndy Fiddaman if (!(c = *s++))
196*b30d1939SAndy Fiddaman return 0;
197*b30d1939SAndy Fiddaman for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
198*b30d1939SAndy Fiddaman if (*t != c || !(n = t - s))
199*b30d1939SAndy Fiddaman return 0;
200*b30d1939SAndy Fiddaman for (cp = ctypes; cp; cp = cp->next)
201*b30d1939SAndy Fiddaman if (n == cp->size && strneq(s, cp->name, n))
202*b30d1939SAndy Fiddaman goto found;
203*b30d1939SAndy Fiddaman xp = zp = 0;
204*b30d1939SAndy Fiddaman lc = (Ctype_t*)setlocale(LC_CTYPE, NiL);
205*b30d1939SAndy Fiddaman for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
206da2e3ebdSchin {
207da2e3ebdSchin #if _lib_wctype
208*b30d1939SAndy Fiddaman if (!zp)
209*b30d1939SAndy Fiddaman {
210*b30d1939SAndy Fiddaman if (!cp->size)
211*b30d1939SAndy Fiddaman zp = cp;
212*b30d1939SAndy Fiddaman else if (!xp && cp->next && cp->next != lc)
213*b30d1939SAndy Fiddaman xp = cp;
214*b30d1939SAndy Fiddaman }
215da2e3ebdSchin #endif
216*b30d1939SAndy Fiddaman if (n == cp->size && strneq(s, cp->name, n) && (!cp->next || cp->next == lc))
217*b30d1939SAndy Fiddaman goto found;
218*b30d1939SAndy Fiddaman }
219*b30d1939SAndy Fiddaman #if _lib_wctype
220*b30d1939SAndy Fiddaman if (!(cp = zp))
221*b30d1939SAndy Fiddaman {
222*b30d1939SAndy Fiddaman if (!(cp = xp))
223*b30d1939SAndy Fiddaman return 0;
224*b30d1939SAndy Fiddaman cp->size = 0;
225*b30d1939SAndy Fiddaman if (!streq(cp->name, s))
226*b30d1939SAndy Fiddaman {
227*b30d1939SAndy Fiddaman free((char*)cp->name);
228*b30d1939SAndy Fiddaman cp->name = 0;
229da2e3ebdSchin }
230da2e3ebdSchin }
231*b30d1939SAndy Fiddaman if (!cp->name)
232*b30d1939SAndy Fiddaman {
233*b30d1939SAndy Fiddaman if (!(cp->name = (const char*)memdup(s, n + 1)))
234*b30d1939SAndy Fiddaman return 0;
235*b30d1939SAndy Fiddaman *((char*)cp->name + n) = 0;
236*b30d1939SAndy Fiddaman }
237*b30d1939SAndy Fiddaman /* mvs.390 needs the (char*) cast -- barf */
238*b30d1939SAndy Fiddaman if (!(cp->wtype = wctype((char*)cp->name)))
239*b30d1939SAndy Fiddaman {
240*b30d1939SAndy Fiddaman free((char*)cp->name);
241*b30d1939SAndy Fiddaman cp->name = 0;
242*b30d1939SAndy Fiddaman return 0;
243*b30d1939SAndy Fiddaman }
244*b30d1939SAndy Fiddaman cp->size = n;
245*b30d1939SAndy Fiddaman cp->next = lc;
246*b30d1939SAndy Fiddaman #endif
247da2e3ebdSchin found:
248da2e3ebdSchin if (e)
249da2e3ebdSchin *e = (char*)t + 2;
250da2e3ebdSchin return cp->ctype;
251da2e3ebdSchin }
252da2e3ebdSchin
253da2e3ebdSchin /*
254da2e3ebdSchin * associate the ctype function fun with name
255da2e3ebdSchin */
256da2e3ebdSchin
257da2e3ebdSchin int
regaddclass(const char * name,regclass_t fun)258da2e3ebdSchin regaddclass(const char* name, regclass_t fun)
259da2e3ebdSchin {
260da2e3ebdSchin register Ctype_t* cp;
261da2e3ebdSchin register Ctype_t* np;
262da2e3ebdSchin register size_t n;
263da2e3ebdSchin
264da2e3ebdSchin n = strlen(name);
265da2e3ebdSchin for (cp = ctypes; cp; cp = cp->next)
266da2e3ebdSchin if (cp->size == n && strneq(name, cp->name, n))
267da2e3ebdSchin {
268da2e3ebdSchin cp->ctype = fun;
269da2e3ebdSchin return 0;
270da2e3ebdSchin }
271da2e3ebdSchin if (!(np = newof(0, Ctype_t, 1, n + 1)))
272da2e3ebdSchin return REG_ESPACE;
273da2e3ebdSchin np->size = n;
274da2e3ebdSchin np->name = strcpy((char*)(np + 1), name);
275da2e3ebdSchin np->ctype = fun;
276da2e3ebdSchin np->next = ctypes;
277da2e3ebdSchin ctypes = np;
278da2e3ebdSchin return 0;
279da2e3ebdSchin }
280da2e3ebdSchin
281da2e3ebdSchin /*
282da2e3ebdSchin * return pointer to ctype function for token
283da2e3ebdSchin */
284da2e3ebdSchin
285da2e3ebdSchin regclass_t
classfun(int type)286da2e3ebdSchin classfun(int type)
287da2e3ebdSchin {
288da2e3ebdSchin switch (type)
289da2e3ebdSchin {
290da2e3ebdSchin case T_ALNUM: return Isword;
291da2e3ebdSchin case T_ALNUM_NOT: return Notword;
292da2e3ebdSchin case T_DIGIT: return Isdigit;
293da2e3ebdSchin case T_DIGIT_NOT: return Notdigit;
294da2e3ebdSchin case T_SPACE: return Isspace;
295da2e3ebdSchin case T_SPACE_NOT: return Notspace;
296da2e3ebdSchin }
297da2e3ebdSchin return 0;
298da2e3ebdSchin }
299