1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * D. G. Korn
26  * G. S. Fowler
27  * AT&T Research
28  *
29  * match shell file patterns
30  * this interface is a wrapper on regex
31  *
32  *	sh pattern	egrep RE	description
33  *	----------	--------	-----------
34  *	*		.*		0 or more chars
35  *	?		.		any single char
36  *	[.]		[.]		char class
37  *	[!.]		[^.]		negated char class
38  *	[[:.:]]		[[:.:]]		ctype class
39  *	[[=.=]]		[[=.=]]		equivalence class
40  *	[[...]]		[[...]]		collation element
41  *	*(.)		(.)*		0 or more of
42  *	+(.)		(.)+		1 or more of
43  *	?(.)		(.)?		0 or 1 of
44  *	(.)		(.)		1 of
45  *	@(.)		(.)		1 of
46  *	a|b		a|b		a or b
47  *	\#				() subgroup back reference [1-9]
48  *	a&b				a and b
49  *	!(.)				none of
50  *
51  * \ used to escape metacharacters
52  *
53  *	*, ?, (, |, &, ), [, \ must be \'d outside of [...]
54  *	only ] must be \'d inside [...]
55  *
56  */
57 
58 #include <ast.h>
59 #include <regex.h>
60 
/*
 * regexec() offset buffer shared by every strgrpmatch() call
 * strgrpmatch() grows it on demand; nothing in this file frees it
 */

struct State_s
{
	regmatch_t*	match;		/* offset buffer, 0 until first grown	*/
	int		nmatch;		/* number of elements in match		*/
};

static struct State_s	matchstate;

/*
 * private strgrpmatch() flag: sub points to int elements, not ssize_t
 * set by the int* compatibility entry point at the end of this file
 */

#define STR_INT		040000
68 
69 /*
70  * subgroup match
71  * 0 returned if no match
72  * otherwise number of subgroups matched returned
73  * match group begin offsets are even elements of sub
74  * match group end offsets are odd elements of sub
75  * the matched string is from s+sub[0] up to but not
76  * including s+sub[1]
77  */
78 
79 int
strgrpmatch(const char * b,const char * p,ssize_t * sub,int n,register int flags)80 strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flags)
81 {
82 	register regex_t*	re;
83 	register ssize_t*	end;
84 	register int		i;
85 	register regflags_t	reflags;
86 
87 	/*
88 	 * 0 and empty patterns are special
89 	 */
90 
91 	if (!p || !b)
92 	{
93 		if (!p && !b)
94 			regcache(NiL, 0, NiL);
95 		return 0;
96 	}
97 	if (!*p)
98 	{
99 		if (sub && n > 0)
100 		{
101 			if (flags & STR_INT)
102 			{
103 				int*	subi = (int*)sub;
104 
105 				subi[0] = subi[1] = 0;
106 			}
107 			else
108 				sub[0] = sub[1] = 0;
109 		}
110 		return *b == 0;
111 	}
112 
113 	/*
114 	 * convert flags
115 	 */
116 
117 	if (flags & REG_ADVANCE)
118 		reflags = flags & ~REG_ADVANCE;
119 	else
120 	{
121 		reflags = REG_SHELL|REG_AUGMENTED;
122 		if (!(flags & STR_MAXIMAL))
123 			reflags |= REG_MINIMAL;
124 		if (flags & STR_GROUP)
125 			reflags |= REG_SHELL_GROUP;
126 		if (flags & STR_LEFT)
127 			reflags |= REG_LEFT;
128 		if (flags & STR_RIGHT)
129 			reflags |= REG_RIGHT;
130 		if (flags & STR_ICASE)
131 			reflags |= REG_ICASE;
132 	}
133 	if (!sub || n <= 0)
134 		reflags |= REG_NOSUB;
135 	if (!(re = regcache(p, reflags, NiL)))
136 		return 0;
137 	if (n > matchstate.nmatch)
138 	{
139 		if (!(matchstate.match = newof(matchstate.match, regmatch_t, n, 0)))
140 			return 0;
141 		matchstate.nmatch = n;
142 	}
143 	if (regexec(re, b, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE)))
144 		return 0;
145 	if (!sub || n <= 0)
146 		return 1;
147 	i = re->re_nsub;
148 	if (flags & STR_INT)
149 	{
150 		int*	subi = (int*)sub;
151 		int*	endi = subi + n * 2;
152 
153 		for (n = 0; subi < endi && n <= i; n++)
154 		{
155 			*subi++ = matchstate.match[n].rm_so;
156 			*subi++ = matchstate.match[n].rm_eo;
157 		}
158 	}
159 	else
160 	{
161 		end = sub + n * 2;
162 		for (n = 0; sub < end && n <= i; n++)
163 		{
164 			*sub++ = matchstate.match[n].rm_so;
165 			*sub++ = matchstate.match[n].rm_eo;
166 		}
167 	}
168 	return i + 1;
169 }
170 
171 /*
172  * compare the string s with the shell pattern p
173  * returns 1 for match 0 otherwise
174  */
175 
176 int
strmatch(const char * s,const char * p)177 strmatch(const char* s, const char* p)
178 {
179 	return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
180 }
181 
182 /*
183  * leading substring match
184  * first char after end of substring returned
185  * 0 returned if no match
186  *
187  * OBSOLETE: use strgrpmatch()
188  */
189 
190 char*
strsubmatch(const char * s,const char * p,int flags)191 strsubmatch(const char* s, const char* p, int flags)
192 {
193 	ssize_t	match[2];
194 
195 	return strgrpmatch(s, p, match, 1, (flags ? STR_MAXIMAL : 0)|STR_LEFT) ? (char*)s + match[1] : (char*)0;
196 }
197 
198 #undef	strgrpmatch
199 #if _map_libc
200 #define strgrpmatch	_ast_strgrpmatch
201 #endif
202 
203 int
strgrpmatch(const char * b,const char * p,int * sub,int n,int flags)204 strgrpmatch(const char* b, const char* p, int* sub, int n, int flags)
205 {
206 	return strgrpmatch_20120528(b, p, (ssize_t*)sub, n, flags|STR_INT);
207 }
208