1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * D. G. Korn
26  * G. S. Fowler
27  * AT&T Research
28  *
29  * match shell file patterns -- derived from Bourne and Korn shell gmatch()
30  *
31  *	sh pattern	egrep RE	description
32  *	----------	--------	-----------
33  *	*		.*		0 or more chars
34  *	?		.		any single char
35  *	[.]		[.]		char class
36  *	[!.]		[^.]		negated char class
37  *	[[:.:]]		[[:.:]]		ctype class
38  *	[[=.=]]		[[=.=]]		equivalence class
39  *	[[...]]		[[...]]		collation element
40  *	*(.)		(.)*		0 or more of
41  *	+(.)		(.)+		1 or more of
42  *	?(.)		(.)?		0 or 1 of
43  *	(.)		(.)		1 of
44  *	@(.)		(.)		1 of
45  *	a|b		a|b		a or b
46  *	\#				() subgroup back reference [1-9]
47  *	a&b				a and b
48  *	!(.)				none of
49  *
50  * \ used to escape metacharacters
51  *
52  *	*, ?, (, |, &, ), [, \ must be \'d outside of [...]
53  *	only ] must be \'d inside [...]
54  *
55  * BUG: unbalanced ) terminates top level pattern
56  */
57 
58 #include <ast.h>
59 #include <ctype.h>
60 #include <hashkey.h>
61 
62 #ifndef	isblank
63 #define	isblank(x)	((x)==' '||(x)=='\t')
64 #endif
65 
66 #ifndef isgraph
67 #define	isgraph(x)	(isprint(x)&&!isblank(x))
68 #endif
69 
70 #define MAXGROUP	10
71 
72 typedef struct
73 {
74 	char*		beg[MAXGROUP];
75 	char*		end[MAXGROUP];
76 	char*		next_s;
77 	short		groups;
78 } Group_t;
79 
80 typedef struct
81 {
82 	Group_t		current;
83 	Group_t		best;
84 	char*		last_s;
85 	char*		next_p;
86 } Match_t;
87 
88 #define mbgetchar(p)	(*p++)
89 
90 #ifndef isxdigit
91 #define isxdigit(c)	((c)>='0'&&(c)<='9'||(c)>='a'&&(c)<='f'||(c)>='A'&&(c)<='F')
92 #endif
93 
94 #define getsource(s,e)	(((s)>=(e))?0:mbgetchar(s))
95 
96 #define COLL_MAX	3
97 
98 /*
99  * gobble chars up to <sub> or ) keeping track of (...) and [...]
100  * sub must be one of { '|', '&', 0 }
101  * 0 returned if s runs out
102  */
103 
104 static char*
gobble(Match_t * mp,register char * s,register int sub,int * g,int clear)105 gobble(Match_t* mp, register char* s, register int sub, int* g, int clear)
106 {
107 	register int	p = 0;
108 	register char*	b = 0;
109 	int		c = 0;
110 	int		n;
111 
112 	for (;;)
113 		switch (mbgetchar(s))
114 		{
115 		case '\\':
116 			if (mbgetchar(s))
117 				break;
118 			/*FALLTHROUGH*/
119 		case 0:
120 			return 0;
121 		case '[':
122 			if (!b)
123 			{
124 				if (*s == '!' || *s == '^')
125 					mbgetchar(s);
126 				b = s;
127 			}
128 			else if (*s == '.' || *s == '=' || *s == ':')
129 				c = *s;
130 			break;
131 		case ']':
132 			if (b)
133 			{
134 				if (*(s - 2) == c)
135 					c = 0;
136 				else if (b != (s - 1))
137 					b = 0;
138 			}
139 			break;
140 		case '(':
141 			if (!b)
142 			{
143 				p++;
144 				n = (*g)++;
145 				if (clear)
146 				{
147 					if (!sub)
148 						n++;
149 					if (n < MAXGROUP)
150 						mp->current.beg[n] = mp->current.end[n] = 0;
151 				}
152 			}
153 			break;
154 		case ')':
155 			if (!b && p-- <= 0)
156 				return sub ? 0 : s;
157 			break;
158 		case '|':
159 			if (!b && !p && sub == '|')
160 				return s;
161 			break;
162 		}
163 }
164 
165 static int	grpmatch(Match_t*, int, char*, register char*, char*, int);
166 
167 /*
168  * match a single pattern
169  * e is the end (0) of the substring in s
170  * r marks the start of a repeated subgroup pattern
171  */
172 
173 static int
onematch(Match_t * mp,int g,char * s,char * p,char * e,char * r,int flags)174 onematch(Match_t* mp, int g, char* s, char* p, char* e, char* r, int flags)
175 {
176 	register int 	pc;
177 	register int 	sc;
178 	register int	n;
179 	register int	icase;
180 	char*		olds;
181 	char*		oldp;
182 
183 	icase = flags & STR_ICASE;
184 	do
185 	{
186 		olds = s;
187 		sc = getsource(s, e);
188 		if (icase && isupper(sc))
189 			sc = tolower(sc);
190 		oldp = p;
191 		switch (pc = mbgetchar(p))
192 		{
193 		case '(':
194 		case '*':
195 		case '?':
196 		case '+':
197 		case '@':
198 		case '!':
199 			if (pc == '(' || *p == '(')
200 			{
201 				char*	subp;
202 				int	oldg;
203 
204 				s = olds;
205 				subp = p + (pc != '(');
206 				oldg = g;
207 				n = ++g;
208 				if (g < MAXGROUP && (!r || g > mp->current.groups))
209 					mp->current.beg[g] = mp->current.end[g] = 0;
210 				if (!(p = gobble(mp, subp, 0, &g, !r)))
211 					return 0;
212 				if (pc == '*' || pc == '?' || pc == '+' && oldp == r)
213 				{
214 					if (onematch(mp, g, s, p, e, NiL, flags))
215 						return 1;
216 					if (!sc || !getsource(s, e))
217 					{
218 						mp->current.groups = oldg;
219 						return 0;
220 					}
221 				}
222 				if (pc == '*' || pc == '+')
223 				{
224 					p = oldp;
225 					sc = n - 1;
226 				}
227 				else
228 					sc = g;
229 				pc = (pc != '!');
230 				do
231 				{
232 					if (grpmatch(mp, n, olds, subp, s, flags) == pc)
233 					{
234 						if (n < MAXGROUP)
235 						{
236 							if (!mp->current.beg[n] || mp->current.beg[n] > olds)
237 								mp->current.beg[n] = olds;
238 							if (s > mp->current.end[n])
239 								mp->current.end[n] = s;
240 						}
241 						if (onematch(mp, sc, s, p, e, oldp, flags))
242 						{
243 							if (p == oldp && n < MAXGROUP)
244 							{
245 								if (!mp->current.beg[n] || mp->current.beg[n] > olds)
246 									mp->current.beg[n] = olds;
247 								if (s > mp->current.end[n])
248 									mp->current.end[n] = s;
249 							}
250 							return 1;
251 						}
252 					}
253 				} while (s < e && mbgetchar(s));
254 				mp->current.groups = oldg;
255 				return 0;
256 			}
257 			else if (pc == '*')
258 			{
259 				/*
260 				 * several stars are the same as one
261 				 */
262 
263 				while (*p == '*' && *(p + 1) != '(')
264 					p++;
265 				oldp = p;
266 				switch (pc = mbgetchar(p))
267 				{
268 				case '@':
269 				case '!':
270 				case '+':
271 					n = *p == '(';
272 					break;
273 				case '(':
274 				case '[':
275 				case '?':
276 				case '*':
277 					n = 1;
278 					break;
279 				case 0:
280 				case '|':
281 				case '&':
282 				case ')':
283 					mp->current.next_s = (flags & STR_MAXIMAL) ? e : olds;
284 					mp->next_p = oldp;
285 					mp->current.groups = g;
286 					if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && mp->current.next_s > mp->best.next_s || !(flags & STR_MAXIMAL) && mp->current.next_s < mp->best.next_s))
287 						mp->best = mp->current;
288 					return 1;
289 				case '\\':
290 					if (!(pc = mbgetchar(p)))
291 						return 0;
292 					if (pc >= '0' && pc <= '9')
293 					{
294 						n = pc - '0';
295 						if (n <= g && mp->current.beg[n])
296 							pc = *mp->current.beg[n];
297 					}
298 					/*FALLTHROUGH*/
299 				default:
300 					if (icase && isupper(pc))
301 						pc = tolower(pc);
302 					n = 0;
303 					break;
304 				}
305 				p = oldp;
306 				for (;;)
307 				{
308 					if ((n || pc == sc) && onematch(mp, g, olds, p, e, NiL, flags))
309 						return 1;
310 					if (!sc)
311 						return 0;
312 					olds = s;
313 					sc = getsource(s, e);
314 					if ((flags & STR_ICASE) && isupper(sc))
315 						sc = tolower(sc);
316 				}
317 			}
318 			else if (pc != '?' && pc != sc)
319 				return 0;
320 			break;
321 		case 0:
322 			if (!(flags & STR_MAXIMAL))
323 				sc = 0;
324 			/*FALLTHROUGH*/
325 		case '|':
326 		case '&':
327 		case ')':
328 			if (!sc)
329 			{
330 				mp->current.next_s = olds;
331 				mp->next_p = oldp;
332 				mp->current.groups = g;
333 			}
334 			if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && olds > mp->best.next_s || !(flags & STR_MAXIMAL) && olds < mp->best.next_s))
335 			{
336 				mp->best = mp->current;
337 				mp->best.next_s = olds;
338 				mp->best.groups = g;
339 			}
340 			return !sc;
341 		case '[':
342 			{
343 				/*UNDENT...*/
344 
345 	int	invert;
346 	int	x;
347 	int	ok = 0;
348 	char*	range;
349 
350 	if (!sc)
351 		return 0;
352 	range = 0;
353 	n = 0;
354 	if (invert = *p == '!' || *p == '^')
355 		p++;
356 	for (;;)
357 	{
358 		oldp = p;
359 		if (!(pc = mbgetchar(p)))
360 			return 0;
361 		else if (pc == '[' && (*p == ':' || *p == '=' || *p == '.'))
362 		{
363 			x = 0;
364 			n = mbgetchar(p);
365 			oldp = p;
366 			for (;;)
367 			{
368 				if (!(pc = mbgetchar(p)))
369 					return 0;
370 				if (pc == n && *p == ']')
371 					break;
372 				x++;
373 			}
374 			mbgetchar(p);
375 			if (ok)
376 				/*NOP*/;
377 			else if (n == ':')
378 			{
379 				switch (HASHNKEY5(x, oldp[0], oldp[1], oldp[2], oldp[3], oldp[4]))
380 				{
381 				case HASHNKEY5(5,'a','l','n','u','m'):
382 					if (isalnum(sc))
383 						ok = 1;
384 					break;
385 				case HASHNKEY5(5,'a','l','p','h','a'):
386 					if (isalpha(sc))
387 						ok = 1;
388 					break;
389 				case HASHNKEY5(5,'b','l','a','n','k'):
390 					if (isblank(sc))
391 						ok = 1;
392 					break;
393 				case HASHNKEY5(5,'c','n','t','r','l'):
394 					if (iscntrl(sc))
395 						ok = 1;
396 					break;
397 				case HASHNKEY5(5,'d','i','g','i','t'):
398 					if (isdigit(sc))
399 						ok = 1;
400 					break;
401 				case HASHNKEY5(5,'g','r','a','p','h'):
402 					if (isgraph(sc))
403 						ok = 1;
404 					break;
405 				case HASHNKEY5(5,'l','o','w','e','r'):
406 					if (islower(sc))
407 						ok = 1;
408 					break;
409 				case HASHNKEY5(5,'p','r','i','n','t'):
410 					if (isprint(sc))
411 						ok = 1;
412 					break;
413 				case HASHNKEY5(5,'p','u','n','c','t'):
414 					if (ispunct(sc))
415 						ok = 1;
416 					break;
417 				case HASHNKEY5(5,'s','p','a','c','e'):
418 					if (isspace(sc))
419 						ok = 1;
420 					break;
421 				case HASHNKEY5(5,'u','p','p','e','r'):
422 					if (icase ? islower(sc) : isupper(sc))
423 						ok = 1;
424 					break;
425 				case HASHNKEY5(6,'x','d','i','g','i'):
426 					if (oldp[5] == 't' && isxdigit(sc))
427 						ok = 1;
428 					break;
429 				}
430 			}
431 			else if (range)
432 				goto getrange;
433 			else if (*p == '-' && *(p + 1) != ']')
434 			{
435 				mbgetchar(p);
436 				range = oldp;
437 			}
438 			else if (isalpha(*oldp) && isalpha(*olds) && tolower(*oldp) == tolower(*olds) || sc == mbgetchar(oldp))
439 				ok = 1;
440 			n = 1;
441 		}
442 		else if (pc == ']' && n)
443 		{
444 			if (ok != invert)
445 				break;
446 			return 0;
447 		}
448 		else if (pc == '\\' && (oldp = p, !(pc = mbgetchar(p))))
449 			return 0;
450 		else if (ok)
451 			/*NOP*/;
452 		else if (range)
453 		{
454 		getrange:
455 			if (icase && isupper(pc))
456 				pc = tolower(pc);
457 			x = mbgetchar(range);
458 			if (icase && isupper(x))
459 				x = tolower(x);
460 			if (sc == x || sc == pc || sc > x && sc < pc)
461 				ok = 1;
462 			if (*p == '-' && *(p + 1) != ']')
463 			{
464 				mbgetchar(p);
465 				range = oldp;
466 			}
467 			else
468 				range = 0;
469 			n = 1;
470 		}
471 		else if (*p == '-' && *(p + 1) != ']')
472 		{
473 			mbgetchar(p);
474 			range = oldp;
475 			n = 1;
476 		}
477 		else
478 		{
479 			if (icase && isupper(pc))
480 				pc = tolower(pc);
481 			if (sc == pc)
482 				ok = 1;
483 			n = pc;
484 		}
485 	}
486 
487 				/*...INDENT*/
488 			}
489 			break;
490 		case '\\':
491 			if (!(pc = mbgetchar(p)))
492 				return 0;
493 			if (pc >= '0' && pc <= '9')
494 			{
495 				n = pc - '0';
496 				if (n <= g && (oldp = mp->current.beg[n]))
497 				{
498 					while (oldp < mp->current.end[n])
499 						if (!*olds || *olds++ != *oldp++)
500 							return 0;
501 					s = olds;
502 					break;
503 				}
504 			}
505 			/*FALLTHROUGH*/
506 		default:
507 			if (icase && isupper(pc))
508 				pc = tolower(pc);
509 			if (pc != sc)
510 				return 0;
511 			break;
512 		}
513 	} while (sc);
514 	return 0;
515 }
516 
517 /*
518  * match any pattern in a group
519  * | and & subgroups are parsed here
520  */
521 
522 static int
grpmatch(Match_t * mp,int g,char * s,register char * p,char * e,int flags)523 grpmatch(Match_t* mp, int g, char* s, register char* p, char* e, int flags)
524 {
525 	register char*	a;
526 
527 	do
528 	{
529 		for (a = p; onematch(mp, g, s, a, e, NiL, flags); a++)
530 			if (*(a = mp->next_p) != '&')
531 				return 1;
532 	} while (p = gobble(mp, p, '|', &g, 1));
533 	return 0;
534 }
535 
536 /*
537  * subgroup match
538  * 0 returned if no match
539  * otherwise number of subgroups matched returned
540  * match group begin offsets are even elements of sub
541  * match group end offsets are odd elements of sub
542  * the matched string is from s+sub[0] up to but not
543  * including s+sub[1]
544  */
545 
546 int
strgrpmatch(const char * b,const char * p,ssize_t * sub,int n,int flags)547 strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, int flags)
548 {
549 	register int	i;
550 	register char*	s;
551 	char*		e;
552 	Match_t		match;
553 
554 	s = (char*)b;
555 	match.last_s = e = s + strlen(s);
556 	for (;;)
557 	{
558 		match.best.next_s = 0;
559 		match.current.groups = 0;
560 		if ((i = grpmatch(&match, 0, s, (char*)p, e, flags)) || match.best.next_s)
561 		{
562 			if (!i)
563 				match.current = match.best;
564 			match.current.groups++;
565 			match.current.end[0] = match.current.next_s;
566 			break;
567 		}
568 		if ((flags & STR_LEFT) || s >= e)
569 			return 0;
570 		s++;
571 	}
572 	if ((flags & STR_RIGHT) && match.current.next_s != e)
573 		return 0;
574 	if (!sub)
575 		return 1;
576 	match.current.beg[0] = s;
577 	s = (char*)b;
578 	if (n > match.current.groups)
579 		n = match.current.groups;
580 	for (i = 0; i < n; i++)
581 	{
582 		sub[i * 2] = match.current.end[i] ? match.current.beg[i] - s : 0;
583 		sub[i * 2 + 1] = match.current.end[i] ? match.current.end[i] - s : 0;
584 	}
585 	return n;
586 }
587 
588 /*
589  * compare the string s with the shell pattern p
590  * returns 1 for match 0 otherwise
591  */
592 
593 int
strmatch(const char * s,const char * p)594 strmatch(const char* s, const char* p)
595 {
596 	return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
597 }
598