xref: /illumos-gate/usr/src/head/regexp.h (revision ba3594ba)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1988 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 /*
25  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
26  *
27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #ifndef _REGEXP_H
32 #define	_REGEXP_H
33 
34 #include <string.h>
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
40 #define	CBRA	2
41 #define	CCHR	4
42 #define	CDOT	8
43 #define	CCL	12
44 #define	CXCL	16
45 #define	CDOL	20
46 #define	CCEOF	22
47 #define	CKET	24
48 #define	CBACK	36
49 #define	NCCL	40
50 
51 #define	STAR	01
52 #define	RNGE	03
53 
54 #define	NBRA	9
55 
56 #define	PLACE(c)	ep[c >> 3] |= bittab[c & 07]
57 #define	ISTHERE(c)	(ep[c >> 3] & bittab[c & 07])
58 #define	ecmp(s1, s2, n)	(strncmp(s1, s2, n) == 0)
59 
60 static char	*braslist[NBRA];
61 static char	*braelist[NBRA];
62 int	sed, nbra;
63 char	*loc1, *loc2, *locs;
64 static int	nodelim;
65 
66 int	circf;
67 static int	low;
68 static int	size;
69 
70 static unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
71 
72 int advance(const char *lp, const char *ep);
73 static void getrnge(const char *str);
74 
75 char *
76 compile(char *instring, char *ep, const char *endbuf, int seof)
77 {
78 	INIT	/* Dependent declarations and initializations */
79 	register int c;
80 	register int eof = seof;
81 	char *lastep;
82 	int cclcnt;
83 	char bracket[NBRA], *bracketp;
84 	int closed;
85 	int neg;
86 	int lc;
87 	int i, cflg;
88 	int iflag; /* used for non-ascii characters in brackets */
89 
90 #ifdef __lint
91 	/* make lint happy */
92 	c = nodelim;
93 #endif
94 
95 	lastep = NULL;
96 	if ((c = GETC()) == eof || c == '\n') {
97 		if (c == '\n') {
98 			UNGETC(c);
99 			nodelim = 1;
100 		}
101 		if (*ep == 0 && !sed)
102 			ERROR(41);
103 		RETURN(ep);
104 	}
105 	bracketp = bracket;
106 	circf = closed = nbra = 0;
107 	if (c == '^')
108 		circf++;
109 	else
110 		UNGETC(c);
111 	for (;;) {
112 		if (ep >= endbuf)
113 			ERROR(50);
114 		c = GETC();
115 		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
116 			lastep = ep;
117 		if (c == eof) {
118 			*ep++ = CCEOF;
119 			if (bracketp != bracket)
120 				ERROR(42);
121 			RETURN(ep);
122 		}
123 		switch (c) {
124 
125 		case '.':
126 			*ep++ = CDOT;
127 			continue;
128 
129 		case '\n':
130 			if (!sed) {
131 				UNGETC(c);
132 				*ep++ = CCEOF;
133 				nodelim = 1;
134 				if (bracketp != bracket)
135 					ERROR(42);
136 				RETURN(ep);
137 			} else ERROR(36);
138 		case '*':
139 			if (lastep == NULL || *lastep == CBRA ||
140 			    *lastep == CKET)
141 				goto defchar;
142 			*lastep |= STAR;
143 			continue;
144 
145 		case '$':
146 			if (PEEKC() != eof && PEEKC() != '\n')
147 				goto defchar;
148 			*ep++ = CDOL;
149 			continue;
150 
151 		case '[':
152 			if (&ep[17] >= endbuf)
153 				ERROR(50);
154 
155 			*ep++ = CCL;
156 			lc = 0;
157 			for (i = 0; i < 16; i++)
158 				ep[i] = 0;
159 
160 			neg = 0;
161 			if ((c = GETC()) == '^') {
162 				neg = 1;
163 				c = GETC();
164 			}
165 			iflag = 1;
166 			do {
167 				c &= 0377;
168 				if (c == '\0' || c == '\n')
169 					ERROR(49);
170 				if ((c & 0200) && iflag) {
171 					iflag = 0;
172 					if (&ep[32] >= endbuf)
173 						ERROR(50);
174 					ep[-1] = CXCL;
175 					for (i = 16; i < 32; i++)
176 						ep[i] = 0;
177 				}
178 				if (c == '-' && lc != 0) {
179 					if ((c = GETC()) == ']') {
180 						PLACE('-');
181 						break;
182 					}
183 					if ((c & 0200) && iflag) {
184 						iflag = 0;
185 						if (&ep[32] >= endbuf)
186 							ERROR(50);
187 						ep[-1] = CXCL;
188 						for (i = 16; i < 32; i++)
189 							ep[i] = 0;
190 					}
191 					while (lc < c) {
192 						PLACE(lc);
193 						lc++;
194 					}
195 				}
196 				lc = c;
197 				PLACE(c);
198 			} while ((c = GETC()) != ']');
199 
200 			if (iflag)
201 				iflag = 16;
202 			else
203 				iflag = 32;
204 
205 			if (neg) {
206 				if (iflag == 32) {
207 					for (cclcnt = 0; cclcnt < iflag;
208 					    cclcnt++)
209 						ep[cclcnt] ^= 0377;
210 					ep[0] &= 0376;
211 				} else {
212 					ep[-1] = NCCL;
213 					/* make nulls match so test fails */
214 					ep[0] |= 01;
215 				}
216 			}
217 
218 			ep += iflag;
219 
220 			continue;
221 
222 		case '\\':
223 			switch (c = GETC()) {
224 
225 			case '(':
226 				if (nbra >= NBRA)
227 					ERROR(43);
228 				*bracketp++ = (char)nbra;
229 				*ep++ = CBRA;
230 				*ep++ = (char)nbra++;
231 				continue;
232 
233 			case ')':
234 				if (bracketp <= bracket)
235 					ERROR(42);
236 				*ep++ = CKET;
237 				*ep++ = *--bracketp;
238 				closed++;
239 				continue;
240 
241 			case '{':
242 				if (lastep == NULL)
243 					goto defchar;
244 				*lastep |= RNGE;
245 				cflg = 0;
246 			nlim:
247 				c = GETC();
248 				i = 0;
249 				do {
250 					if ('0' <= c && c <= '9')
251 						i = 10 * i + c - '0';
252 					else
253 						ERROR(16);
254 				} while (((c = GETC()) != '\\') && (c != ','));
255 				if (i >= 255)
256 					ERROR(11);
257 				*ep++ = (char)i;
258 				if (c == ',') {
259 					if (cflg++)
260 						ERROR(44);
261 					if ((c = GETC()) == '\\')
262 						*ep++ = (char)255;
263 					else {
264 						UNGETC(c);
265 						goto nlim;
266 						/* get 2'nd number */
267 					}
268 				}
269 				if (GETC() != '}')
270 					ERROR(45);
271 				if (!cflg)	/* one number */
272 					*ep++ = (char)i;
273 				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
274 					ERROR(46);
275 				continue;
276 
277 			case '\n':
278 				ERROR(36);
279 
280 			case 'n':
281 				c = '\n';
282 				goto defchar;
283 
284 			default:
285 				if (c >= '1' && c <= '9') {
286 					if ((c -= '1') >= closed)
287 						ERROR(25);
288 					*ep++ = CBACK;
289 					*ep++ = (char)c;
290 					continue;
291 				}
292 			}
293 	/* Drop through to default to use \ to turn off special chars */
294 
295 		defchar:
296 		default:
297 			lastep = ep;
298 			*ep++ = CCHR;
299 			*ep++ = (char)c;
300 		}
301 	}
302 	/*NOTREACHED*/
303 }
304 
305 int
306 step(const char *p1, const char *p2)
307 {
308 	char c;
309 
310 
311 	if (circf) {
312 		loc1 = (char *)p1;
313 		return (advance(p1, p2));
314 	}
315 	/* fast check for first character */
316 	if (*p2 == CCHR) {
317 		c = p2[1];
318 		do {
319 			if (*p1 != c)
320 				continue;
321 			if (advance(p1, p2)) {
322 				loc1 = (char *)p1;
323 				return (1);
324 			}
325 		} while (*p1++);
326 		return (0);
327 	}
328 		/* regular algorithm */
329 	do {
330 		if (advance(p1, p2)) {
331 			loc1 = (char *)p1;
332 			return (1);
333 		}
334 	} while (*p1++);
335 	return (0);
336 }
337 
338 int
339 advance(const char *lp, const char *ep)
340 {
341 	const char *curlp;
342 	int c;
343 	char *bbeg;
344 	register char neg;
345 	size_t ct;
346 
347 	for (;;) {
348 		neg = 0;
349 		switch (*ep++) {
350 
351 		case CCHR:
352 			if (*ep++ == *lp++)
353 				continue;
354 			return (0);
355 			/*FALLTHRU*/
356 
357 		case CDOT:
358 			if (*lp++)
359 				continue;
360 			return (0);
361 			/*FALLTHRU*/
362 
363 		case CDOL:
364 			if (*lp == 0)
365 				continue;
366 			return (0);
367 			/*FALLTHRU*/
368 
369 		case CCEOF:
370 			loc2 = (char *)lp;
371 			return (1);
372 			/*FALLTHRU*/
373 
374 		case CXCL:
375 			c = (unsigned char)*lp++;
376 			if (ISTHERE(c)) {
377 				ep += 32;
378 				continue;
379 			}
380 			return (0);
381 			/*FALLTHRU*/
382 
383 		case NCCL:
384 			neg = 1;
385 			/*FALLTHRU*/
386 
387 		case CCL:
388 			c = *lp++;
389 			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
390 				ep += 16;
391 				continue;
392 			}
393 			return (0);
394 			/*FALLTHRU*/
395 
396 		case CBRA:
397 			braslist[*ep++] = (char *)lp;
398 			continue;
399 			/*FALLTHRU*/
400 
401 		case CKET:
402 			braelist[*ep++] = (char *)lp;
403 			continue;
404 			/*FALLTHRU*/
405 
406 		case CCHR | RNGE:
407 			c = *ep++;
408 			getrnge(ep);
409 			while (low--)
410 				if (*lp++ != c)
411 					return (0);
412 			curlp = lp;
413 			while (size--)
414 				if (*lp++ != c)
415 					break;
416 			if (size < 0)
417 				lp++;
418 			ep += 2;
419 			goto star;
420 			/*FALLTHRU*/
421 
422 		case CDOT | RNGE:
423 			getrnge(ep);
424 			while (low--)
425 				if (*lp++ == '\0')
426 					return (0);
427 			curlp = lp;
428 			while (size--)
429 				if (*lp++ == '\0')
430 					break;
431 			if (size < 0)
432 				lp++;
433 			ep += 2;
434 			goto star;
435 			/*FALLTHRU*/
436 
437 		case CXCL | RNGE:
438 			getrnge(ep + 32);
439 			while (low--) {
440 				c = (unsigned char)*lp++;
441 				if (!ISTHERE(c))
442 					return (0);
443 			}
444 			curlp = lp;
445 			while (size--) {
446 				c = (unsigned char)*lp++;
447 				if (!ISTHERE(c))
448 					break;
449 			}
450 			if (size < 0)
451 				lp++;
452 			ep += 34;		/* 32 + 2 */
453 			goto star;
454 			/*FALLTHRU*/
455 
456 		case NCCL | RNGE:
457 			neg = 1;
458 			/*FALLTHRU*/
459 
460 		case CCL | RNGE:
461 			getrnge(ep + 16);
462 			while (low--) {
463 				c = *lp++;
464 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
465 					return (0);
466 			}
467 			curlp = lp;
468 			while (size--) {
469 				c = *lp++;
470 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
471 					break;
472 			}
473 			if (size < 0)
474 				lp++;
475 			ep += 18; 		/* 16 + 2 */
476 			goto star;
477 			/*FALLTHRU*/
478 
479 		case CBACK:
480 			bbeg = braslist[*ep];
481 			ct = braelist[*ep++] - bbeg;
482 
483 			if (ecmp(bbeg, lp, ct)) {
484 				lp += ct;
485 				continue;
486 			}
487 			return (0);
488 			/*FALLTHRU*/
489 
490 		case CBACK | STAR:
491 			bbeg = braslist[*ep];
492 			ct = braelist[*ep++] - bbeg;
493 			curlp = lp;
494 			while (ecmp(bbeg, lp, ct))
495 				lp += ct;
496 
497 			while (lp >= curlp) {
498 				if (advance(lp, ep))
499 					return (1);
500 				lp -= ct;
501 			}
502 			return (0);
503 			/*FALLTHRU*/
504 
505 		case CDOT | STAR:
506 			curlp = lp;
507 			while (*lp++);
508 			goto star;
509 			/*FALLTHRU*/
510 
511 		case CCHR | STAR:
512 			curlp = lp;
513 			while (*lp++ == *ep);
514 			ep++;
515 			goto star;
516 			/*FALLTHRU*/
517 
518 		case CXCL | STAR:
519 			curlp = lp;
520 			do {
521 				c = (unsigned char)*lp++;
522 			} while (ISTHERE(c));
523 			ep += 32;
524 			goto star;
525 			/*FALLTHRU*/
526 
527 		case NCCL | STAR:
528 			neg = 1;
529 			/*FALLTHRU*/
530 
531 		case CCL | STAR:
532 			curlp = lp;
533 			do {
534 				c = *lp++;
535 			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
536 			ep += 16;
537 			goto star;
538 			/*FALLTHRU*/
539 
540 		star:
541 			do {
542 				if (--lp == locs)
543 					break;
544 				if (advance(lp, ep))
545 					return (1);
546 			} while (lp > curlp);
547 			return (0);
548 
549 		}
550 	}
551 	/*NOTREACHED*/
552 }
553 
554 static void
555 getrnge(const char *str)
556 {
557 	low = *str++ & 0377;
558 	size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
559 }
560 
561 #ifdef	__cplusplus
562 }
563 #endif
564 
565 #endif	/* _REGEXP_H */
566