xref: /illumos-gate/usr/src/head/regexp.h (revision 9acbbeaf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1988 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #ifndef _REGEXP_H
31 #define	_REGEXP_H
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.9	*/
34 
35 #include <string.h>
36 
37 #ifdef	__cplusplus
38 extern "C" {
39 #endif
40 
41 #define	CBRA	2
42 #define	CCHR	4
43 #define	CDOT	8
44 #define	CCL	12
45 #define	CXCL	16
46 #define	CDOL	20
47 #define	CCEOF	22
48 #define	CKET	24
49 #define	CBACK	36
50 #define	NCCL	40
51 
52 #define	STAR	01
53 #define	RNGE	03
54 
55 #define	NBRA	9
56 
57 #define	PLACE(c)	ep[c >> 3] |= bittab[c & 07]
58 #define	ISTHERE(c)	(ep[c >> 3] & bittab[c & 07])
59 #define	ecmp(s1, s2, n)	(strncmp(s1, s2, n) == 0)
60 
61 static char	*braslist[NBRA];
62 static char	*braelist[NBRA];
63 int	sed, nbra;
64 char	*loc1, *loc2, *locs;
65 static int	nodelim;
66 
67 int	circf;
68 static int	low;
69 static int	size;
70 
71 static unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
72 
73 #ifdef	__STDC__
74 int advance(const char *lp, const char *ep);
75 static void getrnge(const char *str);
76 #else
77 int advance();
78 static void getrnge();
79 #endif
80 
81 char *
82 #ifdef	__STDC__
83 compile(char *instring, char *ep, const char *endbuf, int seof)
84 #else
85 compile(instring, ep, endbuf, seof)
86 register char *ep;
87 char *instring, *endbuf;
88 int seof;
89 #endif
90 {
91 	INIT	/* Dependent declarations and initializations */
92 	register int c;
93 	register int eof = seof;
94 	char *lastep;
95 	int cclcnt;
96 	char bracket[NBRA], *bracketp;
97 	int closed;
98 	int neg;
99 	int lc;
100 	int i, cflg;
101 	int iflag; /* used for non-ascii characters in brackets */
102 
103 #ifdef __lint
104 	/* make lint happy */
105 	c = nodelim;
106 #endif
107 
108 	lastep = NULL;
109 	if ((c = GETC()) == eof || c == '\n') {
110 		if (c == '\n') {
111 			UNGETC(c);
112 			nodelim = 1;
113 		}
114 		if (*ep == 0 && !sed)
115 			ERROR(41);
116 		RETURN(ep);
117 	}
118 	bracketp = bracket;
119 	circf = closed = nbra = 0;
120 	if (c == '^')
121 		circf++;
122 	else
123 		UNGETC(c);
124 	for (;;) {
125 		if (ep >= endbuf)
126 			ERROR(50);
127 		c = GETC();
128 		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
129 			lastep = ep;
130 		if (c == eof) {
131 			*ep++ = CCEOF;
132 			if (bracketp != bracket)
133 				ERROR(42);
134 			RETURN(ep);
135 		}
136 		switch (c) {
137 
138 		case '.':
139 			*ep++ = CDOT;
140 			continue;
141 
142 		case '\n':
143 			if (!sed) {
144 				UNGETC(c);
145 				*ep++ = CCEOF;
146 				nodelim = 1;
147 				if (bracketp != bracket)
148 					ERROR(42);
149 				RETURN(ep);
150 			} else ERROR(36);
151 		case '*':
152 			if (lastep == NULL || *lastep == CBRA ||
153 			    *lastep == CKET)
154 				goto defchar;
155 			*lastep |= STAR;
156 			continue;
157 
158 		case '$':
159 			if (PEEKC() != eof && PEEKC() != '\n')
160 				goto defchar;
161 			*ep++ = CDOL;
162 			continue;
163 
164 		case '[':
165 			if (&ep[17] >= endbuf)
166 				ERROR(50);
167 
168 			*ep++ = CCL;
169 			lc = 0;
170 			for (i = 0; i < 16; i++)
171 				ep[i] = 0;
172 
173 			neg = 0;
174 			if ((c = GETC()) == '^') {
175 				neg = 1;
176 				c = GETC();
177 			}
178 			iflag = 1;
179 			do {
180 				c &= 0377;
181 				if (c == '\0' || c == '\n')
182 					ERROR(49);
183 				if ((c & 0200) && iflag) {
184 					iflag = 0;
185 					if (&ep[32] >= endbuf)
186 						ERROR(50);
187 					ep[-1] = CXCL;
188 					for (i = 16; i < 32; i++)
189 						ep[i] = 0;
190 				}
191 				if (c == '-' && lc != 0) {
192 					if ((c = GETC()) == ']') {
193 						PLACE('-');
194 						break;
195 					}
196 					if ((c & 0200) && iflag) {
197 						iflag = 0;
198 						if (&ep[32] >= endbuf)
199 							ERROR(50);
200 						ep[-1] = CXCL;
201 						for (i = 16; i < 32; i++)
202 							ep[i] = 0;
203 					}
204 					while (lc < c) {
205 						PLACE(lc);
206 						lc++;
207 					}
208 				}
209 				lc = c;
210 				PLACE(c);
211 			} while ((c = GETC()) != ']');
212 
213 			if (iflag)
214 				iflag = 16;
215 			else
216 				iflag = 32;
217 
218 			if (neg) {
219 				if (iflag == 32) {
220 					for (cclcnt = 0; cclcnt < iflag;
221 					    cclcnt++)
222 						ep[cclcnt] ^= 0377;
223 					ep[0] &= 0376;
224 				} else {
225 					ep[-1] = NCCL;
226 					/* make nulls match so test fails */
227 					ep[0] |= 01;
228 				}
229 			}
230 
231 			ep += iflag;
232 
233 			continue;
234 
235 		case '\\':
236 			switch (c = GETC()) {
237 
238 			case '(':
239 				if (nbra >= NBRA)
240 					ERROR(43);
241 				*bracketp++ = (char)nbra;
242 				*ep++ = CBRA;
243 				*ep++ = (char)nbra++;
244 				continue;
245 
246 			case ')':
247 				if (bracketp <= bracket)
248 					ERROR(42);
249 				*ep++ = CKET;
250 				*ep++ = *--bracketp;
251 				closed++;
252 				continue;
253 
254 			case '{':
255 				if (lastep == NULL)
256 					goto defchar;
257 				*lastep |= RNGE;
258 				cflg = 0;
259 			nlim:
260 				c = GETC();
261 				i = 0;
262 				do {
263 					if ('0' <= c && c <= '9')
264 						i = 10 * i + c - '0';
265 					else
266 						ERROR(16);
267 				} while (((c = GETC()) != '\\') && (c != ','));
268 				if (i >= 255)
269 					ERROR(11);
270 				*ep++ = (char)i;
271 				if (c == ',') {
272 					if (cflg++)
273 						ERROR(44);
274 					if ((c = GETC()) == '\\')
275 						*ep++ = (char)255;
276 					else {
277 						UNGETC(c);
278 						goto nlim;
279 						/* get 2'nd number */
280 					}
281 				}
282 				if (GETC() != '}')
283 					ERROR(45);
284 				if (!cflg)	/* one number */
285 					*ep++ = (char)i;
286 				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
287 					ERROR(46);
288 				continue;
289 
290 			case '\n':
291 				ERROR(36);
292 
293 			case 'n':
294 				c = '\n';
295 				goto defchar;
296 
297 			default:
298 				if (c >= '1' && c <= '9') {
299 					if ((c -= '1') >= closed)
300 						ERROR(25);
301 					*ep++ = CBACK;
302 					*ep++ = (char)c;
303 					continue;
304 				}
305 			}
306 	/* Drop through to default to use \ to turn off special chars */
307 
308 		defchar:
309 		default:
310 			lastep = ep;
311 			*ep++ = CCHR;
312 			*ep++ = (char)c;
313 		}
314 	}
315 	/*NOTREACHED*/
316 }
317 
318 #ifdef	__STDC__
319 int
320 step(const char *p1, const char *p2)
321 #else
322 int
323 step(p1, p2)
324 register char *p1, *p2;
325 #endif
326 {
327 	char c;
328 
329 
330 	if (circf) {
331 		loc1 = (char *)p1;
332 		return (advance(p1, p2));
333 	}
334 	/* fast check for first character */
335 	if (*p2 == CCHR) {
336 		c = p2[1];
337 		do {
338 			if (*p1 != c)
339 				continue;
340 			if (advance(p1, p2)) {
341 				loc1 = (char *)p1;
342 				return (1);
343 			}
344 		} while (*p1++);
345 		return (0);
346 	}
347 		/* regular algorithm */
348 	do {
349 		if (advance(p1, p2)) {
350 			loc1 = (char *)p1;
351 			return (1);
352 		}
353 	} while (*p1++);
354 	return (0);
355 }
356 
357 int
358 #ifdef	__STDC__
359 advance(const char *lp, const char *ep)
360 #else
361 advance(lp, ep)
362 register char *lp, *ep;
363 #endif
364 {
365 #ifdef	__STDC__
366 	const char *curlp;
367 #else
368 	register char *curlp;
369 #endif
370 	int c;
371 	char *bbeg;
372 	register char neg;
373 	size_t ct;
374 
375 	for (;;) {
376 		neg = 0;
377 		switch (*ep++) {
378 
379 		case CCHR:
380 			if (*ep++ == *lp++)
381 				continue;
382 			return (0);
383 			/*FALLTHRU*/
384 
385 		case CDOT:
386 			if (*lp++)
387 				continue;
388 			return (0);
389 			/*FALLTHRU*/
390 
391 		case CDOL:
392 			if (*lp == 0)
393 				continue;
394 			return (0);
395 			/*FALLTHRU*/
396 
397 		case CCEOF:
398 			loc2 = (char *)lp;
399 			return (1);
400 			/*FALLTHRU*/
401 
402 		case CXCL:
403 			c = (unsigned char)*lp++;
404 			if (ISTHERE(c)) {
405 				ep += 32;
406 				continue;
407 			}
408 			return (0);
409 			/*FALLTHRU*/
410 
411 		case NCCL:
412 			neg = 1;
413 			/*FALLTHRU*/
414 
415 		case CCL:
416 			c = *lp++;
417 			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
418 				ep += 16;
419 				continue;
420 			}
421 			return (0);
422 			/*FALLTHRU*/
423 
424 		case CBRA:
425 			braslist[*ep++] = (char *)lp;
426 			continue;
427 			/*FALLTHRU*/
428 
429 		case CKET:
430 			braelist[*ep++] = (char *)lp;
431 			continue;
432 			/*FALLTHRU*/
433 
434 		case CCHR | RNGE:
435 			c = *ep++;
436 			getrnge(ep);
437 			while (low--)
438 				if (*lp++ != c)
439 					return (0);
440 			curlp = lp;
441 			while (size--)
442 				if (*lp++ != c)
443 					break;
444 			if (size < 0)
445 				lp++;
446 			ep += 2;
447 			goto star;
448 			/*FALLTHRU*/
449 
450 		case CDOT | RNGE:
451 			getrnge(ep);
452 			while (low--)
453 				if (*lp++ == '\0')
454 					return (0);
455 			curlp = lp;
456 			while (size--)
457 				if (*lp++ == '\0')
458 					break;
459 			if (size < 0)
460 				lp++;
461 			ep += 2;
462 			goto star;
463 			/*FALLTHRU*/
464 
465 		case CXCL | RNGE:
466 			getrnge(ep + 32);
467 			while (low--) {
468 				c = (unsigned char)*lp++;
469 				if (!ISTHERE(c))
470 					return (0);
471 			}
472 			curlp = lp;
473 			while (size--) {
474 				c = (unsigned char)*lp++;
475 				if (!ISTHERE(c))
476 					break;
477 			}
478 			if (size < 0)
479 				lp++;
480 			ep += 34;		/* 32 + 2 */
481 			goto star;
482 			/*FALLTHRU*/
483 
484 		case NCCL | RNGE:
485 			neg = 1;
486 			/*FALLTHRU*/
487 
488 		case CCL | RNGE:
489 			getrnge(ep + 16);
490 			while (low--) {
491 				c = *lp++;
492 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
493 					return (0);
494 			}
495 			curlp = lp;
496 			while (size--) {
497 				c = *lp++;
498 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
499 					break;
500 			}
501 			if (size < 0)
502 				lp++;
503 			ep += 18; 		/* 16 + 2 */
504 			goto star;
505 			/*FALLTHRU*/
506 
507 		case CBACK:
508 			bbeg = braslist[*ep];
509 			ct = braelist[*ep++] - bbeg;
510 
511 			if (ecmp(bbeg, lp, ct)) {
512 				lp += ct;
513 				continue;
514 			}
515 			return (0);
516 			/*FALLTHRU*/
517 
518 		case CBACK | STAR:
519 			bbeg = braslist[*ep];
520 			ct = braelist[*ep++] - bbeg;
521 			curlp = lp;
522 			while (ecmp(bbeg, lp, ct))
523 				lp += ct;
524 
525 			while (lp >= curlp) {
526 				if (advance(lp, ep))
527 					return (1);
528 				lp -= ct;
529 			}
530 			return (0);
531 			/*FALLTHRU*/
532 
533 		case CDOT | STAR:
534 			curlp = lp;
535 			while (*lp++);
536 			goto star;
537 			/*FALLTHRU*/
538 
539 		case CCHR | STAR:
540 			curlp = lp;
541 			while (*lp++ == *ep);
542 			ep++;
543 			goto star;
544 			/*FALLTHRU*/
545 
546 		case CXCL | STAR:
547 			curlp = lp;
548 			do {
549 				c = (unsigned char)*lp++;
550 			} while (ISTHERE(c));
551 			ep += 32;
552 			goto star;
553 			/*FALLTHRU*/
554 
555 		case NCCL | STAR:
556 			neg = 1;
557 			/*FALLTHRU*/
558 
559 		case CCL | STAR:
560 			curlp = lp;
561 			do {
562 				c = *lp++;
563 			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
564 			ep += 16;
565 			goto star;
566 			/*FALLTHRU*/
567 
568 		star:
569 			do {
570 				if (--lp == locs)
571 					break;
572 				if (advance(lp, ep))
573 					return (1);
574 			} while (lp > curlp);
575 			return (0);
576 
577 		}
578 	}
579 	/*NOTREACHED*/
580 }
581 
582 static void
583 #ifdef	__STDC__
584 getrnge(const char *str)
585 #else
586 getrnge(str)
587 register char *str;
588 #endif
589 {
590 	low = *str++ & 0377;
591 	size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
592 }
593 
594 #ifdef	__cplusplus
595 }
596 #endif
597 
598 #endif	/* _REGEXP_H */
599