1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*	Copyright (c) 1988 AT&T	*/
22/*	  All Rights Reserved	*/
23
24/*
25 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
26 *
27 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30
31#ifndef _REGEXP_H
32#define	_REGEXP_H
33
34#include <string.h>
35
36#ifdef	__cplusplus
37extern "C" {
38#endif
39
40#define	CBRA	2
41#define	CCHR	4
42#define	CDOT	8
43#define	CCL	12
44#define	CXCL	16
45#define	CDOL	20
46#define	CCEOF	22
47#define	CKET	24
48#define	CBACK	36
49#define	NCCL	40
50
51#define	STAR	01
52#define	RNGE	03
53
54#define	NBRA	9
55
56#define	PLACE(c)	ep[c >> 3] |= bittab[c & 07]
57#define	ISTHERE(c)	(ep[c >> 3] & bittab[c & 07])
58#define	ecmp(s1, s2, n)	(strncmp(s1, s2, n) == 0)
59
60static char	*braslist[NBRA];
61static char	*braelist[NBRA];
62int	sed, nbra;
63char	*loc1, *loc2, *locs;
64static int	nodelim;
65
66int	circf;
67static int	low;
68static int	size;
69
70static unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
71
72int advance(const char *lp, const char *ep);
73static void getrnge(const char *str);
74
75char *
76compile(char *instring, char *ep, const char *endbuf, int seof)
77{
78	INIT	/* Dependent declarations and initializations */
79	register int c;
80	register int eof = seof;
81	char *lastep;
82	int cclcnt;
83	char bracket[NBRA], *bracketp;
84	int closed;
85	int neg;
86	int lc;
87	int i, cflg;
88	int iflag; /* used for non-ascii characters in brackets */
89
90#ifdef __lint
91	/* make lint happy */
92	c = nodelim;
93#endif
94
95	lastep = NULL;
96	if ((c = GETC()) == eof || c == '\n') {
97		if (c == '\n') {
98			UNGETC(c);
99			nodelim = 1;
100		}
101		if (*ep == 0 && !sed)
102			ERROR(41);
103		RETURN(ep);
104	}
105	bracketp = bracket;
106	circf = closed = nbra = 0;
107	if (c == '^')
108		circf++;
109	else
110		UNGETC(c);
111	for (;;) {
112		if (ep >= endbuf)
113			ERROR(50);
114		c = GETC();
115		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
116			lastep = ep;
117		if (c == eof) {
118			*ep++ = CCEOF;
119			if (bracketp != bracket)
120				ERROR(42);
121			RETURN(ep);
122		}
123		switch (c) {
124
125		case '.':
126			*ep++ = CDOT;
127			continue;
128
129		case '\n':
130			if (!sed) {
131				UNGETC(c);
132				*ep++ = CCEOF;
133				nodelim = 1;
134				if (bracketp != bracket)
135					ERROR(42);
136				RETURN(ep);
137			} else ERROR(36);
138		case '*':
139			if (lastep == NULL || *lastep == CBRA ||
140			    *lastep == CKET)
141				goto defchar;
142			*lastep |= STAR;
143			continue;
144
145		case '$':
146			if (PEEKC() != eof && PEEKC() != '\n')
147				goto defchar;
148			*ep++ = CDOL;
149			continue;
150
151		case '[':
152			if (&ep[17] >= endbuf)
153				ERROR(50);
154
155			*ep++ = CCL;
156			lc = 0;
157			for (i = 0; i < 16; i++)
158				ep[i] = 0;
159
160			neg = 0;
161			if ((c = GETC()) == '^') {
162				neg = 1;
163				c = GETC();
164			}
165			iflag = 1;
166			do {
167				c &= 0377;
168				if (c == '\0' || c == '\n')
169					ERROR(49);
170				if ((c & 0200) && iflag) {
171					iflag = 0;
172					if (&ep[32] >= endbuf)
173						ERROR(50);
174					ep[-1] = CXCL;
175					for (i = 16; i < 32; i++)
176						ep[i] = 0;
177				}
178				if (c == '-' && lc != 0) {
179					if ((c = GETC()) == ']') {
180						PLACE('-');
181						break;
182					}
183					if ((c & 0200) && iflag) {
184						iflag = 0;
185						if (&ep[32] >= endbuf)
186							ERROR(50);
187						ep[-1] = CXCL;
188						for (i = 16; i < 32; i++)
189							ep[i] = 0;
190					}
191					while (lc < c) {
192						PLACE(lc);
193						lc++;
194					}
195				}
196				lc = c;
197				PLACE(c);
198			} while ((c = GETC()) != ']');
199
200			if (iflag)
201				iflag = 16;
202			else
203				iflag = 32;
204
205			if (neg) {
206				if (iflag == 32) {
207					for (cclcnt = 0; cclcnt < iflag;
208					    cclcnt++)
209						ep[cclcnt] ^= 0377;
210					ep[0] &= 0376;
211				} else {
212					ep[-1] = NCCL;
213					/* make nulls match so test fails */
214					ep[0] |= 01;
215				}
216			}
217
218			ep += iflag;
219
220			continue;
221
222		case '\\':
223			switch (c = GETC()) {
224
225			case '(':
226				if (nbra >= NBRA)
227					ERROR(43);
228				*bracketp++ = (char)nbra;
229				*ep++ = CBRA;
230				*ep++ = (char)nbra++;
231				continue;
232
233			case ')':
234				if (bracketp <= bracket)
235					ERROR(42);
236				*ep++ = CKET;
237				*ep++ = *--bracketp;
238				closed++;
239				continue;
240
241			case '{':
242				if (lastep == NULL)
243					goto defchar;
244				*lastep |= RNGE;
245				cflg = 0;
246			nlim:
247				c = GETC();
248				i = 0;
249				do {
250					if ('0' <= c && c <= '9')
251						i = 10 * i + c - '0';
252					else
253						ERROR(16);
254				} while (((c = GETC()) != '\\') && (c != ','));
255				if (i >= 255)
256					ERROR(11);
257				*ep++ = (char)i;
258				if (c == ',') {
259					if (cflg++)
260						ERROR(44);
261					if ((c = GETC()) == '\\')
262						*ep++ = (char)255;
263					else {
264						UNGETC(c);
265						goto nlim;
266						/* get 2'nd number */
267					}
268				}
269				if (GETC() != '}')
270					ERROR(45);
271				if (!cflg)	/* one number */
272					*ep++ = (char)i;
273				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
274					ERROR(46);
275				continue;
276
277			case '\n':
278				ERROR(36);
279
280			case 'n':
281				c = '\n';
282				goto defchar;
283
284			default:
285				if (c >= '1' && c <= '9') {
286					if ((c -= '1') >= closed)
287						ERROR(25);
288					*ep++ = CBACK;
289					*ep++ = (char)c;
290					continue;
291				}
292				/* FALLTHROUGH */
293			}
294			/* FALLTHROUGH */
295	/* Drop through to default to use \ to turn off special chars */
296
297		defchar:
298		default:
299			lastep = ep;
300			*ep++ = CCHR;
301			*ep++ = (char)c;
302		}
303	}
304	/*NOTREACHED*/
305}
306
307int
308step(const char *p1, const char *p2)
309{
310	char c;
311
312
313	if (circf) {
314		loc1 = (char *)p1;
315		return (advance(p1, p2));
316	}
317	/* fast check for first character */
318	if (*p2 == CCHR) {
319		c = p2[1];
320		do {
321			if (*p1 != c)
322				continue;
323			if (advance(p1, p2)) {
324				loc1 = (char *)p1;
325				return (1);
326			}
327		} while (*p1++);
328		return (0);
329	}
330		/* regular algorithm */
331	do {
332		if (advance(p1, p2)) {
333			loc1 = (char *)p1;
334			return (1);
335		}
336	} while (*p1++);
337	return (0);
338}
339
340int
341advance(const char *lp, const char *ep)
342{
343	const char *curlp;
344	int c;
345	char *bbeg;
346	register char neg;
347	size_t ct;
348
349	for (;;) {
350		neg = 0;
351		switch (*ep++) {
352
353		case CCHR:
354			if (*ep++ == *lp++)
355				continue;
356			return (0);
357			/*FALLTHRU*/
358
359		case CDOT:
360			if (*lp++)
361				continue;
362			return (0);
363			/*FALLTHRU*/
364
365		case CDOL:
366			if (*lp == 0)
367				continue;
368			return (0);
369			/*FALLTHRU*/
370
371		case CCEOF:
372			loc2 = (char *)lp;
373			return (1);
374			/*FALLTHRU*/
375
376		case CXCL:
377			c = (unsigned char)*lp++;
378			if (ISTHERE(c)) {
379				ep += 32;
380				continue;
381			}
382			return (0);
383			/*FALLTHRU*/
384
385		case NCCL:
386			neg = 1;
387			/*FALLTHRU*/
388
389		case CCL:
390			c = *lp++;
391			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
392				ep += 16;
393				continue;
394			}
395			return (0);
396			/*FALLTHRU*/
397
398		case CBRA:
399			braslist[*ep++] = (char *)lp;
400			continue;
401			/*FALLTHRU*/
402
403		case CKET:
404			braelist[*ep++] = (char *)lp;
405			continue;
406			/*FALLTHRU*/
407
408		case CCHR | RNGE:
409			c = *ep++;
410			getrnge(ep);
411			while (low--)
412				if (*lp++ != c)
413					return (0);
414			curlp = lp;
415			while (size--)
416				if (*lp++ != c)
417					break;
418			if (size < 0)
419				lp++;
420			ep += 2;
421			goto star;
422			/*FALLTHRU*/
423
424		case CDOT | RNGE:
425			getrnge(ep);
426			while (low--)
427				if (*lp++ == '\0')
428					return (0);
429			curlp = lp;
430			while (size--)
431				if (*lp++ == '\0')
432					break;
433			if (size < 0)
434				lp++;
435			ep += 2;
436			goto star;
437			/*FALLTHRU*/
438
439		case CXCL | RNGE:
440			getrnge(ep + 32);
441			while (low--) {
442				c = (unsigned char)*lp++;
443				if (!ISTHERE(c))
444					return (0);
445			}
446			curlp = lp;
447			while (size--) {
448				c = (unsigned char)*lp++;
449				if (!ISTHERE(c))
450					break;
451			}
452			if (size < 0)
453				lp++;
454			ep += 34;		/* 32 + 2 */
455			goto star;
456			/*FALLTHRU*/
457
458		case NCCL | RNGE:
459			neg = 1;
460			/*FALLTHRU*/
461
462		case CCL | RNGE:
463			getrnge(ep + 16);
464			while (low--) {
465				c = *lp++;
466				if (((c & 0200) || !ISTHERE(c)) ^ neg)
467					return (0);
468			}
469			curlp = lp;
470			while (size--) {
471				c = *lp++;
472				if (((c & 0200) || !ISTHERE(c)) ^ neg)
473					break;
474			}
475			if (size < 0)
476				lp++;
477			ep += 18;		/* 16 + 2 */
478			goto star;
479			/*FALLTHRU*/
480
481		case CBACK:
482			bbeg = braslist[*ep];
483			ct = braelist[*ep++] - bbeg;
484
485			if (ecmp(bbeg, lp, ct)) {
486				lp += ct;
487				continue;
488			}
489			return (0);
490			/*FALLTHRU*/
491
492		case CBACK | STAR:
493			bbeg = braslist[*ep];
494			ct = braelist[*ep++] - bbeg;
495			curlp = lp;
496			while (ecmp(bbeg, lp, ct))
497				lp += ct;
498
499			while (lp >= curlp) {
500				if (advance(lp, ep))
501					return (1);
502				lp -= ct;
503			}
504			return (0);
505			/*FALLTHRU*/
506
507		case CDOT | STAR:
508			curlp = lp;
509			while (*lp++)
510				;
511			goto star;
512			/*FALLTHRU*/
513
514		case CCHR | STAR:
515			curlp = lp;
516			while (*lp++ == *ep)
517				;
518			ep++;
519			goto star;
520			/*FALLTHRU*/
521
522		case CXCL | STAR:
523			curlp = lp;
524			do {
525				c = (unsigned char)*lp++;
526			} while (ISTHERE(c));
527			ep += 32;
528			goto star;
529			/*FALLTHRU*/
530
531		case NCCL | STAR:
532			neg = 1;
533			/*FALLTHRU*/
534
535		case CCL | STAR:
536			curlp = lp;
537			do {
538				c = *lp++;
539			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
540			ep += 16;
541			goto star;
542			/*FALLTHRU*/
543
544		star:
545			do {
546				if (--lp == locs)
547					break;
548				if (advance(lp, ep))
549					return (1);
550			} while (lp > curlp);
551			return (0);
552
553		}
554	}
555	/*NOTREACHED*/
556}
557
558static void
559getrnge(const char *str)
560{
561	low = *str++ & 0377;
562	size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
563}
564
565#ifdef	__cplusplus
566}
567#endif
568
569#endif	/* _REGEXP_H */
570