xref: /illumos-gate/usr/src/cmd/awk/b.c (revision 3ee4fc2a)
1*3ee4fc2aSCody Peter Mello /*
2*3ee4fc2aSCody Peter Mello  * Copyright (C) Lucent Technologies 1997
3*3ee4fc2aSCody Peter Mello  * All Rights Reserved
4*3ee4fc2aSCody Peter Mello  *
5*3ee4fc2aSCody Peter Mello  * Permission to use, copy, modify, and distribute this software and
6*3ee4fc2aSCody Peter Mello  * its documentation for any purpose and without fee is hereby
7*3ee4fc2aSCody Peter Mello  * granted, provided that the above copyright notice appear in all
8*3ee4fc2aSCody Peter Mello  * copies and that both that the copyright notice and this
9*3ee4fc2aSCody Peter Mello  * permission notice and warranty disclaimer appear in supporting
10*3ee4fc2aSCody Peter Mello  * documentation, and that the name Lucent Technologies or any of
11*3ee4fc2aSCody Peter Mello  * its entities not be used in advertising or publicity pertaining
12*3ee4fc2aSCody Peter Mello  * to distribution of the software without specific, written prior
13*3ee4fc2aSCody Peter Mello  * permission.
14*3ee4fc2aSCody Peter Mello  *
15*3ee4fc2aSCody Peter Mello  * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*3ee4fc2aSCody Peter Mello  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*3ee4fc2aSCody Peter Mello  * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*3ee4fc2aSCody Peter Mello  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*3ee4fc2aSCody Peter Mello  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*3ee4fc2aSCody Peter Mello  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*3ee4fc2aSCody Peter Mello  * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*3ee4fc2aSCody Peter Mello  * THIS SOFTWARE.
23*3ee4fc2aSCody Peter Mello  */
24*3ee4fc2aSCody Peter Mello 
257c478bd9Sstevel@tonic-gate /*
267c478bd9Sstevel@tonic-gate  * CDDL HEADER START
277c478bd9Sstevel@tonic-gate  *
287c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
297c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
307c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
317c478bd9Sstevel@tonic-gate  * with the License.
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
347c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
357c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
367c478bd9Sstevel@tonic-gate  * and limitations under the License.
377c478bd9Sstevel@tonic-gate  *
387c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
397c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
407c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
417c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
427c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
437c478bd9Sstevel@tonic-gate  *
447c478bd9Sstevel@tonic-gate  * CDDL HEADER END
457c478bd9Sstevel@tonic-gate  */
467c478bd9Sstevel@tonic-gate 
477c478bd9Sstevel@tonic-gate /*
481ee2e5faSnakanon  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
497c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
507c478bd9Sstevel@tonic-gate  */
517c478bd9Sstevel@tonic-gate 
521ee2e5faSnakanon /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
531ee2e5faSnakanon /*	  All Rights Reserved  	*/
541ee2e5faSnakanon 
55*3ee4fc2aSCody Peter Mello /* lasciate ogne speranza, voi ch'intrate. */
56*3ee4fc2aSCody Peter Mello 
577c478bd9Sstevel@tonic-gate #define	DEBUG
587c478bd9Sstevel@tonic-gate 
597c478bd9Sstevel@tonic-gate #include "awk.h"
607c478bd9Sstevel@tonic-gate #include "y.tab.h"
617c478bd9Sstevel@tonic-gate 
627c478bd9Sstevel@tonic-gate #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
637c478bd9Sstevel@tonic-gate 				/* NCHARS is 2**n */
641ee2e5faSnakanon #define	MAXLIN (3 * LINE_MAX)
657c478bd9Sstevel@tonic-gate 
66*3ee4fc2aSCody Peter Mello #define	type(v)		(v)->nobj	/* badly overloaded here */
67*3ee4fc2aSCody Peter Mello #define	info(v)		(v)->ntype	/* badly overloaded here */
681ee2e5faSnakanon #define	left(v)		(v)->narg[0]
691ee2e5faSnakanon #define	right(v)	(v)->narg[1]
701ee2e5faSnakanon #define	parent(v)	(v)->nnext
717c478bd9Sstevel@tonic-gate 
721ee2e5faSnakanon #define	LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
73*3ee4fc2aSCody Peter Mello #define	ELEAF	case EMPTYRE:		/* empty string in regexp */
741ee2e5faSnakanon #define	UNARY	case STAR: case PLUS: case QUEST:
757c478bd9Sstevel@tonic-gate 
761ee2e5faSnakanon /*
771ee2e5faSnakanon  * encoding in tree Nodes:
78*3ee4fc2aSCody Peter Mello  *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
791ee2e5faSnakanon  *		left is index, right contains value or pointer to value
801ee2e5faSnakanon  *	unary (STAR, PLUS, QUEST): left is child, right is null
811ee2e5faSnakanon  *	binary (CAT, OR): left and right are children
821ee2e5faSnakanon  *	parent contains pointer to parent
831ee2e5faSnakanon  */
847c478bd9Sstevel@tonic-gate 
85*3ee4fc2aSCody Peter Mello int	*setvec;
86*3ee4fc2aSCody Peter Mello int	*tmpset;
87*3ee4fc2aSCody Peter Mello int	maxsetvec = 0;
887c478bd9Sstevel@tonic-gate 
897c478bd9Sstevel@tonic-gate int	rtok;		/* next token in current re */
907c478bd9Sstevel@tonic-gate int	rlxval;
91*3ee4fc2aSCody Peter Mello static uschar	*rlxstr;
92*3ee4fc2aSCody Peter Mello static uschar	*prestr;	/* current position in current re */
93*3ee4fc2aSCody Peter Mello static uschar	*lastre;	/* origin of last re */
947c478bd9Sstevel@tonic-gate 
957c478bd9Sstevel@tonic-gate static	int setcnt;
967c478bd9Sstevel@tonic-gate static	int poscnt;
977c478bd9Sstevel@tonic-gate 
98*3ee4fc2aSCody Peter Mello char	*patbeg;
997c478bd9Sstevel@tonic-gate int	patlen;
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate #define	NFA	20	/* cache this many dynamic fa's */
1027c478bd9Sstevel@tonic-gate fa	*fatab[NFA];
1037c478bd9Sstevel@tonic-gate int	nfatab	= 0;	/* entries in fatab */
1047c478bd9Sstevel@tonic-gate 
105*3ee4fc2aSCody Peter Mello static fa	*mkdfa(const char *, int);
1061ee2e5faSnakanon static int	makeinit(fa *, int);
1071ee2e5faSnakanon static void	penter(Node *);
1081ee2e5faSnakanon static void	freetr(Node *);
109*3ee4fc2aSCody Peter Mello static void	overflo(const char *);
110*3ee4fc2aSCody Peter Mello static void	growvec(const char *);
1111ee2e5faSnakanon static void	cfoll(fa *, Node *);
1121ee2e5faSnakanon static void	follow(Node *);
113*3ee4fc2aSCody Peter Mello static Node	*reparse(const char *);
1141ee2e5faSnakanon static int	relex(void);
1151ee2e5faSnakanon static void	freefa(fa *);
1161ee2e5faSnakanon static int	cgoto(fa *, int, int);
1171ee2e5faSnakanon 
1181ee2e5faSnakanon fa *
makedfa(const char * s,int anchor)119*3ee4fc2aSCody Peter Mello makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
1207c478bd9Sstevel@tonic-gate {
1217c478bd9Sstevel@tonic-gate 	int i, use, nuse;
1227c478bd9Sstevel@tonic-gate 	fa *pfa;
123*3ee4fc2aSCody Peter Mello 	static int now = 1;
124*3ee4fc2aSCody Peter Mello 
125*3ee4fc2aSCody Peter Mello 	if (setvec == NULL) {	/* first time through any RE */
126*3ee4fc2aSCody Peter Mello 		maxsetvec = MAXLIN;
127*3ee4fc2aSCody Peter Mello 		setvec = (int *)malloc(maxsetvec * sizeof (int));
128*3ee4fc2aSCody Peter Mello 		tmpset = (int *)malloc(maxsetvec * sizeof (int));
129*3ee4fc2aSCody Peter Mello 		if (setvec == NULL || tmpset == NULL)
130*3ee4fc2aSCody Peter Mello 			overflo("out of space initializing makedfa");
131*3ee4fc2aSCody Peter Mello 	}
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate 	if (compile_time)	/* a constant for sure */
1341ee2e5faSnakanon 		return (mkdfa(s, anchor));
1351ee2e5faSnakanon 	for (i = 0; i < nfatab; i++) {	/* is it there already? */
1361ee2e5faSnakanon 		if (fatab[i]->anchor == anchor &&
137*3ee4fc2aSCody Peter Mello 		    strcmp((const char *)fatab[i]->restr, s) == 0) {
138*3ee4fc2aSCody Peter Mello 			fatab[i]->use = now++;
1391ee2e5faSnakanon 			return (fatab[i]);
1401ee2e5faSnakanon 		}
1417c478bd9Sstevel@tonic-gate 	}
1427c478bd9Sstevel@tonic-gate 	pfa = mkdfa(s, anchor);
1437c478bd9Sstevel@tonic-gate 	if (nfatab < NFA) {	/* room for another */
1447c478bd9Sstevel@tonic-gate 		fatab[nfatab] = pfa;
145*3ee4fc2aSCody Peter Mello 		fatab[nfatab]->use = now++;
1467c478bd9Sstevel@tonic-gate 		nfatab++;
1471ee2e5faSnakanon 		return (pfa);
1487c478bd9Sstevel@tonic-gate 	}
1497c478bd9Sstevel@tonic-gate 	use = fatab[0]->use;	/* replace least-recently used */
1507c478bd9Sstevel@tonic-gate 	nuse = 0;
1517c478bd9Sstevel@tonic-gate 	for (i = 1; i < nfatab; i++)
1527c478bd9Sstevel@tonic-gate 		if (fatab[i]->use < use) {
1537c478bd9Sstevel@tonic-gate 			use = fatab[i]->use;
1547c478bd9Sstevel@tonic-gate 			nuse = i;
1557c478bd9Sstevel@tonic-gate 		}
1567c478bd9Sstevel@tonic-gate 	freefa(fatab[nuse]);
1577c478bd9Sstevel@tonic-gate 	fatab[nuse] = pfa;
158*3ee4fc2aSCody Peter Mello 	pfa->use = now++;
1591ee2e5faSnakanon 	return (pfa);
1607c478bd9Sstevel@tonic-gate }
1617c478bd9Sstevel@tonic-gate 
162*3ee4fc2aSCody Peter Mello /*
163*3ee4fc2aSCody Peter Mello  * does the real work of making a dfa
164*3ee4fc2aSCody Peter Mello  * anchor = 1 for anchored matches, else 0
165*3ee4fc2aSCody Peter Mello  */
1661ee2e5faSnakanon fa *
mkdfa(const char * s,int anchor)167*3ee4fc2aSCody Peter Mello mkdfa(const char *s, int anchor)
1687c478bd9Sstevel@tonic-gate {
1691ee2e5faSnakanon 	Node *p, *p1;
1707c478bd9Sstevel@tonic-gate 	fa *f;
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate 	p = reparse(s);
1737c478bd9Sstevel@tonic-gate 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
1747c478bd9Sstevel@tonic-gate 		/* put ALL STAR in front of reg.  exp. */
1757c478bd9Sstevel@tonic-gate 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
1767c478bd9Sstevel@tonic-gate 		/* put FINAL after reg.  exp. */
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate 	poscnt = 0;
1797c478bd9Sstevel@tonic-gate 	penter(p1);	/* enter parent pointers and leaf indices */
1801ee2e5faSnakanon 	if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
181*3ee4fc2aSCody Peter Mello 		overflo("out of space for fa");
1821ee2e5faSnakanon 	/* penter has computed number of positions in re */
1831ee2e5faSnakanon 	f->accept = poscnt-1;
1847c478bd9Sstevel@tonic-gate 	cfoll(f, p1);	/* set up follow sets */
1857c478bd9Sstevel@tonic-gate 	freetr(p1);
1861ee2e5faSnakanon 	if ((f->posns[0] =
1871ee2e5faSnakanon 	    (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
1887c478bd9Sstevel@tonic-gate 			overflo("out of space in makedfa");
1891ee2e5faSnakanon 	}
1901ee2e5faSnakanon 	if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
1917c478bd9Sstevel@tonic-gate 		overflo("out of space in makedfa");
1927c478bd9Sstevel@tonic-gate 	*f->posns[1] = 0;
1937c478bd9Sstevel@tonic-gate 	f->initstat = makeinit(f, anchor);
1947c478bd9Sstevel@tonic-gate 	f->anchor = anchor;
195*3ee4fc2aSCody Peter Mello 	f->restr = (uschar *)tostring(s);
1961ee2e5faSnakanon 	return (f);
1977c478bd9Sstevel@tonic-gate }
1987c478bd9Sstevel@tonic-gate 
1991ee2e5faSnakanon static int
makeinit(fa * f,int anchor)2001ee2e5faSnakanon makeinit(fa *f, int anchor)
2017c478bd9Sstevel@tonic-gate {
202*3ee4fc2aSCody Peter Mello 	int i, k;
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate 	f->curstat = 2;
2057c478bd9Sstevel@tonic-gate 	f->out[2] = 0;
2067c478bd9Sstevel@tonic-gate 	f->reset = 0;
2077c478bd9Sstevel@tonic-gate 	k = *(f->re[0].lfollow);
2081ee2e5faSnakanon 	xfree(f->posns[2]);
2091ee2e5faSnakanon 	if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
2107c478bd9Sstevel@tonic-gate 		overflo("out of space in makeinit");
2111ee2e5faSnakanon 	for (i = 0; i <= k; i++) {
2127c478bd9Sstevel@tonic-gate 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
2137c478bd9Sstevel@tonic-gate 	}
2147c478bd9Sstevel@tonic-gate 	if ((f->posns[2])[1] == f->accept)
2157c478bd9Sstevel@tonic-gate 		f->out[2] = 1;
2161ee2e5faSnakanon 	for (i = 0; i < NCHARS; i++)
2177c478bd9Sstevel@tonic-gate 		f->gototab[2][i] = 0;
2187c478bd9Sstevel@tonic-gate 	f->curstat = cgoto(f, 2, HAT);
2197c478bd9Sstevel@tonic-gate 	if (anchor) {
2207c478bd9Sstevel@tonic-gate 		*f->posns[2] = k-1;	/* leave out position 0 */
2211ee2e5faSnakanon 		for (i = 0; i < k; i++) {
2227c478bd9Sstevel@tonic-gate 			(f->posns[0])[i] = (f->posns[2])[i];
2237c478bd9Sstevel@tonic-gate 		}
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate 		f->out[0] = f->out[2];
2267c478bd9Sstevel@tonic-gate 		if (f->curstat != 2)
2277c478bd9Sstevel@tonic-gate 			--(*f->posns[f->curstat]);
2287c478bd9Sstevel@tonic-gate 	}
2291ee2e5faSnakanon 	return (f->curstat);
2307c478bd9Sstevel@tonic-gate }
2317c478bd9Sstevel@tonic-gate 
2321ee2e5faSnakanon void
penter(Node * p)2331ee2e5faSnakanon penter(Node *p)	/* set up parent pointers and leaf indices */
2347c478bd9Sstevel@tonic-gate {
2351ee2e5faSnakanon 	switch (type(p)) {
236*3ee4fc2aSCody Peter Mello 	ELEAF
2377c478bd9Sstevel@tonic-gate 	LEAF
238*3ee4fc2aSCody Peter Mello 		info(p) = poscnt;
239*3ee4fc2aSCody Peter Mello 		poscnt++;
2407c478bd9Sstevel@tonic-gate 		break;
2417c478bd9Sstevel@tonic-gate 	UNARY
2427c478bd9Sstevel@tonic-gate 		penter(left(p));
2437c478bd9Sstevel@tonic-gate 		parent(left(p)) = p;
2447c478bd9Sstevel@tonic-gate 		break;
2457c478bd9Sstevel@tonic-gate 	case CAT:
2467c478bd9Sstevel@tonic-gate 	case OR:
2477c478bd9Sstevel@tonic-gate 		penter(left(p));
2487c478bd9Sstevel@tonic-gate 		penter(right(p));
2497c478bd9Sstevel@tonic-gate 		parent(left(p)) = p;
2507c478bd9Sstevel@tonic-gate 		parent(right(p)) = p;
2517c478bd9Sstevel@tonic-gate 		break;
252*3ee4fc2aSCody Peter Mello 	default:	/* can't happen */
253*3ee4fc2aSCody Peter Mello 		FATAL("can't happen: unknown type %d in penter", type(p));
2547c478bd9Sstevel@tonic-gate 		break;
2557c478bd9Sstevel@tonic-gate 	}
2567c478bd9Sstevel@tonic-gate }
2577c478bd9Sstevel@tonic-gate 
2581ee2e5faSnakanon static void
freetr(Node * p)2591ee2e5faSnakanon freetr(Node *p)	/* free parse tree */
2607c478bd9Sstevel@tonic-gate {
2617c478bd9Sstevel@tonic-gate 	switch (type(p)) {
262*3ee4fc2aSCody Peter Mello 	ELEAF
2637c478bd9Sstevel@tonic-gate 	LEAF
2647c478bd9Sstevel@tonic-gate 		xfree(p);
2657c478bd9Sstevel@tonic-gate 		break;
2667c478bd9Sstevel@tonic-gate 	UNARY
2677c478bd9Sstevel@tonic-gate 		freetr(left(p));
2687c478bd9Sstevel@tonic-gate 		xfree(p);
2697c478bd9Sstevel@tonic-gate 		break;
2707c478bd9Sstevel@tonic-gate 	case CAT:
2717c478bd9Sstevel@tonic-gate 	case OR:
2727c478bd9Sstevel@tonic-gate 		freetr(left(p));
2737c478bd9Sstevel@tonic-gate 		freetr(right(p));
2747c478bd9Sstevel@tonic-gate 		xfree(p);
2757c478bd9Sstevel@tonic-gate 		break;
276*3ee4fc2aSCody Peter Mello 	default:	/* can't happen */
277*3ee4fc2aSCody Peter Mello 		FATAL("can't happen: unknown type %d in freetr", type(p));
2787c478bd9Sstevel@tonic-gate 		break;
2797c478bd9Sstevel@tonic-gate 	}
2807c478bd9Sstevel@tonic-gate }
2817c478bd9Sstevel@tonic-gate 
282*3ee4fc2aSCody Peter Mello static void
growvec(const char * msg)283*3ee4fc2aSCody Peter Mello growvec(const char *msg)
284*3ee4fc2aSCody Peter Mello {
285*3ee4fc2aSCody Peter Mello 	maxsetvec *= 4;
286*3ee4fc2aSCody Peter Mello 	setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
287*3ee4fc2aSCody Peter Mello 	tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
288*3ee4fc2aSCody Peter Mello 	if (setvec == NULL || tmpset == NULL)
289*3ee4fc2aSCody Peter Mello 		overflo(msg);
290*3ee4fc2aSCody Peter Mello }
291*3ee4fc2aSCody Peter Mello 
292*3ee4fc2aSCody Peter Mello /*
293*3ee4fc2aSCody Peter Mello  * in the parsing of regular expressions, metacharacters like . have
294*3ee4fc2aSCody Peter Mello  * to be seen literally; \056 is not a metacharacter.
295*3ee4fc2aSCody Peter Mello  */
296*3ee4fc2aSCody Peter Mello 
297*3ee4fc2aSCody Peter Mello /*
298*3ee4fc2aSCody Peter Mello  * find and eval hex string at pp, return new p; only pick up one 8-bit
299*3ee4fc2aSCody Peter Mello  * byte (2 chars).
300*3ee4fc2aSCody Peter Mello  */
301*3ee4fc2aSCody Peter Mello int
hexstr(uschar ** pp)302*3ee4fc2aSCody Peter Mello hexstr(uschar **pp)
303*3ee4fc2aSCody Peter Mello {
304*3ee4fc2aSCody Peter Mello 	uschar *p;
305*3ee4fc2aSCody Peter Mello 	int n = 0;
306*3ee4fc2aSCody Peter Mello 	int i;
307*3ee4fc2aSCody Peter Mello 
308*3ee4fc2aSCody Peter Mello 	for (i = 0, p = (uschar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
309*3ee4fc2aSCody Peter Mello 		if (isdigit(*p))
310*3ee4fc2aSCody Peter Mello 			n = 16 * n + *p - '0';
311*3ee4fc2aSCody Peter Mello 		else if (*p >= 'a' && *p <= 'f')
312*3ee4fc2aSCody Peter Mello 			n = 16 * n + *p - 'a' + 10;
313*3ee4fc2aSCody Peter Mello 		else if (*p >= 'A' && *p <= 'F')
314*3ee4fc2aSCody Peter Mello 			n = 16 * n + *p - 'A' + 10;
315*3ee4fc2aSCody Peter Mello 	}
316*3ee4fc2aSCody Peter Mello 	*pp = (uschar *)p;
317*3ee4fc2aSCody Peter Mello 	return (n);
318*3ee4fc2aSCody Peter Mello }
319*3ee4fc2aSCody Peter Mello 
320*3ee4fc2aSCody Peter Mello #define	isoctdigit(c) ((c) >= '0' && (c) <= '7')
321*3ee4fc2aSCody Peter Mello 
322*3ee4fc2aSCody Peter Mello /* pick up next thing after a \\ and increment *pp */
323*3ee4fc2aSCody Peter Mello int
quoted(uschar ** pp)324*3ee4fc2aSCody Peter Mello quoted(uschar **pp)
3257c478bd9Sstevel@tonic-gate {
326*3ee4fc2aSCody Peter Mello 	uschar *p = *pp;
327*3ee4fc2aSCody Peter Mello 	int c;
328*3ee4fc2aSCody Peter Mello 
329*3ee4fc2aSCody Peter Mello 	if ((c = *p++) == 't')
330*3ee4fc2aSCody Peter Mello 		c = '\t';
331*3ee4fc2aSCody Peter Mello 	else if (c == 'n')
332*3ee4fc2aSCody Peter Mello 		c = '\n';
333*3ee4fc2aSCody Peter Mello 	else if (c == 'f')
334*3ee4fc2aSCody Peter Mello 		c = '\f';
335*3ee4fc2aSCody Peter Mello 	else if (c == 'r')
336*3ee4fc2aSCody Peter Mello 		c = '\r';
337*3ee4fc2aSCody Peter Mello 	else if (c == 'b')
338*3ee4fc2aSCody Peter Mello 		c = '\b';
339*3ee4fc2aSCody Peter Mello 	else if (c == '\\')
340*3ee4fc2aSCody Peter Mello 		c = '\\';
341*3ee4fc2aSCody Peter Mello 	else if (c == 'x') {	/* hexadecimal goo follows */
342*3ee4fc2aSCody Peter Mello 		c = hexstr(&p);	/* this adds a null if number is invalid */
343*3ee4fc2aSCody Peter Mello 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
344*3ee4fc2aSCody Peter Mello 		int n = c - '0';
345*3ee4fc2aSCody Peter Mello 		if (isoctdigit(*p)) {
346*3ee4fc2aSCody Peter Mello 			n = 8 * n + *p++ - '0';
347*3ee4fc2aSCody Peter Mello 			if (isoctdigit(*p))
348*3ee4fc2aSCody Peter Mello 				n = 8 * n + *p++ - '0';
349*3ee4fc2aSCody Peter Mello 		}
350*3ee4fc2aSCody Peter Mello 		c = n;
351*3ee4fc2aSCody Peter Mello 	} /* else */
352*3ee4fc2aSCody Peter Mello 		/* c = c; */
353*3ee4fc2aSCody Peter Mello 	*pp = p;
354*3ee4fc2aSCody Peter Mello 	return (c);
355*3ee4fc2aSCody Peter Mello }
356*3ee4fc2aSCody Peter Mello 
357*3ee4fc2aSCody Peter Mello char *
cclenter(const char * argp)358*3ee4fc2aSCody Peter Mello cclenter(const char *argp)	/* add a character class */
359*3ee4fc2aSCody Peter Mello {
360*3ee4fc2aSCody Peter Mello 	int i, c, c2;
361*3ee4fc2aSCody Peter Mello 	uschar *p = (uschar *)argp;
362*3ee4fc2aSCody Peter Mello 	uschar *op, *bp;
363*3ee4fc2aSCody Peter Mello 	static uschar *buf = NULL;
364*3ee4fc2aSCody Peter Mello 	static size_t bufsz = 100;
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate 	op = p;
367*3ee4fc2aSCody Peter Mello 	if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
368*3ee4fc2aSCody Peter Mello 		FATAL("out of space for character class [%.10s...] 1", p);
369*3ee4fc2aSCody Peter Mello 	bp = buf;
370*3ee4fc2aSCody Peter Mello 	for (i = 0; (c = *p++) != 0; ) {
3717c478bd9Sstevel@tonic-gate 		if (c == '\\') {
372*3ee4fc2aSCody Peter Mello 			c = quoted(&p);
373*3ee4fc2aSCody Peter Mello 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
3747c478bd9Sstevel@tonic-gate 			if (*p != 0) {
375*3ee4fc2aSCody Peter Mello 				c = bp[-1];
376*3ee4fc2aSCody Peter Mello 				c2 = *p++;
377*3ee4fc2aSCody Peter Mello 				if (c2 == '\\')
378*3ee4fc2aSCody Peter Mello 					c2 = quoted(&p);
379*3ee4fc2aSCody Peter Mello 				if (c > c2) {	/* empty; ignore */
380*3ee4fc2aSCody Peter Mello 					bp--;
381*3ee4fc2aSCody Peter Mello 					i--;
382*3ee4fc2aSCody Peter Mello 					continue;
383*3ee4fc2aSCody Peter Mello 				}
384*3ee4fc2aSCody Peter Mello 				while (c < c2) {
385*3ee4fc2aSCody Peter Mello 					if (!adjbuf((char **)&buf, &bufsz,
386*3ee4fc2aSCody Peter Mello 					    bp-buf+2, 100, (char **)&bp,
387*3ee4fc2aSCody Peter Mello 					    "cclenter1")) {
388*3ee4fc2aSCody Peter Mello 						FATAL(
389*3ee4fc2aSCody Peter Mello 			"out of space for character class [%.10s...] 2", p);
390*3ee4fc2aSCody Peter Mello 					}
391*3ee4fc2aSCody Peter Mello 					*bp++ = ++c;
392*3ee4fc2aSCody Peter Mello 					i++;
3937c478bd9Sstevel@tonic-gate 				}
3947c478bd9Sstevel@tonic-gate 				continue;
3957c478bd9Sstevel@tonic-gate 			}
3967c478bd9Sstevel@tonic-gate 		}
397*3ee4fc2aSCody Peter Mello 		if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp,
398*3ee4fc2aSCody Peter Mello 		    "cclenter2"))
399*3ee4fc2aSCody Peter Mello 			FATAL(
400*3ee4fc2aSCody Peter Mello 			    "out of space for character class [%.10s...] 3", p);
401*3ee4fc2aSCody Peter Mello 		*bp++ = c;
402*3ee4fc2aSCody Peter Mello 		i++;
4037c478bd9Sstevel@tonic-gate 	}
404*3ee4fc2aSCody Peter Mello 	*bp = '\0';
405*3ee4fc2aSCody Peter Mello 	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
4067c478bd9Sstevel@tonic-gate 	xfree(op);
407*3ee4fc2aSCody Peter Mello 	return ((char *)tostring((char *)buf));
4087c478bd9Sstevel@tonic-gate }
4097c478bd9Sstevel@tonic-gate 
4101ee2e5faSnakanon static void
overflo(const char * s)411*3ee4fc2aSCody Peter Mello overflo(const char *s)
4127c478bd9Sstevel@tonic-gate {
413*3ee4fc2aSCody Peter Mello 	FATAL("regular expression too big: %.30s...", gettext((char *)s));
4147c478bd9Sstevel@tonic-gate }
4157c478bd9Sstevel@tonic-gate 
4161ee2e5faSnakanon /* enter follow set of each leaf of vertex v into lfollow[leaf] */
4171ee2e5faSnakanon static void
cfoll(fa * f,Node * v)4181ee2e5faSnakanon cfoll(fa *f, Node *v)
4197c478bd9Sstevel@tonic-gate {
420*3ee4fc2aSCody Peter Mello 	int i;
421*3ee4fc2aSCody Peter Mello 	int *p;
4227c478bd9Sstevel@tonic-gate 
4231ee2e5faSnakanon 	switch (type(v)) {
424*3ee4fc2aSCody Peter Mello 	ELEAF
4257c478bd9Sstevel@tonic-gate 	LEAF
426*3ee4fc2aSCody Peter Mello 		f->re[info(v)].ltype = type(v);
427*3ee4fc2aSCody Peter Mello 		f->re[info(v)].lval.np = right(v);
428*3ee4fc2aSCody Peter Mello 		while (f->accept >= maxsetvec) {	/* guessing here! */
429*3ee4fc2aSCody Peter Mello 			growvec("out of space in cfoll()");
430*3ee4fc2aSCody Peter Mello 		}
4311ee2e5faSnakanon 		for (i = 0; i <= f->accept; i++)
4327c478bd9Sstevel@tonic-gate 			setvec[i] = 0;
4337c478bd9Sstevel@tonic-gate 		setcnt = 0;
4347c478bd9Sstevel@tonic-gate 		follow(v);	/* computes setvec and setcnt */
4351ee2e5faSnakanon 		if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
436*3ee4fc2aSCody Peter Mello 			overflo("out of space building follow set");
437*3ee4fc2aSCody Peter Mello 		f->re[info(v)].lfollow = p;
4387c478bd9Sstevel@tonic-gate 		*p = setcnt;
4391ee2e5faSnakanon 		for (i = f->accept; i >= 0; i--) {
4401ee2e5faSnakanon 			if (setvec[i] == 1)
4411ee2e5faSnakanon 				*++p = i;
4421ee2e5faSnakanon 		}
4437c478bd9Sstevel@tonic-gate 		break;
4447c478bd9Sstevel@tonic-gate 	UNARY
4451ee2e5faSnakanon 		cfoll(f, left(v));
4467c478bd9Sstevel@tonic-gate 		break;
4477c478bd9Sstevel@tonic-gate 	case CAT:
4487c478bd9Sstevel@tonic-gate 	case OR:
4491ee2e5faSnakanon 		cfoll(f, left(v));
4501ee2e5faSnakanon 		cfoll(f, right(v));
4517c478bd9Sstevel@tonic-gate 		break;
452*3ee4fc2aSCody Peter Mello 	default:	/* can't happen */
453*3ee4fc2aSCody Peter Mello 		FATAL("can't happen: unknown type %d in cfoll", type(v));
4547c478bd9Sstevel@tonic-gate 	}
4557c478bd9Sstevel@tonic-gate }
4567c478bd9Sstevel@tonic-gate 
4571ee2e5faSnakanon /*
4581ee2e5faSnakanon  * collects initially active leaves of p into setvec
4591ee2e5faSnakanon  * returns 0 or 1 depending on whether p matches empty string
4601ee2e5faSnakanon  */
4611ee2e5faSnakanon static int
first(Node * p)4621ee2e5faSnakanon first(Node *p)
4637c478bd9Sstevel@tonic-gate {
464*3ee4fc2aSCody Peter Mello 	int b, lp;
4657c478bd9Sstevel@tonic-gate 
4661ee2e5faSnakanon 	switch (type(p)) {
467*3ee4fc2aSCody Peter Mello 	ELEAF
4687c478bd9Sstevel@tonic-gate 	LEAF
469*3ee4fc2aSCody Peter Mello 		lp = info(p);	/* look for high-water mark of subscripts */
470*3ee4fc2aSCody Peter Mello 		while (setcnt >= maxsetvec || lp >= maxsetvec) {
471*3ee4fc2aSCody Peter Mello 			/* guessing here! */
472*3ee4fc2aSCody Peter Mello 			growvec("out of space in first()");
473*3ee4fc2aSCody Peter Mello 		}
474*3ee4fc2aSCody Peter Mello 		if (type(p) == EMPTYRE) {
475*3ee4fc2aSCody Peter Mello 			setvec[lp] = 0;
476*3ee4fc2aSCody Peter Mello 			return (0);
477*3ee4fc2aSCody Peter Mello 		}
478*3ee4fc2aSCody Peter Mello 		if (setvec[lp] != 1) {
479*3ee4fc2aSCody Peter Mello 			setvec[lp] = 1;
4807c478bd9Sstevel@tonic-gate 			setcnt++;
4817c478bd9Sstevel@tonic-gate 		}
482*3ee4fc2aSCody Peter Mello 		if (type(p) == CCL && (*(char *)right(p)) == '\0')
4831ee2e5faSnakanon 			return (0);		/* empty CCL */
4841ee2e5faSnakanon 		else
4851ee2e5faSnakanon 			return (1);
4867c478bd9Sstevel@tonic-gate 	case PLUS:
4871ee2e5faSnakanon 		if (first(left(p)) == 0)
4881ee2e5faSnakanon 			return (0);
4891ee2e5faSnakanon 		return (1);
4907c478bd9Sstevel@tonic-gate 	case STAR:
4917c478bd9Sstevel@tonic-gate 	case QUEST:
4921ee2e5faSnakanon 		(void) first(left(p));
4931ee2e5faSnakanon 		return (0);
4947c478bd9Sstevel@tonic-gate 	case CAT:
4951ee2e5faSnakanon 		if (first(left(p)) == 0 && first(right(p)) == 0)
4961ee2e5faSnakanon 			return (0);
4971ee2e5faSnakanon 		return (1);
4987c478bd9Sstevel@tonic-gate 	case OR:
4997c478bd9Sstevel@tonic-gate 		b = first(right(p));
5001ee2e5faSnakanon 		if (first(left(p)) == 0 || b == 0)
5011ee2e5faSnakanon 			return (0);
5021ee2e5faSnakanon 		return (1);
5037c478bd9Sstevel@tonic-gate 	}
504*3ee4fc2aSCody Peter Mello 	FATAL("can't happen: unknown type %d in first", type(p));
5057c478bd9Sstevel@tonic-gate }
5067c478bd9Sstevel@tonic-gate 
5071ee2e5faSnakanon /* collects leaves that can follow v into setvec */
5081ee2e5faSnakanon static void
follow(Node * v)5091ee2e5faSnakanon follow(Node *v)
5107c478bd9Sstevel@tonic-gate {
5117c478bd9Sstevel@tonic-gate 	Node *p;
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 	if (type(v) == FINAL)
5147c478bd9Sstevel@tonic-gate 		return;
5157c478bd9Sstevel@tonic-gate 	p = parent(v);
5167c478bd9Sstevel@tonic-gate 	switch (type(p)) {
5177c478bd9Sstevel@tonic-gate 	case STAR:
5187c478bd9Sstevel@tonic-gate 	case PLUS:
5191ee2e5faSnakanon 		(void) first(v);
5207c478bd9Sstevel@tonic-gate 		follow(p);
5217c478bd9Sstevel@tonic-gate 		return;
5227c478bd9Sstevel@tonic-gate 
5237c478bd9Sstevel@tonic-gate 	case OR:
5247c478bd9Sstevel@tonic-gate 	case QUEST:
5257c478bd9Sstevel@tonic-gate 		follow(p);
5267c478bd9Sstevel@tonic-gate 		return;
5277c478bd9Sstevel@tonic-gate 
5287c478bd9Sstevel@tonic-gate 	case CAT:
5297c478bd9Sstevel@tonic-gate 		if (v == left(p)) {	/* v is left child of p */
5307c478bd9Sstevel@tonic-gate 			if (first(right(p)) == 0) {
5317c478bd9Sstevel@tonic-gate 				follow(p);
5327c478bd9Sstevel@tonic-gate 				return;
5337c478bd9Sstevel@tonic-gate 			}
5341ee2e5faSnakanon 		} else		/* v is right child */
5357c478bd9Sstevel@tonic-gate 			follow(p);
5367c478bd9Sstevel@tonic-gate 		return;
5377c478bd9Sstevel@tonic-gate 	default:
538*3ee4fc2aSCody Peter Mello 		FATAL("unknown type %d in follow", type(p));
5397c478bd9Sstevel@tonic-gate 		break;
5407c478bd9Sstevel@tonic-gate 	}
5417c478bd9Sstevel@tonic-gate }
5427c478bd9Sstevel@tonic-gate 
5431ee2e5faSnakanon static int
member(int c,const char * sarg)544*3ee4fc2aSCody Peter Mello member(int c, const char *sarg)	/* is c in s? */
5457c478bd9Sstevel@tonic-gate {
546*3ee4fc2aSCody Peter Mello 	uschar *s = (uschar *)sarg;
547*3ee4fc2aSCody Peter Mello 
5487c478bd9Sstevel@tonic-gate 	while (*s)
5497c478bd9Sstevel@tonic-gate 		if (c == *s++)
5501ee2e5faSnakanon 			return (1);
5511ee2e5faSnakanon 	return (0);
5527c478bd9Sstevel@tonic-gate }
5537c478bd9Sstevel@tonic-gate 
5547c478bd9Sstevel@tonic-gate 
5551ee2e5faSnakanon int
match(fa * f,const char * p0)556*3ee4fc2aSCody Peter Mello match(fa *f, const char *p0)	/* shortest match ? */
5577c478bd9Sstevel@tonic-gate {
558*3ee4fc2aSCody Peter Mello 	int s, ns;
559*3ee4fc2aSCody Peter Mello 	uschar *p = (uschar *)p0;
5607c478bd9Sstevel@tonic-gate 
5611ee2e5faSnakanon 	s = f->reset ? makeinit(f, 0) : f->initstat;
5627c478bd9Sstevel@tonic-gate 	if (f->out[s])
5631ee2e5faSnakanon 		return (1);
5647c478bd9Sstevel@tonic-gate 	do {
5651ee2e5faSnakanon 		if ((ns = f->gototab[s][*p]) != 0)
5661ee2e5faSnakanon 			s = ns;
5677c478bd9Sstevel@tonic-gate 		else
5681ee2e5faSnakanon 			s = cgoto(f, s, *p);
5697c478bd9Sstevel@tonic-gate 		if (f->out[s])
5701ee2e5faSnakanon 			return (1);
5717c478bd9Sstevel@tonic-gate 	} while (*p++ != 0);
5721ee2e5faSnakanon 	return (0);
5737c478bd9Sstevel@tonic-gate }
5747c478bd9Sstevel@tonic-gate 
5751ee2e5faSnakanon int
pmatch(fa * f,const char * p0)576*3ee4fc2aSCody Peter Mello pmatch(fa *f, const char *p0)	/* longest match, for sub */
5777c478bd9Sstevel@tonic-gate {
578*3ee4fc2aSCody Peter Mello 	int s, ns;
579*3ee4fc2aSCody Peter Mello 	uschar *p = (uschar *)p0;
580*3ee4fc2aSCody Peter Mello 	uschar *q;
5817c478bd9Sstevel@tonic-gate 	int i, k;
5827c478bd9Sstevel@tonic-gate 
5831ee2e5faSnakanon 	if (f->reset) {
5841ee2e5faSnakanon 		f->initstat = s = makeinit(f, 1);
5851ee2e5faSnakanon 	} else {
5861ee2e5faSnakanon 		s = f->initstat;
5871ee2e5faSnakanon 	}
588*3ee4fc2aSCody Peter Mello 	patbeg = (char *)p;
5897c478bd9Sstevel@tonic-gate 	patlen = -1;
5907c478bd9Sstevel@tonic-gate 	do {
5917c478bd9Sstevel@tonic-gate 		q = p;
5927c478bd9Sstevel@tonic-gate 		do {
5937c478bd9Sstevel@tonic-gate 			if (f->out[s])		/* final state */
5947c478bd9Sstevel@tonic-gate 				patlen = q-p;
5951ee2e5faSnakanon 			if ((ns = f->gototab[s][*q]) != 0)
5961ee2e5faSnakanon 				s = ns;
5977c478bd9Sstevel@tonic-gate 			else
5981ee2e5faSnakanon 				s = cgoto(f, s, *q);
5991ee2e5faSnakanon 			if (s == 1) {	/* no transition */
6007c478bd9Sstevel@tonic-gate 				if (patlen >= 0) {
601*3ee4fc2aSCody Peter Mello 					patbeg = (char *)p;
6021ee2e5faSnakanon 					return (1);
603*3ee4fc2aSCody Peter Mello 				} else {
6047c478bd9Sstevel@tonic-gate 					goto nextin;	/* no match */
605*3ee4fc2aSCody Peter Mello 				}
6061ee2e5faSnakanon 			}
6077c478bd9Sstevel@tonic-gate 		} while (*q++ != 0);
6087c478bd9Sstevel@tonic-gate 		if (f->out[s])
6091ee2e5faSnakanon 			patlen = q - p - 1;	/* don't count $ */
6107c478bd9Sstevel@tonic-gate 		if (patlen >= 0) {
611*3ee4fc2aSCody Peter Mello 			patbeg = (char *)p;
6121ee2e5faSnakanon 			return (1);
6137c478bd9Sstevel@tonic-gate 		}
6147c478bd9Sstevel@tonic-gate 	nextin:
6157c478bd9Sstevel@tonic-gate 		s = 2;
6167c478bd9Sstevel@tonic-gate 		if (f->reset) {
6171ee2e5faSnakanon 			for (i = 2; i <= f->curstat; i++)
6187c478bd9Sstevel@tonic-gate 				xfree(f->posns[i]);
6191ee2e5faSnakanon 			k = *f->posns[0];
6201ee2e5faSnakanon 			if ((f->posns[2] =
621*3ee4fc2aSCody Peter Mello 			    (int *)calloc(k + 1, sizeof (int))) == NULL) {
6227c478bd9Sstevel@tonic-gate 				overflo("out of space in pmatch");
6231ee2e5faSnakanon 			}
6241ee2e5faSnakanon 			for (i = 0; i <= k; i++)
6257c478bd9Sstevel@tonic-gate 				(f->posns[2])[i] = (f->posns[0])[i];
6267c478bd9Sstevel@tonic-gate 			f->initstat = f->curstat = 2;
6277c478bd9Sstevel@tonic-gate 			f->out[2] = f->out[0];
6281ee2e5faSnakanon 			for (i = 0; i < NCHARS; i++)
6297c478bd9Sstevel@tonic-gate 				f->gototab[2][i] = 0;
6307c478bd9Sstevel@tonic-gate 		}
6317c478bd9Sstevel@tonic-gate 	} while (*p++ != 0);
6327c478bd9Sstevel@tonic-gate 	return (0);
6337c478bd9Sstevel@tonic-gate }
6347c478bd9Sstevel@tonic-gate 
6351ee2e5faSnakanon int
nematch(fa * f,const char * p0)636*3ee4fc2aSCody Peter Mello nematch(fa *f, const char *p0)	/* non-empty match, for sub */
6377c478bd9Sstevel@tonic-gate {
638*3ee4fc2aSCody Peter Mello 	int s, ns;
639*3ee4fc2aSCody Peter Mello 	uschar *p = (uschar *)p0;
640*3ee4fc2aSCody Peter Mello 	uschar *q;
6417c478bd9Sstevel@tonic-gate 	int i, k;
6427c478bd9Sstevel@tonic-gate 
6437c478bd9Sstevel@tonic-gate 	if (f->reset) {
6441ee2e5faSnakanon 		f->initstat = s = makeinit(f, 1);
6457c478bd9Sstevel@tonic-gate 	} else {
6467c478bd9Sstevel@tonic-gate 		s = f->initstat;
6477c478bd9Sstevel@tonic-gate 	}
6487c478bd9Sstevel@tonic-gate 	patlen = -1;
6497c478bd9Sstevel@tonic-gate 	while (*p) {
6507c478bd9Sstevel@tonic-gate 		q = p;
6517c478bd9Sstevel@tonic-gate 		do {
6527c478bd9Sstevel@tonic-gate 			if (f->out[s])		/* final state */
6537c478bd9Sstevel@tonic-gate 				patlen = q-p;
6541ee2e5faSnakanon 			if ((ns = f->gototab[s][*q]) != 0)
6551ee2e5faSnakanon 				s = ns;
6567c478bd9Sstevel@tonic-gate 			else
6571ee2e5faSnakanon 				s = cgoto(f, s, *q);
6581ee2e5faSnakanon 			if (s == 1) {	/* no transition */
6597c478bd9Sstevel@tonic-gate 				if (patlen > 0) {
660*3ee4fc2aSCody Peter Mello 					patbeg = (char *)p;
6611ee2e5faSnakanon 					return (1);
6621ee2e5faSnakanon 				} else
6637c478bd9Sstevel@tonic-gate 					goto nnextin;	/* no nonempty match */
6641ee2e5faSnakanon 			}
6657c478bd9Sstevel@tonic-gate 		} while (*q++ != 0);
6667c478bd9Sstevel@tonic-gate 		if (f->out[s])
6671ee2e5faSnakanon 			patlen = q - p - 1;	/* don't count $ */
6681ee2e5faSnakanon 		if (patlen > 0) {
669*3ee4fc2aSCody Peter Mello 			patbeg = (char *)p;
6701ee2e5faSnakanon 			return (1);
6717c478bd9Sstevel@tonic-gate 		}
6727c478bd9Sstevel@tonic-gate 	nnextin:
6737c478bd9Sstevel@tonic-gate 		s = 2;
6747c478bd9Sstevel@tonic-gate 		if (f->reset) {
6751ee2e5faSnakanon 			for (i = 2; i <= f->curstat; i++)
6767c478bd9Sstevel@tonic-gate 				xfree(f->posns[i]);
6771ee2e5faSnakanon 			k = *f->posns[0];
6781ee2e5faSnakanon 			if ((f->posns[2] =
679*3ee4fc2aSCody Peter Mello 			    (int *)calloc(k + 1, sizeof (int))) == NULL) {
6807c478bd9Sstevel@tonic-gate 				overflo("out of state space");
6811ee2e5faSnakanon 			}
6821ee2e5faSnakanon 			for (i = 0; i <= k; i++)
6837c478bd9Sstevel@tonic-gate 				(f->posns[2])[i] = (f->posns[0])[i];
6847c478bd9Sstevel@tonic-gate 			f->initstat = f->curstat = 2;
6857c478bd9Sstevel@tonic-gate 			f->out[2] = f->out[0];
6861ee2e5faSnakanon 			for (i = 0; i < NCHARS; i++)
6877c478bd9Sstevel@tonic-gate 				f->gototab[2][i] = 0;
6887c478bd9Sstevel@tonic-gate 		}
6891ee2e5faSnakanon 		p++;
6907c478bd9Sstevel@tonic-gate 	}
6917c478bd9Sstevel@tonic-gate 	return (0);
6927c478bd9Sstevel@tonic-gate }
6937c478bd9Sstevel@tonic-gate 
6941ee2e5faSnakanon static Node *regexp(void), *primary(void), *concat(Node *);
6951ee2e5faSnakanon static Node *alt(Node *), *unary(Node *);
6967c478bd9Sstevel@tonic-gate 
697*3ee4fc2aSCody Peter Mello /* parses regular expression pointed to by p */
698*3ee4fc2aSCody Peter Mello /* uses relex() to scan regular expression */
6991ee2e5faSnakanon static Node *
reparse(const char * p)700*3ee4fc2aSCody Peter Mello reparse(const char *p)
7017c478bd9Sstevel@tonic-gate {
7027c478bd9Sstevel@tonic-gate 	Node *np;
7037c478bd9Sstevel@tonic-gate 
7041ee2e5faSnakanon 	dprintf(("reparse <%s>\n", p));
705*3ee4fc2aSCody Peter Mello 
706*3ee4fc2aSCody Peter Mello 	/* prestr points to string to be parsed */
707*3ee4fc2aSCody Peter Mello 	lastre = prestr = (uschar *)p;
7087c478bd9Sstevel@tonic-gate 	rtok = relex();
709*3ee4fc2aSCody Peter Mello 	/* GNU compatibility: an empty regexp matches anything */
7101ee2e5faSnakanon 	if (rtok == '\0') {
711*3ee4fc2aSCody Peter Mello 		return (op2(EMPTYRE, NIL, NIL));
7121ee2e5faSnakanon 	}
713*3ee4fc2aSCody Peter Mello 	np = regexp();
714*3ee4fc2aSCody Peter Mello 	if (rtok != '\0')
715*3ee4fc2aSCody Peter Mello 		FATAL("syntax error in regular expression %s at %s",
716*3ee4fc2aSCody Peter Mello 		    lastre, prestr);
717*3ee4fc2aSCody Peter Mello 	return (np);
7187c478bd9Sstevel@tonic-gate }
7197c478bd9Sstevel@tonic-gate 
7201ee2e5faSnakanon static Node *
regexp(void)721*3ee4fc2aSCody Peter Mello regexp(void)	/* top-level parse of reg expr */
7227c478bd9Sstevel@tonic-gate {
7237c478bd9Sstevel@tonic-gate 	return (alt(concat(primary())));
7247c478bd9Sstevel@tonic-gate }
7257c478bd9Sstevel@tonic-gate 
7261ee2e5faSnakanon static Node *
primary(void)7271ee2e5faSnakanon primary(void)
7287c478bd9Sstevel@tonic-gate {
7297c478bd9Sstevel@tonic-gate 	Node *np;
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate 	switch (rtok) {
7327c478bd9Sstevel@tonic-gate 	case CHAR:
733*3ee4fc2aSCody Peter Mello 		np = op2(CHAR, NIL, itonp(rlxval));
7347c478bd9Sstevel@tonic-gate 		rtok = relex();
7357c478bd9Sstevel@tonic-gate 		return (unary(np));
7367c478bd9Sstevel@tonic-gate 	case ALL:
7377c478bd9Sstevel@tonic-gate 		rtok = relex();
7387c478bd9Sstevel@tonic-gate 		return (unary(op2(ALL, NIL, NIL)));
739*3ee4fc2aSCody Peter Mello 	case EMPTYRE:
740*3ee4fc2aSCody Peter Mello 		rtok = relex();
741*3ee4fc2aSCody Peter Mello 		return (unary(op2(ALL, NIL, NIL)));
7427c478bd9Sstevel@tonic-gate 	case DOT:
7437c478bd9Sstevel@tonic-gate 		rtok = relex();
7447c478bd9Sstevel@tonic-gate 		return (unary(op2(DOT, NIL, NIL)));
7457c478bd9Sstevel@tonic-gate 	case CCL:
7461ee2e5faSnakanon 		/*LINTED align*/
747*3ee4fc2aSCody Peter Mello 		np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr));
7487c478bd9Sstevel@tonic-gate 		rtok = relex();
7497c478bd9Sstevel@tonic-gate 		return (unary(np));
7507c478bd9Sstevel@tonic-gate 	case NCCL:
7511ee2e5faSnakanon 		/*LINTED align*/
752*3ee4fc2aSCody Peter Mello 		np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr));
7537c478bd9Sstevel@tonic-gate 		rtok = relex();
7547c478bd9Sstevel@tonic-gate 		return (unary(np));
7557c478bd9Sstevel@tonic-gate 	case '^':
7567c478bd9Sstevel@tonic-gate 		rtok = relex();
757*3ee4fc2aSCody Peter Mello 		return (unary(op2(CHAR, NIL, itonp(HAT))));
7587c478bd9Sstevel@tonic-gate 	case '$':
7597c478bd9Sstevel@tonic-gate 		rtok = relex();
7607c478bd9Sstevel@tonic-gate 		return (unary(op2(CHAR, NIL, NIL)));
7617c478bd9Sstevel@tonic-gate 	case '(':
7627c478bd9Sstevel@tonic-gate 		rtok = relex();
7637c478bd9Sstevel@tonic-gate 		if (rtok == ')') {	/* special pleading for () */
7647c478bd9Sstevel@tonic-gate 			rtok = relex();
7651ee2e5faSnakanon 			return (unary(op2(CCL, NIL,
7661ee2e5faSnakanon 			    /*LINTED align*/
767*3ee4fc2aSCody Peter Mello 			    (Node *)tostring(""))));
7687c478bd9Sstevel@tonic-gate 		}
7697c478bd9Sstevel@tonic-gate 		np = regexp();
7707c478bd9Sstevel@tonic-gate 		if (rtok == ')') {
7717c478bd9Sstevel@tonic-gate 			rtok = relex();
7727c478bd9Sstevel@tonic-gate 			return (unary(np));
7731ee2e5faSnakanon 		} else {
774*3ee4fc2aSCody Peter Mello 			FATAL("syntax error in regular expression %s at %s",
775*3ee4fc2aSCody Peter Mello 			    lastre, prestr);
7767c478bd9Sstevel@tonic-gate 		}
777b2be350eSToomas Soome 		/* FALLTHROUGH */
7787c478bd9Sstevel@tonic-gate 	default:
779*3ee4fc2aSCody Peter Mello 		FATAL("illegal primary in regular expression %s at %s",
780*3ee4fc2aSCody Peter Mello 		    lastre, prestr);
7817c478bd9Sstevel@tonic-gate 	}
7827c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
7831ee2e5faSnakanon 	return (NULL);
7847c478bd9Sstevel@tonic-gate }
7857c478bd9Sstevel@tonic-gate 
7861ee2e5faSnakanon static Node *
concat(Node * np)7871ee2e5faSnakanon concat(Node *np)
7887c478bd9Sstevel@tonic-gate {
7897c478bd9Sstevel@tonic-gate 	switch (rtok) {
790*3ee4fc2aSCody Peter Mello 	case EMPTYRE:
791*3ee4fc2aSCody Peter Mello 	case CHAR:
792*3ee4fc2aSCody Peter Mello 	case DOT:
793*3ee4fc2aSCody Peter Mello 	case ALL:
794*3ee4fc2aSCody Peter Mello 	case CCL:
795*3ee4fc2aSCody Peter Mello 	case NCCL:
796*3ee4fc2aSCody Peter Mello 	case '$':
797*3ee4fc2aSCody Peter Mello 	case '(':
7987c478bd9Sstevel@tonic-gate 		return (concat(op2(CAT, np, primary())));
7997c478bd9Sstevel@tonic-gate 	default:
8007c478bd9Sstevel@tonic-gate 		return (np);
8017c478bd9Sstevel@tonic-gate 	}
8027c478bd9Sstevel@tonic-gate }
8037c478bd9Sstevel@tonic-gate 
8041ee2e5faSnakanon static Node *
alt(Node * np)8051ee2e5faSnakanon alt(Node *np)
8067c478bd9Sstevel@tonic-gate {
8077c478bd9Sstevel@tonic-gate 	if (rtok == OR) {
8087c478bd9Sstevel@tonic-gate 		rtok = relex();
8097c478bd9Sstevel@tonic-gate 		return (alt(op2(OR, np, concat(primary()))));
8107c478bd9Sstevel@tonic-gate 	}
8117c478bd9Sstevel@tonic-gate 	return (np);
8127c478bd9Sstevel@tonic-gate }
8137c478bd9Sstevel@tonic-gate 
8141ee2e5faSnakanon static Node *
unary(Node * np)8151ee2e5faSnakanon unary(Node *np)
8167c478bd9Sstevel@tonic-gate {
8177c478bd9Sstevel@tonic-gate 	switch (rtok) {
8187c478bd9Sstevel@tonic-gate 	case STAR:
8197c478bd9Sstevel@tonic-gate 		rtok = relex();
8207c478bd9Sstevel@tonic-gate 		return (unary(op2(STAR, np, NIL)));
8217c478bd9Sstevel@tonic-gate 	case PLUS:
8227c478bd9Sstevel@tonic-gate 		rtok = relex();
8237c478bd9Sstevel@tonic-gate 		return (unary(op2(PLUS, np, NIL)));
8247c478bd9Sstevel@tonic-gate 	case QUEST:
8257c478bd9Sstevel@tonic-gate 		rtok = relex();
8267c478bd9Sstevel@tonic-gate 		return (unary(op2(QUEST, np, NIL)));
8277c478bd9Sstevel@tonic-gate 	default:
8287c478bd9Sstevel@tonic-gate 		return (np);
8297c478bd9Sstevel@tonic-gate 	}
8307c478bd9Sstevel@tonic-gate }
8317c478bd9Sstevel@tonic-gate 
832*3ee4fc2aSCody Peter Mello /*
833*3ee4fc2aSCody Peter Mello  * Character class definitions conformant to the POSIX locale as
834*3ee4fc2aSCody Peter Mello  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
835*3ee4fc2aSCody Peter Mello  * and operating character sets are both ASCII (ISO646) or supersets
836*3ee4fc2aSCody Peter Mello  * thereof.
837*3ee4fc2aSCody Peter Mello  *
838*3ee4fc2aSCody Peter Mello  * Note that to avoid overflowing the temporary buffer used in
839*3ee4fc2aSCody Peter Mello  * relex(), the expanded character class (prior to range expansion)
840*3ee4fc2aSCody Peter Mello  * must be less than twice the size of their full name.
841*3ee4fc2aSCody Peter Mello  */
842*3ee4fc2aSCody Peter Mello 
843*3ee4fc2aSCody Peter Mello struct charclass {
844*3ee4fc2aSCody Peter Mello 	const char *cc_name;
845*3ee4fc2aSCody Peter Mello 	int cc_namelen;
846*3ee4fc2aSCody Peter Mello 	int (*cc_func)(int);
847*3ee4fc2aSCody Peter Mello } charclasses[] = {
848*3ee4fc2aSCody Peter Mello 	{ "alnum",	5,	isalnum },
849*3ee4fc2aSCody Peter Mello 	{ "alpha",	5,	isalpha },
850*3ee4fc2aSCody Peter Mello 	{ "blank",	5,	isblank },
851*3ee4fc2aSCody Peter Mello 	{ "cntrl",	5,	iscntrl },
852*3ee4fc2aSCody Peter Mello 	{ "digit",	5,	isdigit },
853*3ee4fc2aSCody Peter Mello 	{ "graph",	5,	isgraph },
854*3ee4fc2aSCody Peter Mello 	{ "lower",	5,	islower },
855*3ee4fc2aSCody Peter Mello 	{ "print",	5,	isprint },
856*3ee4fc2aSCody Peter Mello 	{ "punct",	5,	ispunct },
857*3ee4fc2aSCody Peter Mello 	{ "space",	5,	isspace },
858*3ee4fc2aSCody Peter Mello 	{ "upper",	5,	isupper },
859*3ee4fc2aSCody Peter Mello 	{ "xdigit",	6,	isxdigit },
860*3ee4fc2aSCody Peter Mello 	{ NULL,		0,	NULL },
861*3ee4fc2aSCody Peter Mello };
862*3ee4fc2aSCody Peter Mello 
863*3ee4fc2aSCody Peter Mello 
8641ee2e5faSnakanon static int
relex(void)8651ee2e5faSnakanon relex(void)		/* lexical analyzer for reparse */
8667c478bd9Sstevel@tonic-gate {
867*3ee4fc2aSCody Peter Mello 	int c, n;
868*3ee4fc2aSCody Peter Mello 	int cflag;
869*3ee4fc2aSCody Peter Mello 	static uschar *buf = 0;
870*3ee4fc2aSCody Peter Mello 	static size_t bufsz = 100;
871*3ee4fc2aSCody Peter Mello 	uschar *bp;
872*3ee4fc2aSCody Peter Mello 	struct charclass *cc;
873*3ee4fc2aSCody Peter Mello 	int i;
8747c478bd9Sstevel@tonic-gate 
8757c478bd9Sstevel@tonic-gate 	switch (c = *prestr++) {
8767c478bd9Sstevel@tonic-gate 	case '|': return OR;
8777c478bd9Sstevel@tonic-gate 	case '*': return STAR;
8787c478bd9Sstevel@tonic-gate 	case '+': return PLUS;
8797c478bd9Sstevel@tonic-gate 	case '?': return QUEST;
8807c478bd9Sstevel@tonic-gate 	case '.': return DOT;
8817c478bd9Sstevel@tonic-gate 	case '\0': prestr--; return '\0';
8827c478bd9Sstevel@tonic-gate 	case '^':
8837c478bd9Sstevel@tonic-gate 	case '$':
8847c478bd9Sstevel@tonic-gate 	case '(':
8857c478bd9Sstevel@tonic-gate 	case ')':
8861ee2e5faSnakanon 		return (c);
8877c478bd9Sstevel@tonic-gate 	case '\\':
888*3ee4fc2aSCody Peter Mello 		rlxval = quoted(&prestr);
8891ee2e5faSnakanon 		return (CHAR);
8907c478bd9Sstevel@tonic-gate 	default:
8917c478bd9Sstevel@tonic-gate 		rlxval = c;
8921ee2e5faSnakanon 		return (CHAR);
8931ee2e5faSnakanon 	case '[':
894*3ee4fc2aSCody Peter Mello 		if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
895*3ee4fc2aSCody Peter Mello 			FATAL("out of space in reg expr %.10s..", lastre);
896*3ee4fc2aSCody Peter Mello 		bp = buf;
8977c478bd9Sstevel@tonic-gate 		if (*prestr == '^') {
8987c478bd9Sstevel@tonic-gate 			cflag = 1;
8997c478bd9Sstevel@tonic-gate 			prestr++;
9001ee2e5faSnakanon 		} else
9017c478bd9Sstevel@tonic-gate 			cflag = 0;
902*3ee4fc2aSCody Peter Mello 		n = 2 * strlen((const char *)prestr) + 1;
903*3ee4fc2aSCody Peter Mello 		if (!adjbuf((char **)&buf, &bufsz, n, n, (char **)&bp,
904*3ee4fc2aSCody Peter Mello 		    "relex1"))
905*3ee4fc2aSCody Peter Mello 			FATAL("out of space for reg expr %.10s...", lastre);
9067c478bd9Sstevel@tonic-gate 		for (;;) {
9077c478bd9Sstevel@tonic-gate 			if ((c = *prestr++) == '\\') {
908*3ee4fc2aSCody Peter Mello 				*bp++ = '\\';
9091ee2e5faSnakanon 				if ((c = *prestr++) == '\0') {
910*3ee4fc2aSCody Peter Mello 					FATAL("nonterminated character class "
911*3ee4fc2aSCody Peter Mello 					    "%.20s...", lastre);
912*3ee4fc2aSCody Peter Mello 				}
913*3ee4fc2aSCody Peter Mello 				*bp++ = c;
914*3ee4fc2aSCody Peter Mello 			} else if (c == '[' && *prestr == ':') {
915*3ee4fc2aSCody Peter Mello 				/*
916*3ee4fc2aSCody Peter Mello 				 * Handle POSIX character class names.
917*3ee4fc2aSCody Peter Mello 				 * Dag-Erling Smorgrav, des@ofug.org
918*3ee4fc2aSCody Peter Mello 				 */
919*3ee4fc2aSCody Peter Mello 				for (cc = charclasses; cc->cc_name; cc++)
920*3ee4fc2aSCody Peter Mello 					if (strncmp((const char *)prestr + 1,
921*3ee4fc2aSCody Peter Mello 					    (const char *)cc->cc_name,
922*3ee4fc2aSCody Peter Mello 					    cc->cc_namelen) == 0)
923*3ee4fc2aSCody Peter Mello 						break;
924*3ee4fc2aSCody Peter Mello 
925*3ee4fc2aSCody Peter Mello 				if (cc->cc_name == NULL ||
926*3ee4fc2aSCody Peter Mello 				    prestr[1 + cc->cc_namelen] != ':' ||
927*3ee4fc2aSCody Peter Mello 				    prestr[2 + cc->cc_namelen] != ']') {
928*3ee4fc2aSCody Peter Mello 					*bp++ = c;
929*3ee4fc2aSCody Peter Mello 					continue;
9301ee2e5faSnakanon 				}
931*3ee4fc2aSCody Peter Mello 
932*3ee4fc2aSCody Peter Mello 				prestr += cc->cc_namelen + 3;
933*3ee4fc2aSCody Peter Mello 				/*
934*3ee4fc2aSCody Peter Mello 				 * BUG: We begin at 1, instead of 0, since we
935*3ee4fc2aSCody Peter Mello 				 * would otherwise prematurely terminate the
936*3ee4fc2aSCody Peter Mello 				 * string for classes like [[:cntrl:]]. This
937*3ee4fc2aSCody Peter Mello 				 * means that we can't match the NUL character,
938*3ee4fc2aSCody Peter Mello 				 * not without first adapting the entire
939*3ee4fc2aSCody Peter Mello 				 * program to track each string's length.
940*3ee4fc2aSCody Peter Mello 				 */
941*3ee4fc2aSCody Peter Mello 				for (i = 1; i < NCHARS; i++) {
942*3ee4fc2aSCody Peter Mello 					(void) adjbuf((char **)&buf, &bufsz,
943*3ee4fc2aSCody Peter Mello 					    bp - buf + 1, 100, (char **)&bp,
944*3ee4fc2aSCody Peter Mello 					    "relex2");
945*3ee4fc2aSCody Peter Mello 					if (cc->cc_func(i)) {
946*3ee4fc2aSCody Peter Mello 						*bp++ = i;
947*3ee4fc2aSCody Peter Mello 						n++;
948*3ee4fc2aSCody Peter Mello 					}
949*3ee4fc2aSCody Peter Mello 				}
950*3ee4fc2aSCody Peter Mello 			} else if (c == '\0') {
951*3ee4fc2aSCody Peter Mello 				FATAL("nonterminated character class %.20s",
952*3ee4fc2aSCody Peter Mello 				    lastre);
953*3ee4fc2aSCody Peter Mello 			} else if (bp == buf) {	/* 1st char is special */
954*3ee4fc2aSCody Peter Mello 				*bp++ = c;
9557c478bd9Sstevel@tonic-gate 			} else if (c == ']') {
956*3ee4fc2aSCody Peter Mello 				*bp++ = '\0';
957*3ee4fc2aSCody Peter Mello 				rlxstr = (uschar *)tostring((char *)buf);
9587c478bd9Sstevel@tonic-gate 				if (cflag == 0)
9591ee2e5faSnakanon 					return (CCL);
9607c478bd9Sstevel@tonic-gate 				else
9611ee2e5faSnakanon 					return (NCCL);
9627c478bd9Sstevel@tonic-gate 			} else
963*3ee4fc2aSCody Peter Mello 				*bp++ = c;
9647c478bd9Sstevel@tonic-gate 		}
9651ee2e5faSnakanon 		/*NOTREACHED*/
9667c478bd9Sstevel@tonic-gate 	}
9671ee2e5faSnakanon 	return (0);
9687c478bd9Sstevel@tonic-gate }
9697c478bd9Sstevel@tonic-gate 
9701ee2e5faSnakanon static int
cgoto(fa * f,int s,int c)9711ee2e5faSnakanon cgoto(fa *f, int s, int c)
9727c478bd9Sstevel@tonic-gate {
973*3ee4fc2aSCody Peter Mello 	int i, j, k;
974*3ee4fc2aSCody Peter Mello 	int *p, *q;
9757c478bd9Sstevel@tonic-gate 
976*3ee4fc2aSCody Peter Mello 	assert(c == HAT || c < NCHARS);
977*3ee4fc2aSCody Peter Mello 	while (f->accept >= maxsetvec) {	/* guessing here! */
978*3ee4fc2aSCody Peter Mello 		growvec("out of space in cgoto()");
979*3ee4fc2aSCody Peter Mello 	}
9801ee2e5faSnakanon 	for (i = 0; i <= f->accept; i++)
9817c478bd9Sstevel@tonic-gate 		setvec[i] = 0;
9827c478bd9Sstevel@tonic-gate 	setcnt = 0;
9837c478bd9Sstevel@tonic-gate 	/* compute positions of gototab[s,c] into setvec */
9847c478bd9Sstevel@tonic-gate 	p = f->posns[s];
9851ee2e5faSnakanon 	for (i = 1; i <= *p; i++) {
9867c478bd9Sstevel@tonic-gate 		if ((k = f->re[p[i]].ltype) != FINAL) {
987*3ee4fc2aSCody Peter Mello 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
988*3ee4fc2aSCody Peter Mello 			    (k == DOT && c != 0 && c != HAT) ||
989*3ee4fc2aSCody Peter Mello 			    (k == ALL && c != 0) ||
990*3ee4fc2aSCody Peter Mello 			    (k == EMPTYRE && c != 0) ||
991*3ee4fc2aSCody Peter Mello 			    (k == CCL &&
992*3ee4fc2aSCody Peter Mello 			    member(c, (char *)f->re[p[i]].lval.up)) ||
993*3ee4fc2aSCody Peter Mello 			    (k == NCCL &&
994*3ee4fc2aSCody Peter Mello 			    !member(c, (char *)f->re[p[i]].lval.up) &&
995*3ee4fc2aSCody Peter Mello 			    c != 0 && c != HAT)) {
9961ee2e5faSnakanon 				q = f->re[p[i]].lfollow;
9971ee2e5faSnakanon 				for (j = 1; j <= *q; j++) {
998*3ee4fc2aSCody Peter Mello 					if (q[j] >= maxsetvec) {
999*3ee4fc2aSCody Peter Mello 						growvec("cgoto overflow");
1000*3ee4fc2aSCody Peter Mello 					}
10011ee2e5faSnakanon 					if (setvec[q[j]] == 0) {
10021ee2e5faSnakanon 						setcnt++;
10031ee2e5faSnakanon 						setvec[q[j]] = 1;
10047c478bd9Sstevel@tonic-gate 					}
10057c478bd9Sstevel@tonic-gate 				}
10061ee2e5faSnakanon 			}
10077c478bd9Sstevel@tonic-gate 		}
10087c478bd9Sstevel@tonic-gate 	}
10097c478bd9Sstevel@tonic-gate 	/* determine if setvec is a previous state */
10107c478bd9Sstevel@tonic-gate 	tmpset[0] = setcnt;
10117c478bd9Sstevel@tonic-gate 	j = 1;
10127c478bd9Sstevel@tonic-gate 	for (i = f->accept; i >= 0; i--)
10137c478bd9Sstevel@tonic-gate 		if (setvec[i]) {
10147c478bd9Sstevel@tonic-gate 			tmpset[j++] = i;
10157c478bd9Sstevel@tonic-gate 		}
10167c478bd9Sstevel@tonic-gate 	/* tmpset == previous state? */
10171ee2e5faSnakanon 	for (i = 1; i <= f->curstat; i++) {
10187c478bd9Sstevel@tonic-gate 		p = f->posns[i];
10197c478bd9Sstevel@tonic-gate 		if ((k = tmpset[0]) != p[0])
10207c478bd9Sstevel@tonic-gate 			goto different;
10217c478bd9Sstevel@tonic-gate 		for (j = 1; j <= k; j++)
10227c478bd9Sstevel@tonic-gate 			if (tmpset[j] != p[j])
10237c478bd9Sstevel@tonic-gate 				goto different;
10247c478bd9Sstevel@tonic-gate 		/* setvec is state i */
10257c478bd9Sstevel@tonic-gate 		f->gototab[s][c] = i;
10261ee2e5faSnakanon 		return (i);
10277c478bd9Sstevel@tonic-gate 	different:;
10287c478bd9Sstevel@tonic-gate 	}
10297c478bd9Sstevel@tonic-gate 
10307c478bd9Sstevel@tonic-gate 	/* add tmpset to current set of states */
10317c478bd9Sstevel@tonic-gate 	if (f->curstat >= NSTATES-1) {
10327c478bd9Sstevel@tonic-gate 		f->curstat = 2;
10337c478bd9Sstevel@tonic-gate 		f->reset = 1;
10341ee2e5faSnakanon 		for (i = 2; i < NSTATES; i++)
10357c478bd9Sstevel@tonic-gate 			xfree(f->posns[i]);
10361ee2e5faSnakanon 	} else
10377c478bd9Sstevel@tonic-gate 		++(f->curstat);
10381ee2e5faSnakanon 	for (i = 0; i < NCHARS; i++)
10397c478bd9Sstevel@tonic-gate 		f->gototab[f->curstat][i] = 0;
10407c478bd9Sstevel@tonic-gate 	xfree(f->posns[f->curstat]);
10411ee2e5faSnakanon 	if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
10427c478bd9Sstevel@tonic-gate 		overflo("out of space in cgoto");
10437c478bd9Sstevel@tonic-gate 
10447c478bd9Sstevel@tonic-gate 	f->posns[f->curstat] = p;
10457c478bd9Sstevel@tonic-gate 	f->gototab[s][c] = f->curstat;
10467c478bd9Sstevel@tonic-gate 	for (i = 0; i <= setcnt; i++)
10477c478bd9Sstevel@tonic-gate 		p[i] = tmpset[i];
10487c478bd9Sstevel@tonic-gate 	if (setvec[f->accept])
10497c478bd9Sstevel@tonic-gate 		f->out[f->curstat] = 1;
10507c478bd9Sstevel@tonic-gate 	else
10517c478bd9Sstevel@tonic-gate 		f->out[f->curstat] = 0;
10521ee2e5faSnakanon 	return (f->curstat);
10537c478bd9Sstevel@tonic-gate }
10547c478bd9Sstevel@tonic-gate 
10551ee2e5faSnakanon static void
freefa(fa * f)1056*3ee4fc2aSCody Peter Mello freefa(fa *f)	/* free a finite automaton */
10577c478bd9Sstevel@tonic-gate {
1058*3ee4fc2aSCody Peter Mello 	int i;
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 	if (f == NULL)
10617c478bd9Sstevel@tonic-gate 		return;
10621ee2e5faSnakanon 	for (i = 0; i <= f->curstat; i++)
10637c478bd9Sstevel@tonic-gate 		xfree(f->posns[i]);
1064*3ee4fc2aSCody Peter Mello 	for (i = 0; i <= f->accept; i++) {
10657c478bd9Sstevel@tonic-gate 		xfree(f->re[i].lfollow);
1066*3ee4fc2aSCody Peter Mello 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1067*3ee4fc2aSCody Peter Mello 			xfree((f->re[i].lval.np));
1068*3ee4fc2aSCody Peter Mello 	}
10697c478bd9Sstevel@tonic-gate 	xfree(f->restr);
10707c478bd9Sstevel@tonic-gate 	xfree(f);
10717c478bd9Sstevel@tonic-gate }
1072