xref: /illumos-gate/usr/src/tools/cscope-fast/cgrep.c (revision dc1259b6)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*	Copyright (c) 1990 AT&T	*/
23*dc1259b6SToomas Soome /*	  All Rights Reserved	*/
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate /*
267c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
277c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
287c478bd9Sstevel@tonic-gate  */
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate /*
317c478bd9Sstevel@tonic-gate  *	cscope - interactive C symbol or text cross-reference
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  *	text searching functions
347c478bd9Sstevel@tonic-gate  */
357c478bd9Sstevel@tonic-gate 
367c478bd9Sstevel@tonic-gate #include <fcntl.h>
377c478bd9Sstevel@tonic-gate #include <setjmp.h>
387c478bd9Sstevel@tonic-gate #include <stdio.h>
397c478bd9Sstevel@tonic-gate #include <string.h>
407c478bd9Sstevel@tonic-gate #include <memory.h>
417c478bd9Sstevel@tonic-gate #include <ctype.h>
427c478bd9Sstevel@tonic-gate #include <unistd.h>
437c478bd9Sstevel@tonic-gate #include "library.h"
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate typedef enum {
467c478bd9Sstevel@tonic-gate 	NO = 0,
477c478bd9Sstevel@tonic-gate 	YES = 1
487c478bd9Sstevel@tonic-gate } BOOL;
497c478bd9Sstevel@tonic-gate 
507c478bd9Sstevel@tonic-gate typedef struct re_bm {
517c478bd9Sstevel@tonic-gate 	int delta0[256];
527c478bd9Sstevel@tonic-gate 	int *delta2;
537c478bd9Sstevel@tonic-gate 	uchar_t *cmap;
547c478bd9Sstevel@tonic-gate 	uchar_t *bmpat;
557c478bd9Sstevel@tonic-gate 	int patlen;
567c478bd9Sstevel@tonic-gate } re_bm;
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate typedef struct Link {
597c478bd9Sstevel@tonic-gate 	uchar_t lit;
607c478bd9Sstevel@tonic-gate 	struct Node *node;
617c478bd9Sstevel@tonic-gate 	struct Link *next;
627c478bd9Sstevel@tonic-gate } Link;
637c478bd9Sstevel@tonic-gate 
647c478bd9Sstevel@tonic-gate typedef struct Node {
657c478bd9Sstevel@tonic-gate 	short out;
667c478bd9Sstevel@tonic-gate 	short d;
677c478bd9Sstevel@tonic-gate 	short shift1;
687c478bd9Sstevel@tonic-gate 	short shift2;
697c478bd9Sstevel@tonic-gate 	long id;
707c478bd9Sstevel@tonic-gate 	struct Node **tab;
717c478bd9Sstevel@tonic-gate 	Link *alts;
727c478bd9Sstevel@tonic-gate 	struct Node *fail;
737c478bd9Sstevel@tonic-gate } Node;
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate typedef struct re_cw {
767c478bd9Sstevel@tonic-gate 	int maxdepth, mindepth;
777c478bd9Sstevel@tonic-gate 	long nodeid;
787c478bd9Sstevel@tonic-gate 	int step[256];
797c478bd9Sstevel@tonic-gate 	uchar_t *cmap;
807c478bd9Sstevel@tonic-gate 	struct Node *root;
817c478bd9Sstevel@tonic-gate } re_cw;
827c478bd9Sstevel@tonic-gate 
837c478bd9Sstevel@tonic-gate typedef enum {
847c478bd9Sstevel@tonic-gate 	/* lit expression types */
857c478bd9Sstevel@tonic-gate 	Literal, Dot, Charclass, EOP,
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate 	/* non-lit expression types */
887c478bd9Sstevel@tonic-gate 	Cat, Alternate, Star, Plus, Quest,
897c478bd9Sstevel@tonic-gate 
907c478bd9Sstevel@tonic-gate 	/* not really expression types, just helping */
917c478bd9Sstevel@tonic-gate 	Lpar, Rpar, Backslash
927c478bd9Sstevel@tonic-gate 
937c478bd9Sstevel@tonic-gate } Exprtype;
947c478bd9Sstevel@tonic-gate 
957c478bd9Sstevel@tonic-gate typedef int ID;
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate typedef struct Expr {
987c478bd9Sstevel@tonic-gate 	Exprtype type;	/* Type of expression (in grammar) */
997c478bd9Sstevel@tonic-gate 	ID id;		/* unique ID of lit expression  */
1007c478bd9Sstevel@tonic-gate 	int lit;	/* Literal character or tag */
1017c478bd9Sstevel@tonic-gate 	int flen;	/* Number of following lit expressions */
1027c478bd9Sstevel@tonic-gate 	ID *follow;	/* Array of IDs of following lit expressions */
1037c478bd9Sstevel@tonic-gate 	struct Expr *l; /* pointer to Left child (or ccl count) */
1047c478bd9Sstevel@tonic-gate 	struct Expr *r; /* pointer to Right child (or ccl mask) */
1057c478bd9Sstevel@tonic-gate 	struct Expr *parent; /* pointer to Parent */
1067c478bd9Sstevel@tonic-gate } Expr;
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate typedef struct State {
1097c478bd9Sstevel@tonic-gate 	struct State *tab[256];
1107c478bd9Sstevel@tonic-gate 	int cnt; /* number of matched chars on full match, -1 otherwise  */
1117c478bd9Sstevel@tonic-gate 	int npos;	/* number of IDs in position set for this state. */
1127c478bd9Sstevel@tonic-gate 	int pos;	/* index into posbase for beginning of IDs */
1137c478bd9Sstevel@tonic-gate } State;
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate typedef struct FID {
1167c478bd9Sstevel@tonic-gate 	ID	id;	/* Lit Expression id */
1177c478bd9Sstevel@tonic-gate 	int	fcount; /* Number of Lit exp matches before this one. */
1187c478bd9Sstevel@tonic-gate } FID;
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate typedef struct Positionset {
1217c478bd9Sstevel@tonic-gate 	int count;	/* Number of lit exps in position set */
1227c478bd9Sstevel@tonic-gate 	ID last;	/* ID of last lit exp in position set */
1237c478bd9Sstevel@tonic-gate 	FID *base;	/* array of MAXID FIDS */
1247c478bd9Sstevel@tonic-gate 			/* 0 means not in position set */
1257c478bd9Sstevel@tonic-gate 			/* -1 means first in position set */
1267c478bd9Sstevel@tonic-gate 			/* n (>0) is ID of prev member of position set. */
1277c478bd9Sstevel@tonic-gate } Positionset;
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate typedef struct re_re {
1307c478bd9Sstevel@tonic-gate 	FID  *posbase;	/* Array of IDs from all states */
1317c478bd9Sstevel@tonic-gate 	int nposalloc;	/* Allocated size of posbase */
1327c478bd9Sstevel@tonic-gate 	int posnext;	/* Index into free space in posbase */
1337c478bd9Sstevel@tonic-gate 	int posreset;	/* Index into end of IDs for initial state in posbase */
1347c478bd9Sstevel@tonic-gate 	int maxid;	/* Number of (also maximum ID of) lit expressions */
1357c478bd9Sstevel@tonic-gate 	Expr *root;	/* Pointer to root (EOP) expression */
1367c478bd9Sstevel@tonic-gate 	Expr **ptr;	/* Pointer to array of ptrs to lit expressions. */
1377c478bd9Sstevel@tonic-gate 	uchar_t *cmap;	/* Character mapping array */
1387c478bd9Sstevel@tonic-gate 	Positionset firstpos;
1397c478bd9Sstevel@tonic-gate 	Positionset tmp;
1407c478bd9Sstevel@tonic-gate 	int nstates;	/* Number of current states defined */
1417c478bd9Sstevel@tonic-gate 	int statelim;	/* Limit on number of states before flushing */
1427c478bd9Sstevel@tonic-gate 	State *states;	/* Array of states */
1437c478bd9Sstevel@tonic-gate 	State istate;	/* Initial state */
1447c478bd9Sstevel@tonic-gate } re_re;
1457c478bd9Sstevel@tonic-gate 
1467c478bd9Sstevel@tonic-gate typedef struct {
1477c478bd9Sstevel@tonic-gate 	uchar_t *cmap;
1487c478bd9Sstevel@tonic-gate 	re_re *re_ptr;
1497c478bd9Sstevel@tonic-gate 	re_bm *bm_ptr;
1507c478bd9Sstevel@tonic-gate 	re_cw *cw_ptr;
1517c478bd9Sstevel@tonic-gate 	BOOL fullmatch;
1527c478bd9Sstevel@tonic-gate 	BOOL (*procfn)();
1537c478bd9Sstevel@tonic-gate 	BOOL (*succfn)();
1547c478bd9Sstevel@tonic-gate 	uchar_t *loc1;
1557c478bd9Sstevel@tonic-gate 	uchar_t *loc2;
1567c478bd9Sstevel@tonic-gate 	uchar_t *expression;
1577c478bd9Sstevel@tonic-gate } PATTERN;
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate typedef enum {
1607c478bd9Sstevel@tonic-gate 	BEGIN,		/* File is not yet in buffer at all */
1617c478bd9Sstevel@tonic-gate 	MORE,		/* File is partly in buffer */
1627c478bd9Sstevel@tonic-gate 	NO_MORE		/* File has been completely read into buffer */
1637c478bd9Sstevel@tonic-gate } FILE_STAT;
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate typedef struct {
1667c478bd9Sstevel@tonic-gate 	uchar_t	*prntbuf; /* current line of input from data file */
1677c478bd9Sstevel@tonic-gate 	uchar_t	*newline; /* end of line (real or sentinel \n) */
1687c478bd9Sstevel@tonic-gate 	long	ln;	/* line number */
1697c478bd9Sstevel@tonic-gate } LINE;
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate #define	NL '\n'
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate #define	CCL_SIZ		32
1747c478bd9Sstevel@tonic-gate #define	CCL_SET(a, c)	((a)[(c) >> 3] |= bittab[(c) & 07])
1757c478bd9Sstevel@tonic-gate #define	CCL_CLR(a, c)	((a)[(c) >> 3] &= ~bittab[(c) & 07])
1767c478bd9Sstevel@tonic-gate #define	CCL_CHK(a, c)	((a)[(c) >> 3] & bittab[(c) & 07])
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate #define	ESIZE (BUFSIZ)
1797c478bd9Sstevel@tonic-gate #define	MAXBUFSIZE (64*BUFSIZ)
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate #define	MAXMALLOCS	1024
1827c478bd9Sstevel@tonic-gate #define	MAXLIT	256	/* is plenty big enough */
1837c478bd9Sstevel@tonic-gate #define	LARGE	MAXBUFSIZE+ESIZE+2
1847c478bd9Sstevel@tonic-gate 
1857c478bd9Sstevel@tonic-gate #define	CLEAR(r, rps)	(void) memset((char *)(rps)->base, 0, \
1867c478bd9Sstevel@tonic-gate 			    (int)((r)->maxid * sizeof (FID))), \
1877c478bd9Sstevel@tonic-gate 			    (rps)->count = 0, (rps)->last = -1
1887c478bd9Sstevel@tonic-gate #define	SET(rps, n, cnt) { \
1897c478bd9Sstevel@tonic-gate 	if ((rps)->base[n].id == 0) {\
1907c478bd9Sstevel@tonic-gate 		(rps)->count++;\
1917c478bd9Sstevel@tonic-gate 		(rps)->base[n].fcount = (cnt);\
1927c478bd9Sstevel@tonic-gate 		(rps)->base[n].id = (rps)->last;\
1937c478bd9Sstevel@tonic-gate 		(rps)->last = (n);\
1947c478bd9Sstevel@tonic-gate 	} else if ((cnt) > (rps)->base[n].fcount) {\
1957c478bd9Sstevel@tonic-gate 		(rps)->base[n].fcount = (cnt);\
1967c478bd9Sstevel@tonic-gate 	}}
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate #define	START	{ _p = tmp; }
1997c478bd9Sstevel@tonic-gate #define	ADDL(c)	{ if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; }
2007c478bd9Sstevel@tonic-gate #define	FINISH	{ ADDL(0) if ((_p-tmp) > bestlen) \
2017c478bd9Sstevel@tonic-gate 		    (void) memmove(best, tmp, bestlen = _p-tmp); }
2027c478bd9Sstevel@tonic-gate 
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate #define	NEW(N)	(froot?(t = froot, froot = froot->next, t->next = NULL, \
2057c478bd9Sstevel@tonic-gate 		    t->node = N, t): newlink(0, N))
2067c478bd9Sstevel@tonic-gate #define	ADD(N)	if (qtail) qtail = qtail->next = NEW(N); \
2077c478bd9Sstevel@tonic-gate 			else qtail = qhead = NEW(N)
2087c478bd9Sstevel@tonic-gate #define	DEL()	{ Link *_l = qhead; if ((qhead = qhead->next) == NULL) \
209*dc1259b6SToomas Soome 			{ qtail = NULL; } _l->next = froot; froot = _l; }
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate static uchar_t	*buffer;
2127c478bd9Sstevel@tonic-gate static uchar_t	*bufend;
2137c478bd9Sstevel@tonic-gate static FILE	*output;
2147c478bd9Sstevel@tonic-gate static char	*format;
2157c478bd9Sstevel@tonic-gate static char	*file;
2167c478bd9Sstevel@tonic-gate static int	file_desc;
2177c478bd9Sstevel@tonic-gate static FILE_STAT file_stat;
2187c478bd9Sstevel@tonic-gate static PATTERN	match_pattern;
2197c478bd9Sstevel@tonic-gate static uchar_t	char_map[2][256];
2207c478bd9Sstevel@tonic-gate static int	iflag;
2217c478bd9Sstevel@tonic-gate static Exprtype	toktype;
2227c478bd9Sstevel@tonic-gate static int	toklit, parno, maxid;
2237c478bd9Sstevel@tonic-gate static uchar_t	tmp[MAXLIT], best[MAXLIT];
2247c478bd9Sstevel@tonic-gate static uchar_t	*_p;
2257c478bd9Sstevel@tonic-gate static int	bestlen;
2267c478bd9Sstevel@tonic-gate static Node	*next_node;
2277c478bd9Sstevel@tonic-gate static Link	*froot, *next_link;
2287c478bd9Sstevel@tonic-gate static jmp_buf	env;
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate static int nmalloc;
2317c478bd9Sstevel@tonic-gate static uchar_t	*mallocs[MAXMALLOCS];
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate static uchar_t	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate #ifdef	DEBUG
2367c478bd9Sstevel@tonic-gate #define		TRACE(n)	(n < debug)
2377c478bd9Sstevel@tonic-gate #define		EPRINTSIZE	50000
2387c478bd9Sstevel@tonic-gate static void spr(int c, int *p, uchar_t *buf);
2397c478bd9Sstevel@tonic-gate void epr(Expr *e, uchar_t *res);
2407c478bd9Sstevel@tonic-gate static int debug = 12;
2417c478bd9Sstevel@tonic-gate #endif
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate static void init_file(LINE *cur_ptr);
2447c478bd9Sstevel@tonic-gate static void get_line(LINE *cur_ptr, uchar_t *s);
2457c478bd9Sstevel@tonic-gate static void get_ncount(LINE *cur_ptr, uchar_t *s);
2467c478bd9Sstevel@tonic-gate static int execute(void);
2477c478bd9Sstevel@tonic-gate static State *startstate(re_re *r);
2487c478bd9Sstevel@tonic-gate static State *stateof(re_re *r, Positionset *ps);
2497c478bd9Sstevel@tonic-gate static State *nextstate(re_re *r, State *s, int a);
2507c478bd9Sstevel@tonic-gate static State *addstate(re_re *r, Positionset *ps, int cnt);
2517c478bd9Sstevel@tonic-gate static BOOL match(Expr *e, int a);
2527c478bd9Sstevel@tonic-gate static BOOL first_lit(Positionset *fpos, Expr *e);
2537c478bd9Sstevel@tonic-gate static void eptr(re_re *r, Expr *e);
2547c478bd9Sstevel@tonic-gate static void efollow(re_re *r, Positionset *fpos, Expr *e);
2557c478bd9Sstevel@tonic-gate static void follow(Positionset *fpos, Expr *e);
2567c478bd9Sstevel@tonic-gate static void followstate(re_re *r, State *s, int a, Positionset *fpos);
2577c478bd9Sstevel@tonic-gate static Expr *eall(re_re *r, PATTERN *pat);
2587c478bd9Sstevel@tonic-gate static Expr *d0(re_re *r, PATTERN *pat);
2597c478bd9Sstevel@tonic-gate static Expr *d1(re_re *r, PATTERN *pat);
2607c478bd9Sstevel@tonic-gate static Expr *d2(re_re *r, PATTERN *pat);
2617c478bd9Sstevel@tonic-gate static Expr *d3(re_re *r, PATTERN *pat);
2627c478bd9Sstevel@tonic-gate static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right);
2637c478bd9Sstevel@tonic-gate static void lex(re_re *r, PATTERN *pat);
2647c478bd9Sstevel@tonic-gate static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e);
2657c478bd9Sstevel@tonic-gate static void traverse(PATTERN *pat, Expr *e);
2667c478bd9Sstevel@tonic-gate static int ccl(PATTERN *pat, uchar_t *tab);
2677c478bd9Sstevel@tonic-gate static BOOL altlist(), word();
2687c478bd9Sstevel@tonic-gate static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat);
2697c478bd9Sstevel@tonic-gate static Node *newnode(re_cw *c, int d);
2707c478bd9Sstevel@tonic-gate static Link *newlink(uchar_t lit, Node *n);
2717c478bd9Sstevel@tonic-gate static void fail(Node *root);
2727c478bd9Sstevel@tonic-gate static void zeroroot(Node *root, Node *n);
2737c478bd9Sstevel@tonic-gate static void shift(re_cw *c);
2747c478bd9Sstevel@tonic-gate static void shifttab(Node *n);
2757c478bd9Sstevel@tonic-gate static void shiftprop(re_cw *c, Node *n);
2767c478bd9Sstevel@tonic-gate static void delta_2(re_bm *b);
2777c478bd9Sstevel@tonic-gate static int getstate(re_re *r, Positionset *ps);
2787c478bd9Sstevel@tonic-gate static void savestate(re_re *r);
2797c478bd9Sstevel@tonic-gate static void stateinit(re_re *r);
2807c478bd9Sstevel@tonic-gate static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap);
2817c478bd9Sstevel@tonic-gate static re_cw *re_cwinit(uchar_t *cmap);
2827c478bd9Sstevel@tonic-gate static re_cw *re_recw(re_re *r, uchar_t *map);
2837c478bd9Sstevel@tonic-gate static re_re *egprep(PATTERN *pat);
2847c478bd9Sstevel@tonic-gate static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e);
2857c478bd9Sstevel@tonic-gate static void re_cwcomp(re_cw *c);
2867c478bd9Sstevel@tonic-gate static void eginit(re_re *r);
2877c478bd9Sstevel@tonic-gate static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb,
2887c478bd9Sstevel@tonic-gate     uchar_t **me);
2897c478bd9Sstevel@tonic-gate static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb,
2907c478bd9Sstevel@tonic-gate     uchar_t **me);
2917c478bd9Sstevel@tonic-gate static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb,
2927c478bd9Sstevel@tonic-gate     uchar_t **me);
2937c478bd9Sstevel@tonic-gate static uchar_t *egmalloc(size_t n);
2947c478bd9Sstevel@tonic-gate static void fgetfile(LINE *cur_ptr);
2957c478bd9Sstevel@tonic-gate static void dogre(PATTERN *pat);
2967c478bd9Sstevel@tonic-gate static BOOL pattern_match(PATTERN *pat, LINE *lptr);
2977c478bd9Sstevel@tonic-gate static BOOL fixloc(uchar_t **mb, uchar_t **me);
2987c478bd9Sstevel@tonic-gate static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me);
2997c478bd9Sstevel@tonic-gate static void err(char *s);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate static char *message;
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate void
egrepcaseless(int i)3047c478bd9Sstevel@tonic-gate egrepcaseless(int i)
3057c478bd9Sstevel@tonic-gate {
3067c478bd9Sstevel@tonic-gate 	iflag = i;	/* simulate "egrep -i" */
3077c478bd9Sstevel@tonic-gate }
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate char *
egrepinit(char * expression)3107c478bd9Sstevel@tonic-gate egrepinit(char *expression)
3117c478bd9Sstevel@tonic-gate {
3127c478bd9Sstevel@tonic-gate 	static int firsttime = 1;
3137c478bd9Sstevel@tonic-gate 	int i;
3147c478bd9Sstevel@tonic-gate 
3157c478bd9Sstevel@tonic-gate 	if (firsttime) {
3167c478bd9Sstevel@tonic-gate 		firsttime = 0;
3177c478bd9Sstevel@tonic-gate 		for (i = 0; i < 256; i++) {
3187c478bd9Sstevel@tonic-gate 			char_map[0][i] = (uchar_t)i;
3197c478bd9Sstevel@tonic-gate 			char_map[1][i] = tolower(i);
3207c478bd9Sstevel@tonic-gate 		}
3217c478bd9Sstevel@tonic-gate 	}
3227c478bd9Sstevel@tonic-gate 	for (i = 0; i < nmalloc; i ++)
3237c478bd9Sstevel@tonic-gate 		free(mallocs[i]);
3247c478bd9Sstevel@tonic-gate 	nmalloc = 0;
3257c478bd9Sstevel@tonic-gate 	message = NULL;
3267c478bd9Sstevel@tonic-gate 
3277c478bd9Sstevel@tonic-gate 	match_pattern.expression = (uchar_t *)expression;
3287c478bd9Sstevel@tonic-gate 	match_pattern.cmap = char_map[iflag];
3297c478bd9Sstevel@tonic-gate 	if (setjmp(env) == 0) {
3307c478bd9Sstevel@tonic-gate 		dogre(&match_pattern);
3317c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3327c478bd9Sstevel@tonic-gate 		{
3337c478bd9Sstevel@tonic-gate 		PATTERN *p = match_pattern;
3347c478bd9Sstevel@tonic-gate 		if (p->procfn == re_bmexec)
3357c478bd9Sstevel@tonic-gate 			if (!p->fullmatch)
3367c478bd9Sstevel@tonic-gate 				if (p->succfn == re_reexec)
3377c478bd9Sstevel@tonic-gate 					printf("PARTIAL BOYER_MOORE\n");
3387c478bd9Sstevel@tonic-gate 				else
3397c478bd9Sstevel@tonic-gate 					printf("PARTIAL B_M with GREP\n");
3407c478bd9Sstevel@tonic-gate 			else
3417c478bd9Sstevel@tonic-gate 				printf("FULL BOYER_MOORE\n");
3427c478bd9Sstevel@tonic-gate 		else if (p->procfn == re_cwexec)
3437c478bd9Sstevel@tonic-gate 			printf("C_W\n");
3447c478bd9Sstevel@tonic-gate 		else
3457c478bd9Sstevel@tonic-gate 			printf("GENERAL\n");
3467c478bd9Sstevel@tonic-gate 		}
3477c478bd9Sstevel@tonic-gate #endif
3487c478bd9Sstevel@tonic-gate 	}
3497c478bd9Sstevel@tonic-gate 	return (message);
3507c478bd9Sstevel@tonic-gate }
3517c478bd9Sstevel@tonic-gate 
3527c478bd9Sstevel@tonic-gate static void
dogre(PATTERN * pat)3537c478bd9Sstevel@tonic-gate dogre(PATTERN *pat)
3547c478bd9Sstevel@tonic-gate {
3557c478bd9Sstevel@tonic-gate 	uchar_t *lb, *le;
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3587c478bd9Sstevel@tonic-gate 	printf("PATTERN %s\n", pat->expression);
3597c478bd9Sstevel@tonic-gate #endif
3607c478bd9Sstevel@tonic-gate 	pat->re_ptr = egprep(pat);
3617c478bd9Sstevel@tonic-gate 	bestlen = re_lit(pat, &lb, &le);
3627c478bd9Sstevel@tonic-gate 
3637c478bd9Sstevel@tonic-gate 	if (bestlen && pat->fullmatch) { /* Full Boyer Moore */
3647c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3657c478bd9Sstevel@tonic-gate 		printf("BESTLEN %d\n", bestlen);
3667c478bd9Sstevel@tonic-gate 		{
3677c478bd9Sstevel@tonic-gate 			uchar_t *p;
3687c478bd9Sstevel@tonic-gate 			for (p = lb; p < le; p++) printf("%c", *p);
3697c478bd9Sstevel@tonic-gate 			printf("\n");
3707c478bd9Sstevel@tonic-gate 		}
3717c478bd9Sstevel@tonic-gate #endif
3727c478bd9Sstevel@tonic-gate 		pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
3737c478bd9Sstevel@tonic-gate 		pat->procfn = re_bmexec;
3747c478bd9Sstevel@tonic-gate 		return;
3757c478bd9Sstevel@tonic-gate 	}
3767c478bd9Sstevel@tonic-gate 	if (bestlen > 1) {
3777c478bd9Sstevel@tonic-gate 			/* Partial Boyer Moore */
3787c478bd9Sstevel@tonic-gate 		pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
3797c478bd9Sstevel@tonic-gate 		pat->procfn = re_bmexec;
3807c478bd9Sstevel@tonic-gate 		pat->fullmatch = NO;
3817c478bd9Sstevel@tonic-gate 	} else {
3827c478bd9Sstevel@tonic-gate 		pat->fullmatch = YES;
3837c478bd9Sstevel@tonic-gate 		if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) {
3847c478bd9Sstevel@tonic-gate 			pat->procfn = re_cwexec; /* CW */
3857c478bd9Sstevel@tonic-gate 			return;
3867c478bd9Sstevel@tonic-gate 		}
3877c478bd9Sstevel@tonic-gate 	}
3887c478bd9Sstevel@tonic-gate 	/* general egrep regular expression */
3897c478bd9Sstevel@tonic-gate 	pat->succfn = re_reexec;
3907c478bd9Sstevel@tonic-gate 
3917c478bd9Sstevel@tonic-gate 	if (pat->fullmatch) {
3927c478bd9Sstevel@tonic-gate 		pat->procfn = pat->succfn;
3937c478bd9Sstevel@tonic-gate 		pat->succfn = NULL;
3947c478bd9Sstevel@tonic-gate 	}
3957c478bd9Sstevel@tonic-gate }
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate static BOOL
fixloc(uchar_t ** mb,uchar_t ** me)3987c478bd9Sstevel@tonic-gate fixloc(uchar_t **mb, uchar_t **me)
3997c478bd9Sstevel@tonic-gate {
4007c478bd9Sstevel@tonic-gate 	/* Handle match to null string */
4017c478bd9Sstevel@tonic-gate 
4027c478bd9Sstevel@tonic-gate 	while (*me <= *mb)
4037c478bd9Sstevel@tonic-gate 		(*me)++;
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate 	if (*(*me - 1) != NL)
4067c478bd9Sstevel@tonic-gate 		while (**me != NL)
4077c478bd9Sstevel@tonic-gate 			(*me)++;
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate 	/* Handle match to new-line only */
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate 	if (*mb == *me - 1 && **mb == NL) {
4127c478bd9Sstevel@tonic-gate 		(*me)++;
4137c478bd9Sstevel@tonic-gate 	}
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate 	/* Handle match including beginning or ending new-line */
4167c478bd9Sstevel@tonic-gate 
4177c478bd9Sstevel@tonic-gate 	if (**mb == NL)
4187c478bd9Sstevel@tonic-gate 		(*mb)++;
4197c478bd9Sstevel@tonic-gate 	if (*(*me - 1) == NL)
4207c478bd9Sstevel@tonic-gate 		(*me)--;
4217c478bd9Sstevel@tonic-gate 	return (YES);
4227c478bd9Sstevel@tonic-gate }
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate static BOOL
grepmatch(PATTERN * pat,uchar_t ** mb,uchar_t ** me)4257c478bd9Sstevel@tonic-gate grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me)
4267c478bd9Sstevel@tonic-gate {
4277c478bd9Sstevel@tonic-gate 	uchar_t *s, *f;
4287c478bd9Sstevel@tonic-gate 
4297c478bd9Sstevel@tonic-gate 	if (pat->fullmatch)
4307c478bd9Sstevel@tonic-gate 		return (fixloc(mb, me));
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	for (f = *me - 1; *f != NL; f++) {
4337c478bd9Sstevel@tonic-gate 	}
4347c478bd9Sstevel@tonic-gate 	f++;
4357c478bd9Sstevel@tonic-gate 	for (s = *mb; *s != NL; s--) {
4367c478bd9Sstevel@tonic-gate 	}
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate 	if ((*pat->succfn)(pat, s, f, mb, me)) {
4397c478bd9Sstevel@tonic-gate 		return (YES);
4407c478bd9Sstevel@tonic-gate 	} else {
4417c478bd9Sstevel@tonic-gate 		*mb = f;
4427c478bd9Sstevel@tonic-gate 		return (NO);
4437c478bd9Sstevel@tonic-gate 	}
4447c478bd9Sstevel@tonic-gate }
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate static void
eginit(re_re * r)4477c478bd9Sstevel@tonic-gate eginit(re_re *r)
4487c478bd9Sstevel@tonic-gate {
4497c478bd9Sstevel@tonic-gate 	unsigned int n;
4507c478bd9Sstevel@tonic-gate 
4517c478bd9Sstevel@tonic-gate 	r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *));
4527c478bd9Sstevel@tonic-gate 	eptr(r, r->root);
4537c478bd9Sstevel@tonic-gate 	n = r->maxid * sizeof (FID);
4547c478bd9Sstevel@tonic-gate 	r->firstpos.base = (FID *)egmalloc(n);
4557c478bd9Sstevel@tonic-gate 	r->tmp.base = (FID *)egmalloc(n);
4567c478bd9Sstevel@tonic-gate 	CLEAR(r, &r->firstpos);
4577c478bd9Sstevel@tonic-gate 	if (!first_lit(&r->firstpos, r->root->l)) {
4587c478bd9Sstevel@tonic-gate 		/*
4597c478bd9Sstevel@tonic-gate 		 * This expression matches the null string!!!!
4607c478bd9Sstevel@tonic-gate 		 * Add EOP to beginning position set.
4617c478bd9Sstevel@tonic-gate 		 */
4627c478bd9Sstevel@tonic-gate 		SET(&r->firstpos, r->root->id, 0)
4637c478bd9Sstevel@tonic-gate 		/* (void) printf("first of root->l == 0, b=%s\n", b); */
4647c478bd9Sstevel@tonic-gate 	}
4657c478bd9Sstevel@tonic-gate 	stateinit(r);
4667c478bd9Sstevel@tonic-gate 	(void) addstate(r, &r->firstpos, 0);
4677c478bd9Sstevel@tonic-gate 	savestate(r);
4687c478bd9Sstevel@tonic-gate }
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate static void
eptr(re_re * r,Expr * e)4717c478bd9Sstevel@tonic-gate eptr(re_re *r, Expr *e)
4727c478bd9Sstevel@tonic-gate {
4737c478bd9Sstevel@tonic-gate 	if ((e->id < 0) || (e->id >= r->maxid)) {
4747c478bd9Sstevel@tonic-gate 		err("internal error");
4757c478bd9Sstevel@tonic-gate 	}
4767c478bd9Sstevel@tonic-gate 	r->ptr[e->id] = e;
4777c478bd9Sstevel@tonic-gate 	if (e->type != Charclass) {
4787c478bd9Sstevel@tonic-gate 		if (e->l) eptr(r, e->l);
4797c478bd9Sstevel@tonic-gate 		if (e->r) eptr(r, e->r);
4807c478bd9Sstevel@tonic-gate 	}
4817c478bd9Sstevel@tonic-gate }
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate static BOOL
re_reexec(PATTERN * pat,uchar_t * b,uchar_t * e,uchar_t ** mb,uchar_t ** me)4847c478bd9Sstevel@tonic-gate re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me)
4857c478bd9Sstevel@tonic-gate {
4867c478bd9Sstevel@tonic-gate 	re_re *r = pat->re_ptr;
4877c478bd9Sstevel@tonic-gate 	State *s, *t;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate #ifdef	DEBUG
4907c478bd9Sstevel@tonic-gate 	if (TRACE(10)) {
4917c478bd9Sstevel@tonic-gate 		uchar_t buf[EPRINTSIZE];
4927c478bd9Sstevel@tonic-gate 		epr(r->root, buf);
4937c478bd9Sstevel@tonic-gate 		(void) printf("expr='%s'\n", buf);
4947c478bd9Sstevel@tonic-gate 	}
4957c478bd9Sstevel@tonic-gate #endif
4967c478bd9Sstevel@tonic-gate 	s = startstate(r);
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate 	for (;;) {
4997c478bd9Sstevel@tonic-gate 		uchar_t c;
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 		if (s->cnt >= 0) {
5027c478bd9Sstevel@tonic-gate #ifdef	DEBUG
5037c478bd9Sstevel@tonic-gate 			if (TRACE(6))
5047c478bd9Sstevel@tonic-gate 				(void) printf("match at input '%s'\n", b);
5057c478bd9Sstevel@tonic-gate #endif
5067c478bd9Sstevel@tonic-gate 			*mb = b - s->cnt;
5077c478bd9Sstevel@tonic-gate 			*me = b;
5087c478bd9Sstevel@tonic-gate 			if (fixloc(mb, me))
5097c478bd9Sstevel@tonic-gate 				return (YES);
5107c478bd9Sstevel@tonic-gate 		}
5117c478bd9Sstevel@tonic-gate 
5127c478bd9Sstevel@tonic-gate 		if (b >= e) break;
5137c478bd9Sstevel@tonic-gate 		c = pat->cmap[*b];
5147c478bd9Sstevel@tonic-gate #ifdef	DEBUG
5157c478bd9Sstevel@tonic-gate 		if (TRACE(4))
5167c478bd9Sstevel@tonic-gate 			(void) printf("state %d: char '%c'\n", s-r->states, *b);
5177c478bd9Sstevel@tonic-gate #endif
5187c478bd9Sstevel@tonic-gate 		if ((t = s->tab[c]) != NULL) s = t;
5197c478bd9Sstevel@tonic-gate 		else s = nextstate(r, s, (int)c);
5207c478bd9Sstevel@tonic-gate 		b++;
5217c478bd9Sstevel@tonic-gate 	}
5227c478bd9Sstevel@tonic-gate #ifdef	DEBUG
5237c478bd9Sstevel@tonic-gate 	if (TRACE(3)) {
5247c478bd9Sstevel@tonic-gate 		uchar_t buf[EPRINTSIZE];
5257c478bd9Sstevel@tonic-gate 
5267c478bd9Sstevel@tonic-gate 		epr(r->root, buf);
5277c478bd9Sstevel@tonic-gate 		(void) printf("pat = %s\n", buf);
5287c478bd9Sstevel@tonic-gate 	}
5297c478bd9Sstevel@tonic-gate #endif
5307c478bd9Sstevel@tonic-gate 	return (NO);
5317c478bd9Sstevel@tonic-gate }
5327c478bd9Sstevel@tonic-gate 
5337c478bd9Sstevel@tonic-gate static BOOL
match(Expr * e,int a)5347c478bd9Sstevel@tonic-gate match(Expr *e, int a)
5357c478bd9Sstevel@tonic-gate {
5367c478bd9Sstevel@tonic-gate 	switch (e->type) {
5377c478bd9Sstevel@tonic-gate 	case Dot:	return ((BOOL)(a != NL));
5387c478bd9Sstevel@tonic-gate 	case Literal:	return ((BOOL)(a == e->lit));
5397c478bd9Sstevel@tonic-gate 	case Charclass:	return ((BOOL)(CCL_CHK((uchar_t *)e->r, a)));
5407c478bd9Sstevel@tonic-gate 	default:	return (NO);
5417c478bd9Sstevel@tonic-gate 	}
5427c478bd9Sstevel@tonic-gate }
5437c478bd9Sstevel@tonic-gate 
5447c478bd9Sstevel@tonic-gate /*
5457c478bd9Sstevel@tonic-gate  * generates the followset for a node in fpos
5467c478bd9Sstevel@tonic-gate  */
5477c478bd9Sstevel@tonic-gate static void
follow(Positionset * fpos,Expr * e)5487c478bd9Sstevel@tonic-gate follow(Positionset *fpos, Expr *e)
5497c478bd9Sstevel@tonic-gate {
5507c478bd9Sstevel@tonic-gate 	Expr *p;
5517c478bd9Sstevel@tonic-gate 
5527c478bd9Sstevel@tonic-gate 	if (e->type == EOP)
5537c478bd9Sstevel@tonic-gate 		return;
5547c478bd9Sstevel@tonic-gate 	else
5557c478bd9Sstevel@tonic-gate 		p = e->parent;
5567c478bd9Sstevel@tonic-gate 	switch (p->type) {
5577c478bd9Sstevel@tonic-gate 	case EOP:
5587c478bd9Sstevel@tonic-gate 		SET(fpos, p->id, 0)
5597c478bd9Sstevel@tonic-gate 		break;
5607c478bd9Sstevel@tonic-gate 	case Plus:
5617c478bd9Sstevel@tonic-gate 	case Star:
5627c478bd9Sstevel@tonic-gate 		(void) first_lit(fpos, e);
5637c478bd9Sstevel@tonic-gate 		follow(fpos, p);
5647c478bd9Sstevel@tonic-gate 		break;
5657c478bd9Sstevel@tonic-gate 	case Quest:
5667c478bd9Sstevel@tonic-gate 	case Alternate:
5677c478bd9Sstevel@tonic-gate 		follow(fpos, p);
5687c478bd9Sstevel@tonic-gate 		break;
5697c478bd9Sstevel@tonic-gate 	case Cat:
5707c478bd9Sstevel@tonic-gate 		if (e == p->r || !first_lit(fpos, p->r))
5717c478bd9Sstevel@tonic-gate 			follow(fpos, p);
5727c478bd9Sstevel@tonic-gate 		break;
5737c478bd9Sstevel@tonic-gate 	default:
5747c478bd9Sstevel@tonic-gate 		break;
5757c478bd9Sstevel@tonic-gate 	}
5767c478bd9Sstevel@tonic-gate }
5777c478bd9Sstevel@tonic-gate 
5787c478bd9Sstevel@tonic-gate /*
5797c478bd9Sstevel@tonic-gate  * first_lit returns NO if e is nullable and in the process,
5807c478bd9Sstevel@tonic-gate  * ets up fpos.
5817c478bd9Sstevel@tonic-gate  */
5827c478bd9Sstevel@tonic-gate static BOOL
first_lit(Positionset * fpos,Expr * e)5837c478bd9Sstevel@tonic-gate first_lit(Positionset *fpos, Expr *e)
5847c478bd9Sstevel@tonic-gate {
5857c478bd9Sstevel@tonic-gate 	BOOL k;
5867c478bd9Sstevel@tonic-gate 
5877c478bd9Sstevel@tonic-gate 	switch (e->type) {
5887c478bd9Sstevel@tonic-gate 	case Literal:
5897c478bd9Sstevel@tonic-gate 	case Dot:
5907c478bd9Sstevel@tonic-gate 	case Charclass:
5917c478bd9Sstevel@tonic-gate 		SET(fpos, e->id, 0)
5927c478bd9Sstevel@tonic-gate 		return (YES);
5937c478bd9Sstevel@tonic-gate 	case EOP:
5947c478bd9Sstevel@tonic-gate 	case Star:
5957c478bd9Sstevel@tonic-gate 	case Quest:
5967c478bd9Sstevel@tonic-gate 		(void) first_lit(fpos, e->l);
5977c478bd9Sstevel@tonic-gate 		return (NO);
5987c478bd9Sstevel@tonic-gate 	case Plus:
5997c478bd9Sstevel@tonic-gate 		return (first_lit(fpos, e->l));
6007c478bd9Sstevel@tonic-gate 	case Cat:
6017c478bd9Sstevel@tonic-gate 		return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r)));
6027c478bd9Sstevel@tonic-gate 	case Alternate:
6037c478bd9Sstevel@tonic-gate 		k = first_lit(fpos, e->r);
6047c478bd9Sstevel@tonic-gate 		return ((BOOL)(first_lit(fpos, e->l) && k));
6057c478bd9Sstevel@tonic-gate 	default:
6067c478bd9Sstevel@tonic-gate 		err("internal error");
6077c478bd9Sstevel@tonic-gate 	}
6087c478bd9Sstevel@tonic-gate 	return (NO);
6097c478bd9Sstevel@tonic-gate }
6107c478bd9Sstevel@tonic-gate 
6117c478bd9Sstevel@tonic-gate static void
efollow(re_re * r,Positionset * fpos,Expr * e)6127c478bd9Sstevel@tonic-gate efollow(re_re *r, Positionset *fpos, Expr *e)
6137c478bd9Sstevel@tonic-gate {
6147c478bd9Sstevel@tonic-gate 	ID i, *p;
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate 	CLEAR(r, fpos);
6177c478bd9Sstevel@tonic-gate 	follow(fpos, e);
6187c478bd9Sstevel@tonic-gate 	e->flen = fpos->count;
6197c478bd9Sstevel@tonic-gate 	e->follow = (ID *)egmalloc(e->flen * sizeof (ID));
6207c478bd9Sstevel@tonic-gate 	p = e->follow;
6217c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6227c478bd9Sstevel@tonic-gate 	printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen);
6237c478bd9Sstevel@tonic-gate #endif
6247c478bd9Sstevel@tonic-gate 	for (i = fpos->last; i > 0; i = fpos->base[i].id) {
6257c478bd9Sstevel@tonic-gate 		*p++ = i;
6267c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6277c478bd9Sstevel@tonic-gate 	printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit);
6287c478bd9Sstevel@tonic-gate #endif
6297c478bd9Sstevel@tonic-gate 	}
6307c478bd9Sstevel@tonic-gate 	if (p != e->follow + e->flen) {
6317c478bd9Sstevel@tonic-gate 		err("internal error");
6327c478bd9Sstevel@tonic-gate 	}
6337c478bd9Sstevel@tonic-gate }
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate static State *
addstate(re_re * r,Positionset * ps,int cnt)6367c478bd9Sstevel@tonic-gate addstate(re_re *r, Positionset *ps, int cnt)
6377c478bd9Sstevel@tonic-gate {
6387c478bd9Sstevel@tonic-gate 	ID j;
6397c478bd9Sstevel@tonic-gate 	FID *p, *q;
6407c478bd9Sstevel@tonic-gate 	State *s;
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate 	if (cnt) {
6437c478bd9Sstevel@tonic-gate 		s = r->states + getstate(r, ps);
6447c478bd9Sstevel@tonic-gate 		(void) memset((char *)s->tab, 0, sizeof (s->tab));
6457c478bd9Sstevel@tonic-gate 		s->cnt = r->istate.cnt;
6467c478bd9Sstevel@tonic-gate 	} else {
6477c478bd9Sstevel@tonic-gate 		s = &r->istate;
6487c478bd9Sstevel@tonic-gate 		s->cnt = -1;
6497c478bd9Sstevel@tonic-gate 	}
6507c478bd9Sstevel@tonic-gate 	s->pos = r->posnext;
6517c478bd9Sstevel@tonic-gate 	r->posnext += ps->count;
6527c478bd9Sstevel@tonic-gate 	s->npos = ps->count;
6537c478bd9Sstevel@tonic-gate 	p = r->posbase + s->pos;
6547c478bd9Sstevel@tonic-gate 	for (j = ps->last; j > 0; p++, j = q->id) {
6557c478bd9Sstevel@tonic-gate 		q = &ps->base[j];
6567c478bd9Sstevel@tonic-gate 		p->id = j;
6577c478bd9Sstevel@tonic-gate 		p->fcount = q->fcount;
6587c478bd9Sstevel@tonic-gate 		if (p->id == r->root->id && s->cnt < p->fcount)
6597c478bd9Sstevel@tonic-gate 			s->cnt = p->fcount;
6607c478bd9Sstevel@tonic-gate 	}
6617c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6627c478bd9Sstevel@tonic-gate 	if (TRACE(3)) {
6637c478bd9Sstevel@tonic-gate 		uchar_t buf[2000];
6647c478bd9Sstevel@tonic-gate 		spr(s->npos, s->pos+r->posbase, buf);
6657c478bd9Sstevel@tonic-gate 		(void) printf("new state[%d] %s%s\n", s-r->states, buf,
6667c478bd9Sstevel@tonic-gate 		    s->cnt?" cnt":"");
6677c478bd9Sstevel@tonic-gate 	}
6687c478bd9Sstevel@tonic-gate #endif
6697c478bd9Sstevel@tonic-gate 	return (s);
6707c478bd9Sstevel@tonic-gate }
6717c478bd9Sstevel@tonic-gate 
6727c478bd9Sstevel@tonic-gate static State *
nextstate(re_re * r,State * s,int a)6737c478bd9Sstevel@tonic-gate nextstate(re_re *r, State *s, int a)
6747c478bd9Sstevel@tonic-gate {
6757c478bd9Sstevel@tonic-gate 	State *news;
6767c478bd9Sstevel@tonic-gate 
6777c478bd9Sstevel@tonic-gate 	CLEAR(r, &r->tmp);
6787c478bd9Sstevel@tonic-gate 	followstate(r, s, a, &r->tmp);
6797c478bd9Sstevel@tonic-gate 	if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp);
6807c478bd9Sstevel@tonic-gate 
6817c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6827c478bd9Sstevel@tonic-gate 	if (TRACE(5)) {
6837c478bd9Sstevel@tonic-gate 		uchar_t buf[2000];
6847c478bd9Sstevel@tonic-gate 		ppr(&r->tmp, buf);
6857c478bd9Sstevel@tonic-gate 		(void) printf("nextstate(%d, '%c'): found %s\n", s-r->states,
6867c478bd9Sstevel@tonic-gate 		    a, buf);
6877c478bd9Sstevel@tonic-gate 	}
6887c478bd9Sstevel@tonic-gate #endif
6897c478bd9Sstevel@tonic-gate 	if (r->tmp.count == 0)
6907c478bd9Sstevel@tonic-gate 		news = &r->istate;
6917c478bd9Sstevel@tonic-gate 	else if ((news = stateof(r, &r->tmp)) == NULL)
6927c478bd9Sstevel@tonic-gate 		news = addstate(r, &r->tmp, 1);
6937c478bd9Sstevel@tonic-gate 	s->tab[a] = news;
6947c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6957c478bd9Sstevel@tonic-gate 	if (TRACE(5)) {
6967c478bd9Sstevel@tonic-gate 		(void) printf("nextstate(%d, '%c'): returning %ld\n",
6977c478bd9Sstevel@tonic-gate 		    s-r->states, a, news);
6987c478bd9Sstevel@tonic-gate 	}
6997c478bd9Sstevel@tonic-gate #endif
7007c478bd9Sstevel@tonic-gate 	return (news);
7017c478bd9Sstevel@tonic-gate }
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate static void
followstate(re_re * r,State * s,int a,Positionset * fpos)7047c478bd9Sstevel@tonic-gate followstate(re_re *r, State *s, int a, Positionset *fpos)
7057c478bd9Sstevel@tonic-gate {
7067c478bd9Sstevel@tonic-gate 	int j;
7077c478bd9Sstevel@tonic-gate 	ID *q, *eq;
7087c478bd9Sstevel@tonic-gate 	Expr *e;
7097c478bd9Sstevel@tonic-gate 
7107c478bd9Sstevel@tonic-gate 	for (j = s->pos; j < (s->pos + s->npos); j++) {
7117c478bd9Sstevel@tonic-gate 		e = r->ptr[r->posbase[j].id];
7127c478bd9Sstevel@tonic-gate 		if (e->type == EOP) continue;
7137c478bd9Sstevel@tonic-gate 		if (match(e, a)) {
7147c478bd9Sstevel@tonic-gate 			if (e->follow == NULL) efollow(r, &r->firstpos, e);
7157c478bd9Sstevel@tonic-gate 			for (q = e->follow, eq = q + e->flen; q < eq; q++) {
7167c478bd9Sstevel@tonic-gate 				SET(fpos, *q, r->posbase[j].fcount + 1)
7177c478bd9Sstevel@tonic-gate #ifdef	DEBUG
718*dc1259b6SToomas Soome 				printf("CHAR %c FC %c COUNT %d\n", a,
719*dc1259b6SToomas Soome 				    r->ptr[*q]->lit, r->posbase[j].fcount+1);
7207c478bd9Sstevel@tonic-gate #endif
7217c478bd9Sstevel@tonic-gate 			}
7227c478bd9Sstevel@tonic-gate 		}
7237c478bd9Sstevel@tonic-gate 	}
7247c478bd9Sstevel@tonic-gate }
7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate static uchar_t *
egmalloc(size_t n)7277c478bd9Sstevel@tonic-gate egmalloc(size_t n)
7287c478bd9Sstevel@tonic-gate {
7297c478bd9Sstevel@tonic-gate 	uchar_t *x;
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate 	x = (uchar_t *)mymalloc(n);
7327c478bd9Sstevel@tonic-gate 	mallocs[nmalloc++] = x;
7337c478bd9Sstevel@tonic-gate 	if (nmalloc >= MAXMALLOCS)
7347c478bd9Sstevel@tonic-gate 		nmalloc = MAXMALLOCS - 1;
7357c478bd9Sstevel@tonic-gate 	return (x);
7367c478bd9Sstevel@tonic-gate }
7377c478bd9Sstevel@tonic-gate 
7387c478bd9Sstevel@tonic-gate #ifdef	DEBUG
7397c478bd9Sstevel@tonic-gate void
ppr(Positionse * ps,char * p)7407c478bd9Sstevel@tonic-gate ppr(Positionse *ps, char *p)
7417c478bd9Sstevel@tonic-gate {
7427c478bd9Sstevel@tonic-gate 	ID n;
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 	if (ps->count < 1) {
7457c478bd9Sstevel@tonic-gate 		(void) sprintf(p, "{}");
7467c478bd9Sstevel@tonic-gate 		return;
7477c478bd9Sstevel@tonic-gate 	}
7487c478bd9Sstevel@tonic-gate 	*p++ = '{';
7497c478bd9Sstevel@tonic-gate 	for (n = ps->last; n > 0; n = ps->base[n].id) {
7507c478bd9Sstevel@tonic-gate 		(void) sprintf(p, "%d,", n);
7517c478bd9Sstevel@tonic-gate 		p = strchr(p, 0);
7527c478bd9Sstevel@tonic-gate 	}
7537c478bd9Sstevel@tonic-gate 	p[-1] = '}';
7547c478bd9Sstevel@tonic-gate }
7557c478bd9Sstevel@tonic-gate 
7567c478bd9Sstevel@tonic-gate void
epr(Expr * e,uchar_t * res)7577c478bd9Sstevel@tonic-gate epr(Expr *e, uchar_t *res)
7587c478bd9Sstevel@tonic-gate {
7597c478bd9Sstevel@tonic-gate 	uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE];
7607c478bd9Sstevel@tonic-gate 	int i;
7617c478bd9Sstevel@tonic-gate 
7627c478bd9Sstevel@tonic-gate 	if (e == NULL) {
7637c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "!0!");
7647c478bd9Sstevel@tonic-gate 		return;
7657c478bd9Sstevel@tonic-gate 	}
7667c478bd9Sstevel@tonic-gate 	switch (e->type) {
7677c478bd9Sstevel@tonic-gate 	case Dot:
7687c478bd9Sstevel@tonic-gate 	case Literal:
7697c478bd9Sstevel@tonic-gate 		spr(e->flen, e->follow, r1);
7707c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "%c%s", e->lit, r1);
7717c478bd9Sstevel@tonic-gate 		break;
7727c478bd9Sstevel@tonic-gate 	case Charclass:
7737c478bd9Sstevel@tonic-gate 		*res++ = '[';
7747c478bd9Sstevel@tonic-gate 		for (i = 0; i < 256; i++)
7757c478bd9Sstevel@tonic-gate 			if (CCL_CHK((uchar_t *)e->r, i)) {
7767c478bd9Sstevel@tonic-gate 				*res++ = i;
7777c478bd9Sstevel@tonic-gate 			}
7787c478bd9Sstevel@tonic-gate 		*res++ = ']';
7797c478bd9Sstevel@tonic-gate 		*res = '\0';
7807c478bd9Sstevel@tonic-gate 		break;
7817c478bd9Sstevel@tonic-gate 	case Cat:
7827c478bd9Sstevel@tonic-gate 		epr(e->l, r1);
7837c478bd9Sstevel@tonic-gate 		epr(e->r, r2);
7847c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "%s%s", r1, r2);
7857c478bd9Sstevel@tonic-gate 		break;
7867c478bd9Sstevel@tonic-gate 	case Alternate:
7877c478bd9Sstevel@tonic-gate 		epr(e->l, r1);
7887c478bd9Sstevel@tonic-gate 		epr(e->r, r2);
7897c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "(%s|%s)", r1, r2);
7907c478bd9Sstevel@tonic-gate 		break;
7917c478bd9Sstevel@tonic-gate 	case Star:
7927c478bd9Sstevel@tonic-gate 		epr(e->l, r1);
7937c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "(%s)*", r1);
7947c478bd9Sstevel@tonic-gate 		break;
7957c478bd9Sstevel@tonic-gate 	case Plus:
7967c478bd9Sstevel@tonic-gate 		epr(e->l, r1);
7977c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "(%s)+", r1);
7987c478bd9Sstevel@tonic-gate 		break;
7997c478bd9Sstevel@tonic-gate 	case Quest:
8007c478bd9Sstevel@tonic-gate 		epr(e->l, r1);
8017c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "(%s)?", r1);
8027c478bd9Sstevel@tonic-gate 		break;
8037c478bd9Sstevel@tonic-gate 	case EOP:
8047c478bd9Sstevel@tonic-gate 		epr(e->l, r1);
8057c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "%s<EOP>", r1);
8067c478bd9Sstevel@tonic-gate 		break;
8077c478bd9Sstevel@tonic-gate 	default:
8087c478bd9Sstevel@tonic-gate 		(void) sprintf(res, "<undef type %d>", e->type);
8097c478bd9Sstevel@tonic-gate 		err(res);
8107c478bd9Sstevel@tonic-gate 		break;
8117c478bd9Sstevel@tonic-gate 	}
8127c478bd9Sstevel@tonic-gate }
8137c478bd9Sstevel@tonic-gate 
8147c478bd9Sstevel@tonic-gate static void
spr(int c,int * p,uchar_t * buf)8157c478bd9Sstevel@tonic-gate spr(int c, int *p, uchar_t *buf)
8167c478bd9Sstevel@tonic-gate {
8177c478bd9Sstevel@tonic-gate 	if (c > 0) {
8187c478bd9Sstevel@tonic-gate 		*buf++ = '{';
8197c478bd9Sstevel@tonic-gate 		*buf = '\0';
8207c478bd9Sstevel@tonic-gate 		while (--c > 0) {
8217c478bd9Sstevel@tonic-gate 			(void) sprintf(buf, "%d,", *p++);
8227c478bd9Sstevel@tonic-gate 			buf = strchr(buf, 0);
8237c478bd9Sstevel@tonic-gate 		}
8247c478bd9Sstevel@tonic-gate 		(void) sprintf(buf, "%d}", *p);
8257c478bd9Sstevel@tonic-gate 	} else
8267c478bd9Sstevel@tonic-gate 		(void) sprintf(buf, "{}");
8277c478bd9Sstevel@tonic-gate }
8287c478bd9Sstevel@tonic-gate #endif
8297c478bd9Sstevel@tonic-gate 
8307c478bd9Sstevel@tonic-gate static void
stateinit(re_re * r)8317c478bd9Sstevel@tonic-gate stateinit(re_re *r)
8327c478bd9Sstevel@tonic-gate {
8337c478bd9Sstevel@tonic-gate 	/* CONSTANTCONDITION */
8347c478bd9Sstevel@tonic-gate 	r->statelim = (sizeof (int) < 4 ? 32 : 128);
8357c478bd9Sstevel@tonic-gate 	r->states = (State *)egmalloc(r->statelim * sizeof (State));
8367c478bd9Sstevel@tonic-gate 
8377c478bd9Sstevel@tonic-gate 	/* CONSTANTCONDITION */
8387c478bd9Sstevel@tonic-gate 	r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192);
8397c478bd9Sstevel@tonic-gate 	r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID));
8407c478bd9Sstevel@tonic-gate 	r->nstates = r->posnext = 0;
8417c478bd9Sstevel@tonic-gate }
8427c478bd9Sstevel@tonic-gate 
8437c478bd9Sstevel@tonic-gate static void
clrstates(re_re * r)8447c478bd9Sstevel@tonic-gate clrstates(re_re *r)
8457c478bd9Sstevel@tonic-gate {
8467c478bd9Sstevel@tonic-gate 	r->nstates = 0;		/* reclaim space for states and positions */
8477c478bd9Sstevel@tonic-gate 	r->posnext = r->posreset;
8487c478bd9Sstevel@tonic-gate 	(void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab));
8497c478bd9Sstevel@tonic-gate }
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate static void
savestate(re_re * r)8527c478bd9Sstevel@tonic-gate savestate(re_re *r)
8537c478bd9Sstevel@tonic-gate {
8547c478bd9Sstevel@tonic-gate 	r->posreset = r->posnext;	/* save for reset */
8557c478bd9Sstevel@tonic-gate }
8567c478bd9Sstevel@tonic-gate 
8577c478bd9Sstevel@tonic-gate static State *
startstate(re_re * r)8587c478bd9Sstevel@tonic-gate startstate(re_re *r)
8597c478bd9Sstevel@tonic-gate {
8607c478bd9Sstevel@tonic-gate 	return (&r->istate);
8617c478bd9Sstevel@tonic-gate }
8627c478bd9Sstevel@tonic-gate 
8637c478bd9Sstevel@tonic-gate static int
getstate(re_re * r,Positionset * ps)8647c478bd9Sstevel@tonic-gate getstate(re_re *r, Positionset *ps)
8657c478bd9Sstevel@tonic-gate {
8667c478bd9Sstevel@tonic-gate 	if (r->nstates >= r->statelim ||
8677c478bd9Sstevel@tonic-gate 	    r->posnext + ps->count >= r->nposalloc) {
8687c478bd9Sstevel@tonic-gate 		clrstates(r);
8697c478bd9Sstevel@tonic-gate #ifdef	DEBUG
8707c478bd9Sstevel@tonic-gate 		printf("%d STATES FLUSHED\n", r->statelim);
8717c478bd9Sstevel@tonic-gate #endif
8727c478bd9Sstevel@tonic-gate 	}
8737c478bd9Sstevel@tonic-gate 	return (r->nstates++);
8747c478bd9Sstevel@tonic-gate }
8757c478bd9Sstevel@tonic-gate 
8767c478bd9Sstevel@tonic-gate static State *
stateof(re_re * r,Positionset * ps)8777c478bd9Sstevel@tonic-gate stateof(re_re *r, Positionset *ps)
8787c478bd9Sstevel@tonic-gate {
8797c478bd9Sstevel@tonic-gate 	State *s;
8807c478bd9Sstevel@tonic-gate 	int i;
8817c478bd9Sstevel@tonic-gate 	FID *p, *e;
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 	for (i = 0, s = r->states; i < r->nstates; i++, s++) {
8847c478bd9Sstevel@tonic-gate 		if (s->npos == ps->count) {
8857c478bd9Sstevel@tonic-gate 			for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++)
8867c478bd9Sstevel@tonic-gate 				if (ps->base[p->id].id == 0 ||
887*dc1259b6SToomas Soome 				    ps->base[p->id].fcount != p->fcount)
888*dc1259b6SToomas Soome 					goto next;
8897c478bd9Sstevel@tonic-gate 			return (s);
8907c478bd9Sstevel@tonic-gate 		}
8917c478bd9Sstevel@tonic-gate 	next:;
8927c478bd9Sstevel@tonic-gate 	}
8937c478bd9Sstevel@tonic-gate 	return (NULL);
8947c478bd9Sstevel@tonic-gate }
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate static re_re *
egprep(PATTERN * pat)8977c478bd9Sstevel@tonic-gate egprep(PATTERN *pat)
8987c478bd9Sstevel@tonic-gate {
8997c478bd9Sstevel@tonic-gate 	re_re *r;
9007c478bd9Sstevel@tonic-gate 
9017c478bd9Sstevel@tonic-gate 	r = (re_re *)egmalloc(sizeof (re_re));
9027c478bd9Sstevel@tonic-gate 	(void) memset((char *)r, 0, sizeof (re_re));
9037c478bd9Sstevel@tonic-gate 
9047c478bd9Sstevel@tonic-gate 	pat->loc1 = pat->expression;
9057c478bd9Sstevel@tonic-gate 	pat->loc2 = pat->expression + strlen((char *)pat->expression);
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate 	parno = 0;
9087c478bd9Sstevel@tonic-gate 	maxid = 1;
9097c478bd9Sstevel@tonic-gate 	r->cmap = pat->cmap;
9107c478bd9Sstevel@tonic-gate 	lex(r, pat);
9117c478bd9Sstevel@tonic-gate 	r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL);
9127c478bd9Sstevel@tonic-gate 	r->maxid = maxid;
9137c478bd9Sstevel@tonic-gate 
9147c478bd9Sstevel@tonic-gate 	eginit(r);
9157c478bd9Sstevel@tonic-gate 	return (r);
9167c478bd9Sstevel@tonic-gate }
9177c478bd9Sstevel@tonic-gate 
9187c478bd9Sstevel@tonic-gate static Expr *
newexpr(Exprtype t,int lit,Expr * left,Expr * right)9197c478bd9Sstevel@tonic-gate newexpr(Exprtype t, int lit, Expr *left, Expr *right)
9207c478bd9Sstevel@tonic-gate {
9217c478bd9Sstevel@tonic-gate 	Expr *e = (Expr *)egmalloc(sizeof (Expr));
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	e->type = t;
9247c478bd9Sstevel@tonic-gate 	e->parent = NULL;
9257c478bd9Sstevel@tonic-gate 	e->lit = lit;
9267c478bd9Sstevel@tonic-gate 
9277c478bd9Sstevel@tonic-gate 	if (e->lit) e->id = maxid++;
9287c478bd9Sstevel@tonic-gate 	else e->id = 0;
9297c478bd9Sstevel@tonic-gate 
9307c478bd9Sstevel@tonic-gate 	if ((e->l = left) != NULL) {
9317c478bd9Sstevel@tonic-gate 		left->parent = e;
9327c478bd9Sstevel@tonic-gate 	}
9337c478bd9Sstevel@tonic-gate 	if ((e->r = right) != NULL) {
9347c478bd9Sstevel@tonic-gate 		right->parent = e;
9357c478bd9Sstevel@tonic-gate 	}
9367c478bd9Sstevel@tonic-gate 	e->follow = NULL;
9377c478bd9Sstevel@tonic-gate 	e->flen = 0;
9387c478bd9Sstevel@tonic-gate 	return (e);
9397c478bd9Sstevel@tonic-gate }
9407c478bd9Sstevel@tonic-gate 
9417c478bd9Sstevel@tonic-gate static void
lex(re_re * r,PATTERN * pat)9427c478bd9Sstevel@tonic-gate lex(re_re *r, PATTERN *pat)
9437c478bd9Sstevel@tonic-gate {
9447c478bd9Sstevel@tonic-gate 	if (pat->loc1 == pat->loc2) {
9457c478bd9Sstevel@tonic-gate 		toktype = EOP;
9467c478bd9Sstevel@tonic-gate 		toklit = -1;
9477c478bd9Sstevel@tonic-gate 	} else switch (toklit = *pat->loc1++) {
9487c478bd9Sstevel@tonic-gate 	case '.':	toktype = Dot; break;
9497c478bd9Sstevel@tonic-gate 	case '*':	toktype = Star; break;
9507c478bd9Sstevel@tonic-gate 	case '+':	toktype = Plus; break;
9517c478bd9Sstevel@tonic-gate 	case '?':	toktype = Quest; break;
9527c478bd9Sstevel@tonic-gate 	case '[':	toktype = Charclass; break;
9537c478bd9Sstevel@tonic-gate 	case '|':	toktype = Alternate; break;
9547c478bd9Sstevel@tonic-gate 	case '(':	toktype = Lpar; break;
9557c478bd9Sstevel@tonic-gate 	case ')':	toktype = Rpar; break;
9567c478bd9Sstevel@tonic-gate 	case '\\':	toktype = Backslash;
9577c478bd9Sstevel@tonic-gate 			if (pat->loc1 == pat->loc2) {
9587c478bd9Sstevel@tonic-gate 				err("syntax error - missing character "
9597c478bd9Sstevel@tonic-gate 				    "after \\");
960*dc1259b6SToomas Soome 			} else {
961*dc1259b6SToomas Soome 				toklit = r->cmap[*pat->loc1++];
962*dc1259b6SToomas Soome 			}
9637c478bd9Sstevel@tonic-gate 			break;
9647c478bd9Sstevel@tonic-gate 	case '^': case '$':	toktype = Literal; toklit = NL; break;
9657c478bd9Sstevel@tonic-gate 	default:	toktype = Literal; toklit = r->cmap[toklit]; break;
9667c478bd9Sstevel@tonic-gate 	}
9677c478bd9Sstevel@tonic-gate }
9687c478bd9Sstevel@tonic-gate 
9697c478bd9Sstevel@tonic-gate static int
ccl(PATTERN * pat,uchar_t * tab)9707c478bd9Sstevel@tonic-gate ccl(PATTERN *pat, uchar_t *tab)
9717c478bd9Sstevel@tonic-gate {
9727c478bd9Sstevel@tonic-gate 	int i;
9737c478bd9Sstevel@tonic-gate 	int range = 0;
9747c478bd9Sstevel@tonic-gate 	int lastc = -1;
9757c478bd9Sstevel@tonic-gate 	int count = 0;
9767c478bd9Sstevel@tonic-gate 	BOOL comp = NO;
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	(void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t));
9797c478bd9Sstevel@tonic-gate 	if (*pat->loc1 == '^') {
9807c478bd9Sstevel@tonic-gate 		pat->loc1++;
9817c478bd9Sstevel@tonic-gate 		comp = YES;
9827c478bd9Sstevel@tonic-gate 	}
9837c478bd9Sstevel@tonic-gate 	if (*pat->loc1 == ']') {
9847c478bd9Sstevel@tonic-gate 		uchar_t c = pat->cmap[*pat->loc1];
9857c478bd9Sstevel@tonic-gate 		CCL_SET(tab, c);
9867c478bd9Sstevel@tonic-gate 		lastc = *pat->loc1++;
9877c478bd9Sstevel@tonic-gate 	}
9887c478bd9Sstevel@tonic-gate 	/* scan for chars */
989*dc1259b6SToomas Soome 	for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']'); pat->loc1++) {
9907c478bd9Sstevel@tonic-gate 		if (*pat->loc1 == '-') {
9917c478bd9Sstevel@tonic-gate 			if (lastc < 0) CCL_SET(tab, pat->cmap['-']);
9927c478bd9Sstevel@tonic-gate 			else range = 1;
9937c478bd9Sstevel@tonic-gate 			continue;
9947c478bd9Sstevel@tonic-gate 		}
9957c478bd9Sstevel@tonic-gate 		if (range) {
9967c478bd9Sstevel@tonic-gate 			for (i = *pat->loc1; i >= lastc; i--) {
9977c478bd9Sstevel@tonic-gate 				CCL_SET(tab, pat->cmap[i]);
9987c478bd9Sstevel@tonic-gate 			}
9997c478bd9Sstevel@tonic-gate 		} else {
10007c478bd9Sstevel@tonic-gate 			uchar_t c = pat->cmap[*pat->loc1];
10017c478bd9Sstevel@tonic-gate 			CCL_SET(tab, c);
10027c478bd9Sstevel@tonic-gate 		}
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 		range = 0;
10057c478bd9Sstevel@tonic-gate 
10067c478bd9Sstevel@tonic-gate 		lastc = *pat->loc1;
10077c478bd9Sstevel@tonic-gate 	}
10087c478bd9Sstevel@tonic-gate 	if (range) CCL_SET(tab, pat->cmap['-']);
10097c478bd9Sstevel@tonic-gate 
10107c478bd9Sstevel@tonic-gate 	if (pat->loc1 < pat->loc2) pat->loc1++;
10117c478bd9Sstevel@tonic-gate 	else err("syntax error - missing ]");
10127c478bd9Sstevel@tonic-gate 
10137c478bd9Sstevel@tonic-gate 	if (comp) {
10147c478bd9Sstevel@tonic-gate 		CCL_SET(tab, pat->cmap[NL]);
10157c478bd9Sstevel@tonic-gate 		for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff;
10167c478bd9Sstevel@tonic-gate 	}
10177c478bd9Sstevel@tonic-gate 	for (i = 0; i < 256; i++) {
10187c478bd9Sstevel@tonic-gate 		if (pat->cmap[i] != i) CCL_CLR(tab, i);
10197c478bd9Sstevel@tonic-gate 		if (CCL_CHK(tab, i)) {
10207c478bd9Sstevel@tonic-gate 			lastc = i;
10217c478bd9Sstevel@tonic-gate 			count++;
10227c478bd9Sstevel@tonic-gate 		}
10237c478bd9Sstevel@tonic-gate 	}
10247c478bd9Sstevel@tonic-gate 	if (count == 1)
10257c478bd9Sstevel@tonic-gate 		*tab = (char)lastc;
10267c478bd9Sstevel@tonic-gate 	return (count);
10277c478bd9Sstevel@tonic-gate }
10287c478bd9Sstevel@tonic-gate 
10297c478bd9Sstevel@tonic-gate /*
10307c478bd9Sstevel@tonic-gate  * egrep patterns:
10317c478bd9Sstevel@tonic-gate  *
10327c478bd9Sstevel@tonic-gate  * Alternation:	d0:	d1 { '|' d1 }*
10337c478bd9Sstevel@tonic-gate  * Concatenation:	d1:	d2 { d2 }*
10347c478bd9Sstevel@tonic-gate  * Repetition:	d2:	d3 { '*' | '?' | '+' }
10357c478bd9Sstevel@tonic-gate  * Literal:	d3:	lit | '.' | '[]' | '(' d0 ')'
10367c478bd9Sstevel@tonic-gate  */
10377c478bd9Sstevel@tonic-gate 
10387c478bd9Sstevel@tonic-gate static Expr *
d3(re_re * r,PATTERN * pat)10397c478bd9Sstevel@tonic-gate d3(re_re *r, PATTERN *pat)
10407c478bd9Sstevel@tonic-gate {
10417c478bd9Sstevel@tonic-gate 	Expr *e;
10427c478bd9Sstevel@tonic-gate 	uchar_t *tab;
10437c478bd9Sstevel@tonic-gate 	int count;
10447c478bd9Sstevel@tonic-gate 
10457c478bd9Sstevel@tonic-gate 	switch (toktype) {
10467c478bd9Sstevel@tonic-gate 	case Backslash:
10477c478bd9Sstevel@tonic-gate 	case Literal:
10487c478bd9Sstevel@tonic-gate 		e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL);
10497c478bd9Sstevel@tonic-gate 		lex(r, pat);
10507c478bd9Sstevel@tonic-gate 		break;
10517c478bd9Sstevel@tonic-gate 	case Dot:
10527c478bd9Sstevel@tonic-gate 		e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL);
10537c478bd9Sstevel@tonic-gate 		lex(r, pat);
10547c478bd9Sstevel@tonic-gate 		break;
10557c478bd9Sstevel@tonic-gate 	case Charclass:
10567c478bd9Sstevel@tonic-gate 		tab = egmalloc(CCL_SIZ * sizeof (uchar_t));
10577c478bd9Sstevel@tonic-gate 		count = ccl(pat, tab);
10587c478bd9Sstevel@tonic-gate 		if (count == 1) {
10597c478bd9Sstevel@tonic-gate 			toklit = *tab;
10607c478bd9Sstevel@tonic-gate 			e = newexpr(Literal, toklit, (Expr *)NULL,
10617c478bd9Sstevel@tonic-gate 			    (Expr *)NULL);
10627c478bd9Sstevel@tonic-gate 		} else {
10637c478bd9Sstevel@tonic-gate 			e = newexpr(Charclass, '[', (Expr *)NULL, (Expr *)NULL);
10647c478bd9Sstevel@tonic-gate 			e->l = (Expr *)count;	/* number of chars */
10657c478bd9Sstevel@tonic-gate 			e->r = (Expr *)tab;	/* bitmap of chars */
10667c478bd9Sstevel@tonic-gate 		}
10677c478bd9Sstevel@tonic-gate 		lex(r, pat);
10687c478bd9Sstevel@tonic-gate 		break;
10697c478bd9Sstevel@tonic-gate 	case Lpar:
10707c478bd9Sstevel@tonic-gate 		lex(r, pat);
10717c478bd9Sstevel@tonic-gate 		count = ++parno;
10727c478bd9Sstevel@tonic-gate 		e = d0(r, pat);
10737c478bd9Sstevel@tonic-gate 		if (toktype == Rpar)
10747c478bd9Sstevel@tonic-gate 			lex(r, pat);
10757c478bd9Sstevel@tonic-gate 		else
10767c478bd9Sstevel@tonic-gate 			err("syntax error - missing )");
10777c478bd9Sstevel@tonic-gate 		return (e);
10787c478bd9Sstevel@tonic-gate 	default:
10797c478bd9Sstevel@tonic-gate 		err("syntax error");
10807c478bd9Sstevel@tonic-gate 		e = NULL;
10817c478bd9Sstevel@tonic-gate 	}
10827c478bd9Sstevel@tonic-gate 	return (e);
10837c478bd9Sstevel@tonic-gate }
10847c478bd9Sstevel@tonic-gate 
10857c478bd9Sstevel@tonic-gate static Expr *
d2(re_re * r,PATTERN * pat)10867c478bd9Sstevel@tonic-gate d2(re_re *r, PATTERN *pat)
10877c478bd9Sstevel@tonic-gate {
10887c478bd9Sstevel@tonic-gate 	Expr *e;
10897c478bd9Sstevel@tonic-gate 	Exprtype t;
10907c478bd9Sstevel@tonic-gate 
10917c478bd9Sstevel@tonic-gate 	e = d3(r, pat);
10927c478bd9Sstevel@tonic-gate 	while ((toktype == Star) || (toktype == Plus) || (toktype == Quest)) {
10937c478bd9Sstevel@tonic-gate 		t = toktype;
10947c478bd9Sstevel@tonic-gate 		lex(r, pat);
10957c478bd9Sstevel@tonic-gate 		e = newexpr(t, 0, e, (Expr *)NULL);
10967c478bd9Sstevel@tonic-gate 	}
10977c478bd9Sstevel@tonic-gate 	return (e);
10987c478bd9Sstevel@tonic-gate }
10997c478bd9Sstevel@tonic-gate 
11007c478bd9Sstevel@tonic-gate static Expr *
d1(re_re * r,PATTERN * pat)11017c478bd9Sstevel@tonic-gate d1(re_re *r, PATTERN *pat)
11027c478bd9Sstevel@tonic-gate {
11037c478bd9Sstevel@tonic-gate 	Expr *e, *f;
11047c478bd9Sstevel@tonic-gate 
11057c478bd9Sstevel@tonic-gate 	e = d2(r, pat);
11067c478bd9Sstevel@tonic-gate 	while ((toktype == Literal) || (toktype == Dot) || (toktype == Lpar) ||
1107*dc1259b6SToomas Soome 	    (toktype == Backslash) || (toktype == Charclass)) {
11087c478bd9Sstevel@tonic-gate 		f = d2(r, pat);
11097c478bd9Sstevel@tonic-gate 		e = newexpr(Cat, 0, e, f);
11107c478bd9Sstevel@tonic-gate 	}
11117c478bd9Sstevel@tonic-gate 	return (e);
11127c478bd9Sstevel@tonic-gate }
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate static Expr *
d0(re_re * r,PATTERN * pat)11157c478bd9Sstevel@tonic-gate d0(re_re *r, PATTERN *pat)
11167c478bd9Sstevel@tonic-gate {
11177c478bd9Sstevel@tonic-gate 	Expr *e, *f;
11187c478bd9Sstevel@tonic-gate 
11197c478bd9Sstevel@tonic-gate 	e = d1(r, pat);
11207c478bd9Sstevel@tonic-gate 	while (toktype == Alternate) {
11217c478bd9Sstevel@tonic-gate 		lex(r, pat);
11227c478bd9Sstevel@tonic-gate 		if (toktype == EOP)
11237c478bd9Sstevel@tonic-gate 			continue;
11247c478bd9Sstevel@tonic-gate 		f = d1(r, pat);
11257c478bd9Sstevel@tonic-gate 		e = newexpr(Alternate, 0, e, f);
11267c478bd9Sstevel@tonic-gate 	}
11277c478bd9Sstevel@tonic-gate 	return (e);
11287c478bd9Sstevel@tonic-gate }
11297c478bd9Sstevel@tonic-gate 
11307c478bd9Sstevel@tonic-gate static Expr *
eall(re_re * r,PATTERN * pat)11317c478bd9Sstevel@tonic-gate eall(re_re *r, PATTERN *pat)
11327c478bd9Sstevel@tonic-gate {
11337c478bd9Sstevel@tonic-gate 	Expr *e;
11347c478bd9Sstevel@tonic-gate 
11357c478bd9Sstevel@tonic-gate 	while (toktype == Alternate)	/* bogus but user-friendly */
11367c478bd9Sstevel@tonic-gate 		lex(r, pat);
11377c478bd9Sstevel@tonic-gate 	e = d0(r, pat);
11387c478bd9Sstevel@tonic-gate 	while (toktype == Alternate)	/* bogus but user-friendly */
11397c478bd9Sstevel@tonic-gate 		lex(r, pat);
11407c478bd9Sstevel@tonic-gate 	if (toktype != EOP)
11417c478bd9Sstevel@tonic-gate 		err("syntax error");
11427c478bd9Sstevel@tonic-gate 	return (e);
11437c478bd9Sstevel@tonic-gate }
11447c478bd9Sstevel@tonic-gate 
11457c478bd9Sstevel@tonic-gate static void
err(char * s)11467c478bd9Sstevel@tonic-gate err(char *s)
11477c478bd9Sstevel@tonic-gate {
11487c478bd9Sstevel@tonic-gate 	message = s;
11497c478bd9Sstevel@tonic-gate 	longjmp(env, 1);
11507c478bd9Sstevel@tonic-gate }
11517c478bd9Sstevel@tonic-gate 
11527c478bd9Sstevel@tonic-gate static int
re_lit(PATTERN * pat,uchar_t ** b,uchar_t ** e)11537c478bd9Sstevel@tonic-gate re_lit(PATTERN *pat, uchar_t **b, uchar_t **e)
11547c478bd9Sstevel@tonic-gate {
11557c478bd9Sstevel@tonic-gate 	bestlen = 0;
11567c478bd9Sstevel@tonic-gate 	pat->fullmatch = YES;
11577c478bd9Sstevel@tonic-gate 	START
11587c478bd9Sstevel@tonic-gate 	traverse(pat, pat->re_ptr->root->l);
11597c478bd9Sstevel@tonic-gate 	FINISH
11607c478bd9Sstevel@tonic-gate 	if (bestlen < 2)
11617c478bd9Sstevel@tonic-gate 		return (0);
11627c478bd9Sstevel@tonic-gate 	*b = egmalloc(bestlen * sizeof (uchar_t));
11637c478bd9Sstevel@tonic-gate 	(void) memmove(*b, best, bestlen);
11647c478bd9Sstevel@tonic-gate 	*e = *b + bestlen - 1;
11657c478bd9Sstevel@tonic-gate 	return (bestlen - 1);
11667c478bd9Sstevel@tonic-gate }
11677c478bd9Sstevel@tonic-gate 
11687c478bd9Sstevel@tonic-gate static void
traverse(PATTERN * pat,Expr * e)11697c478bd9Sstevel@tonic-gate traverse(PATTERN *pat, Expr *e)
11707c478bd9Sstevel@tonic-gate {
11717c478bd9Sstevel@tonic-gate 	switch (e->type) {
11727c478bd9Sstevel@tonic-gate 	case Literal:
11737c478bd9Sstevel@tonic-gate 		ADDL(e->lit)
11747c478bd9Sstevel@tonic-gate 		break;
11757c478bd9Sstevel@tonic-gate 	case Cat:
11767c478bd9Sstevel@tonic-gate 		traverse(pat, e->l);
11777c478bd9Sstevel@tonic-gate 		traverse(pat, e->r);
11787c478bd9Sstevel@tonic-gate 		break;
11797c478bd9Sstevel@tonic-gate 	case Plus:
11807c478bd9Sstevel@tonic-gate 		traverse(pat, e->l);
11817c478bd9Sstevel@tonic-gate 		FINISH	/* can't go on past a + */
11827c478bd9Sstevel@tonic-gate 		pat->fullmatch = NO;
11837c478bd9Sstevel@tonic-gate 		START	/* but we can start with one! */
11847c478bd9Sstevel@tonic-gate 		traverse(pat, e->l);
11857c478bd9Sstevel@tonic-gate 		break;
11867c478bd9Sstevel@tonic-gate 	default:
11877c478bd9Sstevel@tonic-gate 		FINISH
11887c478bd9Sstevel@tonic-gate 		pat->fullmatch = NO;
11897c478bd9Sstevel@tonic-gate 		START
11907c478bd9Sstevel@tonic-gate 		break;
11917c478bd9Sstevel@tonic-gate 	}
11927c478bd9Sstevel@tonic-gate }
11937c478bd9Sstevel@tonic-gate 
11947c478bd9Sstevel@tonic-gate static re_bm *
re_bmcomp(uchar_t * pb,uchar_t * pe,uchar_t * cmap)11957c478bd9Sstevel@tonic-gate re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap)
11967c478bd9Sstevel@tonic-gate {
11977c478bd9Sstevel@tonic-gate 	int j;
11987c478bd9Sstevel@tonic-gate 	int delta[256];
11997c478bd9Sstevel@tonic-gate 	re_bm *b;
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 	b = (re_bm *)egmalloc(sizeof (*b));
12027c478bd9Sstevel@tonic-gate 
12037c478bd9Sstevel@tonic-gate 	b->patlen = pe - pb;
12047c478bd9Sstevel@tonic-gate 	b->cmap = cmap;
12057c478bd9Sstevel@tonic-gate 	b->bmpat = pb;
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate 	delta_2(b);
12087c478bd9Sstevel@tonic-gate 
12097c478bd9Sstevel@tonic-gate 	for (j = 0; j < 256; j++)
12107c478bd9Sstevel@tonic-gate 		delta[j] = b->patlen;
12117c478bd9Sstevel@tonic-gate 
12127c478bd9Sstevel@tonic-gate 	for (pe--; pb < pe; pb++)
12137c478bd9Sstevel@tonic-gate 		delta[b->cmap[*pb]] = pe - pb;
12147c478bd9Sstevel@tonic-gate 
12157c478bd9Sstevel@tonic-gate 	delta[b->cmap[*pb]] = LARGE;
12167c478bd9Sstevel@tonic-gate 
12177c478bd9Sstevel@tonic-gate 	for (j = 0; j < 256; j++)
12187c478bd9Sstevel@tonic-gate 		b->delta0[j] = delta[b->cmap[j]];
12197c478bd9Sstevel@tonic-gate 	return (b);
12207c478bd9Sstevel@tonic-gate }
12217c478bd9Sstevel@tonic-gate 
12227c478bd9Sstevel@tonic-gate static void
delta_2(re_bm * b)12237c478bd9Sstevel@tonic-gate delta_2(re_bm *b)
12247c478bd9Sstevel@tonic-gate {
12257c478bd9Sstevel@tonic-gate 	int m = b->patlen;
12267c478bd9Sstevel@tonic-gate 	int i, k, j;
12277c478bd9Sstevel@tonic-gate 
12287c478bd9Sstevel@tonic-gate 	b->delta2 = (int *)egmalloc(m * sizeof (int));
12297c478bd9Sstevel@tonic-gate 
12307c478bd9Sstevel@tonic-gate 	for (j = 0; j < m; j++) {
12317c478bd9Sstevel@tonic-gate 		k = 1;
12327c478bd9Sstevel@tonic-gate again:
12337c478bd9Sstevel@tonic-gate 		while (k <= j && b->bmpat[j-k] == b->bmpat[j]) k++;
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate 		for (i = j + 1; i < m; i++) {
12367c478bd9Sstevel@tonic-gate 			if (k <= i && b->bmpat[i-k] != b->bmpat[i]) {
12377c478bd9Sstevel@tonic-gate 				k++;
12387c478bd9Sstevel@tonic-gate 				goto again;
12397c478bd9Sstevel@tonic-gate 			}
12407c478bd9Sstevel@tonic-gate 		}
12417c478bd9Sstevel@tonic-gate 		b->delta2[j] = k + m - j - 1;
12427c478bd9Sstevel@tonic-gate 	}
12437c478bd9Sstevel@tonic-gate }
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate static BOOL
re_bmexec(PATTERN * pat,uchar_t * s,uchar_t * e,uchar_t ** mb,uchar_t ** me)12467c478bd9Sstevel@tonic-gate re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, uchar_t **me)
12477c478bd9Sstevel@tonic-gate {
12487c478bd9Sstevel@tonic-gate 	re_bm *b = pat->bm_ptr;
12497c478bd9Sstevel@tonic-gate 	uchar_t *sp;
12507c478bd9Sstevel@tonic-gate 	int k;
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 	s += b->patlen - 1;
12537c478bd9Sstevel@tonic-gate 	while ((unsigned long)s < (unsigned long)e) {
12547c478bd9Sstevel@tonic-gate 		while ((unsigned long)(s += b->delta0[*s]) < (unsigned long)e)
12557c478bd9Sstevel@tonic-gate 			;
12567c478bd9Sstevel@tonic-gate 		if ((unsigned long)s < (unsigned long)(e + b->patlen))
12577c478bd9Sstevel@tonic-gate 			return (NO); /* no match */
12587c478bd9Sstevel@tonic-gate 		s -= LARGE;
12597c478bd9Sstevel@tonic-gate 		for (k = b->patlen-2, sp = s-1; k >= 0; k--) {
12607c478bd9Sstevel@tonic-gate 			if (b->cmap[*sp--] != b->bmpat[k]) break;
12617c478bd9Sstevel@tonic-gate 		}
12627c478bd9Sstevel@tonic-gate 		if (k < 0) {
12637c478bd9Sstevel@tonic-gate 			*mb = ++sp;
12647c478bd9Sstevel@tonic-gate 			*me = s+1;
12657c478bd9Sstevel@tonic-gate 			if (grepmatch(pat, mb, me))
12667c478bd9Sstevel@tonic-gate 				return (YES);
12677c478bd9Sstevel@tonic-gate 			s = *mb;
12687c478bd9Sstevel@tonic-gate 		} else if (k < 0) {
12697c478bd9Sstevel@tonic-gate 			s++;
12707c478bd9Sstevel@tonic-gate 		} else {
12717c478bd9Sstevel@tonic-gate 			int j;
12727c478bd9Sstevel@tonic-gate 			j = b->delta2[k];
12737c478bd9Sstevel@tonic-gate 			k = b->delta0[*++sp];
12747c478bd9Sstevel@tonic-gate 			if ((j > k) || (k == (int)LARGE))
12757c478bd9Sstevel@tonic-gate 				k = j;
12767c478bd9Sstevel@tonic-gate 			s = sp + k;
12777c478bd9Sstevel@tonic-gate 		}
12787c478bd9Sstevel@tonic-gate 	}
12797c478bd9Sstevel@tonic-gate 	return (NO);
12807c478bd9Sstevel@tonic-gate }
12817c478bd9Sstevel@tonic-gate 
12827c478bd9Sstevel@tonic-gate static re_cw *
re_recw(re_re * r,uchar_t * map)12837c478bd9Sstevel@tonic-gate re_recw(re_re *r, uchar_t *map)
12847c478bd9Sstevel@tonic-gate {
12857c478bd9Sstevel@tonic-gate 	Expr *e, *root = r->root;
12867c478bd9Sstevel@tonic-gate 	re_cw *pat;
12877c478bd9Sstevel@tonic-gate 	uchar_t *buf;
12887c478bd9Sstevel@tonic-gate 
12897c478bd9Sstevel@tonic-gate 	if (root->type != EOP)
12907c478bd9Sstevel@tonic-gate 		return (NULL);
12917c478bd9Sstevel@tonic-gate 	e = root->l;
12927c478bd9Sstevel@tonic-gate 	pat = re_cwinit(map);
12937c478bd9Sstevel@tonic-gate 	buf = (uchar_t *)egmalloc(20000 * sizeof (uchar_t));
12947c478bd9Sstevel@tonic-gate 	if (!altlist(e, buf, pat)) {
12957c478bd9Sstevel@tonic-gate 		return (NULL);
12967c478bd9Sstevel@tonic-gate 	}
12977c478bd9Sstevel@tonic-gate 	re_cwcomp(pat);
12987c478bd9Sstevel@tonic-gate 	return (pat);
12997c478bd9Sstevel@tonic-gate }
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate static BOOL
altlist(Expr * e,uchar_t * buf,re_cw * pat)13027c478bd9Sstevel@tonic-gate altlist(Expr *e, uchar_t *buf, re_cw *pat)
13037c478bd9Sstevel@tonic-gate {
13047c478bd9Sstevel@tonic-gate 	if (e->type == Alternate)
13057c478bd9Sstevel@tonic-gate 		return ((BOOL)(altlist(e->l, buf, pat) &&
13067c478bd9Sstevel@tonic-gate 		    altlist(e->r, buf, pat)));
13077c478bd9Sstevel@tonic-gate 	return (word(e, buf, pat));
13087c478bd9Sstevel@tonic-gate }
13097c478bd9Sstevel@tonic-gate 
13107c478bd9Sstevel@tonic-gate static BOOL
word(Expr * e,uchar_t * buf,re_cw * pat)13117c478bd9Sstevel@tonic-gate word(Expr *e, uchar_t *buf, re_cw *pat)
13127c478bd9Sstevel@tonic-gate {
13137c478bd9Sstevel@tonic-gate 	static uchar_t *p;
13147c478bd9Sstevel@tonic-gate 
13157c478bd9Sstevel@tonic-gate 	if (buf) p = buf;
13167c478bd9Sstevel@tonic-gate 
13177c478bd9Sstevel@tonic-gate 	if (e->type == Cat) {
13187c478bd9Sstevel@tonic-gate 		if (!word(e->l, (uchar_t *)NULL, pat))
13197c478bd9Sstevel@tonic-gate 			return (NO);
13207c478bd9Sstevel@tonic-gate 		if (!word(e->r, (uchar_t *)NULL, pat))
13217c478bd9Sstevel@tonic-gate 			return (NO);
13227c478bd9Sstevel@tonic-gate 	} else if (e->type == Literal)
13237c478bd9Sstevel@tonic-gate 		*p++ = e->lit;
13247c478bd9Sstevel@tonic-gate 	else
13257c478bd9Sstevel@tonic-gate 		return (NO);
13267c478bd9Sstevel@tonic-gate 
13277c478bd9Sstevel@tonic-gate 	if (buf)
13287c478bd9Sstevel@tonic-gate 		re_cwadd(pat, buf, p);
13297c478bd9Sstevel@tonic-gate 	return (YES);
13307c478bd9Sstevel@tonic-gate }
13317c478bd9Sstevel@tonic-gate 
13327c478bd9Sstevel@tonic-gate static re_cw *
re_cwinit(uchar_t * cmap)13337c478bd9Sstevel@tonic-gate re_cwinit(uchar_t *cmap)
13347c478bd9Sstevel@tonic-gate {
13357c478bd9Sstevel@tonic-gate 	re_cw *c;
13367c478bd9Sstevel@tonic-gate 
13377c478bd9Sstevel@tonic-gate 	froot = NULL;
13387c478bd9Sstevel@tonic-gate 	next_node = NULL;
13397c478bd9Sstevel@tonic-gate 	next_link = NULL;
13407c478bd9Sstevel@tonic-gate 	c = (re_cw *)egmalloc(sizeof (*c));
13417c478bd9Sstevel@tonic-gate 	c->nodeid = 0;
13427c478bd9Sstevel@tonic-gate 	c->maxdepth = 0;
13437c478bd9Sstevel@tonic-gate 	c->mindepth = 10000;
13447c478bd9Sstevel@tonic-gate 	c->root = newnode(c, 0);
13457c478bd9Sstevel@tonic-gate 	c->cmap = cmap;
13467c478bd9Sstevel@tonic-gate 	return (c);
13477c478bd9Sstevel@tonic-gate }
13487c478bd9Sstevel@tonic-gate 
13497c478bd9Sstevel@tonic-gate static void
re_cwadd(re_cw * c,uchar_t * s,uchar_t * e)13507c478bd9Sstevel@tonic-gate re_cwadd(re_cw *c, uchar_t *s, uchar_t *e)
13517c478bd9Sstevel@tonic-gate {
13527c478bd9Sstevel@tonic-gate 	Node *p, *state;
13537c478bd9Sstevel@tonic-gate 	Link *l;
13547c478bd9Sstevel@tonic-gate 	int depth;
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 	state = c->root;
13577c478bd9Sstevel@tonic-gate 	while (s <= --e) {
13587c478bd9Sstevel@tonic-gate 		for (l = state->alts; l; l = l->next)
13597c478bd9Sstevel@tonic-gate 			if (l->lit == *e)
13607c478bd9Sstevel@tonic-gate 				break;
13617c478bd9Sstevel@tonic-gate 		if (l == NULL)
13627c478bd9Sstevel@tonic-gate 			break;
13637c478bd9Sstevel@tonic-gate 		else
13647c478bd9Sstevel@tonic-gate 			state = l->node;
13657c478bd9Sstevel@tonic-gate 	}
13667c478bd9Sstevel@tonic-gate 	if (s <= e) {
13677c478bd9Sstevel@tonic-gate 		depth = state->d+1;
13687c478bd9Sstevel@tonic-gate 		l = newlink(*e--, p = newnode(c, depth++));
13697c478bd9Sstevel@tonic-gate 		l->next = state->alts;
13707c478bd9Sstevel@tonic-gate 		state->alts = l;
13717c478bd9Sstevel@tonic-gate 		state = p;
13727c478bd9Sstevel@tonic-gate 		while (s <= e) {
13737c478bd9Sstevel@tonic-gate 			state->alts = newlink(*e--, p = newnode(c, depth++));
13747c478bd9Sstevel@tonic-gate 			state = p;
13757c478bd9Sstevel@tonic-gate 		}
13767c478bd9Sstevel@tonic-gate 		if (c->mindepth >= depth) c->mindepth = depth-1;
13777c478bd9Sstevel@tonic-gate 	}
13787c478bd9Sstevel@tonic-gate 	state->out = 1;
13797c478bd9Sstevel@tonic-gate }
13807c478bd9Sstevel@tonic-gate 
13817c478bd9Sstevel@tonic-gate #ifdef	DEBUG
13827c478bd9Sstevel@tonic-gate static
pr(Node * n)13837c478bd9Sstevel@tonic-gate pr(Node *n)
13847c478bd9Sstevel@tonic-gate {
13857c478bd9Sstevel@tonic-gate 	Link *l;
13867c478bd9Sstevel@tonic-gate 
13877c478bd9Sstevel@tonic-gate 	printf("%d[%d,%d]: ", n->id, n->shift1, n->shift2);
13887c478bd9Sstevel@tonic-gate 	for (l = n->alts; l; l = l->next) {
13897c478bd9Sstevel@tonic-gate 		printf("edge from \"%d\" to \"%d\" label {\"%c\"};",
13907c478bd9Sstevel@tonic-gate 		    n->id, l->node->id, l->lit);
13917c478bd9Sstevel@tonic-gate 		if (l->node->out) {
13927c478bd9Sstevel@tonic-gate 			printf(" draw \"%d\" as Doublecircle;", l->node->id);
13937c478bd9Sstevel@tonic-gate 		}
13947c478bd9Sstevel@tonic-gate 		if (l->node->fail) {
13957c478bd9Sstevel@tonic-gate 			printf(" edge from \"%d\" to \"%d\" dashed;",
13967c478bd9Sstevel@tonic-gate 			    l->node->id, l->node->fail->id);
13977c478bd9Sstevel@tonic-gate 		}
13987c478bd9Sstevel@tonic-gate 		printf("\n");
13997c478bd9Sstevel@tonic-gate 		pr(l->node);
14007c478bd9Sstevel@tonic-gate 	}
14017c478bd9Sstevel@tonic-gate }
14027c478bd9Sstevel@tonic-gate #endif
14037c478bd9Sstevel@tonic-gate 
14047c478bd9Sstevel@tonic-gate static void
fail(Node * root)14057c478bd9Sstevel@tonic-gate fail(Node *root)
14067c478bd9Sstevel@tonic-gate {
14077c478bd9Sstevel@tonic-gate 	Link *qhead = NULL, *qtail = NULL;
14087c478bd9Sstevel@tonic-gate 	Link *l, *ll;
14097c478bd9Sstevel@tonic-gate 	Link *t;
14107c478bd9Sstevel@tonic-gate 	Node *state, *r, *s;
14117c478bd9Sstevel@tonic-gate 	int a;
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate 	for (l = root->alts; l; l = l->next) {
14147c478bd9Sstevel@tonic-gate 		ADD(l->node);
14157c478bd9Sstevel@tonic-gate 		l->node->fail = root;
14167c478bd9Sstevel@tonic-gate 	}
14177c478bd9Sstevel@tonic-gate 	while (qhead) {
14187c478bd9Sstevel@tonic-gate 		r = qhead->node;
14197c478bd9Sstevel@tonic-gate 		DEL();
14207c478bd9Sstevel@tonic-gate 		for (l = r->alts; l; l = l->next) {
14217c478bd9Sstevel@tonic-gate 			s = l->node;
14227c478bd9Sstevel@tonic-gate 			a = l->lit;
14237c478bd9Sstevel@tonic-gate 			ADD(s);
14247c478bd9Sstevel@tonic-gate 			state = r->fail;
14257c478bd9Sstevel@tonic-gate 			while (state) {
14267c478bd9Sstevel@tonic-gate 				for (ll = state->alts; ll; ll = ll->next)
14277c478bd9Sstevel@tonic-gate 					if (ll->lit == a)
14287c478bd9Sstevel@tonic-gate 						break;
14297c478bd9Sstevel@tonic-gate 				if (ll || (state == root)) {
14307c478bd9Sstevel@tonic-gate 					if (ll)
14317c478bd9Sstevel@tonic-gate 						state = ll->node;
14327c478bd9Sstevel@tonic-gate 					/*
14337c478bd9Sstevel@tonic-gate 					 *	do it here as only other exit is
14347c478bd9Sstevel@tonic-gate 					 *	state 0
14357c478bd9Sstevel@tonic-gate 					 */
14367c478bd9Sstevel@tonic-gate 					if (state->out) {
14377c478bd9Sstevel@tonic-gate 						s->out = 1;
14387c478bd9Sstevel@tonic-gate 					}
14397c478bd9Sstevel@tonic-gate 					break;
14407c478bd9Sstevel@tonic-gate 				} else
14417c478bd9Sstevel@tonic-gate 					state = state->fail;
14427c478bd9Sstevel@tonic-gate 			}
14437c478bd9Sstevel@tonic-gate 			s->fail = state;
14447c478bd9Sstevel@tonic-gate 		}
14457c478bd9Sstevel@tonic-gate 	}
14467c478bd9Sstevel@tonic-gate 	zeroroot(root, root);
14477c478bd9Sstevel@tonic-gate }
14487c478bd9Sstevel@tonic-gate 
14497c478bd9Sstevel@tonic-gate static void
zeroroot(Node * root,Node * n)14507c478bd9Sstevel@tonic-gate zeroroot(Node *root, Node *n)
14517c478bd9Sstevel@tonic-gate {
14527c478bd9Sstevel@tonic-gate 	Link *l;
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 	if (n->fail == root)
14557c478bd9Sstevel@tonic-gate 		n->fail = NULL;
14567c478bd9Sstevel@tonic-gate 	for (l = n->alts; l; l = l->next)
14577c478bd9Sstevel@tonic-gate 		zeroroot(root, l->node);
14587c478bd9Sstevel@tonic-gate }
14597c478bd9Sstevel@tonic-gate 
14607c478bd9Sstevel@tonic-gate static void
shift(re_cw * c)14617c478bd9Sstevel@tonic-gate shift(re_cw *c)
14627c478bd9Sstevel@tonic-gate {
14637c478bd9Sstevel@tonic-gate 	Link *qhead = NULL, *qtail = NULL;
14647c478bd9Sstevel@tonic-gate 	Link *l;
14657c478bd9Sstevel@tonic-gate 	Link *t;
14667c478bd9Sstevel@tonic-gate 	Node *state, *r, *s;
14677c478bd9Sstevel@tonic-gate 	int k;
14687c478bd9Sstevel@tonic-gate 
14697c478bd9Sstevel@tonic-gate 	for (k = 0; k < 256; k++)
14707c478bd9Sstevel@tonic-gate 		c->step[k] = c->mindepth+1;
14717c478bd9Sstevel@tonic-gate 	c->root->shift1 = 1;
14727c478bd9Sstevel@tonic-gate 	c->root->shift2 = c->mindepth;
14737c478bd9Sstevel@tonic-gate 	for (l = c->root->alts; l; l = l->next) {
14747c478bd9Sstevel@tonic-gate 		l->node->shift2 = c->root->shift2;
14757c478bd9Sstevel@tonic-gate 		ADD(l->node);
14767c478bd9Sstevel@tonic-gate 		l->node->fail = NULL;
14777c478bd9Sstevel@tonic-gate 	}
14787c478bd9Sstevel@tonic-gate 	while (qhead) {
14797c478bd9Sstevel@tonic-gate 		r = qhead->node;
14807c478bd9Sstevel@tonic-gate 		DEL();
14817c478bd9Sstevel@tonic-gate 		r->shift1 = c->mindepth;
14827c478bd9Sstevel@tonic-gate 		r->shift2 = c->mindepth;
14837c478bd9Sstevel@tonic-gate 		if ((state = r->fail) != NULL) {
14847c478bd9Sstevel@tonic-gate 			do {
14857c478bd9Sstevel@tonic-gate 				k = r->d - state->d;
14867c478bd9Sstevel@tonic-gate 				if (k < state->shift1) {
14877c478bd9Sstevel@tonic-gate 					state->shift1 = k;
14887c478bd9Sstevel@tonic-gate 				}
14897c478bd9Sstevel@tonic-gate 				if (r->out && (k < state->shift2)) {
14907c478bd9Sstevel@tonic-gate 					state->shift2 = k;
14917c478bd9Sstevel@tonic-gate 				}
14927c478bd9Sstevel@tonic-gate 			} while ((state = state->fail) != NULL);
14937c478bd9Sstevel@tonic-gate 		}
14947c478bd9Sstevel@tonic-gate 		for (l = r->alts; l; l = l->next) {
14957c478bd9Sstevel@tonic-gate 			s = l->node;
14967c478bd9Sstevel@tonic-gate 			ADD(s);
14977c478bd9Sstevel@tonic-gate 		}
14987c478bd9Sstevel@tonic-gate 	}
14997c478bd9Sstevel@tonic-gate 	shiftprop(c, c->root);
15007c478bd9Sstevel@tonic-gate 	shifttab(c->root);
15017c478bd9Sstevel@tonic-gate 	c->step[0] = 1;
15027c478bd9Sstevel@tonic-gate }
15037c478bd9Sstevel@tonic-gate 
15047c478bd9Sstevel@tonic-gate static void
shifttab(Node * n)15057c478bd9Sstevel@tonic-gate shifttab(Node *n)
15067c478bd9Sstevel@tonic-gate {
15077c478bd9Sstevel@tonic-gate 	Link *l;
15087c478bd9Sstevel@tonic-gate 	Node **nn;
15097c478bd9Sstevel@tonic-gate 
15107c478bd9Sstevel@tonic-gate 	n->tab = nn = (Node **)egmalloc(256 * sizeof (Node *));
15117c478bd9Sstevel@tonic-gate 	(void) memset((char *)n->tab, 0, 256 * sizeof (Node *));
15127c478bd9Sstevel@tonic-gate 	for (l = n->alts; l; l = l->next)
15137c478bd9Sstevel@tonic-gate 		nn[l->lit] = l->node;
15147c478bd9Sstevel@tonic-gate }
15157c478bd9Sstevel@tonic-gate 
15167c478bd9Sstevel@tonic-gate static void
shiftprop(re_cw * c,Node * n)15177c478bd9Sstevel@tonic-gate shiftprop(re_cw *c, Node *n)
15187c478bd9Sstevel@tonic-gate {
15197c478bd9Sstevel@tonic-gate 	Link *l;
15207c478bd9Sstevel@tonic-gate 	Node *nn;
15217c478bd9Sstevel@tonic-gate 
15227c478bd9Sstevel@tonic-gate 	for (l = n->alts; l; l = l->next) {
15237c478bd9Sstevel@tonic-gate 		if (c->step[l->lit] > l->node->d)
15247c478bd9Sstevel@tonic-gate 			c->step[l->lit] = l->node->d;
15257c478bd9Sstevel@tonic-gate 		nn = l->node;
15267c478bd9Sstevel@tonic-gate 		if (n->shift2 < nn->shift2)
15277c478bd9Sstevel@tonic-gate 			nn->shift2 = n->shift2;
15287c478bd9Sstevel@tonic-gate 		shiftprop(c, nn);
15297c478bd9Sstevel@tonic-gate 	}
15307c478bd9Sstevel@tonic-gate }
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate static void
re_cwcomp(re_cw * c)15337c478bd9Sstevel@tonic-gate re_cwcomp(re_cw *c)
15347c478bd9Sstevel@tonic-gate {
15357c478bd9Sstevel@tonic-gate 	fail(c->root);
15367c478bd9Sstevel@tonic-gate 	shift(c);
15377c478bd9Sstevel@tonic-gate }
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate static BOOL
re_cwexec(PATTERN * pat,uchar_t * rs,uchar_t * re,uchar_t ** mb,uchar_t ** me)15407c478bd9Sstevel@tonic-gate re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, uchar_t **me)
15417c478bd9Sstevel@tonic-gate {
15427c478bd9Sstevel@tonic-gate 	Node *state;
15437c478bd9Sstevel@tonic-gate 	Link *l;
15447c478bd9Sstevel@tonic-gate 	uchar_t *sp;
15457c478bd9Sstevel@tonic-gate 	uchar_t *s;
15467c478bd9Sstevel@tonic-gate 	uchar_t *e;
15477c478bd9Sstevel@tonic-gate 	Node *ostate;
15487c478bd9Sstevel@tonic-gate 	int k;
15497c478bd9Sstevel@tonic-gate 	re_cw *c = pat->cw_ptr;
15507c478bd9Sstevel@tonic-gate 	uchar_t fake[1];
15517c478bd9Sstevel@tonic-gate 	uchar_t mappedsp;
15527c478bd9Sstevel@tonic-gate 
15537c478bd9Sstevel@tonic-gate 	fake[0] = 0;
15547c478bd9Sstevel@tonic-gate 	state = c->root;
15557c478bd9Sstevel@tonic-gate 
15567c478bd9Sstevel@tonic-gate 	s = rs+c->mindepth-1;
15577c478bd9Sstevel@tonic-gate 	e = re;
15587c478bd9Sstevel@tonic-gate 	while (s < e) {
15597c478bd9Sstevel@tonic-gate 		/* scan */
15607c478bd9Sstevel@tonic-gate 		for (sp = s; (ostate = state) != NULL; ) {
15617c478bd9Sstevel@tonic-gate 			mappedsp = c->cmap[*sp];
15627c478bd9Sstevel@tonic-gate 			if (state->tab) {
15637c478bd9Sstevel@tonic-gate 				if ((state = state->tab[mappedsp]) == NULL)
15647c478bd9Sstevel@tonic-gate 					goto nomatch;
15657c478bd9Sstevel@tonic-gate 			} else {
15667c478bd9Sstevel@tonic-gate 				for (l = state->alts; ; l = l->next) {
15677c478bd9Sstevel@tonic-gate 					if (l == NULL)
15687c478bd9Sstevel@tonic-gate 						goto nomatch;
15697c478bd9Sstevel@tonic-gate 					if (l->lit == mappedsp) {
15707c478bd9Sstevel@tonic-gate 						state = l->node;
15717c478bd9Sstevel@tonic-gate 						break;
15727c478bd9Sstevel@tonic-gate 					}
15737c478bd9Sstevel@tonic-gate 				}
15747c478bd9Sstevel@tonic-gate 			}
15757c478bd9Sstevel@tonic-gate 			if (state->out) {
15767c478bd9Sstevel@tonic-gate 				*mb = sp;
15777c478bd9Sstevel@tonic-gate 				*me = s+1;
15787c478bd9Sstevel@tonic-gate 				if (fixloc(mb, me))
15797c478bd9Sstevel@tonic-gate 					return (YES);
15807c478bd9Sstevel@tonic-gate 			}
15817c478bd9Sstevel@tonic-gate 			if (--sp < rs) {
15827c478bd9Sstevel@tonic-gate 				sp = fake;
15837c478bd9Sstevel@tonic-gate 				break;
15847c478bd9Sstevel@tonic-gate 			}
15857c478bd9Sstevel@tonic-gate 		}
15867c478bd9Sstevel@tonic-gate 	nomatch:
15877c478bd9Sstevel@tonic-gate 		k = c->step[c->cmap[*sp]] - ostate->d - 1;
15887c478bd9Sstevel@tonic-gate 		if (k < ostate->shift1)
15897c478bd9Sstevel@tonic-gate 			k = ostate->shift1;
15907c478bd9Sstevel@tonic-gate 		if (k > ostate->shift2)
15917c478bd9Sstevel@tonic-gate 			k = ostate->shift2;
15927c478bd9Sstevel@tonic-gate 		s += k;
15937c478bd9Sstevel@tonic-gate 		state = c->root;
15947c478bd9Sstevel@tonic-gate 	}
15957c478bd9Sstevel@tonic-gate 	return (NO);
15967c478bd9Sstevel@tonic-gate }
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate static Node *
newnode(re_cw * c,int d)15997c478bd9Sstevel@tonic-gate newnode(re_cw *c, int d)
16007c478bd9Sstevel@tonic-gate {
16017c478bd9Sstevel@tonic-gate 	static Node *lim = NULL;
16027c478bd9Sstevel@tonic-gate 	static int incr = 256;
16037c478bd9Sstevel@tonic-gate 
16047c478bd9Sstevel@tonic-gate 	if (!next_node) lim = NULL;
16057c478bd9Sstevel@tonic-gate 	if (next_node == lim) {
16067c478bd9Sstevel@tonic-gate 		next_node = (Node *)egmalloc(incr * sizeof (Node));
16077c478bd9Sstevel@tonic-gate 		lim = next_node + incr;
16087c478bd9Sstevel@tonic-gate 	}
16097c478bd9Sstevel@tonic-gate 	next_node->d = d;
16107c478bd9Sstevel@tonic-gate 	if (d > c->maxdepth) c->maxdepth = d;
16117c478bd9Sstevel@tonic-gate 	next_node->id = c->nodeid++;
16127c478bd9Sstevel@tonic-gate 	next_node->alts = NULL;
16137c478bd9Sstevel@tonic-gate 	next_node->tab = NULL;
16147c478bd9Sstevel@tonic-gate 	next_node->out = 0;
16157c478bd9Sstevel@tonic-gate 	return (next_node++);
16167c478bd9Sstevel@tonic-gate }
16177c478bd9Sstevel@tonic-gate 
16187c478bd9Sstevel@tonic-gate static Link *
newlink(uchar_t lit,Node * n)16197c478bd9Sstevel@tonic-gate newlink(uchar_t lit, Node *n)
16207c478bd9Sstevel@tonic-gate {
16217c478bd9Sstevel@tonic-gate 	static Link *lim = NULL;
16227c478bd9Sstevel@tonic-gate 	static int incr = 256;
16237c478bd9Sstevel@tonic-gate 
16247c478bd9Sstevel@tonic-gate 	if (!next_link) lim = NULL;
16257c478bd9Sstevel@tonic-gate 	if (next_link == lim) {
16267c478bd9Sstevel@tonic-gate 		next_link = (Link *)egmalloc(incr * sizeof (Link));
16277c478bd9Sstevel@tonic-gate 		lim = next_link + incr;
16287c478bd9Sstevel@tonic-gate 	}
16297c478bd9Sstevel@tonic-gate 	next_link->lit = lit;
16307c478bd9Sstevel@tonic-gate 	next_link->node = n;
16317c478bd9Sstevel@tonic-gate 	next_link->next = NULL;
16327c478bd9Sstevel@tonic-gate 	return (next_link++);
16337c478bd9Sstevel@tonic-gate }
16347c478bd9Sstevel@tonic-gate 
16357c478bd9Sstevel@tonic-gate int
egrep(char * f,FILE * o,char * fo)16367c478bd9Sstevel@tonic-gate egrep(char *f, FILE *o, char *fo)
16377c478bd9Sstevel@tonic-gate {
16387c478bd9Sstevel@tonic-gate 	uchar_t		buff[MAXBUFSIZE + ESIZE];
16397c478bd9Sstevel@tonic-gate 
16407c478bd9Sstevel@tonic-gate 	buffer = buff;
16417c478bd9Sstevel@tonic-gate 	*buffer++ = NL;  /* New line precedes buffer to prevent runover */
16427c478bd9Sstevel@tonic-gate 	file = f;
16437c478bd9Sstevel@tonic-gate 	output = o;
16447c478bd9Sstevel@tonic-gate 	format = fo;
16457c478bd9Sstevel@tonic-gate 	return (execute());
16467c478bd9Sstevel@tonic-gate }
16477c478bd9Sstevel@tonic-gate 
16487c478bd9Sstevel@tonic-gate static int
execute(void)16497c478bd9Sstevel@tonic-gate execute(void)
16507c478bd9Sstevel@tonic-gate {
16517c478bd9Sstevel@tonic-gate 	LINE		current;
16527c478bd9Sstevel@tonic-gate 	BOOL		matched;
16537c478bd9Sstevel@tonic-gate 	BOOL	all_searched; /* file being matched until end */
16547c478bd9Sstevel@tonic-gate 
16557c478bd9Sstevel@tonic-gate 	if ((file_desc = open(file, O_RDONLY)) < 0) {
16567c478bd9Sstevel@tonic-gate 		return (-1);
16577c478bd9Sstevel@tonic-gate 	}
16587c478bd9Sstevel@tonic-gate 	init_file(&current);
16597c478bd9Sstevel@tonic-gate 		/* while there is more get more text from the data stream */
16607c478bd9Sstevel@tonic-gate 	for (;;) {
16617c478bd9Sstevel@tonic-gate 		all_searched = NO;
16627c478bd9Sstevel@tonic-gate 
16637c478bd9Sstevel@tonic-gate 		/*
16647c478bd9Sstevel@tonic-gate 		 * Find next new-line in buffer.
16657c478bd9Sstevel@tonic-gate 		 * Begin after previous new line character
16667c478bd9Sstevel@tonic-gate 		 */
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate 		current.prntbuf = current.newline + 1;
16697c478bd9Sstevel@tonic-gate 		current.ln++; /* increment line number */
16707c478bd9Sstevel@tonic-gate 
16717c478bd9Sstevel@tonic-gate 		if (current.prntbuf < bufend) {
16727c478bd9Sstevel@tonic-gate 			/* There is more text in buffer */
16737c478bd9Sstevel@tonic-gate 
16747c478bd9Sstevel@tonic-gate 			/*
16757c478bd9Sstevel@tonic-gate 			 * Take our next
16767c478bd9Sstevel@tonic-gate 			 * "line" as the entire remaining buffer.
16777c478bd9Sstevel@tonic-gate 			 * However, if there is more of the file yet
16787c478bd9Sstevel@tonic-gate 			 * to be read in, exclude any incomplete
16797c478bd9Sstevel@tonic-gate 			 * line at end.
16807c478bd9Sstevel@tonic-gate 			 */
16817c478bd9Sstevel@tonic-gate 			if (file_stat == NO_MORE) {
16827c478bd9Sstevel@tonic-gate 				all_searched = YES;
16837c478bd9Sstevel@tonic-gate 				current.newline = bufend - 1;
16847c478bd9Sstevel@tonic-gate 				if (*current.newline != NL) {
16857c478bd9Sstevel@tonic-gate 					current.newline = bufend;
16867c478bd9Sstevel@tonic-gate 				}
16877c478bd9Sstevel@tonic-gate 			} else {
16887c478bd9Sstevel@tonic-gate 				/*
16897c478bd9Sstevel@tonic-gate 				 * Find end of the last
16907c478bd9Sstevel@tonic-gate 				 * complete line in the buffer.
16917c478bd9Sstevel@tonic-gate 				 */
16927c478bd9Sstevel@tonic-gate 				current.newline = bufend;
16937c478bd9Sstevel@tonic-gate 				while (*--current.newline != NL) {
16947c478bd9Sstevel@tonic-gate 				}
16957c478bd9Sstevel@tonic-gate 				if (current.newline < current.prntbuf) {
16967c478bd9Sstevel@tonic-gate 					/* end not found */
16977c478bd9Sstevel@tonic-gate 					current.newline = bufend;
16987c478bd9Sstevel@tonic-gate 				}
16997c478bd9Sstevel@tonic-gate 			}
17007c478bd9Sstevel@tonic-gate 		} else {
17017c478bd9Sstevel@tonic-gate 			/* There is no more text in the buffer. */
17027c478bd9Sstevel@tonic-gate 			current.newline = bufend;
17037c478bd9Sstevel@tonic-gate 		}
17047c478bd9Sstevel@tonic-gate 		if (current.newline >= bufend) {
17057c478bd9Sstevel@tonic-gate 			/*
17067c478bd9Sstevel@tonic-gate 			 * There is no more text in the buffer,
17077c478bd9Sstevel@tonic-gate 			 * or no new line was found.
17087c478bd9Sstevel@tonic-gate 			 */
17097c478bd9Sstevel@tonic-gate 			switch (file_stat) {
17107c478bd9Sstevel@tonic-gate 			case MORE:	/* file partly unread */
17117c478bd9Sstevel@tonic-gate 			case BEGIN:
17127c478bd9Sstevel@tonic-gate 				fgetfile(&current);
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 				current.ln--;
17157c478bd9Sstevel@tonic-gate 				continue; /* with while loop */
17167c478bd9Sstevel@tonic-gate 			case NO_MORE:
17177c478bd9Sstevel@tonic-gate 				break;
17187c478bd9Sstevel@tonic-gate 			}
17197c478bd9Sstevel@tonic-gate 			/* Nothing more to read in for this file. */
17207c478bd9Sstevel@tonic-gate 			if (current.newline <= current.prntbuf) {
17217c478bd9Sstevel@tonic-gate 				/* Nothing in the buffer, either */
17227c478bd9Sstevel@tonic-gate 				/* We are done with the file. */
17237c478bd9Sstevel@tonic-gate 				current.ln--;
17247c478bd9Sstevel@tonic-gate 				break; /* out of while loop */
17257c478bd9Sstevel@tonic-gate 			}
17267c478bd9Sstevel@tonic-gate 			/* There is no NL at the end of the file */
17277c478bd9Sstevel@tonic-gate 		}
17287c478bd9Sstevel@tonic-gate 
17297c478bd9Sstevel@tonic-gate 		matched = pattern_match(&match_pattern, &current);
17307c478bd9Sstevel@tonic-gate 		if (matched) {
17317c478bd9Sstevel@tonic-gate 			int nc;
17327c478bd9Sstevel@tonic-gate 
17337c478bd9Sstevel@tonic-gate 			get_ncount(&current, match_pattern.loc1);
17347c478bd9Sstevel@tonic-gate 			get_line(&current, match_pattern.loc2);
17357c478bd9Sstevel@tonic-gate 
17367c478bd9Sstevel@tonic-gate 			nc = current.newline + 1 - current.prntbuf;
17377c478bd9Sstevel@tonic-gate 			(void) fprintf(output, format, file, current.ln);
17387c478bd9Sstevel@tonic-gate 			(void) fwrite((char *)current.prntbuf, 1, nc, output);
17397c478bd9Sstevel@tonic-gate 		} else {
17407c478bd9Sstevel@tonic-gate 			if (all_searched)
17417c478bd9Sstevel@tonic-gate 				break; /* out of while loop */
17427c478bd9Sstevel@tonic-gate 
17437c478bd9Sstevel@tonic-gate 			get_ncount(&current, current.newline + 1);
17447c478bd9Sstevel@tonic-gate 		}
17457c478bd9Sstevel@tonic-gate 	}
17467c478bd9Sstevel@tonic-gate 
17477c478bd9Sstevel@tonic-gate 	(void) close(file_desc);
17487c478bd9Sstevel@tonic-gate 	return (0);
17497c478bd9Sstevel@tonic-gate }
17507c478bd9Sstevel@tonic-gate 
17517c478bd9Sstevel@tonic-gate static void
init_file(LINE * cur_ptr)17527c478bd9Sstevel@tonic-gate init_file(LINE *cur_ptr)
17537c478bd9Sstevel@tonic-gate {
17547c478bd9Sstevel@tonic-gate 	file_stat = BEGIN;
17557c478bd9Sstevel@tonic-gate 	cur_ptr->ln = 0;
17567c478bd9Sstevel@tonic-gate 	bufend = buffer;
17577c478bd9Sstevel@tonic-gate 	cur_ptr->newline = buffer - 1;
17587c478bd9Sstevel@tonic-gate }
17597c478bd9Sstevel@tonic-gate 
17607c478bd9Sstevel@tonic-gate static BOOL
pattern_match(PATTERN * pat,LINE * lptr)17617c478bd9Sstevel@tonic-gate pattern_match(PATTERN *pat, LINE *lptr)
17627c478bd9Sstevel@tonic-gate {
17637c478bd9Sstevel@tonic-gate 	if ((*pat->procfn)(pat, lptr->prntbuf - 1, lptr->newline + 1,
17647c478bd9Sstevel@tonic-gate 	    &pat->loc1, &pat->loc2)) {
17657c478bd9Sstevel@tonic-gate 		return (YES);
17667c478bd9Sstevel@tonic-gate 	} else {
17677c478bd9Sstevel@tonic-gate 		pat->loc1 = lptr->prntbuf;
17687c478bd9Sstevel@tonic-gate 		pat->loc2 = lptr->newline - 1;
17697c478bd9Sstevel@tonic-gate 		return (NO);
17707c478bd9Sstevel@tonic-gate 	}
17717c478bd9Sstevel@tonic-gate } /* end of function pattern_match() */
17727c478bd9Sstevel@tonic-gate 
17737c478bd9Sstevel@tonic-gate static void
fgetfile(LINE * cur_ptr)17747c478bd9Sstevel@tonic-gate fgetfile(LINE *cur_ptr)
17757c478bd9Sstevel@tonic-gate {
17767c478bd9Sstevel@tonic-gate 	/*
17777c478bd9Sstevel@tonic-gate 	 * This function reads as much of the current file into the buffer
17787c478bd9Sstevel@tonic-gate 	 * as will fit.
17797c478bd9Sstevel@tonic-gate 	 */
17807c478bd9Sstevel@tonic-gate 	int	bytes;	  /* bytes read in current buffer */
17817c478bd9Sstevel@tonic-gate 	int	bufsize = MAXBUFSIZE; /* free space in data buffer */
1782*dc1259b6SToomas Soome 	int	save_current;
17837c478bd9Sstevel@tonic-gate 	uchar_t	*begin = cur_ptr->prntbuf;
17847c478bd9Sstevel@tonic-gate 
17857c478bd9Sstevel@tonic-gate 	/*
17867c478bd9Sstevel@tonic-gate 	 * Bytes of current incomplete line, if any, save_current in buffer.
17877c478bd9Sstevel@tonic-gate 	 * These must be saved.
17887c478bd9Sstevel@tonic-gate 	 */
17897c478bd9Sstevel@tonic-gate 	save_current = (int)(bufend - begin);
17907c478bd9Sstevel@tonic-gate 
17917c478bd9Sstevel@tonic-gate 	if (file_stat == MORE) {
17927c478bd9Sstevel@tonic-gate 		/*
17937c478bd9Sstevel@tonic-gate 		 * A portion of the file fills the buffer. We must clear
17947c478bd9Sstevel@tonic-gate 		 * out the dead wood to make room for more of the file.
17957c478bd9Sstevel@tonic-gate 		 */
17967c478bd9Sstevel@tonic-gate 
17977c478bd9Sstevel@tonic-gate 		int k = 0;
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate 		k = begin - buffer;
18007c478bd9Sstevel@tonic-gate 		if (!k) {
18017c478bd9Sstevel@tonic-gate 			/*
18027c478bd9Sstevel@tonic-gate 			 * We have one humungous current line,
18037c478bd9Sstevel@tonic-gate 			 * which fills the whole buffer.
18047c478bd9Sstevel@tonic-gate 			 * Toss it.
18057c478bd9Sstevel@tonic-gate 			 */
18067c478bd9Sstevel@tonic-gate 			begin = bufend;
18077c478bd9Sstevel@tonic-gate 			k = begin - buffer;
18087c478bd9Sstevel@tonic-gate 			save_current = 0;
18097c478bd9Sstevel@tonic-gate 		}
18107c478bd9Sstevel@tonic-gate 
18117c478bd9Sstevel@tonic-gate 		begin = buffer;
18127c478bd9Sstevel@tonic-gate 		bufend = begin + save_current;
18137c478bd9Sstevel@tonic-gate 
18147c478bd9Sstevel@tonic-gate 		bufsize = MAXBUFSIZE - save_current;
18157c478bd9Sstevel@tonic-gate 
18167c478bd9Sstevel@tonic-gate 		if (save_current > 0) {
18177c478bd9Sstevel@tonic-gate 			/*
18187c478bd9Sstevel@tonic-gate 			 * Must save portion of current line.
18197c478bd9Sstevel@tonic-gate 			 * Copy to beginning of buffer.
18207c478bd9Sstevel@tonic-gate 			 */
18217c478bd9Sstevel@tonic-gate 			(void) memmove(buffer, buffer + k, save_current);
18227c478bd9Sstevel@tonic-gate 		}
18237c478bd9Sstevel@tonic-gate 	}
18247c478bd9Sstevel@tonic-gate 
18257c478bd9Sstevel@tonic-gate 	/* Now read in the file. */
18267c478bd9Sstevel@tonic-gate 
18277c478bd9Sstevel@tonic-gate 	do {
18287c478bd9Sstevel@tonic-gate 		bytes = read(file_desc, (char *)bufend, (unsigned int)bufsize);
18297c478bd9Sstevel@tonic-gate 		if (bytes < 0) {
18307c478bd9Sstevel@tonic-gate 			/* can't read any more of file */
18317c478bd9Sstevel@tonic-gate 			bytes = 0;
18327c478bd9Sstevel@tonic-gate 		}
18337c478bd9Sstevel@tonic-gate 		bufend += bytes;
18347c478bd9Sstevel@tonic-gate 		bufsize -= bytes;
18357c478bd9Sstevel@tonic-gate 	} while (bytes > 0 && bufsize > 0);
18367c478bd9Sstevel@tonic-gate 
18377c478bd9Sstevel@tonic-gate 
18387c478bd9Sstevel@tonic-gate 	if (begin >= bufend) {
18397c478bd9Sstevel@tonic-gate 		/* No new lines or incomplete line in buffer */
18407c478bd9Sstevel@tonic-gate 		file_stat = NO_MORE;
18417c478bd9Sstevel@tonic-gate 	} else if (bufsize) {
18427c478bd9Sstevel@tonic-gate 		/* Still space in the buffer, so we have read entire file */
18437c478bd9Sstevel@tonic-gate 		file_stat = NO_MORE;
18447c478bd9Sstevel@tonic-gate 	} else {
18457c478bd9Sstevel@tonic-gate 		/* We filled entire buffer, so there may be more yet to read */
18467c478bd9Sstevel@tonic-gate 		file_stat = MORE;
18477c478bd9Sstevel@tonic-gate 	}
18487c478bd9Sstevel@tonic-gate 		/* Note: bufend is 1 past last good char */
18497c478bd9Sstevel@tonic-gate 	*bufend = NL;	/* Sentinel new-line character */
18507c478bd9Sstevel@tonic-gate 	/* Set newline to character preceding the current line */
18517c478bd9Sstevel@tonic-gate 	cur_ptr->newline = begin - 1;
18527c478bd9Sstevel@tonic-gate }
18537c478bd9Sstevel@tonic-gate 
18547c478bd9Sstevel@tonic-gate static void
get_line(LINE * cur_ptr,uchar_t * s)18557c478bd9Sstevel@tonic-gate get_line(LINE *cur_ptr, uchar_t *s)
18567c478bd9Sstevel@tonic-gate {
18577c478bd9Sstevel@tonic-gate 	while (*s++ != NL) {
18587c478bd9Sstevel@tonic-gate 	}
18597c478bd9Sstevel@tonic-gate 	cur_ptr->newline = --s;
18607c478bd9Sstevel@tonic-gate 	cur_ptr->ln++;
18617c478bd9Sstevel@tonic-gate }
18627c478bd9Sstevel@tonic-gate 
18637c478bd9Sstevel@tonic-gate static void
get_ncount(LINE * cur_ptr,uchar_t * s)18647c478bd9Sstevel@tonic-gate get_ncount(LINE *cur_ptr, uchar_t *s)
18657c478bd9Sstevel@tonic-gate {
18667c478bd9Sstevel@tonic-gate 	uchar_t	*p = cur_ptr->prntbuf;
18677c478bd9Sstevel@tonic-gate 
18687c478bd9Sstevel@tonic-gate 	while (*--s != NL) {
18697c478bd9Sstevel@tonic-gate 	}
18707c478bd9Sstevel@tonic-gate 	cur_ptr->newline = s++;
18717c478bd9Sstevel@tonic-gate 	while ((s > p) &&
18727c478bd9Sstevel@tonic-gate 	    (p = (uchar_t *)memchr((char *)p, NL, s - p)) != NULL) {
18737c478bd9Sstevel@tonic-gate 		cur_ptr->ln++;
18747c478bd9Sstevel@tonic-gate 		++p;
18757c478bd9Sstevel@tonic-gate 	}
18767c478bd9Sstevel@tonic-gate 	cur_ptr->ln--;
18777c478bd9Sstevel@tonic-gate 	cur_ptr->prntbuf = s;
18787c478bd9Sstevel@tonic-gate }
1879