1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * posix regex executor
26 * single sized-record interface
27 */
28
29#include "reglib.h"
30
31#if _AST_REGEX_DEBUG
32
33#define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34#define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35#define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36
37static unsigned long	debug;
38static unsigned long	debug_flag;
39
40static const char*	rexnames[] =
41{
42	"REX_NULL",
43	"REX_ALT",
44	"REX_ALT_CATCH",
45	"REX_BACK",
46	"REX_BEG",
47	"REX_BEG_STR",
48	"REX_BM",
49	"REX_CAT",
50	"REX_CLASS",
51	"REX_COLL_CLASS",
52	"REX_CONJ",
53	"REX_CONJ_LEFT",
54	"REX_CONJ_RIGHT",
55	"REX_DONE",
56	"REX_DOT",
57	"REX_END",
58	"REX_END_STR",
59	"REX_EXEC",
60	"REX_FIN_STR",
61	"REX_GROUP",
62	"REX_GROUP_CATCH",
63	"REX_GROUP_AHEAD",
64	"REX_GROUP_AHEAD_CATCH",
65	"REX_GROUP_AHEAD_NOT",
66	"REX_GROUP_BEHIND",
67	"REX_GROUP_BEHIND_CATCH",
68	"REX_GROUP_BEHIND_NOT",
69	"REX_GROUP_BEHIND_NOT_CATCH",
70	"REX_GROUP_COND",
71	"REX_GROUP_COND_CATCH",
72	"REX_GROUP_CUT",
73	"REX_GROUP_CUT_CATCH",
74	"REX_KMP",
75	"REX_NEG",
76	"REX_NEG_CATCH",
77	"REX_NEST",
78	"REX_ONECHAR",
79	"REX_REP",
80	"REX_REP_CATCH",
81	"REX_STRING",
82	"REX_TRIE",
83	"REX_WBEG",
84	"REX_WEND",
85	"REX_WORD",
86	"REX_WORD_NOT"
87};
88
89static const char* rexname(Rex_t* rex)
90{
91	if (!rex)
92		return "NIL";
93	if (rex->type >= elementsof(rexnames))
94		return "ERROR";
95	return rexnames[rex->type];
96}
97
98#else
99
100#define DEBUG_INIT()
101#define DEBUG_TEST(f,y,n)	(n)
102#define DEBUG_CODE(f,y,n)	do {n} while(0)
103
104#endif
105
106#define BEG_ALT		1	/* beginning of an alt			*/
107#define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108#define BEG_REP		3	/* beginning of a repetition		*/
109#define BEG_SUB		4	/* beginning of a subexpression		*/
110#define END_ANY		5	/* end of any of above			*/
111
112/*
113 * returns from parse()
114 */
115
116#define NONE		0	/* no parse found			*/
117#define GOOD		1	/* some parse was found			*/
118#define CUT		2	/* no match and no backtrack		*/
119#define BEST		3	/* an unbeatable parse was found	*/
120#define BAD		4	/* error ocurred			*/
121
122/*
123 * REG_SHELL_DOT test
124 */
125
126#define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127
128/*
129 * Pos_t is for comparing parses. An entry is made in the
130 * array at the beginning and at the end of each Group_t,
131 * each iteration in a Group_t, and each Binary_t.
132 */
133
134typedef struct
135{
136	unsigned char*	p;		/* where in string		*/
137	size_t		length;		/* length in string		*/
138	short		serial;		/* preorder subpattern number	*/
139	short		be;		/* which end of pair		*/
140} Pos_t;
141
142/* ===== begin library support ===== */
143
144#define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145
146static Vector_t*
147vecopen(int inc, int siz)
148{
149	Vector_t*	v;
150	Stk_t*		sp;
151
152	if (inc <= 0)
153		inc = 16;
154	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155		return 0;
156	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157	{
158		stkclose(sp);
159		return 0;
160	}
161	v->stk = sp;
162	v->vec = (char*)v + sizeof(Vector_t);
163	v->max = v->inc = inc;
164	v->siz = siz;
165	v->cur = 0;
166	return v;
167}
168
169static void*
170vecseek(Vector_t** p, int index)
171{
172	Vector_t*	v = *p;
173
174	if (index >= v->max)
175	{
176		while ((v->max += v->inc) <= index);
177		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178			return 0;
179		*p = v;
180		v->vec = (char*)v + sizeof(Vector_t);
181	}
182	return v->vec + index * v->siz;
183}
184
185static void
186vecclose(Vector_t* v)
187{
188	if (v)
189		stkclose(v->stk);
190}
191
192typedef struct
193{
194	Stk_pos_t	pos;
195	char		data[1];
196} Stk_frame_t;
197
198#define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199#define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200
201#define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202#define stkdata(s,t)	((t*)stkframe(s)->data)
203#define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204
205static void*
206stkpush(Stk_t* sp, size_t size)
207{
208	Stk_frame_t*	f;
209	Stk_pos_t	p;
210
211	stknew(sp, &p);
212	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214		return 0;
215	f->pos = p;
216	stkframe(sp) = f;
217	return f->data;
218}
219
220/* ===== end library support ===== */
221
222/*
223 * Match_frame_t is for saving and restoring match records
224 * around alternate attempts, so that fossils will not be
225 * left in the match array.  These are the only entries in
226 * the match array that are not otherwise guaranteed to
227 * have current data in them when they get used.
228 */
229
230typedef struct
231{
232	size_t			size;
233	regmatch_t*		match;
234	regmatch_t		save[1];
235} Match_frame_t;
236
237#define matchframe	stkdata(stkstd,Match_frame_t)
238#define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
239#define matchcopy(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
240#define matchpop(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
241
242#define pospop(e)	(--(e)->pos->cur)
243
244/*
245 * allocate a frame and push a match onto the stack
246 */
247
248static int
249_matchpush(Env_t* env, Rex_t* rex)
250{
251	Match_frame_t*	f;
252	regmatch_t*	m;
253	regmatch_t*	e;
254	regmatch_t*	s;
255	int		num;
256
257	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
258		num = 0;
259	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
260	{
261		env->error = REG_ESPACE;
262		return 1;
263	}
264	f->size = num * sizeof(regmatch_t);
265	f->match = m = env->match + rex->re.group.number;
266	e = m + num;
267	s = f->save;
268	while (m < e)
269	{
270		*s++ = *m;
271		*m++ = state.nomatch;
272	}
273	return 0;
274}
275
276/*
277 * allocate a frame and push a pos onto the stack
278 */
279
280static int
281pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
282{
283	Pos_t*	pos;
284
285	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
286	{
287		env->error = REG_ESPACE;
288		return 1;
289	}
290	pos->serial = rex->serial;
291	pos->p = p;
292	pos->be = be;
293	env->pos->cur++;
294	return 0;
295}
296
297/*
298 * two matches are known to have the same length
299 * os is start of old pos array, ns is start of new,
300 * oend and nend are end+1 pointers to ends of arrays.
301 * oe and ne are ends (not end+1) of subarrays.
302 * returns 1 if new is better, -1 if old, else 0.
303 */
304
305static int
306better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
307{
308	Pos_t*	oe;
309	Pos_t*	ne;
310	int	k;
311	int	n;
312
313	if (env->error)
314		return -1;
315	for (;;)
316	{
317		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
318		if (ns >= nend)
319			return DEBUG_TEST(0x8000,(os < oend),(0));
320		if (os >= oend)
321			return DEBUG_TEST(0x8000,(-1),(1));
322		n = os->serial;
323		if (ns->serial > n)
324			return -1;
325		if (n > ns->serial)
326		{
327			env->error = REG_PANIC;
328			return -1;
329		}
330		if (ns->p > os->p)
331			return 1;
332		if (os->p > ns->p)
333			return -1;
334		oe = os;
335		k = 0;
336		for (;;)
337			if ((++oe)->serial == n)
338			{
339				if (oe->be != END_ANY)
340					k++;
341				else if (k-- <= 0)
342					break;
343			}
344		ne = ns;
345		k = 0;
346		for (;;)
347			if ((++ne)->serial == n)
348			{
349				if (ne->be != END_ANY)
350					k++;
351				else if (k-- <= 0)
352					break;
353			}
354		if (ne->p > oe->p)
355			return 1;
356		if (oe->p > ne->p)
357			return -1;
358		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
359			return k;
360		os = oe + 1;
361		ns = ne + 1;
362	}
363}
364
365#if _AST_REGEX_DEBUG
366
367static void
368showmatch(regmatch_t* p)
369{
370	sfputc(sfstdout, '(');
371	if (p->rm_so < 0)
372		sfputc(sfstdout, '?');
373	else
374		sfprintf(sfstdout, "%d", p->rm_so);
375	sfputc(sfstdout, ',');
376	if (p->rm_eo < 0)
377		sfputc(sfstdout, '?');
378	else
379		sfprintf(sfstdout, "%d", p->rm_eo);
380	sfputc(sfstdout, ')');
381}
382
383static int
384_better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
385{
386	int	i;
387
388	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
389	i = better(env, os, ns, oend, nend, 0);
390	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
391	return i;
392}
393
394#define better	_better
395
396#endif
397
398#define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
399
400static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
401
402static int
403parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
404{
405	int	i;
406	int	r = NONE;
407	Rex_t	catcher;
408
409	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
410	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
411	{
412		if (env->stack && pospush(env, rex, s, END_ANY))
413			return BAD;
414		i = follow(env, rex, cont, s);
415		if (env->stack)
416			pospop(env);
417		switch (i)
418		{
419		case BAD:
420			return BAD;
421		case CUT:
422			return CUT;
423		case BEST:
424		case GOOD:
425			return BEST;
426		}
427	}
428	if (n < rex->hi)
429	{
430		catcher.type = REX_REP_CATCH;
431		catcher.serial = rex->serial;
432		catcher.re.rep_catch.ref = rex;
433		catcher.re.rep_catch.cont = cont;
434		catcher.re.rep_catch.beg = s;
435		catcher.re.rep_catch.n = n + 1;
436		catcher.next = rex->next;
437		if (n == 0)
438			rex->re.rep_catch.beg = s;
439		if (env->stack)
440		{
441			if (matchpush(env, rex))
442				return BAD;
443			if (pospush(env, rex, s, BEG_ONE))
444				return BAD;
445DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d   (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
446		}
447		r = parse(env, rex->re.group.expr.rex, &catcher, s);
448		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
449		if (env->stack)
450		{
451			pospop(env);
452			matchpop(env, rex);
453DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP  %d %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
454		}
455		switch (r)
456		{
457		case BAD:
458			return BAD;
459		case BEST:
460			return BEST;
461		case CUT:
462			r = NONE;
463			break;
464		case GOOD:
465			if (rex->flags & REG_MINIMAL)
466				return BEST;
467			r = GOOD;
468			break;
469		}
470	}
471	if (n < rex->lo)
472		return r;
473	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
474	{
475		if (env->stack && pospush(env, rex, s, END_ANY))
476			return BAD;
477		i = follow(env, rex, cont, s);
478		if (env->stack)
479			pospop(env);
480		switch (i)
481		{
482		case BAD:
483			r = BAD;
484			break;
485		case CUT:
486			r = CUT;
487			break;
488		case BEST:
489			r = BEST;
490			break;
491		case GOOD:
492			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
493			break;
494		}
495	}
496	return r;
497}
498
499static int
500parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
501{
502	unsigned char*	p;
503	int		r;
504
505	if (p = rex->map)
506	{
507		for (;;)
508		{
509			if (s >= env->end)
510				return NONE;
511			while (x->c != p[*s])
512				if (!(x = x->sib))
513					return NONE;
514			if (x->end)
515				break;
516			x = x->son;
517			s++;
518		}
519	}
520	else
521	{
522		for (;;)
523		{
524			if (s >= env->end)
525				return NONE;
526			while (x->c != *s)
527				if (!(x = x->sib))
528					return NONE;
529			if (x->end)
530				break;
531			x = x->son;
532			s++;
533		}
534	}
535	s++;
536	if (rex->flags & REG_MINIMAL)
537		switch (follow(env, rex, cont, s))
538		{
539		case BAD:
540			return BAD;
541		case CUT:
542			return CUT;
543		case BEST:
544		case GOOD:
545			return BEST;
546		}
547	if (x->son)
548		switch (parsetrie(env, x->son, rex, cont, s))
549		{
550		case BAD:
551			return BAD;
552		case CUT:
553			return CUT;
554		case BEST:
555			return BEST;
556		case GOOD:
557			if (rex->flags & REG_MINIMAL)
558				return BEST;
559			r = GOOD;
560			break;
561		default:
562			r = NONE;
563			break;
564		}
565	else
566		r = NONE;
567	if (!(rex->flags & REG_MINIMAL))
568		switch (follow(env, rex, cont, s))
569		{
570		case BAD:
571			return BAD;
572		case CUT:
573			return CUT;
574		case BEST:
575			return BEST;
576		case GOOD:
577			return GOOD;
578	}
579	return r;
580}
581
582static int
583collelt(register Celt_t* ce, char* key, int c, int x)
584{
585	Ckey_t	elt;
586
587	mbxfrm(elt, key, COLL_KEY_MAX);
588	for (;; ce++)
589	{
590		switch (ce->typ)
591		{
592		case COLL_call:
593			if (!x && (*ce->fun)(c))
594				return 1;
595			continue;
596		case COLL_char:
597			if (!strcmp((char*)ce->beg, (char*)elt))
598				return 1;
599			continue;
600		case COLL_range:
601			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
602				return 1;
603			continue;
604		case COLL_range_lc:
605			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
606				return 1;
607			continue;
608		case COLL_range_uc:
609			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
610				return 1;
611			continue;
612		}
613		break;
614	}
615	return 0;
616}
617
618static int
619collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
620{
621	if (!x)
622	{
623		if (collelt(ce, key, c, x))
624			return 1;
625		if (iswlower(c))
626			c = towupper(c);
627		else if (iswupper(c))
628			c = towlower(c);
629		else
630			return 0;
631		x = mbconv(key, c);
632		key[x] = 0;
633		return collelt(ce, key, c, 0);
634	}
635	while (*nxt)
636	{
637		if (collic(ce, key, nxt + 1, c, x))
638			return 1;
639		if (islower(*nxt))
640			*nxt = toupper(*nxt);
641		else if (isupper(*nxt))
642			*nxt = tolower(*nxt);
643		else
644			return 0;
645		nxt++;
646	}
647	return collelt(ce, key, c, x);
648}
649
650static int
651collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
652{
653	unsigned char*		t;
654	wchar_t			c;
655	int			w;
656	int			r;
657	int			x;
658	int			ic;
659	Ckey_t			key;
660	Ckey_t			elt;
661
662	ic = (rex->flags & REG_ICASE);
663	if ((w = MBSIZE(s)) > 1)
664	{
665		memcpy((char*)key, (char*)s, w);
666		key[w] = 0;
667		t = s;
668		c = mbchar(t);
669#if !_lib_wctype
670		c &= 0xff;
671#endif
672		x = 0;
673	}
674	else
675	{
676		c = s[0];
677		if (ic && isupper(c))
678			c = tolower(c);
679		key[0] = c;
680		key[1] = 0;
681		if (isalpha(c))
682		{
683			x = e - s;
684			if (x > COLL_KEY_MAX)
685				x = COLL_KEY_MAX;
686			while (w < x)
687			{
688				c = s[w];
689				if (!isalpha(c))
690					break;
691				r = mbxfrm(elt, key, COLL_KEY_MAX);
692				if (ic && isupper(c))
693					c = tolower(c);
694				key[w] = c;
695				key[w + 1] = 0;
696				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
697					break;
698				w++;
699			}
700		}
701		key[w] = 0;
702		c = key[0];
703		x = w - 1;
704	}
705	r = 1;
706	for (;;)
707	{
708		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
709			break;
710		if (!x)
711		{
712			r = 0;
713			break;
714		}
715		w = x--;
716		key[w] = 0;
717	}
718	*p = s + w;
719	return rex->re.collate.invert ? !r : r;
720}
721
722static unsigned char*
723nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
724{
725	register int	c;
726	register int	cc;
727	unsigned int	n;
728	int		oc;
729
730	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
731	{
732		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
733		while (s < e)
734		{
735			c = *s++;
736			if (c == co)
737				return s;
738			else if (type[c] & n)
739			{
740				if (s >= e || (type[c] & REX_NEST_terminator))
741					break;
742				s++;
743			}
744		}
745	}
746	else
747	{
748		cc = type[co] >> REX_NEST_SHIFT;
749		oc = type[co] & (REX_NEST_open|REX_NEST_close);
750		n = 1;
751		while (s < e)
752		{
753			c = *s++;
754			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
755			{
756			case REX_NEST_delimiter:
757			case REX_NEST_terminator:
758				return oc ? 0 : s;
759			case REX_NEST_separator:
760				if (!oc)
761					return s;
762				break;
763			case REX_NEST_escape:
764				if (s >= e)
765					return 0;
766				s++;
767				break;
768			case REX_NEST_open|REX_NEST_close:
769				if (c == cc)
770				{
771					if (!--n)
772						return s;
773				}
774				/*FALLTHROUGH*/
775			case REX_NEST_open:
776				if (c == co)
777				{
778					if (!++n)
779						return 0;
780				}
781				else if (!(s = nestmatch(s, e, type, c)))
782					return 0;
783				break;
784			case REX_NEST_close:
785				if (c != cc)
786					return 0;
787				if (!--n)
788					return s;
789				break;
790			}
791		}
792		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
793	}
794	return 0;
795}
796
797static int
798parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
799{
800	int		c;
801	int		d;
802	int		i;
803	int		m;
804	int		n;
805	int		r;
806	int*		f;
807	unsigned char*	p;
808	unsigned char*	t;
809	unsigned char*	b;
810	unsigned char*	e;
811	char*		u;
812	regmatch_t*	o;
813	Trie_node_t*	x;
814	Rex_t*		q;
815	Rex_t		catcher;
816	Rex_t		next;
817
818	for (;;)
819	{
820DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
821		switch (rex->type)
822		{
823		case REX_ALT:
824			if (env->stack)
825			{
826				if (matchpush(env, rex))
827					return BAD;
828				if (pospush(env, rex, s, BEG_ALT))
829					return BAD;
830				catcher.type = REX_ALT_CATCH;
831				catcher.serial = rex->serial;
832				catcher.re.alt_catch.cont = cont;
833				catcher.next = rex->next;
834				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
835				if (r < BEST || (rex->flags & REG_MINIMAL))
836				{
837					matchcopy(env, rex);
838					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
839					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
840					if (n != NONE)
841						r = n;
842				}
843				pospop(env);
844				matchpop(env, rex);
845			}
846			else
847			{
848				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
849					r = parse(env, rex->re.group.expr.binary.right, cont, s);
850				if (r == GOOD)
851					r = BEST;
852			}
853			return r;
854		case REX_ALT_CATCH:
855			if (pospush(env, rex, s, END_ANY))
856				return BAD;
857			r = follow(env, rex, rex->re.alt_catch.cont, s);
858			pospop(env);
859			return r;
860		case REX_BACK:
861			o = &env->match[rex->lo];
862			if (o->rm_so < 0)
863				return NONE;
864			i = o->rm_eo - o->rm_so;
865			e = s + i;
866			if (e > env->end)
867				return NONE;
868			t = env->beg + o->rm_so;
869			if (!(p = rex->map))
870			{
871				while (s < e)
872					if (*s++ != *t++)
873						return NONE;
874			}
875			else if (!mbwide())
876			{
877				while (s < e)
878					if (p[*s++] != p[*t++])
879						return NONE;
880			}
881			else
882			{
883				while (s < e)
884				{
885					c = mbchar(s);
886					d = mbchar(t);
887					if (towupper(c) != towupper(d))
888						return NONE;
889				}
890			}
891			break;
892		case REX_BEG:
893			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
894				return NONE;
895			break;
896		case REX_CLASS:
897			if (LEADING(env, rex, s))
898				return NONE;
899			n = rex->hi;
900			if (n > env->end - s)
901				n = env->end - s;
902			m = rex->lo;
903			if (m > n)
904				return NONE;
905			r = NONE;
906			if (!(rex->flags & REG_MINIMAL))
907			{
908				for (i = 0; i < n; i++)
909					if (!settst(rex->re.charclass, s[i]))
910					{
911						n = i;
912						break;
913					}
914				for (s += n; n-- >= m; s--)
915					switch (follow(env, rex, cont, s))
916					{
917					case BAD:
918						return BAD;
919					case CUT:
920						return CUT;
921					case BEST:
922						return BEST;
923					case GOOD:
924						r = GOOD;
925						break;
926					}
927			}
928			else
929			{
930				for (e = s + m; s < e; s++)
931					if (!settst(rex->re.charclass, *s))
932						return r;
933				e += n - m;
934				for (;;)
935				{
936					switch (follow(env, rex, cont, s))
937					{
938					case BAD:
939						return BAD;
940					case CUT:
941						return CUT;
942					case BEST:
943					case GOOD:
944						return BEST;
945					}
946					if (s >= e || !settst(rex->re.charclass, *s))
947						break;
948					s++;
949				}
950			}
951			return r;
952		case REX_COLL_CLASS:
953			if (LEADING(env, rex, s))
954				return NONE;
955			n = rex->hi;
956			if (n > env->end - s)
957				n = env->end - s;
958			m = rex->lo;
959			if (m > n)
960				return NONE;
961			r = NONE;
962			e = env->end;
963			if (!(rex->flags & REG_MINIMAL))
964			{
965				if (!(b = (unsigned char*)stkpush(stkstd, n)))
966				{
967					env->error = REG_ESPACE;
968					return BAD;
969				}
970				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
971				{
972					b[i] = t - s;
973					s = t;
974				}
975				for (; i-- >= rex->lo; s -= b[i])
976					switch (follow(env, rex, cont, s))
977					{
978					case BAD:
979						stkpop(stkstd);
980						return BAD;
981					case CUT:
982						stkpop(stkstd);
983						return CUT;
984					case BEST:
985						stkpop(stkstd);
986						return BEST;
987					case GOOD:
988						r = GOOD;
989						break;
990					}
991				stkpop(stkstd);
992			}
993			else
994			{
995				for (i = 0; i < m && s < e; i++, s = t)
996					if (!collmatch(rex, s, e, &t))
997						return r;
998				while (i++ <= n)
999				{
1000					switch (follow(env, rex, cont, s))
1001					{
1002					case BAD:
1003						return BAD;
1004					case CUT:
1005						return CUT;
1006					case BEST:
1007					case GOOD:
1008						return BEST;
1009					}
1010					if (s >= e || !collmatch(rex, s, e, &s))
1011						break;
1012				}
1013			}
1014			return r;
1015		case REX_CONJ:
1016			next.type = REX_CONJ_RIGHT;
1017			next.re.conj_right.cont = cont;
1018			next.next = rex->next;
1019			catcher.type = REX_CONJ_LEFT;
1020			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1021			catcher.re.conj_left.cont = &next;
1022			catcher.re.conj_left.beg = s;
1023			catcher.next = 0;
1024			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1025		case REX_CONJ_LEFT:
1026			rex->re.conj_left.cont->re.conj_right.end = s;
1027			cont = rex->re.conj_left.cont;
1028			s = rex->re.conj_left.beg;
1029			rex = rex->re.conj_left.right;
1030			continue;
1031		case REX_CONJ_RIGHT:
1032			if (rex->re.conj_right.end != s)
1033				return NONE;
1034			cont = rex->re.conj_right.cont;
1035			break;
1036		case REX_DONE:
1037			if (!env->stack)
1038				return BEST;
1039			n = s - env->beg;
1040			r = env->nsub;
1041			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1042			if ((i = env->best[0].rm_eo) >= 0)
1043			{
1044				if (rex->flags & REG_MINIMAL)
1045				{
1046					if (n > i)
1047						return GOOD;
1048				}
1049				else
1050				{
1051					if (n < i)
1052						return GOOD;
1053				}
1054				if (n == i && better(env,
1055						     (Pos_t*)env->bestpos->vec,
1056				   		     (Pos_t*)env->pos->vec,
1057				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1058				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1059						     0) <= 0)
1060					return GOOD;
1061			}
1062			env->best[0].rm_eo = n;
1063			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1064			n = env->pos->cur;
1065			if (!vector(Pos_t, env->bestpos, n))
1066			{
1067				env->error = REG_ESPACE;
1068				return BAD;
1069			}
1070			env->bestpos->cur = n;
1071			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1072			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1073			return GOOD;
1074		case REX_DOT:
1075			if (LEADING(env, rex, s))
1076				return NONE;
1077			n = rex->hi;
1078			if (n > env->end - s)
1079				n = env->end - s;
1080			m = rex->lo;
1081			if (m > n)
1082				return NONE;
1083			if ((c = rex->explicit) >= 0 && !mbwide())
1084				for (i = 0; i < n; i++)
1085					if (s[i] == c)
1086					{
1087						n = i;
1088						break;
1089					}
1090			r = NONE;
1091			if (!(rex->flags & REG_MINIMAL))
1092			{
1093				if (!mbwide())
1094				{
1095					for (s += n; n-- >= m; s--)
1096						switch (follow(env, rex, cont, s))
1097						{
1098						case BAD:
1099							return BAD;
1100						case CUT:
1101							return CUT;
1102						case BEST:
1103							return BEST;
1104						case GOOD:
1105							r = GOOD;
1106							break;
1107						}
1108				}
1109				else
1110				{
1111					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1112					{
1113						env->error = REG_ESPACE;
1114						return BAD;
1115					}
1116					e = env->end;
1117					for (i = 0; s < e && i < n && *s != c; i++)
1118						s += b[i] = MBSIZE(s);
1119					for (; i-- >= m; s -= b[i])
1120						switch (follow(env, rex, cont, s))
1121						{
1122						case BAD:
1123							stkpop(stkstd);
1124							return BAD;
1125						case CUT:
1126							stkpop(stkstd);
1127							return CUT;
1128						case BEST:
1129							stkpop(stkstd);
1130							return BEST;
1131						case GOOD:
1132							r = GOOD;
1133							break;
1134						}
1135					stkpop(stkstd);
1136				}
1137			}
1138			else
1139			{
1140				if (!mbwide())
1141				{
1142					e = s + n;
1143					for (s += m; s <= e; s++)
1144						switch (follow(env, rex, cont, s))
1145						{
1146						case BAD:
1147							return BAD;
1148						case CUT:
1149							return CUT;
1150						case BEST:
1151						case GOOD:
1152							return BEST;
1153						}
1154				}
1155				else
1156				{
1157					e = env->end;
1158					for (i = 0; s < e && i < m && *s != c; i++)
1159						s += MBSIZE(s);
1160					if (i >= m)
1161						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1162							switch (follow(env, rex, cont, s))
1163							{
1164							case BAD:
1165								return BAD;
1166							case CUT:
1167								return CUT;
1168							case BEST:
1169							case GOOD:
1170								return BEST;
1171							}
1172				}
1173			}
1174			return r;
1175		case REX_END:
1176			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1177				return NONE;
1178			break;
1179		case REX_GROUP:
1180DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1181			if (env->stack)
1182			{
1183				if (rex->re.group.number)
1184					env->match[rex->re.group.number].rm_so = s - env->beg;
1185				if (pospush(env, rex, s, BEG_SUB))
1186					return BAD;
1187				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1188			}
1189			catcher.type = REX_GROUP_CATCH;
1190			catcher.serial = rex->serial;
1191			catcher.re.group_catch.cont = cont;
1192			catcher.next = rex->next;
1193			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1194			if (env->stack)
1195			{
1196				pospop(env);
1197				if (rex->re.group.number)
1198					env->match[rex->re.group.number].rm_so = -1;
1199			}
1200			return r;
1201		case REX_GROUP_CATCH:
1202DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1203			if (env->stack)
1204			{
1205				if (rex->re.group_catch.eo)
1206					*rex->re.group_catch.eo = s - env->beg;
1207				if (pospush(env, rex, s, END_ANY))
1208					return BAD;
1209			}
1210			r = follow(env, rex, rex->re.group_catch.cont, s);
1211			if (env->stack)
1212			{
1213				pospop(env);
1214				if (rex->re.group_catch.eo)
1215					*rex->re.group_catch.eo = -1;
1216			}
1217			return r;
1218		case REX_GROUP_AHEAD:
1219			catcher.type = REX_GROUP_AHEAD_CATCH;
1220			catcher.flags = rex->flags;
1221			catcher.serial = rex->serial;
1222			catcher.re.rep_catch.beg = s;
1223			catcher.re.rep_catch.cont = cont;
1224			catcher.next = rex->next;
1225			return parse(env, rex->re.group.expr.rex, &catcher, s);
1226		case REX_GROUP_AHEAD_CATCH:
1227			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1228		case REX_GROUP_AHEAD_NOT:
1229			r = parse(env, rex->re.group.expr.rex, NiL, s);
1230			if (r == NONE)
1231				r = follow(env, rex, cont, s);
1232			else if (r != BAD)
1233				r = NONE;
1234			return r;
1235		case REX_GROUP_BEHIND:
1236			if ((s - env->beg) < rex->re.group.size)
1237				return NONE;
1238			catcher.type = REX_GROUP_BEHIND_CATCH;
1239			catcher.flags = rex->flags;
1240			catcher.serial = rex->serial;
1241			catcher.re.behind_catch.beg = s;
1242			catcher.re.behind_catch.end = e = env->end;
1243			catcher.re.behind_catch.cont = cont;
1244			catcher.next = rex->next;
1245			for (t = s - rex->re.group.size; t >= env->beg; t--)
1246			{
1247				env->end = s;
1248				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1249				env->end = e;
1250				if (r != NONE)
1251					return r;
1252			}
1253			return NONE;
1254		case REX_GROUP_BEHIND_CATCH:
1255			if (s != rex->re.behind_catch.beg)
1256				return NONE;
1257			env->end = rex->re.behind_catch.end;
1258			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1259		case REX_GROUP_BEHIND_NOT:
1260			if ((s - env->beg) < rex->re.group.size)
1261				r = NONE;
1262			else
1263			{
1264				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1265				catcher.re.neg_catch.beg = s;
1266				catcher.next = 0;
1267				e = env->end;
1268				env->end = s;
1269				for (t = s - rex->re.group.size; t >= env->beg; t--)
1270				{
1271					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1272					if (r != NONE)
1273						break;
1274				}
1275				env->end = e;
1276			}
1277			if (r == NONE)
1278				r = follow(env, rex, cont, s);
1279			else if (r != BAD)
1280				r = NONE;
1281			return r;
1282		case REX_GROUP_BEHIND_NOT_CATCH:
1283			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1284		case REX_GROUP_COND:
1285			if (q = rex->re.group.expr.binary.right)
1286			{
1287				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1288				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1289			}
1290			else
1291				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1292			if (q = rex->re.group.expr.binary.left)
1293			{
1294				catcher.type = REX_GROUP_COND_CATCH;
1295				catcher.flags = rex->flags;
1296				catcher.serial = rex->serial;
1297				catcher.re.cond_catch.yes = 0;
1298				catcher.re.cond_catch.beg = s;
1299				catcher.re.cond_catch.cont = cont;
1300				catcher.next = rex->next;
1301				r = parse(env, q, &catcher, s);
1302				if (r == BAD || catcher.re.cond_catch.yes)
1303					return r;
1304			}
1305			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1306				r = GOOD;
1307			else
1308				r = NONE;
1309			if (q = catcher.re.cond_catch.next[r != NONE])
1310			{
1311				catcher.type = REX_CAT;
1312				catcher.flags = q->flags;
1313				catcher.serial = q->serial;
1314				catcher.re.group_catch.cont = cont;
1315				catcher.next = rex->next;
1316				return parse(env, q, &catcher, s);
1317			}
1318			return follow(env, rex, cont, s);
1319		case REX_GROUP_COND_CATCH:
1320			rex->re.cond_catch.yes = 1;
1321			catcher.type = REX_CAT;
1322			catcher.flags = rex->flags;
1323			catcher.serial = rex->serial;
1324			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1325			catcher.next = rex->next;
1326			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1327		case REX_CAT:
1328			return follow(env, rex, rex->re.group_catch.cont, s);
1329		case REX_GROUP_CUT:
1330			catcher.type = REX_GROUP_CUT_CATCH;
1331			catcher.flags = rex->flags;
1332			catcher.serial = rex->serial;
1333			catcher.re.group_catch.cont = cont;
1334			catcher.next = rex->next;
1335			return parse(env, rex->re.group.expr.rex, &catcher, s);
1336		case REX_GROUP_CUT_CATCH:
1337			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1338			{
1339			case GOOD:
1340				r = BEST;
1341				break;
1342			case NONE:
1343				r = CUT;
1344				break;
1345			}
1346			return r;
1347		case REX_KMP:
1348			f = rex->re.string.fail;
1349			b = rex->re.string.base;
1350			n = rex->re.string.size;
1351			t = s;
1352			e = env->end;
1353			if (p = rex->map)
1354			{
1355				while (t + n <= e)
1356				{
1357					for (i = -1; t < e; t++)
1358					{
1359						while (i >= 0 && b[i+1] != p[*t])
1360							i = f[i];
1361						if (b[i+1] == p[*t])
1362							i++;
1363						if (i + 1 == n)
1364						{
1365							t++;
1366							if (env->stack)
1367								env->best[0].rm_so = t - s - n;
1368							switch (follow(env, rex, cont, t))
1369							{
1370							case BAD:
1371								return BAD;
1372							case CUT:
1373								return CUT;
1374							case BEST:
1375							case GOOD:
1376								return BEST;
1377							}
1378							t -= n - 1;
1379							break;
1380						}
1381					}
1382				}
1383			}
1384			else
1385			{
1386				while (t + n <= e)
1387				{
1388					for (i = -1; t < e; t++)
1389					{
1390						while (i >= 0 && b[i+1] != *t)
1391							i = f[i];
1392						if (b[i+1] == *t)
1393							i++;
1394						if (i + 1 == n)
1395						{
1396							t++;
1397							if (env->stack)
1398								env->best[0].rm_so = t - s - n;
1399							switch (follow(env, rex, cont, t))
1400							{
1401							case BAD:
1402								return BAD;
1403							case CUT:
1404								return CUT;
1405							case BEST:
1406							case GOOD:
1407								return BEST;
1408							}
1409							t -= n - 1;
1410							break;
1411						}
1412					}
1413				}
1414			}
1415			return NONE;
1416		case REX_NEG:
1417			if (LEADING(env, rex, s))
1418				return NONE;
1419			i = env->end - s;
1420			n = ((i + 7) >> 3) + 1;
1421			catcher.type = REX_NEG_CATCH;
1422			catcher.re.neg_catch.beg = s;
1423			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1424				return BAD;
1425			memset(catcher.re.neg_catch.index = p, 0, n);
1426			catcher.next = rex->next;
1427			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1428				r = BAD;
1429			else
1430			{
1431				r = NONE;
1432				for (; i >= 0; i--)
1433					if (!bittst(p, i))
1434					{
1435						switch (follow(env, rex, cont, s + i))
1436						{
1437						case BAD:
1438							r = BAD;
1439							break;
1440						case BEST:
1441							r = BEST;
1442							break;
1443						case CUT:
1444							r = CUT;
1445							break;
1446						case GOOD:
1447							r = GOOD;
1448							/*FALLTHROUGH*/
1449						default:
1450							continue;
1451						}
1452						break;
1453					}
1454			}
1455			stkpop(stkstd);
1456			return r;
1457		case REX_NEG_CATCH:
1458			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1459			return NONE;
1460		case REX_NEST:
1461			if (s >= env->end)
1462				return NONE;
1463			do
1464			{
1465				if ((c = *s++) == rex->re.nest.primary)
1466				{
1467					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1468						return NONE;
1469					break;
1470				}
1471				if (rex->re.nest.primary >= 0)
1472					return NONE;
1473			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1474					break;
1475			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1476					return NONE;
1477			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1478			break;
1479		case REX_NULL:
1480			break;
1481		case REX_ONECHAR:
1482			n = rex->hi;
1483			if (n > env->end - s)
1484				n = env->end - s;
1485			m = rex->lo;
1486			if (m > n)
1487				return NONE;
1488			r = NONE;
1489			c = rex->re.onechar;
1490			if (!(rex->flags & REG_MINIMAL))
1491			{
1492				if (!mbwide())
1493				{
1494					if (p = rex->map)
1495					{
1496						for (i = 0; i < n; i++, s++)
1497							if (p[*s] != c)
1498								break;
1499					}
1500					else
1501					{
1502						for (i = 0; i < n; i++, s++)
1503							if (*s != c)
1504								break;
1505					}
1506					for (; i-- >= m; s--)
1507						switch (follow(env, rex, cont, s))
1508						{
1509						case BAD:
1510							return BAD;
1511						case BEST:
1512							return BEST;
1513						case CUT:
1514							return CUT;
1515						case GOOD:
1516							r = GOOD;
1517							break;
1518						}
1519				}
1520				else
1521				{
1522					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1523					{
1524						env->error = REG_ESPACE;
1525						return BAD;
1526					}
1527					e = env->end;
1528					if (!(rex->flags & REG_ICASE))
1529					{
1530						for (i = 0; s < e && i < n; i++, s = t)
1531						{
1532							t = s;
1533							if (mbchar(t) != c)
1534								break;
1535							b[i] = t - s;
1536						}
1537					}
1538					else
1539					{
1540						for (i = 0; s < e && i < n; i++, s = t)
1541						{
1542							t = s;
1543							if (towupper(mbchar(t)) != c)
1544								break;
1545							b[i] = t - s;
1546						}
1547					}
1548					for (; i-- >= m; s -= b[i])
1549						switch (follow(env, rex, cont, s))
1550						{
1551						case BAD:
1552							stkpop(stkstd);
1553							return BAD;
1554						case BEST:
1555							stkpop(stkstd);
1556							return BEST;
1557						case CUT:
1558							stkpop(stkstd);
1559							return CUT;
1560						case GOOD:
1561							r = GOOD;
1562							break;
1563						}
1564					stkpop(stkstd);
1565				}
1566			}
1567			else
1568			{
1569				if (!mbwide())
1570				{
1571					e = s + m;
1572					if (p = rex->map)
1573					{
1574						for (; s < e; s++)
1575							if (p[*s] != c)
1576								return r;
1577						e += n - m;
1578						for (;;)
1579						{
1580							switch (follow(env, rex, cont, s))
1581							{
1582							case BAD:
1583								return BAD;
1584							case CUT:
1585								return CUT;
1586							case BEST:
1587							case GOOD:
1588								return BEST;
1589							}
1590							if (s >= e || p[*s++] != c)
1591								break;
1592						}
1593					}
1594					else
1595					{
1596						for (; s < e; s++)
1597							if (*s != c)
1598								return r;
1599						e += n - m;
1600						for (;;)
1601						{
1602							switch (follow(env, rex, cont, s))
1603							{
1604							case BAD:
1605								return BAD;
1606							case CUT:
1607								return CUT;
1608							case BEST:
1609							case GOOD:
1610								return BEST;
1611							}
1612							if (s >= e || *s++ != c)
1613								break;
1614						}
1615					}
1616				}
1617				else
1618				{
1619					if (!(rex->flags & REG_ICASE))
1620					{
1621						for (i = 0; i < m && s < e; i++, s = t)
1622						{
1623							t = s;
1624							if (mbchar(t) != c)
1625								return r;
1626						}
1627						while (i++ <= n)
1628						{
1629							switch (follow(env, rex, cont, s))
1630							{
1631							case BAD:
1632								return BAD;
1633							case CUT:
1634								return CUT;
1635							case BEST:
1636							case GOOD:
1637								return BEST;
1638							}
1639							if (s >= e || mbchar(s) != c)
1640								break;
1641						}
1642					}
1643					else
1644					{
1645						for (i = 0; i < m && s < e; i++, s = t)
1646						{
1647							t = s;
1648							if (towupper(mbchar(t)) != c)
1649								return r;
1650						}
1651						while (i++ <= n)
1652						{
1653							switch (follow(env, rex, cont, s))
1654							{
1655							case BAD:
1656								return BAD;
1657							case CUT:
1658								return CUT;
1659							case BEST:
1660							case GOOD:
1661								return BEST;
1662							}
1663							if (s >= e || towupper(mbchar(s)) != c)
1664								break;
1665						}
1666					}
1667				}
1668			}
1669			return r;
1670		case REX_REP:
1671			if (env->stack && pospush(env, rex, s, BEG_REP))
1672				return BAD;
1673			r = parserep(env, rex, cont, s, 0);
1674			if (env->stack)
1675				pospop(env);
1676			return r;
1677		case REX_REP_CATCH:
1678			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679			if (env->stack && pospush(env, rex, s, END_ANY))
1680				return BAD;
1681			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1682			{
1683				/*
1684				 * optional empty iteration
1685				 */
1686
1687DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689					r = NONE;
1690				else if (pospush(env, rex, s, END_ANY))
1691					r = BAD;
1692				else
1693				{
1694					r = follow(env, rex, rex->re.rep_catch.cont, s);
1695					pospop(env);
1696				}
1697			}
1698			else
1699				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700			if (env->stack)
1701				pospop(env);
1702			return r;
1703		case REX_STRING:
1704DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705			if (rex->re.string.size > (env->end - s))
1706				return NONE;
1707			t = rex->re.string.base;
1708			e = t + rex->re.string.size;
1709			if (!(p = rex->map))
1710			{
1711				while (t < e)
1712					if (*s++ != *t++)
1713						return NONE;
1714			}
1715			else if (!mbwide())
1716			{
1717				while (t < e)
1718					if (p[*s++] != *t++)
1719						return NONE;
1720			}
1721			else
1722			{
1723				while (t < e)
1724				{
1725					c = mbchar(s);
1726					d = mbchar(t);
1727					if (towupper(c) != d)
1728						return NONE;
1729				}
1730			}
1731			break;
1732		case REX_TRIE:
1733			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734				return NONE;
1735			return parsetrie(env, x, rex, cont, s);
1736		case REX_EXEC:
1737			u = 0;
1738			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739			e = (unsigned char*)u;
1740			if (e >= s && e <= env->end)
1741				s = e;
1742			switch (r)
1743			{
1744			case 0:
1745				break;
1746			case REG_NOMATCH:
1747				return NONE;
1748			default:
1749				env->error = r;
1750				return BAD;
1751			}
1752			break;
1753		case REX_WBEG:
1754			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755				return NONE;
1756			break;
1757		case REX_WEND:
1758			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759				return NONE;
1760			break;
1761		case REX_WORD:
1762			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763				return NONE;
1764			break;
1765		case REX_WORD_NOT:
1766			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767				return NONE;
1768			break;
1769		case REX_BEG_STR:
1770			if (s != env->beg)
1771				return NONE;
1772			break;
1773		case REX_END_STR:
1774			for (t = s; t < env->end && *t == '\n'; t++);
1775			if (t < env->end)
1776				return NONE;
1777			break;
1778		case REX_FIN_STR:
1779			if (s < env->end)
1780				return NONE;
1781			break;
1782		}
1783		if (!(rex = rex->next))
1784		{
1785			if (!(rex = cont))
1786				break;
1787			cont = 0;
1788		}
1789	}
1790	return GOOD;
1791}
1792
1793#if _AST_REGEX_DEBUG
1794
1795static void
1796listnode(Rex_t* e, int level)
1797{
1798	int	i;
1799
1800	if (e)
1801	{
1802		do
1803		{
1804			for (i = 0; i < level; i++)
1805				sfprintf(sfstderr, "  ");
1806			sfprintf(sfstderr, "%s\n", rexname(e));
1807			switch (e->type)
1808			{
1809			case REX_ALT:
1810			case REX_CONJ:
1811				listnode(e->re.group.expr.binary.left, level + 1);
1812				listnode(e->re.group.expr.binary.right, level + 1);
1813				break;
1814			case REX_GROUP:
1815			case REX_GROUP_AHEAD:
1816			case REX_GROUP_AHEAD_NOT:
1817			case REX_GROUP_BEHIND:
1818			case REX_GROUP_BEHIND_NOT:
1819			case REX_GROUP_CUT:
1820			case REX_NEG:
1821			case REX_REP:
1822				listnode(e->re.group.expr.rex, level + 1);
1823				break;
1824			}
1825		} while (e = e->next);
1826	}
1827}
1828
1829static int
1830list(Env_t* env, Rex_t* rex)
1831{
1832	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833	if (rex)
1834		listnode(rex, 1);
1835	return 0;
1836}
1837
1838#endif
1839
1840/*
1841 * returning REG_BADPAT or REG_ESPACE is not explicitly
1842 * countenanced by the standard
1843 */
1844
1845int
1846regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1847{
1848	register int	n;
1849	register int	i;
1850	int		j;
1851	int		k;
1852	int		m;
1853	int		advance;
1854	Env_t*		env;
1855	Rex_t*		e;
1856
1857	DEBUG_INIT();
1858	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859	if (!p || !(env = p->env))
1860		return REG_BADPAT;
1861	if (!s)
1862		return fatal(env->disc, REG_BADPAT, NiL);
1863	if (len < env->min)
1864	{
1865		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866		return REG_NOMATCH;
1867	}
1868	env->regex = p;
1869	env->beg = (unsigned char*)s;
1870	env->end = env->beg + len;
1871	stknew(stkstd, &env->stk);
1872	env->flags &= ~REG_EXEC;
1873	env->flags |= (flags & REG_EXEC);
1874	advance = 0;
1875	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1876	{
1877		n = env->nsub;
1878		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1881		{
1882			k = REG_ESPACE;
1883			goto done;
1884		}
1885		env->pos->cur = env->bestpos->cur = 0;
1886		env->best = &env->match[n + 1];
1887		env->best[0].rm_so = 0;
1888		env->best[0].rm_eo = -1;
1889		for (i = 0; i <= n; i++)
1890			env->match[i] = state.nomatch;
1891		if (flags & REG_ADVANCE)
1892			advance = 1;
1893	}
1894	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895	k = REG_NOMATCH;
1896	if ((e = env->rex)->type == REX_BM)
1897	{
1898		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899		if (len < e->re.bm.right)
1900		{
1901			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902			goto done;
1903		}
1904		else if (!(flags & REG_LEFT))
1905		{
1906			register unsigned char*	buf = (unsigned char*)s;
1907			register size_t		index = e->re.bm.left + e->re.bm.size;
1908			register size_t		mid = len - e->re.bm.right;
1909			register size_t*	skip = e->re.bm.skip;
1910			register size_t*	fail = e->re.bm.fail;
1911			register Bm_mask_t**	mask = e->re.bm.mask;
1912			Bm_mask_t		m;
1913			size_t			x;
1914
1915			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916			for (;;)
1917			{
1918				while ((index += skip[buf[index]]) < mid);
1919				if (index < HIT)
1920				{
1921					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922					goto done;
1923				}
1924				index -= HIT;
1925				m = mask[n = e->re.bm.size - 1][buf[index]];
1926				do
1927				{
1928					if (!n--)
1929					{
1930						if (e->re.bm.back < 0)
1931							goto possible;
1932						if (advance)
1933						{
1934							i = index - e->re.bm.back;
1935							s += i;
1936							if (env->stack)
1937								env->best[0].rm_so += i;
1938							goto possible;
1939						}
1940						x = index;
1941						if (index < e->re.bm.back)
1942							index = 0;
1943						else
1944							index -= e->re.bm.back;
1945						while (index <= x)
1946						{
1947							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1948							{
1949								if (env->stack)
1950									env->best[0].rm_so = index;
1951								n = env->nsub;
1952								goto hit;
1953							}
1954							index++;
1955						}
1956						index += e->re.bm.size;
1957						break;
1958					}
1959				} while (m &= mask[n][buf[--index]]);
1960				if ((index += fail[n + 1]) >= len)
1961					goto done;
1962			}
1963		}
1964 possible:
1965		n = env->nsub;
1966		e = e->next;
1967	}
1968	j = env->once || (flags & REG_LEFT);
1969	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1971	{
1972		if (j)
1973			goto done;
1974		i = MBSIZE(s);
1975		s += i;
1976		if ((unsigned char*)s > env->end - env->min)
1977			goto done;
1978		if (env->stack)
1979			env->best[0].rm_so += i;
1980	}
1981	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982		goto done;
1983 hit:
1984	if (k = env->error)
1985		goto done;
1986	if (i == CUT)
1987	{
1988		k = env->error = REG_NOMATCH;
1989		goto done;
1990	}
1991	if (!(env->flags & REG_NOSUB))
1992	{
1993		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994		for (i = j = m = 0; j < nmatch; i++)
1995			if (!i || !k || (i & 1))
1996			{
1997				if (i > n)
1998					match[j] = state.nomatch;
1999				else
2000					match[m = j] = env->best[i];
2001				j++;
2002			}
2003		if (k)
2004		{
2005			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006				m--;
2007			((regex_t*)p)->re_nsub = m;
2008		}
2009	}
2010	k = 0;
2011 done:
2012	stkold(stkstd, &env->stk);
2013	env->stk.base = 0;
2014	if (k > REG_NOMATCH)
2015		fatal(p->env->disc, k, NiL);
2016	return k;
2017}
2018
2019void
2020regfree(regex_t* p)
2021{
2022	Env_t*	env;
2023
2024	if (p && (env = p->env))
2025	{
2026#if _REG_subcomp
2027		if (env->sub)
2028		{
2029			regsubfree(p);
2030			p->re_sub = 0;
2031		}
2032#endif
2033		p->env = 0;
2034		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2035		{
2036			drop(env->disc, env->rex);
2037			if (env->pos)
2038				vecclose(env->pos);
2039			if (env->bestpos)
2040				vecclose(env->bestpos);
2041			if (env->stk.base)
2042				stkold(stkstd, &env->stk);
2043			alloc(env->disc, env, 0);
2044		}
2045	}
2046}
2047