1da2e3ebchin/***********************************************************************
2da2e3ebchin*                                                                      *
3da2e3ebchin*               This software is part of the ast package               *
43e14f97Roger A. Faulkner*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5da2e3ebchin*                      and is licensed under the                       *
6da2e3ebchin*                  Common Public License, Version 1.0                  *
77c2fbfbApril Chin*                    by AT&T Intellectual Property                     *
8da2e3ebchin*                                                                      *
9da2e3ebchin*                A copy of the License is available at                 *
10da2e3ebchin*            http://www.opensource.org/licenses/cpl1.0.txt             *
11da2e3ebchin*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12da2e3ebchin*                                                                      *
13da2e3ebchin*              Information and Software Systems Research               *
14da2e3ebchin*                            AT&T Research                             *
15da2e3ebchin*                           Florham Park NJ                            *
16da2e3ebchin*                                                                      *
17da2e3ebchin*                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebchin*                  David Korn <dgk@research.att.com>                   *
19da2e3ebchin*                   Phong Vo <kpv@research.att.com>                    *
20da2e3ebchin*                                                                      *
21da2e3ebchin***********************************************************************/
22da2e3ebchin#pragma prototyped
23da2e3ebchin
24da2e3ebchin/*
25da2e3ebchin * posix regex decompiler
26da2e3ebchin */
27da2e3ebchin
28da2e3ebchin#include "reglib.h"
29da2e3ebchin
30da2e3ebchin#undef	ismeta
31da2e3ebchin#define ismeta(c,t,e,d)	(state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d))
32da2e3ebchin#define meta(f,c,t,e,d)	do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0)
33da2e3ebchin
34da2e3ebchinstatic void
35da2e3ebchindetrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
36da2e3ebchin{
37da2e3ebchin	register Trie_node_t*	y;
38da2e3ebchin	char*			o;
39da2e3ebchin	int			k;
40da2e3ebchin
41da2e3ebchin	o = p;
42da2e3ebchin	k = 1;
43da2e3ebchin	do
44da2e3ebchin	{
45da2e3ebchin		if (k)
46da2e3ebchin		{
47da2e3ebchin			o = p;
48da2e3ebchin			if (p < e)
49da2e3ebchin				*p++ = x->c;
50da2e3ebchin		}
51da2e3ebchin		sfputc(sp, x->c);
52da2e3ebchin		for (y = x->sib; y; y = y->sib)
53da2e3ebchin		{
54da2e3ebchin			sfputc(sp, '|');
55da2e3ebchin			sfputc(sp, '<');
56da2e3ebchin			sfwrite(sp, b, p - b);
57da2e3ebchin			sfputc(sp, '>');
58da2e3ebchin			detrie(y, sp, b, p, e, delimiter);
59da2e3ebchin		}
60da2e3ebchin		if (x->end && x->son)
61da2e3ebchin		{
62da2e3ebchin			sfputc(sp, '|');
63da2e3ebchin			sfputc(sp, '{');
64da2e3ebchin			sfwrite(sp, b, p - b);
65da2e3ebchin			sfputc(sp, '}');
66da2e3ebchin			p = o;
67da2e3ebchin		}
68da2e3ebchin	} while (x = x->son);
69da2e3ebchin}
70da2e3ebchin
71da2e3ebchinstatic int
72da2e3ebchindecomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
73da2e3ebchin{
74da2e3ebchin	Rex_t*		q;
75da2e3ebchin	unsigned char*	s;
76da2e3ebchin	unsigned char*	t;
77da2e3ebchin	int		c;
783e14f97Roger A. Faulkner	int		m;
79da2e3ebchin	int		cb;
80da2e3ebchin	int		cd;
81da2e3ebchin	int		cr;
82da2e3ebchin	int		ib;
83da2e3ebchin	int		ie;
84da2e3ebchin	int		nb;
85da2e3ebchin	int		ne;
86da2e3ebchin	unsigned char	ic[2*UCHAR_MAX];
87da2e3ebchin	unsigned char	nc[2*UCHAR_MAX];
88da2e3ebchin
89da2e3ebchin	do
90da2e3ebchin	{
91da2e3ebchin		switch (e->type)
92da2e3ebchin		{
93da2e3ebchin		case REX_ALT:
94da2e3ebchin			if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
95da2e3ebchin				return 1;
96da2e3ebchin			sfputc(sp, '|');
97da2e3ebchin			if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
98da2e3ebchin				return 1;
99da2e3ebchin			break;
100da2e3ebchin		case REX_BACK:
101da2e3ebchin			sfprintf(sp, "\\%d", e->lo);
102da2e3ebchin			break;
103da2e3ebchin		case REX_BEG:
104da2e3ebchin			if (type < SRE)
105da2e3ebchin				sfputc(sp, '^');
106da2e3ebchin			break;
107da2e3ebchin		case REX_END:
108da2e3ebchin			if (type < SRE)
109da2e3ebchin				sfputc(sp, '$');
110da2e3ebchin			break;
111da2e3ebchin		case REX_WBEG:
112da2e3ebchin			meta(sp, '<', type, 1, delimiter);
113da2e3ebchin			break;
114da2e3ebchin		case REX_WEND:
115da2e3ebchin			meta(sp, '<', type, 1, delimiter);
116da2e3ebchin			break;
117da2e3ebchin		case REX_WORD:
118da2e3ebchin			sfprintf(sp, "\\w");
119da2e3ebchin			break;
120da2e3ebchin		case REX_CLASS:
121da2e3ebchin		case REX_COLL_CLASS:
122da2e3ebchin		case REX_ONECHAR:
123da2e3ebchin		case REX_DOT:
124da2e3ebchin		case REX_REP:
125da2e3ebchin			if (type >= SRE)
126da2e3ebchin			{
127da2e3ebchin				c = ')';
128da2e3ebchin				if (e->hi == RE_DUP_INF)
129da2e3ebchin				{
130da2e3ebchin					if (!e->lo)
131da2e3ebchin						sfputc(sp, '*');
132da2e3ebchin					else if (e->lo == 1)
133da2e3ebchin						sfputc(sp, '+');
134da2e3ebchin					else
135da2e3ebchin						sfprintf(sp, "{%d,}", e->lo);
136da2e3ebchin				}
137da2e3ebchin				else if (e->hi != 1)
138da2e3ebchin					sfprintf(sp, "{%d,%d}", e->lo, e->hi);
139da2e3ebchin				else if (e->lo == 0)
140da2e3ebchin					sfputc(sp, '?');
141da2e3ebchin				else
142da2e3ebchin					c = 0;
143da2e3ebchin			}
144da2e3ebchin			switch (e->type)
145da2e3ebchin			{
146da2e3ebchin			case REX_REP:
147da2e3ebchin				if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
148da2e3ebchin					return 1;
149da2e3ebchin				break;
150da2e3ebchin			case REX_CLASS:
151da2e3ebchin				sfputc(sp, '[');
152da2e3ebchin				nb = ne = ib = ie = -2;
153da2e3ebchin				cb = cd = cr = 0;
154da2e3ebchin				s = nc;
155da2e3ebchin				t = ic;
1563e14f97Roger A. Faulkner				for (m = 0; m <= UCHAR_MAX; m++)
1573e14f97Roger A. Faulkner					if (settst(e->re.charclass, m))
158da2e3ebchin					{
1593e14f97Roger A. Faulkner						if (m == ']')
160da2e3ebchin							cb = 1;
1613e14f97Roger A. Faulkner						else if (m == '-')
162da2e3ebchin							cr = 1;
1633e14f97Roger A. Faulkner						else if (m == delimiter)
164da2e3ebchin							cd = 1;
165da2e3ebchin						else if (nb < 0)
1663e14f97Roger A. Faulkner							ne = nb = m;
1673e14f97Roger A. Faulkner						else if (ne == (m - 1))
1683e14f97Roger A. Faulkner							ne = m;
169da2e3ebchin						else
170da2e3ebchin						{
171da2e3ebchin							if (ne == nb)
172da2e3ebchin								*s++ = ne;
173da2e3ebchin							else
174da2e3ebchin							{
175da2e3ebchin								*s++ = nb;
176da2e3ebchin								*s++ = '-';
177da2e3ebchin								*s++ = ne;
178da2e3ebchin							}
1793e14f97Roger A. Faulkner							ne = nb = m;
180da2e3ebchin						}
181da2e3ebchin					}
182da2e3ebchin					else
183da2e3ebchin					{
1843e14f97Roger A. Faulkner						if (m == ']')
185da2e3ebchin							cb = -1;
1863e14f97Roger A. Faulkner						else if (m == '-')
187da2e3ebchin							cr = -1;
1883e14f97Roger A. Faulkner						else if (m == delimiter)
189da2e3ebchin							cd = -1;
190da2e3ebchin						else if (ib < 0)
1913e14f97Roger A. Faulkner							ie = ib = m;
1923e14f97Roger A. Faulkner						else if (ie == (m - 1))
1933e14f97Roger A. Faulkner							ie = m;
194da2e3ebchin						else
195da2e3ebchin						{
196da2e3ebchin							if (ie == ib)
197da2e3ebchin								*t++ = ie;
198da2e3ebchin							else
199da2e3ebchin							{
200da2e3ebchin								*t++ = ib;
201da2e3ebchin								*t++ = '-';
202da2e3ebchin								*t++ = ie;
203da2e3ebchin							}
2043e14f97Roger A. Faulkner							ie = ib = m;
205da2e3ebchin						}
206da2e3ebchin					}
207da2e3ebchin				if (nb >= 0)
208da2e3ebchin				{
209da2e3ebchin					*s++ = nb;
210da2e3ebchin					if (ne != nb)
211da2e3ebchin					{
212da2e3ebchin						*s++ = '-';
213da2e3ebchin						*s++ = ne;
214da2e3ebchin					}
215da2e3ebchin				}
216da2e3ebchin				if (ib >= 0)
217da2e3ebchin				{
218da2e3ebchin					*t++ = ib;
219da2e3ebchin					if (ie != ib)
220da2e3ebchin					{
221da2e3ebchin						*t++ = '-';
222da2e3ebchin						*t++ = ie;
223da2e3ebchin					}
224da2e3ebchin				}
225da2e3ebchin				if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
226da2e3ebchin				{
227da2e3ebchin					sfputc(sp, '^');
228da2e3ebchin					if (cb < 0)
229da2e3ebchin						sfputc(sp, ']');
230da2e3ebchin					if (cr < 0)
231da2e3ebchin						sfputc(sp, '-');
2323e14f97Roger A. Faulkner					if (cd < 0 && delimiter > 0)
233da2e3ebchin					{
234da2e3ebchin						if (flags & REG_ESCAPE)
235da2e3ebchin							sfputc(sp, '\\');
236da2e3ebchin						sfputc(sp, delimiter);
237da2e3ebchin					}
238da2e3ebchin					sfwrite(sp, ic, t - ic);
239da2e3ebchin				}
240da2e3ebchin				else
241da2e3ebchin				{
242da2e3ebchin					if (cb > 0)
243da2e3ebchin						sfputc(sp, ']');
244da2e3ebchin					if (cr > 0)
245da2e3ebchin						sfputc(sp, '-');
2463e14f97Roger A. Faulkner					if (cd > 0 && delimiter > 0)
247da2e3ebchin					{
248da2e3ebchin						if (flags & REG_ESCAPE)
249da2e3ebchin							sfputc(sp, '\\');
250da2e3ebchin						sfputc(sp, delimiter);
251da2e3ebchin					}
252da2e3ebchin					if (nc[0] == '^')
253da2e3ebchin					{
254da2e3ebchin						sfwrite(sp, nc + 1, s - nc - 1);
255da2e3ebchin						sfputc(sp, '^');
256da2e3ebchin					}
257da2e3ebchin					else
258da2e3ebchin						sfwrite(sp, nc, s - nc);
259da2e3ebchin				}
260da2e3ebchin				sfputc(sp, ']');
261da2e3ebchin				break;
262da2e3ebchin			case REX_COLL_CLASS:
263da2e3ebchin				break;
264da2e3ebchin			case REX_ONECHAR:
265da2e3ebchin				meta(sp, e->re.onechar, type, 0, delimiter);
266da2e3ebchin				break;
267da2e3ebchin			case REX_DOT:
268da2e3ebchin				sfputc(sp, '.');
269da2e3ebchin				break;
270da2e3ebchin			}
271da2e3ebchin			if (type < SRE)
272da2e3ebchin			{
273da2e3ebchin				if (e->hi == RE_DUP_INF)
274da2e3ebchin				{
275da2e3ebchin					if (!e->lo)
276da2e3ebchin						sfputc(sp, '*');
277da2e3ebchin					else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
278da2e3ebchin						meta(sp, '+', type, 1, delimiter);
279da2e3ebchin					else
280da2e3ebchin					{
281da2e3ebchin						meta(sp, '{', type, 1, delimiter);
282da2e3ebchin						sfprintf(sp, "%d,", e->lo);
283da2e3ebchin						meta(sp, '}', type, 1, delimiter);
284da2e3ebchin					}
285da2e3ebchin				}
286da2e3ebchin				else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
287da2e3ebchin				{
288da2e3ebchin					meta(sp, '{', type, 1, delimiter);
289da2e3ebchin					sfprintf(sp, "%d,%d", e->lo, e->hi);
290da2e3ebchin					meta(sp, '}', type, 1, delimiter);
291da2e3ebchin				}
292da2e3ebchin				else if (e->lo == 0)
293da2e3ebchin					meta(sp, '?', type, 1, delimiter);
294da2e3ebchin			}
295da2e3ebchin			else if (c)
296da2e3ebchin				sfputc(sp, c);
297da2e3ebchin			break;
298da2e3ebchin		case REX_STRING:
299da2e3ebchin		case REX_KMP:
300da2e3ebchin			t = (s = e->re.string.base) + e->re.string.size;
301da2e3ebchin			while (s < t)
302da2e3ebchin			{
303da2e3ebchin				c = *s++;
304da2e3ebchin				meta(sp, c, type, 0, delimiter);
305da2e3ebchin			}
306da2e3ebchin			break;
307da2e3ebchin		case REX_TRIE:
308da2e3ebchin			ib = 0;
309da2e3ebchin			for (c = 0; c <= UCHAR_MAX; c++)
310da2e3ebchin				if (e->re.trie.root[c])
311da2e3ebchin				{
312da2e3ebchin					char	pfx[1024];
313da2e3ebchin
314da2e3ebchin					if (ib)
315da2e3ebchin						sfputc(sp, '|');
316da2e3ebchin					else
317da2e3ebchin						ib = 1;
318da2e3ebchin					detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
319da2e3ebchin				}
320da2e3ebchin			break;
321da2e3ebchin		case REX_NEG:
322da2e3ebchin			if (type >= SRE)
323da2e3ebchin				sfprintf(sp, "!(");
324da2e3ebchin			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
325da2e3ebchin				return 1;
326da2e3ebchin			if (type >= SRE)
327da2e3ebchin				sfputc(sp, ')');
328da2e3ebchin			else
329da2e3ebchin				sfputc(sp, '!');
330da2e3ebchin			break;
331da2e3ebchin		case REX_CONJ:
332da2e3ebchin			if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
333da2e3ebchin				return 1;
334da2e3ebchin			sfputc(sp, '&');
335da2e3ebchin			if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
336da2e3ebchin				return 1;
337da2e3ebchin			break;
338da2e3ebchin		case REX_GROUP:
339da2e3ebchin			if (type >= SRE)
340da2e3ebchin				sfputc(sp, '@');
341da2e3ebchin			meta(sp, '(', type, 1, delimiter);
342da2e3ebchin			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
343da2e3ebchin				return 1;
344da2e3ebchin			meta(sp, ')', type, 1, delimiter);
345da2e3ebchin			break;
346da2e3ebchin		case REX_GROUP_AHEAD:
347da2e3ebchin		case REX_GROUP_AHEAD_NOT:
348da2e3ebchin		case REX_GROUP_BEHIND:
349da2e3ebchin		case REX_GROUP_BEHIND_NOT:
350da2e3ebchin			meta(sp, '(', type, 1, delimiter);
351da2e3ebchin			sfputc(sp, '?');
352da2e3ebchin			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
353da2e3ebchin				return 1;
354da2e3ebchin			meta(sp, ')', type, 1, delimiter);
355da2e3ebchin			break;
356da2e3ebchin		case REX_GROUP_COND:
357da2e3ebchin			meta(sp, '(', type, 1, delimiter);
358da2e3ebchin			sfputc(sp, '?');
359da2e3ebchin			if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
360da2e3ebchin				return 1;
361da2e3ebchin			if (q = e->re.group.expr.binary.right)
362da2e3ebchin			{
363da2e3ebchin				sfputc(sp, ':');
364da2e3ebchin				if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
365da2e3ebchin					return 1;
366da2e3ebchin				sfputc(sp, ':');
367da2e3ebchin				if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
368da2e3ebchin					return 1;
369da2e3ebchin			}
370da2e3ebchin			meta(sp, ')', type, 1, delimiter);
371da2e3ebchin			break;
372da2e3ebchin		case REX_GROUP_CUT:
373da2e3ebchin			meta(sp, '(', type, 1, delimiter);
374da2e3ebchin			sfputc(sp, '?');
375da2e3ebchin			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
376da2e3ebchin				return 1;
377da2e3ebchin			meta(sp, ')', type, 1, delimiter);
378da2e3ebchin			break;
379da2e3ebchin		case REX_BM:
380da2e3ebchin			break;
381da2e3ebchin		default:
382da2e3ebchin			sfprintf(sp, "<ERROR:REX_%d>", e->type);
383da2e3ebchin			break;
384da2e3ebchin		}
385da2e3ebchin	} while (e = e->next);
386da2e3ebchin	return 0;
387da2e3ebchin}
388da2e3ebchin
389da2e3ebchin/*
390da2e3ebchin * reconstruct pattern from compiled re p into sp
391da2e3ebchin */
392da2e3ebchin
393da2e3ebchinsize_t
394da2e3ebchinregdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
395da2e3ebchin{
396da2e3ebchin	Sfio_t*		sp;
397da2e3ebchin	char*		s;
398da2e3ebchin	int		type;
399da2e3ebchin	int		delimiter;
400da2e3ebchin	size_t		r;
401da2e3ebchin
402da2e3ebchin	if (!(sp = sfstropen()))
403da2e3ebchin		return 0;
4043e14f97Roger A. Faulkner	if (flags == (regflags_t)~0)
405da2e3ebchin		flags = p->env->flags;
406da2e3ebchin	switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
407da2e3ebchin	{
408da2e3ebchin	case 0:
409da2e3ebchin		type = BRE;
410da2e3ebchin		break;
411da2e3ebchin	case REG_AUGMENTED:
412da2e3ebchin	case REG_AUGMENTED|REG_EXTENDED:
413da2e3ebchin		type = ARE;
414da2e3ebchin		break;
415da2e3ebchin	case REG_EXTENDED:
416da2e3ebchin		type = ERE;
417da2e3ebchin		break;
418da2e3ebchin	case REG_SHELL:
419da2e3ebchin		type = SRE;
420da2e3ebchin		break;
421da2e3ebchin	default:
422da2e3ebchin		type = KRE;
423da2e3ebchin		break;
424da2e3ebchin	}
425da2e3ebchin	if (flags & REG_DELIMITED)
426da2e3ebchin	{
427da2e3ebchin		delimiter = '/';
428da2e3ebchin		sfputc(sp, delimiter);
429da2e3ebchin	}
430da2e3ebchin	else
4313e14f97Roger A. Faulkner		delimiter = -1;
432da2e3ebchin	if (decomp(p->env->rex, sp, type, delimiter, flags))
433da2e3ebchin		r = 0;
434da2e3ebchin	else
435da2e3ebchin	{
4363e14f97Roger A. Faulkner		if (delimiter > 0)
437da2e3ebchin			sfputc(sp, delimiter);
438da2e3ebchin		if ((r = sfstrtell(sp) + 1) <= n)
439da2e3ebchin		{
440da2e3ebchin			if (!(s = sfstruse(sp)))
441da2e3ebchin				r = 0;
442da2e3ebchin			else
443da2e3ebchin				memcpy(buf, s, r);
444da2e3ebchin		}
445da2e3ebchin	}
446da2e3ebchin	sfstrclose(sp);
447da2e3ebchin	return r;
448da2e3ebchin}
449