1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * posix regex decompiler
26 */
27
28#include "reglib.h"
29
/*
 * ismeta(c,t,e,d): non-zero when c has a state.magic[] entry whose
 * slot (t)+(e) is >= T_META, or when c equals the delimiter d
 * NOTE(review): state.magic and T_META are declared outside this file;
 * presumably t selects the syntax (BRE, ERE, ...) and e the escaped
 * variant -- confirm against reglib.h
 *
 * meta(f,c,t,e,d): write c to f, preceded by a backslash when
 * ismeta(c,t,e,d) holds; c is evaluated more than once so it must be
 * free of side effects
 */

#undef	ismeta
#define ismeta(c,t,e,d)	((state.magic[(c)] && state.magic[(c)][(t)+(e)] >= T_META) || (c) == (d))
#define meta(f,c,t,e,d)	do { if (ismeta((c),(t),(e),(d))) sfputc((f), '\\'); sfputc((f), (c)); } while (0)
33
34static void
35detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
36{
37	register Trie_node_t*	y;
38	char*			o;
39	int			k;
40
41	o = p;
42	k = 1;
43	do
44	{
45		if (k)
46		{
47			o = p;
48			if (p < e)
49				*p++ = x->c;
50		}
51		sfputc(sp, x->c);
52		for (y = x->sib; y; y = y->sib)
53		{
54			sfputc(sp, '|');
55			sfputc(sp, '<');
56			sfwrite(sp, b, p - b);
57			sfputc(sp, '>');
58			detrie(y, sp, b, p, e, delimiter);
59		}
60		if (x->end && x->son)
61		{
62			sfputc(sp, '|');
63			sfputc(sp, '{');
64			sfwrite(sp, b, p - b);
65			sfputc(sp, '}');
66			p = o;
67		}
68	} while (x = x->son);
69}
70
71static int
72decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
73{
74	Rex_t*		q;
75	unsigned char*	s;
76	unsigned char*	t;
77	int		c;
78	int		m;
79	int		cb;
80	int		cd;
81	int		cr;
82	int		ib;
83	int		ie;
84	int		nb;
85	int		ne;
86	unsigned char	ic[2*UCHAR_MAX];
87	unsigned char	nc[2*UCHAR_MAX];
88
89	do
90	{
91		switch (e->type)
92		{
93		case REX_ALT:
94			if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
95				return 1;
96			sfputc(sp, '|');
97			if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
98				return 1;
99			break;
100		case REX_BACK:
101			sfprintf(sp, "\\%d", e->lo);
102			break;
103		case REX_BEG:
104			if (type < SRE)
105				sfputc(sp, '^');
106			break;
107		case REX_END:
108			if (type < SRE)
109				sfputc(sp, '$');
110			break;
111		case REX_WBEG:
112			meta(sp, '<', type, 1, delimiter);
113			break;
114		case REX_WEND:
115			meta(sp, '<', type, 1, delimiter);
116			break;
117		case REX_WORD:
118			sfprintf(sp, "\\w");
119			break;
120		case REX_CLASS:
121		case REX_COLL_CLASS:
122		case REX_ONECHAR:
123		case REX_DOT:
124		case REX_REP:
125			if (type >= SRE)
126			{
127				c = ')';
128				if (e->hi == RE_DUP_INF)
129				{
130					if (!e->lo)
131						sfputc(sp, '*');
132					else if (e->lo == 1)
133						sfputc(sp, '+');
134					else
135						sfprintf(sp, "{%d,}", e->lo);
136				}
137				else if (e->hi != 1)
138					sfprintf(sp, "{%d,%d}", e->lo, e->hi);
139				else if (e->lo == 0)
140					sfputc(sp, '?');
141				else
142					c = 0;
143			}
144			switch (e->type)
145			{
146			case REX_REP:
147				if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
148					return 1;
149				break;
150			case REX_CLASS:
151				sfputc(sp, '[');
152				nb = ne = ib = ie = -2;
153				cb = cd = cr = 0;
154				s = nc;
155				t = ic;
156				for (m = 0; m <= UCHAR_MAX; m++)
157					if (settst(e->re.charclass, m))
158					{
159						if (m == ']')
160							cb = 1;
161						else if (m == '-')
162							cr = 1;
163						else if (m == delimiter)
164							cd = 1;
165						else if (nb < 0)
166							ne = nb = m;
167						else if (ne == (m - 1))
168							ne = m;
169						else
170						{
171							if (ne == nb)
172								*s++ = ne;
173							else
174							{
175								*s++ = nb;
176								*s++ = '-';
177								*s++ = ne;
178							}
179							ne = nb = m;
180						}
181					}
182					else
183					{
184						if (m == ']')
185							cb = -1;
186						else if (m == '-')
187							cr = -1;
188						else if (m == delimiter)
189							cd = -1;
190						else if (ib < 0)
191							ie = ib = m;
192						else if (ie == (m - 1))
193							ie = m;
194						else
195						{
196							if (ie == ib)
197								*t++ = ie;
198							else
199							{
200								*t++ = ib;
201								*t++ = '-';
202								*t++ = ie;
203							}
204							ie = ib = m;
205						}
206					}
207				if (nb >= 0)
208				{
209					*s++ = nb;
210					if (ne != nb)
211					{
212						*s++ = '-';
213						*s++ = ne;
214					}
215				}
216				if (ib >= 0)
217				{
218					*t++ = ib;
219					if (ie != ib)
220					{
221						*t++ = '-';
222						*t++ = ie;
223					}
224				}
225				if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
226				{
227					sfputc(sp, '^');
228					if (cb < 0)
229						sfputc(sp, ']');
230					if (cr < 0)
231						sfputc(sp, '-');
232					if (cd < 0 && delimiter > 0)
233					{
234						if (flags & REG_ESCAPE)
235							sfputc(sp, '\\');
236						sfputc(sp, delimiter);
237					}
238					sfwrite(sp, ic, t - ic);
239				}
240				else
241				{
242					if (cb > 0)
243						sfputc(sp, ']');
244					if (cr > 0)
245						sfputc(sp, '-');
246					if (cd > 0 && delimiter > 0)
247					{
248						if (flags & REG_ESCAPE)
249							sfputc(sp, '\\');
250						sfputc(sp, delimiter);
251					}
252					if (nc[0] == '^')
253					{
254						sfwrite(sp, nc + 1, s - nc - 1);
255						sfputc(sp, '^');
256					}
257					else
258						sfwrite(sp, nc, s - nc);
259				}
260				sfputc(sp, ']');
261				break;
262			case REX_COLL_CLASS:
263				break;
264			case REX_ONECHAR:
265				meta(sp, e->re.onechar, type, 0, delimiter);
266				break;
267			case REX_DOT:
268				sfputc(sp, '.');
269				break;
270			}
271			if (type < SRE)
272			{
273				if (e->hi == RE_DUP_INF)
274				{
275					if (!e->lo)
276						sfputc(sp, '*');
277					else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
278						meta(sp, '+', type, 1, delimiter);
279					else
280					{
281						meta(sp, '{', type, 1, delimiter);
282						sfprintf(sp, "%d,", e->lo);
283						meta(sp, '}', type, 1, delimiter);
284					}
285				}
286				else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
287				{
288					meta(sp, '{', type, 1, delimiter);
289					sfprintf(sp, "%d,%d", e->lo, e->hi);
290					meta(sp, '}', type, 1, delimiter);
291				}
292				else if (e->lo == 0)
293					meta(sp, '?', type, 1, delimiter);
294			}
295			else if (c)
296				sfputc(sp, c);
297			break;
298		case REX_STRING:
299		case REX_KMP:
300			t = (s = e->re.string.base) + e->re.string.size;
301			while (s < t)
302			{
303				c = *s++;
304				meta(sp, c, type, 0, delimiter);
305			}
306			break;
307		case REX_TRIE:
308			ib = 0;
309			for (c = 0; c <= UCHAR_MAX; c++)
310				if (e->re.trie.root[c])
311				{
312					char	pfx[1024];
313
314					if (ib)
315						sfputc(sp, '|');
316					else
317						ib = 1;
318					detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
319				}
320			break;
321		case REX_NEG:
322			if (type >= SRE)
323				sfprintf(sp, "!(");
324			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
325				return 1;
326			if (type >= SRE)
327				sfputc(sp, ')');
328			else
329				sfputc(sp, '!');
330			break;
331		case REX_CONJ:
332			if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
333				return 1;
334			sfputc(sp, '&');
335			if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
336				return 1;
337			break;
338		case REX_GROUP:
339			if (type >= SRE)
340				sfputc(sp, '@');
341			meta(sp, '(', type, 1, delimiter);
342			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
343				return 1;
344			meta(sp, ')', type, 1, delimiter);
345			break;
346		case REX_GROUP_AHEAD:
347		case REX_GROUP_AHEAD_NOT:
348		case REX_GROUP_BEHIND:
349		case REX_GROUP_BEHIND_NOT:
350			meta(sp, '(', type, 1, delimiter);
351			sfputc(sp, '?');
352			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
353				return 1;
354			meta(sp, ')', type, 1, delimiter);
355			break;
356		case REX_GROUP_COND:
357			meta(sp, '(', type, 1, delimiter);
358			sfputc(sp, '?');
359			if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
360				return 1;
361			if (q = e->re.group.expr.binary.right)
362			{
363				sfputc(sp, ':');
364				if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
365					return 1;
366				sfputc(sp, ':');
367				if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
368					return 1;
369			}
370			meta(sp, ')', type, 1, delimiter);
371			break;
372		case REX_GROUP_CUT:
373			meta(sp, '(', type, 1, delimiter);
374			sfputc(sp, '?');
375			if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
376				return 1;
377			meta(sp, ')', type, 1, delimiter);
378			break;
379		case REX_BM:
380			break;
381		default:
382			sfprintf(sp, "<ERROR:REX_%d>", e->type);
383			break;
384		}
385	} while (e = e->next);
386	return 0;
387}
388
389/*
390 * reconstruct pattern from compiled re p into sp
391 */
392
393size_t
394regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
395{
396	Sfio_t*		sp;
397	char*		s;
398	int		type;
399	int		delimiter;
400	size_t		r;
401
402	if (!(sp = sfstropen()))
403		return 0;
404	if (flags == (regflags_t)~0)
405		flags = p->env->flags;
406	switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
407	{
408	case 0:
409		type = BRE;
410		break;
411	case REG_AUGMENTED:
412	case REG_AUGMENTED|REG_EXTENDED:
413		type = ARE;
414		break;
415	case REG_EXTENDED:
416		type = ERE;
417		break;
418	case REG_SHELL:
419		type = SRE;
420		break;
421	default:
422		type = KRE;
423		break;
424	}
425	if (flags & REG_DELIMITED)
426	{
427		delimiter = '/';
428		sfputc(sp, delimiter);
429	}
430	else
431		delimiter = -1;
432	if (decomp(p->env->rex, sp, type, delimiter, flags))
433		r = 0;
434	else
435	{
436		if (delimiter > 0)
437			sfputc(sp, delimiter);
438		if ((r = sfstrtell(sp) + 1) <= n)
439		{
440			if (!(s = sfstruse(sp)))
441				r = 0;
442			else
443				memcpy(buf, s, r);
444		}
445	}
446	sfstrclose(sp);
447	return r;
448}
449