1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * posix regex ed(1) style substitute compile
26 */
27
#include "reglib.h"

#include <limits.h>
29
30static const regflags_t	submap[] =
31{
32	'g',	REG_SUB_ALL,
33	'l',	REG_SUB_LOWER,
34	'n',	REG_SUB_NUMBER,
35	'p',	REG_SUB_PRINT,
36	's',	REG_SUB_STOP,
37	'u',	REG_SUB_UPPER,
38	'w',	REG_SUB_WRITE|REG_SUB_LAST,
39	0,	0
40};
41
42int
43regsubflags(regex_t* p, register const char* s, char** e, int delim, register const regflags_t* map, int* pm, regflags_t* pf)
44{
45	register int			c;
46	register const regflags_t*	m;
47	regflags_t			flags;
48	int				minmatch;
49	regdisc_t*			disc;
50
51	flags = pf ? *pf : 0;
52	minmatch = pm ? *pm : 0;
53	if (!map)
54		map = submap;
55	while (!(flags & REG_SUB_LAST))
56	{
57		if  (!(c = *s++) || c == delim)
58		{
59			s--;
60			break;
61		}
62		else if (c >= '0' && c <= '9')
63		{
64			if (minmatch)
65			{
66				disc = p->env->disc;
67				regfree(p);
68				return fatal(disc, REG_EFLAGS, s - 1);
69			}
70			minmatch = c - '0';
71			while (*s >= '0' && *s <= '9')
72				minmatch = minmatch * 10 + *s++ - '0';
73		}
74		else
75		{
76			for (m = map; *m; m++)
77				if (*m++ == c)
78				{
79					if (flags & *m)
80					{
81						disc = p->env->disc;
82						regfree(p);
83						return fatal(disc, REG_EFLAGS, s - 1);
84					}
85					flags |= *m--;
86					break;
87				}
88			if (!*m)
89			{
90				s--;
91				break;
92			}
93		}
94	}
95	if (pf)
96		*pf = flags;
97	if (pm)
98		*pm = minmatch;
99	if (e)
100		*e = (char*)s;
101	return 0;
102}
103
104/*
105 * compile substitute rhs and optional flags
106 */
107
108int
109regsubcomp(regex_t* p, register const char* s, const regflags_t* map, int minmatch, regflags_t flags)
110{
111	register regsub_t*	sub;
112	register int		c;
113	register int		d;
114	register char*		t;
115	register regsubop_t*	op;
116	char*			e;
117	const char*		r;
118	int			sre;
119	int			f;
120	int			g;
121	int			n;
122	int			nops;
123	const char*		o;
124	regdisc_t*		disc;
125
126	disc = p->env->disc;
127	if (p->env->flags & REG_NOSUB)
128	{
129		regfree(p);
130		return fatal(disc, REG_BADPAT, NiL);
131	}
132	if (!(sub = (regsub_t*)alloc(p->env->disc, 0, sizeof(regsub_t) + strlen(s))) || !(sub->re_ops = (regsubop_t*)alloc(p->env->disc, 0, (nops = 8) * sizeof(regsubop_t))))
133	{
134		if (sub)
135			alloc(p->env->disc, sub, 0);
136		regfree(p);
137		return fatal(disc, REG_ESPACE, s);
138	}
139	sub->re_buf = sub->re_end = 0;
140	p->re_sub = sub;
141	p->env->sub = 1;
142	op = sub->re_ops;
143	o = s;
144	if (!(p->env->flags & REG_DELIMITED))
145		d = 0;
146	else
147		switch (d = *(s - 1))
148		{
149		case '\\':
150		case '\n':
151		case '\r':
152			regfree(p);
153			return fatal(disc, REG_EDELIM, s);
154		}
155	sre = p->env->flags & REG_SHELL;
156	t = sub->re_rhs;
157	if (d)
158	{
159		r = s;
160		for (;;)
161		{
162			if (!*s)
163			{
164				if (p->env->flags & REG_MUSTDELIM)
165				{
166					regfree(p);
167					return fatal(disc, REG_EDELIM, r);
168				}
169				break;
170			}
171			else if (*s == d)
172			{
173				flags |= REG_SUB_FULL;
174				s++;
175				break;
176			}
177			else if (*s++ == '\\' && !*s++)
178			{
179				regfree(p);
180				return fatal(disc, REG_EESCAPE, r);
181			}
182		}
183		if (*s)
184		{
185			if (n = regsubflags(p, s, &e, d, map, &minmatch, &flags))
186				return n;
187			s = (const char*)e;
188		}
189		p->re_npat = s - o;
190		s = r;
191	}
192	else
193		p->re_npat = 0;
194	op->op = f = g = flags & (REG_SUB_LOWER|REG_SUB_UPPER);
195	op->off = 0;
196	while ((c = *s++) != d)
197	{
198	again:
199		if (!c)
200		{
201			p->re_npat = s - o - 1;
202			break;
203		}
204		else if (c == '\\')
205		{
206			if (*s == c)
207			{
208				*t++ = *s++;
209				continue;
210			}
211			if ((c = *s++) == d)
212				goto again;
213			if (!c)
214			{
215				regfree(p);
216				return fatal(disc, REG_EESCAPE, s - 2);
217			}
218			if (c == '&')
219			{
220				*t++ = c;
221				continue;
222			}
223		}
224		else if (c == '&')
225		{
226			if (sre)
227			{
228				*t++ = c;
229				continue;
230			}
231		}
232		else
233		{
234			switch (op->op)
235			{
236			case REG_SUB_UPPER:
237				if (islower(c))
238					c = toupper(c);
239				break;
240			case REG_SUB_LOWER:
241				if (isupper(c))
242					c = tolower(c);
243				break;
244			case REG_SUB_UPPER|REG_SUB_LOWER:
245				if (isupper(c))
246					c = tolower(c);
247				else if (islower(c))
248					c = toupper(c);
249				break;
250			}
251			*t++ = c;
252			continue;
253		}
254		switch (c)
255		{
256		case 0:
257			s--;
258			continue;
259		case '&':
260			c = 0;
261			break;
262		case '0': case '1': case '2': case '3': case '4':
263		case '5': case '6': case '7': case '8': case '9':
264			c -= '0';
265			if (isdigit(*s) && (p->env->flags & REG_MULTIREF))
266				c = c * 10 + *s++ - '0';
267			break;
268		case 'l':
269			if (c = *s)
270			{
271				s++;
272				if (isupper(c))
273					c = tolower(c);
274				*t++ = c;
275			}
276			continue;
277		case 'u':
278			if (c = *s)
279			{
280				s++;
281				if (islower(c))
282					c = toupper(c);
283				*t++ = c;
284			}
285			continue;
286		case 'E':
287			f = g;
288		set:
289			if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops)
290			{
291				if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
292				{
293					regfree(p);
294					return fatal(disc, REG_ESPACE, NiL);
295				}
296				op = sub->re_ops + n;
297			}
298			op->op = f;
299			op->off = t - sub->re_rhs;
300			continue;
301		case 'L':
302			g = f;
303			f = REG_SUB_LOWER;
304			goto set;
305		case 'U':
306			g = f;
307			f = REG_SUB_UPPER;
308			goto set;
309		default:
310			if (!sre)
311			{
312				*t++ = chresc(s - 2, &e);
313				s = (const char*)e;
314				continue;
315			}
316			s--;
317			c = -1;
318			break;
319		}
320		if (c > p->re_nsub)
321		{
322			regfree(p);
323			return fatal(disc, REG_ESUBREG, s - 1);
324		}
325		if ((n = op - sub->re_ops) >= (nops - 2))
326		{
327			if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
328			{
329				regfree(p);
330				return fatal(disc, REG_ESPACE, NiL);
331			}
332			op = sub->re_ops + n;
333		}
334		if (op->len = (t - sub->re_rhs) - op->off)
335			op++;
336		op->op = f;
337		op->off = c;
338		op->len = 0;
339		op++;
340		op->op = f;
341		op->off = t - sub->re_rhs;
342	}
343	if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops)
344	{
345		if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
346		{
347			regfree(p);
348			return fatal(disc, REG_ESPACE, NiL);
349		}
350		op = sub->re_ops + n;
351	}
352	op->len = -1;
353	sub->re_flags = flags;
354	sub->re_min = minmatch;
355	return 0;
356}
357
358void
359regsubfree(regex_t* p)
360{
361	Env_t*		env;
362	regsub_t*	sub;
363
364	if (p && (env = p->env) && env->sub && (sub = p->re_sub))
365	{
366		env->sub = 0;
367		p->re_sub = 0;
368		if (!(env->disc->re_flags & REG_NOFREE))
369		{
370			if (sub->re_buf)
371				alloc(env->disc, sub->re_buf, 0);
372			if (sub->re_ops)
373				alloc(env->disc, sub->re_ops, 0);
374			alloc(env->disc, sub, 0);
375		}
376	}
377}
378