1da2e3ebchin/***********************************************************************
2da2e3ebchin*                                                                      *
3da2e3ebchin*               This software is part of the ast package               *
43e14f97Roger A. Faulkner*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5da2e3ebchin*                      and is licensed under the                       *
6da2e3ebchin*                  Common Public License, Version 1.0                  *
77c2fbfbApril Chin*                    by AT&T Intellectual Property                     *
8da2e3ebchin*                                                                      *
9da2e3ebchin*                A copy of the License is available at                 *
10da2e3ebchin*            http://www.opensource.org/licenses/cpl1.0.txt             *
11da2e3ebchin*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12da2e3ebchin*                                                                      *
13da2e3ebchin*              Information and Software Systems Research               *
14da2e3ebchin*                            AT&T Research                             *
15da2e3ebchin*                           Florham Park NJ                            *
16da2e3ebchin*                                                                      *
17da2e3ebchin*                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebchin*                  David Korn <dgk@research.att.com>                   *
19da2e3ebchin*                   Phong Vo <kpv@research.att.com>                    *
20da2e3ebchin*                                                                      *
21da2e3ebchin***********************************************************************/
22da2e3ebchin#pragma prototyped
23da2e3ebchin
24da2e3ebchin/*
25da2e3ebchin * posix regex ed(1) style substitute compile
26da2e3ebchin */
27da2e3ebchin
28da2e3ebchin#include "reglib.h"
29da2e3ebchin
30da2e3ebchinstatic const regflags_t	submap[] =
31da2e3ebchin{
32da2e3ebchin	'g',	REG_SUB_ALL,
33da2e3ebchin	'l',	REG_SUB_LOWER,
34da2e3ebchin	'n',	REG_SUB_NUMBER,
35da2e3ebchin	'p',	REG_SUB_PRINT,
36da2e3ebchin	's',	REG_SUB_STOP,
37da2e3ebchin	'u',	REG_SUB_UPPER,
38da2e3ebchin	'w',	REG_SUB_WRITE|REG_SUB_LAST,
39da2e3ebchin	0,	0
40da2e3ebchin};
41da2e3ebchin
42da2e3ebchinint
43da2e3ebchinregsubflags(regex_t* p, register const char* s, char** e, int delim, register const regflags_t* map, int* pm, regflags_t* pf)
44da2e3ebchin{
45da2e3ebchin	register int			c;
46da2e3ebchin	register const regflags_t*	m;
47da2e3ebchin	regflags_t			flags;
48da2e3ebchin	int				minmatch;
49da2e3ebchin	regdisc_t*			disc;
50da2e3ebchin
51da2e3ebchin	flags = pf ? *pf : 0;
52da2e3ebchin	minmatch = pm ? *pm : 0;
53da2e3ebchin	if (!map)
54da2e3ebchin		map = submap;
55da2e3ebchin	while (!(flags & REG_SUB_LAST))
56da2e3ebchin	{
57da2e3ebchin		if  (!(c = *s++) || c == delim)
58da2e3ebchin		{
59da2e3ebchin			s--;
60da2e3ebchin			break;
61da2e3ebchin		}
62da2e3ebchin		else if (c >= '0' && c <= '9')
63da2e3ebchin		{
64da2e3ebchin			if (minmatch)
65da2e3ebchin			{
66da2e3ebchin				disc = p->env->disc;
67da2e3ebchin				regfree(p);
68da2e3ebchin				return fatal(disc, REG_EFLAGS, s - 1);
69da2e3ebchin			}
70da2e3ebchin			minmatch = c - '0';
71da2e3ebchin			while (*s >= '0' && *s <= '9')
72da2e3ebchin				minmatch = minmatch * 10 + *s++ - '0';
73da2e3ebchin		}
74da2e3ebchin		else
75da2e3ebchin		{
76da2e3ebchin			for (m = map; *m; m++)
77da2e3ebchin				if (*m++ == c)
78da2e3ebchin				{
79da2e3ebchin					if (flags & *m)
80da2e3ebchin					{
81da2e3ebchin						disc = p->env->disc;
82da2e3ebchin						regfree(p);
83da2e3ebchin						return fatal(disc, REG_EFLAGS, s - 1);
84da2e3ebchin					}
85da2e3ebchin					flags |= *m--;
86da2e3ebchin					break;
87da2e3ebchin				}
88da2e3ebchin			if (!*m)
89da2e3ebchin			{
90da2e3ebchin				s--;
91da2e3ebchin				break;
92da2e3ebchin			}
93da2e3ebchin		}
94da2e3ebchin	}
95da2e3ebchin	if (pf)
96da2e3ebchin		*pf = flags;
97da2e3ebchin	if (pm)
98da2e3ebchin		*pm = minmatch;
99da2e3ebchin	if (e)
100da2e3ebchin		*e = (char*)s;
101da2e3ebchin	return 0;
102da2e3ebchin}
103da2e3ebchin
104da2e3ebchin/*
105da2e3ebchin * compile substitute rhs and optional flags
106da2e3ebchin */
107da2e3ebchin
108da2e3ebchinint
109da2e3ebchinregsubcomp(regex_t* p, register const char* s, const regflags_t* map, int minmatch, regflags_t flags)
110da2e3ebchin{
111da2e3ebchin	register regsub_t*	sub;
112da2e3ebchin	register int		c;
113da2e3ebchin	register int		d;
114da2e3ebchin	register char*		t;
115da2e3ebchin	register regsubop_t*	op;
116da2e3ebchin	char*			e;
117da2e3ebchin	const char*		r;
118da2e3ebchin	int			sre;
119da2e3ebchin	int			f;
120da2e3ebchin	int			g;
121da2e3ebchin	int			n;
122da2e3ebchin	int			nops;
123da2e3ebchin	const char*		o;
124da2e3ebchin	regdisc_t*		disc;
125da2e3ebchin
126da2e3ebchin	disc = p->env->disc;
127da2e3ebchin	if (p->env->flags & REG_NOSUB)
128da2e3ebchin	{
129da2e3ebchin		regfree(p);
130da2e3ebchin		return fatal(disc, REG_BADPAT, NiL);
131da2e3ebchin	}
132da2e3ebchin	if (!(sub = (regsub_t*)alloc(p->env->disc, 0, sizeof(regsub_t) + strlen(s))) || !(sub->re_ops = (regsubop_t*)alloc(p->env->disc, 0, (nops = 8) * sizeof(regsubop_t))))
133da2e3ebchin	{
134da2e3ebchin		if (sub)
135da2e3ebchin			alloc(p->env->disc, sub, 0);
136da2e3ebchin		regfree(p);
137da2e3ebchin		return fatal(disc, REG_ESPACE, s);
138da2e3ebchin	}
139da2e3ebchin	sub->re_buf = sub->re_end = 0;
140da2e3ebchin	p->re_sub = sub;
141da2e3ebchin	p->env->sub = 1;
142da2e3ebchin	op = sub->re_ops;
143da2e3ebchin	o = s;
144da2e3ebchin	if (!(p->env->flags & REG_DELIMITED))
145da2e3ebchin		d = 0;
146da2e3ebchin	else
147da2e3ebchin		switch (d = *(s - 1))
148da2e3ebchin		{
149da2e3ebchin		case '\\':
150da2e3ebchin		case '\n':
151da2e3ebchin		case '\r':
152da2e3ebchin			regfree(p);
153da2e3ebchin			return fatal(disc, REG_EDELIM, s);
154da2e3ebchin		}
155da2e3ebchin	sre = p->env->flags & REG_SHELL;
156da2e3ebchin	t = sub->re_rhs;
157da2e3ebchin	if (d)
158da2e3ebchin	{
159da2e3ebchin		r = s;
160da2e3ebchin		for (;;)
161da2e3ebchin		{
162da2e3ebchin			if (!*s)
163da2e3ebchin			{
164da2e3ebchin				if (p->env->flags & REG_MUSTDELIM)
165da2e3ebchin				{
166da2e3ebchin					regfree(p);
167da2e3ebchin					return fatal(disc, REG_EDELIM, r);
168da2e3ebchin				}
169da2e3ebchin				break;
170da2e3ebchin			}
171da2e3ebchin			else if (*s == d)
172da2e3ebchin			{
173da2e3ebchin				flags |= REG_SUB_FULL;
174da2e3ebchin				s++;
175da2e3ebchin				break;
176da2e3ebchin			}
177da2e3ebchin			else if (*s++ == '\\' && !*s++)
178da2e3ebchin			{
179da2e3ebchin				regfree(p);
180da2e3ebchin				return fatal(disc, REG_EESCAPE, r);
181da2e3ebchin			}
182da2e3ebchin		}
183da2e3ebchin		if (*s)
184da2e3ebchin		{
185da2e3ebchin			if (n = regsubflags(p, s, &e, d, map, &minmatch, &flags))
186da2e3ebchin				return n;
187da2e3ebchin			s = (const char*)e;
188da2e3ebchin		}
189da2e3ebchin		p->re_npat = s - o;
190da2e3ebchin		s = r;
191da2e3ebchin	}
192da2e3ebchin	else
193da2e3ebchin		p->re_npat = 0;
194da2e3ebchin	op->op = f = g = flags & (REG_SUB_LOWER|REG_SUB_UPPER);
195da2e3ebchin	op->off = 0;
196da2e3ebchin	while ((c = *s++) != d)
197da2e3ebchin	{
198da2e3ebchin	again:
199da2e3ebchin		if (!c)
200da2e3ebchin		{
201da2e3ebchin			p->re_npat = s - o - 1;
202da2e3ebchin			break;
203da2e3ebchin		}
204da2e3ebchin		else if (c == '\\')
205da2e3ebchin		{
206da2e3ebchin			if (*s == c)
207da2e3ebchin			{
208da2e3ebchin				*t++ = *s++;
209da2e3ebchin				continue;
210da2e3ebchin			}
211da2e3ebchin			if ((c = *s++) == d)
212da2e3ebchin				goto again;
213da2e3ebchin			if (!c)
214da2e3ebchin			{
215da2e3ebchin				regfree(p);
216da2e3ebchin				return fatal(disc, REG_EESCAPE, s - 2);
217da2e3ebchin			}
218da2e3ebchin			if (c == '&')
219da2e3ebchin			{
220da2e3ebchin				*t++ = c;
221da2e3ebchin				continue;
222da2e3ebchin			}
223da2e3ebchin		}
224da2e3ebchin		else if (c == '&')
225da2e3ebchin		{
226da2e3ebchin			if (sre)
227da2e3ebchin			{
228da2e3ebchin				*t++ = c;
229da2e3ebchin				continue;
230da2e3ebchin			}
231da2e3ebchin		}
232da2e3ebchin		else
233da2e3ebchin		{
234da2e3ebchin			switch (op->op)
235da2e3ebchin			{
236da2e3ebchin			case REG_SUB_UPPER:
237da2e3ebchin				if (islower(c))
238da2e3ebchin					c = toupper(c);
239da2e3ebchin				break;
240da2e3ebchin			case REG_SUB_LOWER:
241da2e3ebchin				if (isupper(c))
242da2e3ebchin					c = tolower(c);
243da2e3ebchin				break;
244da2e3ebchin			case REG_SUB_UPPER|REG_SUB_LOWER:
245da2e3ebchin				if (isupper(c))
246da2e3ebchin					c = tolower(c);
247da2e3ebchin				else if (islower(c))
248da2e3ebchin					c = toupper(c);
249da2e3ebchin				break;
250da2e3ebchin			}
251da2e3ebchin			*t++ = c;
252da2e3ebchin			continue;
253da2e3ebchin		}
254da2e3ebchin		switch (c)
255da2e3ebchin		{
256da2e3ebchin		case 0:
257da2e3ebchin			s--;
258da2e3ebchin			continue;
259da2e3ebchin		case '&':
260da2e3ebchin			c = 0;
261da2e3ebchin			break;
262da2e3ebchin		case '0': case '1': case '2': case '3': case '4':
263da2e3ebchin		case '5': case '6': case '7': case '8': case '9':
264da2e3ebchin			c -= '0';
2657c2fbfbApril Chin			if (isdigit(*s) && (p->env->flags & REG_MULTIREF))
266da2e3ebchin				c = c * 10 + *s++ - '0';
267da2e3ebchin			break;
268da2e3ebchin		case 'l':
269da2e3ebchin			if (c = *s)
270da2e3ebchin			{
271da2e3ebchin				s++;
272da2e3ebchin				if (isupper(c))
273da2e3ebchin					c = tolower(c);
274da2e3ebchin				*t++ = c;
275da2e3ebchin			}
276da2e3ebchin			continue;
277da2e3ebchin		case 'u':
278da2e3ebchin			if (c = *s)
279da2e3ebchin			{
280da2e3ebchin				s++;
281da2e3ebchin				if (islower(c))
282da2e3ebchin					c = toupper(c);
283da2e3ebchin				*t++ = c;
284da2e3ebchin			}
285da2e3ebchin			continue;
286da2e3ebchin		case 'E':
287da2e3ebchin			f = g;
288da2e3ebchin		set:
289da2e3ebchin			if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops)
290da2e3ebchin			{
291da2e3ebchin				if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
292da2e3ebchin				{
293da2e3ebchin					regfree(p);
294da2e3ebchin					return fatal(disc, REG_ESPACE, NiL);
295da2e3ebchin				}
296da2e3ebchin				op = sub->re_ops + n;
297da2e3ebchin			}
298da2e3ebchin			op->op = f;
299da2e3ebchin			op->off = t - sub->re_rhs;
300da2e3ebchin			continue;
301da2e3ebchin		case 'L':
302da2e3ebchin			g = f;
303da2e3ebchin			f = REG_SUB_LOWER;
304da2e3ebchin			goto set;
305da2e3ebchin		case 'U':
306da2e3ebchin			g = f;
307da2e3ebchin			f = REG_SUB_UPPER;
308da2e3ebchin			goto set;
309da2e3ebchin		default:
310da2e3ebchin			if (!sre)
311da2e3ebchin			{
312da2e3ebchin				*t++ = chresc(s - 2, &e);
313da2e3ebchin				s = (const char*)e;
314da2e3ebchin				continue;
315da2e3ebchin			}
316da2e3ebchin			s--;
317da2e3ebchin			c = -1;
318da2e3ebchin			break;
319da2e3ebchin		}
320da2e3ebchin		if (c > p->re_nsub)
321da2e3ebchin		{
322da2e3ebchin			regfree(p);
323da2e3ebchin			return fatal(disc, REG_ESUBREG, s - 1);
324da2e3ebchin		}
325da2e3ebchin		if ((n = op - sub->re_ops) >= (nops - 2))
326da2e3ebchin		{
327da2e3ebchin			if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
328da2e3ebchin			{
329da2e3ebchin				regfree(p);
330da2e3ebchin				return fatal(disc, REG_ESPACE, NiL);
331da2e3ebchin			}
332da2e3ebchin			op = sub->re_ops + n;
333da2e3ebchin		}
334da2e3ebchin		if (op->len = (t - sub->re_rhs) - op->off)
335da2e3ebchin			op++;
336da2e3ebchin		op->op = f;
337da2e3ebchin		op->off = c;
338da2e3ebchin		op->len = 0;
339da2e3ebchin		op++;
340da2e3ebchin		op->op = f;
341da2e3ebchin		op->off = t - sub->re_rhs;
342da2e3ebchin	}
343da2e3ebchin	if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops)
344da2e3ebchin	{
345da2e3ebchin		if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
346da2e3ebchin		{
347da2e3ebchin			regfree(p);
348da2e3ebchin			return fatal(disc, REG_ESPACE, NiL);
349da2e3ebchin		}
350da2e3ebchin		op = sub->re_ops + n;
351da2e3ebchin	}
352da2e3ebchin	op->len = -1;
353da2e3ebchin	sub->re_flags = flags;
354da2e3ebchin	sub->re_min = minmatch;
355da2e3ebchin	return 0;
356da2e3ebchin}
357da2e3ebchin
358da2e3ebchinvoid
359da2e3ebchinregsubfree(regex_t* p)
360da2e3ebchin{
361da2e3ebchin	Env_t*		env;
362da2e3ebchin	regsub_t*	sub;
363da2e3ebchin
364da2e3ebchin	if (p && (env = p->env) && env->sub && (sub = p->re_sub))
365da2e3ebchin	{
366da2e3ebchin		env->sub = 0;
367da2e3ebchin		p->re_sub = 0;
368da2e3ebchin		if (!(env->disc->re_flags & REG_NOFREE))
369da2e3ebchin		{
370da2e3ebchin			if (sub->re_buf)
371da2e3ebchin				alloc(env->disc, sub->re_buf, 0);
372da2e3ebchin			if (sub->re_ops)
373da2e3ebchin				alloc(env->disc, sub->re_ops, 0);
374da2e3ebchin			alloc(env->disc, sub, 0);
375da2e3ebchin		}
376da2e3ebchin	}
377da2e3ebchin}
378