1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23/*
24 * regcmp implementation
25 */
26
27#include <ast.h>
28#include <libgen.h>
29#include <regex.h>
30#include <align.h>
31
/*
 * sizing constants
 *
 *	INC	step by which regcmp() grows the handle allocation, in bytes
 *	TOT	regcmp() gives up once the allocation would exceed this
 *	SUB	number of $0..$9 substring slots a pattern may reference
 */

#define INC		(2 * 1024)
#define TOT		(16 * 1024)
#define SUB		10
35
36typedef struct
37{
38	char*		cur;
39	regex_t		re;
40	unsigned char	sub[SUB];
41	int		nsub;
42	size_t		size;
43	char		buf[ALIGN_BOUND2];
44} Regex_t;
45
46__DEFINE__(char*, __loc1, 0);
47
48static void*
49block(void* handle, void* data, size_t size)
50{
51	register Regex_t*	re = (Regex_t*)handle;
52
53	if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54		return 0;
55	data = (void*)re->cur;
56	re->cur += size;
57	return data;
58}
59
60char*
61regcmp(const char* pattern, ...)
62{
63	register char*		s;
64	register Regex_t*	re;
65	register size_t		n;
66	register int		c;
67	register int		p;
68	int			b;
69	int			i;
70	int			j;
71	int			nsub;
72	register Sfio_t*	sp;
73	unsigned char		paren[128];
74	unsigned char		sub[SUB];
75	va_list			ap;
76
77	va_start(ap, pattern);
78	if (!pattern || !*pattern || !(sp = sfstropen()))
79		return 0;
80	memset(paren, 0, sizeof(paren));
81	n = 0;
82	p = -1;
83	b = 0;
84	nsub = 0;
85	s = (char*)pattern;
86	do
87	{
88		while (c = *s++)
89		{
90			if (c == '\\')
91			{
92				sfputc(sp, c);
93				if (!(c = *s++))
94					break;
95			}
96			else if (b)
97			{
98				if (c == ']')
99					b = 0;
100			}
101			else if (c == '[')
102			{
103				b = 1;
104				if (*s == '^')
105				{
106					sfputc(sp, c);
107					c = *s++;
108				}
109				if (*s == ']')
110				{
111					sfputc(sp, c);
112					c = *s++;
113				}
114			}
115			else if (c == '(')
116			{
117				/*
118				 * someone explain in one sentence why
119				 * a cast is needed to make this work
120				 */
121
122				if (p < (int)(elementsof(paren) - 1))
123					p++;
124				paren[p] = ++n;
125			}
126			else if (c == ')' && p >= 0)
127			{
128				for (i = p; i > 0; i--)
129					if (paren[i])
130						break;
131				if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
132				{
133					s += 2;
134					j -= '0';
135					if (nsub <= j)
136					{
137						if (!nsub)
138							memset(sub, 0, sizeof(sub));
139						nsub = j + 1;
140					}
141					sub[j] = paren[i] + 1;
142				}
143				paren[i] = 0;
144			}
145			sfputc(sp, c);
146		}
147	} while (s = va_arg(ap, char*));
148	va_end(ap);
149	if (!(s = sfstruse(sp)))
150	{
151		sfstrclose(sp);
152		return 0;
153	}
154	re = 0;
155	n = 0;
156	do
157	{
158		if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
159		{
160			if (re)
161				free(re);
162			sfstrclose(sp);
163			return 0;
164		}
165		re->cur = re->buf;
166		re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
167		regalloc(re, block, REG_NOFREE);
168		c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
169		regalloc(NiL, NiL, 0);
170	} while (c == REG_ESPACE);
171	sfstrclose(sp);
172	if (c)
173	{
174		free(re);
175		return 0;
176	}
177	if (re->nsub = nsub)
178		memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
179	return (char*)re;
180}
181
182char*
183regex(const char* handle, const char* subject, ...)
184{
185	register Regex_t*	re;
186	register int		n;
187	register int		i;
188	register int		k;
189	char*			sub[SUB + 1];
190	regmatch_t		match[SUB + 1];
191	va_list			ap;
192
193	va_start(ap, subject);
194	if (!(re = (Regex_t*)handle) || !subject)
195		return 0;
196	for (n = 0; n < re->nsub; n++)
197		sub[n] = va_arg(ap, char*);
198	va_end(ap);
199	if (regexec(&re->re, subject, SUB + 1, match, 0))
200		return 0;
201	for (n = 0; n < re->nsub; n++)
202		if (i = re->sub[n])
203		{
204			i--;
205			k = match[i].rm_eo - match[i].rm_so;
206			strncpy(sub[n], subject + match[i].rm_so, k);
207			*(sub[n] + k) = 0;
208		}
209	__loc1 = (char*)subject + match[0].rm_so;
210	return (char*)subject + match[0].rm_eo;
211}
212