1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * posix regex record executor
26  * multiple record sized-buffer interface
27  */
28 
29 #include "reglib.h"
30 
31 /*
32  * call regnexec() on records selected by Boyer-Moore
33  */
34 
35 int
regrexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags,int sep,void * handle,regrecord_t record)36 regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
37 {
38 	register unsigned char*	buf = (unsigned char*)s;
39 	register unsigned char*	beg;
40 	register unsigned char*	l;
41 	register unsigned char*	r;
42 	register unsigned char*	x;
43 	register size_t*	skip;
44 	register size_t*	fail;
45 	register Bm_mask_t**	mask;
46 	register size_t		index;
47 	register ssize_t	n;
48 	unsigned char*		end;
49 	size_t			mid;
50 	int			complete;
51 	int			exactlen;
52 	int			leftlen;
53 	int			rightlen;
54 	int			inv;
55 	Bm_mask_t		m;
56 	Env_t*			env;
57 	Rex_t*			e;
58 
59 	if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
60 		return REG_BADPAT;
61 	inv = (flags & REG_INVERT) != 0;
62 	buf = beg = (unsigned char*)s;
63 	end = buf + len;
64 	mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
65 	skip = e->re.bm.skip;
66 	fail = e->re.bm.fail;
67 	mask = e->re.bm.mask;
68 	complete = e->re.bm.complete && !nmatch;
69 	exactlen = e->re.bm.size;
70 	leftlen = e->re.bm.left + exactlen;
71 	rightlen = exactlen + e->re.bm.right;
72 	index = leftlen++;
73 	for (;;)
74 	{
75 		while ((index += skip[buf[index]]) < mid);
76 		if (index < HIT)
77 			goto impossible;
78 		index -= HIT;
79 		m = mask[n = exactlen - 1][buf[index]];
80 		do
81 		{
82 			if (!n--)
83 				goto possible;
84 		} while (m &= mask[n][buf[--index]]);
85 		if ((index += fail[n + 1]) < len)
86 			continue;
87  impossible:
88 		if (inv)
89 		{
90 			l = r = buf + len;
91 			goto invert;
92 		}
93 		n = 0;
94 		goto done;
95  possible:
96 		r = (l = buf + index) + exactlen;
97 		while (l > beg)
98 			if (*--l == sep)
99 			{
100 				l++;
101 				break;
102 			}
103 		if ((r - l) < leftlen)
104 			goto spanned;
105 		while (r < end && *r != sep)
106 			r++;
107 		if ((r - (buf + index)) < rightlen)
108 			goto spanned;
109 		if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
110 		{
111 			if (inv)
112 			{
113  invert:
114 				x = beg;
115 				while (beg < l)
116 				{
117 					while (x < l && *x != sep)
118 						x++;
119 					if (n = (*record)(handle, (char*)beg, x - beg))
120 						goto done;
121 					beg = ++x;
122 				}
123 			}
124 			else if (n = (*record)(handle, (char*)l, r - l))
125 				goto done;
126 			if ((index = (r - buf) + leftlen) >= len)
127 			{
128 				n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
129 				goto done;
130 			}
131 			beg = r + 1;
132 		}
133 		else if (n != REG_NOMATCH)
134 			goto done;
135 		else
136 		{
137  spanned:
138 			if ((index += exactlen) >= mid)
139 				goto impossible;
140 		}
141 	}
142  done:
143 	env->rex = e;
144 	return n;
145 }
146 
147 /*
148  * 20120528: regoff_t changed from int to ssize_t
149  */
150 
151 #if defined(__EXPORT__)
152 #define extern		__EXPORT__
153 #endif
154 
155 #undef	regrexec
156 #if _map_libc
157 #define regrexec	_ast_regrexec
158 #endif
159 
160 extern int
regrexec(const regex_t * p,const char * s,size_t len,size_t nmatch,oldregmatch_t * oldmatch,regflags_t flags,int sep,void * handle,regrecord_t record)161 regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags, int sep, void* handle, regrecord_t record)
162 {
163 	if (oldmatch)
164 	{
165 		regmatch_t*	match;
166 		ssize_t		i;
167 		int		r;
168 
169 		if (!(match = oldof(0, regmatch_t, nmatch, 0)))
170 			return -1;
171 		if (!(r = regrexec_20120528(p, s, len, nmatch, match, flags, sep, handle, record)))
172 			for (i = 0; i < nmatch; i++)
173 			{
174 				oldmatch[i].rm_so = match[i].rm_so;
175 				oldmatch[i].rm_eo = match[i].rm_eo;
176 			}
177 		free(match);
178 		return r;
179 	}
180 	return regrexec_20120528(p, s, len, 0, NiL, flags, sep, handle, record);
181 }
182