1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*b30d1939SAndy Fiddaman *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6*b30d1939SAndy Fiddaman *                 Eclipse Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10*b30d1939SAndy Fiddaman *          http://www.eclipse.org/org/documents/epl-v10.html           *
11*b30d1939SAndy Fiddaman *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                  David Korn <dgk@research.att.com>                   *
19da2e3ebdSchin *                   Phong Vo <kpv@research.att.com>                    *
20da2e3ebdSchin *                                                                      *
21da2e3ebdSchin ***********************************************************************/
22da2e3ebdSchin #pragma prototyped
23da2e3ebdSchin 
24da2e3ebdSchin /*
25da2e3ebdSchin  * regex library interface
26da2e3ebdSchin  */
27da2e3ebdSchin 
/*
 * When _AST_STD_I is defined, _REGEX_H is pre-defined (as -1) so the
 * #ifndef _REGEX_H section that follows is skipped entirely, and
 * regex_t / regmatch_t are left as placeholder macros expanding to int.
 * When the section is not skipped it #undef's both macros before
 * declaring the real types.
 */
28da2e3ebdSchin #ifdef	_AST_STD_I
29da2e3ebdSchin #define _REGEX_H	-1
30da2e3ebdSchin #define regex_t		int
31da2e3ebdSchin #define regmatch_t	int
32da2e3ebdSchin #endif
33da2e3ebdSchin #ifndef _REGEX_H
34da2e3ebdSchin #define _REGEX_H	1
35da2e3ebdSchin #undef	regex_t
36da2e3ebdSchin #undef	regmatch_t
37da2e3ebdSchin 
38da2e3ebdSchin #include <ast_common.h>
39*b30d1939SAndy Fiddaman #include <ast_wchar.h>
40*b30d1939SAndy Fiddaman #include <ast_api.h>
41da2e3ebdSchin 
/* interface version stamp; reginit() stores it in regdisc_t.re_version */
42*b30d1939SAndy Fiddaman #define REG_VERSION	20100930L
43da2e3ebdSchin 
/*
 * Bit allocation note: every regcomp flag in this group and every
 * regexec flag defined later in this header is a distinct single bit,
 * and together they use all 32 bits of a regflags_t (uint32_t).
 * Bits 0x00000040, 0x00000080 and 0x01000000..0x04000000 are the
 * regexec flags; a new flag cannot be added without retiring an old
 * one or widening regflags_t.
 */
44da2e3ebdSchin /* regcomp flags */
45da2e3ebdSchin 
46da2e3ebdSchin #define REG_AUGMENTED	0x00000001	/* enable ! & < >		*/
47da2e3ebdSchin #define REG_EXTENDED	0x00000002	/* enable ( | )			*/
48da2e3ebdSchin #define REG_ICASE	0x00000004	/* ignore case in match		*/
49da2e3ebdSchin #define REG_NEWLINE	0x00000008	/* ^/$ match embedded \n	*/
50da2e3ebdSchin #define REG_NOSUB	0x00000010	/* don't report subexp matches	*/
51da2e3ebdSchin #define REG_SHELL	0x00000020	/* shell pattern syntax		*/
52da2e3ebdSchin 
53da2e3ebdSchin /* nonstandard regcomp flags */
54da2e3ebdSchin 
55da2e3ebdSchin #define REG_LEFT	0x00000100	/* implicit ^...		*/
56da2e3ebdSchin #define REG_LITERAL	0x00000200	/* no operators			*/
57da2e3ebdSchin #define REG_MINIMAL	0x00000400	/* minimal match		*/
58da2e3ebdSchin #define REG_NULL	0x00000800	/* allow null patterns		*/
59da2e3ebdSchin #define REG_RIGHT	0x00001000	/* implicit ...$		*/
60da2e3ebdSchin #define REG_LENIENT	0x00002000	/* look the other way		*/
61da2e3ebdSchin #define REG_ESCAPE	0x00004000	/* \ escapes delimiter in [...]	*/
62da2e3ebdSchin #define REG_FIRST	0x00008000	/* first match found will do	*/
63da2e3ebdSchin #define REG_MULTIPLE	0x00010000	/* multiple \n sep patterns	*/
64da2e3ebdSchin #define REG_DISCIPLINE	0x00020000	/* regex_t.re_disc is valid	*/
65da2e3ebdSchin #define REG_SPAN	0x00040000	/* . matches \n			*/
66da2e3ebdSchin #define REG_COMMENT	0x00080000	/* ignore pattern space & #...\n*/
67da2e3ebdSchin #define REG_MULTIREF	0x00100000	/* multiple digit backrefs	*/
68da2e3ebdSchin #define REG_MUSTDELIM	0x08000000	/* all delimiters required	*/
69da2e3ebdSchin #define REG_DELIMITED	0x10000000	/* pattern[0] is delimiter	*/
703e14f97fSRoger A. Faulkner #define REG_CLASS_ESCAPE 0x80000000	/* \ escapes in [...]		*/
71da2e3ebdSchin 
/* REG_SHELL_* : shell pattern variants (values are not in numeric order) */
72da2e3ebdSchin #define REG_SHELL_DOT	0x00200000	/* explicit leading . match	*/
73da2e3ebdSchin #define REG_SHELL_ESCAPED 0x00400000	/* \ not special		*/
743e14f97fSRoger A. Faulkner #define REG_SHELL_GROUP	0x20000000	/* (|&) inside [@|&](...) only	*/
75da2e3ebdSchin #define REG_SHELL_PATH	0x00800000	/* explicit / match		*/
76da2e3ebdSchin 
7734f9b3eeSRoland Mainz #define REG_REGEXP	0x40000000	/* <regexp.h> compatibility	*/
7834f9b3eeSRoland Mainz 
/*
 * The regexec flag values below do not collide with any regcomp flag
 * value defined in this header, so both kinds can be carried in the
 * same regflags_t word.
 */
79da2e3ebdSchin /* regexec flags */
80da2e3ebdSchin 
81da2e3ebdSchin #define REG_NOTBOL	0x00000040	/* ^ is not a special char	*/
82da2e3ebdSchin #define REG_NOTEOL	0x00000080	/* $ is not a special char	*/
83da2e3ebdSchin 
84da2e3ebdSchin /* nonstandard regexec flags */
85da2e3ebdSchin 
86da2e3ebdSchin #define REG_INVERT	0x01000000	/* invert regrexec match sense	*/
87da2e3ebdSchin #define REG_STARTEND	0x02000000	/* subject==match[0].rm_{so,eo} */
88da2e3ebdSchin #define REG_ADVANCE	0x04000000	/* advance match[0].rm_{so,eo}	*/
89da2e3ebdSchin 
/*
 * The regalloc and regsub flag values below reuse low bit values that
 * are also assigned to regcomp/regexec flags (for example REG_NOFREE,
 * REG_SUB_ALL and REG_AUGMENTED are all 0x00000001).
 * NOTE(review): presumably each group is only interpreted by its own
 * functions -- confirm before mixing them in one flag word.
 */
90da2e3ebdSchin /* regalloc flags */
91da2e3ebdSchin 
92da2e3ebdSchin #define REG_NOFREE	0x00000001	/* don't free			*/
93da2e3ebdSchin 
94da2e3ebdSchin /* regsub flags */
95da2e3ebdSchin 
96da2e3ebdSchin #define REG_SUB_ALL	0x00000001	/* substitute all occurrences	*/
97da2e3ebdSchin #define REG_SUB_LOWER	0x00000002	/* substitute to lower case	*/
98da2e3ebdSchin #define REG_SUB_UPPER	0x00000004	/* substitute to upper case	*/
99da2e3ebdSchin #define REG_SUB_PRINT	0x00000010	/* internal no-op		*/
100da2e3ebdSchin #define REG_SUB_NUMBER	0x00000020	/* internal no-op		*/
101da2e3ebdSchin #define REG_SUB_STOP	0x00000040	/* internal no-op		*/
102da2e3ebdSchin #define REG_SUB_WRITE	0x00000080	/* internal no-op		*/
103da2e3ebdSchin #define REG_SUB_LAST	0x00000100	/* last substitution option	*/
104da2e3ebdSchin #define REG_SUB_FULL	0x00000200	/* fully delimited		*/
105da2e3ebdSchin #define REG_SUB_USER	0x00001000	/* first user flag bit		*/
106da2e3ebdSchin 
/*
 * REG_ENOSYS is the only negative code; the rest are the consecutive
 * values 1..20. REG_VERSIONID is marked as a pseudo error rather than
 * a real failure.
 */
107da2e3ebdSchin /* regex error codes */
108da2e3ebdSchin 
109da2e3ebdSchin #define REG_ENOSYS	(-1)		/* not supported		*/
110da2e3ebdSchin #define REG_NOMATCH	1		/* regexec didn't match		*/
111da2e3ebdSchin #define REG_BADPAT	2		/* invalid regular expression	*/
112da2e3ebdSchin #define REG_ECOLLATE	3		/* invalid collation element	*/
113da2e3ebdSchin #define REG_ECTYPE	4		/* invalid character class	*/
114da2e3ebdSchin #define REG_EESCAPE	5		/* trailing \ in pattern	*/
115da2e3ebdSchin #define REG_ESUBREG	6		/* invalid \digit backreference	*/
116da2e3ebdSchin #define REG_EBRACK	7		/* [...] imbalance		*/
117da2e3ebdSchin #define REG_EPAREN	8		/* \(...\) or (...) imbalance	*/
118da2e3ebdSchin #define REG_EBRACE	9		/* \{...\} or {...} imbalance	*/
119da2e3ebdSchin #define REG_BADBR	10		/* invalid {...} digits		*/
120da2e3ebdSchin #define REG_ERANGE	11		/* invalid [...] range endpoint	*/
121da2e3ebdSchin #define REG_ESPACE	12		/* out of space			*/
1223e14f97fSRoger A. Faulkner #define REG_BADRPT	13		/* unary op not preceded by re	*/
123da2e3ebdSchin #define REG_ENULL	14		/* empty subexpr in pattern	*/
124da2e3ebdSchin #define REG_ECOUNT	15		/* re component count overflow	*/
125da2e3ebdSchin #define REG_BADESC	16		/* invalid \char escape		*/
126da2e3ebdSchin #define REG_VERSIONID	17		/* version id (pseudo error)	*/
127da2e3ebdSchin #define REG_EFLAGS	18		/* flags conflict		*/
128da2e3ebdSchin #define REG_EDELIM	19		/* invalid or omitted delimiter	*/
129da2e3ebdSchin #define REG_PANIC	20		/* unrecoverable internal error	*/
130da2e3ebdSchin 
/*
 * Forward declarations so the callback types below can refer to
 * regex_t and regdisc_t before the structures are defined.
 */
131da2e3ebdSchin struct regex_s; typedef struct regex_s regex_t;
132da2e3ebdSchin struct regdisc_s; typedef struct regdisc_s regdisc_t;
133da2e3ebdSchin 
/*
 * regclass_t   character class predicate; returned by regclass() and
 *              accepted by regaddclass()
 * regflags_t   32-bit word holding REG_* flag bits
 * regerror_t   type of regdisc_t.re_errorf (variadic)
 * regcomp_t    type of regdisc_t.re_compf
 * regexec_t    type of regdisc_t.re_execf
 * regresize_t  type of regdisc_t.re_resizef; also taken by regalloc()
 * regrecord_t  callback passed as the last argument of regrexec()
 */
134da2e3ebdSchin typedef int (*regclass_t)(int);
1353e14f97fSRoger A. Faulkner typedef uint32_t regflags_t;
136da2e3ebdSchin typedef int (*regerror_t)(const regex_t*, regdisc_t*, int, ...);
137da2e3ebdSchin typedef void* (*regcomp_t)(const regex_t*, const char*, size_t, regdisc_t*);
138da2e3ebdSchin typedef int (*regexec_t)(const regex_t*, void*, const char*, size_t, const char*, size_t, char**, regdisc_t*);
139da2e3ebdSchin typedef void* (*regresize_t)(void*, void*, size_t);
140da2e3ebdSchin typedef int (*regrecord_t)(void*, const char*, size_t);
141da2e3ebdSchin 
/*
 * Match offset type: ssize_t when ASTAPI(20120528) evaluates true,
 * otherwise int. The width of regmatch_t members therefore depends on
 * the selected API version.
 * NOTE(review): ASTAPI() is presumably supplied by <ast_api.h> -- confirm.
 */
142*b30d1939SAndy Fiddaman #if ASTAPI(20120528)
143*b30d1939SAndy Fiddaman typedef ssize_t regoff_t;
144*b30d1939SAndy Fiddaman #else
145*b30d1939SAndy Fiddaman typedef int regoff_t;
146*b30d1939SAndy Fiddaman #endif
147*b30d1939SAndy Fiddaman 
/*
 * One match span, expressed as a pair of regoff_t offsets.
 * Arrays of these are passed to regexec(), regnexec(), regrexec(),
 * regsubexec() and regsub().
 */
148da2e3ebdSchin typedef struct regmatch_s
149da2e3ebdSchin {
150da2e3ebdSchin 	regoff_t	rm_so;		/* offset of start		*/
151da2e3ebdSchin 	regoff_t	rm_eo;		/* offset of end		*/
152da2e3ebdSchin } regmatch_t;
153da2e3ebdSchin 
/*
 * Substitution state, reachable through regex_t.re_sub.
 * If _REG_SUB_PRIVATE_ is defined before this header is included, its
 * expansion is appended as extra trailing members; otherwise only the
 * four public members are visible.
 */
154da2e3ebdSchin typedef struct regsub_s
155da2e3ebdSchin {
156da2e3ebdSchin 	regflags_t	re_flags;	/* regsubcomp() flags		*/
157da2e3ebdSchin 	char*		re_buf;		/* regsubexec() output buffer	*/
158da2e3ebdSchin 	size_t		re_len;		/* re_buf length		*/
159da2e3ebdSchin 	int		re_min;		/* regsubcomp() min matches	*/
160da2e3ebdSchin #ifdef _REG_SUB_PRIVATE_
161da2e3ebdSchin 	_REG_SUB_PRIVATE_
162da2e3ebdSchin #endif
163da2e3ebdSchin } regsub_t;
164da2e3ebdSchin 
/*
 * Discipline: a table of caller-supplied hooks, reachable through
 * regex_t.re_disc (documented in this header as valid when
 * REG_DISCIPLINE is set). reginit() zeroes the whole structure and
 * then sets re_version to REG_VERSION.
 */
165da2e3ebdSchin struct regdisc_s
166da2e3ebdSchin {
167da2e3ebdSchin 	unsigned long	re_version;	/* discipline version		*/
168da2e3ebdSchin 	regflags_t	re_flags;	/* discipline flags		*/
169da2e3ebdSchin 	regerror_t	re_errorf;	/* error function		*/
170da2e3ebdSchin 	int		re_errorlevel;	/* errorf level			*/
171da2e3ebdSchin 	regresize_t	re_resizef;	/* alloc/free function		*/
172da2e3ebdSchin 	void*		re_resizehandle;/* resizef handle		*/
173da2e3ebdSchin 	regcomp_t	re_compf;	/* (?{...}) compile function	*/
174da2e3ebdSchin 	regexec_t	re_execf;	/* (?{...}) execute function	*/
175da2e3ebdSchin 	unsigned char*	re_map;		/* external to native ccode map	*/
176da2e3ebdSchin };
177da2e3ebdSchin 
/*
 * Statistics for a compiled expression; regstat() returns a pointer to
 * one of these. The length members are signed (ssize_t).
 */
178da2e3ebdSchin typedef struct regstat_s
179da2e3ebdSchin {
180*b30d1939SAndy Fiddaman 	regflags_t	re_flags;	/* REG_*			*/
181da2e3ebdSchin 	ssize_t		re_min;		/* min anchored match length	*/
182da2e3ebdSchin 	ssize_t		re_max;		/* max anchored match length	*/
183da2e3ebdSchin 	ssize_t		re_record;	/* regrexec() match length	*/
184*b30d1939SAndy Fiddaman 	regflags_t	re_info;	/* REG_* info			*/
185da2e3ebdSchin } regstat_t;
186da2e3ebdSchin 
/*
 * Compiled regular expression handle (regex_t).
 * re_info points at struct reglib_s, which is not defined in this
 * header and is therefore opaque to callers. Note that this member is
 * unrelated to regstat_t.re_info, which is a flag word.
 */
187da2e3ebdSchin struct regex_s
188da2e3ebdSchin {
189da2e3ebdSchin 	size_t		re_nsub;	/* number of subexpressions	*/
190da2e3ebdSchin 	struct reglib_s*re_info;	/* library private info		*/
191da2e3ebdSchin 	size_t		re_npat;	/* number of pattern chars used	*/
192da2e3ebdSchin 	regdisc_t*	re_disc;	/* REG_DISCIPLINE discipline	*/
193da2e3ebdSchin 	regsub_t*	re_sub;		/* regsubcomp() data		*/
194da2e3ebdSchin };
195da2e3ebdSchin 
/*
 * reginit(disc): zero-fill *disc, then set disc->re_version to
 * REG_VERSION. The expression's value is that of the final assignment.
 * disc is expanded more than once, so pass an argument without side
 * effects. The expansion calls memset(), which this header does not
 * declare itself.
 * NOTE(review): memset() is presumably made visible by <ast_common.h>
 * or by the including file -- confirm.
 */
196da2e3ebdSchin #define reginit(disc)	(memset(disc,0,sizeof(*(disc))),(disc)->re_version=REG_VERSION)
197da2e3ebdSchin 
/*
 * When _BLD_ast is nonzero and __EXPORT__ is defined, redefine the
 * keyword `extern' as __EXPORT__ for the declarations that follow.
 * The redefinition is removed by the #undef extern near the end of
 * this header.
 */
198da2e3ebdSchin #if _BLD_ast && defined(__EXPORT__)
199da2e3ebdSchin #define extern		__EXPORT__
200da2e3ebdSchin #endif
201da2e3ebdSchin 
/*
 * Standard entry points. Flag arguments are declared as regflags_t
 * (uint32_t) in this header. Parameters are unnamed here.
 * NOTE(review): presumably, in order: regcomp(re, pattern, flags),
 * regerror(code, re, buf, size), regexec(re, subject, nmatch, match,
 * flags), regfree(re) -- confirm against the implementation.
 */
202da2e3ebdSchin extern int	regcomp(regex_t*, const char*, regflags_t);
203da2e3ebdSchin extern size_t	regerror(int, const regex_t*, char*, size_t);
204da2e3ebdSchin extern int	regexec(const regex_t*, const char*, size_t, regmatch_t*, regflags_t);
205da2e3ebdSchin extern void	regfree(regex_t*);
206da2e3ebdSchin 
/*
 * Each _REG_* macro below is defined as 1 to advertise that the named
 * extension is declared by this header; every function named in the
 * trailing comments has a prototype further down.
 */
207da2e3ebdSchin /* nonstandard hooks */
208da2e3ebdSchin 
209da2e3ebdSchin #define _REG_cache	1	/* have regcache()			*/
210da2e3ebdSchin #define _REG_class	1	/* have regclass()			*/
211da2e3ebdSchin #define _REG_collate	1	/* have regcollate(), regclass()	*/
212da2e3ebdSchin #define _REG_comb	1	/* have regcomb()			*/
213da2e3ebdSchin #define _REG_decomp	1	/* have regdecomp()			*/
214da2e3ebdSchin #define _REG_dup	1	/* have regdup()			*/
215da2e3ebdSchin #define _REG_fatal	1	/* have regfatal(), regfatalpat()	*/
216da2e3ebdSchin #define _REG_ncomp	1	/* have regncomp()			*/
217da2e3ebdSchin #define _REG_nexec	1	/* have regnexec()			*/
218da2e3ebdSchin #define _REG_rexec	1	/* have regrexec(), regrecord()		*/
219da2e3ebdSchin #define _REG_stat	1	/* have regstat()			*/
220da2e3ebdSchin #define _REG_subcomp	1	/* have regsubcomp(), regsubexec()	*/
221da2e3ebdSchin 
/*
 * Extension prototypes. regncomp() and regnexec() take one more size_t
 * than regcomp() and regexec().
 * NOTE(review): the extra size_t is presumably an explicit
 * pattern/subject length -- confirm against the implementation.
 */
222da2e3ebdSchin extern regclass_t regclass(const char*, char**);
223da2e3ebdSchin extern int	regaddclass(const char*, regclass_t);
224*b30d1939SAndy Fiddaman extern int	regcollate(const char*, char**, char*, size_t, wchar_t*);
225da2e3ebdSchin extern int	regcomb(regex_t*, regex_t*);
226da2e3ebdSchin extern size_t	regdecomp(regex_t*, regflags_t, char*, size_t);
227da2e3ebdSchin extern int	regdup(regex_t*, regex_t*);
228da2e3ebdSchin extern int	regncomp(regex_t*, const char*, size_t, regflags_t);
229da2e3ebdSchin extern int	regnexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t);
230da2e3ebdSchin extern void	regfatal(regex_t*, int, int);
231da2e3ebdSchin extern void	regfatalpat(regex_t*, int, int, const char*);
232da2e3ebdSchin extern int	regrecord(const regex_t*);
233da2e3ebdSchin extern int	regrexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t, int, void*, regrecord_t);
234da2e3ebdSchin extern regstat_t* regstat(const regex_t*);
235da2e3ebdSchin 
236da2e3ebdSchin extern regex_t*	regcache(const char*, regflags_t, int*);
237da2e3ebdSchin 
/* substitution interface; operates on the regsub_t behind regex_t.re_sub */
238da2e3ebdSchin extern int	regsubcomp(regex_t*, const char*, const regflags_t*, int, regflags_t);
239da2e3ebdSchin extern int	regsubexec(const regex_t*, const char*, size_t, regmatch_t*);
240da2e3ebdSchin extern int	regsubflags(regex_t*, const char*, char**, int, const regflags_t*, int*, regflags_t*);
241da2e3ebdSchin extern void	regsubfree(regex_t*);
242da2e3ebdSchin 
243da2e3ebdSchin /* obsolete hooks */
244da2e3ebdSchin 
/*
 * If _SFIO_H is not defined, forward-declare struct _sfio_s so the
 * regsub() prototype below can name a pointer to it.
 */
245da2e3ebdSchin #ifndef _SFIO_H
246da2e3ebdSchin struct _sfio_s;
247da2e3ebdSchin #endif
248da2e3ebdSchin 
249da2e3ebdSchin extern void	regalloc(void*, regresize_t, regflags_t);
250da2e3ebdSchin extern int	regsub(const regex_t*, struct _sfio_s*, const char*, const char*, size_t, regmatch_t*, regflags_t);
251da2e3ebdSchin 
/* drop the `extern' macro that may have been defined as __EXPORT__ above */
252da2e3ebdSchin #undef	extern
253da2e3ebdSchin 
254da2e3ebdSchin #endif
255