1da2e3ebdSchin /*********************************************************************** 2da2e3ebdSchin * * 3da2e3ebdSchin * This software is part of the ast package * 4*b30d1939SAndy Fiddaman * Copyright (c) 1985-2012 AT&T Intellectual Property * 5da2e3ebdSchin * and is licensed under the * 6*b30d1939SAndy Fiddaman * Eclipse Public License, Version 1.0 * 77c2fbfb3SApril Chin * by AT&T Intellectual Property * 8da2e3ebdSchin * * 9da2e3ebdSchin * A copy of the License is available at * 10*b30d1939SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html * 11*b30d1939SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12da2e3ebdSchin * * 13da2e3ebdSchin * Information and Software Systems Research * 14da2e3ebdSchin * AT&T Research * 15da2e3ebdSchin * Florham Park NJ * 16da2e3ebdSchin * * 17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> * 18da2e3ebdSchin * David Korn <dgk@research.att.com> * 19da2e3ebdSchin * Phong Vo <kpv@research.att.com> * 20da2e3ebdSchin * * 21da2e3ebdSchin ***********************************************************************/ 22da2e3ebdSchin #pragma prototyped 23da2e3ebdSchin 24da2e3ebdSchin /* 25da2e3ebdSchin * regex library interface 26da2e3ebdSchin */ 27da2e3ebdSchin 28da2e3ebdSchin #ifdef _AST_STD_I 29da2e3ebdSchin #define _REGEX_H -1 30da2e3ebdSchin #define regex_t int 31da2e3ebdSchin #define regmatch_t int 32da2e3ebdSchin #endif 33da2e3ebdSchin #ifndef _REGEX_H 34da2e3ebdSchin #define _REGEX_H 1 35da2e3ebdSchin #undef regex_t 36da2e3ebdSchin #undef regmatch_t 37da2e3ebdSchin 38da2e3ebdSchin #include <ast_common.h> 39*b30d1939SAndy Fiddaman #include <ast_wchar.h> 40*b30d1939SAndy Fiddaman #include <ast_api.h> 41da2e3ebdSchin 42*b30d1939SAndy Fiddaman #define REG_VERSION 20100930L 43da2e3ebdSchin 44da2e3ebdSchin /* regcomp flags */ 45da2e3ebdSchin 46da2e3ebdSchin #define REG_AUGMENTED 0x00000001 /* enable ! & < > */ 47da2e3ebdSchin #define REG_EXTENDED 0x00000002 /* enable ( | ) */ 48da2e3ebdSchin #define REG_ICASE 0x00000004 /* ignore case in match */ 49da2e3ebdSchin #define REG_NEWLINE 0x00000008 /* ^/$ match embedded \n */ 50da2e3ebdSchin #define REG_NOSUB 0x00000010 /* don't report subexp matches */ 51da2e3ebdSchin #define REG_SHELL 0x00000020 /* shell pattern syntax */ 52da2e3ebdSchin 53da2e3ebdSchin /* nonstandard regcomp flags */ 54da2e3ebdSchin 55da2e3ebdSchin #define REG_LEFT 0x00000100 /* implicit ^... */ 56da2e3ebdSchin #define REG_LITERAL 0x00000200 /* no operators */ 57da2e3ebdSchin #define REG_MINIMAL 0x00000400 /* minimal match */ 58da2e3ebdSchin #define REG_NULL 0x00000800 /* allow null patterns */ 59da2e3ebdSchin #define REG_RIGHT 0x00001000 /* implicit ...$ */ 60da2e3ebdSchin #define REG_LENIENT 0x00002000 /* look the other way */ 61da2e3ebdSchin #define REG_ESCAPE 0x00004000 /* \ escapes delimiter in [...] */ 62da2e3ebdSchin #define REG_FIRST 0x00008000 /* first match found will do */ 63da2e3ebdSchin #define REG_MULTIPLE 0x00010000 /* multiple \n sep patterns */ 64da2e3ebdSchin #define REG_DISCIPLINE 0x00020000 /* regex_t.re_disc is valid */ 65da2e3ebdSchin #define REG_SPAN 0x00040000 /* . matches \n */ 66da2e3ebdSchin #define REG_COMMENT 0x00080000 /* ignore pattern space & #...\n*/ 67da2e3ebdSchin #define REG_MULTIREF 0x00100000 /* multiple digit backrefs */ 68da2e3ebdSchin #define REG_MUSTDELIM 0x08000000 /* all delimiters required */ 69da2e3ebdSchin #define REG_DELIMITED 0x10000000 /* pattern[0] is delimiter */ 703e14f97fSRoger A. Faulkner #define REG_CLASS_ESCAPE 0x80000000 /* \ escapes in [...] */ 71da2e3ebdSchin 72da2e3ebdSchin #define REG_SHELL_DOT 0x00200000 /* explicit leading . match */ 73da2e3ebdSchin #define REG_SHELL_ESCAPED 0x00400000 /* \ not special */ 743e14f97fSRoger A. Faulkner #define REG_SHELL_GROUP 0x20000000 /* (|&) inside [@|&](...) only */ 75da2e3ebdSchin #define REG_SHELL_PATH 0x00800000 /* explicit / match */ 76da2e3ebdSchin 7734f9b3eeSRoland Mainz #define REG_REGEXP 0x40000000 /* <regexp.h> compatibility */ 7834f9b3eeSRoland Mainz 79da2e3ebdSchin /* regexec flags */ 80da2e3ebdSchin 81da2e3ebdSchin #define REG_NOTBOL 0x00000040 /* ^ is not a special char */ 82da2e3ebdSchin #define REG_NOTEOL 0x00000080 /* $ is not a special char */ 83da2e3ebdSchin 84da2e3ebdSchin /* nonstandard regexec flags */ 85da2e3ebdSchin 86da2e3ebdSchin #define REG_INVERT 0x01000000 /* invert regrexec match sense */ 87da2e3ebdSchin #define REG_STARTEND 0x02000000 /* subject==match[0].rm_{so,eo} */ 88da2e3ebdSchin #define REG_ADVANCE 0x04000000 /* advance match[0].rm_{so,eo} */ 89da2e3ebdSchin 90da2e3ebdSchin /* regalloc flags */ 91da2e3ebdSchin 92da2e3ebdSchin #define REG_NOFREE 0x00000001 /* don't free */ 93da2e3ebdSchin 94da2e3ebdSchin /* regsub flags */ 95da2e3ebdSchin 96da2e3ebdSchin #define REG_SUB_ALL 0x00000001 /* substitute all occurrences */ 97da2e3ebdSchin #define REG_SUB_LOWER 0x00000002 /* substitute to lower case */ 98da2e3ebdSchin #define REG_SUB_UPPER 0x00000004 /* substitute to upper case */ 99da2e3ebdSchin #define REG_SUB_PRINT 0x00000010 /* internal no-op */ 100da2e3ebdSchin #define REG_SUB_NUMBER 0x00000020 /* internal no-op */ 101da2e3ebdSchin #define REG_SUB_STOP 0x00000040 /* internal no-op */ 102da2e3ebdSchin #define REG_SUB_WRITE 0x00000080 /* internal no-op */ 103da2e3ebdSchin #define REG_SUB_LAST 0x00000100 /* last substitution option */ 104da2e3ebdSchin #define REG_SUB_FULL 0x00000200 /* fully delimited */ 105da2e3ebdSchin #define REG_SUB_USER 0x00001000 /* first user flag bit */ 106da2e3ebdSchin 107da2e3ebdSchin /* regex error codes */ 108da2e3ebdSchin 109da2e3ebdSchin #define REG_ENOSYS (-1) /* not supported */ 110da2e3ebdSchin #define REG_NOMATCH 1 /* regexec didn't match */ 111da2e3ebdSchin #define REG_BADPAT 2 /* invalid regular expression */ 112da2e3ebdSchin #define REG_ECOLLATE 3 /* invalid collation element */ 113da2e3ebdSchin #define REG_ECTYPE 4 /* invalid character class */ 114da2e3ebdSchin #define REG_EESCAPE 5 /* trailing \ in pattern */ 115da2e3ebdSchin #define REG_ESUBREG 6 /* invalid \digit backreference */ 116da2e3ebdSchin #define REG_EBRACK 7 /* [...] imbalance */ 117da2e3ebdSchin #define REG_EPAREN 8 /* \(...\) or (...) imbalance */ 118da2e3ebdSchin #define REG_EBRACE 9 /* \{...\} or {...} imbalance */ 119da2e3ebdSchin #define REG_BADBR 10 /* invalid {...} digits */ 120da2e3ebdSchin #define REG_ERANGE 11 /* invalid [...] range endpoint */ 121da2e3ebdSchin #define REG_ESPACE 12 /* out of space */ 1223e14f97fSRoger A. Faulkner #define REG_BADRPT 13 /* unary op not preceded by re */ 123da2e3ebdSchin #define REG_ENULL 14 /* empty subexpr in pattern */ 124da2e3ebdSchin #define REG_ECOUNT 15 /* re component count overflow */ 125da2e3ebdSchin #define REG_BADESC 16 /* invalid \char escape */ 126da2e3ebdSchin #define REG_VERSIONID 17 /* version id (pseudo error) */ 127da2e3ebdSchin #define REG_EFLAGS 18 /* flags conflict */ 128da2e3ebdSchin #define REG_EDELIM 19 /* invalid or omitted delimiter */ 129da2e3ebdSchin #define REG_PANIC 20 /* unrecoverable internal error */ 130da2e3ebdSchin 131da2e3ebdSchin struct regex_s; typedef struct regex_s regex_t; 132da2e3ebdSchin struct regdisc_s; typedef struct regdisc_s regdisc_t; 133da2e3ebdSchin 134da2e3ebdSchin typedef int (*regclass_t)(int); 1353e14f97fSRoger A. Faulkner typedef uint32_t regflags_t; 136da2e3ebdSchin typedef int (*regerror_t)(const regex_t*, regdisc_t*, int, ...); 137da2e3ebdSchin typedef void* (*regcomp_t)(const regex_t*, const char*, size_t, regdisc_t*); 138da2e3ebdSchin typedef int (*regexec_t)(const regex_t*, void*, const char*, size_t, const char*, size_t, char**, regdisc_t*); 139da2e3ebdSchin typedef void* (*regresize_t)(void*, void*, size_t); 140da2e3ebdSchin typedef int (*regrecord_t)(void*, const char*, size_t); 141da2e3ebdSchin 142*b30d1939SAndy Fiddaman #if ASTAPI(20120528) 143*b30d1939SAndy Fiddaman typedef ssize_t regoff_t; 144*b30d1939SAndy Fiddaman #else 145*b30d1939SAndy Fiddaman typedef int regoff_t; 146*b30d1939SAndy Fiddaman #endif 147*b30d1939SAndy Fiddaman 148da2e3ebdSchin typedef struct regmatch_s 149da2e3ebdSchin { 150da2e3ebdSchin regoff_t rm_so; /* offset of start */ 151da2e3ebdSchin regoff_t rm_eo; /* offset of end */ 152da2e3ebdSchin } regmatch_t; 153da2e3ebdSchin 154da2e3ebdSchin typedef struct regsub_s 155da2e3ebdSchin { 156da2e3ebdSchin regflags_t re_flags; /* regsubcomp() flags */ 157da2e3ebdSchin char* re_buf; /* regsubexec() output buffer */ 158da2e3ebdSchin size_t re_len; /* re_buf length */ 159da2e3ebdSchin int re_min; /* regsubcomp() min matches */ 160da2e3ebdSchin #ifdef _REG_SUB_PRIVATE_ 161da2e3ebdSchin _REG_SUB_PRIVATE_ 162da2e3ebdSchin #endif 163da2e3ebdSchin } regsub_t; 164da2e3ebdSchin 165da2e3ebdSchin struct regdisc_s 166da2e3ebdSchin { 167da2e3ebdSchin unsigned long re_version; /* discipline version */ 168da2e3ebdSchin regflags_t re_flags; /* discipline flags */ 169da2e3ebdSchin regerror_t re_errorf; /* error function */ 170da2e3ebdSchin int re_errorlevel; /* errorf level */ 171da2e3ebdSchin regresize_t re_resizef; /* alloc/free function */ 172da2e3ebdSchin void* re_resizehandle;/* resizef handle */ 173da2e3ebdSchin regcomp_t re_compf; /* (?{...}) compile function */ 174da2e3ebdSchin regexec_t re_execf; /* (?{...}) execute function */ 175da2e3ebdSchin unsigned char* re_map; /* external to native ccode map */ 176da2e3ebdSchin }; 177da2e3ebdSchin 178da2e3ebdSchin typedef struct regstat_s 179da2e3ebdSchin { 180*b30d1939SAndy Fiddaman regflags_t re_flags; /* REG_* */ 181da2e3ebdSchin ssize_t re_min; /* min anchored match length */ 182da2e3ebdSchin ssize_t re_max; /* max anchored match length */ 183da2e3ebdSchin ssize_t re_record; /* regrexec() match length */ 184*b30d1939SAndy Fiddaman regflags_t re_info; /* REG_* info */ 185da2e3ebdSchin } regstat_t; 186da2e3ebdSchin 187da2e3ebdSchin struct regex_s 188da2e3ebdSchin { 189da2e3ebdSchin size_t re_nsub; /* number of subexpressions */ 190da2e3ebdSchin struct reglib_s*re_info; /* library private info */ 191da2e3ebdSchin size_t re_npat; /* number of pattern chars used */ 192da2e3ebdSchin regdisc_t* re_disc; /* REG_DISCIPLINE discipline */ 193da2e3ebdSchin regsub_t* re_sub; /* regsubcomp() data */ 194da2e3ebdSchin }; 195da2e3ebdSchin 196da2e3ebdSchin #define reginit(disc) (memset(disc,0,sizeof(*(disc))),(disc)->re_version=REG_VERSION) 197da2e3ebdSchin 198da2e3ebdSchin #if _BLD_ast && defined(__EXPORT__) 199da2e3ebdSchin #define extern __EXPORT__ 200da2e3ebdSchin #endif 201da2e3ebdSchin 202da2e3ebdSchin extern int regcomp(regex_t*, const char*, regflags_t); 203da2e3ebdSchin extern size_t regerror(int, const regex_t*, char*, size_t); 204da2e3ebdSchin extern int regexec(const regex_t*, const char*, size_t, regmatch_t*, regflags_t); 205da2e3ebdSchin extern void regfree(regex_t*); 206da2e3ebdSchin 207da2e3ebdSchin /* nonstandard hooks */ 208da2e3ebdSchin 209da2e3ebdSchin #define _REG_cache 1 /* have regcache() */ 210da2e3ebdSchin #define _REG_class 1 /* have regclass() */ 211da2e3ebdSchin #define _REG_collate 1 /* have regcollate(), regclass() */ 212da2e3ebdSchin #define _REG_comb 1 /* have regcomb() */ 213da2e3ebdSchin #define _REG_decomp 1 /* have regdecomp() */ 214da2e3ebdSchin #define _REG_dup 1 /* have regdup() */ 215da2e3ebdSchin #define _REG_fatal 1 /* have regfatal(), regfatalpat() */ 216da2e3ebdSchin #define _REG_ncomp 1 /* have regncomp() */ 217da2e3ebdSchin #define _REG_nexec 1 /* have regnexec() */ 218da2e3ebdSchin #define _REG_rexec 1 /* have regrexec(), regrecord() */ 219da2e3ebdSchin #define _REG_stat 1 /* have regstat() */ 220da2e3ebdSchin #define _REG_subcomp 1 /* have regsubcomp(), regsubexec() */ 221da2e3ebdSchin 222da2e3ebdSchin extern regclass_t regclass(const char*, char**); 223da2e3ebdSchin extern int regaddclass(const char*, regclass_t); 224*b30d1939SAndy Fiddaman extern int regcollate(const char*, char**, char*, size_t, wchar_t*); 225da2e3ebdSchin extern int regcomb(regex_t*, regex_t*); 226da2e3ebdSchin extern size_t regdecomp(regex_t*, regflags_t, char*, size_t); 227da2e3ebdSchin extern int regdup(regex_t*, regex_t*); 228da2e3ebdSchin extern int regncomp(regex_t*, const char*, size_t, regflags_t); 229da2e3ebdSchin extern int regnexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t); 230da2e3ebdSchin extern void regfatal(regex_t*, int, int); 231da2e3ebdSchin extern void regfatalpat(regex_t*, int, int, const char*); 232da2e3ebdSchin extern int regrecord(const regex_t*); 233da2e3ebdSchin extern int regrexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t, int, void*, regrecord_t); 234da2e3ebdSchin extern regstat_t* regstat(const regex_t*); 235da2e3ebdSchin 236da2e3ebdSchin extern regex_t* regcache(const char*, regflags_t, int*); 237da2e3ebdSchin 238da2e3ebdSchin extern int regsubcomp(regex_t*, const char*, const regflags_t*, int, regflags_t); 239da2e3ebdSchin extern int regsubexec(const regex_t*, const char*, size_t, regmatch_t*); 240da2e3ebdSchin extern int regsubflags(regex_t*, const char*, char**, int, const regflags_t*, int*, regflags_t*); 241da2e3ebdSchin extern void regsubfree(regex_t*); 242da2e3ebdSchin 243da2e3ebdSchin /* obsolete hooks */ 244da2e3ebdSchin 245da2e3ebdSchin #ifndef _SFIO_H 246da2e3ebdSchin struct _sfio_s; 247da2e3ebdSchin #endif 248da2e3ebdSchin 249da2e3ebdSchin extern void regalloc(void*, regresize_t, regflags_t); 250da2e3ebdSchin extern int regsub(const regex_t*, struct _sfio_s*, const char*, const char*, size_t, regmatch_t*, regflags_t); 251da2e3ebdSchin 252da2e3ebdSchin #undef extern 253da2e3ebdSchin 254da2e3ebdSchin #endif 255