xref: /illumos-gate/usr/src/cmd/expr/compile.c (revision bbf21555)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright 1995-2003 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
2548bbca81SDaniel Hoffman  * Copyright (c) 2016 by Delphix. All rights reserved.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate /*
297c478bd9Sstevel@tonic-gate  * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
307c478bd9Sstevel@tonic-gate  *	using regcomp(3c), regexec(3c) interfaces. This is an XCU4
317c478bd9Sstevel@tonic-gate  *	porting aid. switches out to libgen compile/step if collation
327c478bd9Sstevel@tonic-gate  *	table not present.
337c478bd9Sstevel@tonic-gate  *
347c478bd9Sstevel@tonic-gate  *	Goal is to work with vi and sed/ed.
35023a3eeeSToomas Soome  *	Returns expbuf in dhl format (encoding of first two bytes).
36023a3eeeSToomas Soome  *	Note also that this is profoundly single threaded.  You
377c478bd9Sstevel@tonic-gate  *	cannot call compile twice with two separate search strings
387c478bd9Sstevel@tonic-gate  *	because the second call will wipe out the earlier stored string.
397c478bd9Sstevel@tonic-gate  *	This must be fixed, plus a general cleanup should be performed
407c478bd9Sstevel@tonic-gate  *	if this is to be integrated into libc.
417c478bd9Sstevel@tonic-gate  *
427c478bd9Sstevel@tonic-gate  */
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate #include <stdio.h>
457c478bd9Sstevel@tonic-gate #include <widec.h>
467c478bd9Sstevel@tonic-gate #include <sys/types.h>
477c478bd9Sstevel@tonic-gate #include <regex.h>
487c478bd9Sstevel@tonic-gate #include <locale.h>
497c478bd9Sstevel@tonic-gate #include <stdlib.h>
507c478bd9Sstevel@tonic-gate #include <locale.h>
517c478bd9Sstevel@tonic-gate #include <string.h>
527c478bd9Sstevel@tonic-gate #include <unistd.h>
537c478bd9Sstevel@tonic-gate #include <regexpr.h>
547c478bd9Sstevel@tonic-gate 
/*
 * pseudo compile/step/advance global variables
 *
 * These objects are only declared here; this file reads and updates them
 * so that callers see the same result variables the libgen
 * compile/step/advance interfaces expose.
 */
extern int nbra;		/* number of sub-expressions in the RE */
extern char *locs;		/* for stopping excess recursion */
extern char *loc1;		/* 1st character which matched RE */
extern char *loc2;		/* char after last char in matched RE */
extern char *braslist[];	/* start of nbra subexp  */
extern char *braelist[];	/* end of nbra subexp    */
extern int regerrno;		/* compile/step style error number */
extern int reglength;		/* size of the last compiled expression */
667c478bd9Sstevel@tonic-gate 
/*
 * Interface to specify cflags for regcomp: dhl_compile() passes this value
 * unchanged as the cflags argument of both of its regcomp() calls.
 */
int regcomp_flags;

/* Public helper: releases the two regex_t's inside a compiled expression. */
void regex_comp_free(void *a);

/* File-local helpers, defined further down. */
static char *dhl_compile(const char *instr, char *ep, char *endbuf);
static int dhl_step(const char *str, const char *ep);
static int dhl_advance(const char *str, const char *ep);
static int dhl_doit(const char *str, const regex_t *rep, const int flags);
static int map_errnos(int Errno);	/* Convert regcomp error */
757c478bd9Sstevel@tonic-gate 
/*
 * # of sub re's: NOTE: For now the limit on the bra list is defined here,
 * but the fix is to add a maxbra define to regex.h.
 * One problem is that a bigger number is a performance hit since
 * regexec() has a slow initialization loop that goes around SEPSIZE times.
 */
#define	SEPSIZE 20

/*
 * Match offsets filled in by regexec(): entry 0 describes the whole match,
 * the following entries describe the sub-expressions.
 */
static regmatch_t rm[SEPSIZE];
847c478bd9Sstevel@tonic-gate 
/*
 * Image of a compiled expression as it is copied into and out of expbuf:
 * the two DL-encoded header bytes that vi inspects, followed by one
 * regex_t for step and one regex_t for advance.  reg_comp is the single
 * file-local working copy that dhl_compile() fills in.
 */
static struct regex_comp {
	char	r_head[2];		/* Header for DL encoding for vi */
	regex_t	r_stp;			/* For use by step */
	regex_t	r_adv;			/* For use by advance */
} reg_comp;

/*
 * global value for the size of a regex_comp structure, i.e. the number of
 * bytes a caller-supplied expbuf must provide.
 */
size_t regexc_size = sizeof (struct regex_comp);
997c478bd9Sstevel@tonic-gate 
1007c478bd9Sstevel@tonic-gate 
/*
 * compile: public entry point with the libgen compile() signature; all of
 * the work is done by dhl_compile(), whose return value is passed through.
 */
char *
compile(const char *instr, char *expbuf, char *endbuf)
{
	char *result;

	result = dhl_compile(instr, expbuf, endbuf);
	return (result);
}
1067c478bd9Sstevel@tonic-gate 
/*
 * step: public entry point with the libgen step() signature; forwards to
 * dhl_step() and returns its result (1 on a match, 0 otherwise).
 */
int
step(const char *instr, const char *expbuf)
{
	int matched;

	matched = dhl_step(instr, expbuf);
	return (matched);
}
1127c478bd9Sstevel@tonic-gate 
/*
 * advance: public entry point with the libgen advance() signature;
 * forwards to dhl_advance() and returns its result (1 on a match,
 * 0 otherwise).
 */
int
advance(const char *instr, const char *expbuf)
{
	int matched;

	matched = dhl_advance(instr, expbuf);
	return (matched);
}
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate /*
1217c478bd9Sstevel@tonic-gate  * the compile and step routines here simulate the old libgen routines of
122*bbf21555SRichard Lowe  * compile/step Re: regexpr(3GEN). in order to do this, we must assume
1237c478bd9Sstevel@tonic-gate  * that expbuf[] consists of the following format:
1247c478bd9Sstevel@tonic-gate  *	1) the first two bytes consist of a special encoding - see below.
1257c478bd9Sstevel@tonic-gate  *	2) the next part is a regex_t used by regexec()/regcomp() for step
1267c478bd9Sstevel@tonic-gate  *	3) the final part is a regex_t used by regexec()/regcomp() for advance
1277c478bd9Sstevel@tonic-gate  *
1287c478bd9Sstevel@tonic-gate  * the special encoding of the first two bytes is referenced throughout
1297c478bd9Sstevel@tonic-gate  * vi. apparently expbuf[0] is set to:
1307c478bd9Sstevel@tonic-gate  *	= 0 upon initialization
1317c478bd9Sstevel@tonic-gate  *	= 1 if the first char of the RE is a ^
1327c478bd9Sstevel@tonic-gate  *	= 0 if the first char of the RE isn't a ^
1337c478bd9Sstevel@tonic-gate  * and expbuf[1-35+]	= bitmap of the type of RE chars in the expression.
1347c478bd9Sstevel@tonic-gate  * this is apparently 0 if there's no RE.
1357c478bd9Sstevel@tonic-gate  * Here, we use expbuf[0] in a similar fashion; and expbuf[1] is non-zero
1367c478bd9Sstevel@tonic-gate  * if there's at least 1 RE in the string.
1377c478bd9Sstevel@tonic-gate  * I say "apparently" as the code to compile()/step() is poorly written.
1387c478bd9Sstevel@tonic-gate  */
1397c478bd9Sstevel@tonic-gate static char *
dhl_compile(const char * instr,char * expbuf,char * endbuf)140023a3eeeSToomas Soome dhl_compile(const char *instr,	/* the regular expression		*/
141023a3eeeSToomas Soome     char *expbuf,		/* where the compiled RE gets placed	*/
142023a3eeeSToomas Soome     char *endbuf)		/* ending addr of expbuf		*/
1437c478bd9Sstevel@tonic-gate {
1447c478bd9Sstevel@tonic-gate 	int rv;
1457c478bd9Sstevel@tonic-gate 	int alloc = 0;
1467c478bd9Sstevel@tonic-gate 	char adv_instr[4096];	/* PLENTY big temp buffer */
1477c478bd9Sstevel@tonic-gate 	char *instrp;		/* PLENTY big temp buffer */
1487c478bd9Sstevel@tonic-gate 
149023a3eeeSToomas Soome 	if (*instr == '\0') {
1507c478bd9Sstevel@tonic-gate 		regerrno = 41;
1517c478bd9Sstevel@tonic-gate 		return (NULL);
1527c478bd9Sstevel@tonic-gate 	}
1537c478bd9Sstevel@tonic-gate 
1547c478bd9Sstevel@tonic-gate 	/*
1557c478bd9Sstevel@tonic-gate 	 * Check values of expbuf and endbuf
1567c478bd9Sstevel@tonic-gate 	 */
1577c478bd9Sstevel@tonic-gate 	if (expbuf == NULL) {
1587c478bd9Sstevel@tonic-gate 		if ((expbuf = malloc(regexc_size)) == NULL) {
1597c478bd9Sstevel@tonic-gate 			regerrno = 50;
1607c478bd9Sstevel@tonic-gate 			return (NULL);
1617c478bd9Sstevel@tonic-gate 		}
1627c478bd9Sstevel@tonic-gate 		memset(&reg_comp, 0, regexc_size);
1637c478bd9Sstevel@tonic-gate 		alloc = 1;
1647c478bd9Sstevel@tonic-gate 		endbuf = expbuf + regexc_size;
1657c478bd9Sstevel@tonic-gate 	} else {		/* Check if enough memory was allocated */
1667c478bd9Sstevel@tonic-gate 		if (expbuf + regexc_size > endbuf) {
1677c478bd9Sstevel@tonic-gate 			regerrno = 50;
1687c478bd9Sstevel@tonic-gate 			return (NULL);
1697c478bd9Sstevel@tonic-gate 		}
1707c478bd9Sstevel@tonic-gate 		memcpy(&reg_comp, expbuf, regexc_size);
1717c478bd9Sstevel@tonic-gate 	}
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate 	/*
1747c478bd9Sstevel@tonic-gate 	 * Clear global flags
1757c478bd9Sstevel@tonic-gate 	 */
1767c478bd9Sstevel@tonic-gate 	nbra = 0;
1777c478bd9Sstevel@tonic-gate 	regerrno = 0;
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 	/*
1807c478bd9Sstevel@tonic-gate 	 * Free any data being held for previous search strings
1817c478bd9Sstevel@tonic-gate 	 */
1827c478bd9Sstevel@tonic-gate 	regex_comp_free(&reg_comp);
1837c478bd9Sstevel@tonic-gate 
1847c478bd9Sstevel@tonic-gate 	/*
1857c478bd9Sstevel@tonic-gate 	 * We call regcomp twice, once to get a regex_t for use by step()
1867c478bd9Sstevel@tonic-gate 	 * and then again with for use by advance()
1877c478bd9Sstevel@tonic-gate 	 */
1887c478bd9Sstevel@tonic-gate 	if ((rv = regcomp(&reg_comp.r_stp, instr, regcomp_flags)) != 0) {
1897c478bd9Sstevel@tonic-gate 		regerrno = map_errnos(rv);	/* Convert regcomp error */
1907c478bd9Sstevel@tonic-gate 		goto out;
1917c478bd9Sstevel@tonic-gate 	}
1927c478bd9Sstevel@tonic-gate 	/*
1937c478bd9Sstevel@tonic-gate 	 * To support advance, which assumes an implicit ^ to match at start
1947c478bd9Sstevel@tonic-gate 	 * of line we prepend a ^ to the pattern by copying to a temp buffer
1957c478bd9Sstevel@tonic-gate 	 */
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	if (instr[0] == '^')
198023a3eeeSToomas Soome 		instrp = (char *)instr; /* String already has leading ^ */
1997c478bd9Sstevel@tonic-gate 	else {
2007c478bd9Sstevel@tonic-gate 		adv_instr[0] = '^';
2017c478bd9Sstevel@tonic-gate 		strncpy(&adv_instr[1], instr, 2048);
2027c478bd9Sstevel@tonic-gate 		instrp = adv_instr;
2037c478bd9Sstevel@tonic-gate 	}
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate 	if ((rv = regcomp(&reg_comp.r_adv, instrp, regcomp_flags)) != 0) {
2067c478bd9Sstevel@tonic-gate 		regerrno = map_errnos(rv);	/* Convert regcomp error */
2077c478bd9Sstevel@tonic-gate 		goto out;
2087c478bd9Sstevel@tonic-gate 	}
2097c478bd9Sstevel@tonic-gate 
2107c478bd9Sstevel@tonic-gate 	/*
2117c478bd9Sstevel@tonic-gate 	 * update global variables
2127c478bd9Sstevel@tonic-gate 	 */
213023a3eeeSToomas Soome 	nbra = (int)reg_comp.r_adv.re_nsub > 0 ?
214023a3eeeSToomas Soome 	    (int)reg_comp.r_adv.re_nsub : 0;
2157c478bd9Sstevel@tonic-gate 	regerrno = 0;
2167c478bd9Sstevel@tonic-gate 
2177c478bd9Sstevel@tonic-gate 	/*
2187c478bd9Sstevel@tonic-gate 	 * Set the header flags for use by vi
2197c478bd9Sstevel@tonic-gate 	 */
220023a3eeeSToomas Soome 	if (instr[0] == '^')		/* if beginning of string,	*/
2217c478bd9Sstevel@tonic-gate 		reg_comp.r_head[0] = 1;	/* set special flag		*/
2227c478bd9Sstevel@tonic-gate 	else
2237c478bd9Sstevel@tonic-gate 		reg_comp.r_head[0] = 0;	/* clear special flag		*/
2247c478bd9Sstevel@tonic-gate 	/*
2257c478bd9Sstevel@tonic-gate 	 * note that for a single BRE, nbra will be 0 here.
2267c478bd9Sstevel@tonic-gate 	 * we're guaranteed that, at this point, a RE has been found.
2277c478bd9Sstevel@tonic-gate 	 */
2287c478bd9Sstevel@tonic-gate 	reg_comp.r_head[1] = 1;	/* set special flag		*/
2297c478bd9Sstevel@tonic-gate 	/*
2307c478bd9Sstevel@tonic-gate 	 * Copy our reg_comp structure to expbuf
2317c478bd9Sstevel@tonic-gate 	 */
232023a3eeeSToomas Soome 	(void) memcpy(expbuf, (char *)&reg_comp, regexc_size);
2337c478bd9Sstevel@tonic-gate 
2347c478bd9Sstevel@tonic-gate out:
2357c478bd9Sstevel@tonic-gate 	/*
2367c478bd9Sstevel@tonic-gate 	 * Return code from libgen regcomp with mods.  Note weird return
2377c478bd9Sstevel@tonic-gate 	 * value - if space is malloc'd return pointer to start of space,
23848bbca81SDaniel Hoffman 	 * if user provided their own space, return pointer to 1+last byte
23948bbca81SDaniel Hoffman 	 * of that space.
2407c478bd9Sstevel@tonic-gate 	 */
2417c478bd9Sstevel@tonic-gate 	if (regerrno != 0) {
2427c478bd9Sstevel@tonic-gate 		if (alloc)
2437c478bd9Sstevel@tonic-gate 			free(expbuf);
2447c478bd9Sstevel@tonic-gate 		return (NULL);
2457c478bd9Sstevel@tonic-gate 	}
2467c478bd9Sstevel@tonic-gate 	reglength = regexc_size;
2477c478bd9Sstevel@tonic-gate 
2487c478bd9Sstevel@tonic-gate 	if (alloc)
2497c478bd9Sstevel@tonic-gate 		return (expbuf);
2507c478bd9Sstevel@tonic-gate 	else
2517c478bd9Sstevel@tonic-gate 		return (expbuf + regexc_size);
2527c478bd9Sstevel@tonic-gate }
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate /*
2567c478bd9Sstevel@tonic-gate  * dhl_step: step through a string until a RE match is found, or end of str
2577c478bd9Sstevel@tonic-gate  */
2587c478bd9Sstevel@tonic-gate static int
dhl_step(const char * str,const char * ep)259023a3eeeSToomas Soome dhl_step(const char *str,	/* characters to be checked for a match	*/
260023a3eeeSToomas Soome     const char *ep)		/* compiled RE from dhl_compile()	*/
2617c478bd9Sstevel@tonic-gate {
2627c478bd9Sstevel@tonic-gate 	/*
2637c478bd9Sstevel@tonic-gate 	 * Check if we're passed a null ep
2647c478bd9Sstevel@tonic-gate 	 */
2657c478bd9Sstevel@tonic-gate 	if (ep == NULL) {
2667c478bd9Sstevel@tonic-gate 		regerrno = 41;	/* No remembered search string error */
2677c478bd9Sstevel@tonic-gate 		return (0);
2687c478bd9Sstevel@tonic-gate 	}
2697c478bd9Sstevel@tonic-gate 	/*
2707c478bd9Sstevel@tonic-gate 	 * Call common routine with r_stp (step) structure
2717c478bd9Sstevel@tonic-gate 	 */
272023a3eeeSToomas Soome 	return (dhl_doit(str, &(((struct regex_comp *)ep)->r_stp),
2737c478bd9Sstevel@tonic-gate 	    ((locs != NULL) ? REG_NOTBOL : 0)));
2747c478bd9Sstevel@tonic-gate }
2757c478bd9Sstevel@tonic-gate 
2767c478bd9Sstevel@tonic-gate /*
2777c478bd9Sstevel@tonic-gate  * dhl_advance: implement advance
2787c478bd9Sstevel@tonic-gate  */
2797c478bd9Sstevel@tonic-gate static int
dhl_advance(const char * str,const char * ep)280023a3eeeSToomas Soome dhl_advance(const char *str,	/* characters to be checked for a match	*/
281023a3eeeSToomas Soome     const char *ep)		/* compiled RE from dhl_compile()	*/
2827c478bd9Sstevel@tonic-gate {
2837c478bd9Sstevel@tonic-gate 	int rv;
2847c478bd9Sstevel@tonic-gate 	/*
2857c478bd9Sstevel@tonic-gate 	 * Check if we're passed a null ep
2867c478bd9Sstevel@tonic-gate 	 */
2877c478bd9Sstevel@tonic-gate 	if (ep == NULL) {
2887c478bd9Sstevel@tonic-gate 		regerrno = 41;	/* No remembered search string error */
2897c478bd9Sstevel@tonic-gate 		return (0);
2907c478bd9Sstevel@tonic-gate 	}
2917c478bd9Sstevel@tonic-gate 	/*
2927c478bd9Sstevel@tonic-gate 	 * Call common routine with r_adv (advance) structure
2937c478bd9Sstevel@tonic-gate 	 */
294023a3eeeSToomas Soome 	rv = dhl_doit(str, &(((struct regex_comp *)ep)->r_adv), 0);
2957c478bd9Sstevel@tonic-gate 	loc1 = NULL;		/* Clear it per the compile man page */
2967c478bd9Sstevel@tonic-gate 	return (rv);
2977c478bd9Sstevel@tonic-gate }
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate /*
3007c478bd9Sstevel@tonic-gate  * dhl_doit - common code for step and advance
3017c478bd9Sstevel@tonic-gate  */
3027c478bd9Sstevel@tonic-gate static int
dhl_doit(const char * str,const regex_t * rep,const int flags)303023a3eeeSToomas Soome dhl_doit(const char *str,	/* characters to be checked for a match	*/
304023a3eeeSToomas Soome     const regex_t *rep,
305023a3eeeSToomas Soome     const int flags)		/* flags to be passed to regexec directly */
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate 	int rv;
3087c478bd9Sstevel@tonic-gate 	int i;
3097c478bd9Sstevel@tonic-gate 	regmatch_t *prm;	/* ptr to current regmatch_t		*/
3107c478bd9Sstevel@tonic-gate 
3117c478bd9Sstevel@tonic-gate 	/*
3127c478bd9Sstevel@tonic-gate 	 * Check if we're passed a null regex_t
3137c478bd9Sstevel@tonic-gate 	 */
3147c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
3157c478bd9Sstevel@tonic-gate 		regerrno = 41;	/* No remembered search string error */
3167c478bd9Sstevel@tonic-gate 		return (0);
3177c478bd9Sstevel@tonic-gate 	}
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate 	regerrno = 0;
3207c478bd9Sstevel@tonic-gate 	prm = &rm[0];
3217c478bd9Sstevel@tonic-gate 
3227c478bd9Sstevel@tonic-gate 	if ((rv = regexec(rep, str, SEPSIZE, prm, flags)) != REG_OK) {
3237c478bd9Sstevel@tonic-gate 		if (rv == REG_NOMATCH)
3247c478bd9Sstevel@tonic-gate 			return (0);
3257c478bd9Sstevel@tonic-gate 		regerrno = map_errnos(rv);
3267c478bd9Sstevel@tonic-gate 		return (0);
3277c478bd9Sstevel@tonic-gate 	}
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 	loc1 = (char *)str + prm->rm_so;
3307c478bd9Sstevel@tonic-gate 	loc2 = (char *)str + prm->rm_eo;
3317c478bd9Sstevel@tonic-gate 
3327c478bd9Sstevel@tonic-gate 	/*
3337c478bd9Sstevel@tonic-gate 	 * Now we need to fill up the bra lists with all of the sub re's
3347c478bd9Sstevel@tonic-gate 	 * Note we subtract nsub -1, and preincrement prm.
3357c478bd9Sstevel@tonic-gate 	 */
3367c478bd9Sstevel@tonic-gate 	for (i = 0; i <= rep->re_nsub; i++) {
3377c478bd9Sstevel@tonic-gate 		prm++;		/* XXX inc past first subexp */
3387c478bd9Sstevel@tonic-gate 		braslist[i] = (char *)str + prm->rm_so;
3397c478bd9Sstevel@tonic-gate 		braelist[i] = (char *)str + prm->rm_eo;
3407c478bd9Sstevel@tonic-gate 		if (i >= SEPSIZE) {
341023a3eeeSToomas Soome 			regerrno = 50;	/* regex overflow */
3427c478bd9Sstevel@tonic-gate 			return (0);
3437c478bd9Sstevel@tonic-gate 		}
3447c478bd9Sstevel@tonic-gate 	}
3457c478bd9Sstevel@tonic-gate 
3467c478bd9Sstevel@tonic-gate 	/*
3477c478bd9Sstevel@tonic-gate 	 * Inverse logic, a zero from regexec - success, is a 1
3487c478bd9Sstevel@tonic-gate 	 * from advance/step.
3497c478bd9Sstevel@tonic-gate 	 */
3507c478bd9Sstevel@tonic-gate 
3517c478bd9Sstevel@tonic-gate 	return (rv == 0);
3527c478bd9Sstevel@tonic-gate }
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate 
/*
 *	regerrno to compile/step error mapping:
 *	This is really a big compromise.  Some errors don't map at all
 *	like regcomp error 15 is generated by both compile() error types
 *	44 & 46.  So which one should we map to?
 *	Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
 *	To do your errors right use xregerr() to get the regcomp error
 *	string and print that.
 *
 * |    regcomp/regexec              |  Compile/step/advance                |
 * +---------------------------------+--------------------------------------+
 * 0 REG_OK	  Pattern matched	1  - Pattern matched
 * 1 REG_NOMATCH  No match		0  - Pattern didn't match
 * 2 REG_ECOLLATE Bad collation elmnt.	67 - Returned by compile on mbtowc err
 * 3 REG_EESCAPE  trailing \ in pattern	45 - } expected after \.
 * 4 REG_ENEWLINE \n before end pattern	36 - Illegal or missing delimiter.
 * 5 REG_ENSUB    Over 9 \( \) pairs	43 - Too many \(
 * 6 REG_ESUBREG  Bad number in \[0-9]	25 - ``\digit'' out of range.
 * 7 REG_EBRACK   [ ] imbalance		49 - [ ] imbalance.
 * 8 REG_EPAREN   ( ) imbalance		42 - \(~\) imbalance.
 * 9 REG_EBRACE   \{ \} imbalance	45 - } expected after \.
 * 10 REG_ERANGE  bad range endpoint	11 - Range endpoint too large.
 * 11 REG_ESPACE  no memory for pattern	50 - Regular expression overflow.
 * 12 REG_BADRPT  invalid repetition	36 - Illegal or missing delimiter.
 * 13 REG_ECTYPE  invalid char-class	67 - illegal byte sequence
 * 14 REG_BADPAT  syntax error		50 - Regular expression overflow.
 * 15 REG_BADBR   \{ \} contents bad	46 - First number exceeds 2nd in \{~\}
 * 16 REG_EFATAL  internal error	50 - Regular expression overflow.
 * 17 REG_ECHAR   bad multibyte char	67 - illegal byte sequence
 * 18 REG_STACK   stack overflow	50 - Regular expression overflow.
 * 19 REG_ENOSYS  function not supported 50 - Regular expression overflow.
 *
 *	For reference here's the compile/step errno's. We don't generate
 *	41 here - it's done earlier, nor 44 since we can't tell it from 46.
 *
 *	11 - Range endpoint too large.
 *	16 - Bad number.
 *	25 - ``\digit'' out of range.
 *	36 - Illegal or missing delimiter.
 *	41 - No remembered search string.
 *	42 - \(~\) imbalance.
 *	43 - Too many \(.
 *	44 - More than 2 numbers given in "\{~\}"
 *	45 - } expected after \.
 *	46 - First number exceeds 2nd in "\{~\}"
 *	49 - [ ] imbalance.
 *	50 - Regular expression overflow.
 */
4037c478bd9Sstevel@tonic-gate 
4047c478bd9Sstevel@tonic-gate static int
map_errnos(int Errno)4057c478bd9Sstevel@tonic-gate map_errnos(int Errno)
4067c478bd9Sstevel@tonic-gate {
4077c478bd9Sstevel@tonic-gate 	switch (Errno) {
4087c478bd9Sstevel@tonic-gate 	case REG_ECOLLATE:
4097c478bd9Sstevel@tonic-gate 		regerrno = 67;
4107c478bd9Sstevel@tonic-gate 		break;
4117c478bd9Sstevel@tonic-gate 	case REG_EESCAPE:
4127c478bd9Sstevel@tonic-gate 		regerrno = 45;
4137c478bd9Sstevel@tonic-gate 		break;
4147c478bd9Sstevel@tonic-gate 	case REG_ENEWLINE:
4157c478bd9Sstevel@tonic-gate 		regerrno = 36;
4167c478bd9Sstevel@tonic-gate 		break;
4177c478bd9Sstevel@tonic-gate 	case REG_ENSUB:
4187c478bd9Sstevel@tonic-gate 		regerrno = 43;
4197c478bd9Sstevel@tonic-gate 		break;
4207c478bd9Sstevel@tonic-gate 	case REG_ESUBREG:
4217c478bd9Sstevel@tonic-gate 		regerrno = 25;
4227c478bd9Sstevel@tonic-gate 		break;
4237c478bd9Sstevel@tonic-gate 	case REG_EBRACK:
4247c478bd9Sstevel@tonic-gate 		regerrno = 49;
4257c478bd9Sstevel@tonic-gate 		break;
4267c478bd9Sstevel@tonic-gate 	case REG_EPAREN:
4277c478bd9Sstevel@tonic-gate 		regerrno = 42;
4287c478bd9Sstevel@tonic-gate 		break;
4297c478bd9Sstevel@tonic-gate 	case REG_EBRACE:
4307c478bd9Sstevel@tonic-gate 		regerrno = 45;
4317c478bd9Sstevel@tonic-gate 		break;
4327c478bd9Sstevel@tonic-gate 	case REG_ERANGE:
4337c478bd9Sstevel@tonic-gate 		regerrno = 11;
4347c478bd9Sstevel@tonic-gate 		break;
4357c478bd9Sstevel@tonic-gate 	case REG_ESPACE:
4367c478bd9Sstevel@tonic-gate 		regerrno = 50;
4377c478bd9Sstevel@tonic-gate 		break;
4387c478bd9Sstevel@tonic-gate 	case REG_BADRPT:
4397c478bd9Sstevel@tonic-gate 		regerrno = 36;
4407c478bd9Sstevel@tonic-gate 		break;
4417c478bd9Sstevel@tonic-gate 	case REG_ECTYPE:
4427c478bd9Sstevel@tonic-gate 		regerrno = 67;
4437c478bd9Sstevel@tonic-gate 		break;
4447c478bd9Sstevel@tonic-gate 	case REG_BADPAT:
4457c478bd9Sstevel@tonic-gate 		regerrno = 50;
4467c478bd9Sstevel@tonic-gate 		break;
4477c478bd9Sstevel@tonic-gate 	case REG_BADBR:
4487c478bd9Sstevel@tonic-gate 		regerrno = 46;
4497c478bd9Sstevel@tonic-gate 		break;
4507c478bd9Sstevel@tonic-gate 	case REG_EFATAL:
4517c478bd9Sstevel@tonic-gate 		regerrno = 50;
4527c478bd9Sstevel@tonic-gate 		break;
4537c478bd9Sstevel@tonic-gate 	case REG_ECHAR:
4547c478bd9Sstevel@tonic-gate 		regerrno = 67;
4557c478bd9Sstevel@tonic-gate 		break;
4567c478bd9Sstevel@tonic-gate 	case REG_STACK:
4577c478bd9Sstevel@tonic-gate 		regerrno = 50;
4587c478bd9Sstevel@tonic-gate 		break;
4597c478bd9Sstevel@tonic-gate 	case REG_ENOSYS:
4607c478bd9Sstevel@tonic-gate 		regerrno = 50;
4617c478bd9Sstevel@tonic-gate 		break;
4627c478bd9Sstevel@tonic-gate 	default:
4637c478bd9Sstevel@tonic-gate 		regerrno = 50;
4647c478bd9Sstevel@tonic-gate 		break;
4657c478bd9Sstevel@tonic-gate 	}
4667c478bd9Sstevel@tonic-gate 	return (regerrno);
4677c478bd9Sstevel@tonic-gate }
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate /*
4707c478bd9Sstevel@tonic-gate  *  This is a routine to clean up the subtle substructure of the struct
4717c478bd9Sstevel@tonic-gate  *  regex_comp type for use by clients of this module.  Since the struct
4727c478bd9Sstevel@tonic-gate  *  type is private, we use a generic interface, and trust the
4737c478bd9Sstevel@tonic-gate  *  application to be damn sure that this operation is valid for the
4747c478bd9Sstevel@tonic-gate  *  named memory.
4757c478bd9Sstevel@tonic-gate  */
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate void
regex_comp_free(void * a)478023a3eeeSToomas Soome regex_comp_free(void *a)
4797c478bd9Sstevel@tonic-gate {
4807c478bd9Sstevel@tonic-gate 	/*
4817c478bd9Sstevel@tonic-gate 	 * Free any data being held for previous search strings
4827c478bd9Sstevel@tonic-gate 	 */
4837c478bd9Sstevel@tonic-gate 
484023a3eeeSToomas Soome 	if (a == NULL) {
4857c478bd9Sstevel@tonic-gate 		return;
4867c478bd9Sstevel@tonic-gate 	}
4877c478bd9Sstevel@tonic-gate 
4887c478bd9Sstevel@tonic-gate 	regfree(&((struct regex_comp *)a)->r_stp);
4897c478bd9Sstevel@tonic-gate 	regfree(&((struct regex_comp *)a)->r_adv);
4907c478bd9Sstevel@tonic-gate }
491