1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate /* 28*7c478bd9Sstevel@tonic-gate * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g) 29*7c478bd9Sstevel@tonic-gate * using regcomp(3c), regexec(3c) interfaces. This is an XCU4 30*7c478bd9Sstevel@tonic-gate * porting aid. 
switches out to libgen compile/step if collation 31*7c478bd9Sstevel@tonic-gate * table not present. 32*7c478bd9Sstevel@tonic-gate * 33*7c478bd9Sstevel@tonic-gate * Goal is to work with vi and sed/ed. 34*7c478bd9Sstevel@tonic-gate * Returns expbuf in dhl format (encoding of first two bytes). 35*7c478bd9Sstevel@tonic-gate * Note also that this is profoundly single threaded. You 36*7c478bd9Sstevel@tonic-gate * cannot call compile twice with two separate search strings 37*7c478bd9Sstevel@tonic-gate * because the second call will wipe out the earlier stored string. 38*7c478bd9Sstevel@tonic-gate * This must be fixed, plus a general cleanup should be performed 39*7c478bd9Sstevel@tonic-gate * if this is to be integrated into libc. 40*7c478bd9Sstevel@tonic-gate * 41*7c478bd9Sstevel@tonic-gate */ 42*7c478bd9Sstevel@tonic-gate 43*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 44*7c478bd9Sstevel@tonic-gate 45*7c478bd9Sstevel@tonic-gate #include <stdio.h> 46*7c478bd9Sstevel@tonic-gate #include <widec.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 48*7c478bd9Sstevel@tonic-gate #include <regex.h> 49*7c478bd9Sstevel@tonic-gate #include <locale.h> 50*7c478bd9Sstevel@tonic-gate #include <stdlib.h> 51*7c478bd9Sstevel@tonic-gate #include <locale.h> 52*7c478bd9Sstevel@tonic-gate #include <string.h> 53*7c478bd9Sstevel@tonic-gate #include <unistd.h> 54*7c478bd9Sstevel@tonic-gate #include <regexpr.h> 55*7c478bd9Sstevel@tonic-gate 56*7c478bd9Sstevel@tonic-gate /* 57*7c478bd9Sstevel@tonic-gate * psuedo compile/step/advance global variables 58*7c478bd9Sstevel@tonic-gate */ 59*7c478bd9Sstevel@tonic-gate extern int nbra; 60*7c478bd9Sstevel@tonic-gate extern char *locs; /* for stopping execess recursion */ 61*7c478bd9Sstevel@tonic-gate extern char *loc1; /* 1st character which matched RE */ 62*7c478bd9Sstevel@tonic-gate extern char *loc2; /* char after lst char in matched RE */ 63*7c478bd9Sstevel@tonic-gate extern char *braslist[]; /* start of nbra subexp */ 
64*7c478bd9Sstevel@tonic-gate extern char *braelist[]; /* end of nbra subexp */ 65*7c478bd9Sstevel@tonic-gate extern int regerrno; 66*7c478bd9Sstevel@tonic-gate extern int reglength; 67*7c478bd9Sstevel@tonic-gate 68*7c478bd9Sstevel@tonic-gate int regcomp_flags; /* interface to specify cflags for regcomp */ 69*7c478bd9Sstevel@tonic-gate 70*7c478bd9Sstevel@tonic-gate void regex_comp_free(void *a); 71*7c478bd9Sstevel@tonic-gate static int dhl_step(const char *str, const char *ep); 72*7c478bd9Sstevel@tonic-gate static int dhl_advance(const char *str, const char *ep); 73*7c478bd9Sstevel@tonic-gate static int map_errnos(int); /* Convert regcomp error */ 74*7c478bd9Sstevel@tonic-gate static int dhl_doit(const char *, const regex_t *, const int flags); 75*7c478bd9Sstevel@tonic-gate static char * dhl_compile(const char *instr, char *ep, char *endbuf); 76*7c478bd9Sstevel@tonic-gate 77*7c478bd9Sstevel@tonic-gate /* 78*7c478bd9Sstevel@tonic-gate * # of sub re's: NOTE: For now limit on bra list defined here 79*7c478bd9Sstevel@tonic-gate * but fix is to add maxbra define to to regex.h 80*7c478bd9Sstevel@tonic-gate * One problem is that a bigger number is a performance hit since 81*7c478bd9Sstevel@tonic-gate * regexec() has a slow initialization loop that goes around SEPSIZE times 82*7c478bd9Sstevel@tonic-gate */ 83*7c478bd9Sstevel@tonic-gate #define SEPSIZE 20 84*7c478bd9Sstevel@tonic-gate static regmatch_t rm[SEPSIZE]; /* ptr to list of RE matches */ 85*7c478bd9Sstevel@tonic-gate 86*7c478bd9Sstevel@tonic-gate /* 87*7c478bd9Sstevel@tonic-gate * Structure to contain dl encoded first two bytes for vi, plus hold two 88*7c478bd9Sstevel@tonic-gate * regex structures, one for advance and one for step. 
89*7c478bd9Sstevel@tonic-gate */ 90*7c478bd9Sstevel@tonic-gate static struct regex_comp { 91*7c478bd9Sstevel@tonic-gate char r_head[2]; /* Header for DL encoding for vi */ 92*7c478bd9Sstevel@tonic-gate regex_t r_stp; /* For use by step */ 93*7c478bd9Sstevel@tonic-gate regex_t r_adv; /* For use by advance */ 94*7c478bd9Sstevel@tonic-gate } reg_comp; 95*7c478bd9Sstevel@tonic-gate 96*7c478bd9Sstevel@tonic-gate /* 97*7c478bd9Sstevel@tonic-gate * global value for the size of a regex_comp structure: 98*7c478bd9Sstevel@tonic-gate */ 99*7c478bd9Sstevel@tonic-gate size_t regexc_size = sizeof (reg_comp); 100*7c478bd9Sstevel@tonic-gate 101*7c478bd9Sstevel@tonic-gate 102*7c478bd9Sstevel@tonic-gate char * 103*7c478bd9Sstevel@tonic-gate compile(const char *instr, char *expbuf, char *endbuf) 104*7c478bd9Sstevel@tonic-gate { 105*7c478bd9Sstevel@tonic-gate return (dhl_compile(instr, expbuf, endbuf)); 106*7c478bd9Sstevel@tonic-gate } 107*7c478bd9Sstevel@tonic-gate 108*7c478bd9Sstevel@tonic-gate int 109*7c478bd9Sstevel@tonic-gate step(const char *instr, const char *expbuf) 110*7c478bd9Sstevel@tonic-gate { 111*7c478bd9Sstevel@tonic-gate return (dhl_step(instr, expbuf)); 112*7c478bd9Sstevel@tonic-gate } 113*7c478bd9Sstevel@tonic-gate 114*7c478bd9Sstevel@tonic-gate int 115*7c478bd9Sstevel@tonic-gate advance(const char *instr, const char *expbuf) 116*7c478bd9Sstevel@tonic-gate { 117*7c478bd9Sstevel@tonic-gate return (dhl_advance(instr, expbuf)); 118*7c478bd9Sstevel@tonic-gate } 119*7c478bd9Sstevel@tonic-gate 120*7c478bd9Sstevel@tonic-gate 121*7c478bd9Sstevel@tonic-gate /* 122*7c478bd9Sstevel@tonic-gate * the compile and step routines here simulate the old libgen routines of 123*7c478bd9Sstevel@tonic-gate * compile/step Re: regexpr(3G). in order to do this, we must assume 124*7c478bd9Sstevel@tonic-gate * that expbuf[] consists of the following format: 125*7c478bd9Sstevel@tonic-gate * 1) the first two bytes consist of a special encoding - see below. 
 *	2) the next part is a regex_t used by regexec()/regcomp() for step
 *	3) the final part is a regex_t used by regexec()/regcomp() for advance
 *
 * The special encoding of the first two bytes is referenced throughout
 * vi.  Apparently expbuf[0] is set to:
 *	= 0 upon initialization
 *	= 1 if the first char of the RE is a ^
 *	= 0 if the first char of the RE isn't a ^
 * and expbuf[1-35+] = bitmap of the type of RE chars in the expression.
 * This is apparently 0 if there's no RE.
 * Here, we use expbuf[0] in a similar fashion; and expbuf[1] is non-zero
 * if there's at least 1 RE in the string.
 * I say "apparently" as the code to compile()/step() is poorly written.
139*7c478bd9Sstevel@tonic-gate */ 140*7c478bd9Sstevel@tonic-gate static char * 141*7c478bd9Sstevel@tonic-gate dhl_compile(instr, expbuf, endbuf) 142*7c478bd9Sstevel@tonic-gate const char *instr; /* the regular expression */ 143*7c478bd9Sstevel@tonic-gate char *expbuf; /* where the compiled RE gets placed */ 144*7c478bd9Sstevel@tonic-gate char *endbuf; /* ending addr of expbuf */ 145*7c478bd9Sstevel@tonic-gate { 146*7c478bd9Sstevel@tonic-gate int rv; 147*7c478bd9Sstevel@tonic-gate int alloc = 0; 148*7c478bd9Sstevel@tonic-gate char adv_instr[4096]; /* PLENTY big temp buffer */ 149*7c478bd9Sstevel@tonic-gate char *instrp; /* PLENTY big temp buffer */ 150*7c478bd9Sstevel@tonic-gate 151*7c478bd9Sstevel@tonic-gate if (*instr == (char) NULL) { 152*7c478bd9Sstevel@tonic-gate regerrno = 41; 153*7c478bd9Sstevel@tonic-gate return (NULL); 154*7c478bd9Sstevel@tonic-gate } 155*7c478bd9Sstevel@tonic-gate 156*7c478bd9Sstevel@tonic-gate /* 157*7c478bd9Sstevel@tonic-gate * Check values of expbuf and endbuf 158*7c478bd9Sstevel@tonic-gate */ 159*7c478bd9Sstevel@tonic-gate if (expbuf == NULL) { 160*7c478bd9Sstevel@tonic-gate if ((expbuf = malloc(regexc_size)) == NULL) { 161*7c478bd9Sstevel@tonic-gate regerrno = 50; 162*7c478bd9Sstevel@tonic-gate return (NULL); 163*7c478bd9Sstevel@tonic-gate } 164*7c478bd9Sstevel@tonic-gate memset(®_comp, 0, regexc_size); 165*7c478bd9Sstevel@tonic-gate alloc = 1; 166*7c478bd9Sstevel@tonic-gate endbuf = expbuf + regexc_size; 167*7c478bd9Sstevel@tonic-gate } else { /* Check if enough memory was allocated */ 168*7c478bd9Sstevel@tonic-gate if (expbuf + regexc_size > endbuf) { 169*7c478bd9Sstevel@tonic-gate regerrno = 50; 170*7c478bd9Sstevel@tonic-gate return (NULL); 171*7c478bd9Sstevel@tonic-gate } 172*7c478bd9Sstevel@tonic-gate memcpy(®_comp, expbuf, regexc_size); 173*7c478bd9Sstevel@tonic-gate } 174*7c478bd9Sstevel@tonic-gate 175*7c478bd9Sstevel@tonic-gate /* 176*7c478bd9Sstevel@tonic-gate * Clear global flags 177*7c478bd9Sstevel@tonic-gate */ 
178*7c478bd9Sstevel@tonic-gate nbra = 0; 179*7c478bd9Sstevel@tonic-gate regerrno = 0; 180*7c478bd9Sstevel@tonic-gate 181*7c478bd9Sstevel@tonic-gate /* 182*7c478bd9Sstevel@tonic-gate * Free any data being held for previous search strings 183*7c478bd9Sstevel@tonic-gate */ 184*7c478bd9Sstevel@tonic-gate regex_comp_free(®_comp); 185*7c478bd9Sstevel@tonic-gate 186*7c478bd9Sstevel@tonic-gate /* 187*7c478bd9Sstevel@tonic-gate * We call regcomp twice, once to get a regex_t for use by step() 188*7c478bd9Sstevel@tonic-gate * and then again with for use by advance() 189*7c478bd9Sstevel@tonic-gate */ 190*7c478bd9Sstevel@tonic-gate if ((rv = regcomp(®_comp.r_stp, instr, regcomp_flags)) != 0) { 191*7c478bd9Sstevel@tonic-gate regerrno = map_errnos(rv); /* Convert regcomp error */ 192*7c478bd9Sstevel@tonic-gate goto out; 193*7c478bd9Sstevel@tonic-gate } 194*7c478bd9Sstevel@tonic-gate /* 195*7c478bd9Sstevel@tonic-gate * To support advance, which assumes an implicit ^ to match at start 196*7c478bd9Sstevel@tonic-gate * of line we prepend a ^ to the pattern by copying to a temp buffer 197*7c478bd9Sstevel@tonic-gate */ 198*7c478bd9Sstevel@tonic-gate 199*7c478bd9Sstevel@tonic-gate if (instr[0] == '^') 200*7c478bd9Sstevel@tonic-gate instrp = (char *) instr; /* String already has leading ^ */ 201*7c478bd9Sstevel@tonic-gate else { 202*7c478bd9Sstevel@tonic-gate adv_instr[0] = '^'; 203*7c478bd9Sstevel@tonic-gate strncpy(&adv_instr[1], instr, 2048); 204*7c478bd9Sstevel@tonic-gate instrp = adv_instr; 205*7c478bd9Sstevel@tonic-gate } 206*7c478bd9Sstevel@tonic-gate 207*7c478bd9Sstevel@tonic-gate if ((rv = regcomp(®_comp.r_adv, instrp, regcomp_flags)) != 0) { 208*7c478bd9Sstevel@tonic-gate regerrno = map_errnos(rv); /* Convert regcomp error */ 209*7c478bd9Sstevel@tonic-gate goto out; 210*7c478bd9Sstevel@tonic-gate } 211*7c478bd9Sstevel@tonic-gate 212*7c478bd9Sstevel@tonic-gate /* 213*7c478bd9Sstevel@tonic-gate * update global variables 214*7c478bd9Sstevel@tonic-gate */ 
215*7c478bd9Sstevel@tonic-gate nbra = (int) reg_comp.r_adv.re_nsub > 0 ? 216*7c478bd9Sstevel@tonic-gate (int) reg_comp.r_adv.re_nsub : 0; 217*7c478bd9Sstevel@tonic-gate regerrno = 0; 218*7c478bd9Sstevel@tonic-gate 219*7c478bd9Sstevel@tonic-gate /* 220*7c478bd9Sstevel@tonic-gate * Set the header flags for use by vi 221*7c478bd9Sstevel@tonic-gate */ 222*7c478bd9Sstevel@tonic-gate if (instr[0] == '^') /* if beginning of string, */ 223*7c478bd9Sstevel@tonic-gate reg_comp.r_head[0] = 1; /* set special flag */ 224*7c478bd9Sstevel@tonic-gate else 225*7c478bd9Sstevel@tonic-gate reg_comp.r_head[0] = 0; /* clear special flag */ 226*7c478bd9Sstevel@tonic-gate /* 227*7c478bd9Sstevel@tonic-gate * note that for a single BRE, nbra will be 0 here. 228*7c478bd9Sstevel@tonic-gate * we're guaranteed that, at this point, a RE has been found. 229*7c478bd9Sstevel@tonic-gate */ 230*7c478bd9Sstevel@tonic-gate reg_comp.r_head[1] = 1; /* set special flag */ 231*7c478bd9Sstevel@tonic-gate /* 232*7c478bd9Sstevel@tonic-gate * Copy our reg_comp structure to expbuf 233*7c478bd9Sstevel@tonic-gate */ 234*7c478bd9Sstevel@tonic-gate (void) memcpy(expbuf, (char *) ®_comp, regexc_size); 235*7c478bd9Sstevel@tonic-gate 236*7c478bd9Sstevel@tonic-gate out: 237*7c478bd9Sstevel@tonic-gate /* 238*7c478bd9Sstevel@tonic-gate * Return code from libgen regcomp with mods. Note weird return 239*7c478bd9Sstevel@tonic-gate * value - if space is malloc'd return pointer to start of space, 240*7c478bd9Sstevel@tonic-gate * if user provided his own space, return pointer to 1+last byte 241*7c478bd9Sstevel@tonic-gate * of his space. 
242*7c478bd9Sstevel@tonic-gate */ 243*7c478bd9Sstevel@tonic-gate if (regerrno != 0) { 244*7c478bd9Sstevel@tonic-gate if (alloc) 245*7c478bd9Sstevel@tonic-gate free(expbuf); 246*7c478bd9Sstevel@tonic-gate return (NULL); 247*7c478bd9Sstevel@tonic-gate } 248*7c478bd9Sstevel@tonic-gate reglength = regexc_size; 249*7c478bd9Sstevel@tonic-gate 250*7c478bd9Sstevel@tonic-gate if (alloc) 251*7c478bd9Sstevel@tonic-gate return (expbuf); 252*7c478bd9Sstevel@tonic-gate else 253*7c478bd9Sstevel@tonic-gate return (expbuf + regexc_size); 254*7c478bd9Sstevel@tonic-gate } 255*7c478bd9Sstevel@tonic-gate 256*7c478bd9Sstevel@tonic-gate 257*7c478bd9Sstevel@tonic-gate /* 258*7c478bd9Sstevel@tonic-gate * dhl_step: step through a string until a RE match is found, or end of str 259*7c478bd9Sstevel@tonic-gate */ 260*7c478bd9Sstevel@tonic-gate static int 261*7c478bd9Sstevel@tonic-gate dhl_step(str, ep) 262*7c478bd9Sstevel@tonic-gate const char *str; /* characters to be checked for a match */ 263*7c478bd9Sstevel@tonic-gate const char *ep; /* compiled RE from dhl_compile() */ 264*7c478bd9Sstevel@tonic-gate { 265*7c478bd9Sstevel@tonic-gate /* 266*7c478bd9Sstevel@tonic-gate * Check if we're passed a null ep 267*7c478bd9Sstevel@tonic-gate */ 268*7c478bd9Sstevel@tonic-gate if (ep == NULL) { 269*7c478bd9Sstevel@tonic-gate regerrno = 41; /* No remembered search string error */ 270*7c478bd9Sstevel@tonic-gate return (0); 271*7c478bd9Sstevel@tonic-gate } 272*7c478bd9Sstevel@tonic-gate /* 273*7c478bd9Sstevel@tonic-gate * Call common routine with r_stp (step) structure 274*7c478bd9Sstevel@tonic-gate */ 275*7c478bd9Sstevel@tonic-gate return (dhl_doit(str, &(((struct regex_comp *) ep)->r_stp), 276*7c478bd9Sstevel@tonic-gate ((locs != NULL) ? 
REG_NOTBOL : 0))); 277*7c478bd9Sstevel@tonic-gate } 278*7c478bd9Sstevel@tonic-gate 279*7c478bd9Sstevel@tonic-gate /* 280*7c478bd9Sstevel@tonic-gate * dhl_advance: implement advance 281*7c478bd9Sstevel@tonic-gate */ 282*7c478bd9Sstevel@tonic-gate static int 283*7c478bd9Sstevel@tonic-gate dhl_advance(str, ep) 284*7c478bd9Sstevel@tonic-gate const char *str; /* characters to be checked for a match */ 285*7c478bd9Sstevel@tonic-gate const char *ep; /* compiled RE from dhl_compile() */ 286*7c478bd9Sstevel@tonic-gate { 287*7c478bd9Sstevel@tonic-gate int rv; 288*7c478bd9Sstevel@tonic-gate /* 289*7c478bd9Sstevel@tonic-gate * Check if we're passed a null ep 290*7c478bd9Sstevel@tonic-gate */ 291*7c478bd9Sstevel@tonic-gate if (ep == NULL) { 292*7c478bd9Sstevel@tonic-gate regerrno = 41; /* No remembered search string error */ 293*7c478bd9Sstevel@tonic-gate return (0); 294*7c478bd9Sstevel@tonic-gate } 295*7c478bd9Sstevel@tonic-gate /* 296*7c478bd9Sstevel@tonic-gate * Call common routine with r_adv (advance) structure 297*7c478bd9Sstevel@tonic-gate */ 298*7c478bd9Sstevel@tonic-gate rv = dhl_doit(str, &(((struct regex_comp *) ep)->r_adv), 0); 299*7c478bd9Sstevel@tonic-gate loc1 = NULL; /* Clear it per the compile man page */ 300*7c478bd9Sstevel@tonic-gate return (rv); 301*7c478bd9Sstevel@tonic-gate } 302*7c478bd9Sstevel@tonic-gate 303*7c478bd9Sstevel@tonic-gate /* 304*7c478bd9Sstevel@tonic-gate * dhl_doit - common code for step and advance 305*7c478bd9Sstevel@tonic-gate */ 306*7c478bd9Sstevel@tonic-gate static int 307*7c478bd9Sstevel@tonic-gate dhl_doit(str, rep, flags) 308*7c478bd9Sstevel@tonic-gate const char *str; /* characters to be checked for a match */ 309*7c478bd9Sstevel@tonic-gate const regex_t *rep; 310*7c478bd9Sstevel@tonic-gate const int flags; /* flags to be passed to regexec directly */ 311*7c478bd9Sstevel@tonic-gate { 312*7c478bd9Sstevel@tonic-gate int rv; 313*7c478bd9Sstevel@tonic-gate int i; 314*7c478bd9Sstevel@tonic-gate regmatch_t *prm; /* ptr to current 
regmatch_t */ 315*7c478bd9Sstevel@tonic-gate 316*7c478bd9Sstevel@tonic-gate /* 317*7c478bd9Sstevel@tonic-gate * Check if we're passed a null regex_t 318*7c478bd9Sstevel@tonic-gate */ 319*7c478bd9Sstevel@tonic-gate if (rep == NULL) { 320*7c478bd9Sstevel@tonic-gate regerrno = 41; /* No remembered search string error */ 321*7c478bd9Sstevel@tonic-gate return (0); 322*7c478bd9Sstevel@tonic-gate } 323*7c478bd9Sstevel@tonic-gate 324*7c478bd9Sstevel@tonic-gate regerrno = 0; 325*7c478bd9Sstevel@tonic-gate prm = &rm[0]; 326*7c478bd9Sstevel@tonic-gate 327*7c478bd9Sstevel@tonic-gate if ((rv = regexec(rep, str, SEPSIZE, prm, flags)) != REG_OK) { 328*7c478bd9Sstevel@tonic-gate if (rv == REG_NOMATCH) 329*7c478bd9Sstevel@tonic-gate return (0); 330*7c478bd9Sstevel@tonic-gate regerrno = map_errnos(rv); 331*7c478bd9Sstevel@tonic-gate return (0); 332*7c478bd9Sstevel@tonic-gate } 333*7c478bd9Sstevel@tonic-gate 334*7c478bd9Sstevel@tonic-gate loc1 = (char *)str + prm->rm_so; 335*7c478bd9Sstevel@tonic-gate loc2 = (char *)str + prm->rm_eo; 336*7c478bd9Sstevel@tonic-gate 337*7c478bd9Sstevel@tonic-gate /* 338*7c478bd9Sstevel@tonic-gate * Now we need to fill up the bra lists with all of the sub re's 339*7c478bd9Sstevel@tonic-gate * Note we subtract nsub -1, and preincrement prm. 
340*7c478bd9Sstevel@tonic-gate */ 341*7c478bd9Sstevel@tonic-gate for (i = 0; i <= rep->re_nsub; i++) { 342*7c478bd9Sstevel@tonic-gate prm++; /* XXX inc past first subexp */ 343*7c478bd9Sstevel@tonic-gate braslist[i] = (char *)str + prm->rm_so; 344*7c478bd9Sstevel@tonic-gate braelist[i] = (char *)str + prm->rm_eo; 345*7c478bd9Sstevel@tonic-gate if (i >= SEPSIZE) { 346*7c478bd9Sstevel@tonic-gate regerrno = 50; /* regex overflow */ 347*7c478bd9Sstevel@tonic-gate return (0); 348*7c478bd9Sstevel@tonic-gate } 349*7c478bd9Sstevel@tonic-gate } 350*7c478bd9Sstevel@tonic-gate 351*7c478bd9Sstevel@tonic-gate /* 352*7c478bd9Sstevel@tonic-gate * Inverse logic, a zero from regexec - success, is a 1 353*7c478bd9Sstevel@tonic-gate * from advance/step. 354*7c478bd9Sstevel@tonic-gate */ 355*7c478bd9Sstevel@tonic-gate 356*7c478bd9Sstevel@tonic-gate return (rv == 0); 357*7c478bd9Sstevel@tonic-gate } 358*7c478bd9Sstevel@tonic-gate 359*7c478bd9Sstevel@tonic-gate 360*7c478bd9Sstevel@tonic-gate /* 361*7c478bd9Sstevel@tonic-gate * regerrno to compile/step error mapping: 362*7c478bd9Sstevel@tonic-gate * This is really a big compromise. Some errors don't map at all 363*7c478bd9Sstevel@tonic-gate * like regcomp error 15 is generated by both compile() error types 364*7c478bd9Sstevel@tonic-gate * 44 & 46. So which one should we map to? 365*7c478bd9Sstevel@tonic-gate * Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions 366*7c478bd9Sstevel@tonic-gate * To do your errors right use xregerr() to get the regcomp error 367*7c478bd9Sstevel@tonic-gate * string and print that. 
 *
 *    | regcomp/regexec                  | Compile/step/advance
 *    +----------------------------------+-------------------------------------
 *  0 REG_OK       Pattern matched         1 - Pattern matched
 *  1 REG_NOMATCH  No match                0 - Pattern didn't match
 *  2 REG_ECOLLATE Bad collation element  67 - Returned by compile on mbtowc err
 *  3 REG_EESCAPE  trailing \ in pattern  45 - } expected after \.
 *  4 REG_ENEWLINE \n before end pattern  36 - Illegal or missing delimiter.
 *  5 REG_ENSUB    Over 9 \( \) pairs     43 - Too many \(
 *  6 REG_ESUBREG  Bad number in \[0-9]   25 - ``\digit'' out of range.
 *  7 REG_EBRACK   [ ] imbalance          49 - [ ] imbalance.
 *  8 REG_EPAREN   ( ) imbalance          42 - \(~\) imbalance.
 *  9 REG_EBRACE   \{ \} imbalance        45 - } expected after \.
 * 10 REG_ERANGE   bad range endpoint     11 - Range endpoint too large.
 * 11 REG_ESPACE   no memory for pattern  50 - Regular expression overflow.
 * 12 REG_BADRPT   invalid repetition     36 - Illegal or missing delimiter.
 * 13 REG_ECTYPE   invalid char-class     67 - illegal byte sequence
 * 14 REG_BADPAT   syntax error           50 - Regular expression overflow.
 * 15 REG_BADBR    \{ \} contents bad     46 - First number exceeds 2nd in \{~\}
 * 16 REG_EFATAL   internal error         50 - Regular expression overflow.
 * 17 REG_ECHAR    bad multibyte char     67 - illegal byte sequence
 * 18 REG_STACK    stack overflow         50 - Regular expression overflow.
 * 19 REG_ENOSYS   function not supported 50 - Regular expression overflow.
 *
 * For reference here's the compile/step errno's.  We don't generate
 * 41 here - it's done earlier, nor 44 since we can't tell it from 46.
 *
 * 11 - Range endpoint too large.
 * 16 - Bad number.
 * 25 - ``\digit'' out of range.
 * 36 - Illegal or missing delimiter.
 * 41 - No remembered search string.
 * 42 - \(~\) imbalance.
 * 43 - Too many \(.
 * 44 - More than 2 numbers given in "\{~\}"
 * 45 - } expected after \.
 * 46 - First number exceeds 2nd in "\{~\}"
 * 49 - [ ] imbalance.
 * 50 - Regular expression overflow.
407*7c478bd9Sstevel@tonic-gate */ 408*7c478bd9Sstevel@tonic-gate 409*7c478bd9Sstevel@tonic-gate static int 410*7c478bd9Sstevel@tonic-gate map_errnos(int Errno) 411*7c478bd9Sstevel@tonic-gate { 412*7c478bd9Sstevel@tonic-gate switch (Errno) { 413*7c478bd9Sstevel@tonic-gate case REG_ECOLLATE: 414*7c478bd9Sstevel@tonic-gate regerrno = 67; 415*7c478bd9Sstevel@tonic-gate break; 416*7c478bd9Sstevel@tonic-gate case REG_EESCAPE: 417*7c478bd9Sstevel@tonic-gate regerrno = 45; 418*7c478bd9Sstevel@tonic-gate break; 419*7c478bd9Sstevel@tonic-gate case REG_ENEWLINE: 420*7c478bd9Sstevel@tonic-gate regerrno = 36; 421*7c478bd9Sstevel@tonic-gate break; 422*7c478bd9Sstevel@tonic-gate case REG_ENSUB: 423*7c478bd9Sstevel@tonic-gate regerrno = 43; 424*7c478bd9Sstevel@tonic-gate break; 425*7c478bd9Sstevel@tonic-gate case REG_ESUBREG: 426*7c478bd9Sstevel@tonic-gate regerrno = 25; 427*7c478bd9Sstevel@tonic-gate break; 428*7c478bd9Sstevel@tonic-gate case REG_EBRACK: 429*7c478bd9Sstevel@tonic-gate regerrno = 49; 430*7c478bd9Sstevel@tonic-gate break; 431*7c478bd9Sstevel@tonic-gate case REG_EPAREN: 432*7c478bd9Sstevel@tonic-gate regerrno = 42; 433*7c478bd9Sstevel@tonic-gate break; 434*7c478bd9Sstevel@tonic-gate case REG_EBRACE: 435*7c478bd9Sstevel@tonic-gate regerrno = 45; 436*7c478bd9Sstevel@tonic-gate break; 437*7c478bd9Sstevel@tonic-gate case REG_ERANGE: 438*7c478bd9Sstevel@tonic-gate regerrno = 11; 439*7c478bd9Sstevel@tonic-gate break; 440*7c478bd9Sstevel@tonic-gate case REG_ESPACE: 441*7c478bd9Sstevel@tonic-gate regerrno = 50; 442*7c478bd9Sstevel@tonic-gate break; 443*7c478bd9Sstevel@tonic-gate case REG_BADRPT: 444*7c478bd9Sstevel@tonic-gate regerrno = 36; 445*7c478bd9Sstevel@tonic-gate break; 446*7c478bd9Sstevel@tonic-gate case REG_ECTYPE: 447*7c478bd9Sstevel@tonic-gate regerrno = 67; 448*7c478bd9Sstevel@tonic-gate break; 449*7c478bd9Sstevel@tonic-gate case REG_BADPAT: 450*7c478bd9Sstevel@tonic-gate regerrno = 50; 451*7c478bd9Sstevel@tonic-gate break; 452*7c478bd9Sstevel@tonic-gate case 
REG_BADBR: 453*7c478bd9Sstevel@tonic-gate regerrno = 46; 454*7c478bd9Sstevel@tonic-gate break; 455*7c478bd9Sstevel@tonic-gate case REG_EFATAL: 456*7c478bd9Sstevel@tonic-gate regerrno = 50; 457*7c478bd9Sstevel@tonic-gate break; 458*7c478bd9Sstevel@tonic-gate case REG_ECHAR: 459*7c478bd9Sstevel@tonic-gate regerrno = 67; 460*7c478bd9Sstevel@tonic-gate break; 461*7c478bd9Sstevel@tonic-gate case REG_STACK: 462*7c478bd9Sstevel@tonic-gate regerrno = 50; 463*7c478bd9Sstevel@tonic-gate break; 464*7c478bd9Sstevel@tonic-gate case REG_ENOSYS: 465*7c478bd9Sstevel@tonic-gate regerrno = 50; 466*7c478bd9Sstevel@tonic-gate break; 467*7c478bd9Sstevel@tonic-gate default: 468*7c478bd9Sstevel@tonic-gate regerrno = 50; 469*7c478bd9Sstevel@tonic-gate break; 470*7c478bd9Sstevel@tonic-gate } 471*7c478bd9Sstevel@tonic-gate return (regerrno); 472*7c478bd9Sstevel@tonic-gate } 473*7c478bd9Sstevel@tonic-gate 474*7c478bd9Sstevel@tonic-gate /* 475*7c478bd9Sstevel@tonic-gate * This is a routine to clean up the subtle substructure of the struct 476*7c478bd9Sstevel@tonic-gate * regex_comp type for use by clients of this module. Since the struct 477*7c478bd9Sstevel@tonic-gate * type is private, we use a generic interface, and trust the 478*7c478bd9Sstevel@tonic-gate * application to be damn sure that this operation is valid for the 479*7c478bd9Sstevel@tonic-gate * named memory. 
480*7c478bd9Sstevel@tonic-gate */ 481*7c478bd9Sstevel@tonic-gate 482*7c478bd9Sstevel@tonic-gate void 483*7c478bd9Sstevel@tonic-gate regex_comp_free(void * a) 484*7c478bd9Sstevel@tonic-gate { 485*7c478bd9Sstevel@tonic-gate /* 486*7c478bd9Sstevel@tonic-gate * Free any data being held for previous search strings 487*7c478bd9Sstevel@tonic-gate */ 488*7c478bd9Sstevel@tonic-gate 489*7c478bd9Sstevel@tonic-gate if (((struct regex_comp *) a) == NULL) { 490*7c478bd9Sstevel@tonic-gate return; 491*7c478bd9Sstevel@tonic-gate } 492*7c478bd9Sstevel@tonic-gate 493*7c478bd9Sstevel@tonic-gate regfree(&((struct regex_comp *)a)->r_stp); 494*7c478bd9Sstevel@tonic-gate regfree(&((struct regex_comp *)a)->r_adv); 495*7c478bd9Sstevel@tonic-gate } 496