/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * grep - pattern matching program - combined grep, egrep, and fgrep. * Based on MKS grep command, with XCU & Solaris mods. */ /* * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. * */ /* * Copyright 2020 Peter Tribble. * Copyright 2018 RackTop Systems. * Copyright 2018 Nexenta Systems, Inc. * Copyright 2013 Damian Bogel. All rights reserved. * Copyright 2020 Oxide Computer Company */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define STDIN_FILENAME gettext("(standard input)") #define BSIZE 512 /* Size of block for -b */ #define BUFSIZE 8192 /* Input buffer size */ #define MAX_DEPTH 1000 /* how deep to recurse */ #define AFTER 1 /* 'After' Context */ #define BEFORE 2 /* 'Before' Context */ #define CONTEXT (AFTER|BEFORE) /* Full Context */ #define M_CSETSIZE 256 /* singlebyte chars */ static int bmglen; /* length of BMG pattern */ static char *bmgpat; /* BMG pattern */ static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ typedef struct _PATTERN { char *pattern; /* original pattern */ struct _PATTERN *next; regex_t re; /* compiled pattern */ } PATTERN; static PATTERN *patterns; static char errstr[128]; /* regerror string buffer */ static int regflags = 0; /* regcomp options */ static int matched = 0; /* return of the grep() */ static int errors = 0; /* count of errors */ static uchar_t fgrep = 0; /* Invoked as fgrep */ static uchar_t egrep = 0; /* Invoked as egrep */ static boolean_t nvflag = B_TRUE; /* Print matching lines */ static uchar_t cflag; /* Count of matches */ static uchar_t iflag; /* Case insensitve matching */ static uchar_t Hflag; /* Precede lines by file name */ static uchar_t hflag; /* Suppress printing of filename */ static uchar_t lflag; /* Print file names of matches */ static uchar_t Lflag; /* Print file names of non-matches */ static uchar_t nflag; /* Precede lines by line number */ static uchar_t rflag; /* Search directories recursively */ static uchar_t bflag; /* Precede matches by block number */ static uchar_t sflag; /* Suppress file error messages */ static uchar_t qflag; /* Suppress standard output */ static uchar_t wflag; /* Search for expression as a word */ static uchar_t xflag; /* Anchoring */ static uchar_t Eflag; /* Egrep or -E flag */ static uchar_t Fflag; /* Fgrep or -F flag */ static uchar_t Rflag; /* Like rflag, but follow symlinks */ static uchar_t outfn; /* Put out file name */ static uchar_t conflag; /* show context of matches */ static uchar_t oflag; /* Print only matching output */ static char *cmdname; static char *stdin_label; /* Optional lable for stdin */ static int use_bmg, mblocale; static size_t prntbuflen, conbuflen; static unsigned long conalen, conblen, conmatches; static char *prntbuf, *conbuf; static void addfile(const char *fn); static void addpattern(char *s); static void fixpatterns(void); static void usage(void); static int grep(int, const char *); static void bmgcomp(char *, int); static char *bmgexec(char *, char *); static int recursive(const char *, const struct stat *, int, struct FTW *); static void process_path(const char *); static void process_file(const char *, int); /* * These are values that we use to return from getopt_long. They start at * SHRT_MAX to avoid any possible conflict with the normal options. These are * used for long options that have no short option equivalent. */ enum grep_opts { OPT_LABEL = SHRT_MAX + 1 }; static struct option grep_options[] = { { "label", required_argument, NULL, OPT_LABEL }, { NULL } }; /* * mainline for grep */ int main(int argc, char **argv) { char *ap, *test; int c; int fflag = 0; int i, n_pattern = 0, n_file = 0; char **pattern_list = NULL; char **file_list = NULL; (void) setlocale(LC_ALL, ""); #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ #endif (void) textdomain(TEXT_DOMAIN); /* * true if this is running on the multibyte locale */ mblocale = (MB_CUR_MAX > 1); /* * Skip leading slashes */ cmdname = argv[0]; if (ap = strrchr(cmdname, '/')) cmdname = ap + 1; ap = cmdname; /* * Detect egrep/fgrep via command name, map to -E and -F options. */ if (*ap == 'e' || *ap == 'E') { regflags |= REG_EXTENDED; egrep++; } else { if (*ap == 'f' || *ap == 'F') { fgrep++; regflags |= REG_NOSPEC; } } /* check for non-standard "-line-count" option */ for (i = 1; i < argc; i++) { if (strcmp(argv[i], "--") == 0) break; /* isdigit() check prevents negative arguments */ if ((argv[i][0] == '-') && isdigit(argv[i][1])) { if (strlen(&argv[i][1]) != strspn(&argv[i][1], "0123456789")) { (void) fprintf(stderr, gettext( "%s: Bad number flag\n"), argv[0]); usage(); } errno = 0; conalen = conblen = strtoul(&argv[i][1], (char **)NULL, 10); if (errno != 0 || conalen >= ULONG_MAX) { (void) fprintf(stderr, gettext( "%s: Bad context argument\n"), argv[0]); } else if (conalen) conflag = CONTEXT; while (i < argc) { argv[i] = argv[i + 1]; i++; } argc--; } } while ((c = getopt_long(argc, argv, "+vwchHilLnrbse:f:qxEFIRA:B:C:o", grep_options, NULL)) != EOF) { unsigned long tval; switch (c) { case 'v': /* POSIX: negate matches */ nvflag = B_FALSE; break; case 'c': /* POSIX: write count */ cflag++; break; case 'i': /* POSIX: ignore case */ iflag++; regflags |= REG_ICASE; break; /* * The last of -l and -L are honored. */ case 'l': /* POSIX: Write filenames only */ lflag++; Lflag = 0; break; case 'L': /* Write non-matching filenames */ Lflag++; lflag = 0; break; case 'n': /* POSIX: Write line numbers */ nflag++; break; case 'r': /* Solaris: search recursively */ rflag++; break; case 'b': /* Solaris: Write file block numbers */ bflag++; break; case 's': /* POSIX: No error msgs for files */ sflag++; break; case 'e': /* POSIX: pattern list */ n_pattern++; pattern_list = realloc(pattern_list, sizeof (char *) * n_pattern); if (pattern_list == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } *(pattern_list + n_pattern - 1) = optarg; break; case 'f': /* POSIX: pattern file */ fflag = 1; n_file++; file_list = realloc(file_list, sizeof (char *) * n_file); if (file_list == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } *(file_list + n_file - 1) = optarg; break; /* based on options order h or H is set as in GNU grep */ case 'h': /* Solaris: suppress printing of file name */ hflag = 1; Hflag = 0; break; /* Solaris: precede every match with file name */ case 'H': Hflag = 1; hflag = 0; break; case 'q': /* POSIX: quiet: status only */ qflag++; break; case 'w': /* Solaris: treat pattern as word */ wflag++; break; case 'x': /* POSIX: full line matches */ xflag++; break; case 'E': /* POSIX: Extended RE's */ regflags |= REG_EXTENDED; Eflag++; break; case 'F': /* POSIX: strings, not RE's */ Fflag++; regflags |= REG_NOSPEC; break; case 'R': /* Solaris: like rflag, but follow symlinks */ Rflag++; rflag++; break; case 'A': /* print N lines after each match */ errno = 0; conalen = strtoul(optarg, &test, 10); /* *test will be non-null if optarg is negative */ if (errno != 0 || *test != '\0' || conalen >= ULONG_MAX) { (void) fprintf(stderr, gettext( "%s: Bad context argument: %s\n"), argv[0], optarg); exit(2); } if (conalen) conflag |= AFTER; else conflag &= ~AFTER; break; case 'B': /* print N lines before each match */ errno = 0; conblen = strtoul(optarg, &test, 10); /* *test will be non-null if optarg is negative */ if (errno != 0 || *test != '\0' || conblen >= ULONG_MAX) { (void) fprintf(stderr, gettext( "%s: Bad context argument: %s\n"), argv[0], optarg); exit(2); } if (conblen) conflag |= BEFORE; else conflag &= ~BEFORE; break; case 'C': /* print N lines around each match */ errno = 0; tval = strtoul(optarg, &test, 10); /* *test will be non-null if optarg is negative */ if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) { (void) fprintf(stderr, gettext( "%s: Bad context argument: %s\n"), argv[0], optarg); exit(2); } if (tval) { if ((conflag & BEFORE) == 0) conblen = tval; if ((conflag & AFTER) == 0) conalen = tval; conflag = CONTEXT; } break; case OPT_LABEL: stdin_label = optarg; break; case 'o': oflag++; break; default: usage(); } } /* * If we're invoked as egrep or fgrep we need to do some checks */ if (egrep || fgrep) { /* * Use of -E or -F with egrep or fgrep is illegal */ if (Eflag || Fflag) usage(); /* * Don't allow use of wflag with egrep / fgrep */ if (wflag) usage(); /* * For Solaris the -s flag is equivalent to XCU -q */ if (sflag) qflag++; /* * done with above checks - set the appropriate flags */ if (egrep) Eflag++; else /* Else fgrep */ Fflag++; } if (wflag && (Eflag || Fflag)) { /* * -w cannot be specified with grep -F */ usage(); } /* * -E and -F flags are mutually exclusive - check for this */ if (Eflag && Fflag) usage(); /* * -l or -L overrides -H like in GNU grep. It also overrides -o. */ if (lflag || Lflag) { Hflag = 0; oflag = 0; } /* * -c, -l and -q flags are mutually exclusive * We have -c override -l like in Solaris. * -q overrides -l & -c programmatically in grep() function. * -c overrides -o in GNU grep, we honor that. */ if (cflag) { lflag = 0; Lflag = 0; oflag = 0; } /* * If -o is set then we ignore all context related options, like other * greps. */ if (oflag) { conflag = 0; } /* * These flags are a semantic mess with no clear answers as to their * behvaior. Based on some experimentation GNU grep will exit zero if a * non-match is present, but never print anything. BSD grep seems to * exit 1 and not print anything, even if there would have been a match. * Also, you probably don't want to ask about what happens with grep -x * -o -v, some implementations seem to just ignore -v. */ if (oflag && !nvflag) { (void) fprintf(stderr, gettext("%s: the combination of -v and " "-o is not supported currently\n"), argv[0]); exit(2); } argv += optind - 1; argc -= optind - 1; /* * Now handling -e and -f option */ if (pattern_list) { for (i = 0; i < n_pattern; i++) { addpattern(pattern_list[i]); } free(pattern_list); } if (file_list) { for (i = 0; i < n_file; i++) { addfile(file_list[i]); } free(file_list); } /* * No -e or -f? Make sure there is one more arg, use it as the pattern. */ if (patterns == NULL && !fflag) { if (argc < 2) usage(); addpattern(argv[1]); argc--; argv++; } /* * Compile Patterns and also decide if BMG can be used */ fixpatterns(); if (stdin_label == NULL) { stdin_label = STDIN_FILENAME; } /* Process all files: stdin, or rest of arg list */ if (argc < 2) { matched = grep(0, stdin_label); } else { if (Hflag || (argc > 2 && hflag == 0)) outfn = 1; /* Print filename on match line */ for (argv++; *argv != NULL; argv++) { process_path(*argv); } } /* * Return() here is used instead of exit */ (void) fflush(stdout); if (errors) return (2); return (matched ? 0 : 1); } static void process_path(const char *path) { struct stat st; int walkflags = FTW_CHDIR; char *buf = NULL; if (rflag) { if (stat(path, &st) != -1 && (st.st_mode & S_IFMT) == S_IFDIR) { if (!hflag) outfn = 1; /* Print filename unless -h */ /* * Add trailing slash if arg * is directory, to resolve symlinks. */ if (path[strlen(path) - 1] != '/') { (void) asprintf(&buf, "%s/", path); if (buf != NULL) path = buf; } /* * Search through subdirs if path is directory. * Don't follow symlinks if Rflag is not set. */ if (!Rflag) walkflags |= FTW_PHYS; if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { if (!sflag) (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), cmdname, path); errors = 1; } return; } } process_file(path, 0); } /* * Read and process all files in directory recursively. */ static int recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) { /* * Process files and follow symlinks if Rflag set. */ if (info != FTW_F) { /* Report broken symlinks and unreadable files */ if (!sflag && (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), cmdname, name); } return (0); } /* Skip devices and pipes if Rflag is not set */ if (!Rflag && !S_ISREG(statp->st_mode)) return (0); /* Pass offset to relative name from FTW_CHDIR */ process_file(name, ftw->base); return (0); } /* * Opens file and call grep function. */ static void process_file(const char *name, int base) { int fd; if ((fd = open(name + base, O_RDONLY)) == -1) { errors = 1; if (!sflag) /* Silent mode */ (void) fprintf(stderr, gettext( "%s: can't open \"%s\"\n"), cmdname, name); return; } matched |= grep(fd, name); (void) close(fd); if (ferror(stdout)) { (void) fprintf(stderr, gettext( "%s: error writing to stdout\n"), cmdname); (void) fflush(stdout); exit(2); } } /* * Add a file of strings to the pattern list. */ static void addfile(const char *fn) { FILE *fp; char *inbuf; char *bufp; size_t bufsiz, buflen, bufused; /* * Open the pattern file */ if ((fp = fopen(fn, "r")) == NULL) { (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), cmdname, fn); exit(2); } bufsiz = BUFSIZE; if ((inbuf = malloc(bufsiz)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } bufp = inbuf; bufused = 0; /* * Read in the file, reallocing as we need more memory */ while (fgets(bufp, bufsiz - bufused, fp) != NULL) { buflen = strlen(bufp); bufused += buflen; if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { /* * if this line does not fit to the buffer, * realloc larger buffer */ bufsiz += BUFSIZE; if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } bufp = inbuf + bufused; continue; } if (bufp[buflen - 1] == '\n') { bufp[--buflen] = '\0'; } addpattern(inbuf); bufp = inbuf; bufused = 0; } free(inbuf); free(prntbuf); free(conbuf); (void) fclose(fp); } /* * Add a string to the pattern list. */ static void addpattern(char *s) { PATTERN *pp; char *wordbuf; char *np; for (; ; ) { np = strchr(s, '\n'); if (np != NULL) *np = '\0'; if ((pp = malloc(sizeof (PATTERN))) == NULL) { (void) fprintf(stderr, gettext( "%s: out of memory\n"), cmdname); exit(2); } if (wflag) { /* * Solaris wflag support: Add '<' '>' to pattern to * select it as a word. Doesn't make sense with -F * but we're Libertarian. */ size_t slen, wordlen; slen = strlen(s); wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ if ((wordbuf = malloc(wordlen)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } (void) strcpy(wordbuf, "\\<"); (void) strcpy(wordbuf + 2, s); (void) strcpy(wordbuf + 2 + slen, "\\>"); } else { if ((wordbuf = strdup(s)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } } pp->pattern = wordbuf; pp->next = patterns; patterns = pp; if (np == NULL) break; s = np + 1; } } /* * Check if a given grep pattern that is being used with egrep or grep can be * considered 'simple'. That is there are no characters that would be treated * differently from fgrep. In this particular case, we're a little bit * conservative and look for characters that are: * * o 7-bit ASCII * o Letters * o Numbers * o Meta-characters not used in BREs/EREs: !, @, #, /, -, _, <, >, = * * This can certianly be made more complex and less restrictive with additional * testing. */ static boolean_t simple_pattern(const char *str) { for (; *str != '\0'; str++) { if (!isascii(*str)) { return (B_FALSE); } if (isalnum(*str)) { continue; } switch (*str) { case '!': case '@': case '#': case '/': case '-': case '_': case '<': case '>': case '=': continue; default: return (B_FALSE); } } return (B_TRUE); } /* * Fix patterns. * Must do after all arguments read, in case later -i option. */ static void fixpatterns(void) { PATTERN *pp; int rv, fix_pattern; /* * Decide if we are able to run the Boyer-Moore-Gosper algorithm. * Use the Boyer-Moore-Gosper algorithm if: * - fgrep or non-BRE/ERE (Fflag || simple_pattern()) * - singlebyte locale (!mblocale) * - no ignoring case (!iflag) * - no printing line numbers (!nflag) * - no negating the output (nvflag) * - only one pattern (patterns != NULL && patterns->next == * NULL) * - non zero length pattern (strlen(patterns->pattern) != 0) * - no context required (conflag == 0) * - no exact matches (!oflag) * - no word matches (!wlag) */ use_bmg = !mblocale && !iflag && !nflag && nvflag && !oflag && (patterns != NULL && patterns->next == NULL) && !wflag && (strlen(patterns->pattern) != 0) && conflag == 0 && (Fflag || simple_pattern(patterns->pattern)); if (use_bmg) { return; } /* * Fix the specified pattern if -x is specified. */ fix_pattern = !Fflag && xflag; for (pp = patterns; pp != NULL; pp = pp->next) { if (fix_pattern) { char *cp, *cq; size_t plen, nplen; plen = strlen(pp->pattern); /* '^' pattern '$' */ nplen = 1 + plen + 1 + 1; if ((cp = malloc(nplen)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } cq = cp; *cq++ = '^'; cq = strcpy(cq, pp->pattern) + plen; *cq++ = '$'; *cq = '\0'; free(pp->pattern); pp->pattern = cp; } /* * Compile the regular expression, give an informative error * message, and exit if it didn't compile. */ if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); (void) fprintf(stderr, gettext("%s: RE error in %s: %s\n"), cmdname, pp->pattern, errstr); exit(2); } free(pp->pattern); } } /* * Search a newline from the beginning of the string */ static char * find_nl(const char *ptr, size_t len) { while (len-- != 0) { if (*ptr++ == '\n') { return ((char *)--ptr); } } return (NULL); } /* * Search a newline from the end of the string */ static char * rfind_nl(const char *ptr, size_t len) { const char *uptr = ptr + len; while (len--) { if (*--uptr == '\n') { return ((char *)uptr); } } return (NULL); } /* * Do grep on a single file. * Return true in any lines matched. * * We have two strategies: * The fast one is used when we have a single pattern with * a string known to occur in the pattern. We can then * do a BMG match on the whole buffer. * This is an order of magnitude faster. * Otherwise we split the buffer into lines, * and check for a match on each line. */ static int grep(int fd, const char *fn) { PATTERN *pp; off_t data_len; /* length of the data chunk */ off_t line_len; /* length of the current line */ off_t line_offset; /* current line's offset from the beginning */ off_t blkoffset; /* line_offset but context-compatible */ long long lineno, linenum; long long matches = 0; /* Number of matching lines */ long long conacnt = 0, conbcnt = 0; /* context line count */ int newlinep; /* 0 if the last line of file has no newline */ char *ptr, *ptrend, *prntptr, *prntptrend; char *nextptr = NULL, *nextend = NULL; char *conptr = NULL, *conptrend = NULL; char *matchptr = NULL; int conaprnt = 0, conbprnt = 0, lastmatch = 0; boolean_t nearmatch; /* w/in N+1 of last match */ boolean_t havematch = B_FALSE; /* have a match in context */ boolean_t sameline = B_FALSE; /* Are we still on the same line? */ size_t prntlen; if (patterns == NULL) return (0); /* no patterns to match -- just return */ pp = patterns; if (use_bmg) { bmgcomp(pp->pattern, strlen(pp->pattern)); } if (prntbuf == NULL) { prntbuflen = BUFSIZE; if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } } if (conflag != 0 && (conbuf == NULL)) { conbuflen = BUFSIZE; if ((conbuf = malloc(BUFSIZE+1)) == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } } nearmatch = (conmatches != 0); blkoffset = line_offset = 0; lineno = 0; linenum = 1; newlinep = 1; data_len = 0; for (; ; ) { long count; off_t offset = 0; char separate; char *startmatch = NULL; /* -o, start of match */ char *postmatch = NULL; /* -o, character after match */ boolean_t last_ctx = B_FALSE, eof = B_FALSE; if (data_len == 0) { /* * If no data in the buffer, reset ptr */ ptr = prntbuf; if (conflag != 0 && conptr == NULL) { conptr = conbuf; conptrend = conptr - 1; } } if (ptr == prntbuf) { /* * The current data chunk starts from prntbuf. * This means either the buffer has no data * or the buffer has no newline. * So, read more data from input. */ count = read(fd, ptr + data_len, prntbuflen - data_len); if (count < 0) { /* read error */ if (cflag) { if (outfn && !rflag) { (void) fprintf(stdout, "%s:", fn); } if (!qflag && !rflag) { (void) fprintf(stdout, "%lld\n", matches); } } return (0); } else if (count == 0) { /* no new data */ eof = B_TRUE; if (data_len == 0) { /* end of file already reached */ if (conflag != 0) { if (conptrend >= conptr) *conptrend = '\n'; last_ctx = B_TRUE; goto L_next_line; } else { goto out; } } /* last line of file has no newline */ ptrend = ptr + data_len; newlinep = 0; goto L_start_process; } offset = data_len; data_len += count; } /* * Look for newline in the chunk * between ptr + offset and ptr + data_len - offset. */ ptrend = find_nl(ptr + offset, data_len - offset); if (ptrend == NULL) { /* no newline found in this chunk */ if (ptr > prntbuf) { /* * Move remaining data to the beginning * of the buffer. * Remaining data lie from ptr for * data_len bytes. */ (void) memmove(prntbuf, ptr, data_len); } if (data_len == prntbuflen) { /* * Not enough room in the buffer */ if (prntbuflen > SIZE_MAX - BUFSIZE) { (void) fprintf(stderr, gettext("%s: buflen would" " overflow\n"), cmdname); exit(2); } prntbuflen += BUFSIZE; prntbuf = realloc(prntbuf, prntbuflen + 1); if (prntbuf == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } } ptr = prntbuf; /* read the next input */ continue; } L_start_process: /* * Beginning of the chunk: ptr * End of the chunk: ptr + data_len * Beginning of the line: ptr * End of the line: ptrend * * conptr: Beginning of the context. * conptrend: If context is empty, conptr - 1 (invalid memory). * Otherwise, Last newline in the context. */ if (use_bmg) { /* * Use Boyer-Moore-Gosper algorithm to find out if * this chunk (not this line) contains the specified * pattern. If not, restart from the last line * of this chunk. */ char *bline; bline = bmgexec(ptr, ptr + data_len); if (bline == NULL) { /* * No pattern found in this chunk. * Need to find the last line * in this chunk. */ ptrend = rfind_nl(ptr, data_len); /* * When this chunk does not contain newline, * ptrend becomes NULL, which should happen * when the last line of file does not end * with a newline. At such a point, * newlinep should have been set to 0. * Therefore, just after jumping to * L_skip_line, the main for-loop quits, * and the line_len value won't be * used. */ line_len = ptrend - ptr; goto L_skip_line; } if (bline > ptrend) { /* * Pattern found not in the first line * of this chunk. * Discard the first line. */ line_len = ptrend - ptr; goto L_skip_line; } /* * Pattern found in the first line of this chunk. * Using this result. */ *ptrend = '\0'; line_len = ptrend - ptr; /* * before jumping to L_next_line, * need to handle xflag if specified */ if (xflag && (line_len != bmglen || strcmp(bmgpat, ptr) != 0)) { /* didn't match */ pp = NULL; } else { pp = patterns; /* to make it happen */ } goto L_next_line; } /* * When using -o, we might actually loop around while still on * the same line. In such a case, we need to make sure we don't * increment the line number. */ if (!sameline) { lineno++; } else { sameline = B_FALSE; } /* * Line starts from ptr and ends at ptrend. * line_len will be the length of the line. */ *ptrend = '\0'; line_len = ptrend - ptr; /* * From now, the process will be performed based * on the line from ptr to ptrend. */ for (pp = patterns; pp; pp = pp->next) { int rv; regmatch_t rm; size_t nmatch = 0; /* * The current implementation of regexec has a higher * cost when you ask for match information. As a result, * we only ask for a match when we know that we need it * specifically. This is always needed for -o because we * rely on it to tell us what we matched. For fgrep -x * we need it so we can determine whether we matched the * entire line. */ if (oflag || (Fflag && xflag)) nmatch = 1; rv = regexec(&pp->re, ptr, nmatch, &rm, 0); if (rv == REG_OK) { /* * fgrep in this form cannot insert the * metacharacters to verify whether or not we * were the entire line. As a result, we check * the pattern length against the line length. */ if (Fflag && xflag && line_len != rm.rm_eo - rm.rm_so) { continue; } /* matched */ if (oflag) { startmatch = ptr + rm.rm_so; postmatch = ptr + rm.rm_eo; } break; } switch (rv) { case REG_NOMATCH: break; case REG_ECHAR: (void) fprintf(stderr, gettext( "%s: input file \"%s\": line %lld: invalid multibyte character\n"), cmdname, fn, lineno); break; default: (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); (void) fprintf(stderr, gettext( "%s: input file \"%s\": line %lld: %s\n"), cmdname, fn, lineno, errstr); exit(2); } } /* * Context is set up as follows: * For a 'Before' context, we maintain a set of pointers * containing 'N' lines of context. If the current number of * lines contained is greater than N, and N isn't a match, the * start pointer is moved forward to the next newline. * * If we ever find a match, we print out immediately. * 'nearmatch' tells us if we're within N+1 lines of the last * match ; if we are, and we find another match, we don't * separate the matches. 'nearmatch' becomes false when * a line gets rotated out of the context. * * For an 'After' context, we simply wait until we've found a * match, then create a context N+1 lines big. If we don't find * a match within the context, we print out the current context. * Otherwise, we save a reference to the new matching line, * print out the other context, and reset our context pointers * to the new matching line. * * 'nearmatch' becomes false when we find a non-matching line * that isn't a part of any context. * * A full-context is implemented as a combination of the * 'Before' and 'After' context logic. Before we find a match, * we follow the Before logic. When we find a match, we * follow the After logic. 'nearmatch' is handled by the Before * logic. */ if (conflag == 0) goto L_next_line; /* Do we have room to add this line to the context buffer? */ while ((line_len + 1) > (conbuflen - ((conptrend >= conptr) ? conptrend - conbuf : 0))) { char *oldconbuf = conbuf; char *oldconptr = conptr; long tmp = matchptr - conptr; if (conbuflen > SIZE_MAX - BUFSIZE) { (void) fprintf(stderr, gettext("%s: buflen would overflow\n"), cmdname); exit(2); } conbuflen += BUFSIZE; conbuf = realloc(conbuf, conbuflen + 1); if (conbuf == NULL) { (void) fprintf(stderr, gettext("%s: out of memory\n"), cmdname); exit(2); } conptr = conbuf + (conptr - oldconbuf); conptrend = conptr + (conptrend - oldconptr); if (matchptr) matchptr = conptr + tmp; } (void) memcpy(conptrend + 1, ptr, line_len); conptrend += line_len + 1; *conptrend = '\n'; if (nvflag == (pp != NULL)) { /* matched */ if (havematch) { if ((conflag & AFTER) != 0) { conaprnt = 1; nextend = conptrend; conptrend = conptr + lastmatch; nextptr = conptrend + 1; *nextend = '\n'; } } else { if (conflag == AFTER) { conptr = conptrend - (line_len); linenum = lineno; } blkoffset = line_offset - (conptrend - conptr - line_len); } if (conflag == BEFORE) conbprnt = 1; lastmatch = conptrend - conptr; havematch = B_TRUE; goto L_next_line; } if (!havematch) { if ((conflag & BEFORE) != 0) { if (conbcnt >= conblen) { char *tmp = conptr; conptr = find_nl(conptr, conptrend - conptr) + 1; if (bflag) blkoffset += conptr - tmp; linenum++; nearmatch = B_TRUE; } else { conbcnt++; } } if (conflag == AFTER) nearmatch = B_TRUE; } else { if (++conacnt >= conalen && !conaprnt && conalen) conaprnt = 1; else lastmatch = conptrend - conptr; } L_next_line: /* * Here, if pp points to non-NULL, something has been matched * to the pattern. */ if (!last_ctx && nvflag == (pp != NULL)) { matches++; if (!nextend) { if (conflag != 0) { matchptr = conptrend; } else if (oflag) { matchptr = postmatch - 1; } else { matchptr = ptrend; } } } if (pp != NULL && oflag && postmatch == NULL) { (void) fprintf(stderr, gettext("%s: internal error, " "-o set, but failed to find postmatch\n"), cmdname); abort(); } /* * Set up some print context so that we can treat * single-line matches as a zero-N context. * Apply CLI flags to each line of the context. * * For context, we only print if we both have a match and are * either at the end of the data stream, or we've previously * declared that we want to print for a particular context. */ if (havematch && (eof || conaprnt || conbprnt)) { /* * We'd normally do this earlier, but we had to * escape early because we reached the end of the data. */ if (eof && nextptr) conptrend = nextend; prntlen = conptrend - conptr + 1; prntptr = conptr; if (conmatches++ && nearmatch && !cflag) (void) fwrite("--\n", 1, 3, stdout); } else if (conflag == 0 && nvflag == (pp != NULL)) { *ptrend = '\n'; if (oflag) { prntptr = startmatch; } else { prntptr = ptr; } prntlen = line_len + 1; linenum = lineno; blkoffset = line_offset; if (oflag) { blkoffset += startmatch - ptr; } } else if (eof) { /* No match and no more data */ goto out; } else { /* No match, or we're not done building context */ goto L_skip_line; } if (oflag) { prntptrend = postmatch - 1; } else { prntptrend = prntptr - 1; } while (oflag || (prntptrend = find_nl(prntptrend + 1, prntlen)) != NULL) { /* * GNU grep uses '-' for context lines and ':' for * matching lines, so replicate that here. */ if (prntptrend == matchptr) { if (eof && nextptr) { matchptr = nextend; nextptr = NULL; } else { matchptr = NULL; } separate = ':'; } else { separate = '-'; } /* * Handle q, l, and c flags. */ if (qflag) { /* no need to continue */ /* * End of this line is ptrend. * We have read up to ptr + data_len. */ off_t pos; pos = ptr + data_len - (ptrend + 1); (void) lseek(fd, -pos, SEEK_CUR); exit(0); } if (lflag) { (void) printf("%s\n", fn); goto out; } if (Lflag) { goto out; } if (!cflag) { if (Hflag || outfn) { (void) printf("%s%c", fn, separate); } if (bflag) { (void) printf("%lld%c", (offset_t) (blkoffset / BSIZE), separate); } if (nflag) { (void) printf("%lld%c", linenum, separate); } (void) fwrite(prntptr, 1, prntptrend - prntptr + 1, stdout); if (oflag) { (void) fputc('\n', stdout); } } if (ferror(stdout)) { return (0); } /* * With -o we'll only ever take this loop once. Manually * break out. */ if (oflag) { goto L_skip_line; } linenum++; prntlen -= prntptrend - prntptr + 1; blkoffset += prntptrend - prntptr + 1; prntptr = prntptrend + 1; } if (eof) goto out; /* * Update context buffer and variables post-print */ if (conflag != 0) { conptr = conbuf; conaprnt = conbprnt = 0; nearmatch = B_FALSE; conacnt = conbcnt = 0; if (nextptr) { (void) memmove(conbuf, nextptr, nextend - nextptr + 1); blkoffset += nextptr - conptrend - 1; conptrend = conptr + (nextend - nextptr); matchptr = conptrend; linenum = lineno; lastmatch = conptrend - conptr; havematch = B_TRUE; } else { conptrend = conptr - 1; conacnt = 0; lastmatch = 0; havematch = B_FALSE; } nextptr = nextend = NULL; } L_skip_line: if (!newlinep) break; if (oflag && postmatch != NULL) { line_len = postmatch - 1 - ptr; ptr = postmatch; sameline = B_TRUE; } else { ptr = ptrend + 1; } data_len -= line_len + 1; line_offset += line_len + 1; } out: if (cflag) { if (Hflag || outfn) { (void) printf("%s:", fn); } if (!qflag) { (void) printf("%lld\n", matches); } } /* * -L tells us to print the filename only when it doesn't match. So we * run through the normal operationa and then invert it. */ if (Lflag) { if (matches == 0) { (void) printf("%s\n", fn); matches = 1; } else { matches = 0; } } return (matches != 0); } /* * usage message for grep */ static void usage(void) { (void) fprintf(stderr, gettext("usage: %5s"), cmdname); if (!egrep && !fgrep) (void) fprintf(stderr, gettext(" [-E|-F]")); (void) fprintf(stderr, gettext(" [-bchHilLnoqrRsvx] [-A num] [-B num] " "[-C num|-num]\n [--label=name] [-e pattern_list]... " "[-f pattern_file]...\n [pattern_list] [file]...\n")); exit(2); /* NOTREACHED */ } /* * Compile literal pattern into BMG tables */ static void bmgcomp(char *pat, int len) { int i; int tlen; unsigned char *uc = (unsigned char *)pat; bmglen = len; bmgpat = pat; for (i = 0; i < M_CSETSIZE; i++) { bmgtab[i] = len; } len--; for (tlen = len, i = 0; i <= len; i++, tlen--) { bmgtab[*uc++] = tlen; } } /* * BMG search. */ static char * bmgexec(char *str, char *end) { int t; char *k, *s, *p; k = str + bmglen - 1; if (bmglen == 1) { return (memchr(str, bmgpat[0], end - str)); } for (; ; ) { /* inner loop, should be most optimized */ while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { k += t; } if (k >= end) { return (NULL); } for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { if (p == bmgpat) { return (s); } } k++; } /* NOTREACHED */ }