xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision dbe930bf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * grep - pattern matching program - combined grep, egrep, and fgrep.
29  *	Based on MKS grep command, with XCU & Solaris mods.
30  */
31 
32 /*
33  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
34  *
35  */
36 
37 /*
38  * Copyright 2020 Peter Tribble.
39  * Copyright 2018 RackTop Systems.
40  * Copyright 2018 Nexenta Systems, Inc.
41  * Copyright 2013 Damian Bogel. All rights reserved.
42  * Copyright 2020 Oxide Computer Company
43  */
44 
45 #include <string.h>
46 #include <stdlib.h>
47 #include <ctype.h>
48 #include <stdarg.h>
49 #include <regex.h>
50 #include <limits.h>
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #include <fcntl.h>
54 #include <stdio.h>
55 #include <locale.h>
56 #include <wchar.h>
57 #include <errno.h>
58 #include <unistd.h>
59 #include <wctype.h>
60 #include <ftw.h>
61 #include <sys/param.h>
62 #include <getopt.h>
63 
64 #define	STDIN_FILENAME gettext("(standard input)")
65 
66 #define	BSIZE		512		/* Size of block for -b */
67 #define	BUFSIZE		8192		/* Input buffer size */
68 #define	MAX_DEPTH	1000		/* how deep to recurse */
69 
70 #define	AFTER	1			/* 'After' Context */
71 #define	BEFORE	2			/* 'Before' Context */
72 #define	CONTEXT	(AFTER|BEFORE)		/* Full Context */
73 
74 #define	M_CSETSIZE	256		/* singlebyte chars */
75 static int	bmglen;			/* length of BMG pattern */
76 static char	*bmgpat;		/* BMG pattern */
77 static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table */
78 
79 typedef	struct	_PATTERN	{
80 	char	*pattern;		/* original pattern */
81 	struct	_PATTERN	*next;
82 	regex_t	re;			/* compiled pattern */
83 } PATTERN;
84 
85 static PATTERN	*patterns;
86 static char	errstr[128];		/* regerror string buffer */
87 static int	regflags = 0;		/* regcomp options */
88 static int	matched = 0;		/* return of the grep() */
89 static int	errors = 0;		/* count of errors */
90 static uchar_t	fgrep = 0;		/* Invoked as fgrep */
91 static uchar_t	egrep = 0;		/* Invoked as egrep */
92 static boolean_t	nvflag = B_TRUE;	/* Print matching lines */
93 static uchar_t	cflag;			/* Count of matches */
94 static uchar_t	iflag;			/* Case insensitve matching */
95 static uchar_t	Hflag;			/* Precede lines by file name */
96 static uchar_t	hflag;			/* Suppress printing of filename */
97 static uchar_t	lflag;			/* Print file names of matches */
98 static uchar_t	Lflag;			/* Print file names of non-matches */
99 static uchar_t	nflag;			/* Precede lines by line number */
100 static uchar_t	rflag;			/* Search directories recursively */
101 static uchar_t	bflag;			/* Precede matches by block number */
102 static uchar_t	sflag;			/* Suppress file error messages */
103 static uchar_t	qflag;			/* Suppress standard output */
104 static uchar_t	wflag;			/* Search for expression as a word */
105 static uchar_t	xflag;			/* Anchoring */
106 static uchar_t	Eflag;			/* Egrep or -E flag */
107 static uchar_t	Fflag;			/* Fgrep or -F flag */
108 static uchar_t	Rflag;			/* Like rflag, but follow symlinks */
109 static uchar_t	outfn;			/* Put out file name */
110 static uchar_t	conflag;		/* show context of matches */
111 static uchar_t	oflag;			/* Print only matching output */
112 static char	*cmdname;
113 static char	*stdin_label;		/* Optional lable for stdin */
114 
115 static int	use_bmg, mblocale;
116 
117 static size_t	prntbuflen, conbuflen;
118 static unsigned long	conalen, conblen, conmatches;
119 static char	*prntbuf, *conbuf;
120 
121 static void	addfile(const char *fn);
122 static void	addpattern(char *s);
123 static void	fixpatterns(void);
124 static void	usage(void);
125 static int	grep(int, const char *);
126 static void	bmgcomp(char *, int);
127 static char	*bmgexec(char *, char *);
128 static int	recursive(const char *, const struct stat *, int, struct FTW *);
129 static void	process_path(const char *);
130 static void	process_file(const char *, int);
131 
132 /*
133  * These are values that we use to return from getopt_long. They start at
134  * SHRT_MAX to avoid any possible conflict with the normal options. These are
135  * used for long options that have no short option equivalent.
136  */
137 enum grep_opts {
138 	OPT_LABEL = SHRT_MAX + 1
139 };
140 
141 static struct option grep_options[] = {
142 	{ "label", required_argument, NULL, OPT_LABEL },
143 	{ NULL }
144 };
145 
146 /*
147  * mainline for grep
148  */
149 int
main(int argc,char ** argv)150 main(int argc, char **argv)
151 {
152 	char	*ap, *test;
153 	int	c;
154 	int	fflag = 0;
155 	int	i, n_pattern = 0, n_file = 0;
156 	char	**pattern_list = NULL;
157 	char	**file_list = NULL;
158 
159 	(void) setlocale(LC_ALL, "");
160 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
161 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
162 #endif
163 	(void) textdomain(TEXT_DOMAIN);
164 
165 	/*
166 	 * true if this is running on the multibyte locale
167 	 */
168 	mblocale = (MB_CUR_MAX > 1);
169 	/*
170 	 * Skip leading slashes
171 	 */
172 	cmdname = argv[0];
173 	if (ap = strrchr(cmdname, '/'))
174 		cmdname = ap + 1;
175 
176 	ap = cmdname;
177 	/*
178 	 * Detect egrep/fgrep via command name, map to -E and -F options.
179 	 */
180 	if (*ap == 'e' || *ap == 'E') {
181 		regflags |= REG_EXTENDED;
182 		egrep++;
183 	} else {
184 		if (*ap == 'f' || *ap == 'F') {
185 			fgrep++;
186 			regflags |= REG_NOSPEC;
187 		}
188 	}
189 
190 	/* check for non-standard "-line-count" option */
191 	for (i = 1; i < argc; i++) {
192 		if (strcmp(argv[i], "--") == 0)
193 			break;
194 
195 		/* isdigit() check prevents negative arguments */
196 		if ((argv[i][0] == '-') && isdigit(argv[i][1])) {
197 			if (strlen(&argv[i][1]) !=
198 			    strspn(&argv[i][1], "0123456789")) {
199 				(void) fprintf(stderr, gettext(
200 				    "%s: Bad number flag\n"), argv[0]);
201 				usage();
202 			}
203 
204 			errno = 0;
205 			conalen = conblen = strtoul(&argv[i][1], (char **)NULL,
206 			    10);
207 
208 			if (errno != 0 || conalen >= ULONG_MAX) {
209 				(void) fprintf(stderr, gettext(
210 				    "%s: Bad context argument\n"), argv[0]);
211 			} else if (conalen)
212 				conflag = CONTEXT;
213 
214 			while (i < argc) {
215 				argv[i] = argv[i + 1];
216 				i++;
217 			}
218 			argc--;
219 		}
220 	}
221 
222 	while ((c = getopt_long(argc, argv, "+vwchHilLnrbse:f:qxEFIRA:B:C:o",
223 	    grep_options, NULL)) != EOF) {
224 		unsigned long tval;
225 		switch (c) {
226 		case 'v':	/* POSIX: negate matches */
227 			nvflag = B_FALSE;
228 			break;
229 
230 		case 'c':	/* POSIX: write count */
231 			cflag++;
232 			break;
233 
234 		case 'i':	/* POSIX: ignore case */
235 			iflag++;
236 			regflags |= REG_ICASE;
237 			break;
238 
239 		/*
240 		 * The last of -l and -L are honored.
241 		 */
242 		case 'l':	/* POSIX: Write filenames only */
243 			lflag++;
244 			Lflag = 0;
245 			break;
246 
247 		case 'L':	/* Write non-matching filenames */
248 			Lflag++;
249 			lflag = 0;
250 			break;
251 
252 		case 'n':	/* POSIX: Write line numbers */
253 			nflag++;
254 			break;
255 
256 		case 'r':	/* Solaris: search recursively */
257 			rflag++;
258 			break;
259 
260 		case 'b':	/* Solaris: Write file block numbers */
261 			bflag++;
262 			break;
263 
264 		case 's':	/* POSIX: No error msgs for files */
265 			sflag++;
266 			break;
267 
268 		case 'e':	/* POSIX: pattern list */
269 			n_pattern++;
270 			pattern_list = realloc(pattern_list,
271 			    sizeof (char *) * n_pattern);
272 			if (pattern_list == NULL) {
273 				(void) fprintf(stderr,
274 				    gettext("%s: out of memory\n"),
275 				    cmdname);
276 				exit(2);
277 			}
278 			*(pattern_list + n_pattern - 1) = optarg;
279 			break;
280 
281 		case 'f':	/* POSIX: pattern file */
282 			fflag = 1;
283 			n_file++;
284 			file_list = realloc(file_list,
285 			    sizeof (char *) * n_file);
286 			if (file_list == NULL) {
287 				(void) fprintf(stderr,
288 				    gettext("%s: out of memory\n"),
289 				    cmdname);
290 				exit(2);
291 			}
292 			*(file_list + n_file - 1) = optarg;
293 			break;
294 
295 		/* based on options order h or H is set as in GNU grep */
296 		case 'h':	/* Solaris: suppress printing of file name */
297 			hflag = 1;
298 			Hflag = 0;
299 			break;
300 		/* Solaris: precede every match with file name */
301 		case 'H':
302 			Hflag = 1;
303 			hflag = 0;
304 			break;
305 
306 		case 'q':	/* POSIX: quiet: status only */
307 			qflag++;
308 			break;
309 
310 		case 'w':	/* Solaris: treat pattern as word */
311 			wflag++;
312 			break;
313 
314 		case 'x':	/* POSIX: full line matches */
315 			xflag++;
316 			break;
317 
318 		case 'E':	/* POSIX: Extended RE's */
319 			regflags |= REG_EXTENDED;
320 			Eflag++;
321 			break;
322 
323 		case 'F':	/* POSIX: strings, not RE's */
324 			Fflag++;
325 			regflags |= REG_NOSPEC;
326 			break;
327 
328 		case 'R':	/* Solaris: like rflag, but follow symlinks */
329 			Rflag++;
330 			rflag++;
331 			break;
332 
333 		case 'A':	/* print N lines after each match */
334 			errno = 0;
335 			conalen = strtoul(optarg, &test, 10);
336 			/* *test will be non-null if optarg is negative */
337 			if (errno != 0 || *test != '\0' ||
338 			    conalen >= ULONG_MAX) {
339 				(void) fprintf(stderr, gettext(
340 				    "%s: Bad context argument: %s\n"),
341 				    argv[0], optarg);
342 				exit(2);
343 			}
344 			if (conalen)
345 				conflag |= AFTER;
346 			else
347 				conflag &= ~AFTER;
348 			break;
349 		case 'B':	/* print N lines before each match */
350 			errno = 0;
351 			conblen = strtoul(optarg, &test, 10);
352 			/* *test will be non-null if optarg is negative */
353 			if (errno != 0 || *test != '\0' ||
354 			    conblen >= ULONG_MAX) {
355 				(void) fprintf(stderr, gettext(
356 				    "%s: Bad context argument: %s\n"),
357 				    argv[0], optarg);
358 				exit(2);
359 			}
360 			if (conblen)
361 				conflag |= BEFORE;
362 			else
363 				conflag &= ~BEFORE;
364 			break;
365 		case 'C':	/* print N lines around each match */
366 			errno = 0;
367 			tval = strtoul(optarg, &test, 10);
368 			/* *test will be non-null if optarg is negative */
369 			if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) {
370 				(void) fprintf(stderr, gettext(
371 				    "%s: Bad context argument: %s\n"),
372 				    argv[0], optarg);
373 				exit(2);
374 			}
375 			if (tval) {
376 				if ((conflag & BEFORE) == 0)
377 					conblen = tval;
378 				if ((conflag & AFTER) == 0)
379 					conalen = tval;
380 				conflag = CONTEXT;
381 			}
382 			break;
383 
384 		case OPT_LABEL:
385 			stdin_label = optarg;
386 			break;
387 
388 		case 'o':
389 			oflag++;
390 			break;
391 
392 		default:
393 			usage();
394 		}
395 	}
396 	/*
397 	 * If we're invoked as egrep or fgrep we need to do some checks
398 	 */
399 
400 	if (egrep || fgrep) {
401 		/*
402 		 * Use of -E or -F with egrep or fgrep is illegal
403 		 */
404 		if (Eflag || Fflag)
405 			usage();
406 		/*
407 		 * Don't allow use of wflag with egrep / fgrep
408 		 */
409 		if (wflag)
410 			usage();
411 		/*
412 		 * For Solaris the -s flag is equivalent to XCU -q
413 		 */
414 		if (sflag)
415 			qflag++;
416 		/*
417 		 * done with above checks - set the appropriate flags
418 		 */
419 		if (egrep)
420 			Eflag++;
421 		else			/* Else fgrep */
422 			Fflag++;
423 	}
424 
425 	if (wflag && (Eflag || Fflag)) {
426 		/*
427 		 * -w cannot be specified with grep -F
428 		 */
429 		usage();
430 	}
431 
432 	/*
433 	 * -E and -F flags are mutually exclusive - check for this
434 	 */
435 	if (Eflag && Fflag)
436 		usage();
437 
438 	/*
439 	 * -l or -L overrides -H like in GNU grep. It also overrides -o.
440 	 */
441 	if (lflag || Lflag) {
442 		Hflag = 0;
443 		oflag = 0;
444 	}
445 
446 	/*
447 	 * -c, -l and -q flags are mutually exclusive
448 	 * We have -c override -l like in Solaris.
449 	 * -q overrides -l & -c programmatically in grep() function.
450 	 * -c overrides -o in GNU grep, we honor that.
451 	 */
452 	if (cflag) {
453 		lflag = 0;
454 		Lflag = 0;
455 		oflag = 0;
456 	}
457 
458 	/*
459 	 * If -o is set then we ignore all context related options, like other
460 	 * greps.
461 	 */
462 	if (oflag) {
463 		conflag = 0;
464 	}
465 
466 	/*
467 	 * These flags are a semantic mess with no clear answers as to their
468 	 * behvaior. Based on some experimentation GNU grep will exit zero if a
469 	 * non-match is present, but never print anything. BSD grep seems to
470 	 * exit 1 and not print anything, even if there would have been a match.
471 	 * Also, you probably don't want to ask about what happens with grep -x
472 	 * -o -v, some implementations seem to just ignore -v.
473 	 */
474 	if (oflag && !nvflag) {
475 		(void) fprintf(stderr, gettext("%s: the combination of -v and "
476 		    "-o is not supported currently\n"), argv[0]);
477 		exit(2);
478 	}
479 
480 	argv += optind - 1;
481 	argc -= optind - 1;
482 
483 	/*
484 	 * Now handling -e and -f option
485 	 */
486 	if (pattern_list) {
487 		for (i = 0; i < n_pattern; i++) {
488 			addpattern(pattern_list[i]);
489 		}
490 		free(pattern_list);
491 	}
492 	if (file_list) {
493 		for (i = 0; i < n_file; i++) {
494 			addfile(file_list[i]);
495 		}
496 		free(file_list);
497 	}
498 
499 	/*
500 	 * No -e or -f?  Make sure there is one more arg, use it as the pattern.
501 	 */
502 	if (patterns == NULL && !fflag) {
503 		if (argc < 2)
504 			usage();
505 		addpattern(argv[1]);
506 		argc--;
507 		argv++;
508 	}
509 
510 	/*
511 	 * Compile Patterns and also decide if BMG can be used
512 	 */
513 	fixpatterns();
514 
515 	if (stdin_label == NULL) {
516 		stdin_label = STDIN_FILENAME;
517 	}
518 
519 	/* Process all files: stdin, or rest of arg list */
520 	if (argc < 2) {
521 		matched = grep(0, stdin_label);
522 	} else {
523 		if (Hflag || (argc > 2 && hflag == 0))
524 			outfn = 1;	/* Print filename on match line */
525 		for (argv++; *argv != NULL; argv++) {
526 			process_path(*argv);
527 		}
528 	}
529 	/*
530 	 * Return() here is used instead of exit
531 	 */
532 
533 	(void) fflush(stdout);
534 
535 	if (errors)
536 		return (2);
537 	return (matched ? 0 : 1);
538 }
539 
540 static void
process_path(const char * path)541 process_path(const char *path)
542 {
543 	struct	stat st;
544 	int	walkflags = FTW_CHDIR;
545 	char	*buf = NULL;
546 
547 	if (rflag) {
548 		if (stat(path, &st) != -1 &&
549 		    (st.st_mode & S_IFMT) == S_IFDIR) {
550 			if (!hflag)
551 				outfn = 1; /* Print filename unless -h */
552 
553 			/*
554 			 * Add trailing slash if arg
555 			 * is directory, to resolve symlinks.
556 			 */
557 			if (path[strlen(path) - 1] != '/') {
558 				(void) asprintf(&buf, "%s/", path);
559 				if (buf != NULL)
560 					path = buf;
561 			}
562 
563 			/*
564 			 * Search through subdirs if path is directory.
565 			 * Don't follow symlinks if Rflag is not set.
566 			 */
567 			if (!Rflag)
568 				walkflags |= FTW_PHYS;
569 
570 			if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
571 				if (!sflag)
572 					(void) fprintf(stderr,
573 					    gettext("%s: can't open \"%s\"\n"),
574 					    cmdname, path);
575 				errors = 1;
576 			}
577 			return;
578 		}
579 	}
580 	process_file(path, 0);
581 }
582 
583 /*
584  * Read and process all files in directory recursively.
585  */
586 static int
recursive(const char * name,const struct stat * statp,int info,struct FTW * ftw)587 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
588 {
589 	/*
590 	 * Process files and follow symlinks if Rflag set.
591 	 */
592 	if (info != FTW_F) {
593 		/* Report broken symlinks and unreadable files */
594 		if (!sflag &&
595 		    (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
596 			(void) fprintf(stderr,
597 			    gettext("%s: can't open \"%s\"\n"), cmdname, name);
598 		}
599 		return (0);
600 	}
601 
602 
603 	/* Skip devices and pipes if Rflag is not set */
604 	if (!Rflag && !S_ISREG(statp->st_mode))
605 		return (0);
606 	/* Pass offset to relative name from FTW_CHDIR */
607 	process_file(name, ftw->base);
608 	return (0);
609 }
610 
611 /*
612  * Opens file and call grep function.
613  */
614 static void
process_file(const char * name,int base)615 process_file(const char *name, int base)
616 {
617 	int fd;
618 
619 	if ((fd = open(name + base, O_RDONLY)) == -1) {
620 		errors = 1;
621 		if (!sflag) /* Silent mode */
622 			(void) fprintf(stderr, gettext(
623 			    "%s: can't open \"%s\"\n"),
624 			    cmdname, name);
625 		return;
626 	}
627 	matched |= grep(fd, name);
628 	(void) close(fd);
629 
630 	if (ferror(stdout)) {
631 		(void) fprintf(stderr, gettext(
632 		    "%s: error writing to stdout\n"),
633 		    cmdname);
634 		(void) fflush(stdout);
635 		exit(2);
636 	}
637 
638 }
639 
640 /*
641  * Add a file of strings to the pattern list.
642  */
643 static void
addfile(const char * fn)644 addfile(const char *fn)
645 {
646 	FILE	*fp;
647 	char	*inbuf;
648 	char	*bufp;
649 	size_t	bufsiz, buflen, bufused;
650 
651 	/*
652 	 * Open the pattern file
653 	 */
654 	if ((fp = fopen(fn, "r")) == NULL) {
655 		(void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
656 		    cmdname, fn);
657 		exit(2);
658 	}
659 	bufsiz = BUFSIZE;
660 	if ((inbuf = malloc(bufsiz)) == NULL) {
661 		(void) fprintf(stderr,
662 		    gettext("%s: out of memory\n"), cmdname);
663 		exit(2);
664 	}
665 	bufp = inbuf;
666 	bufused = 0;
667 	/*
668 	 * Read in the file, reallocing as we need more memory
669 	 */
670 	while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
671 		buflen = strlen(bufp);
672 		bufused += buflen;
673 		if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
674 			/*
675 			 * if this line does not fit to the buffer,
676 			 * realloc larger buffer
677 			 */
678 			bufsiz += BUFSIZE;
679 			if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
680 				(void) fprintf(stderr,
681 				    gettext("%s: out of memory\n"),
682 				    cmdname);
683 				exit(2);
684 			}
685 			bufp = inbuf + bufused;
686 			continue;
687 		}
688 		if (bufp[buflen - 1] == '\n') {
689 			bufp[--buflen] = '\0';
690 		}
691 		addpattern(inbuf);
692 
693 		bufp = inbuf;
694 		bufused = 0;
695 	}
696 	free(inbuf);
697 	free(prntbuf);
698 	free(conbuf);
699 	(void) fclose(fp);
700 }
701 
702 /*
703  * Add a string to the pattern list.
704  */
705 static void
addpattern(char * s)706 addpattern(char *s)
707 {
708 	PATTERN	*pp;
709 	char	*wordbuf;
710 	char	*np;
711 
712 	for (; ; ) {
713 		np = strchr(s, '\n');
714 		if (np != NULL)
715 			*np = '\0';
716 		if ((pp = malloc(sizeof (PATTERN))) == NULL) {
717 			(void) fprintf(stderr, gettext(
718 			    "%s: out of memory\n"),
719 			    cmdname);
720 			exit(2);
721 		}
722 		if (wflag) {
723 			/*
724 			 * Solaris wflag support: Add '<' '>' to pattern to
725 			 * select it as a word. Doesn't make sense with -F
726 			 * but we're Libertarian.
727 			 */
728 			size_t	slen, wordlen;
729 
730 			slen = strlen(s);
731 			wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
732 			if ((wordbuf = malloc(wordlen)) == NULL) {
733 				(void) fprintf(stderr,
734 				    gettext("%s: out of memory\n"),
735 				    cmdname);
736 				exit(2);
737 			}
738 			(void) strcpy(wordbuf, "\\<");
739 			(void) strcpy(wordbuf + 2, s);
740 			(void) strcpy(wordbuf + 2 + slen, "\\>");
741 		} else {
742 			if ((wordbuf = strdup(s)) == NULL) {
743 				(void) fprintf(stderr,
744 				    gettext("%s: out of memory\n"),
745 				    cmdname);
746 				exit(2);
747 			}
748 		}
749 		pp->pattern = wordbuf;
750 		pp->next = patterns;
751 		patterns = pp;
752 		if (np == NULL)
753 			break;
754 		s = np + 1;
755 	}
756 }
757 
758 /*
759  * Check if a given grep pattern that is being used with egrep or grep can be
760  * considered 'simple'. That is there are no characters that would be treated
761  * differently from fgrep. In this particular case, we're a little bit
762  * conservative and look for characters that are:
763  *
764  *  o 7-bit ASCII
765  *  o Letters
766  *  o Numbers
767  *  o Meta-characters not used in BREs/EREs: !, @, #, /, -, _, <, >, =
768  *
769  * This can certianly be made more complex and less restrictive with additional
770  * testing.
771  */
772 static boolean_t
simple_pattern(const char * str)773 simple_pattern(const char *str)
774 {
775 	for (; *str != '\0'; str++) {
776 		if (!isascii(*str)) {
777 			return (B_FALSE);
778 		}
779 
780 		if (isalnum(*str)) {
781 			continue;
782 		}
783 
784 		switch (*str) {
785 		case '!':
786 		case '@':
787 		case '#':
788 		case '/':
789 		case '-':
790 		case '_':
791 		case '<':
792 		case '>':
793 		case '=':
794 			continue;
795 		default:
796 			return (B_FALSE);
797 		}
798 	}
799 
800 	return (B_TRUE);
801 }
802 
803 /*
804  * Fix patterns.
805  * Must do after all arguments read, in case later -i option.
806  */
807 static void
fixpatterns(void)808 fixpatterns(void)
809 {
810 	PATTERN	*pp;
811 	int	rv, fix_pattern;
812 
813 	/*
814 	 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
815 	 * Use the Boyer-Moore-Gosper algorithm if:
816 	 * - fgrep or non-BRE/ERE	(Fflag || simple_pattern())
817 	 * - singlebyte locale		(!mblocale)
818 	 * - no ignoring case		(!iflag)
819 	 * - no printing line numbers	(!nflag)
820 	 * - no negating the output	(nvflag)
821 	 * - only one pattern		(patterns != NULL && patterns->next ==
822 	 *				    NULL)
823 	 * - non zero length pattern	(strlen(patterns->pattern) != 0)
824 	 * - no context required	(conflag == 0)
825 	 * - no exact matches		(!oflag)
826 	 * - no word matches		(!wlag)
827 	 */
828 	use_bmg = !mblocale && !iflag && !nflag && nvflag && !oflag &&
829 	    (patterns != NULL && patterns->next == NULL) && !wflag &&
830 	    (strlen(patterns->pattern) != 0) && conflag == 0 &&
831 	    (Fflag || simple_pattern(patterns->pattern));
832 
833 	if (use_bmg) {
834 		return;
835 	}
836 
837 	/*
838 	 * Fix the specified pattern if -x is specified.
839 	 */
840 	fix_pattern = !Fflag && xflag;
841 
842 	for (pp = patterns; pp != NULL; pp = pp->next) {
843 		if (fix_pattern) {
844 			char	*cp, *cq;
845 			size_t	plen, nplen;
846 
847 			plen = strlen(pp->pattern);
848 			/* '^' pattern '$' */
849 			nplen = 1 + plen + 1 + 1;
850 			if ((cp = malloc(nplen)) == NULL) {
851 				(void) fprintf(stderr,
852 				    gettext("%s: out of memory\n"),
853 				    cmdname);
854 				exit(2);
855 			}
856 			cq = cp;
857 			*cq++ = '^';
858 			cq = strcpy(cq, pp->pattern) + plen;
859 			*cq++ = '$';
860 			*cq = '\0';
861 			free(pp->pattern);
862 			pp->pattern = cp;
863 		}
864 
865 		/*
866 		 * Compile the regular expression, give an informative error
867 		 * message, and exit if it didn't compile.
868 		 */
869 		if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
870 			(void) regerror(rv, &pp->re, errstr, sizeof (errstr));
871 			(void) fprintf(stderr,
872 			    gettext("%s: RE error in %s: %s\n"),
873 			    cmdname, pp->pattern, errstr);
874 			exit(2);
875 		}
876 		free(pp->pattern);
877 	}
878 }
879 
880 /*
881  * Search a newline from the beginning of the string
882  */
883 static char *
find_nl(const char * ptr,size_t len)884 find_nl(const char *ptr, size_t len)
885 {
886 	while (len-- != 0) {
887 		if (*ptr++ == '\n') {
888 			return ((char *)--ptr);
889 		}
890 	}
891 	return (NULL);
892 }
893 
894 /*
895  * Search a newline from the end of the string
896  */
897 static char *
rfind_nl(const char * ptr,size_t len)898 rfind_nl(const char *ptr, size_t len)
899 {
900 	const char	*uptr = ptr + len;
901 	while (len--) {
902 		if (*--uptr == '\n') {
903 			return ((char *)uptr);
904 		}
905 	}
906 	return (NULL);
907 }
908 
909 /*
910  * Do grep on a single file.
911  * Return true in any lines matched.
912  *
913  * We have two strategies:
914  * The fast one is used when we have a single pattern with
915  * a string known to occur in the pattern. We can then
916  * do a BMG match on the whole buffer.
917  * This is an order of magnitude faster.
918  * Otherwise we split the buffer into lines,
919  * and check for a match on each line.
920  */
921 static int
grep(int fd,const char * fn)922 grep(int fd, const char *fn)
923 {
924 	PATTERN *pp;
925 	off_t	data_len;	/* length of the data chunk */
926 	off_t	line_len;	/* length of the current line */
927 	off_t	line_offset;	/* current line's offset from the beginning */
928 	off_t	blkoffset;	/* line_offset but context-compatible */
929 	long long	lineno, linenum;
930 	long long	matches = 0;	/* Number of matching lines */
931 	long long	conacnt = 0, conbcnt = 0;	/* context line count */
932 	int	newlinep;	/* 0 if the last line of file has no newline */
933 	char	*ptr, *ptrend, *prntptr, *prntptrend;
934 	char	*nextptr = NULL, *nextend = NULL;
935 	char	*conptr = NULL, *conptrend = NULL;
936 	char	*matchptr = NULL;
937 	int	conaprnt = 0, conbprnt = 0, lastmatch = 0;
938 	boolean_t	nearmatch; /* w/in N+1 of last match */
939 	boolean_t	havematch = B_FALSE; /* have a match in context */
940 	boolean_t	sameline = B_FALSE; /* Are we still on the same line? */
941 	size_t	prntlen;
942 
943 	if (patterns == NULL)
944 		return (0);	/* no patterns to match -- just return */
945 
946 	pp = patterns;
947 
948 	if (use_bmg) {
949 		bmgcomp(pp->pattern, strlen(pp->pattern));
950 	}
951 
952 	if (prntbuf == NULL) {
953 		prntbuflen = BUFSIZE;
954 		if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
955 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
956 			    cmdname);
957 			exit(2);
958 		}
959 	}
960 
961 	if (conflag != 0 && (conbuf == NULL)) {
962 		conbuflen = BUFSIZE;
963 		if ((conbuf = malloc(BUFSIZE+1)) == NULL) {
964 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
965 			    cmdname);
966 			exit(2);
967 		}
968 	}
969 
970 	nearmatch = (conmatches != 0);
971 	blkoffset = line_offset = 0;
972 	lineno = 0;
973 	linenum = 1;
974 	newlinep = 1;
975 	data_len = 0;
976 	for (; ; ) {
977 		long	count;
978 		off_t	offset = 0;
979 		char	separate;
980 		char	*startmatch = NULL; /* -o, start of match */
981 		char	*postmatch = NULL; /* -o, character after match */
982 		boolean_t	last_ctx = B_FALSE, eof = B_FALSE;
983 
984 		if (data_len == 0) {
985 			/*
986 			 * If no data in the buffer, reset ptr
987 			 */
988 			ptr = prntbuf;
989 			if (conflag != 0 && conptr == NULL) {
990 				conptr = conbuf;
991 				conptrend = conptr - 1;
992 			}
993 		}
994 		if (ptr == prntbuf) {
995 			/*
996 			 * The current data chunk starts from prntbuf.
997 			 * This means either the buffer has no data
998 			 * or the buffer has no newline.
999 			 * So, read more data from input.
1000 			 */
1001 			count = read(fd, ptr + data_len, prntbuflen - data_len);
1002 			if (count < 0) {
1003 				/* read error */
1004 				if (cflag) {
1005 					if (outfn && !rflag) {
1006 						(void) fprintf(stdout,
1007 						    "%s:", fn);
1008 					}
1009 					if (!qflag && !rflag) {
1010 						(void) fprintf(stdout, "%lld\n",
1011 						    matches);
1012 					}
1013 				}
1014 				return (0);
1015 			} else if (count == 0) {
1016 				/* no new data */
1017 				eof = B_TRUE;
1018 
1019 				if (data_len == 0) {
1020 					/* end of file already reached */
1021 					if (conflag != 0) {
1022 						if (conptrend >= conptr)
1023 							*conptrend = '\n';
1024 						last_ctx = B_TRUE;
1025 						goto L_next_line;
1026 					} else {
1027 						goto out;
1028 					}
1029 				}
1030 				/* last line of file has no newline */
1031 				ptrend = ptr + data_len;
1032 				newlinep = 0;
1033 				goto L_start_process;
1034 			}
1035 			offset = data_len;
1036 			data_len += count;
1037 		}
1038 
1039 		/*
1040 		 * Look for newline in the chunk
1041 		 * between ptr + offset and ptr + data_len - offset.
1042 		 */
1043 		ptrend = find_nl(ptr + offset, data_len - offset);
1044 		if (ptrend == NULL) {
1045 			/* no newline found in this chunk */
1046 			if (ptr > prntbuf) {
1047 				/*
1048 				 * Move remaining data to the beginning
1049 				 * of the buffer.
1050 				 * Remaining data lie from ptr for
1051 				 * data_len bytes.
1052 				 */
1053 				(void) memmove(prntbuf, ptr, data_len);
1054 			}
1055 			if (data_len == prntbuflen) {
1056 				/*
1057 				 * Not enough room in the buffer
1058 				 */
1059 				if (prntbuflen > SIZE_MAX - BUFSIZE) {
1060 					(void) fprintf(stderr,
1061 					    gettext("%s: buflen would"
1062 					    " overflow\n"),
1063 					    cmdname);
1064 					exit(2);
1065 				}
1066 
1067 				prntbuflen += BUFSIZE;
1068 				prntbuf = realloc(prntbuf, prntbuflen + 1);
1069 				if (prntbuf == NULL) {
1070 					(void) fprintf(stderr,
1071 					    gettext("%s: out of memory\n"),
1072 					    cmdname);
1073 					exit(2);
1074 				}
1075 			}
1076 			ptr = prntbuf;
1077 			/* read the next input */
1078 			continue;
1079 		}
1080 L_start_process:
1081 
1082 		/*
1083 		 * Beginning of the chunk:	ptr
1084 		 * End of the chunk:		ptr + data_len
1085 		 * Beginning of the line:	ptr
1086 		 * End of the line:		ptrend
1087 		 *
1088 		 * conptr:	Beginning of the context.
1089 		 * conptrend: If context is empty, conptr - 1 (invalid memory).
1090 		 *	Otherwise, Last newline in the context.
1091 		 */
1092 
1093 		if (use_bmg) {
1094 			/*
1095 			 * Use Boyer-Moore-Gosper algorithm to find out if
1096 			 * this chunk (not this line) contains the specified
1097 			 * pattern.  If not, restart from the last line
1098 			 * of this chunk.
1099 			 */
1100 			char	*bline;
1101 			bline = bmgexec(ptr, ptr + data_len);
1102 			if (bline == NULL) {
1103 				/*
1104 				 * No pattern found in this chunk.
1105 				 * Need to find the last line
1106 				 * in this chunk.
1107 				 */
1108 				ptrend = rfind_nl(ptr, data_len);
1109 
1110 				/*
1111 				 * When this chunk does not contain newline,
1112 				 * ptrend becomes NULL, which should happen
1113 				 * when the last line of file does not end
1114 				 * with a newline.  At such a point,
1115 				 * newlinep should have been set to 0.
1116 				 * Therefore, just after jumping to
1117 				 * L_skip_line, the main for-loop quits,
1118 				 * and the line_len value won't be
1119 				 * used.
1120 				 */
1121 				line_len = ptrend - ptr;
1122 				goto L_skip_line;
1123 			}
1124 			if (bline > ptrend) {
1125 				/*
1126 				 * Pattern found not in the first line
1127 				 * of this chunk.
1128 				 * Discard the first line.
1129 				 */
1130 				line_len = ptrend - ptr;
1131 				goto L_skip_line;
1132 			}
1133 			/*
1134 			 * Pattern found in the first line of this chunk.
1135 			 * Using this result.
1136 			 */
1137 			*ptrend = '\0';
1138 			line_len = ptrend - ptr;
1139 
1140 			/*
1141 			 * before jumping to L_next_line,
1142 			 * need to handle xflag if specified
1143 			 */
1144 			if (xflag && (line_len != bmglen ||
1145 			    strcmp(bmgpat, ptr) != 0)) {
1146 				/* didn't match */
1147 				pp = NULL;
1148 			} else {
1149 				pp = patterns; /* to make it happen */
1150 			}
1151 			goto L_next_line;
1152 		}
1153 
1154 		/*
1155 		 * When using -o, we might actually loop around while still on
1156 		 * the same line. In such a case, we need to make sure we don't
1157 		 * increment the line number.
1158 		 */
1159 		if (!sameline) {
1160 			lineno++;
1161 		} else {
1162 			sameline = B_FALSE;
1163 		}
1164 
1165 		/*
1166 		 * Line starts from ptr and ends at ptrend.
1167 		 * line_len will be the length of the line.
1168 		 */
1169 		*ptrend = '\0';
1170 		line_len = ptrend - ptr;
1171 
1172 		/*
1173 		 * From now, the process will be performed based
1174 		 * on the line from ptr to ptrend.
1175 		 */
1176 		for (pp = patterns; pp; pp = pp->next) {
1177 			int	rv;
1178 			regmatch_t rm;
1179 			size_t nmatch = 0;
1180 
1181 			/*
1182 			 * The current implementation of regexec has a higher
1183 			 * cost when you ask for match information. As a result,
1184 			 * we only ask for a match when we know that we need it
1185 			 * specifically. This is always needed for -o because we
1186 			 * rely on it to tell us what we matched. For fgrep -x
1187 			 * we need it so we can determine whether we matched the
1188 			 * entire line.
1189 			 */
1190 			if (oflag || (Fflag && xflag))
1191 				nmatch = 1;
1192 
1193 			rv = regexec(&pp->re, ptr, nmatch, &rm, 0);
1194 			if (rv == REG_OK) {
1195 				/*
1196 				 * fgrep in this form cannot insert the
1197 				 * metacharacters to verify whether or not we
1198 				 * were the entire line. As a result, we check
1199 				 * the pattern length against the line length.
1200 				 */
1201 				if (Fflag && xflag &&
1202 				    line_len != rm.rm_eo - rm.rm_so) {
1203 					continue;
1204 				}
1205 
1206 				/* matched */
1207 				if (oflag) {
1208 					startmatch = ptr + rm.rm_so;
1209 					postmatch = ptr + rm.rm_eo;
1210 				}
1211 				break;
1212 			}
1213 
1214 			switch (rv) {
1215 			case REG_NOMATCH:
1216 				break;
1217 			case REG_ECHAR:
1218 				(void) fprintf(stderr, gettext(
1219 	    "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1220 				    cmdname, fn, lineno);
1221 				break;
1222 			default:
1223 				(void) regerror(rv, &pp->re, errstr,
1224 				    sizeof (errstr));
1225 				(void) fprintf(stderr, gettext(
1226 	    "%s: input file \"%s\": line %lld: %s\n"),
1227 				    cmdname, fn, lineno, errstr);
1228 				exit(2);
1229 			}
1230 		}
1231 
1232 		/*
1233 		 * Context is set up as follows:
1234 		 * For a 'Before' context, we maintain a set of pointers
1235 		 * containing 'N' lines of context. If the current number of
1236 		 * lines contained is greater than N, and N isn't a match, the
1237 		 * start pointer is moved forward to the next newline.
1238 		 *
1239 		 * If we ever find a match, we print out immediately.
1240 		 * 'nearmatch' tells us if we're within N+1 lines of the last
1241 		 * match ; if we are, and we find another match, we don't
1242 		 * separate the matches. 'nearmatch' becomes false when
1243 		 * a line gets rotated out of the context.
1244 		 *
1245 		 * For an 'After' context, we simply wait until we've found a
1246 		 * match, then create a context N+1 lines big. If we don't find
1247 		 * a match within the context, we print out the current context.
1248 		 * Otherwise, we save a reference to the new matching line,
1249 		 * print out the other context, and reset our context pointers
1250 		 * to the new matching line.
1251 		 *
1252 		 * 'nearmatch' becomes false when we find a non-matching line
1253 		 * that isn't a part of any context.
1254 		 *
1255 		 * A full-context is implemented as a combination of the
1256 		 * 'Before' and 'After' context logic. Before we find a match,
1257 		 * we follow the Before logic. When we find a match, we
1258 		 * follow the After logic. 'nearmatch' is handled by the Before
1259 		 * logic.
1260 		 */
1261 
1262 		if (conflag == 0)
1263 			goto L_next_line;
1264 
1265 		/* Do we have room to add this line to the context buffer? */
1266 		while ((line_len + 1) > (conbuflen -
1267 		    ((conptrend >= conptr) ? conptrend - conbuf : 0))) {
1268 			char *oldconbuf = conbuf;
1269 			char *oldconptr = conptr;
1270 			long tmp = matchptr - conptr;
1271 
1272 			if (conbuflen > SIZE_MAX - BUFSIZE) {
1273 				(void) fprintf(stderr,
1274 				    gettext("%s: buflen would overflow\n"),
1275 				    cmdname);
1276 				exit(2);
1277 			}
1278 
1279 			conbuflen += BUFSIZE;
1280 			conbuf = realloc(conbuf, conbuflen + 1);
1281 			if (conbuf == NULL) {
1282 				(void) fprintf(stderr,
1283 				    gettext("%s: out of memory\n"),
1284 				    cmdname);
1285 				exit(2);
1286 			}
1287 
1288 			conptr = conbuf + (conptr - oldconbuf);
1289 			conptrend = conptr + (conptrend - oldconptr);
1290 			if (matchptr)
1291 				matchptr = conptr + tmp;
1292 		}
1293 		(void) memcpy(conptrend + 1, ptr, line_len);
1294 		conptrend += line_len + 1;
1295 		*conptrend = '\n';
1296 
1297 		if (nvflag == (pp != NULL)) {
1298 			/* matched */
1299 			if (havematch) {
1300 				if ((conflag & AFTER) != 0) {
1301 					conaprnt = 1;
1302 					nextend = conptrend;
1303 					conptrend = conptr + lastmatch;
1304 					nextptr = conptrend + 1;
1305 					*nextend = '\n';
1306 				}
1307 			} else {
1308 				if (conflag == AFTER) {
1309 					conptr = conptrend - (line_len);
1310 					linenum = lineno;
1311 				}
1312 				blkoffset = line_offset -
1313 				    (conptrend - conptr - line_len);
1314 			}
1315 
1316 			if (conflag == BEFORE)
1317 				conbprnt = 1;
1318 
1319 			lastmatch = conptrend - conptr;
1320 			havematch = B_TRUE;
1321 			goto L_next_line;
1322 		}
1323 
1324 		if (!havematch) {
1325 			if ((conflag & BEFORE) != 0) {
1326 				if (conbcnt >= conblen) {
1327 					char *tmp = conptr;
1328 					conptr = find_nl(conptr,
1329 					    conptrend - conptr) + 1;
1330 					if (bflag)
1331 						blkoffset += conptr - tmp;
1332 					linenum++;
1333 					nearmatch = B_TRUE;
1334 				} else {
1335 					conbcnt++;
1336 				}
1337 			}
1338 			if (conflag == AFTER)
1339 				nearmatch = B_TRUE;
1340 		} else  {
1341 			if (++conacnt >= conalen && !conaprnt && conalen)
1342 				conaprnt = 1;
1343 			else
1344 				lastmatch = conptrend - conptr;
1345 		}
1346 
1347 L_next_line:
1348 		/*
1349 		 * Here, if pp points to non-NULL, something has been matched
1350 		 * to the pattern.
1351 		 */
1352 		if (!last_ctx && nvflag == (pp != NULL)) {
1353 			matches++;
1354 			if (!nextend) {
1355 				if (conflag != 0) {
1356 					matchptr = conptrend;
1357 				} else if (oflag) {
1358 					matchptr = postmatch - 1;
1359 				} else {
1360 					matchptr = ptrend;
1361 				}
1362 			}
1363 		}
1364 
1365 		if (pp != NULL && oflag && postmatch == NULL) {
1366 			(void) fprintf(stderr, gettext("%s: internal error, "
1367 			    "-o set, but failed to find postmatch\n"), cmdname);
1368 			abort();
1369 		}
1370 
1371 		/*
1372 		 * Set up some print context so that we can treat
1373 		 * single-line matches as a zero-N context.
1374 		 * Apply CLI flags to each line of the context.
1375 		 *
1376 		 * For context, we only print if we both have a match and are
1377 		 * either at the end of the data stream, or we've previously
1378 		 * declared that we want to print for a particular context.
1379 		 */
1380 		if (havematch && (eof || conaprnt || conbprnt)) {
1381 
1382 			/*
1383 			 * We'd normally do this earlier, but we had to
1384 			 * escape early because we reached the end of the data.
1385 			 */
1386 			if (eof && nextptr)
1387 				conptrend = nextend;
1388 
1389 			prntlen = conptrend - conptr + 1;
1390 			prntptr = conptr;
1391 			if (conmatches++ && nearmatch && !cflag)
1392 				(void) fwrite("--\n", 1, 3, stdout);
1393 		} else if (conflag == 0 && nvflag == (pp != NULL)) {
1394 			*ptrend = '\n';
1395 			if (oflag) {
1396 				prntptr = startmatch;
1397 			} else {
1398 				prntptr = ptr;
1399 			}
1400 			prntlen = line_len + 1;
1401 			linenum = lineno;
1402 			blkoffset = line_offset;
1403 			if (oflag) {
1404 				blkoffset += startmatch - ptr;
1405 			}
1406 		} else if (eof) {
1407 			/* No match and no more data */
1408 			goto out;
1409 		} else {
1410 			/* No match, or we're not done building context */
1411 			goto L_skip_line;
1412 		}
1413 
1414 		if (oflag) {
1415 			prntptrend = postmatch - 1;
1416 		} else {
1417 			prntptrend = prntptr - 1;
1418 		}
1419 		while (oflag || (prntptrend = find_nl(prntptrend + 1,
1420 		    prntlen)) != NULL) {
1421 			/*
1422 			 * GNU grep uses '-' for context lines and ':' for
1423 			 * matching lines, so replicate that here.
1424 			 */
1425 			if (prntptrend == matchptr) {
1426 				if (eof && nextptr) {
1427 					matchptr = nextend;
1428 					nextptr = NULL;
1429 				} else {
1430 					matchptr = NULL;
1431 				}
1432 				separate = ':';
1433 			} else {
1434 				separate = '-';
1435 			}
1436 
1437 			/*
1438 			 * Handle q, l, and c flags.
1439 			 */
1440 			if (qflag) {
1441 				/* no need to continue */
1442 				/*
1443 				 * End of this line is ptrend.
1444 				 * We have read up to ptr + data_len.
1445 				 */
1446 				off_t	pos;
1447 				pos = ptr + data_len - (ptrend + 1);
1448 				(void) lseek(fd, -pos, SEEK_CUR);
1449 				exit(0);
1450 			}
1451 			if (lflag) {
1452 				(void) printf("%s\n", fn);
1453 				goto out;
1454 			}
1455 			if (Lflag) {
1456 				goto out;
1457 			}
1458 			if (!cflag) {
1459 				if (Hflag || outfn) {
1460 					(void) printf("%s%c", fn, separate);
1461 				}
1462 				if (bflag) {
1463 					(void) printf("%lld%c", (offset_t)
1464 					    (blkoffset / BSIZE), separate);
1465 				}
1466 				if (nflag) {
1467 					(void) printf("%lld%c", linenum,
1468 					    separate);
1469 				}
1470 				(void) fwrite(prntptr, 1,
1471 				    prntptrend - prntptr + 1, stdout);
1472 
1473 				if (oflag) {
1474 					(void) fputc('\n', stdout);
1475 				}
1476 			}
1477 			if (ferror(stdout)) {
1478 				return (0);
1479 			}
1480 
1481 			/*
1482 			 * With -o we'll only ever take this loop once. Manually
1483 			 * break out.
1484 			 */
1485 			if (oflag) {
1486 				goto L_skip_line;
1487 			}
1488 
1489 			linenum++;
1490 			prntlen -= prntptrend - prntptr + 1;
1491 			blkoffset += prntptrend - prntptr + 1;
1492 			prntptr = prntptrend + 1;
1493 		}
1494 
1495 		if (eof)
1496 			goto out;
1497 
1498 		/*
1499 		 * Update context buffer and variables post-print
1500 		 */
1501 		if (conflag != 0) {
1502 			conptr = conbuf;
1503 			conaprnt = conbprnt = 0;
1504 			nearmatch = B_FALSE;
1505 			conacnt = conbcnt = 0;
1506 
1507 			if (nextptr) {
1508 				(void) memmove(conbuf, nextptr,
1509 				    nextend - nextptr + 1);
1510 				blkoffset += nextptr - conptrend - 1;
1511 				conptrend = conptr + (nextend - nextptr);
1512 				matchptr = conptrend;
1513 				linenum = lineno;
1514 				lastmatch = conptrend - conptr;
1515 				havematch = B_TRUE;
1516 			} else {
1517 				conptrend = conptr - 1;
1518 				conacnt = 0;
1519 				lastmatch = 0;
1520 				havematch = B_FALSE;
1521 			}
1522 			nextptr = nextend = NULL;
1523 		}
1524 
1525 L_skip_line:
1526 		if (!newlinep)
1527 			break;
1528 
1529 		if (oflag && postmatch != NULL) {
1530 			line_len = postmatch - 1 - ptr;
1531 			ptr = postmatch;
1532 			sameline = B_TRUE;
1533 		} else {
1534 			ptr = ptrend + 1;
1535 		}
1536 		data_len -= line_len + 1;
1537 		line_offset += line_len + 1;
1538 	}
1539 
1540 out:
1541 	if (cflag) {
1542 		if (Hflag || outfn) {
1543 			(void) printf("%s:", fn);
1544 		}
1545 		if (!qflag) {
1546 			(void) printf("%lld\n", matches);
1547 		}
1548 	}
1549 
1550 	/*
1551 	 * -L tells us to print the filename only when it doesn't match. So we
1552 	 * run through the normal operationa and then invert it.
1553 	 */
1554 	if (Lflag) {
1555 		if (matches == 0) {
1556 			(void) printf("%s\n", fn);
1557 			matches = 1;
1558 		} else {
1559 			matches = 0;
1560 		}
1561 	}
1562 
1563 	return (matches != 0);
1564 }
1565 
1566 /*
1567  * usage message for grep
1568  */
1569 static void
usage(void)1570 usage(void)
1571 {
1572 	(void) fprintf(stderr, gettext("usage: %5s"), cmdname);
1573 	if (!egrep && !fgrep)
1574 		(void) fprintf(stderr, gettext(" [-E|-F]"));
1575 	(void) fprintf(stderr, gettext(" [-bchHilLnoqrRsvx] [-A num] [-B num] "
1576 	    "[-C num|-num]\n             [--label=name] [-e pattern_list]... "
1577 	    "[-f pattern_file]...\n             [pattern_list] [file]...\n"));
1578 	exit(2);
1579 	/* NOTREACHED */
1580 }
1581 
1582 /*
1583  * Compile literal pattern into BMG tables
1584  */
1585 static void
bmgcomp(char * pat,int len)1586 bmgcomp(char *pat, int len)
1587 {
1588 	int	i;
1589 	int	tlen;
1590 	unsigned char	*uc = (unsigned char *)pat;
1591 
1592 	bmglen = len;
1593 	bmgpat = pat;
1594 
1595 	for (i = 0; i < M_CSETSIZE; i++) {
1596 		bmgtab[i] = len;
1597 	}
1598 
1599 	len--;
1600 	for (tlen = len, i = 0; i <= len; i++, tlen--) {
1601 		bmgtab[*uc++] = tlen;
1602 	}
1603 }
1604 
1605 /*
1606  * BMG search.
1607  */
1608 static char *
bmgexec(char * str,char * end)1609 bmgexec(char *str, char *end)
1610 {
1611 	int	t;
1612 	char	*k, *s, *p;
1613 
1614 	k = str + bmglen - 1;
1615 	if (bmglen == 1) {
1616 		return (memchr(str, bmgpat[0], end - str));
1617 	}
1618 	for (; ; ) {
1619 		/* inner loop, should be most optimized */
1620 		while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1621 			k += t;
1622 		}
1623 		if (k >= end) {
1624 			return (NULL);
1625 		}
1626 		for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1627 			if (p == bmgpat) {
1628 				return (s);
1629 			}
1630 		}
1631 		k++;
1632 	}
1633 	/* NOTREACHED */
1634 }
1635