xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision 2e5ac464)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * grep - pattern matching program - combined grep, egrep, and fgrep.
29  *	Based on MKS grep command, with XCU & Solaris mods.
30  */
31 
32 /*
33  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
34  *
35  */
36 
37 /*
38  * Copyright 2020 Peter Tribble.
39  * Copyright 2018 RackTop Systems.
40  * Copyright 2018 Nexenta Systems, Inc.
41  * Copyright 2013 Damian Bogel. All rights reserved.
42  * Copyright 2020 Oxide Computer Company
43  */
44 
45 #include <string.h>
46 #include <stdlib.h>
47 #include <ctype.h>
48 #include <stdarg.h>
49 #include <regex.h>
50 #include <limits.h>
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #include <fcntl.h>
54 #include <stdio.h>
55 #include <locale.h>
56 #include <wchar.h>
57 #include <errno.h>
58 #include <unistd.h>
59 #include <wctype.h>
60 #include <ftw.h>
61 #include <sys/param.h>
62 
63 #define	STDIN_FILENAME gettext("(standard input)")
64 
65 #define	BSIZE		512		/* Size of block for -b */
66 #define	BUFSIZE		8192		/* Input buffer size */
67 #define	MAX_DEPTH	1000		/* how deep to recurse */
68 
69 #define	AFTER	1			/* 'After' Context */
70 #define	BEFORE	2			/* 'Before' Context */
71 #define	CONTEXT	(AFTER|BEFORE)		/* Full Context */
72 
73 #define	M_CSETSIZE	256		/* singlebyte chars */
74 static int	bmglen;			/* length of BMG pattern */
75 static char	*bmgpat;		/* BMG pattern */
76 static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table */
77 
78 typedef	struct	_PATTERN	{
79 	char	*pattern;		/* original pattern */
80 	wchar_t	*wpattern;		/* wide, lowercased pattern */
81 	struct	_PATTERN	*next;
82 	regex_t	re;			/* compiled pattern */
83 } PATTERN;
84 
85 static PATTERN	*patterns;
86 static char	errstr[128];		/* regerror string buffer */
87 static int	regflags = 0;		/* regcomp options */
88 static int	matched = 0;		/* return of the grep() */
89 static int	errors = 0;		/* count of errors */
90 static uchar_t	fgrep = 0;		/* Invoked as fgrep */
91 static uchar_t	egrep = 0;		/* Invoked as egrep */
92 static boolean_t	nvflag = B_TRUE;	/* Print matching lines */
93 static uchar_t	cflag;			/* Count of matches */
94 static uchar_t	iflag;			/* Case insensitve matching */
95 static uchar_t	Hflag;			/* Precede lines by file name */
96 static uchar_t	hflag;			/* Suppress printing of filename */
97 static uchar_t	lflag;			/* Print file names of matches */
98 static uchar_t	Lflag;			/* Print file names of non-matches */
99 static uchar_t	nflag;			/* Precede lines by line number */
100 static uchar_t	rflag;			/* Search directories recursively */
101 static uchar_t	bflag;			/* Precede matches by block number */
102 static uchar_t	sflag;			/* Suppress file error messages */
103 static uchar_t	qflag;			/* Suppress standard output */
104 static uchar_t	wflag;			/* Search for expression as a word */
105 static uchar_t	xflag;			/* Anchoring */
106 static uchar_t	Eflag;			/* Egrep or -E flag */
107 static uchar_t	Fflag;			/* Fgrep or -F flag */
108 static uchar_t	Rflag;			/* Like rflag, but follow symlinks */
109 static uchar_t	outfn;			/* Put out file name */
110 static uchar_t	conflag;		/* show context of matches */
111 static char	*cmdname;
112 
113 static int	use_wchar, use_bmg, mblocale;
114 
115 static size_t	outbuflen, prntbuflen, conbuflen;
116 static unsigned long	conalen, conblen, conmatches;
117 static char	*prntbuf, *conbuf;
118 static wchar_t	*outline;
119 
120 static void	addfile(const char *fn);
121 static void	addpattern(char *s);
122 static void	fixpatterns(void);
123 static void	usage(void);
124 static int	grep(int, const char *);
125 static void	bmgcomp(char *, int);
126 static char	*bmgexec(char *, char *);
127 static int	recursive(const char *, const struct stat *, int, struct FTW *);
128 static void	process_path(const char *);
129 static void	process_file(const char *, int);
130 
131 /*
132  * mainline for grep
133  */
134 int
135 main(int argc, char **argv)
136 {
137 	char	*ap, *test;
138 	int	c;
139 	int	fflag = 0;
140 	int	i, n_pattern = 0, n_file = 0;
141 	char	**pattern_list = NULL;
142 	char	**file_list = NULL;
143 
144 	(void) setlocale(LC_ALL, "");
145 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
146 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
147 #endif
148 	(void) textdomain(TEXT_DOMAIN);
149 
150 	/*
151 	 * true if this is running on the multibyte locale
152 	 */
153 	mblocale = (MB_CUR_MAX > 1);
154 	/*
155 	 * Skip leading slashes
156 	 */
157 	cmdname = argv[0];
158 	if (ap = strrchr(cmdname, '/'))
159 		cmdname = ap + 1;
160 
161 	ap = cmdname;
162 	/*
163 	 * Detect egrep/fgrep via command name, map to -E and -F options.
164 	 */
165 	if (*ap == 'e' || *ap == 'E') {
166 		regflags |= REG_EXTENDED;
167 		egrep++;
168 	} else {
169 		if (*ap == 'f' || *ap == 'F') {
170 			fgrep++;
171 		}
172 	}
173 
174 	/* check for non-standard "-line-count" option */
175 	for (i = 1; i < argc; i++) {
176 		if (strcmp(argv[i], "--") == 0)
177 			break;
178 
179 		/* isdigit() check prevents negative arguments */
180 		if ((argv[i][0] == '-') && isdigit(argv[i][1])) {
181 			if (strlen(&argv[i][1]) !=
182 			    strspn(&argv[i][1], "0123456789")) {
183 				(void) fprintf(stderr, gettext(
184 				    "%s: Bad number flag\n"), argv[0]);
185 				usage();
186 			}
187 
188 			errno = 0;
189 			conalen = conblen = strtoul(&argv[i][1], (char **)NULL,
190 			    10);
191 
192 			if (errno != 0 || conalen >= ULONG_MAX) {
193 				(void) fprintf(stderr, gettext(
194 				    "%s: Bad context argument\n"), argv[0]);
195 			} else if (conalen)
196 				conflag = CONTEXT;
197 
198 			while (i < argc) {
199 				argv[i] = argv[i + 1];
200 				i++;
201 			}
202 			argc--;
203 		}
204 	}
205 
206 	while ((c = getopt(argc, argv, "vwchHilLnrbse:f:qxEFIRA:B:C:")) !=
207 	    EOF) {
208 		unsigned long tval;
209 		switch (c) {
210 		case 'v':	/* POSIX: negate matches */
211 			nvflag = B_FALSE;
212 			break;
213 
214 		case 'c':	/* POSIX: write count */
215 			cflag++;
216 			break;
217 
218 		case 'i':	/* POSIX: ignore case */
219 			iflag++;
220 			regflags |= REG_ICASE;
221 			break;
222 
223 		/*
224 		 * The last of -l and -L are honored.
225 		 */
226 		case 'l':	/* POSIX: Write filenames only */
227 			lflag++;
228 			Lflag = 0;
229 			break;
230 
231 		case 'L':	/* Write non-matching filenames */
232 			Lflag++;
233 			lflag = 0;
234 			break;
235 
236 		case 'n':	/* POSIX: Write line numbers */
237 			nflag++;
238 			break;
239 
240 		case 'r':	/* Solaris: search recursively */
241 			rflag++;
242 			break;
243 
244 		case 'b':	/* Solaris: Write file block numbers */
245 			bflag++;
246 			break;
247 
248 		case 's':	/* POSIX: No error msgs for files */
249 			sflag++;
250 			break;
251 
252 		case 'e':	/* POSIX: pattern list */
253 			n_pattern++;
254 			pattern_list = realloc(pattern_list,
255 			    sizeof (char *) * n_pattern);
256 			if (pattern_list == NULL) {
257 				(void) fprintf(stderr,
258 				    gettext("%s: out of memory\n"),
259 				    cmdname);
260 				exit(2);
261 			}
262 			*(pattern_list + n_pattern - 1) = optarg;
263 			break;
264 
265 		case 'f':	/* POSIX: pattern file */
266 			fflag = 1;
267 			n_file++;
268 			file_list = realloc(file_list,
269 			    sizeof (char *) * n_file);
270 			if (file_list == NULL) {
271 				(void) fprintf(stderr,
272 				    gettext("%s: out of memory\n"),
273 				    cmdname);
274 				exit(2);
275 			}
276 			*(file_list + n_file - 1) = optarg;
277 			break;
278 
279 		/* based on options order h or H is set as in GNU grep */
280 		case 'h':	/* Solaris: suppress printing of file name */
281 			hflag = 1;
282 			Hflag = 0;
283 			break;
284 		/* Solaris: precede every match with file name */
285 		case 'H':
286 			Hflag = 1;
287 			hflag = 0;
288 			break;
289 
290 		case 'q':	/* POSIX: quiet: status only */
291 			qflag++;
292 			break;
293 
294 		case 'w':	/* Solaris: treat pattern as word */
295 			wflag++;
296 			break;
297 
298 		case 'x':	/* POSIX: full line matches */
299 			xflag++;
300 			break;
301 
302 		case 'E':	/* POSIX: Extended RE's */
303 			regflags |= REG_EXTENDED;
304 			Eflag++;
305 			break;
306 
307 		case 'F':	/* POSIX: strings, not RE's */
308 			Fflag++;
309 			break;
310 
311 		case 'R':	/* Solaris: like rflag, but follow symlinks */
312 			Rflag++;
313 			rflag++;
314 			break;
315 
316 		case 'A':	/* print N lines after each match */
317 			errno = 0;
318 			conalen = strtoul(optarg, &test, 10);
319 			/* *test will be non-null if optarg is negative */
320 			if (errno != 0 || *test != '\0' ||
321 			    conalen >= ULONG_MAX) {
322 				(void) fprintf(stderr, gettext(
323 				    "%s: Bad context argument: %s\n"),
324 				    argv[0], optarg);
325 				exit(2);
326 			}
327 			if (conalen)
328 				conflag |= AFTER;
329 			else
330 				conflag &= ~AFTER;
331 			break;
332 		case 'B':	/* print N lines before each match */
333 			errno = 0;
334 			conblen = strtoul(optarg, &test, 10);
335 			/* *test will be non-null if optarg is negative */
336 			if (errno != 0 || *test != '\0' ||
337 			    conblen >= ULONG_MAX) {
338 				(void) fprintf(stderr, gettext(
339 				    "%s: Bad context argument: %s\n"),
340 				    argv[0], optarg);
341 				exit(2);
342 			}
343 			if (conblen)
344 				conflag |= BEFORE;
345 			else
346 				conflag &= ~BEFORE;
347 			break;
348 		case 'C':	/* print N lines around each match */
349 			errno = 0;
350 			tval = strtoul(optarg, &test, 10);
351 			/* *test will be non-null if optarg is negative */
352 			if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) {
353 				(void) fprintf(stderr, gettext(
354 				    "%s: Bad context argument: %s\n"),
355 				    argv[0], optarg);
356 				exit(2);
357 			}
358 			if (tval) {
359 				if ((conflag & BEFORE) == 0)
360 					conblen = tval;
361 				if ((conflag & AFTER) == 0)
362 					conalen = tval;
363 				conflag = CONTEXT;
364 			}
365 			break;
366 
367 		default:
368 			usage();
369 		}
370 	}
371 	/*
372 	 * If we're invoked as egrep or fgrep we need to do some checks
373 	 */
374 
375 	if (egrep || fgrep) {
376 		/*
377 		 * Use of -E or -F with egrep or fgrep is illegal
378 		 */
379 		if (Eflag || Fflag)
380 			usage();
381 		/*
382 		 * Don't allow use of wflag with egrep / fgrep
383 		 */
384 		if (wflag)
385 			usage();
386 		/*
387 		 * For Solaris the -s flag is equivalent to XCU -q
388 		 */
389 		if (sflag)
390 			qflag++;
391 		/*
392 		 * done with above checks - set the appropriate flags
393 		 */
394 		if (egrep)
395 			Eflag++;
396 		else			/* Else fgrep */
397 			Fflag++;
398 	}
399 
400 	if (wflag && (Eflag || Fflag)) {
401 		/*
402 		 * -w cannot be specified with grep -F
403 		 */
404 		usage();
405 	}
406 
407 	/*
408 	 * -E and -F flags are mutually exclusive - check for this
409 	 */
410 	if (Eflag && Fflag)
411 		usage();
412 
413 	/*
414 	 * -l or -L overrides -H like in GNU grep
415 	 */
416 	if (lflag || Lflag)
417 		Hflag = 0;
418 
419 	/*
420 	 * -c, -l and -q flags are mutually exclusive
421 	 * We have -c override -l like in Solaris.
422 	 * -q overrides -l & -c programmatically in grep() function.
423 	 */
424 	if (cflag && (lflag || Lflag)) {
425 		lflag = 0;
426 		Lflag = 0;
427 	}
428 
429 	argv += optind - 1;
430 	argc -= optind - 1;
431 
432 	/*
433 	 * Now handling -e and -f option
434 	 */
435 	if (pattern_list) {
436 		for (i = 0; i < n_pattern; i++) {
437 			addpattern(pattern_list[i]);
438 		}
439 		free(pattern_list);
440 	}
441 	if (file_list) {
442 		for (i = 0; i < n_file; i++) {
443 			addfile(file_list[i]);
444 		}
445 		free(file_list);
446 	}
447 
448 	/*
449 	 * No -e or -f?  Make sure there is one more arg, use it as the pattern.
450 	 */
451 	if (patterns == NULL && !fflag) {
452 		if (argc < 2)
453 			usage();
454 		addpattern(argv[1]);
455 		argc--;
456 		argv++;
457 	}
458 
459 	/*
460 	 * If -x flag is not specified or -i flag is specified
461 	 * with fgrep in a multibyte locale, need to use
462 	 * the wide character APIs.  Otherwise, byte-oriented
463 	 * process will be done.
464 	 */
465 	use_wchar = Fflag && mblocale && (!xflag || iflag);
466 
467 	/*
468 	 * Compile Patterns and also decide if BMG can be used
469 	 */
470 	fixpatterns();
471 
472 	/* Process all files: stdin, or rest of arg list */
473 	if (argc < 2) {
474 		matched = grep(0, STDIN_FILENAME);
475 	} else {
476 		if (Hflag || (argc > 2 && hflag == 0))
477 			outfn = 1;	/* Print filename on match line */
478 		for (argv++; *argv != NULL; argv++) {
479 			process_path(*argv);
480 		}
481 	}
482 	/*
483 	 * Return() here is used instead of exit
484 	 */
485 
486 	(void) fflush(stdout);
487 
488 	if (errors)
489 		return (2);
490 	return (matched ? 0 : 1);
491 }
492 
493 static void
494 process_path(const char *path)
495 {
496 	struct	stat st;
497 	int	walkflags = FTW_CHDIR;
498 	char	*buf = NULL;
499 
500 	if (rflag) {
501 		if (stat(path, &st) != -1 &&
502 		    (st.st_mode & S_IFMT) == S_IFDIR) {
503 			if (!hflag)
504 				outfn = 1; /* Print filename unless -h */
505 
506 			/*
507 			 * Add trailing slash if arg
508 			 * is directory, to resolve symlinks.
509 			 */
510 			if (path[strlen(path) - 1] != '/') {
511 				(void) asprintf(&buf, "%s/", path);
512 				if (buf != NULL)
513 					path = buf;
514 			}
515 
516 			/*
517 			 * Search through subdirs if path is directory.
518 			 * Don't follow symlinks if Rflag is not set.
519 			 */
520 			if (!Rflag)
521 				walkflags |= FTW_PHYS;
522 
523 			if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
524 				if (!sflag)
525 					(void) fprintf(stderr,
526 					    gettext("%s: can't open \"%s\"\n"),
527 					    cmdname, path);
528 				errors = 1;
529 			}
530 			return;
531 		}
532 	}
533 	process_file(path, 0);
534 }
535 
536 /*
537  * Read and process all files in directory recursively.
538  */
539 static int
540 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
541 {
542 	/*
543 	 * Process files and follow symlinks if Rflag set.
544 	 */
545 	if (info != FTW_F) {
546 		/* Report broken symlinks and unreadable files */
547 		if (!sflag &&
548 		    (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
549 			(void) fprintf(stderr,
550 			    gettext("%s: can't open \"%s\"\n"), cmdname, name);
551 		}
552 		return (0);
553 	}
554 
555 
556 	/* Skip devices and pipes if Rflag is not set */
557 	if (!Rflag && !S_ISREG(statp->st_mode))
558 		return (0);
559 	/* Pass offset to relative name from FTW_CHDIR */
560 	process_file(name, ftw->base);
561 	return (0);
562 }
563 
564 /*
565  * Opens file and call grep function.
566  */
567 static void
568 process_file(const char *name, int base)
569 {
570 	int fd;
571 
572 	if ((fd = open(name + base, O_RDONLY)) == -1) {
573 		errors = 1;
574 		if (!sflag) /* Silent mode */
575 			(void) fprintf(stderr, gettext(
576 			    "%s: can't open \"%s\"\n"),
577 			    cmdname, name);
578 		return;
579 	}
580 	matched |= grep(fd, name);
581 	(void) close(fd);
582 
583 	if (ferror(stdout)) {
584 		(void) fprintf(stderr, gettext(
585 		    "%s: error writing to stdout\n"),
586 		    cmdname);
587 		(void) fflush(stdout);
588 		exit(2);
589 	}
590 
591 }
592 
593 /*
594  * Add a file of strings to the pattern list.
595  */
596 static void
597 addfile(const char *fn)
598 {
599 	FILE	*fp;
600 	char	*inbuf;
601 	char	*bufp;
602 	size_t	bufsiz, buflen, bufused;
603 
604 	/*
605 	 * Open the pattern file
606 	 */
607 	if ((fp = fopen(fn, "r")) == NULL) {
608 		(void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
609 		    cmdname, fn);
610 		exit(2);
611 	}
612 	bufsiz = BUFSIZE;
613 	if ((inbuf = malloc(bufsiz)) == NULL) {
614 		(void) fprintf(stderr,
615 		    gettext("%s: out of memory\n"), cmdname);
616 		exit(2);
617 	}
618 	bufp = inbuf;
619 	bufused = 0;
620 	/*
621 	 * Read in the file, reallocing as we need more memory
622 	 */
623 	while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
624 		buflen = strlen(bufp);
625 		bufused += buflen;
626 		if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
627 			/*
628 			 * if this line does not fit to the buffer,
629 			 * realloc larger buffer
630 			 */
631 			bufsiz += BUFSIZE;
632 			if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
633 				(void) fprintf(stderr,
634 				    gettext("%s: out of memory\n"),
635 				    cmdname);
636 				exit(2);
637 			}
638 			bufp = inbuf + bufused;
639 			continue;
640 		}
641 		if (bufp[buflen - 1] == '\n') {
642 			bufp[--buflen] = '\0';
643 		}
644 		addpattern(inbuf);
645 
646 		bufp = inbuf;
647 		bufused = 0;
648 	}
649 	free(inbuf);
650 	free(prntbuf);
651 	free(conbuf);
652 	(void) fclose(fp);
653 }
654 
655 /*
656  * Add a string to the pattern list.
657  */
658 static void
659 addpattern(char *s)
660 {
661 	PATTERN	*pp;
662 	char	*wordbuf;
663 	char	*np;
664 
665 	for (; ; ) {
666 		np = strchr(s, '\n');
667 		if (np != NULL)
668 			*np = '\0';
669 		if ((pp = malloc(sizeof (PATTERN))) == NULL) {
670 			(void) fprintf(stderr, gettext(
671 			    "%s: out of memory\n"),
672 			    cmdname);
673 			exit(2);
674 		}
675 		if (wflag) {
676 			/*
677 			 * Solaris wflag support: Add '<' '>' to pattern to
678 			 * select it as a word. Doesn't make sense with -F
679 			 * but we're Libertarian.
680 			 */
681 			size_t	slen, wordlen;
682 
683 			slen = strlen(s);
684 			wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
685 			if ((wordbuf = malloc(wordlen)) == NULL) {
686 				(void) fprintf(stderr,
687 				    gettext("%s: out of memory\n"),
688 				    cmdname);
689 				exit(2);
690 			}
691 			(void) strcpy(wordbuf, "\\<");
692 			(void) strcpy(wordbuf + 2, s);
693 			(void) strcpy(wordbuf + 2 + slen, "\\>");
694 		} else {
695 			if ((wordbuf = strdup(s)) == NULL) {
696 				(void) fprintf(stderr,
697 				    gettext("%s: out of memory\n"),
698 				    cmdname);
699 				exit(2);
700 			}
701 		}
702 		pp->pattern = wordbuf;
703 		pp->next = patterns;
704 		patterns = pp;
705 		if (np == NULL)
706 			break;
707 		s = np + 1;
708 	}
709 }
710 
711 /*
712  * Fix patterns.
713  * Must do after all arguments read, in case later -i option.
714  */
715 static void
716 fixpatterns(void)
717 {
718 	PATTERN	*pp;
719 	int	rv, fix_pattern, npatterns;
720 
721 	/*
722 	 * Fix the specified pattern if -x is specified.
723 	 */
724 	fix_pattern = !Fflag && xflag;
725 
726 	for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
727 		npatterns++;
728 		if (fix_pattern) {
729 			char	*cp, *cq;
730 			size_t	plen, nplen;
731 
732 			plen = strlen(pp->pattern);
733 			/* '^' pattern '$' */
734 			nplen = 1 + plen + 1 + 1;
735 			if ((cp = malloc(nplen)) == NULL) {
736 				(void) fprintf(stderr,
737 				    gettext("%s: out of memory\n"),
738 				    cmdname);
739 				exit(2);
740 			}
741 			cq = cp;
742 			*cq++ = '^';
743 			cq = strcpy(cq, pp->pattern) + plen;
744 			*cq++ = '$';
745 			*cq = '\0';
746 			free(pp->pattern);
747 			pp->pattern = cp;
748 		}
749 
750 		if (Fflag) {
751 			if (use_wchar) {
752 				/*
753 				 * Fflag && mblocale && iflag
754 				 * Fflag && mblocale && !xflag
755 				 */
756 				size_t	n;
757 				n = strlen(pp->pattern) + 1;
758 				if ((pp->wpattern =
759 				    malloc(sizeof (wchar_t) * n)) == NULL) {
760 					(void) fprintf(stderr,
761 					    gettext("%s: out of memory\n"),
762 					    cmdname);
763 					exit(2);
764 				}
765 				if (mbstowcs(pp->wpattern, pp->pattern, n) ==
766 				    (size_t)-1) {
767 					(void) fprintf(stderr,
768 					    gettext("%s: failed to convert "
769 					    "\"%s\" to wide-characters\n"),
770 					    cmdname, pp->pattern);
771 					exit(2);
772 				}
773 				if (iflag) {
774 					wchar_t	*wp;
775 					for (wp = pp->wpattern; *wp != L'\0';
776 					    wp++) {
777 						*wp = towlower((wint_t)*wp);
778 					}
779 				}
780 				free(pp->pattern);
781 			} else {
782 				/*
783 				 * Fflag && mblocale && !iflag
784 				 * Fflag && !mblocale && iflag
785 				 * Fflag && !mblocale && !iflag
786 				 */
787 				if (iflag) {
788 					unsigned char	*cp;
789 					for (cp = (unsigned char *)pp->pattern;
790 					    *cp != '\0'; cp++) {
791 						*cp = tolower(*cp);
792 					}
793 				}
794 			}
795 			/*
796 			 * fgrep: No regular expressions.
797 			 */
798 			continue;
799 		}
800 
801 		/*
802 		 * For non-fgrep, compile the regular expression,
803 		 * give an informative error message, and exit if
804 		 * it didn't compile.
805 		 */
806 		if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
807 			(void) regerror(rv, &pp->re, errstr, sizeof (errstr));
808 			(void) fprintf(stderr,
809 			    gettext("%s: RE error in %s: %s\n"),
810 			    cmdname, pp->pattern, errstr);
811 			exit(2);
812 		}
813 		free(pp->pattern);
814 	}
815 
816 	/*
817 	 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
818 	 * Use the Boyer-Moore-Gosper algorithm if:
819 	 * - fgrep			(Fflag)
820 	 * - singlebyte locale		(!mblocale)
821 	 * - no ignoring case		(!iflag)
822 	 * - no printing line numbers	(!nflag)
823 	 * - no negating the output	(nvflag)
824 	 * - only one pattern		(npatterns == 1)
825 	 * - non zero length pattern	(strlen(patterns->pattern) != 0)
826 	 * - no context required	(conflag == 0)
827 	 *
828 	 * It's guaranteed patterns->pattern is still alive
829 	 * when Fflag && !mblocale.
830 	 */
831 	use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
832 	    (npatterns == 1) && (strlen(patterns->pattern) != 0) &&
833 	    conflag == 0;
834 }
835 
836 /*
837  * Search a newline from the beginning of the string
838  */
839 static char *
840 find_nl(const char *ptr, size_t len)
841 {
842 	while (len-- != 0) {
843 		if (*ptr++ == '\n') {
844 			return ((char *)--ptr);
845 		}
846 	}
847 	return (NULL);
848 }
849 
850 /*
851  * Search a newline from the end of the string
852  */
853 static char *
854 rfind_nl(const char *ptr, size_t len)
855 {
856 	const char	*uptr = ptr + len;
857 	while (len--) {
858 		if (*--uptr == '\n') {
859 			return ((char *)uptr);
860 		}
861 	}
862 	return (NULL);
863 }
864 
865 /*
866  * Duplicate the specified string converting each character
867  * into a lower case.
868  */
869 static char *
870 istrdup(const char *s1)
871 {
872 	static size_t	ibuflen = 0;
873 	static char	*ibuf = NULL;
874 	size_t	slen;
875 	char	*p;
876 
877 	slen = strlen(s1);
878 	if (slen >= ibuflen) {
879 		/* ibuf does not fit to s1 */
880 		ibuflen = slen + 1;
881 		ibuf = realloc(ibuf, ibuflen);
882 		if (ibuf == NULL) {
883 			(void) fprintf(stderr,
884 			    gettext("%s: out of memory\n"), cmdname);
885 			exit(2);
886 		}
887 	}
888 	p = ibuf;
889 	do {
890 		*p++ = tolower(*s1);
891 	} while (*s1++ != '\0');
892 	return (ibuf);
893 }
894 
895 /*
896  * Do grep on a single file.
897  * Return true in any lines matched.
898  *
899  * We have two strategies:
900  * The fast one is used when we have a single pattern with
901  * a string known to occur in the pattern. We can then
902  * do a BMG match on the whole buffer.
903  * This is an order of magnitude faster.
904  * Otherwise we split the buffer into lines,
905  * and check for a match on each line.
906  */
907 static int
908 grep(int fd, const char *fn)
909 {
910 	PATTERN *pp;
911 	off_t	data_len;	/* length of the data chunk */
912 	off_t	line_len;	/* length of the current line */
913 	off_t	line_offset;	/* current line's offset from the beginning */
914 	off_t	blkoffset;	/* line_offset but context-compatible */
915 	long long	lineno, linenum;
916 	long long	matches = 0;	/* Number of matching lines */
917 	long long	conacnt = 0, conbcnt = 0;	/* context line count */
918 	int	newlinep;	/* 0 if the last line of file has no newline */
919 	char	*ptr, *ptrend, *prntptr, *prntptrend;
920 	char	*nextptr = NULL, *nextend = NULL;
921 	char	*conptr = NULL, *conptrend = NULL;
922 	char	*matchptr = NULL;
923 	int	conaprnt = 0, conbprnt = 0, lastmatch = 0;
924 	boolean_t	nearmatch; /* w/in N+1 of last match */
925 	boolean_t	havematch = B_FALSE; /* have a match in context */
926 	size_t	prntlen;
927 
928 	if (patterns == NULL)
929 		return (0);	/* no patterns to match -- just return */
930 
931 	pp = patterns;
932 
933 	if (use_bmg) {
934 		bmgcomp(pp->pattern, strlen(pp->pattern));
935 	}
936 
937 	if (use_wchar && outline == NULL) {
938 		outbuflen = BUFSIZE + 1;
939 		outline = malloc(sizeof (wchar_t) * outbuflen);
940 		if (outline == NULL) {
941 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
942 			    cmdname);
943 			exit(2);
944 		}
945 	}
946 
947 	if (prntbuf == NULL) {
948 		prntbuflen = BUFSIZE;
949 		if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
950 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
951 			    cmdname);
952 			exit(2);
953 		}
954 	}
955 
956 	if (conflag != 0 && (conbuf == NULL)) {
957 		conbuflen = BUFSIZE;
958 		if ((conbuf = malloc(BUFSIZE+1)) == NULL) {
959 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
960 			    cmdname);
961 			exit(2);
962 		}
963 	}
964 
965 	nearmatch = (conmatches != 0);
966 	blkoffset = line_offset = 0;
967 	lineno = 0;
968 	linenum = 1;
969 	newlinep = 1;
970 	data_len = 0;
971 	for (; ; ) {
972 		long	count;
973 		off_t	offset = 0;
974 		char	separate;
975 		boolean_t	last_ctx = B_FALSE, eof = B_FALSE;
976 
977 		if (data_len == 0) {
978 			/*
979 			 * If no data in the buffer, reset ptr
980 			 */
981 			ptr = prntbuf;
982 			if (conflag != 0 && conptr == NULL) {
983 				conptr = conbuf;
984 				conptrend = conptr - 1;
985 			}
986 		}
987 		if (ptr == prntbuf) {
988 			/*
989 			 * The current data chunk starts from prntbuf.
990 			 * This means either the buffer has no data
991 			 * or the buffer has no newline.
992 			 * So, read more data from input.
993 			 */
994 			count = read(fd, ptr + data_len, prntbuflen - data_len);
995 			if (count < 0) {
996 				/* read error */
997 				if (cflag) {
998 					if (outfn && !rflag) {
999 						(void) fprintf(stdout,
1000 						    "%s:", fn);
1001 					}
1002 					if (!qflag && !rflag) {
1003 						(void) fprintf(stdout, "%lld\n",
1004 						    matches);
1005 					}
1006 				}
1007 				return (0);
1008 			} else if (count == 0) {
1009 				/* no new data */
1010 				eof = B_TRUE;
1011 
1012 				if (data_len == 0) {
1013 					/* end of file already reached */
1014 					if (conflag != 0) {
1015 						if (conptrend >= conptr)
1016 							*conptrend = '\n';
1017 						last_ctx = B_TRUE;
1018 						goto L_next_line;
1019 					} else {
1020 						goto out;
1021 					}
1022 				}
1023 				/* last line of file has no newline */
1024 				ptrend = ptr + data_len;
1025 				newlinep = 0;
1026 				goto L_start_process;
1027 			}
1028 			offset = data_len;
1029 			data_len += count;
1030 		}
1031 
1032 		/*
1033 		 * Look for newline in the chunk
1034 		 * between ptr + offset and ptr + data_len - offset.
1035 		 */
1036 		ptrend = find_nl(ptr + offset, data_len - offset);
1037 		if (ptrend == NULL) {
1038 			/* no newline found in this chunk */
1039 			if (ptr > prntbuf) {
1040 				/*
1041 				 * Move remaining data to the beginning
1042 				 * of the buffer.
1043 				 * Remaining data lie from ptr for
1044 				 * data_len bytes.
1045 				 */
1046 				(void) memmove(prntbuf, ptr, data_len);
1047 			}
1048 			if (data_len == prntbuflen) {
1049 				/*
1050 				 * Not enough room in the buffer
1051 				 */
1052 				if (prntbuflen > SIZE_MAX - BUFSIZE) {
1053 					(void) fprintf(stderr,
1054 					    gettext("%s: buflen would"
1055 					    " overflow\n"),
1056 					    cmdname);
1057 					exit(2);
1058 				}
1059 
1060 				prntbuflen += BUFSIZE;
1061 				prntbuf = realloc(prntbuf, prntbuflen + 1);
1062 				if (prntbuf == NULL) {
1063 					(void) fprintf(stderr,
1064 					    gettext("%s: out of memory\n"),
1065 					    cmdname);
1066 					exit(2);
1067 				}
1068 			}
1069 			ptr = prntbuf;
1070 			/* read the next input */
1071 			continue;
1072 		}
1073 L_start_process:
1074 
1075 		/*
1076 		 * Beginning of the chunk:	ptr
1077 		 * End of the chunk:		ptr + data_len
1078 		 * Beginning of the line:	ptr
1079 		 * End of the line:		ptrend
1080 		 *
1081 		 * conptr:	Beginning of the context.
1082 		 * conptrend: If context is empty, conptr - 1 (invalid memory).
1083 		 *	Otherwise, Last newline in the context.
1084 		 */
1085 
1086 		if (use_bmg) {
1087 			/*
1088 			 * Use Boyer-Moore-Gosper algorithm to find out if
1089 			 * this chunk (not this line) contains the specified
1090 			 * pattern.  If not, restart from the last line
1091 			 * of this chunk.
1092 			 */
1093 			char	*bline;
1094 			bline = bmgexec(ptr, ptr + data_len);
1095 			if (bline == NULL) {
1096 				/*
1097 				 * No pattern found in this chunk.
1098 				 * Need to find the last line
1099 				 * in this chunk.
1100 				 */
1101 				ptrend = rfind_nl(ptr, data_len);
1102 
1103 				/*
1104 				 * When this chunk does not contain newline,
1105 				 * ptrend becomes NULL, which should happen
1106 				 * when the last line of file does not end
1107 				 * with a newline.  At such a point,
1108 				 * newlinep should have been set to 0.
1109 				 * Therefore, just after jumping to
1110 				 * L_skip_line, the main for-loop quits,
1111 				 * and the line_len value won't be
1112 				 * used.
1113 				 */
1114 				line_len = ptrend - ptr;
1115 				goto L_skip_line;
1116 			}
1117 			if (bline > ptrend) {
1118 				/*
1119 				 * Pattern found not in the first line
1120 				 * of this chunk.
1121 				 * Discard the first line.
1122 				 */
1123 				line_len = ptrend - ptr;
1124 				goto L_skip_line;
1125 			}
1126 			/*
1127 			 * Pattern found in the first line of this chunk.
1128 			 * Using this result.
1129 			 */
1130 			*ptrend = '\0';
1131 			line_len = ptrend - ptr;
1132 
1133 			/*
1134 			 * before jumping to L_next_line,
1135 			 * need to handle xflag if specified
1136 			 */
1137 			if (xflag && (line_len != bmglen ||
1138 			    strcmp(bmgpat, ptr) != 0)) {
1139 				/* didn't match */
1140 				pp = NULL;
1141 			} else {
1142 				pp = patterns; /* to make it happen */
1143 			}
1144 			goto L_next_line;
1145 		}
1146 		lineno++;
1147 		/*
1148 		 * Line starts from ptr and ends at ptrend.
1149 		 * line_len will be the length of the line.
1150 		 */
1151 		*ptrend = '\0';
1152 		line_len = ptrend - ptr;
1153 
1154 		/*
1155 		 * From now, the process will be performed based
1156 		 * on the line from ptr to ptrend.
1157 		 */
1158 		if (use_wchar) {
1159 			size_t	len;
1160 
1161 			if (line_len >= outbuflen) {
1162 				outbuflen = line_len + 1;
1163 				outline = realloc(outline,
1164 				    sizeof (wchar_t) * outbuflen);
1165 				if (outline == NULL) {
1166 					(void) fprintf(stderr,
1167 					    gettext("%s: out of memory\n"),
1168 					    cmdname);
1169 					exit(2);
1170 				}
1171 			}
1172 
1173 			len = mbstowcs(outline, ptr, line_len);
1174 			if (len == (size_t)-1) {
1175 				(void) fprintf(stderr, gettext(
1176 	"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1177 				    cmdname, fn, lineno);
1178 				/* never match a line with invalid sequence */
1179 				goto L_skip_line;
1180 			}
1181 			outline[len] = L'\0';
1182 
1183 			if (iflag) {
1184 				wchar_t	*cp;
1185 				for (cp = outline; *cp != '\0'; cp++) {
1186 					*cp = towlower((wint_t)*cp);
1187 				}
1188 			}
1189 
1190 			if (xflag) {
1191 				for (pp = patterns; pp; pp = pp->next) {
1192 					if (outline[0] == pp->wpattern[0] &&
1193 					    wcscmp(outline,
1194 					    pp->wpattern) == 0) {
1195 						/* matched */
1196 						break;
1197 					}
1198 				}
1199 			} else {
1200 				for (pp = patterns; pp; pp = pp->next) {
1201 					if (wcswcs(outline, pp->wpattern)
1202 					    != NULL) {
1203 						/* matched */
1204 						break;
1205 					}
1206 				}
1207 			}
1208 		} else if (Fflag) {
1209 			/* fgrep in byte-oriented handling */
1210 			char	*fptr;
1211 			if (iflag) {
1212 				fptr = istrdup(ptr);
1213 			} else {
1214 				fptr = ptr;
1215 			}
1216 			if (xflag) {
1217 				/* fgrep -x */
1218 				for (pp = patterns; pp; pp = pp->next) {
1219 					if (fptr[0] == pp->pattern[0] &&
1220 					    strcmp(fptr, pp->pattern) == 0) {
1221 						/* matched */
1222 						break;
1223 					}
1224 				}
1225 			} else {
1226 				for (pp = patterns; pp; pp = pp->next) {
1227 					if (strstr(fptr, pp->pattern) != NULL) {
1228 						/* matched */
1229 						break;
1230 					}
1231 				}
1232 			}
1233 		} else {
1234 			/* grep or egrep */
1235 			for (pp = patterns; pp; pp = pp->next) {
1236 				int	rv;
1237 
1238 				rv = regexec(&pp->re, ptr, 0, NULL, 0);
1239 				if (rv == REG_OK) {
1240 					/* matched */
1241 					break;
1242 				}
1243 
1244 				switch (rv) {
1245 				case REG_NOMATCH:
1246 					break;
1247 				case REG_ECHAR:
1248 					(void) fprintf(stderr, gettext(
1249 	    "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1250 					    cmdname, fn, lineno);
1251 					break;
1252 				default:
1253 					(void) regerror(rv, &pp->re, errstr,
1254 					    sizeof (errstr));
1255 					(void) fprintf(stderr, gettext(
1256 	    "%s: input file \"%s\": line %lld: %s\n"),
1257 					    cmdname, fn, lineno, errstr);
1258 					exit(2);
1259 				}
1260 			}
1261 		}
1262 
1263 		/*
1264 		 * Context is set up as follows:
1265 		 * For a 'Before' context, we maintain a set of pointers
1266 		 * containing 'N' lines of context. If the current number of
1267 		 * lines contained is greater than N, and N isn't a match, the
1268 		 * start pointer is moved forward to the next newline.
1269 		 *
1270 		 * If we ever find a match, we print out immediately.
1271 		 * 'nearmatch' tells us if we're within N+1 lines of the last
1272 		 * match ; if we are, and we find another match, we don't
1273 		 * separate the matches. 'nearmatch' becomes false when
1274 		 * a line gets rotated out of the context.
1275 		 *
1276 		 * For an 'After' context, we simply wait until we've found a
1277 		 * match, then create a context N+1 lines big. If we don't find
1278 		 * a match within the context, we print out the current context.
1279 		 * Otherwise, we save a reference to the new matching line,
1280 		 * print out the other context, and reset our context pointers
1281 		 * to the new matching line.
1282 		 *
1283 		 * 'nearmatch' becomes false when we find a non-matching line
1284 		 * that isn't a part of any context.
1285 		 *
1286 		 * A full-context is implemented as a combination of the
1287 		 * 'Before' and 'After' context logic. Before we find a match,
1288 		 * we follow the Before logic. When we find a match, we
1289 		 * follow the After logic. 'nearmatch' is handled by the Before
1290 		 * logic.
1291 		 */
1292 
1293 		if (conflag == 0)
1294 			goto L_next_line;
1295 
1296 		/* Do we have room to add this line to the context buffer? */
1297 		while ((line_len + 1) > (conbuflen -
1298 		    ((conptrend >= conptr) ? conptrend - conbuf : 0))) {
1299 			char *oldconbuf = conbuf;
1300 			char *oldconptr = conptr;
1301 			long tmp = matchptr - conptr;
1302 
1303 			if (conbuflen > SIZE_MAX - BUFSIZE) {
1304 				(void) fprintf(stderr,
1305 				    gettext("%s: buflen would overflow\n"),
1306 				    cmdname);
1307 				exit(2);
1308 			}
1309 
1310 			conbuflen += BUFSIZE;
1311 			conbuf = realloc(conbuf, conbuflen + 1);
1312 			if (conbuf == NULL) {
1313 				(void) fprintf(stderr,
1314 				    gettext("%s: out of memory\n"),
1315 				    cmdname);
1316 				exit(2);
1317 			}
1318 
1319 			conptr = conbuf + (conptr - oldconbuf);
1320 			conptrend = conptr + (conptrend - oldconptr);
1321 			if (matchptr)
1322 				matchptr = conptr + tmp;
1323 		}
1324 		(void) memcpy(conptrend + 1, ptr, line_len);
1325 		conptrend += line_len + 1;
1326 		*conptrend = '\n';
1327 
1328 		if (nvflag == (pp != NULL)) {
1329 			/* matched */
1330 			if (havematch) {
1331 				if ((conflag & AFTER) != 0) {
1332 					conaprnt = 1;
1333 					nextend = conptrend;
1334 					conptrend = conptr + lastmatch;
1335 					nextptr = conptrend + 1;
1336 					*nextend = '\n';
1337 				}
1338 			} else {
1339 				if (conflag == AFTER) {
1340 					conptr = conptrend - (line_len);
1341 					linenum = lineno;
1342 				}
1343 				blkoffset = line_offset -
1344 				    (conptrend - conptr - line_len);
1345 			}
1346 
1347 			if (conflag == BEFORE)
1348 				conbprnt = 1;
1349 
1350 			lastmatch = conptrend - conptr;
1351 			havematch = B_TRUE;
1352 			goto L_next_line;
1353 		}
1354 
1355 		if (!havematch) {
1356 			if ((conflag & BEFORE) != 0) {
1357 				if (conbcnt >= conblen) {
1358 					char *tmp = conptr;
1359 					conptr = find_nl(conptr,
1360 					    conptrend - conptr) + 1;
1361 					if (bflag)
1362 						blkoffset += conptr - tmp;
1363 					linenum++;
1364 					nearmatch = B_TRUE;
1365 				} else {
1366 					conbcnt++;
1367 				}
1368 			}
1369 			if (conflag == AFTER)
1370 				nearmatch = B_TRUE;
1371 		} else  {
1372 			if (++conacnt >= conalen && !conaprnt && conalen)
1373 				conaprnt = 1;
1374 			else
1375 				lastmatch = conptrend - conptr;
1376 		}
1377 
1378 L_next_line:
1379 		/*
1380 		 * Here, if pp points to non-NULL, something has been matched
1381 		 * to the pattern.
1382 		 */
1383 		if (!last_ctx && nvflag == (pp != NULL)) {
1384 			matches++;
1385 			if (!nextend)
1386 				matchptr = (conflag != 0) ? conptrend : ptrend;
1387 		}
1388 
1389 		/*
1390 		 * Set up some print context so that we can treat
1391 		 * single-line matches as a zero-N context.
1392 		 * Apply CLI flags to each line of the context.
1393 		 *
1394 		 * For context, we only print if we both have a match and are
1395 		 * either at the end of the data stream, or we've previously
1396 		 * declared that we want to print for a particular context.
1397 		 */
1398 		if (havematch && (eof || conaprnt || conbprnt)) {
1399 
1400 			/*
1401 			 * We'd normally do this earlier, but we had to
1402 			 * escape early because we reached the end of the data.
1403 			 */
1404 			if (eof && nextptr)
1405 				conptrend = nextend;
1406 
1407 			prntlen = conptrend - conptr + 1;
1408 			prntptr = conptr;
1409 			if (conmatches++ && nearmatch && !cflag)
1410 				(void) fwrite("--\n", 1, 3, stdout);
1411 		} else if (conflag == 0 && nvflag == (pp != NULL)) {
1412 			*ptrend = '\n';
1413 			prntlen = line_len + 1;
1414 			prntptr = ptr;
1415 			linenum = lineno;
1416 			blkoffset = line_offset;
1417 		} else if (eof) {
1418 			/* No match and no more data */
1419 			goto out;
1420 		} else {
1421 			/* No match, or we're not done building context */
1422 			goto L_skip_line;
1423 		}
1424 
1425 		prntptrend = prntptr - 1;
1426 		while ((prntptrend = find_nl(prntptrend + 1,
1427 		    prntlen)) != NULL) {
1428 
1429 			/*
1430 			 * GNU grep uses '-' for context lines and ':' for
1431 			 * matching lines, so replicate that here.
1432 			 */
1433 			if (prntptrend == matchptr) {
1434 				if (eof && nextptr) {
1435 					matchptr = nextend;
1436 					nextptr = NULL;
1437 				} else {
1438 					matchptr = NULL;
1439 				}
1440 				separate = ':';
1441 			} else {
1442 				separate = '-';
1443 			}
1444 
1445 			/*
1446 			 * Handle q, l, and c flags.
1447 			 */
1448 			if (qflag) {
1449 				/* no need to continue */
1450 				/*
1451 				 * End of this line is ptrend.
1452 				 * We have read up to ptr + data_len.
1453 				 */
1454 				off_t	pos;
1455 				pos = ptr + data_len - (ptrend + 1);
1456 				(void) lseek(fd, -pos, SEEK_CUR);
1457 				exit(0);
1458 			}
1459 			if (lflag) {
1460 				(void) printf("%s\n", fn);
1461 				goto out;
1462 			}
1463 			if (Lflag) {
1464 				goto out;
1465 			}
1466 			if (!cflag) {
1467 				if (Hflag || outfn) {
1468 					(void) printf("%s%c", fn, separate);
1469 				}
1470 				if (bflag) {
1471 					(void) printf("%lld%c", (offset_t)
1472 					    (blkoffset / BSIZE), separate);
1473 				}
1474 				if (nflag) {
1475 					(void) printf("%lld%c", linenum,
1476 					    separate);
1477 				}
1478 				(void) fwrite(prntptr, 1,
1479 				    prntptrend - prntptr + 1, stdout);
1480 			}
1481 			if (ferror(stdout)) {
1482 				return (0);
1483 			}
1484 			linenum++;
1485 			prntlen -= prntptrend - prntptr + 1;
1486 			blkoffset += prntptrend - prntptr + 1;
1487 			prntptr = prntptrend + 1;
1488 		}
1489 
1490 		if (eof)
1491 			goto out;
1492 
1493 		/*
1494 		 * Update context buffer and variables post-print
1495 		 */
1496 		if (conflag != 0) {
1497 			conptr = conbuf;
1498 			conaprnt = conbprnt = 0;
1499 			nearmatch = B_FALSE;
1500 			conacnt = conbcnt = 0;
1501 
1502 			if (nextptr) {
1503 				(void) memmove(conbuf, nextptr,
1504 				    nextend - nextptr + 1);
1505 				blkoffset += nextptr - conptrend - 1;
1506 				conptrend = conptr + (nextend - nextptr);
1507 				matchptr = conptrend;
1508 				linenum = lineno;
1509 				lastmatch = conptrend - conptr;
1510 				havematch = B_TRUE;
1511 			} else {
1512 				conptrend = conptr - 1;
1513 				conacnt = 0;
1514 				lastmatch = 0;
1515 				havematch = B_FALSE;
1516 			}
1517 			nextptr = nextend = NULL;
1518 		}
1519 
1520 L_skip_line:
1521 		if (!newlinep)
1522 			break;
1523 
1524 		data_len -= line_len + 1;
1525 		line_offset += line_len + 1;
1526 		ptr = ptrend + 1;
1527 	}
1528 
1529 out:
1530 	if (cflag) {
1531 		if (Hflag || outfn) {
1532 			(void) printf("%s:", fn);
1533 		}
1534 		if (!qflag) {
1535 			(void) printf("%lld\n", matches);
1536 		}
1537 	}
1538 
1539 	/*
1540 	 * -L tells us to print the filename only when it doesn't match. So we
1541 	 * run through the normal operationa and then invert it.
1542 	 */
1543 	if (Lflag) {
1544 		if (matches == 0) {
1545 			(void) printf("%s\n", fn);
1546 			matches = 1;
1547 		} else {
1548 			matches = 0;
1549 		}
1550 	}
1551 
1552 	return (matches != 0);
1553 }
1554 
1555 /*
1556  * usage message for grep
1557  */
1558 static void
1559 usage(void)
1560 {
1561 	(void) fprintf(stderr, gettext("usage: %5s"), cmdname);
1562 	if (!egrep && !fgrep)
1563 		(void) fprintf(stderr, gettext(" [-E|-F]"));
1564 	(void) fprintf(stderr, gettext(" [-bchHilLnqrRsvx] [-A num] [-B num] "
1565 	    "[-C num|-num]\n             [-e pattern_list]... "
1566 	    "[-f pattern_file]... [pattern_list] [file]...\n"));
1567 	exit(2);
1568 	/* NOTREACHED */
1569 }
1570 
1571 /*
1572  * Compile literal pattern into BMG tables
1573  */
1574 static void
1575 bmgcomp(char *pat, int len)
1576 {
1577 	int	i;
1578 	int	tlen;
1579 	unsigned char	*uc = (unsigned char *)pat;
1580 
1581 	bmglen = len;
1582 	bmgpat = pat;
1583 
1584 	for (i = 0; i < M_CSETSIZE; i++) {
1585 		bmgtab[i] = len;
1586 	}
1587 
1588 	len--;
1589 	for (tlen = len, i = 0; i <= len; i++, tlen--) {
1590 		bmgtab[*uc++] = tlen;
1591 	}
1592 }
1593 
1594 /*
1595  * BMG search.
1596  */
1597 static char *
1598 bmgexec(char *str, char *end)
1599 {
1600 	int	t;
1601 	char	*k, *s, *p;
1602 
1603 	k = str + bmglen - 1;
1604 	if (bmglen == 1) {
1605 		return (memchr(str, bmgpat[0], end - str));
1606 	}
1607 	for (; ; ) {
1608 		/* inner loop, should be most optimized */
1609 		while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1610 			k += t;
1611 		}
1612 		if (k >= end) {
1613 			return (NULL);
1614 		}
1615 		for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1616 			if (p == bmgpat) {
1617 				return (s);
1618 			}
1619 		}
1620 		k++;
1621 	}
1622 	/* NOTREACHED */
1623 }
1624