xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 /*
36  * grep -- print lines matching (or not matching) a pattern
37  *
38  *	status returns:
39  *		0 - ok, and some matches
40  *		1 - ok, but no matches
41  *		2 - some error
42  */
43 
44 #include <sys/types.h>
45 
46 #include <ctype.h>
47 #include <fcntl.h>
48 #include <locale.h>
49 #include <memory.h>
50 #include <regexpr.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 
/*
 * Diagnostic text for regular expression errors, indexed by the slot that
 * regerr() selects for a given regexpr error code.  The code that maps to
 * each slot is noted alongside it; slot 12 is the fallback used for any
 * other code.  The table is NULL-terminated.
 */
static const char * const errstr[] = {
	[0]  = "Range endpoint too large.",		/* code 11 */
	[1]  = "Bad number.",				/* code 16 */
	[2]  = "``\\digit'' out of range.",		/* code 25 */
	[3]  = "No remembered search string.",		/* code 41 */
	[4]  = "\\( \\) imbalance.",			/* code 42 */
	[5]  = "Too many \\(.",				/* code 43 */
	[6]  = "More than 2 numbers given in \\{ \\}.",	/* code 44 */
	[7]  = "} expected after \\.",			/* code 45 */
	[8]  = "First number exceeds second in \\{ \\}.", /* code 46 */
	[9]  = "[ ] imbalance.",			/* code 49 */
	[10] = "Regular expression overflow.",		/* code 50 */
	[11] = "Illegal byte sequence.",		/* code 67 */
	[12] = "Unknown regexp error code!!",		/* anything else */
	[13] = NULL
};
72 
/*
 * errmsg -- write a diagnostic taking exactly one argument to stderr.  The
 * format string is passed through gettext() before being handed to fprintf().
 */
#define	errmsg(msg, arg)	(void) fprintf(stderr, gettext(msg), (arg))

/* divisor applied to the file offset when printing the -b block number */
#define	BLKSIZE	512

/* bytes requested per read(2); also the step by which the buffers grow */
#define	GBUFSIZ	8192
76 
/* option flags, each incremented by main() when its letter is seen */
static int	bflag;		/* -b: prefix lines with a block number */
static int	cflag;		/* -c: print only a count of selected lines */
static int	hflag;		/* -h: never prefix lines with the file name */
static int	iflag;		/* -i, -y: compare in lower case */
static int	lflag;		/* -l: print only the names of matching files */
static int	nflag;		/* -n: prefix lines with a line number */
static int	sflag;		/* -s: no message for unopenable files */
static int	vflag;		/* -v: select the lines that do not match */
static int	wflag;		/* -w: wrap the pattern in \< and \> */
static int	errflg;		/* set when getopt() reports a bad option */

/* overall run state */
static int	nfile;		/* number of file operands */
static int	nsucc;		/* 0: nothing selected, 1: selected, 2: error */
static char	*expbuf;	/* compiled pattern returned by compile() */

/* state for the input currently being searched */
static int	temp;		/* its file descriptor (0 for stdin) */
static long long	lnum;	/* number of the current line */
static long long	tln;	/* lines selected so far, reported by -c */
static int	nlflag;		/* current line was newline-terminated */
static char	*ptr, *ptrend;	/* start and end of the current line */

/* buffers shared by every input; allocated on first use by execute() */
static char	*prntbuf = NULL;	/* raw input data */
static char	*linebuf;		/* lower-case copy of a line for -i */
static long	fw_lPrntBufLen = 0;	/* allocated size of each buffer */
98 
/* search one file (NULL means standard input) */
static void	execute(char *file);
/* print the message for a regexpr error code and exit with status 2 */
static void	regerr(int err);
/* handle one selected line; returns 1 when the file needs no more scanning */
static int	succeed(char *f);
102 
103 int
104 main(
105     int		argc,
106     char 	**argv)
107 {
108 	int	c;
109 	char	*arg;
110 	extern int	optind;
111 
112 	(void) setlocale(LC_ALL, "");
113 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
114 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
115 #endif
116 	(void) textdomain(TEXT_DOMAIN);
117 
118 	while ((c = getopt(argc, argv, "hblcnsviyw")) != -1)
119 		switch (c) {
120 		case 'h':
121 			hflag++;
122 			break;
123 		case 'v':
124 			vflag++;
125 			break;
126 		case 'c':
127 			cflag++;
128 			break;
129 		case 'n':
130 			nflag++;
131 			break;
132 		case 'b':
133 			bflag++;
134 			break;
135 		case 's':
136 			sflag++;
137 			break;
138 		case 'l':
139 			lflag++;
140 			break;
141 		case 'y':
142 		case 'i':
143 			iflag++;
144 			break;
145 		case 'w':
146 			wflag++;
147 			break;
148 		case '?':
149 			errflg++;
150 		}
151 
152 	if (errflg || (optind >= argc)) {
153 		errmsg("Usage: grep -hblcnsviw pattern file . . .\n",
154 			(char *)NULL);
155 		exit(2);
156 	}
157 
158 	argv = &argv[optind];
159 	argc -= optind;
160 	nfile = argc - 1;
161 
162 	if (strrchr(*argv, '\n') != NULL)
163 		regerr(41);
164 
165 	if (iflag) {
166 		for (arg = *argv; *arg != NULL; ++arg)
167 			*arg = (char)tolower((int)((unsigned char)*arg));
168 	}
169 
170 	if (wflag) {
171 		unsigned int	wordlen;
172 		char		*wordbuf;
173 
174 		wordlen = strlen(*argv) + 4;
175 		if ((wordbuf = malloc(wordlen)) == NULL) {
176 			errmsg("grep: Out of memory for word\n", (char *)NULL);
177 			exit(2);
178 		}
179 
180 		(void) strcpy(wordbuf, "\\<");
181 		(void) strcat(wordbuf, *argv);
182 		(void) strcat(wordbuf, "\\>");
183 		*argv = wordbuf;
184 	}
185 
186 	expbuf = compile(*argv, (char *)0, (char *)0);
187 	if (regerrno)
188 		regerr(regerrno);
189 
190 	if (--argc == 0)
191 		execute(NULL);
192 	else
193 		while (argc-- > 0)
194 			execute(*++argv);
195 
196 	return (nsucc == 2 ? 2 : (nsucc == 0 ? 1 : 0));
197 }
198 
199 static void
200 execute(
201     char	*file)
202 {
203 	char	*lbuf, *p;
204 	long	count;
205 	long	offset = 0;
206 	char	*next_ptr = NULL;
207 	long	next_count = 0;
208 
209 	tln = 0;
210 
211 	if (prntbuf == NULL) {
212 		fw_lPrntBufLen = GBUFSIZ + 1;
213 		if ((prntbuf = malloc(fw_lPrntBufLen)) == NULL) {
214 			exit(2); /* out of memory - BAIL */
215 		}
216 		if ((linebuf = malloc(fw_lPrntBufLen)) == NULL) {
217 			exit(2); /* out of memory - BAIL */
218 		}
219 	}
220 
221 	if (file == NULL)
222 		temp = 0;
223 	else if ((temp = open(file, O_RDONLY)) == -1) {
224 		if (!sflag)
225 			errmsg("grep: can't open %s\n", file);
226 		nsucc = 2;
227 		return;
228 	}
229 
230 	/* read in first block of bytes */
231 	if ((count = read(temp, prntbuf, GBUFSIZ)) <= 0) {
232 		(void) close(temp);
233 
234 		if (cflag) {
235 			if (nfile > 1 && !hflag && file)
236 				(void) fprintf(stdout, "%s:", file);
237 			(void) fprintf(stdout, "%lld\n", tln);
238 		}
239 		return;
240 	}
241 
242 	lnum = 0;
243 	ptr = prntbuf;
244 	for (;;) {
245 		/* look for next newline */
246 		if ((ptrend = memchr(ptr + offset, '\n', count)) == NULL) {
247 			offset += count;
248 
249 			/*
250 			 * shift unused data to the beginning of the buffer
251 			 */
252 			if (ptr > prntbuf) {
253 				(void) memmove(prntbuf, ptr, offset);
254 				ptr = prntbuf;
255 			}
256 
257 			/*
258 			 * re-allocate a larger buffer if this one is full
259 			 */
260 			if (offset + GBUFSIZ > fw_lPrntBufLen) {
261 				/*
262 				 * allocate a new buffer and preserve the
263 				 * contents...
264 				 */
265 				fw_lPrntBufLen += GBUFSIZ;
266 				if ((prntbuf = realloc(prntbuf,
267 				    fw_lPrntBufLen)) == NULL)
268 					exit(2);
269 
270 				/*
271 				 * set up a bigger linebuffer (this is only used
272 				 * for case insensitive operations). Contents do
273 				 * not have to be preserved.
274 				 */
275 				free(linebuf);
276 				if ((linebuf = malloc(fw_lPrntBufLen)) == NULL)
277 					exit(2);
278 
279 				ptr = prntbuf;
280 			}
281 
282 			p = prntbuf + offset;
283 			if ((count = read(temp, p, GBUFSIZ)) > 0)
284 				continue;
285 
286 			if (offset == 0)
287 				/* end of file already reached */
288 				break;
289 
290 			/* last line of file has no newline */
291 			ptrend = ptr + offset;
292 			nlflag = 0;
293 		} else {
294 			next_ptr = ptrend + 1;
295 			next_count = offset + count - (next_ptr - ptr);
296 			nlflag = 1;
297 		}
298 		lnum++;
299 		*ptrend = '\0';
300 
301 		if (iflag) {
302 			/*
303 			 * Make a lower case copy of the record
304 			 */
305 			p = ptr;
306 			for (lbuf = linebuf; p < ptrend; )
307 				*lbuf++ = (char)tolower((int)
308 				    (unsigned char)*p++);
309 			*lbuf = '\0';
310 			lbuf = linebuf;
311 		} else
312 			/*
313 			 * Use record as is
314 			 */
315 			lbuf = ptr;
316 
317 		/* lflag only once */
318 		if ((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
319 			break;
320 
321 		if (!nlflag)
322 			break;
323 
324 		ptr = next_ptr;
325 		count = next_count;
326 		offset = 0;
327 	}
328 	(void) close(temp);
329 
330 	if (cflag) {
331 		if (nfile > 1 && !hflag && file)
332 			(void) fprintf(stdout, "%s:", file);
333 		(void) fprintf(stdout, "%lld\n", tln);
334 	}
335 }
336 
337 static int
338 succeed(
339     char	*f)
340 {
341 	int nchars;
342 	nsucc = (nsucc == 2) ? 2 : 1;
343 
344 	if (f == NULL)
345 		f = "<stdin>";
346 
347 	if (cflag) {
348 		tln++;
349 		return (0);
350 	}
351 
352 	if (lflag) {
353 		(void) fprintf(stdout, "%s\n", f);
354 		return (1);
355 	}
356 
357 	if (nfile > 1 && !hflag)
358 		/* print filename */
359 		(void) fprintf(stdout, "%s:", f);
360 
361 	if (bflag)
362 		/* print block number */
363 		(void) fprintf(stdout, "%lld:", (offset_t)
364 			((lseek(temp, (off_t)0, SEEK_CUR) - 1) / BLKSIZE));
365 
366 	if (nflag)
367 		/* print line number */
368 		(void) fprintf(stdout, "%lld:", lnum);
369 
370 	if (nlflag) {
371 		/* newline at end of line */
372 		*ptrend = '\n';
373 		nchars = ptrend - ptr + 1;
374 	} else {
375 		/* don't write sentinel \0 */
376 		nchars = ptrend - ptr;
377 	}
378 
379 	(void) fwrite(ptr, 1, nchars, stdout);
380 	return (0);
381 }
382 
383 static void
384 regerr(
385     int	err)
386 {
387 	errmsg("grep: RE error %d: ", err);
388 	switch (err) {
389 		case 11:
390 			err = 0;
391 			break;
392 		case 16:
393 			err = 1;
394 			break;
395 		case 25:
396 			err = 2;
397 			break;
398 		case 41:
399 			err = 3;
400 			break;
401 		case 42:
402 			err = 4;
403 			break;
404 		case 43:
405 			err = 5;
406 			break;
407 		case 44:
408 			err = 6;
409 			break;
410 		case 45:
411 			err = 7;
412 			break;
413 		case 46:
414 			err = 8;
415 			break;
416 		case 49:
417 			err = 9;
418 			break;
419 		case 50:
420 			err = 10;
421 			break;
422 		case 67:
423 			err = 11;
424 			break;
425 		default:
426 			err = 12;
427 			break;
428 	}
429 
430 	errmsg("%s\n", gettext(errstr[err]));
431 	exit(2);
432 }
433