xref: /illumos-gate/usr/src/cmd/mandoc/mansearch.c (revision cec8643b)
1 /*	$Id: mansearch.c,v 1.80 2018/12/13 11:55:46 schwarze Exp $ */
2 /*
3  * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013-2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/mman.h>
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <glob.h>
30 #include <limits.h>
31 #include <regex.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stddef.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 
39 #include "mandoc_aux.h"
40 #include "mandoc_ohash.h"
41 #include "manconf.h"
42 #include "mansearch.h"
43 #include "dbm.h"
44 
45 struct	expr {
46 	/* Used for terms: */
47 	struct dbm_match match;   /* Match type and expression. */
48 	uint64_t	 bits;    /* Type mask. */
49 	/* Used for OR and AND groups: */
50 	struct expr	*next;    /* Next child in the parent group. */
51 	struct expr	*child;   /* First child in this group. */
52 	enum { EXPR_TERM, EXPR_OR, EXPR_AND } type;
53 };
54 
55 const char *const mansearch_keynames[KEY_MAX] = {
56 	"arch",	"sec",	"Xr",	"Ar",	"Fa",	"Fl",	"Dv",	"Fn",
57 	"Ic",	"Pa",	"Cm",	"Li",	"Em",	"Cd",	"Va",	"Ft",
58 	"Tn",	"Er",	"Ev",	"Sy",	"Sh",	"In",	"Ss",	"Ox",
59 	"An",	"Mt",	"St",	"Bx",	"At",	"Nx",	"Fx",	"Lk",
60 	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",	"Lb",	"Nm",	"Nd"
61 };
62 
63 
/* Evaluation of a compiled expression against the open database. */
static	struct ohash	*manmerge(struct expr *e, struct ohash *htab);
static	struct ohash	*manmerge_term(struct expr *e, struct ohash *htab);
static	struct ohash	*manmerge_or(struct expr *e, struct ohash *htab);
static	struct ohash	*manmerge_and(struct expr *e, struct ohash *htab);

/* Formatting of result entries. */
static	char		*buildnames(const struct dbm_page *page);
static	char		*buildoutput(size_t im, struct dbm_page *page);
static	size_t		 lstlen(const char *cp, size_t sep);
static	void		 lstcat(char *buf, size_t *i, const char *cp,
				const char *sep);
static	int		 lstmatch(const char *want, const char *have);

/* Compilation and disposal of search expressions. */
static	struct expr	*exprcomp(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static	struct expr	*expr_and(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static	struct expr	*exprterm(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static	void		 exprfree(struct expr *e);

/* Ordering of the result array. */
static	int		 manpage_compare(const void *vp1, const void *vp2);
81 
82 
83 int
mansearch(const struct mansearch * search,const struct manpaths * paths,int argc,char * argv[],struct manpage ** res,size_t * sz)84 mansearch(const struct mansearch *search,
85 		const struct manpaths *paths,
86 		int argc, char *argv[],
87 		struct manpage **res, size_t *sz)
88 {
89 	char		 buf[PATH_MAX];
90 	struct dbm_res	*rp;
91 	struct expr	*e;
92 	struct dbm_page	*page;
93 	struct manpage	*mpage;
94 	struct ohash	*htab;
95 	size_t		 cur, i, maxres, outkey;
96 	unsigned int	 slot;
97 	int		 argi, chdir_status, getcwd_status, im;
98 
99 	argi = 0;
100 	if ((e = exprcomp(search, argc, argv, &argi)) == NULL) {
101 		*sz = 0;
102 		return 0;
103 	}
104 
105 	cur = maxres = 0;
106 	if (res != NULL)
107 		*res = NULL;
108 
109 	outkey = KEY_Nd;
110 	if (search->outkey != NULL)
111 		for (im = 0; im < KEY_MAX; im++)
112 			if (0 == strcasecmp(search->outkey,
113 			    mansearch_keynames[im])) {
114 				outkey = im;
115 				break;
116 			}
117 
118 	/*
119 	 * Remember the original working directory, if possible.
120 	 * This will be needed if the second or a later directory
121 	 * is given as a relative path.
122 	 * Do not error out if the current directory is not
123 	 * searchable: Maybe it won't be needed after all.
124 	 */
125 
126 	if (getcwd(buf, PATH_MAX) == NULL) {
127 		getcwd_status = 0;
128 		(void)strlcpy(buf, strerror(errno), sizeof(buf));
129 	} else
130 		getcwd_status = 1;
131 
132 	/*
133 	 * Loop over the directories (containing databases) for us to
134 	 * search.
135 	 * Don't let missing/bad databases/directories phase us.
136 	 * In each, try to open the resident database and, if it opens,
137 	 * scan it for our match expression.
138 	 */
139 
140 	chdir_status = 0;
141 	for (i = 0; i < paths->sz; i++) {
142 		if (chdir_status && paths->paths[i][0] != '/') {
143 			if ( ! getcwd_status) {
144 				warnx("%s: getcwd: %s", paths->paths[i], buf);
145 				continue;
146 			} else if (chdir(buf) == -1) {
147 				warn("%s", buf);
148 				continue;
149 			}
150 		}
151 		if (chdir(paths->paths[i]) == -1) {
152 			warn("%s", paths->paths[i]);
153 			continue;
154 		}
155 		chdir_status = 1;
156 
157 		if (dbm_open(MANDOC_DB) == -1) {
158 			if (errno != ENOENT)
159 				warn("%s/%s", paths->paths[i], MANDOC_DB);
160 			continue;
161 		}
162 
163 		if ((htab = manmerge(e, NULL)) == NULL) {
164 			dbm_close();
165 			continue;
166 		}
167 
168 		for (rp = ohash_first(htab, &slot); rp != NULL;
169 		    rp = ohash_next(htab, &slot)) {
170 			page = dbm_page_get(rp->page);
171 
172 			if (lstmatch(search->sec, page->sect) == 0 ||
173 			    lstmatch(search->arch, page->arch) == 0 ||
174 			    (search->argmode == ARG_NAME &&
175 			     rp->bits <= (int32_t)(NAME_SYN & NAME_MASK)))
176 				continue;
177 
178 			if (res == NULL) {
179 				cur = 1;
180 				break;
181 			}
182 			if (cur + 1 > maxres) {
183 				maxres += 1024;
184 				*res = mandoc_reallocarray(*res,
185 				    maxres, sizeof(**res));
186 			}
187 			mpage = *res + cur;
188 			mandoc_asprintf(&mpage->file, "%s/%s",
189 			    paths->paths[i], page->file + 1);
190 			if (access(chdir_status ? page->file + 1 :
191 			    mpage->file, R_OK) == -1) {
192 				warn("%s", mpage->file);
193 				warnx("outdated mandoc.db contains "
194 				    "bogus %s entry, run makewhatis %s",
195 				    page->file + 1, paths->paths[i]);
196 				free(mpage->file);
197 				free(rp);
198 				continue;
199 			}
200 			mpage->names = buildnames(page);
201 			mpage->output = buildoutput(outkey, page);
202 			mpage->ipath = i;
203 			mpage->sec = *page->sect - '0';
204 			if (mpage->sec < 0 || mpage->sec > 9)
205 				mpage->sec = 10;
206 			mpage->form = *page->file;
207 			free(rp);
208 			cur++;
209 		}
210 		ohash_delete(htab);
211 		free(htab);
212 		dbm_close();
213 
214 		/*
215 		 * In man(1) mode, prefer matches in earlier trees
216 		 * over matches in later trees.
217 		 */
218 
219 		if (cur && search->firstmatch)
220 			break;
221 	}
222 	if (res != NULL)
223 		qsort(*res, cur, sizeof(struct manpage), manpage_compare);
224 	if (chdir_status && getcwd_status && chdir(buf) == -1)
225 		warn("%s", buf);
226 	exprfree(e);
227 	*sz = cur;
228 	return res != NULL || cur;
229 }
230 
231 /*
232  * Merge the results for the expression tree rooted at e
233  * into the the result list htab.
234  */
235 static struct ohash *
manmerge(struct expr * e,struct ohash * htab)236 manmerge(struct expr *e, struct ohash *htab)
237 {
238 	switch (e->type) {
239 	case EXPR_TERM:
240 		return manmerge_term(e, htab);
241 	case EXPR_OR:
242 		return manmerge_or(e->child, htab);
243 	case EXPR_AND:
244 		return manmerge_and(e->child, htab);
245 	default:
246 		abort();
247 	}
248 }
249 
250 static struct ohash *
manmerge_term(struct expr * e,struct ohash * htab)251 manmerge_term(struct expr *e, struct ohash *htab)
252 {
253 	struct dbm_res	 res, *rp;
254 	uint64_t	 ib;
255 	unsigned int	 slot;
256 	int		 im;
257 
258 	if (htab == NULL) {
259 		htab = mandoc_malloc(sizeof(*htab));
260 		mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page));
261 	}
262 
263 	for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) {
264 		if ((e->bits & ib) == 0)
265 			continue;
266 
267 		switch (ib) {
268 		case TYPE_arch:
269 			dbm_page_byarch(&e->match);
270 			break;
271 		case TYPE_sec:
272 			dbm_page_bysect(&e->match);
273 			break;
274 		case TYPE_Nm:
275 			dbm_page_byname(&e->match);
276 			break;
277 		case TYPE_Nd:
278 			dbm_page_bydesc(&e->match);
279 			break;
280 		default:
281 			dbm_page_bymacro(im - 2, &e->match);
282 			break;
283 		}
284 
285 		/*
286 		 * When hashing for deduplication, use the unique
287 		 * page ID itself instead of a hash function;
288 		 * that is quite efficient.
289 		 */
290 
291 		for (;;) {
292 			res = dbm_page_next();
293 			if (res.page == -1)
294 				break;
295 			slot = ohash_lookup_memory(htab,
296 			    (char *)&res, sizeof(res.page), res.page);
297 			if ((rp = ohash_find(htab, slot)) != NULL)
298 				continue;
299 			rp = mandoc_malloc(sizeof(*rp));
300 			*rp = res;
301 			ohash_insert(htab, slot, rp);
302 		}
303 	}
304 	return htab;
305 }
306 
307 static struct ohash *
manmerge_or(struct expr * e,struct ohash * htab)308 manmerge_or(struct expr *e, struct ohash *htab)
309 {
310 	while (e != NULL) {
311 		htab = manmerge(e, htab);
312 		e = e->next;
313 	}
314 	return htab;
315 }
316 
317 static struct ohash *
manmerge_and(struct expr * e,struct ohash * htab)318 manmerge_and(struct expr *e, struct ohash *htab)
319 {
320 	struct ohash	*hand, *h1, *h2;
321 	struct dbm_res	*res;
322 	unsigned int	 slot1, slot2;
323 
324 	/* Evaluate the first term of the AND clause. */
325 
326 	hand = manmerge(e, NULL);
327 
328 	while ((e = e->next) != NULL) {
329 
330 		/* Evaluate the next term and prepare for ANDing. */
331 
332 		h2 = manmerge(e, NULL);
333 		if (ohash_entries(h2) < ohash_entries(hand)) {
334 			h1 = h2;
335 			h2 = hand;
336 		} else
337 			h1 = hand;
338 		hand = mandoc_malloc(sizeof(*hand));
339 		mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page));
340 
341 		/* Keep all pages that are in both result sets. */
342 
343 		for (res = ohash_first(h1, &slot1); res != NULL;
344 		    res = ohash_next(h1, &slot1)) {
345 			if (ohash_find(h2, ohash_lookup_memory(h2,
346 			    (char *)res, sizeof(res->page),
347 			    res->page)) == NULL)
348 				free(res);
349 			else
350 				ohash_insert(hand, ohash_lookup_memory(hand,
351 				    (char *)res, sizeof(res->page),
352 				    res->page), res);
353 		}
354 
355 		/* Discard the merged results. */
356 
357 		for (res = ohash_first(h2, &slot2); res != NULL;
358 		    res = ohash_next(h2, &slot2))
359 			free(res);
360 		ohash_delete(h2);
361 		free(h2);
362 		ohash_delete(h1);
363 		free(h1);
364 	}
365 
366 	/* Merge the result of the AND into htab. */
367 
368 	if (htab == NULL)
369 		return hand;
370 
371 	for (res = ohash_first(hand, &slot1); res != NULL;
372 	    res = ohash_next(hand, &slot1)) {
373 		slot2 = ohash_lookup_memory(htab,
374 		    (char *)res, sizeof(res->page), res->page);
375 		if (ohash_find(htab, slot2) == NULL)
376 			ohash_insert(htab, slot2, res);
377 		else
378 			free(res);
379 	}
380 
381 	/* Discard the merged result. */
382 
383 	ohash_delete(hand);
384 	free(hand);
385 	return htab;
386 }
387 
388 void
mansearch_free(struct manpage * res,size_t sz)389 mansearch_free(struct manpage *res, size_t sz)
390 {
391 	size_t	 i;
392 
393 	for (i = 0; i < sz; i++) {
394 		free(res[i].file);
395 		free(res[i].names);
396 		free(res[i].output);
397 	}
398 	free(res);
399 }
400 
401 static int
manpage_compare(const void * vp1,const void * vp2)402 manpage_compare(const void *vp1, const void *vp2)
403 {
404 	const struct manpage	*mp1, *mp2;
405 	const char		*cp1, *cp2;
406 	size_t			 sz1, sz2;
407 	int			 diff;
408 
409 	mp1 = vp1;
410 	mp2 = vp2;
411 	if ((diff = mp1->sec - mp2->sec))
412 		return diff;
413 
414 	/* Fall back to alphabetic ordering of names. */
415 	sz1 = strcspn(mp1->names, "(");
416 	sz2 = strcspn(mp2->names, "(");
417 	if (sz1 < sz2)
418 		sz1 = sz2;
419 	if ((diff = strncasecmp(mp1->names, mp2->names, sz1)))
420 		return diff;
421 
422 	/* For identical names and sections, prefer arch-dependent. */
423 	cp1 = strchr(mp1->names + sz1, '/');
424 	cp2 = strchr(mp2->names + sz2, '/');
425 	return cp1 != NULL && cp2 != NULL ? strcasecmp(cp1, cp2) :
426 	    cp1 != NULL ? -1 : cp2 != NULL ? 1 : 0;
427 }
428 
429 static char *
buildnames(const struct dbm_page * page)430 buildnames(const struct dbm_page *page)
431 {
432 	char	*buf;
433 	size_t	 i, sz;
434 
435 	sz = lstlen(page->name, 2) + 1 + lstlen(page->sect, 2) +
436 	    (page->arch == NULL ? 0 : 1 + lstlen(page->arch, 2)) + 2;
437 	buf = mandoc_malloc(sz);
438 	i = 0;
439 	lstcat(buf, &i, page->name, ", ");
440 	buf[i++] = '(';
441 	lstcat(buf, &i, page->sect, ", ");
442 	if (page->arch != NULL) {
443 		buf[i++] = '/';
444 		lstcat(buf, &i, page->arch, ", ");
445 	}
446 	buf[i++] = ')';
447 	buf[i++] = '\0';
448 	assert(i == sz);
449 	return buf;
450 }
451 
452 /*
453  * Count the buffer space needed to print the NUL-terminated
454  * list of NUL-terminated strings, when printing sep separator
455  * characters between strings.
456  */
457 static size_t
lstlen(const char * cp,size_t sep)458 lstlen(const char *cp, size_t sep)
459 {
460 	size_t	 sz;
461 
462 	for (sz = 0; *cp != '\0'; cp++) {
463 
464 		/* Skip names appearing only in the SYNOPSIS. */
465 		if (*cp <= (char)(NAME_SYN & NAME_MASK)) {
466 			while (*cp != '\0')
467 				cp++;
468 			continue;
469 		}
470 
471 		/* Skip name class markers. */
472 		if (*cp < ' ')
473 			cp++;
474 
475 		/* Print a separator before each but the first string. */
476 		if (sz)
477 			sz += sep;
478 
479 		/* Copy one string. */
480 		while (*cp != '\0') {
481 			sz++;
482 			cp++;
483 		}
484 	}
485 	return sz;
486 }
487 
488 /*
489  * Print the NUL-terminated list of NUL-terminated strings
490  * into the buffer, seperating strings with sep.
491  */
492 static void
lstcat(char * buf,size_t * i,const char * cp,const char * sep)493 lstcat(char *buf, size_t *i, const char *cp, const char *sep)
494 {
495 	const char	*s;
496 	size_t		 i_start;
497 
498 	for (i_start = *i; *cp != '\0'; cp++) {
499 
500 		/* Skip names appearing only in the SYNOPSIS. */
501 		if (*cp <= (char)(NAME_SYN & NAME_MASK)) {
502 			while (*cp != '\0')
503 				cp++;
504 			continue;
505 		}
506 
507 		/* Skip name class markers. */
508 		if (*cp < ' ')
509 			cp++;
510 
511 		/* Print a separator before each but the first string. */
512 		if (*i > i_start) {
513 			s = sep;
514 			while (*s != '\0')
515 				buf[(*i)++] = *s++;
516 		}
517 
518 		/* Copy one string. */
519 		while (*cp != '\0')
520 			buf[(*i)++] = *cp++;
521 	}
522 
523 }
524 
/*
 * Test whether the string want occurs, ignoring case, as a
 * substring in at least one element of the NUL-terminated
 * string list have.
 * Returns 1 on a match and 0 otherwise.  A NULL want, a NULL have,
 * or an empty list have means that no filtering is desired,
 * so 1 is returned in these cases, too.
 */
static int
lstmatch(const char *want, const char *have)
{
	const char	*item;

	if (want == NULL || have == NULL || *have == '\0')
		return 1;

	/* Each element ends at its NUL; an empty one ends the list. */
	for (item = have; *item != '\0'; item += strlen(item) + 1)
		if (strcasestr(item, want) != NULL)
			return 1;
	return 0;
}
543 
544 /*
545  * Build a list of values taken by the macro im in the manual page.
546  */
547 static char *
buildoutput(size_t im,struct dbm_page * page)548 buildoutput(size_t im, struct dbm_page *page)
549 {
550 	const char	*oldoutput, *sep, *input;
551 	char		*output, *newoutput, *value;
552 	size_t		 sz, i;
553 
554 	switch (im) {
555 	case KEY_Nd:
556 		return mandoc_strdup(page->desc);
557 	case KEY_Nm:
558 		input = page->name;
559 		break;
560 	case KEY_sec:
561 		input = page->sect;
562 		break;
563 	case KEY_arch:
564 		input = page->arch;
565 		if (input == NULL)
566 			input = "all\0";
567 		break;
568 	default:
569 		input = NULL;
570 		break;
571 	}
572 
573 	if (input != NULL) {
574 		sz = lstlen(input, 3) + 1;
575 		output = mandoc_malloc(sz);
576 		i = 0;
577 		lstcat(output, &i, input, " # ");
578 		output[i++] = '\0';
579 		assert(i == sz);
580 		return output;
581 	}
582 
583 	output = NULL;
584 	dbm_macro_bypage(im - 2, page->addr);
585 	while ((value = dbm_macro_next()) != NULL) {
586 		if (output == NULL) {
587 			oldoutput = "";
588 			sep = "";
589 		} else {
590 			oldoutput = output;
591 			sep = " # ";
592 		}
593 		mandoc_asprintf(&newoutput, "%s%s%s", oldoutput, sep, value);
594 		free(output);
595 		output = newoutput;
596 	}
597 	return output;
598 }
599 
600 /*
601  * Compile a set of string tokens into an expression.
602  * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
603  * "(", "foo=bar", etc.).
604  */
605 static struct expr *
exprcomp(const struct mansearch * search,int argc,char * argv[],int * argi)606 exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi)
607 {
608 	struct expr	*parent, *child;
609 	int		 needterm, nested;
610 
611 	if ((nested = *argi) == argc)
612 		return NULL;
613 	needterm = 1;
614 	parent = child = NULL;
615 	while (*argi < argc) {
616 		if (strcmp(")", argv[*argi]) == 0) {
617 			if (needterm)
618 				warnx("missing term "
619 				    "before closing parenthesis");
620 			needterm = 0;
621 			if (nested)
622 				break;
623 			warnx("ignoring unmatched right parenthesis");
624 			++*argi;
625 			continue;
626 		}
627 		if (strcmp("-o", argv[*argi]) == 0) {
628 			if (needterm) {
629 				if (*argi > 0)
630 					warnx("ignoring -o after %s",
631 					    argv[*argi - 1]);
632 				else
633 					warnx("ignoring initial -o");
634 			}
635 			needterm = 1;
636 			++*argi;
637 			continue;
638 		}
639 		needterm = 0;
640 		if (child == NULL) {
641 			child = expr_and(search, argc, argv, argi);
642 			continue;
643 		}
644 		if (parent == NULL) {
645 			parent = mandoc_calloc(1, sizeof(*parent));
646 			parent->type = EXPR_OR;
647 			parent->next = NULL;
648 			parent->child = child;
649 		}
650 		child->next = expr_and(search, argc, argv, argi);
651 		child = child->next;
652 	}
653 	if (needterm && *argi)
654 		warnx("ignoring trailing %s", argv[*argi - 1]);
655 	return parent == NULL ? child : parent;
656 }
657 
658 static struct expr *
expr_and(const struct mansearch * search,int argc,char * argv[],int * argi)659 expr_and(const struct mansearch *search, int argc, char *argv[], int *argi)
660 {
661 	struct expr	*parent, *child;
662 	int		 needterm;
663 
664 	needterm = 1;
665 	parent = child = NULL;
666 	while (*argi < argc) {
667 		if (strcmp(")", argv[*argi]) == 0) {
668 			if (needterm)
669 				warnx("missing term "
670 				    "before closing parenthesis");
671 			needterm = 0;
672 			break;
673 		}
674 		if (strcmp("-o", argv[*argi]) == 0)
675 			break;
676 		if (strcmp("-a", argv[*argi]) == 0) {
677 			if (needterm) {
678 				if (*argi > 0)
679 					warnx("ignoring -a after %s",
680 					    argv[*argi - 1]);
681 				else
682 					warnx("ignoring initial -a");
683 			}
684 			needterm = 1;
685 			++*argi;
686 			continue;
687 		}
688 		if (needterm == 0)
689 			break;
690 		if (child == NULL) {
691 			child = exprterm(search, argc, argv, argi);
692 			if (child != NULL)
693 				needterm = 0;
694 			continue;
695 		}
696 		needterm = 0;
697 		if (parent == NULL) {
698 			parent = mandoc_calloc(1, sizeof(*parent));
699 			parent->type = EXPR_AND;
700 			parent->next = NULL;
701 			parent->child = child;
702 		}
703 		child->next = exprterm(search, argc, argv, argi);
704 		if (child->next != NULL) {
705 			child = child->next;
706 			needterm = 0;
707 		}
708 	}
709 	if (needterm && *argi)
710 		warnx("ignoring trailing %s", argv[*argi - 1]);
711 	return parent == NULL ? child : parent;
712 }
713 
714 static struct expr *
exprterm(const struct mansearch * search,int argc,char * argv[],int * argi)715 exprterm(const struct mansearch *search, int argc, char *argv[], int *argi)
716 {
717 	char		 errbuf[BUFSIZ];
718 	struct expr	*e;
719 	char		*key, *val;
720 	uint64_t	 iterbit;
721 	int		 cs, i, irc;
722 
723 	if (strcmp("(", argv[*argi]) == 0) {
724 		++*argi;
725 		e = exprcomp(search, argc, argv, argi);
726 		if (*argi < argc) {
727 			assert(strcmp(")", argv[*argi]) == 0);
728 			++*argi;
729 		} else
730 			warnx("unclosed parenthesis");
731 		return e;
732 	}
733 
734 	if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) {
735 		cs = 0;
736 		++*argi;
737 	} else
738 		cs = 1;
739 
740 	e = mandoc_calloc(1, sizeof(*e));
741 	e->type = EXPR_TERM;
742 	e->bits = 0;
743 	e->next = NULL;
744 	e->child = NULL;
745 
746 	if (search->argmode == ARG_NAME) {
747 		e->bits = TYPE_Nm;
748 		e->match.type = DBM_EXACT;
749 		e->match.str = argv[(*argi)++];
750 		return e;
751 	}
752 
753 	/*
754 	 * Separate macro keys from search string.
755 	 * If needed, request regular expression handling.
756 	 */
757 
758 	if (search->argmode == ARG_WORD) {
759 		e->bits = TYPE_Nm;
760 		e->match.type = DBM_REGEX;
761 #if HAVE_REWB_BSD
762 		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]);
763 #elif HAVE_REWB_SYSV
764 		mandoc_asprintf(&val, "\\<%s\\>", argv[*argi]);
765 #else
766 		mandoc_asprintf(&val,
767 		    "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", argv[*argi]);
768 #endif
769 		cs = 0;
770 	} else if ((val = strpbrk(argv[*argi], "=~")) == NULL) {
771 		e->bits = TYPE_Nm | TYPE_Nd;
772 		e->match.type = DBM_REGEX;
773 		val = argv[*argi];
774 		cs = 0;
775 	} else {
776 		if (val == argv[*argi])
777 			e->bits = TYPE_Nm | TYPE_Nd;
778 		if (*val == '=') {
779 			e->match.type = DBM_SUB;
780 			e->match.str = val + 1;
781 		} else
782 			e->match.type = DBM_REGEX;
783 		*val++ = '\0';
784 		if (strstr(argv[*argi], "arch") != NULL)
785 			cs = 0;
786 	}
787 
788 	/* Compile regular expressions. */
789 
790 	if (e->match.type == DBM_REGEX) {
791 		e->match.re = mandoc_malloc(sizeof(*e->match.re));
792 		irc = regcomp(e->match.re, val,
793 		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
794 		if (irc) {
795 			regerror(irc, e->match.re, errbuf, sizeof(errbuf));
796 			warnx("regcomp /%s/: %s", val, errbuf);
797 		}
798 		if (search->argmode == ARG_WORD)
799 			free(val);
800 		if (irc) {
801 			free(e->match.re);
802 			free(e);
803 			++*argi;
804 			return NULL;
805 		}
806 	}
807 
808 	if (e->bits) {
809 		++*argi;
810 		return e;
811 	}
812 
813 	/*
814 	 * Parse out all possible fields.
815 	 * If the field doesn't resolve, bail.
816 	 */
817 
818 	while (NULL != (key = strsep(&argv[*argi], ","))) {
819 		if ('\0' == *key)
820 			continue;
821 		for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) {
822 			if (0 == strcasecmp(key, mansearch_keynames[i])) {
823 				e->bits |= iterbit;
824 				break;
825 			}
826 		}
827 		if (i == KEY_MAX) {
828 			if (strcasecmp(key, "any"))
829 				warnx("treating unknown key "
830 				    "\"%s\" as \"any\"", key);
831 			e->bits |= ~0ULL;
832 		}
833 	}
834 
835 	++*argi;
836 	return e;
837 }
838 
839 static void
exprfree(struct expr * e)840 exprfree(struct expr *e)
841 {
842 	if (e->next != NULL)
843 		exprfree(e->next);
844 	if (e->child != NULL)
845 		exprfree(e->child);
846 	free(e);
847 }
848