195c635eGarrett D'Amore/*
295c635eGarrett D'Amore * Copyright (c) 2002 John Rochester
395c635eGarrett D'Amore * All rights reserved.
495c635eGarrett D'Amore *
595c635eGarrett D'Amore * Redistribution and use in source and binary forms, with or without
695c635eGarrett D'Amore * modification, are permitted provided that the following conditions
795c635eGarrett D'Amore * are met:
895c635eGarrett D'Amore * 1. Redistributions of source code must retain the above copyright
995c635eGarrett D'Amore *    notice, this list of conditions and the following disclaimer,
1095c635eGarrett D'Amore *    in this position and unchanged.
1195c635eGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright
1295c635eGarrett D'Amore *    notice, this list of conditions and the following disclaimer in the
1395c635eGarrett D'Amore *    documentation and/or other materials provided with the distribution.
1495c635eGarrett D'Amore * 3. The name of the author may not be used to endorse or promote products
1595c635eGarrett D'Amore *    derived from this software without specific prior written permission
1695c635eGarrett D'Amore *
2795c635eGarrett D'Amore */
2895c635eGarrett D'Amore
2995c635eGarrett D'Amore/*
3095c635eGarrett D'Amore * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
3195c635eGarrett D'Amore * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3295c635eGarrett D'Amore */
3395c635eGarrett D'Amore
3495c635eGarrett D'Amore#include <sys/types.h>
3595c635eGarrett D'Amore#include <sys/stat.h>
3695c635eGarrett D'Amore#include <sys/param.h>
3795c635eGarrett D'Amore
3895c635eGarrett D'Amore#include <ctype.h>
3995c635eGarrett D'Amore#include <dirent.h>
4095c635eGarrett D'Amore#include <err.h>
4195c635eGarrett D'Amore#include <signal.h>
4295c635eGarrett D'Amore#include <stddef.h>
4395c635eGarrett D'Amore#include <stdio.h>
4495c635eGarrett D'Amore#include <stdlib.h>
4595c635eGarrett D'Amore#include <string.h>
4695c635eGarrett D'Amore#include <unistd.h>
4795c635eGarrett D'Amore
4895c635eGarrett D'Amore#include "man.h"
4995c635eGarrett D'Amore#include "stringlist.h"
5095c635eGarrett D'Amore
5195c635eGarrett D'Amore
/*
 * Per-page record built by new_page_info() for each man page found
 * in a section directory.
 */
struct page_info {
	char	*filename;	/* "dir/entry" path of the page file */
	char	*name;		/* entry name up to its final '.' */
	char	*suffix;	/* text following that final '.' */
	ino_t	inode;		/* st_ino reported by stat() for the file */
};
5995c635eGarrett D'Amore
/*
 * A growable character buffer.  The text held occupies [content, end);
 * last marks the final byte of the allocation.
 */
struct sbuf {
	char	*content;	/* first byte of the buffer */
	char	*end;		/* one past the final byte of text */
	char	*last;		/* final allocated byte */
};
6695c635eGarrett D'Amore
/* Drop the final n characters from the buffer sb. */
#define	sbuf_retract(sb, n)	((sb)->end -= (n))
/* Number of characters currently held in the buffer sb. */
#define	sbuf_length(sb)		((sb)->end - (sb)->content)
7195c635eGarrett D'Amore
/*
 * Shape of a filtering copy routine: it reads len characters at src,
 * stores the edited text at dst, and returns one past the last byte stored.
 */
typedef char *edited_copy(char *src, char *dst, int len);
7395c635eGarrett D'Amore
7495c635eGarrett D'Amore/*
7595c635eGarrett D'Amore * While the whatis line is being formed, it is stored in whatis_proto.
7695c635eGarrett D'Amore * When finished, it is reformatted into whatis_final and then appended
7795c635eGarrett D'Amore * to whatis_lines.
7895c635eGarrett D'Amore */
7995c635eGarrett D'Amorestatic struct sbuf	*whatis_proto;
8095c635eGarrett D'Amorestatic struct sbuf	*whatis_final;
8195c635eGarrett D'Amorestatic stringlist	*whatis_lines;	/* collected output lines */
8295c635eGarrett D'Amore
8395c635eGarrett D'Amorestatic char tempfile[MAXPATHLEN];	/* path of temporary file, if any */
8495c635eGarrett D'Amore
/* Two-letter mdoc macro names, concatenated; see process_mdoc_line(). */
#define	MDOC_COMMANDS	"ArDvErEvFlLiNmPa"
8695c635eGarrett D'Amore
8795c635eGarrett D'Amore
8895c635eGarrett D'Amore/* Free a struct page_info and its content */
8995c635eGarrett D'Amorestatic void
9095c635eGarrett D'Amorefree_page_info(struct page_info *info)
9195c635eGarrett D'Amore{
9295c635eGarrett D'Amore
9395c635eGarrett D'Amore	free(info->filename);
9495c635eGarrett D'Amore	free(info->name);
9595c635eGarrett D'Amore	free(info->suffix);
9695c635eGarrett D'Amore	free(info);
9795c635eGarrett D'Amore}
9895c635eGarrett D'Amore
9995c635eGarrett D'Amore/*
10095c635eGarrett D'Amore * Allocate and fill in a new struct page_info given the
10195c635eGarrett D'Amore * name of the man section directory and the dirent of the file.
10295c635eGarrett D'Amore * If the file is not a man page, return NULL.
10395c635eGarrett D'Amore */
10495c635eGarrett D'Amorestatic struct page_info *
10595c635eGarrett D'Amorenew_page_info(char *dir, struct dirent *dirent)
10695c635eGarrett D'Amore{
10795c635eGarrett D'Amore	struct page_info *info;
10895c635eGarrett D'Amore	int		basename_length;
10995c635eGarrett D'Amore	char		*suffix;
11095c635eGarrett D'Amore	struct stat	st;
11195c635eGarrett D'Amore
11295c635eGarrett D'Amore	if ((info = malloc(sizeof (struct page_info))) == NULL)
11395c635eGarrett D'Amore		err(1, "malloc");
11495c635eGarrett D'Amore	basename_length = strlen(dirent->d_name);
11595c635eGarrett D'Amore	suffix = &dirent->d_name[basename_length];
11695c635eGarrett D'Amore	if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1)
11795c635eGarrett D'Amore		err(1, "asprintf");
11895c635eGarrett D'Amore	for (;;) {
11995c635eGarrett D'Amore		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
12095c635eGarrett D'Amore			if (*suffix == '.')
12195c635eGarrett D'Amore				break;
12295c635eGarrett D'Amore			free(info->filename);
12395c635eGarrett D'Amore			free(info);
12495c635eGarrett D'Amore			return (NULL);
12595c635eGarrett D'Amore		}
12695c635eGarrett D'Amore	}
12795c635eGarrett D'Amore	*suffix++ = '\0';
12895c635eGarrett D'Amore	info->name = strdup(dirent->d_name);
12995c635eGarrett D'Amore	info->suffix = strdup(suffix);
13095c635eGarrett D'Amore	if (stat(info->filename, &st) < 0) {
13195c635eGarrett D'Amore		warn("%s", info->filename);
13295c635eGarrett D'Amore		free_page_info(info);
13395c635eGarrett D'Amore		return (NULL);
13495c635eGarrett D'Amore	}
13595c635eGarrett D'Amore	if (!S_ISREG(st.st_mode)) {
13695c635eGarrett D'Amore		free_page_info(info);
13795c635eGarrett D'Amore		return (NULL);
13895c635eGarrett D'Amore	}
13995c635eGarrett D'Amore	info->inode = st.st_ino;
14095c635eGarrett D'Amore	return (info);
14195c635eGarrett D'Amore}
14295c635eGarrett D'Amore
14395c635eGarrett D'Amore/*
14495c635eGarrett D'Amore * Reset sbuf length to 0.
14595c635eGarrett D'Amore */
14695c635eGarrett D'Amorestatic void
14795c635eGarrett D'Amoresbuf_clear(struct sbuf *sbuf)
14895c635eGarrett D'Amore{
14995c635eGarrett D'Amore
15095c635eGarrett D'Amore	sbuf->end = sbuf->content;
15195c635eGarrett D'Amore}
15295c635eGarrett D'Amore
15395c635eGarrett D'Amore/*
15495c635eGarrett D'Amore * Allocate a new sbuf.
15595c635eGarrett D'Amore */
15695c635eGarrett D'Amorestatic struct sbuf *
15795c635eGarrett D'Amorenew_sbuf(void)
15895c635eGarrett D'Amore{
15995c635eGarrett D'Amore	struct sbuf	*sbuf;
16095c635eGarrett D'Amore
16195c635eGarrett D'Amore	if ((sbuf = malloc(sizeof (struct sbuf))) == NULL)
16295c635eGarrett D'Amore		err(1, "malloc");
16395c635eGarrett D'Amore	if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL)
16495c635eGarrett D'Amore		err(1, "malloc");
16595c635eGarrett D'Amore	sbuf->last = sbuf->content + LINE_ALLOC - 1;
16695c635eGarrett D'Amore	sbuf_clear(sbuf);
16795c635eGarrett D'Amore
16895c635eGarrett D'Amore	return (sbuf);
16995c635eGarrett D'Amore}
17095c635eGarrett D'Amore
17195c635eGarrett D'Amore/*
17295c635eGarrett D'Amore * Ensure that there is enough room in the sbuf
17395c635eGarrett D'Amore * for nchars more characters.
17495c635eGarrett D'Amore */
17595c635eGarrett D'Amorestatic void
17695c635eGarrett D'Amoresbuf_need(struct sbuf *sbuf, int nchars)
17795c635eGarrett D'Amore{
17895c635eGarrett D'Amore	char *new_content;
17995c635eGarrett D'Amore	size_t size, cntsize;
18095c635eGarrett D'Amore	size_t grow = 128;
18195c635eGarrett D'Amore
18295c635eGarrett D'Amore	while (grow < nchars) {
18395c635eGarrett D'Amore		grow += 128;	/* we grow in chunks of 128 bytes */
18495c635eGarrett D'Amore	}
18595c635eGarrett D'Amore
18695c635eGarrett D'Amore	/* Grow if the buffer isn't big enough */
18795c635eGarrett D'Amore	if (sbuf->end + nchars > sbuf->last) {
18895c635eGarrett D'Amore		size = sbuf->last + 1 - sbuf->content;
18995c635eGarrett D'Amore		size += grow;
19095c635eGarrett D'Amore		cntsize = sbuf->end - sbuf->content;
19195c635eGarrett D'Amore
19295c635eGarrett D'Amore		if ((new_content = realloc(sbuf->content, size)) == NULL) {
19395c635eGarrett D'Amore			perror("realloc");
19495c635eGarrett D'Amore			if (tempfile[0] != '\0')
19595c635eGarrett D'Amore				(void) unlink(tempfile);
19695c635eGarrett D'Amore			exit(1);
19795c635eGarrett D'Amore		}
19895c635eGarrett D'Amore		sbuf->content = new_content;
19995c635eGarrett D'Amore		sbuf->end = new_content + cntsize;
20095c635eGarrett D'Amore		sbuf->last = new_content + size - 1;
20195c635eGarrett D'Amore	}
20295c635eGarrett D'Amore}
20395c635eGarrett D'Amore
20495c635eGarrett D'Amore/*
20595c635eGarrett D'Amore * Append a string of a given length to the sbuf.
20695c635eGarrett D'Amore */
20795c635eGarrett D'Amorestatic void
20895c635eGarrett D'Amoresbuf_append(struct sbuf *sbuf, const char *text, int length)
20995c635eGarrett D'Amore{
21095c635eGarrett D'Amore	if (length > 0) {
21195c635eGarrett D'Amore		sbuf_need(sbuf, length);
21295c635eGarrett D'Amore		(void) memcpy(sbuf->end, text, length);
21395c635eGarrett D'Amore		sbuf->end += length;
21495c635eGarrett D'Amore	}
21595c635eGarrett D'Amore}
21695c635eGarrett D'Amore
/*
 * Add a null-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	size_t	len = strlen(text);

	sbuf_append(sbuf, text, len);
}
22695c635eGarrett D'Amore
22795c635eGarrett D'Amore/*
22895c635eGarrett D'Amore * Append an edited null-terminated string to the sbuf.
22995c635eGarrett D'Amore */
23095c635eGarrett D'Amorestatic void
23195c635eGarrett D'Amoresbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
23295c635eGarrett D'Amore{
23395c635eGarrett D'Amore	int	length;
23495c635eGarrett D'Amore
23595c635eGarrett D'Amore	if ((length = strlen(text)) > 0) {
23695c635eGarrett D'Amore		sbuf_need(sbuf, length);
23795c635eGarrett D'Amore		sbuf->end = copy(text, sbuf->end, length);
23895c635eGarrett D'Amore	}
23995c635eGarrett D'Amore}
24095c635eGarrett D'Amore
24195c635eGarrett D'Amore/*
24295c635eGarrett D'Amore * Strip any of a set of chars from the end of the sbuf.
24395c635eGarrett D'Amore */
24495c635eGarrett D'Amorestatic void
24595c635eGarrett D'Amoresbuf_strip(struct sbuf *sbuf, const char *set)
24695c635eGarrett D'Amore{
24795c635eGarrett D'Amore
24895c635eGarrett D'Amore	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
24995c635eGarrett D'Amore		sbuf->end--;
25095c635eGarrett D'Amore}
25195c635eGarrett D'Amore
25295c635eGarrett D'Amore/*
25395c635eGarrett D'Amore * Return the null-terminated string built by the sbuf.
25495c635eGarrett D'Amore */
25595c635eGarrett D'Amorestatic char *
25695c635eGarrett D'Amoresbuf_content(struct sbuf *sbuf)
25795c635eGarrett D'Amore{
25895c635eGarrett D'Amore
25995c635eGarrett D'Amore	*sbuf->end = '\0';
26095c635eGarrett D'Amore	return (sbuf->content);
26195c635eGarrett D'Amore}
26295c635eGarrett D'Amore
26395c635eGarrett D'Amore/*
26495c635eGarrett D'Amore * Return true if no man page exists in the directory with
26595c635eGarrett D'Amore * any of the names in the stringlist.
26695c635eGarrett D'Amore */
26795c635eGarrett D'Amorestatic int
26895c635eGarrett D'Amoreno_page_exists(char *dir, stringlist *names, char *suffix)
26995c635eGarrett D'Amore{
27095c635eGarrett D'Amore	char	path[MAXPATHLEN];
27195c635eGarrett D'Amore	char	*suffixes[] = { "", ".gz", ".bz2", NULL };
27295c635eGarrett D'Amore	size_t	i;
27395c635eGarrett D'Amore	int	j;
27495c635eGarrett D'Amore
27595c635eGarrett D'Amore	for (i = 0; i < names->sl_cur; i++) {
27695c635eGarrett D'Amore		for (j = 0; suffixes[j] != NULL; j++) {
27795c635eGarrett D'Amore			(void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s",
27895c635eGarrett D'Amore			    dir, names->sl_str[i], suffix, suffixes[j]);
27995c635eGarrett D'Amore			if (access(path, F_OK) == 0) {
28095c635eGarrett D'Amore				return (0);
28195c635eGarrett D'Amore			}
28295c635eGarrett D'Amore		}
28395c635eGarrett D'Amore	}
28495c635eGarrett D'Amore	return (1);
28595c635eGarrett D'Amore}
28695c635eGarrett D'Amore
28795c635eGarrett D'Amore/* ARGSUSED sig */
28895c635eGarrett D'Amorestatic void
28995c635eGarrett D'Amoretrap_signal(int sig)
29095c635eGarrett D'Amore{
29195c635eGarrett D'Amore
29295c635eGarrett D'Amore	if (tempfile[0] != '\0')
29395c635eGarrett D'Amore		(void) unlink(tempfile);
29495c635eGarrett D'Amore
29595c635eGarrett D'Amore	exit(1);
29695c635eGarrett D'Amore}
29795c635eGarrett D'Amore
29895c635eGarrett D'Amore/*
29995c635eGarrett D'Amore * Attempt to open an output file.
30095c635eGarrett D'Amore * Return NULL if unsuccessful.
30195c635eGarrett D'Amore */
30295c635eGarrett D'Amorestatic FILE *
30395c635eGarrett D'Amoreopen_output(char *name)
30495c635eGarrett D'Amore{
30595c635eGarrett D'Amore	FILE	*output;
30695c635eGarrett D'Amore
30795c635eGarrett D'Amore	whatis_lines = sl_init();
30895c635eGarrett D'Amore	(void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name);
30995c635eGarrett D'Amore	name = tempfile;
31095c635eGarrett D'Amore	if ((output = fopen(name, "w")) == NULL) {
31195c635eGarrett D'Amore		warn("%s", name);
31295c635eGarrett D'Amore		return (NULL);
31395c635eGarrett D'Amore	}
31495c635eGarrett D'Amore	return (output);
31595c635eGarrett D'Amore}
31695c635eGarrett D'Amore
/*
 * qsort() comparison routine for an array of strings: orders the two
 * elements as strcmp() orders the strings they point to.
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return (strcmp(*lhs, *rhs));
}
32395c635eGarrett D'Amore
32495c635eGarrett D'Amore/*
32595c635eGarrett D'Amore * Write the unique sorted lines to the output file.
32695c635eGarrett D'Amore */
32795c635eGarrett D'Amorestatic void
32895c635eGarrett D'Amorefinish_output(FILE *output, char *name)
32995c635eGarrett D'Amore{
33095c635eGarrett D'Amore	size_t	i;
33195c635eGarrett D'Amore	char	*prev = NULL;
33295c635eGarrett D'Amore
33395c635eGarrett D'Amore	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *),
33495c635eGarrett D'Amore	    linesort);
33595c635eGarrett D'Amore	for (i = 0; i < whatis_lines->sl_cur; i++) {
33695c635eGarrett D'Amore		char *line = whatis_lines->sl_str[i];
33795c635eGarrett D'Amore		if (i > 0 && strcmp(line, prev) == 0)
33895c635eGarrett D'Amore			continue;
33995c635eGarrett D'Amore		prev = line;
34095c635eGarrett D'Amore		(void) fputs(line, output);
34195c635eGarrett D'Amore		(void) putc('\n', output);
34295c635eGarrett D'Amore	}
34395c635eGarrett D'Amore	(void) fclose(output);
34495c635eGarrett D'Amore	sl_free(whatis_lines, 1);
34595c635eGarrett D'Amore	(void) rename(tempfile, name);
34695c635eGarrett D'Amore	(void) unlink(tempfile);
34795c635eGarrett D'Amore}
34895c635eGarrett D'Amore
34995c635eGarrett D'Amorestatic FILE *
35095c635eGarrett D'Amoreopen_whatis(char *mandir)
35195c635eGarrett D'Amore{
35295c635eGarrett D'Amore	char	filename[MAXPATHLEN];
35395c635eGarrett D'Amore
35495c635eGarrett D'Amore	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
35595c635eGarrett D'Amore	return (open_output(filename));
35695c635eGarrett D'Amore}
35795c635eGarrett D'Amore
35895c635eGarrett D'Amorestatic void
35995c635eGarrett D'Amorefinish_whatis(FILE *output, char *mandir)
36095c635eGarrett D'Amore{
36195c635eGarrett D'Amore	char	filename[MAXPATHLEN];
36295c635eGarrett D'Amore
36395c635eGarrett D'Amore	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
36495c635eGarrett D'Amore	finish_output(output, filename);
36595c635eGarrett D'Amore}
36695c635eGarrett D'Amore
/*
 * Remove trailing white space from a string in place, returning a
 * pointer to just beyond the new last character (that is, to the new
 * terminator).  A string made up entirely of white space becomes empty.
 */
static char *
trim_rhs(char *str)
{
	char	*rhs = &str[strlen(str)];

	/*
	 * Look at the character before rhs so the pointer never steps
	 * in front of str, and so the first character is examined too.
	 */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return (rhs);
}
38295c635eGarrett D'Amore
/*
 * Return a pointer to the next non-space character in the string, or
 * to its terminator if only white space remains.
 */
static char *
skip_spaces(char *s)
{

	/*
	 * The cast keeps bytes above 0x7f from reaching isspace() as
	 * negative values.  The terminator is not white space, so it
	 * ends the loop as well.
	 */
	while (isspace((unsigned char)*s))
		s++;

	return (s);
}
39595c635eGarrett D'Amore
/*
 * Decide whether the line opens the NAME section, i.e. has one of the
 * forms
 *	.Sh NAME
 *	.Sh "NAME"
 * where section_start is the three-character request (".Sh" here).
 * Trailing white space and a closing quote are cut off the line in place.
 * Returns 1 on a match and 0 otherwise.
 */
static int
name_section_line(char *line, const char *section_start)
{
	char	*tail;

	if (strncmp(line, section_start, 3) != 0)
		return (0);

	line = skip_spaces(&line[3]);
	tail = trim_rhs(line);
	if (line[0] == '"') {
		line++;
		tail--;
		if (*tail == '"')
			*tail = '\0';
	}

	return (strcmp(line, "NAME") == 0);
}
42295c635eGarrett D'Amore
/*
 * Copy characters while removing the most common nroff/troff markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 *
 * Any other escape loses its backslash and keeps the character after
 * it; a backslash that ends the input is dropped.  At most fromlen
 * bytes are written to "to".  Returns a pointer just past the last byte
 * written; the output is not null-terminated.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char	*from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* A backslash ending the input escapes nothing; drop it. */
		if (++from == from_end)
			break;
		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			/* Other \(xx: only the backslash is removed. */
			break;
		case 's':
			/* Point size change: optional sign, then digits. */
			from++;
			if (from < from_end && (*from == '+' || *from == '-'))
				from++;
			while (from < from_end &&
			    isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from == from_end)
				continue;
			if (*from == '(') {
				/* '(' and a two-character name, if present */
				from += (from_end - from >= 3) ?
				    3 : from_end - from;
			} else if (*from == '[') {
				while (from < from_end && *from != ']')
					from++;
				/* Step over the ']' when there is one. */
				if (from < from_end)
					from++;
			} else {
				from++;
			}
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		*to++ = *from++;
	}
	return (to);
}
47495c635eGarrett D'Amore
47595c635eGarrett D'Amore/*
47695c635eGarrett D'Amore * Append a string with the nroff formatting removed.
47795c635eGarrett D'Amore */
47895c635eGarrett D'Amorestatic void
47995c635eGarrett D'Amoreadd_nroff(char *text)
48095c635eGarrett D'Amore{
48195c635eGarrett D'Amore
48295c635eGarrett D'Amore	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
48395c635eGarrett D'Amore}
48495c635eGarrett D'Amore
48595c635eGarrett D'Amore/*
48695c635eGarrett D'Amore * Appends "name(suffix), " to whatis_final
48795c635eGarrett D'Amore */
48895c635eGarrett D'Amorestatic void
48995c635eGarrett D'Amoreadd_whatis_name(char *name, char *suffix)
49095c635eGarrett D'Amore{
49195c635eGarrett D'Amore
49295c635eGarrett D'Amore	if (*name != '\0') {
49395c635eGarrett D'Amore		sbuf_append_str(whatis_final, name);
49495c635eGarrett D'Amore		sbuf_append(whatis_final, "(", 1);
49595c635eGarrett D'Amore		sbuf_append_str(whatis_final, suffix);
49695c635eGarrett D'Amore		sbuf_append(whatis_final, "), ", 3);
49795c635eGarrett D'Amore	}
49895c635eGarrett D'Amore}
49995c635eGarrett D'Amore
50095c635eGarrett D'Amore/*
50195c635eGarrett D'Amore * Processes an old-style man(7) line. This ignores commands with only
50295c635eGarrett D'Amore * a single number argument.
50395c635eGarrett D'Amore */
50495c635eGarrett D'Amorestatic void
50595c635eGarrett D'Amoreprocess_man_line(char *line)
50695c635eGarrett D'Amore{
50795c635eGarrett D'Amore	char	*p;
50895c635eGarrett D'Amore
50995c635eGarrett D'Amore	if (*line == '.') {
51095c635eGarrett D'Amore		while (isalpha(*++line))
51195c635eGarrett D'Amore			;
51295c635eGarrett D'Amore		p = line = skip_spaces(line);
51395c635eGarrett D'Amore		while (*p != '\0') {
51495c635eGarrett D'Amore			if (!isdigit(*p))
51595c635eGarrett D'Amore				break;
51695c635eGarrett D'Amore			p++;
51795c635eGarrett D'Amore		}
51895c635eGarrett D'Amore		if (*p == '\0')
51995c635eGarrett D'Amore			return;
52095c635eGarrett D'Amore	} else
52195c635eGarrett D'Amore		line = skip_spaces(line);
52295c635eGarrett D'Amore	if (*line != '\0') {
52395c635eGarrett D'Amore		add_nroff(line);
52495c635eGarrett D'Amore		sbuf_append(whatis_proto, " ", 1);
52595c635eGarrett D'Amore	}
52695c635eGarrett D'Amore}
52795c635eGarrett D'Amore
52895c635eGarrett D'Amore/*
52995c635eGarrett D'Amore * Processes a new-style mdoc(7) line.
53095c635eGarrett D'Amore */
53195c635eGarrett D'Amorestatic void
53295c635eGarrett D'Amoreprocess_mdoc_line(char *line)
53395c635eGarrett D'Amore{
53495c635eGarrett D'Amore	int	xref;
53595c635eGarrett D'Amore	int	arg = 0;
53695c635eGarrett D'Amore	char	*line_end = &line[strlen(line)];
53795c635eGarrett D'Amore	int	orig_length = sbuf_length(whatis_proto);
53895c635eGarrett D'Amore	char	*next;
53995c635eGarrett D'Amore
54095c635eGarrett D'Amore	if (*line == '\0')
54195c635eGarrett D'Amore		return;
54295c635eGarrett D'Amore	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
54395c635eGarrett D'Amore		add_nroff(skip_spaces(line));
54495c635eGarrett D'Amore		sbuf_append(whatis_proto, " ", 1);
54595c635eGarrett D'Amore		return;
54695c635eGarrett D'Amore	}
54795c635eGarrett D'Amore	xref = strncmp(line, ".Xr", 3) == 0;
54895c635eGarrett D'Amore	line += 3;
54995c635eGarrett D'Amore	while ((line = skip_spaces(line)) < line_end) {
55095c635eGarrett D'Amore		if (*line == '"') {
55195c635eGarrett D'Amore			next = ++line;
55295c635eGarrett D'Amore			for (;;) {
55395c635eGarrett D'Amore				next = strchr(next, '"');
55495c635eGarrett D'Amore				if (next == NULL)
55595c635eGarrett D'Amore					break;
55695c635eGarrett D'Amore				(void) memmove(next, next + 1, strlen(next));
55795c635eGarrett D'Amore				line_end--;
55895c635eGarrett D'Amore				if (*next != '"')
55995c635eGarrett D'Amore					break;
56095c635eGarrett D'Amore				next++;
56195c635eGarrett D'Amore			}
56295c635eGarrett D'Amore		} else {
56395c635eGarrett D'Amore			next = strpbrk(line, " \t");
56495c635eGarrett D'Amore		}
56595c635eGarrett D'Amore		if (next != NULL)
56695c635eGarrett D'Amore			*next++ = '\0';
56795c635eGarrett D'Amore		else
56895c635eGarrett D'Amore			next = line_end;
56995c635eGarrett D'Amore		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
57095c635eGarrett D'Amore			if (strcmp(line, "Ns") == 0) {
57195c635eGarrett D'Amore				arg = 0;
57295c635eGarrett D'Amore				line = next;
57395c635eGarrett D'Amore				continue;
57495c635eGarrett D'Amore			}
57595c635eGarrett D'Amore			if (strstr(line, MDOC_COMMANDS) != NULL) {
57695c635eGarrett D'Amore				line = next;
57795c635eGarrett D'Amore				continue;
57895c635eGarrett D'Amore			}
57995c635eGarrett D'Amore		}
58095c635eGarrett D'Amore		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
58195c635eGarrett D'Amore			if (xref) {
58295c635eGarrett D'Amore				sbuf_append(whatis_proto, "(", 1);
58395c635eGarrett D'Amore				add_nroff(line);
58495c635eGarrett D'Amore				sbuf_append(whatis_proto, ")", 1);
58595c635eGarrett D'Amore				xref = 0;
58695c635eGarrett D'Amore			} else {
58795c635eGarrett D'Amore				sbuf_append(whatis_proto, " ", 1);
58895c635eGarrett D'Amore			}
58995c635eGarrett D'Amore		}
59095c635eGarrett D'Amore		add_nroff(line);
59195c635eGarrett D'Amore		arg++;
59295c635eGarrett D'Amore		line = next;
59395c635eGarrett D'Amore	}
59495c635eGarrett D'Amore	if (sbuf_length(whatis_proto) > orig_length)
59595c635eGarrett D'Amore		sbuf_append(whatis_proto, " ", 1);
59695c635eGarrett D'Amore}
59795c635eGarrett D'Amore
59895c635eGarrett D'Amore/*
59995c635eGarrett D'Amore * Collect a list of comma-separated names from the text.
60095c635eGarrett D'Amore */
60195c635eGarrett D'Amorestatic void
60295c635eGarrett D'Amorecollect_names(stringlist *names, char *text)
60395c635eGarrett D'Amore{
60495c635eGarrett D'Amore	char	*arg;
60595c635eGarrett D'Amore
60695c635eGarrett D'Amore	for (;;) {
60795c635eGarrett D'Amore		arg = text;
60895c635eGarrett D'Amore		text = strchr(text, ',');
60995c635eGarrett D'Amore		if (text != NULL)
61095c635eGarrett D'Amore			*text++ = '\0';
61195c635eGarrett D'Amore		(void) sl_add(names, arg);
61295c635eGarrett D'Amore		if (text == NULL)
61395c635eGarrett D'Amore			return;
61495c635eGarrett D'Amore		if (*text == ' ')
61595c635eGarrett D'Amore			text++;
61695c635eGarrett D'Amore	}
61795c635eGarrett D'Amore}
61895c635eGarrett D'Amore
62095c635eGarrett D'Amore
62195c635eGarrett D'Amore/*
62295c635eGarrett D'Amore * Process a man page source into a single whatis line and add it
62395c635eGarrett D'Amore * to whatis_lines.
62495c635eGarrett D'Amore */
62595c635eGarrett D'Amorestatic void
62695c635eGarrett D'Amoreprocess_page(struct page_info *page, char *section_dir)
62795c635eGarrett D'Amore{
62895c635eGarrett D'Amore	FILE		*fp;
62995c635eGarrett D'Amore	stringlist	*names;
63095c635eGarrett D'Amore	char		*descr;
63195c635eGarrett D'Amore	int		state = STATE_UNKNOWN;
63295c635eGarrett D'Amore	size_t		i;
63395c635eGarrett D'Amore	char		*line = NULL;
63495c635eGarrett D'Amore	size_t		linecap = 0;
63595c635eGarrett D'Amore
63695c635eGarrett D'Amore	sbuf_clear(whatis_proto);
63795c635eGarrett D'Amore	if ((fp = fopen(page->filename, "r")) == NULL) {
63895c635eGarrett D'Amore		warn("%s", page->filename);
63995c635eGarrett D'Amore		return;
64095c635eGarrett D'Amore	}
64195c635eGarrett D'Amore	while (getline(&line, &linecap, fp) > 0) {
64295c635eGarrett D'Amore		/* Skip comments */
64395c635eGarrett D'Amore		if (strncmp(line, ".\\\"", 3) == 0)
64495c635eGarrett D'Amore			continue;
64595c635eGarrett D'Amore		switch (state) {
64695c635eGarrett D'Amore		/* Haven't reached the NAME section yet */
64795c635eGarrett D'Amore		case STATE_UNKNOWN:
64895c635eGarrett D'Amore			if (name_section_line(line, ".SH"))
64995c635eGarrett D'Amore				state = STATE_MANSTYLE;
65095c635eGarrett D'Amore			else if (name_section_line(line, ".Sh"))
65195c635eGarrett D'Amore				state = STATE_MDOCNAME;
65295c635eGarrett D'Amore			continue;
65395c635eGarrett D'Amore		/* Inside an old-style .SH NAME section */
65495c635eGarrett D'Amore		case STATE_MANSTYLE:
65595c635eGarrett D'Amore			if (strncmp(line, ".SH", 3) == 0 ||
65695c635eGarrett D'Amore			    strncmp(line, ".SS", 3) == 0)
65795c635eGarrett D'Amore				break;
65895c635eGarrett D'Amore			(void) trim_rhs(line);
65995c635eGarrett D'Amore			if (strcmp(line, ".") == 0)
66095c635eGarrett D'Amore				continue;
66195c635eGarrett D'Amore			if (strncmp(line, ".IX", 3) == 0) {
66295c635eGarrett D'Amore				line += 3;
66395c635eGarrett D'Amore				line = skip_spaces(line);
66495c635eGarrett D'Amore			}
66595c635eGarrett D'Amore			process_man_line(line);
66695c635eGarrett D'Amore			continue;
66795c635eGarrett D'Amore		/* Inside a new-style .Sh NAME section (the .Nm part) */
66895c635eGarrett D'Amore		case STATE_MDOCNAME:
66995c635eGarrett D'Amore			(void) trim_rhs(line);
67095c635eGarrett D'Amore			if (strncmp(line, ".Nm", 3) == 0) {
67195c635eGarrett D'Amore				process_mdoc_line(line);
67295c635eGarrett D'Amore				continue;
67395c635eGarrett D'Amore			} else {
67495c635eGarrett D'Amore				if (strcmp(line, ".") == 0)
67595c635eGarrett D'Amore					continue;
67695c635eGarrett D'Amore				sbuf_append(whatis_proto, "- ", 2);
67795c635eGarrett D'Amore				state = STATE_MDOCDESC;
67895c635eGarrett D'Amore			}
67995c635eGarrett D'Amore			/* FALLTHROUGH */
68095c635eGarrett D'Amore		/* Inside a new-style .Sh NAME section (after the .Nm-s) */
68195c635eGarrett D'Amore		case STATE_MDOCDESC:
68295c635eGarrett D'Amore			if (strncmp(line, ".Sh", 3) == 0)
68395c635eGarrett D'Amore				break;
68495c635eGarrett D'Amore			(void) trim_rhs(line);
68595c635eGarrett D'Amore			if (strcmp(line, ".") == 0)
68695c635eGarrett D'Amore				continue;
68795c635eGarrett D'Amore			process_mdoc_line(line);
68895c635eGarrett D'Amore			continue;
68995c635eGarrett D'Amore		}
69095c635eGarrett D'Amore		break;
69195c635eGarrett D'Amore	}
69295c635eGarrett D'Amore	(void) fclose(fp);
69395c635eGarrett D'Amore	sbuf_strip(whatis_proto, " \t.-");
69495c635eGarrett D'Amore	line = sbuf_content(whatis_proto);
69595c635eGarrett D'Amore	/*
69695c635eGarrett D'Amore	 * Line now contains the appropriate data, but without the
69795c635eGarrett D'Amore	 * proper indentation or the section appended to each name.
69895c635eGarrett D'Amore	 */
69995c635eGarrett D'Amore	descr = strstr(line, " - ");
70095c635eGarrett D'Amore	if (descr == NULL) {
70195c635eGarrett D'Amore		descr = strchr(line, ' ');
70295c635eGarrett D'Amore		if (descr == NULL)
70395c635eGarrett D'Amore			return;
70495c635eGarrett D'Amore		*descr++ = '\0';
70595c635eGarrett D'Amore	} else {
70695c635eGarrett D'Amore		*descr = '\0';
70795c635eGarrett D'Amore		descr += 3;
70895c635eGarrett D'Amore	}
70995c635eGarrett D'Amore	names = sl_init();
71095c635eGarrett D'Amore	collect_names(names, line);
71195c635eGarrett D'Amore	sbuf_clear(whatis_final);
71295c635eGarrett D'Amore	if (!sl_find(names, page->name) &&
71395c635eGarrett D'Amore	    no_page_exists(section_dir, names, page->suffix)) {
71495c635eGarrett D'Amore		/*
71595c635eGarrett D'Amore		 * Add the page name since that's the only
71695c635eGarrett D'Amore		 * thing that man(1) will find.
71795c635eGarrett D'Amore		 */
71895c635eGarrett D'Amore		add_whatis_name(page->name, page->suffix);
71995c635eGarrett D'Amore	}
72095c635eGarrett D'Amore	for (i = 0; i < names->sl_cur; i++)
72195c635eGarrett D'Amore		add_whatis_name(names->sl_str[i], page->suffix);
72295c635eGarrett D'Amore	sl_free(names, 0);
72395c635eGarrett D'Amore	/* Remove last ", " */
72495c635eGarrett D'Amore	sbuf_retract(whatis_final, 2);
72595c635eGarrett D'Amore	while (sbuf_length(whatis_final) < INDENT)
72695c635eGarrett D'Amore		sbuf_append(whatis_final, " ", 1);
72795c635eGarrett D'Amore	sbuf_append(whatis_final, " - ", 3);
72895c635eGarrett D'Amore	sbuf_append_str(whatis_final, skip_spaces(descr));
72995c635eGarrett D'Amore	(void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
73095c635eGarrett D'Amore}
73195c635eGarrett D'Amore
73295c635eGarrett D'Amore/*
73395c635eGarrett D'Amore * Sort pages first by inode number, then by name.
73495c635eGarrett D'Amore */
73595c635eGarrett D'Amorestatic int
73695c635eGarrett D'Amorepagesort(const void *a, const void *b)
73795c635eGarrett D'Amore{
73895c635eGarrett D'Amore	const struct page_info *p1 = *(struct page_info * const *) a;
73995c635eGarrett D'Amore	const struct page_info *p2 = *(struct page_info * const *) b;
74095c635eGarrett D'Amore
74195c635eGarrett D'Amore	if (p1->inode == p2->inode)
74295c635eGarrett D'Amore		return (strcmp(p1->name, p2->name));
74395c635eGarrett D'Amore
74495c635eGarrett D'Amore	return (p1->inode - p2->inode);
74595c635eGarrett D'Amore}
74695c635eGarrett D'Amore
74795c635eGarrett D'Amore/*
74895c635eGarrett D'Amore * Process a single man section.
74995c635eGarrett D'Amore */
75095c635eGarrett D'Amorestatic void
75195c635eGarrett D'Amoreprocess_section(char *section_dir)
75295c635eGarrett D'Amore{
75395c635eGarrett D'Amore	struct dirent	**entries;
75495c635eGarrett D'Amore	int		nentries;
75595c635eGarrett D'Amore	struct page_info **pages;
75695c635eGarrett D'Amore	int		npages = 0;
75795c635eGarrett D'Amore	int		i;
75895c635eGarrett D'Amore	ino_t		prev_inode = 0;
75995c635eGarrett D'Amore
76095c635eGarrett D'Amore	/* Scan the man section directory for pages */
76195c635eGarrett D'Amore	nentries = scandir(section_dir, &entries, NULL, alphasort);
76295c635eGarrett D'Amore
76395c635eGarrett D'Amore	/* Collect information about man pages */
76495c635eGarrett D'Amore	pages = (struct page_info **)calloc(nentries,
76595c635eGarrett D'Amore	    sizeof (struct page_info *));
76695c635eGarrett D'Amore	for (i = 0; i < nentries; i++) {
76795c635eGarrett D'Amore		struct page_info *info = new_page_info(section_dir, entries[i]);
76895c635eGarrett D'Amore		if (info != NULL)
76995c635eGarrett D'Amore			pages[npages++] = info;
77095c635eGarrett D'Amore		free(entries[i]);
77195c635eGarrett D'Amore	}
77295c635eGarrett D'Amore	free(entries);
77395c635eGarrett D'Amore	qsort(pages, npages, sizeof (struct page_info *), pagesort);
77495c635eGarrett D'Amore
77595c635eGarrett D'Amore	/* Process each unique page */
77695c635eGarrett D'Amore	for (i = 0; i < npages; i++) {
77795c635eGarrett D'Amore		struct page_info *page = pages[i];
77895c635eGarrett D'Amore		if (page->inode != prev_inode) {
77995c635eGarrett D'Amore			prev_inode = page->inode;
78095c635eGarrett D'Amore			process_page(page, section_dir);
78195c635eGarrett D'Amore		}
78295c635eGarrett D'Amore		free_page_info(page);
78395c635eGarrett D'Amore	}
78495c635eGarrett D'Amore	free(pages);
78595c635eGarrett D'Amore}
78695c635eGarrett D'Amore
/*
 * scandir(3C) filter that selects man page section directories.
 *
 * Returns 1 if the entry's name is "man" followed by zero or more
 * alphanumeric characters (for example "man1" or "man3lib"), and 0
 * otherwise.
 */
static int
select_sections(const struct dirent *entry)
{
	const char	*p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return (0);

	/* Only step past the prefix once we know it is present. */
	for (p = entry->d_name + 3; *p != '\0'; p++) {
		/*
		 * The <ctype.h> functions require a value representable
		 * as unsigned char; a plain char may be negative for
		 * bytes >= 0x80.
		 */
		if (!isalnum((unsigned char)*p))
			return (0);
	}
	return (1);
}
80395c635eGarrett D'Amore
80495c635eGarrett D'Amore/*
80595c635eGarrett D'Amore * Process a single top-level man directory by finding all the
80695c635eGarrett D'Amore * sub-directories named man* and processing each one in turn.
80795c635eGarrett D'Amore */
80895c635eGarrett D'Amorevoid
80995c635eGarrett D'Amoremwpath(char *path)
81095c635eGarrett D'Amore{
81195c635eGarrett D'Amore	FILE		*fp = NULL;
81295c635eGarrett D'Amore	struct dirent	**entries;
81395c635eGarrett D'Amore	int		nsections;
81495c635eGarrett D'Amore	int		i;
81595c635eGarrett D'Amore
81695c635eGarrett D'Amore	(void) signal(SIGINT, trap_signal);
81795c635eGarrett D'Amore	(void) signal(SIGHUP, trap_signal);
81895c635eGarrett D'Amore	(void) signal(SIGQUIT, trap_signal);
81995c635eGarrett D'Amore	(void) signal(SIGTERM, trap_signal);
82095c635eGarrett D'Amore
82195c635eGarrett D'Amore	whatis_proto = new_sbuf();
82295c635eGarrett D'Amore	whatis_final = new_sbuf();
82395c635eGarrett D'Amore
82495c635eGarrett D'Amore	nsections = scandir(path, &entries, select_sections, alphasort);
82595c635eGarrett D'Amore	if ((fp = open_whatis(path)) == NULL)
82695c635eGarrett D'Amore		return;
82795c635eGarrett D'Amore	for (i = 0; i < nsections; i++) {
82895c635eGarrett D'Amore		char	section_dir[MAXPATHLEN];
82995c635eGarrett D'Amore
83095c635eGarrett D'Amore		(void) snprintf(section_dir, MAXPATHLEN, "%s/%s",
83195c635eGarrett D'Amore		    path, entries[i]->d_name);
83295c635eGarrett D'Amore		process_section(section_dir);
83395c635eGarrett D'Amore		free(entries[i]);
83495c635eGarrett D'Amore	}
83595c635eGarrett D'Amore	free(entries);
83695c635eGarrett D'Amore	finish_whatis(fp, path);
83795c635eGarrett D'Amore}