1/*
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
31 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
32 */
33
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/param.h>
37
38#include <ctype.h>
39#include <dirent.h>
40#include <err.h>
41#include <signal.h>
42#include <stddef.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47
48#include "man.h"
49#include "stringlist.h"
50
51
/*
 * Information collected about each man page in a section.
 * The three strings are heap-allocated; free_page_info() releases them.
 */
struct page_info {
	char	*filename;	/* "<section dir>/<entry name>" path */
	char	*name;		/* entry name up to its final '.' */
	char	*suffix;	/* text following that final '.' */
	ino_t	inode;		/* st_ino of the file, as reported by stat() */
};
59
/*
 * An expanding string.  The content is not kept null-terminated;
 * sbuf_content() writes the terminator when the string is requested.
 */
struct sbuf {
	char	*content;	/* the start of the buffer */
	char	*end;		/* just past the end of the content */
	char	*last;		/* the last allocated character */
};
66
/*
 * Remove the last amount characters from the sbuf.
 * No check is made that the sbuf holds that many characters.
 */
#define	sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount))
/* Return the length of the sbuf content (a pointer difference) */
#define	sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content)
71
/*
 * Signature of the copy routine handed to sbuf_append_edited(): it is
 * given length characters at from and a destination at to, and returns
 * the new end of the destination (just past the last character written).
 */
typedef char *edited_copy(char *from, char *to, int length);
73
/*
 * While the whatis line is being formed, it is stored in whatis_proto.
 * When finished, it is reformatted into whatis_final and then appended
 * to whatis_lines.
 */
static struct sbuf	*whatis_proto;
static struct sbuf	*whatis_final;
static stringlist	*whatis_lines;	/* collected output lines */

/*
 * Path of the temporary output file, if any.  The cleanup paths only
 * unlink it when the first character is not '\0'.
 */
static char tempfile[MAXPATHLEN];

/*
 * Two-letter mdoc macro names, concatenated without separators;
 * consulted by process_mdoc_line().
 */
#define	MDOC_COMMANDS	"ArDvErEvFlLiNmPa"
86
87
88/* Free a struct page_info and its content */
89static void
90free_page_info(struct page_info *info)
91{
92
93	free(info->filename);
94	free(info->name);
95	free(info->suffix);
96	free(info);
97}
98
99/*
100 * Allocate and fill in a new struct page_info given the
101 * name of the man section directory and the dirent of the file.
102 * If the file is not a man page, return NULL.
103 */
104static struct page_info *
105new_page_info(char *dir, struct dirent *dirent)
106{
107	struct page_info *info;
108	int		basename_length;
109	char		*suffix;
110	struct stat	st;
111
112	if ((info = malloc(sizeof (struct page_info))) == NULL)
113		err(1, "malloc");
114	basename_length = strlen(dirent->d_name);
115	suffix = &dirent->d_name[basename_length];
116	if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1)
117		err(1, "asprintf");
118	for (;;) {
119		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
120			if (*suffix == '.')
121				break;
122			free(info->filename);
123			free(info);
124			return (NULL);
125		}
126	}
127	*suffix++ = '\0';
128	info->name = strdup(dirent->d_name);
129	info->suffix = strdup(suffix);
130	if (stat(info->filename, &st) < 0) {
131		warn("%s", info->filename);
132		free_page_info(info);
133		return (NULL);
134	}
135	if (!S_ISREG(st.st_mode)) {
136		free_page_info(info);
137		return (NULL);
138	}
139	info->inode = st.st_ino;
140	return (info);
141}
142
143/*
144 * Reset sbuf length to 0.
145 */
146static void
147sbuf_clear(struct sbuf *sbuf)
148{
149
150	sbuf->end = sbuf->content;
151}
152
153/*
154 * Allocate a new sbuf.
155 */
156static struct sbuf *
157new_sbuf(void)
158{
159	struct sbuf	*sbuf;
160
161	if ((sbuf = malloc(sizeof (struct sbuf))) == NULL)
162		err(1, "malloc");
163	if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL)
164		err(1, "malloc");
165	sbuf->last = sbuf->content + LINE_ALLOC - 1;
166	sbuf_clear(sbuf);
167
168	return (sbuf);
169}
170
171/*
172 * Ensure that there is enough room in the sbuf
173 * for nchars more characters.
174 */
175static void
176sbuf_need(struct sbuf *sbuf, int nchars)
177{
178	char *new_content;
179	size_t size, cntsize;
180	size_t grow = 128;
181
182	while (grow < nchars) {
183		grow += 128;	/* we grow in chunks of 128 bytes */
184	}
185
186	/* Grow if the buffer isn't big enough */
187	if (sbuf->end + nchars > sbuf->last) {
188		size = sbuf->last + 1 - sbuf->content;
189		size += grow;
190		cntsize = sbuf->end - sbuf->content;
191
192		if ((new_content = realloc(sbuf->content, size)) == NULL) {
193			perror("realloc");
194			if (tempfile[0] != '\0')
195				(void) unlink(tempfile);
196			exit(1);
197		}
198		sbuf->content = new_content;
199		sbuf->end = new_content + cntsize;
200		sbuf->last = new_content + size - 1;
201	}
202}
203
204/*
205 * Append a string of a given length to the sbuf.
206 */
207static void
208sbuf_append(struct sbuf *sbuf, const char *text, int length)
209{
210	if (length > 0) {
211		sbuf_need(sbuf, length);
212		(void) memcpy(sbuf->end, text, length);
213		sbuf->end += length;
214	}
215}
216
/*
 * Append a whole null-terminated string (without its terminator)
 * to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int	text_len = strlen(text);

	sbuf_append(sbuf, text, text_len);
}
226
227/*
228 * Append an edited null-terminated string to the sbuf.
229 */
230static void
231sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
232{
233	int	length;
234
235	if ((length = strlen(text)) > 0) {
236		sbuf_need(sbuf, length);
237		sbuf->end = copy(text, sbuf->end, length);
238	}
239}
240
241/*
242 * Strip any of a set of chars from the end of the sbuf.
243 */
244static void
245sbuf_strip(struct sbuf *sbuf, const char *set)
246{
247
248	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
249		sbuf->end--;
250}
251
252/*
253 * Return the null-terminated string built by the sbuf.
254 */
255static char *
256sbuf_content(struct sbuf *sbuf)
257{
258
259	*sbuf->end = '\0';
260	return (sbuf->content);
261}
262
263/*
264 * Return true if no man page exists in the directory with
265 * any of the names in the stringlist.
266 */
267static int
268no_page_exists(char *dir, stringlist *names, char *suffix)
269{
270	char	path[MAXPATHLEN];
271	char	*suffixes[] = { "", ".gz", ".bz2", NULL };
272	size_t	i;
273	int	j;
274
275	for (i = 0; i < names->sl_cur; i++) {
276		for (j = 0; suffixes[j] != NULL; j++) {
277			(void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s",
278			    dir, names->sl_str[i], suffix, suffixes[j]);
279			if (access(path, F_OK) == 0) {
280				return (0);
281			}
282		}
283	}
284	return (1);
285}
286
287/* ARGSUSED sig */
288static void
289trap_signal(int sig)
290{
291
292	if (tempfile[0] != '\0')
293		(void) unlink(tempfile);
294
295	exit(1);
296}
297
298/*
299 * Attempt to open an output file.
300 * Return NULL if unsuccessful.
301 */
302static FILE *
303open_output(char *name)
304{
305	FILE	*output;
306
307	whatis_lines = sl_init();
308	(void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name);
309	name = tempfile;
310	if ((output = fopen(name, "w")) == NULL) {
311		warn("%s", name);
312		return (NULL);
313	}
314	return (output);
315}
316
/*
 * qsort() comparison routine for an array of strings: each argument
 * points at a char pointer, and the strings are ordered by strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char	*lhs = *(const char * const *)a;
	const char	*rhs = *(const char * const *)b;

	return (strcmp(lhs, rhs));
}
323
324/*
325 * Write the unique sorted lines to the output file.
326 */
327static void
328finish_output(FILE *output, char *name)
329{
330	size_t	i;
331	char	*prev = NULL;
332
333	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *),
334	    linesort);
335	for (i = 0; i < whatis_lines->sl_cur; i++) {
336		char *line = whatis_lines->sl_str[i];
337		if (i > 0 && strcmp(line, prev) == 0)
338			continue;
339		prev = line;
340		(void) fputs(line, output);
341		(void) putc('\n', output);
342	}
343	(void) fclose(output);
344	sl_free(whatis_lines, 1);
345	(void) rename(tempfile, name);
346	(void) unlink(tempfile);
347}
348
349static FILE *
350open_whatis(char *mandir)
351{
352	char	filename[MAXPATHLEN];
353
354	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
355	return (open_output(filename));
356}
357
358static void
359finish_whatis(FILE *output, char *mandir)
360{
361	char	filename[MAXPATHLEN];
362
363	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
364	finish_output(output, filename);
365}
366
/*
 * Remove trailing whitespace from a string in place, returning a
 * pointer to just beyond the new last character (i.e. to the new
 * terminator).
 *
 * An empty string is returned unchanged, and a string made up entirely
 * of whitespace becomes empty.  The scan never moves in front of str.
 */
static char *
trim_rhs(char *str)
{
	char	*rhs = str + strlen(str);

	/* <ctype.h> wants an unsigned char value, not a plain char */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return (rhs);
}
382
/*
 * Return a pointer to the first non-whitespace character at or after s,
 * or to the terminating '\0' if there is none.
 */
static char *
skip_spaces(char *s)
{

	/* Cast first: passing a negative char to isspace() is undefined. */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;

	return (s);
}
395
/*
 * Decide whether a line opens the NAME section.  The line must begin
 * with the three characters of section_start (".SH" or ".Sh") and the
 * rest of it, after leading and trailing whitespace is removed, must be
 * NAME, optionally wrapped in double quotes:
 *	.Sh NAME
 *	.Sh "NAME"
 * The line is modified in place while it is examined.
 * Returns 1 for a match and 0 otherwise.
 */
static int
name_section_line(char *line, const char *section_start)
{
	char	*title;
	char	*tail;

	if (strncmp(line, section_start, 3) != 0)
		return (0);

	title = skip_spaces(&line[3]);
	tail = trim_rhs(title);
	if (title[0] == '"') {
		/* step inside the quotes, dropping a closing one if present */
		title++;
		tail--;
		if (*tail == '"')
			*tail = '\0';
	}

	return (strcmp(title, "NAME") == 0 ? 1 : 0);
}
422
/*
 * Copy characters while removing the most common nroff/troff markup:
 *	\(em, \(mi, \sN, \s-N, \s+N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape only the backslash is dropped and the character
 * after it is copied.
 *
 * from/fromlen delimit the input; to must have room for fromlen
 * characters (the output is never longer than the input).  Returns a
 * pointer just past the last character written; no terminator is added.
 *
 * Nothing at or beyond from + fromlen is read: an escape cut short by
 * the end of the input, including a lone trailing backslash, is simply
 * dropped.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char	*from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}

		/* A backslash with nothing after it: drop it and stop. */
		if (++from >= from_end)
			break;

		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			break;
		case 's':
			/* point-size change: optional sign, then digits */
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end &&
			    isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* "(xx": two-character name */
				if (from_end - from >= 3)
					from += 3;
				else
					from = from_end;
			} else if (*from == '[') {
				/* "[name]": skip through the closing ']' */
				while (++from < from_end && *from != ']')
					;
				if (from < from_end)
					from++;
			} else {
				/* single-character name */
				from++;
			}
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		/* Unrecognised escape: keep the character, lose the '\'. */
		*to++ = *from++;
	}
	return (to);
}
474
475/*
476 * Append a string with the nroff formatting removed.
477 */
478static void
479add_nroff(char *text)
480{
481
482	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
483}
484
485/*
486 * Appends "name(suffix), " to whatis_final
487 */
488static void
489add_whatis_name(char *name, char *suffix)
490{
491
492	if (*name != '\0') {
493		sbuf_append_str(whatis_final, name);
494		sbuf_append(whatis_final, "(", 1);
495		sbuf_append_str(whatis_final, suffix);
496		sbuf_append(whatis_final, "), ", 3);
497	}
498}
499
500/*
501 * Processes an old-style man(7) line. This ignores commands with only
502 * a single number argument.
503 */
504static void
505process_man_line(char *line)
506{
507	char	*p;
508
509	if (*line == '.') {
510		while (isalpha(*++line))
511			;
512		p = line = skip_spaces(line);
513		while (*p != '\0') {
514			if (!isdigit(*p))
515				break;
516			p++;
517		}
518		if (*p == '\0')
519			return;
520	} else
521		line = skip_spaces(line);
522	if (*line != '\0') {
523		add_nroff(line);
524		sbuf_append(whatis_proto, " ", 1);
525	}
526}
527
528/*
529 * Processes a new-style mdoc(7) line.
530 */
531static void
532process_mdoc_line(char *line)
533{
534	int	xref;
535	int	arg = 0;
536	char	*line_end = &line[strlen(line)];
537	int	orig_length = sbuf_length(whatis_proto);
538	char	*next;
539
540	if (*line == '\0')
541		return;
542	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
543		add_nroff(skip_spaces(line));
544		sbuf_append(whatis_proto, " ", 1);
545		return;
546	}
547	xref = strncmp(line, ".Xr", 3) == 0;
548	line += 3;
549	while ((line = skip_spaces(line)) < line_end) {
550		if (*line == '"') {
551			next = ++line;
552			for (;;) {
553				next = strchr(next, '"');
554				if (next == NULL)
555					break;
556				(void) memmove(next, next + 1, strlen(next));
557				line_end--;
558				if (*next != '"')
559					break;
560				next++;
561			}
562		} else {
563			next = strpbrk(line, " \t");
564		}
565		if (next != NULL)
566			*next++ = '\0';
567		else
568			next = line_end;
569		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
570			if (strcmp(line, "Ns") == 0) {
571				arg = 0;
572				line = next;
573				continue;
574			}
575			if (strstr(line, MDOC_COMMANDS) != NULL) {
576				line = next;
577				continue;
578			}
579		}
580		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
581			if (xref) {
582				sbuf_append(whatis_proto, "(", 1);
583				add_nroff(line);
584				sbuf_append(whatis_proto, ")", 1);
585				xref = 0;
586			} else {
587				sbuf_append(whatis_proto, " ", 1);
588			}
589		}
590		add_nroff(line);
591		arg++;
592		line = next;
593	}
594	if (sbuf_length(whatis_proto) > orig_length)
595		sbuf_append(whatis_proto, " ", 1);
596}
597
598/*
599 * Collect a list of comma-separated names from the text.
600 */
601static void
602collect_names(stringlist *names, char *text)
603{
604	char	*arg;
605
606	for (;;) {
607		arg = text;
608		text = strchr(text, ',');
609		if (text != NULL)
610			*text++ = '\0';
611		(void) sl_add(names, arg);
612		if (text == NULL)
613			return;
614		if (*text == ' ')
615			text++;
616	}
617}
618
/*
 * Parser states used by process_page():
 *	STATE_UNKNOWN	the NAME section has not been reached yet
 *	STATE_MANSTYLE	inside an old-style .SH NAME section
 *	STATE_MDOCNAME	inside a new-style .Sh NAME section (the .Nm part)
 *	STATE_MDOCDESC	inside a new-style .Sh NAME section (after the .Nm-s)
 */
enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
620
621/*
622 * Process a man page source into a single whatis line and add it
623 * to whatis_lines.
624 */
625static void
626process_page(struct page_info *page, char *section_dir)
627{
628	FILE		*fp;
629	stringlist	*names;
630	char		*descr;
631	int		state = STATE_UNKNOWN;
632	size_t		i;
633	char		*line = NULL;
634	size_t		linecap = 0;
635
636	sbuf_clear(whatis_proto);
637	if ((fp = fopen(page->filename, "r")) == NULL) {
638		warn("%s", page->filename);
639		return;
640	}
641	while (getline(&line, &linecap, fp) > 0) {
642		/* Skip comments */
643		if (strncmp(line, ".\\\"", 3) == 0)
644			continue;
645		switch (state) {
646		/* Haven't reached the NAME section yet */
647		case STATE_UNKNOWN:
648			if (name_section_line(line, ".SH"))
649				state = STATE_MANSTYLE;
650			else if (name_section_line(line, ".Sh"))
651				state = STATE_MDOCNAME;
652			continue;
653		/* Inside an old-style .SH NAME section */
654		case STATE_MANSTYLE:
655			if (strncmp(line, ".SH", 3) == 0 ||
656			    strncmp(line, ".SS", 3) == 0)
657				break;
658			(void) trim_rhs(line);
659			if (strcmp(line, ".") == 0)
660				continue;
661			if (strncmp(line, ".IX", 3) == 0) {
662				line += 3;
663				line = skip_spaces(line);
664			}
665			process_man_line(line);
666			continue;
667		/* Inside a new-style .Sh NAME section (the .Nm part) */
668		case STATE_MDOCNAME:
669			(void) trim_rhs(line);
670			if (strncmp(line, ".Nm", 3) == 0) {
671				process_mdoc_line(line);
672				continue;
673			} else {
674				if (strcmp(line, ".") == 0)
675					continue;
676				sbuf_append(whatis_proto, "- ", 2);
677				state = STATE_MDOCDESC;
678			}
679			/* FALLTHROUGH */
680		/* Inside a new-style .Sh NAME section (after the .Nm-s) */
681		case STATE_MDOCDESC:
682			if (strncmp(line, ".Sh", 3) == 0)
683				break;
684			(void) trim_rhs(line);
685			if (strcmp(line, ".") == 0)
686				continue;
687			process_mdoc_line(line);
688			continue;
689		}
690		break;
691	}
692	(void) fclose(fp);
693	sbuf_strip(whatis_proto, " \t.-");
694	line = sbuf_content(whatis_proto);
695	/*
696	 * Line now contains the appropriate data, but without the
697	 * proper indentation or the section appended to each name.
698	 */
699	descr = strstr(line, " - ");
700	if (descr == NULL) {
701		descr = strchr(line, ' ');
702		if (descr == NULL)
703			return;
704		*descr++ = '\0';
705	} else {
706		*descr = '\0';
707		descr += 3;
708	}
709	names = sl_init();
710	collect_names(names, line);
711	sbuf_clear(whatis_final);
712	if (!sl_find(names, page->name) &&
713	    no_page_exists(section_dir, names, page->suffix)) {
714		/*
715		 * Add the page name since that's the only
716		 * thing that man(1) will find.
717		 */
718		add_whatis_name(page->name, page->suffix);
719	}
720	for (i = 0; i < names->sl_cur; i++)
721		add_whatis_name(names->sl_str[i], page->suffix);
722	sl_free(names, 0);
723	/* Remove last ", " */
724	sbuf_retract(whatis_final, 2);
725	while (sbuf_length(whatis_final) < INDENT)
726		sbuf_append(whatis_final, " ", 1);
727	sbuf_append(whatis_final, " - ", 3);
728	sbuf_append_str(whatis_final, skip_spaces(descr));
729	(void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
730}
731
732/*
733 * Sort pages first by inode number, then by name.
734 */
735static int
736pagesort(const void *a, const void *b)
737{
738	const struct page_info *p1 = *(struct page_info * const *) a;
739	const struct page_info *p2 = *(struct page_info * const *) b;
740
741	if (p1->inode == p2->inode)
742		return (strcmp(p1->name, p2->name));
743
744	return (p1->inode - p2->inode);
745}
746
747/*
748 * Process a single man section.
749 */
750static void
751process_section(char *section_dir)
752{
753	struct dirent	**entries;
754	int		nentries;
755	struct page_info **pages;
756	int		npages = 0;
757	int		i;
758	ino_t		prev_inode = 0;
759
760	/* Scan the man section directory for pages */
761	nentries = scandir(section_dir, &entries, NULL, alphasort);
762
763	/* Collect information about man pages */
764	pages = (struct page_info **)calloc(nentries,
765	    sizeof (struct page_info *));
766	for (i = 0; i < nentries; i++) {
767		struct page_info *info = new_page_info(section_dir, entries[i]);
768		if (info != NULL)
769			pages[npages++] = info;
770		free(entries[i]);
771	}
772	free(entries);
773	qsort(pages, npages, sizeof (struct page_info *), pagesort);
774
775	/* Process each unique page */
776	for (i = 0; i < npages; i++) {
777		struct page_info *page = pages[i];
778		if (page->inode != prev_inode) {
779			prev_inode = page->inode;
780			process_page(page, section_dir);
781		}
782		free_page_info(page);
783	}
784	free(pages);
785}
786
/*
 * scandir() filter: return whether the directory entry is a man page
 * section, i.e. whether its name is "man" followed by nothing but
 * alphanumeric characters (possibly none).
 * Returns 1 to select the entry and 0 to reject it.
 */
static int
select_sections(const struct dirent *entry)
{
	const char	*p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return (0);
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		/* isalnum() needs an unsigned char value */
		if (!isalnum((unsigned char)*p))
			return (0);
	}
	return (1);
}
803
804/*
805 * Process a single top-level man directory by finding all the
806 * sub-directories named man* and processing each one in turn.
807 */
808void
809mwpath(char *path)
810{
811	FILE		*fp = NULL;
812	struct dirent	**entries;
813	int		nsections;
814	int		i;
815
816	(void) signal(SIGINT, trap_signal);
817	(void) signal(SIGHUP, trap_signal);
818	(void) signal(SIGQUIT, trap_signal);
819	(void) signal(SIGTERM, trap_signal);
820
821	whatis_proto = new_sbuf();
822	whatis_final = new_sbuf();
823
824	nsections = scandir(path, &entries, select_sections, alphasort);
825	if ((fp = open_whatis(path)) == NULL)
826		return;
827	for (i = 0; i < nsections; i++) {
828		char	section_dir[MAXPATHLEN];
829
830		(void) snprintf(section_dir, MAXPATHLEN, "%s/%s",
831		    path, entries[i]->d_name);
832		process_section(section_dir);
833		free(entries[i]);
834	}
835	free(entries);
836	finish_whatis(fp, path);
837}
838