xref: /illumos-gate/usr/src/cmd/mandoc/read.c (revision c66b8046)
1 /*	$Id: read.c,v 1.192 2017/07/20 14:36:36 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include "config.h"
20 
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24 
25 #include <assert.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <zlib.h>
35 
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "libmandoc.h"
42 
/*
 * Upper bound on the number of ROFF_REPARSE results accepted for
 * one input line; when it is exceeded, mparse_buf_r() reports
 * MANDOCERR_ROFFLOOP and returns 0.
 */
43 #define	REPARSE_LIMIT	1000
44 
/*
 * Parser state used by all functions in this file.
 * Allocated by mparse_alloc() and released by mparse_free().
 */
45 struct	mparse {
46 	struct roff	 *roff; /* roff parser (!NULL) */
47 	struct roff_man	 *man; /* man parser */
48 	char		 *sodest; /* filename pointed to by .so */
49 	const char	 *file; /* filename of current input file */
50 	struct buf	 *primary; /* buffer currently being parsed */
51 	struct buf	 *secondary; /* copy of parsed lines, see mparse_keep() */
52 	const char	 *os_s; /* default operating system */
53 	mandocmsg	  mmsg; /* warning/error message handler */
54 	enum mandoclevel  file_status; /* highest message level seen so far */
55 	enum mandocerr	  mmin; /* ignore messages below this */
56 	int		  options; /* parser options */
57 	int		  gzip; /* current input file is gzipped */
58 	int		  filenc; /* encoding of the current file */
59 	int		  reparse_count; /* reparses of the current line */
60 	int		  line; /* line number in the file */
61 };
62 
/* Forward declarations of the file-local helper functions. */
63 static	void	  choose_parser(struct mparse *);
64 static	void	  resize_buf(struct buf *, size_t);
65 static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
66 static	int	  read_whole_file(struct mparse *, const char *, int,
67 				struct buf *, int *);
68 static	void	  mparse_end(struct mparse *);
69 static	void	  mparse_parse_buffer(struct mparse *, struct buf,
70 			const char *);
71 
/*
 * Lower bound of the error codes for each message level, indexed
 * by level.  mandoc_msg() walks this table downwards, starting at
 * MANDOCLEVEL_UNSUPP, until the error code is no longer smaller
 * than the entry, which yields the level of the message.
 */
72 static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
73 	MANDOCERR_OK,
74 	MANDOCERR_OK,
75 	MANDOCERR_WARNING,
76 	MANDOCERR_ERROR,
77 	MANDOCERR_UNSUPP,
78 	MANDOCERR_MAX,
79 	MANDOCERR_MAX
80 };
81 
/*
 * Message texts, indexed by enum mandocerr and handed out by
 * mparse_strerror().  The comments inside the table group the
 * entries by topic.  One entry is NULL, so mparse_strerror()
 * can return NULL for that code.
 */
82 static	const char * const	mandocerrs[MANDOCERR_MAX] = {
83 	"ok",
84
85 	"base system convention",
86
87 	"Mdocdate found",
88 	"Mdocdate missing",
89 	"unknown architecture",
90 	"operating system explicitly specified",
91 	"RCS id missing",
92 	"referenced manual not found",
93
94 	"generic style suggestion",
95
96 	"legacy man(7) date format",
97 	"lower case character in document title",
98 	"duplicate RCS id",
99 	"typo in section name",
100 	"unterminated quoted argument",
101 	"useless macro",
102 	"consider using OS macro",
103 	"errnos out of order",
104 	"duplicate errno",
105 	"trailing delimiter",
106 	"no blank before trailing delimiter",
107 	"fill mode already enabled, skipping",
108 	"fill mode already disabled, skipping",
109 	"function name without markup",
110 	"whitespace at end of input line",
111 	"bad comment style",
112
113 	"generic warning",
114
115 	/* related to the prologue */
116 	"missing manual title, using UNTITLED",
117 	"missing manual title, using \"\"",
118 	"missing manual section, using \"\"",
119 	"unknown manual section",
120 	"missing date, using today's date",
121 	"cannot parse date, using it verbatim",
122 	"date in the future, using it anyway",
123 	"missing Os macro, using \"\"",
124 	"late prologue macro",
125 	"prologue macros out of order",
126
127 	/* related to document structure */
128 	".so is fragile, better use ln(1)",
129 	"no document body",
130 	"content before first section header",
131 	"first section is not \"NAME\"",
132 	"NAME section without Nm before Nd",
133 	"NAME section without description",
134 	"description not at the end of NAME",
135 	"bad NAME section content",
136 	"missing comma before name",
137 	"missing description line, using \"\"",
138 	"description line outside NAME section",
139 	"sections out of conventional order",
140 	"duplicate section title",
141 	"unexpected section",
142 	"cross reference to self",
143 	"unusual Xr order",
144 	"unusual Xr punctuation",
145 	"AUTHORS section without An macro",
146
147 	/* related to macros and nesting */
148 	"obsolete macro",
149 	"macro neither callable nor escaped",
150 	"skipping paragraph macro",
151 	"moving paragraph macro out of list",
152 	"skipping no-space macro",
153 	"blocks badly nested",
154 	"nested displays are not portable",
155 	"moving content out of list",
156 	"first macro on line",
157 	"line scope broken",
158 	"skipping blank line in line scope",
159
160 	/* related to missing macro arguments */
161 	"skipping empty request",
162 	"conditional request controls empty scope",
163 	"skipping empty macro",
164 	"empty block",
165 	"empty argument, using 0n",
166 	"missing display type, using -ragged",
167 	"list type is not the first argument",
168 	"missing -width in -tag list, using 6n",
169 	"missing utility name, using \"\"",
170 	"missing function name, using \"\"",
171 	"empty head in list item",
172 	"empty list item",
173 	"missing argument, using next line",
174 	"missing font type, using \\fR",
175 	"unknown font type, using \\fR",
176 	"nothing follows prefix",
177 	"empty reference block",
178 	"missing section argument",
179 	"missing -std argument, adding it",
180 	"missing option string, using \"\"",
181 	"missing resource identifier, using \"\"",
182 	"missing eqn box, using \"\"",
183
184 	/* related to bad macro arguments */
185 	"duplicate argument",
186 	"skipping duplicate argument",
187 	"skipping duplicate display type",
188 	"skipping duplicate list type",
189 	"skipping -width argument",
190 	"wrong number of cells",
191 	"unknown AT&T UNIX version",
192 	"comma in function argument",
193 	"parenthesis in function name",
194 	"unknown library name",
195 	"invalid content in Rs block",
196 	"invalid Boolean argument",
197 	"unknown font, skipping request",
198 	"odd number of characters in request",
199
200 	/* related to plain text */
201 	"blank line in fill mode, using .sp",
202 	"tab in filled text",
203 	"new sentence, new line",
204 	"invalid escape sequence",
205 	"undefined string, using \"\"",
206
207 	/* related to tables */
208 	"tbl line starts with span",
209 	"tbl column starts with span",
210 	"skipping vertical bar in tbl layout",
211
212 	"generic error",
213
214 	/* related to tables */
215 	"non-alphabetic character in tbl options",
216 	"skipping unknown tbl option",
217 	"missing tbl option argument",
218 	"wrong tbl option argument size",
219 	"empty tbl layout",
220 	"invalid character in tbl layout",
221 	"unmatched parenthesis in tbl layout",
222 	"tbl without any data cells",
223 	"ignoring data in spanned tbl cell",
224 	"ignoring extra tbl data cells",
225 	"data block open at end of tbl",
226
227 	/* related to document structure and macros */
228 	NULL,
229 	"duplicate prologue macro",
230 	"skipping late title macro",
231 	"input stack limit exceeded, infinite loop?",
232 	"skipping bad character",
233 	"skipping unknown macro",
234 	"skipping insecure request",
235 	"skipping item outside list",
236 	"skipping column outside column list",
237 	"skipping end of block that is not open",
238 	"fewer RS blocks open, skipping",
239 	"inserting missing end of block",
240 	"appending missing end of block",
241
242 	/* related to request and macro arguments */
243 	"escaped character not allowed in a name",
244 	"NOT IMPLEMENTED: Bd -file",
245 	"skipping display without arguments",
246 	"missing list type, using -item",
247 	"argument is not numeric, using 1",
248 	"missing manual name, using \"\"",
249 	"uname(3) system call failed, using UNKNOWN",
250 	"unknown standard specifier",
251 	"skipping request without numeric argument",
252 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
253 	".so request failed",
254 	"skipping all arguments",
255 	"skipping excess arguments",
256 	"divide by zero",
257
258 	"unsupported feature",
259 	"input too large",
260 	"unsupported control character",
261 	"unsupported roff request",
262 	"eqn delim option in tbl",
263 	"unsupported tbl layout modifier",
264 	"ignoring macro in table",
265 };
266 
/*
 * Names of the message levels, indexed by enum mandoclevel
 * and handed out by mparse_strlevel().
 */
267 static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
268 	"SUCCESS",
269 	"STYLE",
270 	"WARNING",
271 	"ERROR",
272 	"UNSUPP",
273 	"BADARG",
274 	"SYSERR"
275 };
276 
277 
278 static void
279 resize_buf(struct buf *buf, size_t initial)
280 {
281 
282 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
283 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
284 }
285 
286 static void
287 choose_parser(struct mparse *curp)
288 {
289 	char		*cp, *ep;
290 	int		 format;
291 
292 	/*
293 	 * If neither command line arguments -mdoc or -man select
294 	 * a parser nor the roff parser found a .Dd or .TH macro
295 	 * yet, look ahead in the main input buffer.
296 	 */
297 
298 	if ((format = roff_getformat(curp->roff)) == 0) {
299 		cp = curp->primary->buf;
300 		ep = cp + curp->primary->sz;
301 		while (cp < ep) {
302 			if (*cp == '.' || *cp == '\'') {
303 				cp++;
304 				if (cp[0] == 'D' && cp[1] == 'd') {
305 					format = MPARSE_MDOC;
306 					break;
307 				}
308 				if (cp[0] == 'T' && cp[1] == 'H') {
309 					format = MPARSE_MAN;
310 					break;
311 				}
312 			}
313 			cp = memchr(cp, '\n', ep - cp);
314 			if (cp == NULL)
315 				break;
316 			cp++;
317 		}
318 	}
319 
320 	if (format == MPARSE_MDOC) {
321 		curp->man->macroset = MACROSET_MDOC;
322 		if (curp->man->mdocmac == NULL)
323 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
324 	} else {
325 		curp->man->macroset = MACROSET_MAN;
326 		if (curp->man->manmac == NULL)
327 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
328 	}
329 	curp->man->first->tok = TOKEN_NONE;
330 }
331 
332 /*
333  * Main parse routine for a buffer.
334  * It assumes encoding and line numbering are already set up.
335  * It can recurse directly (for invocations of user-defined
336  * macros, inline equations, and input line traps)
337  * and indirectly (for .so file inclusion).
 *
 * blk is the buffer to parse and i the byte offset to start at.
 * start is 1 when called from mparse_parse_buffer() and 0 for the
 * temporary buffers parsed by the recursive calls below; only with
 * start set does the function update curp->line, reset
 * curp->reparse_count, and call preconv_cue().
 *
 * Returns 0 if the reparse limit was exceeded or if a nested
 * reparse did not return 1 while parsing a temporary buffer,
 * and 1 in all other cases.
338  */
339 static int
340 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
341 {
342 	struct buf	 ln;
343 	const char	*save_file;
344 	char		*cp;
345 	size_t		 pos; /* byte number in the ln buffer */
346 	enum rofferr	 rr;
347 	int		 of;
348 	int		 lnn; /* line number in the real file */
349 	int		 fd;
350 	unsigned char	 c;
351
352 	memset(&ln, 0, sizeof(ln));
353
354 	lnn = curp->line;
355 	pos = 0;
356
357 	while (i < blk.sz) {
		/* A NUL byte at the start of a line ends the buffer. */
358 		if (0 == pos && '\0' == blk.buf[i])
359 			break;
360
		/* Bookkeeping that is only done when start is set. */
361 		if (start) {
362 			curp->line = lnn;
363 			curp->reparse_count = 0;
364
365 			if (lnn < 3 &&
366 			    curp->filenc & MPARSE_UTF8 &&
367 			    curp->filenc & MPARSE_LATIN1)
368 				curp->filenc = preconv_cue(&blk, i);
369 		}
370
371 		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
372
373 			/*
374 			 * When finding an unescaped newline character,
375 			 * leave the character loop to process the line.
376 			 * Skip a preceding carriage return, if any.
377 			 */
378
379 			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
380 			    '\n' == blk.buf[i + 1])
381 				++i;
382 			if ('\n' == blk.buf[i]) {
383 				++i;
384 				++lnn;
385 				break;
386 			}
387
388 			/*
389 			 * Make sure we have space for the worst
390 			 * case of 11 bytes: "\\[u10ffff]\0"
391 			 */
392
393 			if (pos + 11 > ln.sz)
394 				resize_buf(&ln, 256);
395
396 			/*
397 			 * Encode 8-bit input.
398 			 */
399
400 			c = blk.buf[i];
401 			if (c & 0x80) {
402 				if ( ! (curp->filenc && preconv_encode(
403 				    &blk, &i, &ln, &pos, &curp->filenc))) {
404 					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
405 					    curp->line, pos, "0x%x", c);
406 					ln.buf[pos++] = '?';
407 					i++;
408 				}
409 				continue;
410 			}
411
412 			/*
413 			 * Exclude control characters.
414 			 */
415
416 			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
417 				mandoc_vmsg(c == 0x00 || c == 0x04 ||
418 				    c > 0x0a ? MANDOCERR_CHAR_BAD :
419 				    MANDOCERR_CHAR_UNSUPP,
420 				    curp, curp->line, pos, "0x%x", c);
421 				i++;
422 				if (c != '\r')
423 					ln.buf[pos++] = '?';
424 				continue;
425 			}
426
427 			ln.buf[pos++] = blk.buf[i++];
428 		}
429
		/* Room for one more byte plus the terminating NUL. */
430 		if (pos + 1 >= ln.sz)
431 			resize_buf(&ln, 256);
432
		/* At the end of the buffer, append a newline to the line. */
433 		if (i == blk.sz || blk.buf[i] == '\0')
434 			ln.buf[pos++] = '\n';
435 		ln.buf[pos] = '\0';
436
437 		/*
438 		 * A significant amount of complexity is contained by
439 		 * the roff preprocessor.  It's line-oriented but can be
440 		 * expressed on one line, so we need at times to
441 		 * readjust our starting point and re-run it.  The roff
442 		 * preprocessor can also readjust the buffers with new
443 		 * data, so we pass them in wholesale.
444 		 */
445
446 		of = 0;
447
448 		/*
449 		 * Maintain a lookaside buffer of all parsed lines.  We
450 		 * only do this if mparse_keep() has been invoked (the
451 		 * buffer may be accessed with mparse_getkeep()).
452 		 */
453
454 		if (curp->secondary) {
455 			curp->secondary->buf = mandoc_realloc(
456 			    curp->secondary->buf,
457 			    curp->secondary->sz + pos + 2);
458 			memcpy(curp->secondary->buf +
459 			    curp->secondary->sz,
460 			    ln.buf, pos);
461 			curp->secondary->sz += pos;
462 			curp->secondary->buf
463 				[curp->secondary->sz] = '\n';
464 			curp->secondary->sz++;
465 			curp->secondary->buf
466 				[curp->secondary->sz] = '\0';
467 		}
468 rerun:
469 		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
470
471 		switch (rr) {
472 		case ROFF_REPARSE:
			/*
			 * Parse the line buffer again as a temporary
			 * buffer, starting at offset "of", unless
			 * the reparse limit has been reached.
			 */
473 			if (++curp->reparse_count > REPARSE_LIMIT)
474 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
475 				    curp->line, pos, NULL);
476 			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
477 			    start == 1) {
478 				pos = 0;
479 				continue;
480 			}
481 			free(ln.buf);
482 			return 0;
483 		case ROFF_APPEND:
			/* Keep the line; the next input is appended to it. */
484 			pos = strlen(ln.buf);
485 			continue;
486 		case ROFF_RERUN:
487 			goto rerun;
488 		case ROFF_IGN:
			/* Drop the line and start a fresh one. */
489 			pos = 0;
490 			continue;
491 		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a .so request at the very
			 * end of the buffer is not followed; only the
			 * target name is saved in curp->sodest.
			 */
492 			if ( ! (curp->options & MPARSE_SO) &&
493 			    (i >= blk.sz || blk.buf[i] == '\0')) {
494 				curp->sodest = mandoc_strdup(ln.buf + of);
495 				free(ln.buf);
496 				return 1;
497 			}
498 			/*
499 			 * We remove `so' clauses from our lookaside
500 			 * buffer because we're going to descend into
501 			 * the file recursively.
502 			 */
503 			if (curp->secondary)
504 				curp->secondary->sz -= pos + 1;
505 			save_file = curp->file;
506 			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
507 				mparse_readfd(curp, fd, ln.buf + of);
508 				close(fd);
509 				curp->file = save_file;
510 			} else {
				/*
				 * The file cannot be opened: report that
				 * and parse a short replacement text
				 * naming the file instead.
				 */
511 				curp->file = save_file;
512 				mandoc_vmsg(MANDOCERR_SO_FAIL,
513 				    curp, curp->line, pos,
514 				    ".so %s", ln.buf + of);
515 				ln.sz = mandoc_asprintf(&cp,
516 				    ".sp\nSee the file %s.\n.sp",
517 				    ln.buf + of);
518 				free(ln.buf);
519 				ln.buf = cp;
520 				of = 0;
521 				mparse_buf_r(curp, ln, of, 0);
522 			}
523 			pos = 0;
524 			continue;
525 		default:
526 			break;
527 		}
528
529 		if (curp->man->macroset == MACROSET_NONE)
530 			choose_parser(curp);
531
		/*
		 * Hand the line to the macro parser; a return
		 * value of 2 ends the parse of this buffer.
		 */
532 		if ((curp->man->macroset == MACROSET_MDOC ?
533 		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
534 		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
535 				break;
536
537 		/* Temporary buffers typically are not full. */
538
539 		if (0 == start && '\0' == blk.buf[i])
540 			break;
541
542 		/* Start the next input line. */
543
544 		pos = 0;
545 	}
546
547 	free(ln.buf);
548 	return 1;
549 }
550 
/*
 * Read the complete content of the open file descriptor fd into fb.
 *
 * An uncompressed regular file is mapped with mmap(2); in that case
 * *with_mmap is set to 1 and the caller has to munmap(2) fb->buf.
 * Otherwise the data is read block by block (through zlib if
 * curp->gzip is set) into memory that the caller has to free(3),
 * and *with_mmap is set to 0.
 *
 * The descriptor fd itself is never closed here.
 * Returns 1 on success and 0 on failure; on failure, a message
 * has been issued and fb->buf must not be used.
 */
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
		struct buf *fb, int *with_mmap)
{
	struct stat	 st;
	gzFile		 gz;
	size_t		 off;
	ssize_t		 ssz;
	int		 gzfd, gzerr, retval;

	if (fstat(fd, &st) == -1) {
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "fstat: %s", strerror(errno));
		return 0;
	}

	/*
	 * If we're a regular file, try just reading in the whole entry
	 * via mmap().  This is faster than reading it into blocks, and
	 * since each file is only a few bytes to begin with, I'm not
	 * concerned that this is going to tank any machines.
	 */

	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
		if (st.st_size > 0x7fffffff) {
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
			return 0;
		}
		*with_mmap = 1;
		fb->sz = (size_t)st.st_size;
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
		if (fb->buf != MAP_FAILED)
			return 1;
	}

	gz = NULL;
	if (curp->gzip) {
		/*
		 * The zlib stream must be released with gzclose(),
		 * which also closes the descriptor it was opened on.
		 * Since the caller keeps ownership of fd, hand a
		 * duplicate to zlib.
		 */
		if ((gzfd = dup(fd)) == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "dup: %s", strerror(errno));
			return 0;
		}
		if ((gz = gzdopen(gzfd, "rb")) == NULL) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "gzdopen: %s", strerror(errno));
			close(gzfd);
			return 0;
		}
	}

	/*
	 * If this isn't a regular file (like, say, stdin), then we must
	 * go the old way and just read things in bit by bit.
	 */

	*with_mmap = 0;
	retval = 0;
	off = 0;
	fb->sz = 0;
	fb->buf = NULL;
	for (;;) {
		if (off == fb->sz) {
			if (fb->sz == (1U << 31)) {
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
				    0, 0, NULL);
				break;
			}
			resize_buf(fb, 65536);
		}
		ssz = gz != NULL ?
		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
		    read(fd, fb->buf + (int)off, fb->sz - off);
		if (ssz == 0) {
			/* End of input: trim the size to what was read. */
			fb->sz = off;
			retval = 1;
			break;
		}
		if (ssz == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "read: %s", strerror(errno));
			break;
		}
		off += (size_t)ssz;
	}

	/* Release the zlib state on every path out of the loop. */

	if (gz != NULL && (gzerr = gzclose(gz)) != Z_OK)
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "gzclose: %s", zError(gzerr));

	if (retval == 0) {
		free(fb->buf);
		fb->buf = NULL;
	}
	return retval;
}
631 
632 static void
633 mparse_end(struct mparse *curp)
634 {
635 	if (curp->man->macroset == MACROSET_NONE)
636 		curp->man->macroset = MACROSET_MAN;
637 	if (curp->man->macroset == MACROSET_MDOC)
638 		mdoc_endparse(curp->man);
639 	else
640 		man_endparse(curp->man);
641 	roff_endparse(curp->roff);
642 }
643 
644 static void
645 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
646 {
647 	struct buf	*svprimary;
648 	const char	*svfile;
649 	size_t		 offset;
650 	static int	 recursion_depth;
651 
652 	if (64 < recursion_depth) {
653 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
654 		return;
655 	}
656 
657 	/* Line number is per-file. */
658 	svfile = curp->file;
659 	curp->file = file;
660 	svprimary = curp->primary;
661 	curp->primary = &blk;
662 	curp->line = 1;
663 	recursion_depth++;
664 
665 	/* Skip an UTF-8 byte order mark. */
666 	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
667 	    (unsigned char)blk.buf[0] == 0xef &&
668 	    (unsigned char)blk.buf[1] == 0xbb &&
669 	    (unsigned char)blk.buf[2] == 0xbf) {
670 		offset = 3;
671 		curp->filenc &= ~MPARSE_LATIN1;
672 	} else
673 		offset = 0;
674 
675 	mparse_buf_r(curp, blk, offset, 1);
676 
677 	if (--recursion_depth == 0)
678 		mparse_end(curp);
679 
680 	curp->primary = svprimary;
681 	curp->file = svfile;
682 }
683 
684 enum mandoclevel
685 mparse_readmem(struct mparse *curp, void *buf, size_t len,
686 		const char *file)
687 {
688 	struct buf blk;
689 
690 	blk.buf = buf;
691 	blk.sz = len;
692 
693 	mparse_parse_buffer(curp, blk, file);
694 	return curp->file_status;
695 }
696 
697 /*
698  * Read the whole file into memory and call the parsers.
699  * Called recursively when an .so request is encountered.
700  */
701 enum mandoclevel
702 mparse_readfd(struct mparse *curp, int fd, const char *file)
703 {
704 	struct buf	 blk;
705 	int		 with_mmap;
706 	int		 save_filenc;
707 
708 	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
709 		save_filenc = curp->filenc;
710 		curp->filenc = curp->options &
711 		    (MPARSE_UTF8 | MPARSE_LATIN1);
712 		mparse_parse_buffer(curp, blk, file);
713 		curp->filenc = save_filenc;
714 		if (with_mmap)
715 			munmap(blk.buf, blk.sz);
716 		else
717 			free(blk.buf);
718 	}
719 	return curp->file_status;
720 }
721 
722 int
723 mparse_open(struct mparse *curp, const char *file)
724 {
725 	char		 *cp;
726 	int		  fd;
727 
728 	curp->file = file;
729 	cp = strrchr(file, '.');
730 	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
731 
732 	/* First try to use the filename as it is. */
733 
734 	if ((fd = open(file, O_RDONLY)) != -1)
735 		return fd;
736 
737 	/*
738 	 * If that doesn't work and the filename doesn't
739 	 * already  end in .gz, try appending .gz.
740 	 */
741 
742 	if ( ! curp->gzip) {
743 		mandoc_asprintf(&cp, "%s.gz", file);
744 		fd = open(cp, O_RDONLY);
745 		free(cp);
746 		if (fd != -1) {
747 			curp->gzip = 1;
748 			return fd;
749 		}
750 	}
751 
752 	/* Neither worked, give up. */
753 
754 	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
755 	return -1;
756 }
757 
758 struct mparse *
759 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
760     enum mandoc_os os_e, const char *os_s)
761 {
762 	struct mparse	*curp;
763 
764 	curp = mandoc_calloc(1, sizeof(struct mparse));
765 
766 	curp->options = options;
767 	curp->mmin = mmin;
768 	curp->mmsg = mmsg;
769 	curp->os_s = os_s;
770 
771 	curp->roff = roff_alloc(curp, options);
772 	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
773 		curp->options & MPARSE_QUICK ? 1 : 0);
774 	if (curp->options & MPARSE_MDOC) {
775 		curp->man->macroset = MACROSET_MDOC;
776 		if (curp->man->mdocmac == NULL)
777 			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
778 	} else if (curp->options & MPARSE_MAN) {
779 		curp->man->macroset = MACROSET_MAN;
780 		if (curp->man->manmac == NULL)
781 			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
782 	}
783 	curp->man->first->tok = TOKEN_NONE;
784 	curp->man->meta.os_e = os_e;
785 	return curp;
786 }
787 
788 void
789 mparse_reset(struct mparse *curp)
790 {
791 	roff_reset(curp->roff);
792 	roff_man_reset(curp->man);
793 
794 	free(curp->sodest);
795 	curp->sodest = NULL;
796 
797 	if (curp->secondary)
798 		curp->secondary->sz = 0;
799 
800 	curp->file_status = MANDOCLEVEL_OK;
801 	curp->gzip = 0;
802 }
803 
804 void
805 mparse_free(struct mparse *curp)
806 {
807 
808 	roffhash_free(curp->man->mdocmac);
809 	roffhash_free(curp->man->manmac);
810 	roff_man_free(curp->man);
811 	roff_free(curp->roff);
812 	if (curp->secondary)
813 		free(curp->secondary->buf);
814 
815 	free(curp->secondary);
816 	free(curp->sodest);
817 	free(curp);
818 }
819 
820 void
821 mparse_result(struct mparse *curp, struct roff_man **man,
822 	char **sodest)
823 {
824 
825 	if (sodest && NULL != (*sodest = curp->sodest)) {
826 		*man = NULL;
827 		return;
828 	}
829 	if (man)
830 		*man = curp->man;
831 }
832 
833 void
834 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
835 {
836 	if (curp->file_status > *rc)
837 		*rc = curp->file_status;
838 }
839 
840 void
841 mandoc_vmsg(enum mandocerr t, struct mparse *m,
842 		int ln, int pos, const char *fmt, ...)
843 {
844 	char		 buf[256];
845 	va_list		 ap;
846 
847 	va_start(ap, fmt);
848 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
849 	va_end(ap);
850 
851 	mandoc_msg(t, m, ln, pos, buf);
852 }
853 
854 void
855 mandoc_msg(enum mandocerr er, struct mparse *m,
856 		int ln, int col, const char *msg)
857 {
858 	enum mandoclevel level;
859 
860 	if (er < m->mmin && er != MANDOCERR_FILE)
861 		return;
862 
863 	level = MANDOCLEVEL_UNSUPP;
864 	while (er < mandoclimits[level])
865 		level--;
866 
867 	if (m->mmsg)
868 		(*m->mmsg)(er, level, m->file, ln, col, msg);
869 
870 	if (m->file_status < level)
871 		m->file_status = level;
872 }
873 
874 const char *
875 mparse_strerror(enum mandocerr er)
876 {
877 
878 	return mandocerrs[er];
879 }
880 
881 const char *
882 mparse_strlevel(enum mandoclevel lvl)
883 {
884 	return mandoclevels[lvl];
885 }
886 
887 void
888 mparse_keep(struct mparse *p)
889 {
890 
891 	assert(NULL == p->secondary);
892 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
893 }
894 
895 const char *
896 mparse_getkeep(const struct mparse *p)
897 {
898 
899 	assert(p->secondary);
900 	return p->secondary->sz ? p->secondary->buf : NULL;
901 }
902