xref: /illumos-gate/usr/src/cmd/mandoc/dbm_map.c (revision a40ea1a7)
1*a40ea1a7SYuri Pankov /*	$Id: dbm_map.c,v 1.8 2017/02/17 14:43:54 schwarze Exp $ */
2*a40ea1a7SYuri Pankov /*
3*a40ea1a7SYuri Pankov  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4*a40ea1a7SYuri Pankov  *
5*a40ea1a7SYuri Pankov  * Permission to use, copy, modify, and distribute this software for any
6*a40ea1a7SYuri Pankov  * purpose with or without fee is hereby granted, provided that the above
7*a40ea1a7SYuri Pankov  * copyright notice and this permission notice appear in all copies.
8*a40ea1a7SYuri Pankov  *
9*a40ea1a7SYuri Pankov  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*a40ea1a7SYuri Pankov  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*a40ea1a7SYuri Pankov  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*a40ea1a7SYuri Pankov  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*a40ea1a7SYuri Pankov  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*a40ea1a7SYuri Pankov  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*a40ea1a7SYuri Pankov  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*a40ea1a7SYuri Pankov  *
17*a40ea1a7SYuri Pankov  * Low-level routines for the map-based version
18*a40ea1a7SYuri Pankov  * of the mandoc database, for read-only access.
19*a40ea1a7SYuri Pankov  * The interface is defined in "dbm_map.h".
20*a40ea1a7SYuri Pankov  */
21*a40ea1a7SYuri Pankov #include "config.h"
22*a40ea1a7SYuri Pankov 
23*a40ea1a7SYuri Pankov #include <sys/mman.h>
24*a40ea1a7SYuri Pankov #include <sys/stat.h>
25*a40ea1a7SYuri Pankov #include <sys/types.h>
26*a40ea1a7SYuri Pankov 
27*a40ea1a7SYuri Pankov #if HAVE_ENDIAN
28*a40ea1a7SYuri Pankov #include <endian.h>
29*a40ea1a7SYuri Pankov #elif HAVE_SYS_ENDIAN
30*a40ea1a7SYuri Pankov #include <sys/endian.h>
31*a40ea1a7SYuri Pankov #elif HAVE_NTOHL
32*a40ea1a7SYuri Pankov #include <arpa/inet.h>
33*a40ea1a7SYuri Pankov #endif
34*a40ea1a7SYuri Pankov #if HAVE_ERR
35*a40ea1a7SYuri Pankov #include <err.h>
36*a40ea1a7SYuri Pankov #endif
37*a40ea1a7SYuri Pankov #include <errno.h>
38*a40ea1a7SYuri Pankov #include <fcntl.h>
39*a40ea1a7SYuri Pankov #include <regex.h>
40*a40ea1a7SYuri Pankov #include <stdint.h>
41*a40ea1a7SYuri Pankov #include <stdlib.h>
42*a40ea1a7SYuri Pankov #include <string.h>
43*a40ea1a7SYuri Pankov #include <unistd.h>
44*a40ea1a7SYuri Pankov 
45*a40ea1a7SYuri Pankov #include "mansearch.h"
46*a40ea1a7SYuri Pankov #include "dbm_map.h"
47*a40ea1a7SYuri Pankov #include "dbm.h"
48*a40ea1a7SYuri Pankov 
/*
 * File-scope state describing the one database file handled by
 * dbm_map() and dbm_unmap().
 */

/* Result of fstat(2) on the database file; st_size is the mapping length. */
static struct stat	 st;

/* Start address of the mapping; dbm_get() and dbm_getint() index from here. */
static char		*dbm_base;

/* Descriptor of the database file opened by dbm_map(). */
static int		 ifd;

/* Exclusive upper bound for the byte offsets accepted by dbm_get(). */
static int32_t		 max_offset;
53*a40ea1a7SYuri Pankov 
54*a40ea1a7SYuri Pankov /*
55*a40ea1a7SYuri Pankov  * Open a disk-based database for read-only access.
56*a40ea1a7SYuri Pankov  * Validate the file format as far as it is not mandoc-specific.
57*a40ea1a7SYuri Pankov  * Return 0 on success.  Return -1 and set errno on failure.
58*a40ea1a7SYuri Pankov  */
59*a40ea1a7SYuri Pankov int
dbm_map(const char * fname)60*a40ea1a7SYuri Pankov dbm_map(const char *fname)
61*a40ea1a7SYuri Pankov {
62*a40ea1a7SYuri Pankov 	int		 save_errno;
63*a40ea1a7SYuri Pankov 	const int32_t	*magic;
64*a40ea1a7SYuri Pankov 
65*a40ea1a7SYuri Pankov 	if ((ifd = open(fname, O_RDONLY)) == -1)
66*a40ea1a7SYuri Pankov 		return -1;
67*a40ea1a7SYuri Pankov 	if (fstat(ifd, &st) == -1)
68*a40ea1a7SYuri Pankov 		goto fail;
69*a40ea1a7SYuri Pankov 	if (st.st_size < 5) {
70*a40ea1a7SYuri Pankov 		warnx("dbm_map(%s): File too short", fname);
71*a40ea1a7SYuri Pankov 		errno = EFTYPE;
72*a40ea1a7SYuri Pankov 		goto fail;
73*a40ea1a7SYuri Pankov 	}
74*a40ea1a7SYuri Pankov 	if (st.st_size > INT32_MAX) {
75*a40ea1a7SYuri Pankov 		errno = EFBIG;
76*a40ea1a7SYuri Pankov 		goto fail;
77*a40ea1a7SYuri Pankov 	}
78*a40ea1a7SYuri Pankov 	if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
79*a40ea1a7SYuri Pankov 	    ifd, 0)) == MAP_FAILED)
80*a40ea1a7SYuri Pankov 		goto fail;
81*a40ea1a7SYuri Pankov 	magic = dbm_getint(0);
82*a40ea1a7SYuri Pankov 	if (be32toh(*magic) != MANDOCDB_MAGIC) {
83*a40ea1a7SYuri Pankov 		if (strncmp(dbm_base, "SQLite format 3", 15))
84*a40ea1a7SYuri Pankov 			warnx("dbm_map(%s): "
85*a40ea1a7SYuri Pankov 			    "Bad initial magic %x (expected %x)",
86*a40ea1a7SYuri Pankov 			    fname, be32toh(*magic), MANDOCDB_MAGIC);
87*a40ea1a7SYuri Pankov 		else
88*a40ea1a7SYuri Pankov 			warnx("dbm_map(%s): "
89*a40ea1a7SYuri Pankov 			    "Obsolete format based on SQLite 3",
90*a40ea1a7SYuri Pankov 			    fname);
91*a40ea1a7SYuri Pankov 		errno = EFTYPE;
92*a40ea1a7SYuri Pankov 		goto fail;
93*a40ea1a7SYuri Pankov 	}
94*a40ea1a7SYuri Pankov 	magic = dbm_getint(1);
95*a40ea1a7SYuri Pankov 	if (be32toh(*magic) != MANDOCDB_VERSION) {
96*a40ea1a7SYuri Pankov 		warnx("dbm_map(%s): Bad version number %d (expected %d)",
97*a40ea1a7SYuri Pankov 		    fname, be32toh(*magic), MANDOCDB_VERSION);
98*a40ea1a7SYuri Pankov 		errno = EFTYPE;
99*a40ea1a7SYuri Pankov 		goto fail;
100*a40ea1a7SYuri Pankov 	}
101*a40ea1a7SYuri Pankov 	max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t);
102*a40ea1a7SYuri Pankov 	if (st.st_size != max_offset) {
103*a40ea1a7SYuri Pankov 		warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)",
104*a40ea1a7SYuri Pankov 		    fname, (long long)st.st_size, max_offset);
105*a40ea1a7SYuri Pankov 		errno = EFTYPE;
106*a40ea1a7SYuri Pankov 		goto fail;
107*a40ea1a7SYuri Pankov 	}
108*a40ea1a7SYuri Pankov 	if ((magic = dbm_get(*dbm_getint(3))) == NULL) {
109*a40ea1a7SYuri Pankov 		errno = EFTYPE;
110*a40ea1a7SYuri Pankov 		goto fail;
111*a40ea1a7SYuri Pankov 	}
112*a40ea1a7SYuri Pankov 	if (be32toh(*magic) != MANDOCDB_MAGIC) {
113*a40ea1a7SYuri Pankov 		warnx("dbm_map(%s): Bad final magic %x (expected %x)",
114*a40ea1a7SYuri Pankov 		    fname, be32toh(*magic), MANDOCDB_MAGIC);
115*a40ea1a7SYuri Pankov 		errno = EFTYPE;
116*a40ea1a7SYuri Pankov 		goto fail;
117*a40ea1a7SYuri Pankov 	}
118*a40ea1a7SYuri Pankov 	return 0;
119*a40ea1a7SYuri Pankov 
120*a40ea1a7SYuri Pankov fail:
121*a40ea1a7SYuri Pankov 	save_errno = errno;
122*a40ea1a7SYuri Pankov 	close(ifd);
123*a40ea1a7SYuri Pankov 	errno = save_errno;
124*a40ea1a7SYuri Pankov 	return -1;
125*a40ea1a7SYuri Pankov }
126*a40ea1a7SYuri Pankov 
127*a40ea1a7SYuri Pankov void
dbm_unmap(void)128*a40ea1a7SYuri Pankov dbm_unmap(void)
129*a40ea1a7SYuri Pankov {
130*a40ea1a7SYuri Pankov 	if (munmap(dbm_base, st.st_size) == -1)
131*a40ea1a7SYuri Pankov 		warn("dbm_unmap: munmap");
132*a40ea1a7SYuri Pankov 	if (close(ifd) == -1)
133*a40ea1a7SYuri Pankov 		warn("dbm_unmap: close");
134*a40ea1a7SYuri Pankov 	dbm_base = (char *)-1;
135*a40ea1a7SYuri Pankov }
136*a40ea1a7SYuri Pankov 
137*a40ea1a7SYuri Pankov /*
138*a40ea1a7SYuri Pankov  * Take a raw integer as it was read from the database.
139*a40ea1a7SYuri Pankov  * Interpret it as an offset into the database file
140*a40ea1a7SYuri Pankov  * and return a pointer to that place in the file.
141*a40ea1a7SYuri Pankov  */
142*a40ea1a7SYuri Pankov void *
dbm_get(int32_t offset)143*a40ea1a7SYuri Pankov dbm_get(int32_t offset)
144*a40ea1a7SYuri Pankov {
145*a40ea1a7SYuri Pankov 	offset = be32toh(offset);
146*a40ea1a7SYuri Pankov 	if (offset < 0) {
147*a40ea1a7SYuri Pankov 		warnx("dbm_get: Database corrupt: offset %d", offset);
148*a40ea1a7SYuri Pankov 		return NULL;
149*a40ea1a7SYuri Pankov 	}
150*a40ea1a7SYuri Pankov 	if (offset >= max_offset) {
151*a40ea1a7SYuri Pankov 		warnx("dbm_get: Database corrupt: offset %d > %d",
152*a40ea1a7SYuri Pankov 		    offset, max_offset);
153*a40ea1a7SYuri Pankov 		return NULL;
154*a40ea1a7SYuri Pankov 	}
155*a40ea1a7SYuri Pankov 	return dbm_base + offset;
156*a40ea1a7SYuri Pankov }
157*a40ea1a7SYuri Pankov 
158*a40ea1a7SYuri Pankov /*
159*a40ea1a7SYuri Pankov  * Assume the database starts with some integers.
160*a40ea1a7SYuri Pankov  * Assume they are numbered starting from 0, increasing.
161*a40ea1a7SYuri Pankov  * Get a pointer to one with the number "offset".
162*a40ea1a7SYuri Pankov  */
163*a40ea1a7SYuri Pankov int32_t *
dbm_getint(int32_t offset)164*a40ea1a7SYuri Pankov dbm_getint(int32_t offset)
165*a40ea1a7SYuri Pankov {
166*a40ea1a7SYuri Pankov 	return (int32_t *)dbm_base + offset;
167*a40ea1a7SYuri Pankov }
168*a40ea1a7SYuri Pankov 
169*a40ea1a7SYuri Pankov /*
170*a40ea1a7SYuri Pankov  * The reverse of dbm_get().
171*a40ea1a7SYuri Pankov  * Take pointer into the database file
172*a40ea1a7SYuri Pankov  * and convert it to the raw integer
173*a40ea1a7SYuri Pankov  * that would be used to refer to that place in the file.
174*a40ea1a7SYuri Pankov  */
175*a40ea1a7SYuri Pankov int32_t
dbm_addr(const void * p)176*a40ea1a7SYuri Pankov dbm_addr(const void *p)
177*a40ea1a7SYuri Pankov {
178*a40ea1a7SYuri Pankov 	return htobe32((const char *)p - dbm_base);
179*a40ea1a7SYuri Pankov }
180*a40ea1a7SYuri Pankov 
181*a40ea1a7SYuri Pankov int
dbm_match(const struct dbm_match * match,const char * str)182*a40ea1a7SYuri Pankov dbm_match(const struct dbm_match *match, const char *str)
183*a40ea1a7SYuri Pankov {
184*a40ea1a7SYuri Pankov 	switch (match->type) {
185*a40ea1a7SYuri Pankov 	case DBM_EXACT:
186*a40ea1a7SYuri Pankov 		return strcmp(str, match->str) == 0;
187*a40ea1a7SYuri Pankov 	case DBM_SUB:
188*a40ea1a7SYuri Pankov 		return strcasestr(str, match->str) != NULL;
189*a40ea1a7SYuri Pankov 	case DBM_REGEX:
190*a40ea1a7SYuri Pankov 		return regexec(match->re, str, 0, NULL, 0) == 0;
191*a40ea1a7SYuri Pankov 	default:
192*a40ea1a7SYuri Pankov 		abort();
193*a40ea1a7SYuri Pankov 	}
194*a40ea1a7SYuri Pankov }
195