1*a40ea1a7SYuri Pankov /* $Id: dbm_map.c,v 1.8 2017/02/17 14:43:54 schwarze Exp $ */
2*a40ea1a7SYuri Pankov /*
3*a40ea1a7SYuri Pankov * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4*a40ea1a7SYuri Pankov *
5*a40ea1a7SYuri Pankov * Permission to use, copy, modify, and distribute this software for any
6*a40ea1a7SYuri Pankov * purpose with or without fee is hereby granted, provided that the above
7*a40ea1a7SYuri Pankov * copyright notice and this permission notice appear in all copies.
8*a40ea1a7SYuri Pankov *
9*a40ea1a7SYuri Pankov * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*a40ea1a7SYuri Pankov * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*a40ea1a7SYuri Pankov * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*a40ea1a7SYuri Pankov * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*a40ea1a7SYuri Pankov * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*a40ea1a7SYuri Pankov * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*a40ea1a7SYuri Pankov * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*a40ea1a7SYuri Pankov *
17*a40ea1a7SYuri Pankov * Low-level routines for the map-based version
18*a40ea1a7SYuri Pankov * of the mandoc database, for read-only access.
19*a40ea1a7SYuri Pankov * The interface is defined in "dbm_map.h".
20*a40ea1a7SYuri Pankov */
21*a40ea1a7SYuri Pankov #include "config.h"
22*a40ea1a7SYuri Pankov
23*a40ea1a7SYuri Pankov #include <sys/mman.h>
24*a40ea1a7SYuri Pankov #include <sys/stat.h>
25*a40ea1a7SYuri Pankov #include <sys/types.h>
26*a40ea1a7SYuri Pankov
27*a40ea1a7SYuri Pankov #if HAVE_ENDIAN
28*a40ea1a7SYuri Pankov #include <endian.h>
29*a40ea1a7SYuri Pankov #elif HAVE_SYS_ENDIAN
30*a40ea1a7SYuri Pankov #include <sys/endian.h>
31*a40ea1a7SYuri Pankov #elif HAVE_NTOHL
32*a40ea1a7SYuri Pankov #include <arpa/inet.h>
33*a40ea1a7SYuri Pankov #endif
34*a40ea1a7SYuri Pankov #if HAVE_ERR
35*a40ea1a7SYuri Pankov #include <err.h>
36*a40ea1a7SYuri Pankov #endif
37*a40ea1a7SYuri Pankov #include <errno.h>
38*a40ea1a7SYuri Pankov #include <fcntl.h>
39*a40ea1a7SYuri Pankov #include <regex.h>
40*a40ea1a7SYuri Pankov #include <stdint.h>
41*a40ea1a7SYuri Pankov #include <stdlib.h>
42*a40ea1a7SYuri Pankov #include <string.h>
43*a40ea1a7SYuri Pankov #include <unistd.h>
44*a40ea1a7SYuri Pankov
45*a40ea1a7SYuri Pankov #include "mansearch.h"
46*a40ea1a7SYuri Pankov #include "dbm_map.h"
47*a40ea1a7SYuri Pankov #include "dbm.h"
48*a40ea1a7SYuri Pankov
/*
 * State describing the single database file handled by this module.
 * All of it is set up by dbm_map().
 */
static int	 ifd;		/* descriptor returned by open(2) */
static struct stat st;		/* fstat(2) result; st_size is the map length */
static char	*dbm_base;	/* start address of the mapping */
static int32_t	 max_offset;	/* offsets passed to dbm_get() must be below */
53*a40ea1a7SYuri Pankov
54*a40ea1a7SYuri Pankov /*
55*a40ea1a7SYuri Pankov * Open a disk-based database for read-only access.
56*a40ea1a7SYuri Pankov * Validate the file format as far as it is not mandoc-specific.
57*a40ea1a7SYuri Pankov * Return 0 on success. Return -1 and set errno on failure.
58*a40ea1a7SYuri Pankov */
59*a40ea1a7SYuri Pankov int
dbm_map(const char * fname)60*a40ea1a7SYuri Pankov dbm_map(const char *fname)
61*a40ea1a7SYuri Pankov {
62*a40ea1a7SYuri Pankov int save_errno;
63*a40ea1a7SYuri Pankov const int32_t *magic;
64*a40ea1a7SYuri Pankov
65*a40ea1a7SYuri Pankov if ((ifd = open(fname, O_RDONLY)) == -1)
66*a40ea1a7SYuri Pankov return -1;
67*a40ea1a7SYuri Pankov if (fstat(ifd, &st) == -1)
68*a40ea1a7SYuri Pankov goto fail;
69*a40ea1a7SYuri Pankov if (st.st_size < 5) {
70*a40ea1a7SYuri Pankov warnx("dbm_map(%s): File too short", fname);
71*a40ea1a7SYuri Pankov errno = EFTYPE;
72*a40ea1a7SYuri Pankov goto fail;
73*a40ea1a7SYuri Pankov }
74*a40ea1a7SYuri Pankov if (st.st_size > INT32_MAX) {
75*a40ea1a7SYuri Pankov errno = EFBIG;
76*a40ea1a7SYuri Pankov goto fail;
77*a40ea1a7SYuri Pankov }
78*a40ea1a7SYuri Pankov if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
79*a40ea1a7SYuri Pankov ifd, 0)) == MAP_FAILED)
80*a40ea1a7SYuri Pankov goto fail;
81*a40ea1a7SYuri Pankov magic = dbm_getint(0);
82*a40ea1a7SYuri Pankov if (be32toh(*magic) != MANDOCDB_MAGIC) {
83*a40ea1a7SYuri Pankov if (strncmp(dbm_base, "SQLite format 3", 15))
84*a40ea1a7SYuri Pankov warnx("dbm_map(%s): "
85*a40ea1a7SYuri Pankov "Bad initial magic %x (expected %x)",
86*a40ea1a7SYuri Pankov fname, be32toh(*magic), MANDOCDB_MAGIC);
87*a40ea1a7SYuri Pankov else
88*a40ea1a7SYuri Pankov warnx("dbm_map(%s): "
89*a40ea1a7SYuri Pankov "Obsolete format based on SQLite 3",
90*a40ea1a7SYuri Pankov fname);
91*a40ea1a7SYuri Pankov errno = EFTYPE;
92*a40ea1a7SYuri Pankov goto fail;
93*a40ea1a7SYuri Pankov }
94*a40ea1a7SYuri Pankov magic = dbm_getint(1);
95*a40ea1a7SYuri Pankov if (be32toh(*magic) != MANDOCDB_VERSION) {
96*a40ea1a7SYuri Pankov warnx("dbm_map(%s): Bad version number %d (expected %d)",
97*a40ea1a7SYuri Pankov fname, be32toh(*magic), MANDOCDB_VERSION);
98*a40ea1a7SYuri Pankov errno = EFTYPE;
99*a40ea1a7SYuri Pankov goto fail;
100*a40ea1a7SYuri Pankov }
101*a40ea1a7SYuri Pankov max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t);
102*a40ea1a7SYuri Pankov if (st.st_size != max_offset) {
103*a40ea1a7SYuri Pankov warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)",
104*a40ea1a7SYuri Pankov fname, (long long)st.st_size, max_offset);
105*a40ea1a7SYuri Pankov errno = EFTYPE;
106*a40ea1a7SYuri Pankov goto fail;
107*a40ea1a7SYuri Pankov }
108*a40ea1a7SYuri Pankov if ((magic = dbm_get(*dbm_getint(3))) == NULL) {
109*a40ea1a7SYuri Pankov errno = EFTYPE;
110*a40ea1a7SYuri Pankov goto fail;
111*a40ea1a7SYuri Pankov }
112*a40ea1a7SYuri Pankov if (be32toh(*magic) != MANDOCDB_MAGIC) {
113*a40ea1a7SYuri Pankov warnx("dbm_map(%s): Bad final magic %x (expected %x)",
114*a40ea1a7SYuri Pankov fname, be32toh(*magic), MANDOCDB_MAGIC);
115*a40ea1a7SYuri Pankov errno = EFTYPE;
116*a40ea1a7SYuri Pankov goto fail;
117*a40ea1a7SYuri Pankov }
118*a40ea1a7SYuri Pankov return 0;
119*a40ea1a7SYuri Pankov
120*a40ea1a7SYuri Pankov fail:
121*a40ea1a7SYuri Pankov save_errno = errno;
122*a40ea1a7SYuri Pankov close(ifd);
123*a40ea1a7SYuri Pankov errno = save_errno;
124*a40ea1a7SYuri Pankov return -1;
125*a40ea1a7SYuri Pankov }
126*a40ea1a7SYuri Pankov
127*a40ea1a7SYuri Pankov void
dbm_unmap(void)128*a40ea1a7SYuri Pankov dbm_unmap(void)
129*a40ea1a7SYuri Pankov {
130*a40ea1a7SYuri Pankov if (munmap(dbm_base, st.st_size) == -1)
131*a40ea1a7SYuri Pankov warn("dbm_unmap: munmap");
132*a40ea1a7SYuri Pankov if (close(ifd) == -1)
133*a40ea1a7SYuri Pankov warn("dbm_unmap: close");
134*a40ea1a7SYuri Pankov dbm_base = (char *)-1;
135*a40ea1a7SYuri Pankov }
136*a40ea1a7SYuri Pankov
137*a40ea1a7SYuri Pankov /*
138*a40ea1a7SYuri Pankov * Take a raw integer as it was read from the database.
139*a40ea1a7SYuri Pankov * Interpret it as an offset into the database file
140*a40ea1a7SYuri Pankov * and return a pointer to that place in the file.
141*a40ea1a7SYuri Pankov */
142*a40ea1a7SYuri Pankov void *
dbm_get(int32_t offset)143*a40ea1a7SYuri Pankov dbm_get(int32_t offset)
144*a40ea1a7SYuri Pankov {
145*a40ea1a7SYuri Pankov offset = be32toh(offset);
146*a40ea1a7SYuri Pankov if (offset < 0) {
147*a40ea1a7SYuri Pankov warnx("dbm_get: Database corrupt: offset %d", offset);
148*a40ea1a7SYuri Pankov return NULL;
149*a40ea1a7SYuri Pankov }
150*a40ea1a7SYuri Pankov if (offset >= max_offset) {
151*a40ea1a7SYuri Pankov warnx("dbm_get: Database corrupt: offset %d > %d",
152*a40ea1a7SYuri Pankov offset, max_offset);
153*a40ea1a7SYuri Pankov return NULL;
154*a40ea1a7SYuri Pankov }
155*a40ea1a7SYuri Pankov return dbm_base + offset;
156*a40ea1a7SYuri Pankov }
157*a40ea1a7SYuri Pankov
158*a40ea1a7SYuri Pankov /*
159*a40ea1a7SYuri Pankov * Assume the database starts with some integers.
160*a40ea1a7SYuri Pankov * Assume they are numbered starting from 0, increasing.
161*a40ea1a7SYuri Pankov * Get a pointer to one with the number "offset".
162*a40ea1a7SYuri Pankov */
163*a40ea1a7SYuri Pankov int32_t *
dbm_getint(int32_t offset)164*a40ea1a7SYuri Pankov dbm_getint(int32_t offset)
165*a40ea1a7SYuri Pankov {
166*a40ea1a7SYuri Pankov return (int32_t *)dbm_base + offset;
167*a40ea1a7SYuri Pankov }
168*a40ea1a7SYuri Pankov
169*a40ea1a7SYuri Pankov /*
170*a40ea1a7SYuri Pankov * The reverse of dbm_get().
171*a40ea1a7SYuri Pankov * Take pointer into the database file
172*a40ea1a7SYuri Pankov * and convert it to the raw integer
173*a40ea1a7SYuri Pankov * that would be used to refer to that place in the file.
174*a40ea1a7SYuri Pankov */
175*a40ea1a7SYuri Pankov int32_t
dbm_addr(const void * p)176*a40ea1a7SYuri Pankov dbm_addr(const void *p)
177*a40ea1a7SYuri Pankov {
178*a40ea1a7SYuri Pankov return htobe32((const char *)p - dbm_base);
179*a40ea1a7SYuri Pankov }
180*a40ea1a7SYuri Pankov
181*a40ea1a7SYuri Pankov int
dbm_match(const struct dbm_match * match,const char * str)182*a40ea1a7SYuri Pankov dbm_match(const struct dbm_match *match, const char *str)
183*a40ea1a7SYuri Pankov {
184*a40ea1a7SYuri Pankov switch (match->type) {
185*a40ea1a7SYuri Pankov case DBM_EXACT:
186*a40ea1a7SYuri Pankov return strcmp(str, match->str) == 0;
187*a40ea1a7SYuri Pankov case DBM_SUB:
188*a40ea1a7SYuri Pankov return strcasestr(str, match->str) != NULL;
189*a40ea1a7SYuri Pankov case DBM_REGEX:
190*a40ea1a7SYuri Pankov return regexec(match->re, str, 0, NULL, 0) == 0;
191*a40ea1a7SYuri Pankov default:
192*a40ea1a7SYuri Pankov abort();
193*a40ea1a7SYuri Pankov }
194*a40ea1a7SYuri Pankov }
195