1/*	$Id: dbm.c,v 1.6 2018/11/19 19:22:07 schwarze Exp $ */
2/*
3 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * Map-based version of the mandoc database, for read-only access.
18 * The interface is defined in "dbm.h".
19 */
20#include "config.h"
21
22#include <assert.h>
23#if HAVE_ENDIAN
24#include <endian.h>
25#elif HAVE_SYS_ENDIAN
26#include <sys/endian.h>
27#elif HAVE_NTOHL
28#include <arpa/inet.h>
29#endif
30#if HAVE_ERR
31#include <err.h>
32#endif
33#include <errno.h>
34#include <regex.h>
35#include <stdint.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "mansearch.h"
41#include "dbm_map.h"
42#include "dbm.h"
43
/*
 * One entry of a per-macro value table.
 * Both members are handed to dbm_get() by the code in this file.
 */
struct macro {
	int32_t	value;	/* dbm_get() yields the value string */
	int32_t	pages;	/* dbm_get() yields a 0-terminated int32_t list */
};
48
/*
 * One entry of the page table.
 * Every member is handed to dbm_get() to reach the actual data.
 */
struct page {
	int32_t	name;	/* list of names, scanned by page_bytitle() */
	int32_t	sect;	/* list of sections, scanned by page_bytitle() */
	int32_t	arch;	/* list of architectures; 0 means none */
	int32_t	desc;	/* description string */
	int32_t	file;	/* file name data */
};
56
/*
 * The kind of page iteration that is currently in progress.
 */
enum iter {
	ITER_NONE = 0,	/* no iteration is active */
	ITER_NAME,	/* started by dbm_page_byname() */
	ITER_SECT,	/* started by dbm_page_bysect() */
	ITER_ARCH,	/* started by dbm_page_byarch() */
	ITER_DESC,	/* started by dbm_page_bydesc() */
	ITER_MACRO	/* started by dbm_page_bymacro() */
};
65
66static struct macro	*macros[MACRO_MAX];
67static int32_t		 nvals[MACRO_MAX];
68static struct page	*pages;
69static int32_t		 npages;
70static enum iter	 iteration;
71
72static struct dbm_res	 page_bytitle(enum iter, const struct dbm_match *);
73static struct dbm_res	 page_byarch(const struct dbm_match *);
74static struct dbm_res	 page_bymacro(int32_t, const struct dbm_match *);
75static char		*macro_bypage(int32_t, int32_t);
76
77
78/*** top level functions **********************************************/
79
80/*
81 * Open a disk-based mandoc database for read-only access.
82 * Map the pages and macros[] arrays.
83 * Return 0 on success.  Return -1 and set errno on failure.
84 */
85int
86dbm_open(const char *fname)
87{
88	const int32_t	*mp, *ep;
89	int32_t		 im;
90
91	if (dbm_map(fname) == -1)
92		return -1;
93
94	if ((npages = be32toh(*dbm_getint(4))) < 0) {
95		warnx("dbm_open(%s): Invalid number of pages: %d",
96		    fname, npages);
97		goto fail;
98	}
99	pages = (struct page *)dbm_getint(5);
100
101	if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
102		warnx("dbm_open(%s): Invalid offset of macros array", fname);
103		goto fail;
104	}
105	if (be32toh(*mp) != MACRO_MAX) {
106		warnx("dbm_open(%s): Invalid number of macros: %d",
107		    fname, be32toh(*mp));
108		goto fail;
109	}
110	for (im = 0; im < MACRO_MAX; im++) {
111		if ((ep = dbm_get(*++mp)) == NULL) {
112			warnx("dbm_open(%s): Invalid offset of macro %d",
113			    fname, im);
114			goto fail;
115		}
116		nvals[im] = be32toh(*ep);
117		macros[im] = (struct macro *)++ep;
118	}
119	return 0;
120
121fail:
122	dbm_unmap();
123	errno = EFTYPE;
124	return -1;
125}
126
/*
 * Release the database by handing off to dbm_unmap().
 */
void
dbm_close(void)
{

	dbm_unmap();
}
132
133
134/*** functions for handling pages *************************************/
135
136int32_t
137dbm_page_count(void)
138{
139	return npages;
140}
141
142/*
143 * Give the caller pointers to the data for one manual page.
144 */
145struct dbm_page *
146dbm_page_get(int32_t ip)
147{
148	static struct dbm_page	 res;
149
150	assert(ip >= 0);
151	assert(ip < npages);
152	res.name = dbm_get(pages[ip].name);
153	if (res.name == NULL)
154		res.name = "(NULL)\0";
155	res.sect = dbm_get(pages[ip].sect);
156	if (res.sect == NULL)
157		res.sect = "(NULL)\0";
158	res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
159	res.desc = dbm_get(pages[ip].desc);
160	if (res.desc == NULL)
161		res.desc = "(NULL)";
162	res.file = dbm_get(pages[ip].file);
163	if (res.file == NULL)
164		res.file = " (NULL)\0";
165	res.addr = dbm_addr(pages + ip);
166	return &res;
167}
168
169/*
170 * Functions to start filtered iterations over manual pages.
171 */
172void
173dbm_page_byname(const struct dbm_match *match)
174{
175	assert(match != NULL);
176	page_bytitle(ITER_NAME, match);
177}
178
179void
180dbm_page_bysect(const struct dbm_match *match)
181{
182	assert(match != NULL);
183	page_bytitle(ITER_SECT, match);
184}
185
/* Start an iteration over the pages having an architecture accepted by match. */
void
dbm_page_byarch(const struct dbm_match *match)
{

	assert(match != NULL);
	(void)page_byarch(match);
}
192
193void
194dbm_page_bydesc(const struct dbm_match *match)
195{
196	assert(match != NULL);
197	page_bytitle(ITER_DESC, match);
198}
199
200void
201dbm_page_bymacro(int32_t im, const struct dbm_match *match)
202{
203	assert(im >= 0);
204	assert(im < MACRO_MAX);
205	assert(match != NULL);
206	page_bymacro(im, match);
207}
208
209/*
210 * Return the number of the next manual page in the current iteration.
211 */
212struct dbm_res
213dbm_page_next(void)
214{
215	struct dbm_res			 res = {-1, 0};
216
217	switch(iteration) {
218	case ITER_NONE:
219		return res;
220	case ITER_ARCH:
221		return page_byarch(NULL);
222	case ITER_MACRO:
223		return page_bymacro(0, NULL);
224	default:
225		return page_bytitle(iteration, NULL);
226	}
227}
228
229/*
230 * Functions implementing the iteration over manual pages.
231 */
232static struct dbm_res
233page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
234{
235	static const struct dbm_match	*match;
236	static const char		*cp;
237	static int32_t			 ip;
238	struct dbm_res			 res = {-1, 0};
239
240	assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
241	    arg_iter == ITER_SECT);
242
243	/* Initialize for a new iteration. */
244
245	if (arg_match != NULL) {
246		iteration = arg_iter;
247		match = arg_match;
248		switch (iteration) {
249		case ITER_NAME:
250			cp = dbm_get(pages[0].name);
251			break;
252		case ITER_SECT:
253			cp = dbm_get(pages[0].sect);
254			break;
255		case ITER_DESC:
256			cp = dbm_get(pages[0].desc);
257			break;
258		default:
259			abort();
260		}
261		if (cp == NULL) {
262			iteration = ITER_NONE;
263			match = NULL;
264			cp = NULL;
265			ip = npages;
266		} else
267			ip = 0;
268		return res;
269	}
270
271	/* Search for a name. */
272
273	while (ip < npages) {
274		if (iteration == ITER_NAME)
275			cp++;
276		if (dbm_match(match, cp))
277			break;
278		cp = strchr(cp, '\0') + 1;
279		if (iteration == ITER_DESC)
280			ip++;
281		else if (*cp == '\0') {
282			cp++;
283			ip++;
284		}
285	}
286
287	/* Reached the end without a match. */
288
289	if (ip == npages) {
290		iteration = ITER_NONE;
291		match = NULL;
292		cp = NULL;
293		return res;
294	}
295
296	/* Found a match; save the quality for later retrieval. */
297
298	res.page = ip;
299	res.bits = iteration == ITER_NAME ? cp[-1] : 0;
300
301	/* Skip the remaining names of this page. */
302
303	if (++ip < npages) {
304		do {
305			cp++;
306		} while (cp[-1] != '\0' ||
307		    (iteration != ITER_DESC && cp[-2] != '\0'));
308	}
309	return res;
310}
311
312static struct dbm_res
313page_byarch(const struct dbm_match *arg_match)
314{
315	static const struct dbm_match	*match;
316	struct dbm_res			 res = {-1, 0};
317	static int32_t			 ip;
318	const char			*cp;
319
320	/* Initialize for a new iteration. */
321
322	if (arg_match != NULL) {
323		iteration = ITER_ARCH;
324		match = arg_match;
325		ip = 0;
326		return res;
327	}
328
329	/* Search for an architecture. */
330
331	for ( ; ip < npages; ip++)
332		if (pages[ip].arch)
333			for (cp = dbm_get(pages[ip].arch);
334			    *cp != '\0';
335			    cp = strchr(cp, '\0') + 1)
336				if (dbm_match(match, cp)) {
337					res.page = ip++;
338					return res;
339				}
340
341	/* Reached the end without a match. */
342
343	iteration = ITER_NONE;
344	match = NULL;
345	return res;
346}
347
348static struct dbm_res
349page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
350{
351	static const struct dbm_match	*match;
352	static const int32_t		*pp;
353	static const char		*cp;
354	static int32_t			 im, iv;
355	struct dbm_res			 res = {-1, 0};
356
357	assert(im >= 0);
358	assert(im < MACRO_MAX);
359
360	/* Initialize for a new iteration. */
361
362	if (arg_match != NULL) {
363		iteration = ITER_MACRO;
364		match = arg_match;
365		im = arg_im;
366		cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
367		pp = NULL;
368		iv = -1;
369		return res;
370	}
371	if (iteration != ITER_MACRO)
372		return res;
373
374	/* Find the next matching macro value. */
375
376	while (pp == NULL || *pp == 0) {
377		if (++iv == nvals[im]) {
378			iteration = ITER_NONE;
379			return res;
380		}
381		if (iv)
382			cp = strchr(cp, '\0') + 1;
383		if (dbm_match(match, cp))
384			pp = dbm_get(macros[im][iv].pages);
385	}
386
387	/* Found a matching page. */
388
389	res.page = (struct page *)dbm_get(*pp++) - pages;
390	return res;
391}
392
393
394/*** functions for handling macros ************************************/
395
396int32_t
397dbm_macro_count(int32_t im)
398{
399	assert(im >= 0);
400	assert(im < MACRO_MAX);
401	return nvals[im];
402}
403
404struct dbm_macro *
405dbm_macro_get(int32_t im, int32_t iv)
406{
407	static struct dbm_macro macro;
408
409	assert(im >= 0);
410	assert(im < MACRO_MAX);
411	assert(iv >= 0);
412	assert(iv < nvals[im]);
413	macro.value = dbm_get(macros[im][iv].value);
414	macro.pp = dbm_get(macros[im][iv].pages);
415	return &macro;
416}
417
418/*
419 * Filtered iteration over macro entries.
420 */
421void
422dbm_macro_bypage(int32_t im, int32_t ip)
423{
424	assert(im >= 0);
425	assert(im < MACRO_MAX);
426	assert(ip != 0);
427	macro_bypage(im, ip);
428}
429
430char *
431dbm_macro_next(void)
432{
433	return macro_bypage(MACRO_MAX, 0);
434}
435
436static char *
437macro_bypage(int32_t arg_im, int32_t arg_ip)
438{
439	static const int32_t	*pp;
440	static int32_t		 im, ip, iv;
441
442	/* Initialize for a new iteration. */
443
444	if (arg_im < MACRO_MAX && arg_ip != 0) {
445		im = arg_im;
446		ip = arg_ip;
447		pp = dbm_get(macros[im]->pages);
448		iv = 0;
449		return NULL;
450	}
451	if (im >= MACRO_MAX)
452		return NULL;
453
454	/* Search for the next value. */
455
456	while (iv < nvals[im]) {
457		if (*pp == ip)
458			break;
459		if (*pp == 0)
460			iv++;
461		pp++;
462	}
463
464	/* Reached the end without a match. */
465
466	if (iv == nvals[im]) {
467		im = MACRO_MAX;
468		ip = 0;
469		pp = NULL;
470		return NULL;
471	}
472
473	/* Found a match; skip the remaining pages of this entry. */
474
475	if (++iv < nvals[im])
476		while (*pp++ != 0)
477			continue;
478
479	return dbm_get(macros[im][iv - 1].value);
480}
481