/* $Id: mandocdb.c,v 1.262 2018/12/30 00:49:55 schwarze Exp $ */
/*
 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011-2018 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18a40ea1a7SYuri Pankov */ 19a40ea1a7SYuri Pankov #include "config.h" 20a40ea1a7SYuri Pankov 21a40ea1a7SYuri Pankov #include <sys/types.h> 226640c13bSYuri Pankov #include <sys/mman.h> 23a40ea1a7SYuri Pankov #include <sys/stat.h> 24a40ea1a7SYuri Pankov 25a40ea1a7SYuri Pankov #include <assert.h> 26a40ea1a7SYuri Pankov #include <ctype.h> 27a40ea1a7SYuri Pankov #if HAVE_ERR 28a40ea1a7SYuri Pankov #include <err.h> 29a40ea1a7SYuri Pankov #endif 30a40ea1a7SYuri Pankov #include <errno.h> 31a40ea1a7SYuri Pankov #include <fcntl.h> 32a40ea1a7SYuri Pankov #if HAVE_FTS 33a40ea1a7SYuri Pankov #include <fts.h> 34a40ea1a7SYuri Pankov #else 35a40ea1a7SYuri Pankov #include "compat_fts.h" 36a40ea1a7SYuri Pankov #endif 37a40ea1a7SYuri Pankov #include <limits.h> 38a40ea1a7SYuri Pankov #if HAVE_SANDBOX_INIT 39a40ea1a7SYuri Pankov #include <sandbox.h> 40a40ea1a7SYuri Pankov #endif 41a40ea1a7SYuri Pankov #include <stdarg.h> 42a40ea1a7SYuri Pankov #include <stddef.h> 43a40ea1a7SYuri Pankov #include <stdio.h> 44a40ea1a7SYuri Pankov #include <stdint.h> 45a40ea1a7SYuri Pankov #include <stdlib.h> 46a40ea1a7SYuri Pankov #include <string.h> 47a40ea1a7SYuri Pankov #include <unistd.h> 48a40ea1a7SYuri Pankov 49a40ea1a7SYuri Pankov #include "mandoc_aux.h" 50a40ea1a7SYuri Pankov #include "mandoc_ohash.h" 51a40ea1a7SYuri Pankov #include "mandoc.h" 52a40ea1a7SYuri Pankov #include "roff.h" 53a40ea1a7SYuri Pankov #include "mdoc.h" 54a40ea1a7SYuri Pankov #include "man.h" 55*cec8643bSMichal Nowak #include "mandoc_parse.h" 56a40ea1a7SYuri Pankov #include "manconf.h" 57a40ea1a7SYuri Pankov #include "mansearch.h" 58a40ea1a7SYuri Pankov #include "dba_array.h" 59a40ea1a7SYuri Pankov #include "dba.h" 60a40ea1a7SYuri Pankov 61a40ea1a7SYuri Pankov extern const char *const mansearch_keynames[]; 62a40ea1a7SYuri Pankov 63a40ea1a7SYuri Pankov enum op { 64a40ea1a7SYuri Pankov OP_DEFAULT = 0, /* new dbs from dir list or default config */ 65a40ea1a7SYuri Pankov OP_CONFFILE, /* new databases from custom config file */ 
66a40ea1a7SYuri Pankov OP_UPDATE, /* delete/add entries in existing database */ 67a40ea1a7SYuri Pankov OP_DELETE, /* delete entries from existing database */ 68a40ea1a7SYuri Pankov OP_TEST /* change no databases, report potential problems */ 69a40ea1a7SYuri Pankov }; 70a40ea1a7SYuri Pankov 71a40ea1a7SYuri Pankov struct str { 72a40ea1a7SYuri Pankov const struct mpage *mpage; /* if set, the owning parse */ 73a40ea1a7SYuri Pankov uint64_t mask; /* bitmask in sequence */ 74a40ea1a7SYuri Pankov char key[]; /* rendered text */ 75a40ea1a7SYuri Pankov }; 76a40ea1a7SYuri Pankov 77a40ea1a7SYuri Pankov struct inodev { 78a40ea1a7SYuri Pankov ino_t st_ino; 79a40ea1a7SYuri Pankov dev_t st_dev; 80a40ea1a7SYuri Pankov }; 81a40ea1a7SYuri Pankov 82a40ea1a7SYuri Pankov struct mpage { 83a40ea1a7SYuri Pankov struct inodev inodev; /* used for hashing routine */ 84a40ea1a7SYuri Pankov struct dba_array *dba; 85a40ea1a7SYuri Pankov char *sec; /* section from file content */ 86a40ea1a7SYuri Pankov char *arch; /* architecture from file content */ 87a40ea1a7SYuri Pankov char *title; /* title from file content */ 88a40ea1a7SYuri Pankov char *desc; /* description from file content */ 89a40ea1a7SYuri Pankov struct mpage *next; /* singly linked list */ 90a40ea1a7SYuri Pankov struct mlink *mlinks; /* singly linked list */ 91a40ea1a7SYuri Pankov int name_head_done; 92a40ea1a7SYuri Pankov enum form form; /* format from file content */ 93a40ea1a7SYuri Pankov }; 94a40ea1a7SYuri Pankov 95a40ea1a7SYuri Pankov struct mlink { 96a40ea1a7SYuri Pankov char file[PATH_MAX]; /* filename rel. 
to manpath */ 97a40ea1a7SYuri Pankov char *dsec; /* section from directory */ 98a40ea1a7SYuri Pankov char *arch; /* architecture from directory */ 99a40ea1a7SYuri Pankov char *name; /* name from file name (not empty) */ 100a40ea1a7SYuri Pankov char *fsec; /* section from file name suffix */ 101a40ea1a7SYuri Pankov struct mlink *next; /* singly linked list */ 102a40ea1a7SYuri Pankov struct mpage *mpage; /* parent */ 103a40ea1a7SYuri Pankov int gzip; /* filename has a .gz suffix */ 104a40ea1a7SYuri Pankov enum form dform; /* format from directory */ 105a40ea1a7SYuri Pankov enum form fform; /* format from file name suffix */ 106a40ea1a7SYuri Pankov }; 107a40ea1a7SYuri Pankov 108a40ea1a7SYuri Pankov typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, 109a40ea1a7SYuri Pankov const struct roff_node *); 110a40ea1a7SYuri Pankov 111a40ea1a7SYuri Pankov struct mdoc_handler { 112a40ea1a7SYuri Pankov mdoc_fp fp; /* optional handler */ 113a40ea1a7SYuri Pankov uint64_t mask; /* set unless handler returns 0 */ 114a40ea1a7SYuri Pankov int taboo; /* node flags that must not be set */ 115a40ea1a7SYuri Pankov }; 116a40ea1a7SYuri Pankov 117a40ea1a7SYuri Pankov 118a40ea1a7SYuri Pankov int mandocdb(int, char *[]); 119a40ea1a7SYuri Pankov 120a40ea1a7SYuri Pankov static void dbadd(struct dba *, struct mpage *); 121a40ea1a7SYuri Pankov static void dbadd_mlink(const struct mlink *mlink); 122a40ea1a7SYuri Pankov static void dbprune(struct dba *); 123a40ea1a7SYuri Pankov static void dbwrite(struct dba *); 124a40ea1a7SYuri Pankov static void filescan(const char *); 125a40ea1a7SYuri Pankov #if HAVE_FTS_COMPARE_CONST 126a40ea1a7SYuri Pankov static int fts_compare(const FTSENT *const *, const FTSENT *const *); 127a40ea1a7SYuri Pankov #else 128a40ea1a7SYuri Pankov static int fts_compare(const FTSENT **, const FTSENT **); 129a40ea1a7SYuri Pankov #endif 130a40ea1a7SYuri Pankov static void mlink_add(struct mlink *, const struct stat *); 131a40ea1a7SYuri Pankov static void 
mlink_check(struct mpage *, struct mlink *); 132a40ea1a7SYuri Pankov static void mlink_free(struct mlink *); 133a40ea1a7SYuri Pankov static void mlinks_undupe(struct mpage *); 134a40ea1a7SYuri Pankov static void mpages_free(void); 135a40ea1a7SYuri Pankov static void mpages_merge(struct dba *, struct mparse *); 136a40ea1a7SYuri Pankov static void parse_cat(struct mpage *, int); 137a40ea1a7SYuri Pankov static void parse_man(struct mpage *, const struct roff_meta *, 138a40ea1a7SYuri Pankov const struct roff_node *); 139a40ea1a7SYuri Pankov static void parse_mdoc(struct mpage *, const struct roff_meta *, 140a40ea1a7SYuri Pankov const struct roff_node *); 141a40ea1a7SYuri Pankov static int parse_mdoc_head(struct mpage *, const struct roff_meta *, 142a40ea1a7SYuri Pankov const struct roff_node *); 1436640c13bSYuri Pankov static int parse_mdoc_Fa(struct mpage *, const struct roff_meta *, 1446640c13bSYuri Pankov const struct roff_node *); 145a40ea1a7SYuri Pankov static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, 146a40ea1a7SYuri Pankov const struct roff_node *); 147a40ea1a7SYuri Pankov static void parse_mdoc_fname(struct mpage *, const struct roff_node *); 148a40ea1a7SYuri Pankov static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, 149a40ea1a7SYuri Pankov const struct roff_node *); 150a40ea1a7SYuri Pankov static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, 151a40ea1a7SYuri Pankov const struct roff_node *); 152a40ea1a7SYuri Pankov static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, 153a40ea1a7SYuri Pankov const struct roff_node *); 154a40ea1a7SYuri Pankov static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, 155a40ea1a7SYuri Pankov const struct roff_node *); 156a40ea1a7SYuri Pankov static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, 157a40ea1a7SYuri Pankov const struct roff_node *); 158a40ea1a7SYuri Pankov static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, 
159a40ea1a7SYuri Pankov const struct roff_node *); 160a40ea1a7SYuri Pankov static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, 161a40ea1a7SYuri Pankov const struct roff_node *); 162a40ea1a7SYuri Pankov static void putkey(const struct mpage *, char *, uint64_t); 163a40ea1a7SYuri Pankov static void putkeys(const struct mpage *, char *, size_t, uint64_t); 164a40ea1a7SYuri Pankov static void putmdockey(const struct mpage *, 165a40ea1a7SYuri Pankov const struct roff_node *, uint64_t, int); 166a40ea1a7SYuri Pankov static int render_string(char **, size_t *); 167a40ea1a7SYuri Pankov static void say(const char *, const char *, ...) 168a40ea1a7SYuri Pankov __attribute__((__format__ (__printf__, 2, 3))); 169a40ea1a7SYuri Pankov static int set_basedir(const char *, int); 170a40ea1a7SYuri Pankov static int treescan(void); 171a40ea1a7SYuri Pankov static size_t utf8(unsigned int, char [7]); 172a40ea1a7SYuri Pankov 173a40ea1a7SYuri Pankov static int nodb; /* no database changes */ 174a40ea1a7SYuri Pankov static int mparse_options; /* abort the parse early */ 175a40ea1a7SYuri Pankov static int use_all; /* use all found files */ 176a40ea1a7SYuri Pankov static int debug; /* print what we're doing */ 177a40ea1a7SYuri Pankov static int warnings; /* warn about crap */ 178a40ea1a7SYuri Pankov static int write_utf8; /* write UTF-8 output; else ASCII */ 179a40ea1a7SYuri Pankov static int exitcode; /* to be returned by main */ 180a40ea1a7SYuri Pankov static enum op op; /* operational mode */ 181a40ea1a7SYuri Pankov static char basedir[PATH_MAX]; /* current base directory */ 182a40ea1a7SYuri Pankov static struct mpage *mpage_head; /* list of distinct manual pages */ 183a40ea1a7SYuri Pankov static struct ohash mpages; /* table of distinct manual pages */ 184a40ea1a7SYuri Pankov static struct ohash mlinks; /* table of directory entries */ 185a40ea1a7SYuri Pankov static struct ohash names; /* table of all names */ 186a40ea1a7SYuri Pankov static struct ohash strings; /* table of 
all strings */ 187a40ea1a7SYuri Pankov static uint64_t name_mask; 188a40ea1a7SYuri Pankov 189*cec8643bSMichal Nowak static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = { 190a40ea1a7SYuri Pankov { NULL, 0, NODE_NOPRT }, /* Dd */ 191a40ea1a7SYuri Pankov { NULL, 0, NODE_NOPRT }, /* Dt */ 192a40ea1a7SYuri Pankov { NULL, 0, NODE_NOPRT }, /* Os */ 193a40ea1a7SYuri Pankov { parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */ 194a40ea1a7SYuri Pankov { parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */ 195a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pp */ 196a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* D1 */ 197a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Dl */ 198a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bd */ 199a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ed */ 200a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bl */ 201a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* El */ 202a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* It */ 203a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ad */ 204a40ea1a7SYuri Pankov { NULL, TYPE_An, 0 }, /* An */ 205c66b8046SYuri Pankov { NULL, 0, 0 }, /* Ap */ 206a40ea1a7SYuri Pankov { NULL, TYPE_Ar, 0 }, /* Ar */ 207a40ea1a7SYuri Pankov { NULL, TYPE_Cd, 0 }, /* Cd */ 208a40ea1a7SYuri Pankov { NULL, TYPE_Cm, 0 }, /* Cm */ 209a40ea1a7SYuri Pankov { NULL, TYPE_Dv, 0 }, /* Dv */ 210a40ea1a7SYuri Pankov { NULL, TYPE_Er, 0 }, /* Er */ 211a40ea1a7SYuri Pankov { NULL, TYPE_Ev, 0 }, /* Ev */ 212a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ex */ 2136640c13bSYuri Pankov { parse_mdoc_Fa, 0, 0 }, /* Fa */ 214a40ea1a7SYuri Pankov { parse_mdoc_Fd, 0, 0 }, /* Fd */ 215a40ea1a7SYuri Pankov { NULL, TYPE_Fl, 0 }, /* Fl */ 216a40ea1a7SYuri Pankov { parse_mdoc_Fn, 0, 0 }, /* Fn */ 2176640c13bSYuri Pankov { NULL, TYPE_Ft | TYPE_Vt, 0 }, /* Ft */ 218a40ea1a7SYuri Pankov { NULL, TYPE_Ic, 0 }, /* Ic */ 219a40ea1a7SYuri Pankov { NULL, TYPE_In, 0 }, /* In */ 220a40ea1a7SYuri Pankov { NULL, TYPE_Li, 0 }, /* Li */ 221a40ea1a7SYuri Pankov { parse_mdoc_Nd, 0, 0 }, /* Nd */ 222a40ea1a7SYuri Pankov { parse_mdoc_Nm, 0, 0 }, /* Nm */ 
223a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Op */ 224a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ot */ 225a40ea1a7SYuri Pankov { NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */ 226a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Rv */ 227a40ea1a7SYuri Pankov { NULL, TYPE_St, 0 }, /* St */ 228a40ea1a7SYuri Pankov { parse_mdoc_Va, TYPE_Va, 0 }, /* Va */ 229a40ea1a7SYuri Pankov { parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */ 230a40ea1a7SYuri Pankov { parse_mdoc_Xr, 0, 0 }, /* Xr */ 231a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %A */ 232a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %B */ 233a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %D */ 234a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %I */ 235a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %J */ 236a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %N */ 237a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %O */ 238a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %P */ 239a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %R */ 240a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %T */ 241a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %V */ 242a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ac */ 243a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ao */ 244a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Aq */ 245a40ea1a7SYuri Pankov { NULL, TYPE_At, 0 }, /* At */ 246a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bc */ 247a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bf */ 248a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bo */ 249a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bq */ 250a40ea1a7SYuri Pankov { NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */ 251a40ea1a7SYuri Pankov { NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */ 252a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Db */ 253a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Dc */ 254a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Do */ 255a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Dq */ 256a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ec */ 257a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ef */ 258a40ea1a7SYuri Pankov { NULL, TYPE_Em, 0 }, /* Em */ 259a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Eo */ 260a40ea1a7SYuri Pankov { NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */ 261a40ea1a7SYuri 
Pankov { NULL, TYPE_Ms, 0 }, /* Ms */ 262a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* No */ 263a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ns */ 264a40ea1a7SYuri Pankov { NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */ 265a40ea1a7SYuri Pankov { NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */ 266a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pc */ 267a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pf */ 268a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Po */ 269a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pq */ 270a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Qc */ 271a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ql */ 272a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Qo */ 273a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Qq */ 274a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Re */ 275a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Rs */ 276a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sc */ 277a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* So */ 278a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sq */ 279a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sm */ 280a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sx */ 281a40ea1a7SYuri Pankov { NULL, TYPE_Sy, 0 }, /* Sy */ 282a40ea1a7SYuri Pankov { NULL, TYPE_Tn, 0 }, /* Tn */ 283a40ea1a7SYuri Pankov { NULL, 0, NODE_NOSRC }, /* Ux */ 284a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Xc */ 285a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Xo */ 286a40ea1a7SYuri Pankov { parse_mdoc_Fo, 0, 0 }, /* Fo */ 287a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Fc */ 288a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Oo */ 289a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Oc */ 290a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bk */ 291a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ek */ 292a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bt */ 293a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Hf */ 294a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Fr */ 295a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ud */ 296a40ea1a7SYuri Pankov { NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */ 297a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Lp */ 298a40ea1a7SYuri Pankov { NULL, TYPE_Lk, 0 }, /* Lk */ 299a40ea1a7SYuri Pankov { NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */ 
300a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Brq */ 301a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bro */ 302a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Brc */ 303a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %C */ 304a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Es */ 305a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* En */ 306a40ea1a7SYuri Pankov { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */ 307a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %Q */ 308a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %U */ 309a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ta */ 310a40ea1a7SYuri Pankov }; 311a40ea1a7SYuri Pankov 312a40ea1a7SYuri Pankov 313a40ea1a7SYuri Pankov int 314a40ea1a7SYuri Pankov mandocdb(int argc, char *argv[]) 315a40ea1a7SYuri Pankov { 316a40ea1a7SYuri Pankov struct manconf conf; 317a40ea1a7SYuri Pankov struct mparse *mp; 318a40ea1a7SYuri Pankov struct dba *dba; 319a40ea1a7SYuri Pankov const char *path_arg, *progname; 320a40ea1a7SYuri Pankov size_t j, sz; 321a40ea1a7SYuri Pankov int ch, i; 322a40ea1a7SYuri Pankov 323a40ea1a7SYuri Pankov #if HAVE_PLEDGE 3246640c13bSYuri Pankov if (pledge("stdio rpath wpath cpath", NULL) == -1) { 325a40ea1a7SYuri Pankov warn("pledge"); 326a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_SYSERR; 327a40ea1a7SYuri Pankov } 328a40ea1a7SYuri Pankov #endif 329a40ea1a7SYuri Pankov 330a40ea1a7SYuri Pankov #if HAVE_SANDBOX_INIT 331a40ea1a7SYuri Pankov if (sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, NULL) == -1) { 332a40ea1a7SYuri Pankov warnx("sandbox_init"); 333a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_SYSERR; 334a40ea1a7SYuri Pankov } 335a40ea1a7SYuri Pankov #endif 336a40ea1a7SYuri Pankov 337a40ea1a7SYuri Pankov memset(&conf, 0, sizeof(conf)); 338a40ea1a7SYuri Pankov 339a40ea1a7SYuri Pankov /* 340a40ea1a7SYuri Pankov * We accept a few different invocations. 341a40ea1a7SYuri Pankov * The CHECKOP macro makes sure that invocation styles don't 342a40ea1a7SYuri Pankov * clobber each other. 
343a40ea1a7SYuri Pankov */ 344a40ea1a7SYuri Pankov #define CHECKOP(_op, _ch) do \ 345a40ea1a7SYuri Pankov if (OP_DEFAULT != (_op)) { \ 346a40ea1a7SYuri Pankov warnx("-%c: Conflicting option", (_ch)); \ 347a40ea1a7SYuri Pankov goto usage; \ 348a40ea1a7SYuri Pankov } while (/*CONSTCOND*/0) 349a40ea1a7SYuri Pankov 350*cec8643bSMichal Nowak mparse_options = MPARSE_VALIDATE; 351a40ea1a7SYuri Pankov path_arg = NULL; 352a40ea1a7SYuri Pankov op = OP_DEFAULT; 353a40ea1a7SYuri Pankov 354a40ea1a7SYuri Pankov while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v"))) 355a40ea1a7SYuri Pankov switch (ch) { 356a40ea1a7SYuri Pankov case 'a': 357a40ea1a7SYuri Pankov use_all = 1; 358a40ea1a7SYuri Pankov break; 359a40ea1a7SYuri Pankov case 'C': 360a40ea1a7SYuri Pankov CHECKOP(op, ch); 361a40ea1a7SYuri Pankov path_arg = optarg; 362a40ea1a7SYuri Pankov op = OP_CONFFILE; 363a40ea1a7SYuri Pankov break; 364a40ea1a7SYuri Pankov case 'D': 365a40ea1a7SYuri Pankov debug++; 366a40ea1a7SYuri Pankov break; 367a40ea1a7SYuri Pankov case 'd': 368a40ea1a7SYuri Pankov CHECKOP(op, ch); 369a40ea1a7SYuri Pankov path_arg = optarg; 370a40ea1a7SYuri Pankov op = OP_UPDATE; 371a40ea1a7SYuri Pankov break; 372a40ea1a7SYuri Pankov case 'n': 373a40ea1a7SYuri Pankov nodb = 1; 374a40ea1a7SYuri Pankov break; 375a40ea1a7SYuri Pankov case 'p': 376a40ea1a7SYuri Pankov warnings = 1; 377a40ea1a7SYuri Pankov break; 378a40ea1a7SYuri Pankov case 'Q': 379a40ea1a7SYuri Pankov mparse_options |= MPARSE_QUICK; 380a40ea1a7SYuri Pankov break; 381a40ea1a7SYuri Pankov case 'T': 382a40ea1a7SYuri Pankov if (strcmp(optarg, "utf8")) { 383a40ea1a7SYuri Pankov warnx("-T%s: Unsupported output format", 384a40ea1a7SYuri Pankov optarg); 385a40ea1a7SYuri Pankov goto usage; 386a40ea1a7SYuri Pankov } 387a40ea1a7SYuri Pankov write_utf8 = 1; 388a40ea1a7SYuri Pankov break; 389a40ea1a7SYuri Pankov case 't': 390a40ea1a7SYuri Pankov CHECKOP(op, ch); 391a40ea1a7SYuri Pankov dup2(STDOUT_FILENO, STDERR_FILENO); 392a40ea1a7SYuri Pankov op = OP_TEST; 
393a40ea1a7SYuri Pankov nodb = warnings = 1; 394a40ea1a7SYuri Pankov break; 395a40ea1a7SYuri Pankov case 'u': 396a40ea1a7SYuri Pankov CHECKOP(op, ch); 397a40ea1a7SYuri Pankov path_arg = optarg; 398a40ea1a7SYuri Pankov op = OP_DELETE; 399a40ea1a7SYuri Pankov break; 400a40ea1a7SYuri Pankov case 'v': 401a40ea1a7SYuri Pankov /* Compatibility with espie@'s makewhatis. */ 402a40ea1a7SYuri Pankov break; 403a40ea1a7SYuri Pankov default: 404a40ea1a7SYuri Pankov goto usage; 405a40ea1a7SYuri Pankov } 406a40ea1a7SYuri Pankov 407a40ea1a7SYuri Pankov argc -= optind; 408a40ea1a7SYuri Pankov argv += optind; 409a40ea1a7SYuri Pankov 410a40ea1a7SYuri Pankov #if HAVE_PLEDGE 411a40ea1a7SYuri Pankov if (nodb) { 412a40ea1a7SYuri Pankov if (pledge("stdio rpath", NULL) == -1) { 413a40ea1a7SYuri Pankov warn("pledge"); 414a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_SYSERR; 415a40ea1a7SYuri Pankov } 416a40ea1a7SYuri Pankov } 417a40ea1a7SYuri Pankov #endif 418a40ea1a7SYuri Pankov 419a40ea1a7SYuri Pankov if (OP_CONFFILE == op && argc > 0) { 420a40ea1a7SYuri Pankov warnx("-C: Too many arguments"); 421a40ea1a7SYuri Pankov goto usage; 422a40ea1a7SYuri Pankov } 423a40ea1a7SYuri Pankov 424a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_OK; 425a40ea1a7SYuri Pankov mchars_alloc(); 426*cec8643bSMichal Nowak mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL); 427a40ea1a7SYuri Pankov mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); 428a40ea1a7SYuri Pankov mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); 429a40ea1a7SYuri Pankov 430a40ea1a7SYuri Pankov if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { 431a40ea1a7SYuri Pankov 432a40ea1a7SYuri Pankov /* 433a40ea1a7SYuri Pankov * Most of these deal with a specific directory. 434a40ea1a7SYuri Pankov * Jump into that directory first. 
435a40ea1a7SYuri Pankov */ 436a40ea1a7SYuri Pankov if (OP_TEST != op && 0 == set_basedir(path_arg, 1)) 437a40ea1a7SYuri Pankov goto out; 438a40ea1a7SYuri Pankov 439a40ea1a7SYuri Pankov dba = nodb ? dba_new(128) : dba_read(MANDOC_DB); 440a40ea1a7SYuri Pankov if (dba != NULL) { 441a40ea1a7SYuri Pankov /* 442a40ea1a7SYuri Pankov * The existing database is usable. Process 443a40ea1a7SYuri Pankov * all files specified on the command-line. 444a40ea1a7SYuri Pankov */ 445a40ea1a7SYuri Pankov use_all = 1; 446a40ea1a7SYuri Pankov for (i = 0; i < argc; i++) 447a40ea1a7SYuri Pankov filescan(argv[i]); 448a40ea1a7SYuri Pankov if (nodb == 0) 449a40ea1a7SYuri Pankov dbprune(dba); 450a40ea1a7SYuri Pankov } else { 451a40ea1a7SYuri Pankov /* Database missing or corrupt. */ 452a40ea1a7SYuri Pankov if (op != OP_UPDATE || errno != ENOENT) 453a40ea1a7SYuri Pankov say(MANDOC_DB, "%s: Automatically recreating" 454a40ea1a7SYuri Pankov " from scratch", strerror(errno)); 455a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_OK; 456a40ea1a7SYuri Pankov op = OP_DEFAULT; 457a40ea1a7SYuri Pankov if (0 == treescan()) 458a40ea1a7SYuri Pankov goto out; 459a40ea1a7SYuri Pankov dba = dba_new(128); 460a40ea1a7SYuri Pankov } 461a40ea1a7SYuri Pankov if (OP_DELETE != op) 462a40ea1a7SYuri Pankov mpages_merge(dba, mp); 463a40ea1a7SYuri Pankov if (nodb == 0) 464a40ea1a7SYuri Pankov dbwrite(dba); 465a40ea1a7SYuri Pankov dba_free(dba); 466a40ea1a7SYuri Pankov } else { 467a40ea1a7SYuri Pankov /* 468a40ea1a7SYuri Pankov * If we have arguments, use them as our manpaths. 469a40ea1a7SYuri Pankov * If we don't, use man.conf(5). 
470a40ea1a7SYuri Pankov */ 471a40ea1a7SYuri Pankov if (argc > 0) { 472a40ea1a7SYuri Pankov conf.manpath.paths = mandoc_reallocarray(NULL, 473a40ea1a7SYuri Pankov argc, sizeof(char *)); 474a40ea1a7SYuri Pankov conf.manpath.sz = (size_t)argc; 475a40ea1a7SYuri Pankov for (i = 0; i < argc; i++) 476a40ea1a7SYuri Pankov conf.manpath.paths[i] = mandoc_strdup(argv[i]); 477a40ea1a7SYuri Pankov } else 478a40ea1a7SYuri Pankov manconf_parse(&conf, path_arg, NULL, NULL); 479a40ea1a7SYuri Pankov 480a40ea1a7SYuri Pankov if (conf.manpath.sz == 0) { 481a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 482a40ea1a7SYuri Pankov say("", "Empty manpath"); 483a40ea1a7SYuri Pankov } 484a40ea1a7SYuri Pankov 485a40ea1a7SYuri Pankov /* 486a40ea1a7SYuri Pankov * First scan the tree rooted at a base directory, then 487a40ea1a7SYuri Pankov * build a new database and finally move it into place. 488a40ea1a7SYuri Pankov * Ignore zero-length directories and strip trailing 489a40ea1a7SYuri Pankov * slashes. 490a40ea1a7SYuri Pankov */ 491a40ea1a7SYuri Pankov for (j = 0; j < conf.manpath.sz; j++) { 492a40ea1a7SYuri Pankov sz = strlen(conf.manpath.paths[j]); 493a40ea1a7SYuri Pankov if (sz && conf.manpath.paths[j][sz - 1] == '/') 494a40ea1a7SYuri Pankov conf.manpath.paths[j][--sz] = '\0'; 495a40ea1a7SYuri Pankov if (0 == sz) 496a40ea1a7SYuri Pankov continue; 497a40ea1a7SYuri Pankov 498a40ea1a7SYuri Pankov if (j) { 499a40ea1a7SYuri Pankov mandoc_ohash_init(&mpages, 6, 500a40ea1a7SYuri Pankov offsetof(struct mpage, inodev)); 501a40ea1a7SYuri Pankov mandoc_ohash_init(&mlinks, 6, 502a40ea1a7SYuri Pankov offsetof(struct mlink, file)); 503a40ea1a7SYuri Pankov } 504a40ea1a7SYuri Pankov 505a40ea1a7SYuri Pankov if ( ! 
set_basedir(conf.manpath.paths[j], argc > 0)) 506a40ea1a7SYuri Pankov continue; 507a40ea1a7SYuri Pankov if (0 == treescan()) 508a40ea1a7SYuri Pankov continue; 509a40ea1a7SYuri Pankov dba = dba_new(128); 510a40ea1a7SYuri Pankov mpages_merge(dba, mp); 511a40ea1a7SYuri Pankov if (nodb == 0) 512a40ea1a7SYuri Pankov dbwrite(dba); 513a40ea1a7SYuri Pankov dba_free(dba); 514a40ea1a7SYuri Pankov 515a40ea1a7SYuri Pankov if (j + 1 < conf.manpath.sz) { 516a40ea1a7SYuri Pankov mpages_free(); 517a40ea1a7SYuri Pankov ohash_delete(&mpages); 518a40ea1a7SYuri Pankov ohash_delete(&mlinks); 519a40ea1a7SYuri Pankov } 520a40ea1a7SYuri Pankov } 521a40ea1a7SYuri Pankov } 522a40ea1a7SYuri Pankov out: 523a40ea1a7SYuri Pankov manconf_free(&conf); 524a40ea1a7SYuri Pankov mparse_free(mp); 525a40ea1a7SYuri Pankov mchars_free(); 526a40ea1a7SYuri Pankov mpages_free(); 527a40ea1a7SYuri Pankov ohash_delete(&mpages); 528a40ea1a7SYuri Pankov ohash_delete(&mlinks); 529a40ea1a7SYuri Pankov return exitcode; 530a40ea1a7SYuri Pankov usage: 531a40ea1a7SYuri Pankov progname = getprogname(); 532a40ea1a7SYuri Pankov fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" 533a40ea1a7SYuri Pankov " %s [-aDnpQ] [-Tutf8] dir ...\n" 534a40ea1a7SYuri Pankov " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" 535a40ea1a7SYuri Pankov " %s [-Dnp] -u dir [file ...]\n" 536a40ea1a7SYuri Pankov " %s [-Q] -t file ...\n", 537a40ea1a7SYuri Pankov progname, progname, progname, progname, progname); 538a40ea1a7SYuri Pankov 539a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_BADARG; 540a40ea1a7SYuri Pankov } 541a40ea1a7SYuri Pankov 542a40ea1a7SYuri Pankov /* 543a40ea1a7SYuri Pankov * To get a singly linked list in alpha order while inserting entries 544a40ea1a7SYuri Pankov * at the beginning, process directory entries in reverse alpha order. 
545a40ea1a7SYuri Pankov */ 546a40ea1a7SYuri Pankov static int 547a40ea1a7SYuri Pankov #if HAVE_FTS_COMPARE_CONST 548a40ea1a7SYuri Pankov fts_compare(const FTSENT *const *a, const FTSENT *const *b) 549a40ea1a7SYuri Pankov #else 550a40ea1a7SYuri Pankov fts_compare(const FTSENT **a, const FTSENT **b) 551a40ea1a7SYuri Pankov #endif 552a40ea1a7SYuri Pankov { 553a40ea1a7SYuri Pankov return -strcmp((*a)->fts_name, (*b)->fts_name); 554a40ea1a7SYuri Pankov } 555a40ea1a7SYuri Pankov 556a40ea1a7SYuri Pankov /* 557a40ea1a7SYuri Pankov * Scan a directory tree rooted at "basedir" for manpages. 558a40ea1a7SYuri Pankov * We use fts(), scanning directory parts along the way for clues to our 559a40ea1a7SYuri Pankov * section and architecture. 560a40ea1a7SYuri Pankov * 561a40ea1a7SYuri Pankov * If use_all has been specified, grok all files. 562a40ea1a7SYuri Pankov * If not, sanitise paths to the following: 563a40ea1a7SYuri Pankov * 564a40ea1a7SYuri Pankov * [./]man*[/<arch>]/<name>.<section> 565a40ea1a7SYuri Pankov * or 566a40ea1a7SYuri Pankov * [./]cat<section>[/<arch>]/<name>.0 567a40ea1a7SYuri Pankov * 568a40ea1a7SYuri Pankov * TODO: accommodate for multi-language directories. 
569a40ea1a7SYuri Pankov */ 570a40ea1a7SYuri Pankov static int 571a40ea1a7SYuri Pankov treescan(void) 572a40ea1a7SYuri Pankov { 573a40ea1a7SYuri Pankov char buf[PATH_MAX]; 574a40ea1a7SYuri Pankov FTS *f; 575a40ea1a7SYuri Pankov FTSENT *ff; 576a40ea1a7SYuri Pankov struct mlink *mlink; 577a40ea1a7SYuri Pankov int gzip; 578a40ea1a7SYuri Pankov enum form dform; 579a40ea1a7SYuri Pankov char *dsec, *arch, *fsec, *cp; 580a40ea1a7SYuri Pankov const char *path; 581a40ea1a7SYuri Pankov const char *argv[2]; 582a40ea1a7SYuri Pankov 583a40ea1a7SYuri Pankov argv[0] = "."; 584a40ea1a7SYuri Pankov argv[1] = NULL; 585a40ea1a7SYuri Pankov 586a40ea1a7SYuri Pankov f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR, 587a40ea1a7SYuri Pankov fts_compare); 588a40ea1a7SYuri Pankov if (f == NULL) { 589a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 590a40ea1a7SYuri Pankov say("", "&fts_open"); 591a40ea1a7SYuri Pankov return 0; 592a40ea1a7SYuri Pankov } 593a40ea1a7SYuri Pankov 594a40ea1a7SYuri Pankov dsec = arch = NULL; 595a40ea1a7SYuri Pankov dform = FORM_NONE; 596a40ea1a7SYuri Pankov 597a40ea1a7SYuri Pankov while ((ff = fts_read(f)) != NULL) { 598a40ea1a7SYuri Pankov path = ff->fts_path + 2; 599a40ea1a7SYuri Pankov switch (ff->fts_info) { 600a40ea1a7SYuri Pankov 601a40ea1a7SYuri Pankov /* 602a40ea1a7SYuri Pankov * Symbolic links require various sanity checks, 603a40ea1a7SYuri Pankov * then get handled just like regular files. 
604a40ea1a7SYuri Pankov */ 605a40ea1a7SYuri Pankov case FTS_SL: 606a40ea1a7SYuri Pankov if (realpath(path, buf) == NULL) { 607a40ea1a7SYuri Pankov if (warnings) 608a40ea1a7SYuri Pankov say(path, "&realpath"); 609a40ea1a7SYuri Pankov continue; 610a40ea1a7SYuri Pankov } 611a40ea1a7SYuri Pankov if (strstr(buf, basedir) != buf 612a40ea1a7SYuri Pankov #ifdef HOMEBREWDIR 613a40ea1a7SYuri Pankov && strstr(buf, HOMEBREWDIR) != buf 614a40ea1a7SYuri Pankov #endif 615a40ea1a7SYuri Pankov ) { 616a40ea1a7SYuri Pankov if (warnings) say("", 617a40ea1a7SYuri Pankov "%s: outside base directory", buf); 618a40ea1a7SYuri Pankov continue; 619a40ea1a7SYuri Pankov } 620a40ea1a7SYuri Pankov /* Use logical inode to avoid mpages dupe. */ 621a40ea1a7SYuri Pankov if (stat(path, ff->fts_statp) == -1) { 622a40ea1a7SYuri Pankov if (warnings) 623a40ea1a7SYuri Pankov say(path, "&stat"); 624a40ea1a7SYuri Pankov continue; 625a40ea1a7SYuri Pankov } 626a40ea1a7SYuri Pankov /* FALLTHROUGH */ 627a40ea1a7SYuri Pankov 628a40ea1a7SYuri Pankov /* 629a40ea1a7SYuri Pankov * If we're a regular file, add an mlink by using the 630a40ea1a7SYuri Pankov * stored directory data and handling the filename. 631a40ea1a7SYuri Pankov */ 632a40ea1a7SYuri Pankov case FTS_F: 633a40ea1a7SYuri Pankov if ( ! strcmp(path, MANDOC_DB)) 634a40ea1a7SYuri Pankov continue; 635a40ea1a7SYuri Pankov if ( ! 
use_all && ff->fts_level < 2) { 636a40ea1a7SYuri Pankov if (warnings) 637a40ea1a7SYuri Pankov say(path, "Extraneous file"); 638a40ea1a7SYuri Pankov continue; 639a40ea1a7SYuri Pankov } 640a40ea1a7SYuri Pankov gzip = 0; 641a40ea1a7SYuri Pankov fsec = NULL; 642a40ea1a7SYuri Pankov while (fsec == NULL) { 643a40ea1a7SYuri Pankov fsec = strrchr(ff->fts_name, '.'); 644a40ea1a7SYuri Pankov if (fsec == NULL || strcmp(fsec+1, "gz")) 645a40ea1a7SYuri Pankov break; 646a40ea1a7SYuri Pankov gzip = 1; 647a40ea1a7SYuri Pankov *fsec = '\0'; 648a40ea1a7SYuri Pankov fsec = NULL; 649a40ea1a7SYuri Pankov } 650a40ea1a7SYuri Pankov if (fsec == NULL) { 651a40ea1a7SYuri Pankov if ( ! use_all) { 652a40ea1a7SYuri Pankov if (warnings) 653a40ea1a7SYuri Pankov say(path, 654a40ea1a7SYuri Pankov "No filename suffix"); 655a40ea1a7SYuri Pankov continue; 656a40ea1a7SYuri Pankov } 657a40ea1a7SYuri Pankov } else if ( ! strcmp(++fsec, "html")) { 658a40ea1a7SYuri Pankov if (warnings) 659a40ea1a7SYuri Pankov say(path, "Skip html"); 660a40ea1a7SYuri Pankov continue; 661a40ea1a7SYuri Pankov } else if ( ! strcmp(fsec, "ps")) { 662a40ea1a7SYuri Pankov if (warnings) 663a40ea1a7SYuri Pankov say(path, "Skip ps"); 664a40ea1a7SYuri Pankov continue; 665a40ea1a7SYuri Pankov } else if ( ! strcmp(fsec, "pdf")) { 666a40ea1a7SYuri Pankov if (warnings) 667a40ea1a7SYuri Pankov say(path, "Skip pdf"); 668a40ea1a7SYuri Pankov continue; 669a40ea1a7SYuri Pankov } else if ( ! 
use_all && 670a40ea1a7SYuri Pankov ((dform == FORM_SRC && 671a40ea1a7SYuri Pankov strncmp(fsec, dsec, strlen(dsec))) || 672a40ea1a7SYuri Pankov (dform == FORM_CAT && strcmp(fsec, "0")))) { 673a40ea1a7SYuri Pankov if (warnings) 674a40ea1a7SYuri Pankov say(path, "Wrong filename suffix"); 675a40ea1a7SYuri Pankov continue; 676a40ea1a7SYuri Pankov } else 677a40ea1a7SYuri Pankov fsec[-1] = '\0'; 678a40ea1a7SYuri Pankov 679a40ea1a7SYuri Pankov mlink = mandoc_calloc(1, sizeof(struct mlink)); 680a40ea1a7SYuri Pankov if (strlcpy(mlink->file, path, 681a40ea1a7SYuri Pankov sizeof(mlink->file)) >= 682a40ea1a7SYuri Pankov sizeof(mlink->file)) { 683a40ea1a7SYuri Pankov say(path, "Filename too long"); 684a40ea1a7SYuri Pankov free(mlink); 685a40ea1a7SYuri Pankov continue; 686a40ea1a7SYuri Pankov } 687a40ea1a7SYuri Pankov mlink->dform = dform; 688a40ea1a7SYuri Pankov mlink->dsec = dsec; 689a40ea1a7SYuri Pankov mlink->arch = arch; 690a40ea1a7SYuri Pankov mlink->name = ff->fts_name; 691a40ea1a7SYuri Pankov mlink->fsec = fsec; 692a40ea1a7SYuri Pankov mlink->gzip = gzip; 693a40ea1a7SYuri Pankov mlink_add(mlink, ff->fts_statp); 694a40ea1a7SYuri Pankov continue; 695a40ea1a7SYuri Pankov 696a40ea1a7SYuri Pankov case FTS_D: 697a40ea1a7SYuri Pankov case FTS_DP: 698a40ea1a7SYuri Pankov break; 699a40ea1a7SYuri Pankov 700a40ea1a7SYuri Pankov default: 701a40ea1a7SYuri Pankov if (warnings) 702a40ea1a7SYuri Pankov say(path, "Not a regular file"); 703a40ea1a7SYuri Pankov continue; 704a40ea1a7SYuri Pankov } 705a40ea1a7SYuri Pankov 706a40ea1a7SYuri Pankov switch (ff->fts_level) { 707a40ea1a7SYuri Pankov case 0: 708a40ea1a7SYuri Pankov /* Ignore the root directory. */ 709a40ea1a7SYuri Pankov break; 710a40ea1a7SYuri Pankov case 1: 711a40ea1a7SYuri Pankov /* 712a40ea1a7SYuri Pankov * This might contain manX/ or catX/. 713a40ea1a7SYuri Pankov * Try to infer this from the name. 714a40ea1a7SYuri Pankov * If we're not in use_all, enforce it. 
715a40ea1a7SYuri Pankov */ 716a40ea1a7SYuri Pankov cp = ff->fts_name; 717a40ea1a7SYuri Pankov if (ff->fts_info == FTS_DP) { 718a40ea1a7SYuri Pankov dform = FORM_NONE; 719a40ea1a7SYuri Pankov dsec = NULL; 720a40ea1a7SYuri Pankov break; 721a40ea1a7SYuri Pankov } 722a40ea1a7SYuri Pankov 723a40ea1a7SYuri Pankov if ( ! strncmp(cp, "man", 3)) { 724a40ea1a7SYuri Pankov dform = FORM_SRC; 725a40ea1a7SYuri Pankov dsec = cp + 3; 726a40ea1a7SYuri Pankov } else if ( ! strncmp(cp, "cat", 3)) { 727a40ea1a7SYuri Pankov dform = FORM_CAT; 728a40ea1a7SYuri Pankov dsec = cp + 3; 729a40ea1a7SYuri Pankov } else { 730a40ea1a7SYuri Pankov dform = FORM_NONE; 731a40ea1a7SYuri Pankov dsec = NULL; 732a40ea1a7SYuri Pankov } 733a40ea1a7SYuri Pankov 734a40ea1a7SYuri Pankov if (dsec != NULL || use_all) 735a40ea1a7SYuri Pankov break; 736a40ea1a7SYuri Pankov 737a40ea1a7SYuri Pankov if (warnings) 738a40ea1a7SYuri Pankov say(path, "Unknown directory part"); 739a40ea1a7SYuri Pankov fts_set(f, ff, FTS_SKIP); 740a40ea1a7SYuri Pankov break; 741a40ea1a7SYuri Pankov case 2: 742a40ea1a7SYuri Pankov /* 743a40ea1a7SYuri Pankov * Possibly our architecture. 744a40ea1a7SYuri Pankov * If we're descending, keep tabs on it. 
745a40ea1a7SYuri Pankov */ 746a40ea1a7SYuri Pankov if (ff->fts_info != FTS_DP && dsec != NULL) 747a40ea1a7SYuri Pankov arch = ff->fts_name; 748a40ea1a7SYuri Pankov else 749a40ea1a7SYuri Pankov arch = NULL; 750a40ea1a7SYuri Pankov break; 751a40ea1a7SYuri Pankov default: 752a40ea1a7SYuri Pankov if (ff->fts_info == FTS_DP || use_all) 753a40ea1a7SYuri Pankov break; 754a40ea1a7SYuri Pankov if (warnings) 755a40ea1a7SYuri Pankov say(path, "Extraneous directory part"); 756a40ea1a7SYuri Pankov fts_set(f, ff, FTS_SKIP); 757a40ea1a7SYuri Pankov break; 758a40ea1a7SYuri Pankov } 759a40ea1a7SYuri Pankov } 760a40ea1a7SYuri Pankov 761a40ea1a7SYuri Pankov fts_close(f); 762a40ea1a7SYuri Pankov return 1; 763a40ea1a7SYuri Pankov } 764a40ea1a7SYuri Pankov 765a40ea1a7SYuri Pankov /* 766a40ea1a7SYuri Pankov * Add a file to the mlinks table. 767a40ea1a7SYuri Pankov * Do not verify that it's a "valid" looking manpage (we'll do that 768a40ea1a7SYuri Pankov * later). 769a40ea1a7SYuri Pankov * 770a40ea1a7SYuri Pankov * Try to infer the manual section, architecture, and page name from the 771a40ea1a7SYuri Pankov * path, assuming it looks like 772a40ea1a7SYuri Pankov * 773a40ea1a7SYuri Pankov * [./]man*[/<arch>]/<name>.<section> 774a40ea1a7SYuri Pankov * or 775a40ea1a7SYuri Pankov * [./]cat<section>[/<arch>]/<name>.0 776a40ea1a7SYuri Pankov * 777a40ea1a7SYuri Pankov * See treescan() for the fts(3) version of this. 
778a40ea1a7SYuri Pankov */ 779a40ea1a7SYuri Pankov static void 780a40ea1a7SYuri Pankov filescan(const char *file) 781a40ea1a7SYuri Pankov { 782a40ea1a7SYuri Pankov char buf[PATH_MAX]; 783a40ea1a7SYuri Pankov struct stat st; 784a40ea1a7SYuri Pankov struct mlink *mlink; 785a40ea1a7SYuri Pankov char *p, *start; 786a40ea1a7SYuri Pankov 787a40ea1a7SYuri Pankov assert(use_all); 788a40ea1a7SYuri Pankov 789a40ea1a7SYuri Pankov if (0 == strncmp(file, "./", 2)) 790a40ea1a7SYuri Pankov file += 2; 791a40ea1a7SYuri Pankov 792a40ea1a7SYuri Pankov /* 793a40ea1a7SYuri Pankov * We have to do lstat(2) before realpath(3) loses 794a40ea1a7SYuri Pankov * the information whether this is a symbolic link. 795a40ea1a7SYuri Pankov * We need to know that because for symbolic links, 796a40ea1a7SYuri Pankov * we want to use the orginal file name, while for 797a40ea1a7SYuri Pankov * regular files, we want to use the real path. 798a40ea1a7SYuri Pankov */ 799a40ea1a7SYuri Pankov if (-1 == lstat(file, &st)) { 800a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 801a40ea1a7SYuri Pankov say(file, "&lstat"); 802a40ea1a7SYuri Pankov return; 803a40ea1a7SYuri Pankov } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) { 804a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 805a40ea1a7SYuri Pankov say(file, "Not a regular file"); 806a40ea1a7SYuri Pankov return; 807a40ea1a7SYuri Pankov } 808a40ea1a7SYuri Pankov 809a40ea1a7SYuri Pankov /* 810a40ea1a7SYuri Pankov * We have to resolve the file name to the real path 811a40ea1a7SYuri Pankov * in any case for the base directory check. 
812a40ea1a7SYuri Pankov */ 813a40ea1a7SYuri Pankov if (NULL == realpath(file, buf)) { 814a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 815a40ea1a7SYuri Pankov say(file, "&realpath"); 816a40ea1a7SYuri Pankov return; 817a40ea1a7SYuri Pankov } 818a40ea1a7SYuri Pankov 819a40ea1a7SYuri Pankov if (OP_TEST == op) 820a40ea1a7SYuri Pankov start = buf; 821a40ea1a7SYuri Pankov else if (strstr(buf, basedir) == buf) 822a40ea1a7SYuri Pankov start = buf + strlen(basedir); 823a40ea1a7SYuri Pankov #ifdef HOMEBREWDIR 824a40ea1a7SYuri Pankov else if (strstr(buf, HOMEBREWDIR) == buf) 825a40ea1a7SYuri Pankov start = buf; 826a40ea1a7SYuri Pankov #endif 827a40ea1a7SYuri Pankov else { 828a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 829a40ea1a7SYuri Pankov say("", "%s: outside base directory", buf); 830a40ea1a7SYuri Pankov return; 831a40ea1a7SYuri Pankov } 832a40ea1a7SYuri Pankov 833a40ea1a7SYuri Pankov /* 834a40ea1a7SYuri Pankov * Now we are sure the file is inside our tree. 835a40ea1a7SYuri Pankov * If it is a symbolic link, ignore the real path 836a40ea1a7SYuri Pankov * and use the original name. 837a40ea1a7SYuri Pankov * This implies passing stuff like "cat1/../man1/foo.1" 838a40ea1a7SYuri Pankov * on the command line won't work. So don't do that. 839a40ea1a7SYuri Pankov * Note the stat(2) can still fail if the link target 840a40ea1a7SYuri Pankov * doesn't exist. 
841a40ea1a7SYuri Pankov */ 842a40ea1a7SYuri Pankov if (S_IFLNK & st.st_mode) { 843a40ea1a7SYuri Pankov if (-1 == stat(buf, &st)) { 844a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 845a40ea1a7SYuri Pankov say(file, "&stat"); 846a40ea1a7SYuri Pankov return; 847a40ea1a7SYuri Pankov } 848a40ea1a7SYuri Pankov if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) { 849a40ea1a7SYuri Pankov say(file, "Filename too long"); 850a40ea1a7SYuri Pankov return; 851a40ea1a7SYuri Pankov } 852a40ea1a7SYuri Pankov start = buf; 853a40ea1a7SYuri Pankov if (OP_TEST != op && strstr(buf, basedir) == buf) 854a40ea1a7SYuri Pankov start += strlen(basedir); 855a40ea1a7SYuri Pankov } 856a40ea1a7SYuri Pankov 857a40ea1a7SYuri Pankov mlink = mandoc_calloc(1, sizeof(struct mlink)); 858a40ea1a7SYuri Pankov mlink->dform = FORM_NONE; 859a40ea1a7SYuri Pankov if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= 860a40ea1a7SYuri Pankov sizeof(mlink->file)) { 861a40ea1a7SYuri Pankov say(start, "Filename too long"); 862a40ea1a7SYuri Pankov free(mlink); 863a40ea1a7SYuri Pankov return; 864a40ea1a7SYuri Pankov } 865a40ea1a7SYuri Pankov 866a40ea1a7SYuri Pankov /* 867a40ea1a7SYuri Pankov * In test mode or when the original name is absolute 868a40ea1a7SYuri Pankov * but outside our tree, guess the base directory. 869a40ea1a7SYuri Pankov */ 870a40ea1a7SYuri Pankov 871a40ea1a7SYuri Pankov if (op == OP_TEST || (start == buf && *start == '/')) { 872a40ea1a7SYuri Pankov if (strncmp(buf, "man/", 4) == 0) 873a40ea1a7SYuri Pankov start = buf + 4; 874a40ea1a7SYuri Pankov else if ((start = strstr(buf, "/man/")) != NULL) 875a40ea1a7SYuri Pankov start += 5; 876a40ea1a7SYuri Pankov else 877a40ea1a7SYuri Pankov start = buf; 878a40ea1a7SYuri Pankov } 879a40ea1a7SYuri Pankov 880a40ea1a7SYuri Pankov /* 881a40ea1a7SYuri Pankov * First try to guess our directory structure. 882a40ea1a7SYuri Pankov * If we find a separator, try to look for man* or cat*. 
883a40ea1a7SYuri Pankov * If we find one of these and what's underneath is a directory, 884a40ea1a7SYuri Pankov * assume it's an architecture. 885a40ea1a7SYuri Pankov */ 886a40ea1a7SYuri Pankov if (NULL != (p = strchr(start, '/'))) { 887a40ea1a7SYuri Pankov *p++ = '\0'; 888a40ea1a7SYuri Pankov if (0 == strncmp(start, "man", 3)) { 889a40ea1a7SYuri Pankov mlink->dform = FORM_SRC; 890a40ea1a7SYuri Pankov mlink->dsec = start + 3; 891a40ea1a7SYuri Pankov } else if (0 == strncmp(start, "cat", 3)) { 892a40ea1a7SYuri Pankov mlink->dform = FORM_CAT; 893a40ea1a7SYuri Pankov mlink->dsec = start + 3; 894a40ea1a7SYuri Pankov } 895a40ea1a7SYuri Pankov 896a40ea1a7SYuri Pankov start = p; 897a40ea1a7SYuri Pankov if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { 898a40ea1a7SYuri Pankov *p++ = '\0'; 899a40ea1a7SYuri Pankov mlink->arch = start; 900a40ea1a7SYuri Pankov start = p; 901a40ea1a7SYuri Pankov } 902a40ea1a7SYuri Pankov } 903a40ea1a7SYuri Pankov 904a40ea1a7SYuri Pankov /* 905a40ea1a7SYuri Pankov * Now check the file suffix. 906a40ea1a7SYuri Pankov * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. 907a40ea1a7SYuri Pankov */ 908a40ea1a7SYuri Pankov p = strrchr(start, '\0'); 909a40ea1a7SYuri Pankov while (p-- > start && '/' != *p && '.' != *p) 910a40ea1a7SYuri Pankov /* Loop. */ ; 911a40ea1a7SYuri Pankov 912a40ea1a7SYuri Pankov if ('.' == *p) { 913a40ea1a7SYuri Pankov *p++ = '\0'; 914a40ea1a7SYuri Pankov mlink->fsec = p; 915a40ea1a7SYuri Pankov } 916a40ea1a7SYuri Pankov 917a40ea1a7SYuri Pankov /* 918a40ea1a7SYuri Pankov * Now try to parse the name. 919a40ea1a7SYuri Pankov * Use the filename portion of the path. 
920a40ea1a7SYuri Pankov */ 921a40ea1a7SYuri Pankov mlink->name = start; 922a40ea1a7SYuri Pankov if (NULL != (p = strrchr(start, '/'))) { 923a40ea1a7SYuri Pankov mlink->name = p + 1; 924a40ea1a7SYuri Pankov *p = '\0'; 925a40ea1a7SYuri Pankov } 926a40ea1a7SYuri Pankov mlink_add(mlink, &st); 927a40ea1a7SYuri Pankov } 928a40ea1a7SYuri Pankov 929a40ea1a7SYuri Pankov static void 930a40ea1a7SYuri Pankov mlink_add(struct mlink *mlink, const struct stat *st) 931a40ea1a7SYuri Pankov { 932a40ea1a7SYuri Pankov struct inodev inodev; 933a40ea1a7SYuri Pankov struct mpage *mpage; 934a40ea1a7SYuri Pankov unsigned int slot; 935a40ea1a7SYuri Pankov 936a40ea1a7SYuri Pankov assert(NULL != mlink->file); 937a40ea1a7SYuri Pankov 938a40ea1a7SYuri Pankov mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); 939a40ea1a7SYuri Pankov mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); 940a40ea1a7SYuri Pankov mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); 941a40ea1a7SYuri Pankov mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); 942a40ea1a7SYuri Pankov 943a40ea1a7SYuri Pankov if ('0' == *mlink->fsec) { 944a40ea1a7SYuri Pankov free(mlink->fsec); 945a40ea1a7SYuri Pankov mlink->fsec = mandoc_strdup(mlink->dsec); 946a40ea1a7SYuri Pankov mlink->fform = FORM_CAT; 947a40ea1a7SYuri Pankov } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) 948a40ea1a7SYuri Pankov mlink->fform = FORM_SRC; 949a40ea1a7SYuri Pankov else 950a40ea1a7SYuri Pankov mlink->fform = FORM_NONE; 951a40ea1a7SYuri Pankov 952a40ea1a7SYuri Pankov slot = ohash_qlookup(&mlinks, mlink->file); 953a40ea1a7SYuri Pankov assert(NULL == ohash_find(&mlinks, slot)); 954a40ea1a7SYuri Pankov ohash_insert(&mlinks, slot, mlink); 955a40ea1a7SYuri Pankov 956a40ea1a7SYuri Pankov memset(&inodev, 0, sizeof(inodev)); /* Clear padding. 
*/ 957a40ea1a7SYuri Pankov inodev.st_ino = st->st_ino; 958a40ea1a7SYuri Pankov inodev.st_dev = st->st_dev; 959a40ea1a7SYuri Pankov slot = ohash_lookup_memory(&mpages, (char *)&inodev, 960a40ea1a7SYuri Pankov sizeof(struct inodev), inodev.st_ino); 961a40ea1a7SYuri Pankov mpage = ohash_find(&mpages, slot); 962a40ea1a7SYuri Pankov if (NULL == mpage) { 963a40ea1a7SYuri Pankov mpage = mandoc_calloc(1, sizeof(struct mpage)); 964a40ea1a7SYuri Pankov mpage->inodev.st_ino = inodev.st_ino; 965a40ea1a7SYuri Pankov mpage->inodev.st_dev = inodev.st_dev; 966a40ea1a7SYuri Pankov mpage->form = FORM_NONE; 967a40ea1a7SYuri Pankov mpage->next = mpage_head; 968a40ea1a7SYuri Pankov mpage_head = mpage; 969a40ea1a7SYuri Pankov ohash_insert(&mpages, slot, mpage); 970a40ea1a7SYuri Pankov } else 971a40ea1a7SYuri Pankov mlink->next = mpage->mlinks; 972a40ea1a7SYuri Pankov mpage->mlinks = mlink; 973a40ea1a7SYuri Pankov mlink->mpage = mpage; 974a40ea1a7SYuri Pankov } 975a40ea1a7SYuri Pankov 976a40ea1a7SYuri Pankov static void 977a40ea1a7SYuri Pankov mlink_free(struct mlink *mlink) 978a40ea1a7SYuri Pankov { 979a40ea1a7SYuri Pankov 980a40ea1a7SYuri Pankov free(mlink->dsec); 981a40ea1a7SYuri Pankov free(mlink->arch); 982a40ea1a7SYuri Pankov free(mlink->name); 983a40ea1a7SYuri Pankov free(mlink->fsec); 984a40ea1a7SYuri Pankov free(mlink); 985a40ea1a7SYuri Pankov } 986a40ea1a7SYuri Pankov 987a40ea1a7SYuri Pankov static void 988a40ea1a7SYuri Pankov mpages_free(void) 989a40ea1a7SYuri Pankov { 990a40ea1a7SYuri Pankov struct mpage *mpage; 991a40ea1a7SYuri Pankov struct mlink *mlink; 992a40ea1a7SYuri Pankov 993a40ea1a7SYuri Pankov while ((mpage = mpage_head) != NULL) { 994a40ea1a7SYuri Pankov while ((mlink = mpage->mlinks) != NULL) { 995a40ea1a7SYuri Pankov mpage->mlinks = mlink->next; 996a40ea1a7SYuri Pankov mlink_free(mlink); 997a40ea1a7SYuri Pankov } 998a40ea1a7SYuri Pankov mpage_head = mpage->next; 999a40ea1a7SYuri Pankov free(mpage->sec); 1000a40ea1a7SYuri Pankov free(mpage->arch); 
1001a40ea1a7SYuri Pankov free(mpage->title); 1002a40ea1a7SYuri Pankov free(mpage->desc); 1003a40ea1a7SYuri Pankov free(mpage); 1004a40ea1a7SYuri Pankov } 1005a40ea1a7SYuri Pankov } 1006a40ea1a7SYuri Pankov 1007a40ea1a7SYuri Pankov /* 1008a40ea1a7SYuri Pankov * For each mlink to the mpage, check whether the path looks like 1009a40ea1a7SYuri Pankov * it is formatted, and if it does, check whether a source manual 1010a40ea1a7SYuri Pankov * exists by the same name, ignoring the suffix. 1011a40ea1a7SYuri Pankov * If both conditions hold, drop the mlink. 1012a40ea1a7SYuri Pankov */ 1013a40ea1a7SYuri Pankov static void 1014a40ea1a7SYuri Pankov mlinks_undupe(struct mpage *mpage) 1015a40ea1a7SYuri Pankov { 1016a40ea1a7SYuri Pankov char buf[PATH_MAX]; 1017a40ea1a7SYuri Pankov struct mlink **prev; 1018a40ea1a7SYuri Pankov struct mlink *mlink; 1019a40ea1a7SYuri Pankov char *bufp; 1020a40ea1a7SYuri Pankov 1021a40ea1a7SYuri Pankov mpage->form = FORM_CAT; 1022a40ea1a7SYuri Pankov prev = &mpage->mlinks; 1023a40ea1a7SYuri Pankov while (NULL != (mlink = *prev)) { 1024a40ea1a7SYuri Pankov if (FORM_CAT != mlink->dform) { 1025a40ea1a7SYuri Pankov mpage->form = FORM_NONE; 1026a40ea1a7SYuri Pankov goto nextlink; 1027a40ea1a7SYuri Pankov } 1028a40ea1a7SYuri Pankov (void)strlcpy(buf, mlink->file, sizeof(buf)); 1029a40ea1a7SYuri Pankov bufp = strstr(buf, "cat"); 1030a40ea1a7SYuri Pankov assert(NULL != bufp); 1031a40ea1a7SYuri Pankov memcpy(bufp, "man", 3); 1032a40ea1a7SYuri Pankov if (NULL != (bufp = strrchr(buf, '.'))) 1033a40ea1a7SYuri Pankov *++bufp = '\0'; 1034a40ea1a7SYuri Pankov (void)strlcat(buf, mlink->dsec, sizeof(buf)); 1035a40ea1a7SYuri Pankov if (NULL == ohash_find(&mlinks, 1036a40ea1a7SYuri Pankov ohash_qlookup(&mlinks, buf))) 1037a40ea1a7SYuri Pankov goto nextlink; 1038a40ea1a7SYuri Pankov if (warnings) 1039a40ea1a7SYuri Pankov say(mlink->file, "Man source exists: %s", buf); 1040a40ea1a7SYuri Pankov if (use_all) 1041a40ea1a7SYuri Pankov goto nextlink; 1042a40ea1a7SYuri Pankov 
*prev = mlink->next; 1043a40ea1a7SYuri Pankov mlink_free(mlink); 1044a40ea1a7SYuri Pankov continue; 1045a40ea1a7SYuri Pankov nextlink: 1046a40ea1a7SYuri Pankov prev = &(*prev)->next; 1047a40ea1a7SYuri Pankov } 1048a40ea1a7SYuri Pankov } 1049a40ea1a7SYuri Pankov 1050a40ea1a7SYuri Pankov static void 1051a40ea1a7SYuri Pankov mlink_check(struct mpage *mpage, struct mlink *mlink) 1052a40ea1a7SYuri Pankov { 1053a40ea1a7SYuri Pankov struct str *str; 1054a40ea1a7SYuri Pankov unsigned int slot; 1055a40ea1a7SYuri Pankov 1056a40ea1a7SYuri Pankov /* 1057a40ea1a7SYuri Pankov * Check whether the manual section given in a file 1058a40ea1a7SYuri Pankov * agrees with the directory where the file is located. 1059a40ea1a7SYuri Pankov * Some manuals have suffixes like (3p) on their 1060a40ea1a7SYuri Pankov * section number either inside the file or in the 1061a40ea1a7SYuri Pankov * directory name, some are linked into more than one 1062a40ea1a7SYuri Pankov * section, like encrypt(1) = makekey(8). 1063a40ea1a7SYuri Pankov */ 1064a40ea1a7SYuri Pankov 1065a40ea1a7SYuri Pankov if (FORM_SRC == mpage->form && 1066a40ea1a7SYuri Pankov strcasecmp(mpage->sec, mlink->dsec)) 1067a40ea1a7SYuri Pankov say(mlink->file, "Section \"%s\" manual in %s directory", 1068a40ea1a7SYuri Pankov mpage->sec, mlink->dsec); 1069a40ea1a7SYuri Pankov 1070a40ea1a7SYuri Pankov /* 1071a40ea1a7SYuri Pankov * Manual page directories exist for each kernel 1072a40ea1a7SYuri Pankov * architecture as returned by machine(1). 1073a40ea1a7SYuri Pankov * However, many manuals only depend on the 1074a40ea1a7SYuri Pankov * application architecture as returned by arch(1). 1075a40ea1a7SYuri Pankov * For example, some (2/ARM) manuals are shared 1076a40ea1a7SYuri Pankov * across the "armish" and "zaurus" kernel 1077a40ea1a7SYuri Pankov * architectures. 
1078a40ea1a7SYuri Pankov * A few manuals are even shared across completely 1079a40ea1a7SYuri Pankov * different architectures, for example fdformat(1) 1080a40ea1a7SYuri Pankov * on amd64, i386, and sparc64. 1081a40ea1a7SYuri Pankov */ 1082a40ea1a7SYuri Pankov 1083a40ea1a7SYuri Pankov if (strcasecmp(mpage->arch, mlink->arch)) 1084a40ea1a7SYuri Pankov say(mlink->file, "Architecture \"%s\" manual in " 1085a40ea1a7SYuri Pankov "\"%s\" directory", mpage->arch, mlink->arch); 1086a40ea1a7SYuri Pankov 1087a40ea1a7SYuri Pankov /* 1088a40ea1a7SYuri Pankov * XXX 1089a40ea1a7SYuri Pankov * parse_cat() doesn't set NAME_TITLE yet. 1090a40ea1a7SYuri Pankov */ 1091a40ea1a7SYuri Pankov 1092a40ea1a7SYuri Pankov if (FORM_CAT == mpage->form) 1093a40ea1a7SYuri Pankov return; 1094a40ea1a7SYuri Pankov 1095a40ea1a7SYuri Pankov /* 1096a40ea1a7SYuri Pankov * Check whether this mlink 1097a40ea1a7SYuri Pankov * appears as a name in the NAME section. 1098a40ea1a7SYuri Pankov */ 1099a40ea1a7SYuri Pankov 1100a40ea1a7SYuri Pankov slot = ohash_qlookup(&names, mlink->name); 1101a40ea1a7SYuri Pankov str = ohash_find(&names, slot); 1102a40ea1a7SYuri Pankov assert(NULL != str); 1103a40ea1a7SYuri Pankov if ( ! (NAME_TITLE & str->mask)) 1104a40ea1a7SYuri Pankov say(mlink->file, "Name missing in NAME section"); 1105a40ea1a7SYuri Pankov } 1106a40ea1a7SYuri Pankov 1107a40ea1a7SYuri Pankov /* 1108a40ea1a7SYuri Pankov * Run through the files in the global vector "mpages" 1109a40ea1a7SYuri Pankov * and add them to the database specified in "basedir". 1110a40ea1a7SYuri Pankov * 1111a40ea1a7SYuri Pankov * This handles the parsing scheme itself, using the cues of directory 1112a40ea1a7SYuri Pankov * and filename to determine whether the file is parsable or not. 
1113a40ea1a7SYuri Pankov */ 1114a40ea1a7SYuri Pankov static void 1115a40ea1a7SYuri Pankov mpages_merge(struct dba *dba, struct mparse *mp) 1116a40ea1a7SYuri Pankov { 1117a40ea1a7SYuri Pankov struct mpage *mpage, *mpage_dest; 1118a40ea1a7SYuri Pankov struct mlink *mlink, *mlink_dest; 1119*cec8643bSMichal Nowak struct roff_meta *meta; 1120a40ea1a7SYuri Pankov char *cp; 1121a40ea1a7SYuri Pankov int fd; 1122a40ea1a7SYuri Pankov 1123a40ea1a7SYuri Pankov for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) { 1124a40ea1a7SYuri Pankov mlinks_undupe(mpage); 1125a40ea1a7SYuri Pankov if ((mlink = mpage->mlinks) == NULL) 1126a40ea1a7SYuri Pankov continue; 1127a40ea1a7SYuri Pankov 1128a40ea1a7SYuri Pankov name_mask = NAME_MASK; 1129a40ea1a7SYuri Pankov mandoc_ohash_init(&names, 4, offsetof(struct str, key)); 1130a40ea1a7SYuri Pankov mandoc_ohash_init(&strings, 6, offsetof(struct str, key)); 1131a40ea1a7SYuri Pankov mparse_reset(mp); 1132*cec8643bSMichal Nowak meta = NULL; 1133a40ea1a7SYuri Pankov 1134a40ea1a7SYuri Pankov if ((fd = mparse_open(mp, mlink->file)) == -1) { 1135a40ea1a7SYuri Pankov say(mlink->file, "&open"); 1136a40ea1a7SYuri Pankov goto nextpage; 1137a40ea1a7SYuri Pankov } 1138a40ea1a7SYuri Pankov 1139a40ea1a7SYuri Pankov /* 1140a40ea1a7SYuri Pankov * Interpret the file as mdoc(7) or man(7) source 1141a40ea1a7SYuri Pankov * code, unless it is known to be formatted. 
1142a40ea1a7SYuri Pankov */ 1143a40ea1a7SYuri Pankov if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) { 1144a40ea1a7SYuri Pankov mparse_readfd(mp, fd, mlink->file); 1145a40ea1a7SYuri Pankov close(fd); 1146a40ea1a7SYuri Pankov fd = -1; 1147*cec8643bSMichal Nowak meta = mparse_result(mp); 1148a40ea1a7SYuri Pankov } 1149a40ea1a7SYuri Pankov 1150*cec8643bSMichal Nowak if (meta != NULL && meta->sodest != NULL) { 1151a40ea1a7SYuri Pankov mlink_dest = ohash_find(&mlinks, 1152*cec8643bSMichal Nowak ohash_qlookup(&mlinks, meta->sodest)); 1153a40ea1a7SYuri Pankov if (mlink_dest == NULL) { 1154*cec8643bSMichal Nowak mandoc_asprintf(&cp, "%s.gz", meta->sodest); 1155a40ea1a7SYuri Pankov mlink_dest = ohash_find(&mlinks, 1156a40ea1a7SYuri Pankov ohash_qlookup(&mlinks, cp)); 1157a40ea1a7SYuri Pankov free(cp); 1158a40ea1a7SYuri Pankov } 1159a40ea1a7SYuri Pankov if (mlink_dest != NULL) { 1160a40ea1a7SYuri Pankov 1161a40ea1a7SYuri Pankov /* The .so target exists. */ 1162a40ea1a7SYuri Pankov 1163a40ea1a7SYuri Pankov mpage_dest = mlink_dest->mpage; 1164a40ea1a7SYuri Pankov while (1) { 1165a40ea1a7SYuri Pankov mlink->mpage = mpage_dest; 1166a40ea1a7SYuri Pankov 1167a40ea1a7SYuri Pankov /* 1168a40ea1a7SYuri Pankov * If the target was already 1169a40ea1a7SYuri Pankov * processed, add the links 1170a40ea1a7SYuri Pankov * to the database now. 1171a40ea1a7SYuri Pankov * Otherwise, this will 1172a40ea1a7SYuri Pankov * happen when we come 1173a40ea1a7SYuri Pankov * to the target. 1174a40ea1a7SYuri Pankov */ 1175a40ea1a7SYuri Pankov 1176a40ea1a7SYuri Pankov if (mpage_dest->dba != NULL) 1177a40ea1a7SYuri Pankov dbadd_mlink(mlink); 1178a40ea1a7SYuri Pankov 1179a40ea1a7SYuri Pankov if (mlink->next == NULL) 1180a40ea1a7SYuri Pankov break; 1181a40ea1a7SYuri Pankov mlink = mlink->next; 1182a40ea1a7SYuri Pankov } 1183a40ea1a7SYuri Pankov 1184a40ea1a7SYuri Pankov /* Move all links to the target. 
*/ 1185a40ea1a7SYuri Pankov 1186a40ea1a7SYuri Pankov mlink->next = mlink_dest->next; 1187a40ea1a7SYuri Pankov mlink_dest->next = mpage->mlinks; 1188a40ea1a7SYuri Pankov mpage->mlinks = NULL; 1189a40ea1a7SYuri Pankov } 1190a40ea1a7SYuri Pankov goto nextpage; 1191*cec8643bSMichal Nowak } else if (meta != NULL && meta->macroset == MACROSET_MDOC) { 1192a40ea1a7SYuri Pankov mpage->form = FORM_SRC; 1193*cec8643bSMichal Nowak mpage->sec = meta->msec; 1194a40ea1a7SYuri Pankov mpage->sec = mandoc_strdup( 1195a40ea1a7SYuri Pankov mpage->sec == NULL ? "" : mpage->sec); 1196*cec8643bSMichal Nowak mpage->arch = meta->arch; 1197a40ea1a7SYuri Pankov mpage->arch = mandoc_strdup( 1198a40ea1a7SYuri Pankov mpage->arch == NULL ? "" : mpage->arch); 1199*cec8643bSMichal Nowak mpage->title = mandoc_strdup(meta->title); 1200*cec8643bSMichal Nowak } else if (meta != NULL && meta->macroset == MACROSET_MAN) { 1201*cec8643bSMichal Nowak if (*meta->msec != '\0' || *meta->title != '\0') { 1202a40ea1a7SYuri Pankov mpage->form = FORM_SRC; 1203*cec8643bSMichal Nowak mpage->sec = mandoc_strdup(meta->msec); 1204a40ea1a7SYuri Pankov mpage->arch = mandoc_strdup(mlink->arch); 1205*cec8643bSMichal Nowak mpage->title = mandoc_strdup(meta->title); 1206a40ea1a7SYuri Pankov } else 1207*cec8643bSMichal Nowak meta = NULL; 1208a40ea1a7SYuri Pankov } 1209a40ea1a7SYuri Pankov 1210a40ea1a7SYuri Pankov assert(mpage->desc == NULL); 1211*cec8643bSMichal Nowak if (meta == NULL) { 1212a40ea1a7SYuri Pankov mpage->form = FORM_CAT; 1213a40ea1a7SYuri Pankov mpage->sec = mandoc_strdup(mlink->dsec); 1214a40ea1a7SYuri Pankov mpage->arch = mandoc_strdup(mlink->arch); 1215a40ea1a7SYuri Pankov mpage->title = mandoc_strdup(mlink->name); 1216a40ea1a7SYuri Pankov parse_cat(mpage, fd); 1217*cec8643bSMichal Nowak } else if (meta->macroset == MACROSET_MDOC) 1218*cec8643bSMichal Nowak parse_mdoc(mpage, meta, meta->first); 1219a40ea1a7SYuri Pankov else 1220*cec8643bSMichal Nowak parse_man(mpage, meta, meta->first); 1221a40ea1a7SYuri 
Pankov if (mpage->desc == NULL) { 1222a40ea1a7SYuri Pankov mpage->desc = mandoc_strdup(mlink->name); 1223a40ea1a7SYuri Pankov if (warnings) 1224a40ea1a7SYuri Pankov say(mlink->file, "No one-line description, " 1225a40ea1a7SYuri Pankov "using filename \"%s\"", mlink->name); 1226a40ea1a7SYuri Pankov } 1227a40ea1a7SYuri Pankov 1228a40ea1a7SYuri Pankov for (mlink = mpage->mlinks; 1229a40ea1a7SYuri Pankov mlink != NULL; 1230a40ea1a7SYuri Pankov mlink = mlink->next) { 1231a40ea1a7SYuri Pankov putkey(mpage, mlink->name, NAME_FILE); 1232a40ea1a7SYuri Pankov if (warnings && !use_all) 1233a40ea1a7SYuri Pankov mlink_check(mpage, mlink); 1234a40ea1a7SYuri Pankov } 1235a40ea1a7SYuri Pankov 1236a40ea1a7SYuri Pankov dbadd(dba, mpage); 1237a40ea1a7SYuri Pankov 1238a40ea1a7SYuri Pankov nextpage: 1239a40ea1a7SYuri Pankov ohash_delete(&strings); 1240a40ea1a7SYuri Pankov ohash_delete(&names); 1241a40ea1a7SYuri Pankov } 1242a40ea1a7SYuri Pankov } 1243a40ea1a7SYuri Pankov 1244a40ea1a7SYuri Pankov static void 1245a40ea1a7SYuri Pankov parse_cat(struct mpage *mpage, int fd) 1246a40ea1a7SYuri Pankov { 1247a40ea1a7SYuri Pankov FILE *stream; 1248a40ea1a7SYuri Pankov struct mlink *mlink; 1249a40ea1a7SYuri Pankov char *line, *p, *title, *sec; 1250a40ea1a7SYuri Pankov size_t linesz, plen, titlesz; 1251a40ea1a7SYuri Pankov ssize_t len; 1252a40ea1a7SYuri Pankov int offs; 1253a40ea1a7SYuri Pankov 1254a40ea1a7SYuri Pankov mlink = mpage->mlinks; 1255a40ea1a7SYuri Pankov stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r"); 1256a40ea1a7SYuri Pankov if (stream == NULL) { 1257a40ea1a7SYuri Pankov if (fd != -1) 1258a40ea1a7SYuri Pankov close(fd); 1259a40ea1a7SYuri Pankov if (warnings) 1260a40ea1a7SYuri Pankov say(mlink->file, "&fopen"); 1261a40ea1a7SYuri Pankov return; 1262a40ea1a7SYuri Pankov } 1263a40ea1a7SYuri Pankov 1264a40ea1a7SYuri Pankov line = NULL; 1265a40ea1a7SYuri Pankov linesz = 0; 1266a40ea1a7SYuri Pankov 1267a40ea1a7SYuri Pankov /* Parse the section number from the header line. 
*/ 1268a40ea1a7SYuri Pankov 1269a40ea1a7SYuri Pankov while (getline(&line, &linesz, stream) != -1) { 1270a40ea1a7SYuri Pankov if (*line == '\n') 1271a40ea1a7SYuri Pankov continue; 1272a40ea1a7SYuri Pankov if ((sec = strchr(line, '(')) == NULL) 1273a40ea1a7SYuri Pankov break; 1274a40ea1a7SYuri Pankov if ((p = strchr(++sec, ')')) == NULL) 1275a40ea1a7SYuri Pankov break; 1276a40ea1a7SYuri Pankov free(mpage->sec); 1277a40ea1a7SYuri Pankov mpage->sec = mandoc_strndup(sec, p - sec); 1278a40ea1a7SYuri Pankov if (warnings && *mlink->dsec != '\0' && 1279a40ea1a7SYuri Pankov strcasecmp(mpage->sec, mlink->dsec)) 1280a40ea1a7SYuri Pankov say(mlink->file, 1281a40ea1a7SYuri Pankov "Section \"%s\" manual in %s directory", 1282a40ea1a7SYuri Pankov mpage->sec, mlink->dsec); 1283a40ea1a7SYuri Pankov break; 1284a40ea1a7SYuri Pankov } 1285a40ea1a7SYuri Pankov 1286a40ea1a7SYuri Pankov /* Skip to first blank line. */ 1287a40ea1a7SYuri Pankov 1288a40ea1a7SYuri Pankov while (line == NULL || *line != '\n') 1289a40ea1a7SYuri Pankov if (getline(&line, &linesz, stream) == -1) 1290a40ea1a7SYuri Pankov break; 1291a40ea1a7SYuri Pankov 1292a40ea1a7SYuri Pankov /* 1293a40ea1a7SYuri Pankov * Assume the first line that is not indented 1294a40ea1a7SYuri Pankov * is the first section header. Skip to it. 1295a40ea1a7SYuri Pankov */ 1296a40ea1a7SYuri Pankov 1297a40ea1a7SYuri Pankov while (getline(&line, &linesz, stream) != -1) 1298a40ea1a7SYuri Pankov if (*line != '\n' && *line != ' ') 1299a40ea1a7SYuri Pankov break; 1300a40ea1a7SYuri Pankov 1301a40ea1a7SYuri Pankov /* 1302a40ea1a7SYuri Pankov * Read up until the next section into a buffer. 1303a40ea1a7SYuri Pankov * Strip the leading and trailing newline from each read line, 1304a40ea1a7SYuri Pankov * appending a trailing space. 1305a40ea1a7SYuri Pankov * Ignore empty (whitespace-only) lines. 
1306a40ea1a7SYuri Pankov */ 1307a40ea1a7SYuri Pankov 1308a40ea1a7SYuri Pankov titlesz = 0; 1309a40ea1a7SYuri Pankov title = NULL; 1310a40ea1a7SYuri Pankov 1311a40ea1a7SYuri Pankov while ((len = getline(&line, &linesz, stream)) != -1) { 1312a40ea1a7SYuri Pankov if (*line != ' ') 1313a40ea1a7SYuri Pankov break; 1314a40ea1a7SYuri Pankov offs = 0; 1315a40ea1a7SYuri Pankov while (isspace((unsigned char)line[offs])) 1316a40ea1a7SYuri Pankov offs++; 1317a40ea1a7SYuri Pankov if (line[offs] == '\0') 1318a40ea1a7SYuri Pankov continue; 1319a40ea1a7SYuri Pankov title = mandoc_realloc(title, titlesz + len - offs); 1320a40ea1a7SYuri Pankov memcpy(title + titlesz, line + offs, len - offs); 1321a40ea1a7SYuri Pankov titlesz += len - offs; 1322a40ea1a7SYuri Pankov title[titlesz - 1] = ' '; 1323a40ea1a7SYuri Pankov } 1324a40ea1a7SYuri Pankov free(line); 1325a40ea1a7SYuri Pankov 1326a40ea1a7SYuri Pankov /* 1327a40ea1a7SYuri Pankov * If no page content can be found, or the input line 1328a40ea1a7SYuri Pankov * is already the next section header, or there is no 1329a40ea1a7SYuri Pankov * trailing newline, reuse the page title as the page 1330a40ea1a7SYuri Pankov * description. 1331a40ea1a7SYuri Pankov */ 1332a40ea1a7SYuri Pankov 1333a40ea1a7SYuri Pankov if (NULL == title || '\0' == *title) { 1334a40ea1a7SYuri Pankov if (warnings) 1335a40ea1a7SYuri Pankov say(mlink->file, "Cannot find NAME section"); 1336a40ea1a7SYuri Pankov fclose(stream); 1337a40ea1a7SYuri Pankov free(title); 1338a40ea1a7SYuri Pankov return; 1339a40ea1a7SYuri Pankov } 1340a40ea1a7SYuri Pankov 1341a40ea1a7SYuri Pankov title[titlesz - 1] = '\0'; 1342a40ea1a7SYuri Pankov 1343a40ea1a7SYuri Pankov /* 1344a40ea1a7SYuri Pankov * Skip to the first dash. 1345a40ea1a7SYuri Pankov * Use the remaining line as the description (no more than 70 1346a40ea1a7SYuri Pankov * bytes). 
1347a40ea1a7SYuri Pankov */ 1348a40ea1a7SYuri Pankov 1349a40ea1a7SYuri Pankov if (NULL != (p = strstr(title, "- "))) { 1350a40ea1a7SYuri Pankov for (p += 2; ' ' == *p || '\b' == *p; p++) 1351a40ea1a7SYuri Pankov /* Skip to next word. */ ; 1352a40ea1a7SYuri Pankov } else { 1353a40ea1a7SYuri Pankov if (warnings) 1354a40ea1a7SYuri Pankov say(mlink->file, "No dash in title line, " 1355a40ea1a7SYuri Pankov "reusing \"%s\" as one-line description", title); 1356a40ea1a7SYuri Pankov p = title; 1357a40ea1a7SYuri Pankov } 1358a40ea1a7SYuri Pankov 1359a40ea1a7SYuri Pankov plen = strlen(p); 1360a40ea1a7SYuri Pankov 1361a40ea1a7SYuri Pankov /* Strip backspace-encoding from line. */ 1362a40ea1a7SYuri Pankov 1363a40ea1a7SYuri Pankov while (NULL != (line = memchr(p, '\b', plen))) { 1364a40ea1a7SYuri Pankov len = line - p; 1365a40ea1a7SYuri Pankov if (0 == len) { 1366a40ea1a7SYuri Pankov memmove(line, line + 1, plen--); 1367a40ea1a7SYuri Pankov continue; 1368a40ea1a7SYuri Pankov } 1369a40ea1a7SYuri Pankov memmove(line - 1, line + 1, plen - len); 1370a40ea1a7SYuri Pankov plen -= 2; 1371a40ea1a7SYuri Pankov } 1372a40ea1a7SYuri Pankov 13736640c13bSYuri Pankov /* 13746640c13bSYuri Pankov * Cut off excessive one-line descriptions. 13756640c13bSYuri Pankov * Bad pages are not worth better heuristics. 13766640c13bSYuri Pankov */ 13776640c13bSYuri Pankov 13786640c13bSYuri Pankov mpage->desc = mandoc_strndup(p, 150); 1379a40ea1a7SYuri Pankov fclose(stream); 1380a40ea1a7SYuri Pankov free(title); 1381a40ea1a7SYuri Pankov } 1382a40ea1a7SYuri Pankov 1383a40ea1a7SYuri Pankov /* 1384a40ea1a7SYuri Pankov * Put a type/word pair into the word database for this particular file. 
1385a40ea1a7SYuri Pankov */ 1386a40ea1a7SYuri Pankov static void 1387a40ea1a7SYuri Pankov putkey(const struct mpage *mpage, char *value, uint64_t type) 1388a40ea1a7SYuri Pankov { 1389a40ea1a7SYuri Pankov putkeys(mpage, value, strlen(value), type); 1390a40ea1a7SYuri Pankov } 1391a40ea1a7SYuri Pankov 1392a40ea1a7SYuri Pankov /* 1393a40ea1a7SYuri Pankov * Grok all nodes at or below a certain mdoc node into putkey(). 1394a40ea1a7SYuri Pankov */ 1395a40ea1a7SYuri Pankov static void 1396a40ea1a7SYuri Pankov putmdockey(const struct mpage *mpage, 1397a40ea1a7SYuri Pankov const struct roff_node *n, uint64_t m, int taboo) 1398a40ea1a7SYuri Pankov { 1399a40ea1a7SYuri Pankov 1400a40ea1a7SYuri Pankov for ( ; NULL != n; n = n->next) { 1401a40ea1a7SYuri Pankov if (n->flags & taboo) 1402a40ea1a7SYuri Pankov continue; 1403a40ea1a7SYuri Pankov if (NULL != n->child) 1404a40ea1a7SYuri Pankov putmdockey(mpage, n->child, m, taboo); 1405a40ea1a7SYuri Pankov if (n->type == ROFFT_TEXT) 1406a40ea1a7SYuri Pankov putkey(mpage, n->string, m); 1407a40ea1a7SYuri Pankov } 1408a40ea1a7SYuri Pankov } 1409a40ea1a7SYuri Pankov 1410a40ea1a7SYuri Pankov static void 1411a40ea1a7SYuri Pankov parse_man(struct mpage *mpage, const struct roff_meta *meta, 1412a40ea1a7SYuri Pankov const struct roff_node *n) 1413a40ea1a7SYuri Pankov { 1414a40ea1a7SYuri Pankov const struct roff_node *head, *body; 1415a40ea1a7SYuri Pankov char *start, *title; 1416a40ea1a7SYuri Pankov char byte; 1417a40ea1a7SYuri Pankov size_t sz; 1418a40ea1a7SYuri Pankov 1419a40ea1a7SYuri Pankov if (n == NULL) 1420a40ea1a7SYuri Pankov return; 1421a40ea1a7SYuri Pankov 1422a40ea1a7SYuri Pankov /* 1423a40ea1a7SYuri Pankov * We're only searching for one thing: the first text child in 1424a40ea1a7SYuri Pankov * the BODY of a NAME section. Since we don't keep track of 1425a40ea1a7SYuri Pankov * sections in -man, run some hoops to find out whether we're in 1426a40ea1a7SYuri Pankov * the correct section or not. 
1427a40ea1a7SYuri Pankov */ 1428a40ea1a7SYuri Pankov 1429a40ea1a7SYuri Pankov if (n->type == ROFFT_BODY && n->tok == MAN_SH) { 1430a40ea1a7SYuri Pankov body = n; 1431a40ea1a7SYuri Pankov if ((head = body->parent->head) != NULL && 1432a40ea1a7SYuri Pankov (head = head->child) != NULL && 1433a40ea1a7SYuri Pankov head->next == NULL && 1434a40ea1a7SYuri Pankov head->type == ROFFT_TEXT && 1435a40ea1a7SYuri Pankov strcmp(head->string, "NAME") == 0 && 1436a40ea1a7SYuri Pankov body->child != NULL) { 1437a40ea1a7SYuri Pankov 1438a40ea1a7SYuri Pankov /* 1439a40ea1a7SYuri Pankov * Suck the entire NAME section into memory. 1440a40ea1a7SYuri Pankov * Yes, we might run away. 1441a40ea1a7SYuri Pankov * But too many manuals have big, spread-out 1442a40ea1a7SYuri Pankov * NAME sections over many lines. 1443a40ea1a7SYuri Pankov */ 1444a40ea1a7SYuri Pankov 1445a40ea1a7SYuri Pankov title = NULL; 1446a40ea1a7SYuri Pankov deroff(&title, body); 1447a40ea1a7SYuri Pankov if (NULL == title) 1448a40ea1a7SYuri Pankov return; 1449a40ea1a7SYuri Pankov 1450a40ea1a7SYuri Pankov /* 1451a40ea1a7SYuri Pankov * Go through a special heuristic dance here. 1452a40ea1a7SYuri Pankov * Conventionally, one or more manual names are 1453a40ea1a7SYuri Pankov * comma-specified prior to a whitespace, then a 1454a40ea1a7SYuri Pankov * dash, then a description. Try to puzzle out 1455a40ea1a7SYuri Pankov * the name parts here. 1456a40ea1a7SYuri Pankov */ 1457a40ea1a7SYuri Pankov 1458a40ea1a7SYuri Pankov start = title; 1459a40ea1a7SYuri Pankov for ( ;; ) { 1460a40ea1a7SYuri Pankov sz = strcspn(start, " ,"); 1461a40ea1a7SYuri Pankov if ('\0' == start[sz]) 1462a40ea1a7SYuri Pankov break; 1463a40ea1a7SYuri Pankov 1464a40ea1a7SYuri Pankov byte = start[sz]; 1465a40ea1a7SYuri Pankov start[sz] = '\0'; 1466a40ea1a7SYuri Pankov 1467a40ea1a7SYuri Pankov /* 1468a40ea1a7SYuri Pankov * Assume a stray trailing comma in the 1469a40ea1a7SYuri Pankov * name list if a name begins with a dash. 
1470a40ea1a7SYuri Pankov */ 1471a40ea1a7SYuri Pankov 1472a40ea1a7SYuri Pankov if ('-' == start[0] || 1473a40ea1a7SYuri Pankov ('\\' == start[0] && '-' == start[1])) 1474a40ea1a7SYuri Pankov break; 1475a40ea1a7SYuri Pankov 1476a40ea1a7SYuri Pankov putkey(mpage, start, NAME_TITLE); 1477a40ea1a7SYuri Pankov if ( ! (mpage->name_head_done || 1478a40ea1a7SYuri Pankov strcasecmp(start, meta->title))) { 1479a40ea1a7SYuri Pankov putkey(mpage, start, NAME_HEAD); 1480a40ea1a7SYuri Pankov mpage->name_head_done = 1; 1481a40ea1a7SYuri Pankov } 1482a40ea1a7SYuri Pankov 1483a40ea1a7SYuri Pankov if (' ' == byte) { 1484a40ea1a7SYuri Pankov start += sz + 1; 1485a40ea1a7SYuri Pankov break; 1486a40ea1a7SYuri Pankov } 1487a40ea1a7SYuri Pankov 1488a40ea1a7SYuri Pankov assert(',' == byte); 1489a40ea1a7SYuri Pankov start += sz + 1; 1490a40ea1a7SYuri Pankov while (' ' == *start) 1491a40ea1a7SYuri Pankov start++; 1492a40ea1a7SYuri Pankov } 1493a40ea1a7SYuri Pankov 1494a40ea1a7SYuri Pankov if (start == title) { 1495a40ea1a7SYuri Pankov putkey(mpage, start, NAME_TITLE); 1496a40ea1a7SYuri Pankov if ( ! 
(mpage->name_head_done || 1497a40ea1a7SYuri Pankov strcasecmp(start, meta->title))) { 1498a40ea1a7SYuri Pankov putkey(mpage, start, NAME_HEAD); 1499a40ea1a7SYuri Pankov mpage->name_head_done = 1; 1500a40ea1a7SYuri Pankov } 1501a40ea1a7SYuri Pankov free(title); 1502a40ea1a7SYuri Pankov return; 1503a40ea1a7SYuri Pankov } 1504a40ea1a7SYuri Pankov 1505a40ea1a7SYuri Pankov while (isspace((unsigned char)*start)) 1506a40ea1a7SYuri Pankov start++; 1507a40ea1a7SYuri Pankov 1508a40ea1a7SYuri Pankov if (0 == strncmp(start, "-", 1)) 1509a40ea1a7SYuri Pankov start += 1; 1510a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\-\\-", 4)) 1511a40ea1a7SYuri Pankov start += 4; 1512a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\-", 2)) 1513a40ea1a7SYuri Pankov start += 2; 1514a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\(en", 4)) 1515a40ea1a7SYuri Pankov start += 4; 1516a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\(em", 4)) 1517a40ea1a7SYuri Pankov start += 4; 1518a40ea1a7SYuri Pankov 1519a40ea1a7SYuri Pankov while (' ' == *start) 1520a40ea1a7SYuri Pankov start++; 1521a40ea1a7SYuri Pankov 15226640c13bSYuri Pankov /* 15236640c13bSYuri Pankov * Cut off excessive one-line descriptions. 15246640c13bSYuri Pankov * Bad pages are not worth better heuristics. 
15256640c13bSYuri Pankov */ 15266640c13bSYuri Pankov 15276640c13bSYuri Pankov mpage->desc = mandoc_strndup(start, 150); 1528a40ea1a7SYuri Pankov free(title); 1529a40ea1a7SYuri Pankov return; 1530a40ea1a7SYuri Pankov } 1531a40ea1a7SYuri Pankov } 1532a40ea1a7SYuri Pankov 1533a40ea1a7SYuri Pankov for (n = n->child; n; n = n->next) { 1534a40ea1a7SYuri Pankov if (NULL != mpage->desc) 1535a40ea1a7SYuri Pankov break; 1536a40ea1a7SYuri Pankov parse_man(mpage, meta, n); 1537a40ea1a7SYuri Pankov } 1538a40ea1a7SYuri Pankov } 1539a40ea1a7SYuri Pankov 1540a40ea1a7SYuri Pankov static void 1541a40ea1a7SYuri Pankov parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, 1542a40ea1a7SYuri Pankov const struct roff_node *n) 1543a40ea1a7SYuri Pankov { 1544*cec8643bSMichal Nowak const struct mdoc_handler *handler; 1545a40ea1a7SYuri Pankov 1546c66b8046SYuri Pankov for (n = n->child; n != NULL; n = n->next) { 1547*cec8643bSMichal Nowak if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX) 1548a40ea1a7SYuri Pankov continue; 1549c66b8046SYuri Pankov assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); 1550*cec8643bSMichal Nowak handler = mdoc_handlers + (n->tok - MDOC_Dd); 1551*cec8643bSMichal Nowak if (n->flags & handler->taboo) 1552*cec8643bSMichal Nowak continue; 1553*cec8643bSMichal Nowak 1554a40ea1a7SYuri Pankov switch (n->type) { 1555a40ea1a7SYuri Pankov case ROFFT_ELEM: 1556a40ea1a7SYuri Pankov case ROFFT_BLOCK: 1557a40ea1a7SYuri Pankov case ROFFT_HEAD: 1558a40ea1a7SYuri Pankov case ROFFT_BODY: 1559a40ea1a7SYuri Pankov case ROFFT_TAIL: 1560*cec8643bSMichal Nowak if (handler->fp != NULL && 1561*cec8643bSMichal Nowak (*handler->fp)(mpage, meta, n) == 0) 1562c66b8046SYuri Pankov break; 1563*cec8643bSMichal Nowak if (handler->mask) 1564a40ea1a7SYuri Pankov putmdockey(mpage, n->child, 1565*cec8643bSMichal Nowak handler->mask, handler->taboo); 1566a40ea1a7SYuri Pankov break; 1567a40ea1a7SYuri Pankov default: 1568a40ea1a7SYuri Pankov continue; 1569a40ea1a7SYuri Pankov } 1570a40ea1a7SYuri Pankov 
if (NULL != n->child) 1571a40ea1a7SYuri Pankov parse_mdoc(mpage, meta, n); 1572a40ea1a7SYuri Pankov } 1573a40ea1a7SYuri Pankov } 1574a40ea1a7SYuri Pankov 15756640c13bSYuri Pankov static int 15766640c13bSYuri Pankov parse_mdoc_Fa(struct mpage *mpage, const struct roff_meta *meta, 15776640c13bSYuri Pankov const struct roff_node *n) 15786640c13bSYuri Pankov { 15796640c13bSYuri Pankov uint64_t mask; 15806640c13bSYuri Pankov 15816640c13bSYuri Pankov mask = TYPE_Fa; 15826640c13bSYuri Pankov if (n->sec == SEC_SYNOPSIS) 15836640c13bSYuri Pankov mask |= TYPE_Vt; 15846640c13bSYuri Pankov 15856640c13bSYuri Pankov putmdockey(mpage, n->child, mask, 0); 15866640c13bSYuri Pankov return 0; 15876640c13bSYuri Pankov } 15886640c13bSYuri Pankov 1589a40ea1a7SYuri Pankov static int 1590a40ea1a7SYuri Pankov parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, 1591a40ea1a7SYuri Pankov const struct roff_node *n) 1592a40ea1a7SYuri Pankov { 1593a40ea1a7SYuri Pankov char *start, *end; 1594a40ea1a7SYuri Pankov size_t sz; 1595a40ea1a7SYuri Pankov 1596a40ea1a7SYuri Pankov if (SEC_SYNOPSIS != n->sec || 1597a40ea1a7SYuri Pankov NULL == (n = n->child) || 1598a40ea1a7SYuri Pankov n->type != ROFFT_TEXT) 1599a40ea1a7SYuri Pankov return 0; 1600a40ea1a7SYuri Pankov 1601a40ea1a7SYuri Pankov /* 1602a40ea1a7SYuri Pankov * Only consider those `Fd' macro fields that begin with an 1603a40ea1a7SYuri Pankov * "inclusion" token (versus, e.g., #define). 1604a40ea1a7SYuri Pankov */ 1605a40ea1a7SYuri Pankov 1606a40ea1a7SYuri Pankov if (strcmp("#include", n->string)) 1607a40ea1a7SYuri Pankov return 0; 1608a40ea1a7SYuri Pankov 1609a40ea1a7SYuri Pankov if ((n = n->next) == NULL || n->type != ROFFT_TEXT) 1610a40ea1a7SYuri Pankov return 0; 1611a40ea1a7SYuri Pankov 1612a40ea1a7SYuri Pankov /* 1613a40ea1a7SYuri Pankov * Strip away the enclosing angle brackets and make sure we're 1614a40ea1a7SYuri Pankov * not zero-length. 
1615a40ea1a7SYuri Pankov */ 1616a40ea1a7SYuri Pankov 1617a40ea1a7SYuri Pankov start = n->string; 1618a40ea1a7SYuri Pankov if ('<' == *start || '"' == *start) 1619a40ea1a7SYuri Pankov start++; 1620a40ea1a7SYuri Pankov 1621a40ea1a7SYuri Pankov if (0 == (sz = strlen(start))) 1622a40ea1a7SYuri Pankov return 0; 1623a40ea1a7SYuri Pankov 1624a40ea1a7SYuri Pankov end = &start[(int)sz - 1]; 1625a40ea1a7SYuri Pankov if ('>' == *end || '"' == *end) 1626a40ea1a7SYuri Pankov end--; 1627a40ea1a7SYuri Pankov 1628a40ea1a7SYuri Pankov if (end > start) 1629a40ea1a7SYuri Pankov putkeys(mpage, start, end - start + 1, TYPE_In); 1630a40ea1a7SYuri Pankov return 0; 1631a40ea1a7SYuri Pankov } 1632a40ea1a7SYuri Pankov 1633a40ea1a7SYuri Pankov static void 1634a40ea1a7SYuri Pankov parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) 1635a40ea1a7SYuri Pankov { 1636a40ea1a7SYuri Pankov char *cp; 1637a40ea1a7SYuri Pankov size_t sz; 1638a40ea1a7SYuri Pankov 1639a40ea1a7SYuri Pankov if (n->type != ROFFT_TEXT) 1640a40ea1a7SYuri Pankov return; 1641a40ea1a7SYuri Pankov 1642a40ea1a7SYuri Pankov /* Skip function pointer punctuation. 
*/ 1643a40ea1a7SYuri Pankov 1644a40ea1a7SYuri Pankov cp = n->string; 1645a40ea1a7SYuri Pankov while (*cp == '(' || *cp == '*') 1646a40ea1a7SYuri Pankov cp++; 1647a40ea1a7SYuri Pankov sz = strcspn(cp, "()"); 1648a40ea1a7SYuri Pankov 1649a40ea1a7SYuri Pankov putkeys(mpage, cp, sz, TYPE_Fn); 1650a40ea1a7SYuri Pankov if (n->sec == SEC_SYNOPSIS) 1651a40ea1a7SYuri Pankov putkeys(mpage, cp, sz, NAME_SYN); 1652a40ea1a7SYuri Pankov } 1653a40ea1a7SYuri Pankov 1654a40ea1a7SYuri Pankov static int 1655a40ea1a7SYuri Pankov parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, 1656a40ea1a7SYuri Pankov const struct roff_node *n) 1657a40ea1a7SYuri Pankov { 16586640c13bSYuri Pankov uint64_t mask; 1659a40ea1a7SYuri Pankov 1660a40ea1a7SYuri Pankov if (n->child == NULL) 1661a40ea1a7SYuri Pankov return 0; 1662a40ea1a7SYuri Pankov 1663a40ea1a7SYuri Pankov parse_mdoc_fname(mpage, n->child); 1664a40ea1a7SYuri Pankov 16656640c13bSYuri Pankov n = n->child->next; 16666640c13bSYuri Pankov if (n != NULL && n->type == ROFFT_TEXT) { 16676640c13bSYuri Pankov mask = TYPE_Fa; 16686640c13bSYuri Pankov if (n->sec == SEC_SYNOPSIS) 16696640c13bSYuri Pankov mask |= TYPE_Vt; 16706640c13bSYuri Pankov putmdockey(mpage, n, mask, 0); 16716640c13bSYuri Pankov } 1672a40ea1a7SYuri Pankov 1673a40ea1a7SYuri Pankov return 0; 1674a40ea1a7SYuri Pankov } 1675a40ea1a7SYuri Pankov 1676a40ea1a7SYuri Pankov static int 1677a40ea1a7SYuri Pankov parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, 1678a40ea1a7SYuri Pankov const struct roff_node *n) 1679a40ea1a7SYuri Pankov { 1680a40ea1a7SYuri Pankov 1681a40ea1a7SYuri Pankov if (n->type != ROFFT_HEAD) 1682a40ea1a7SYuri Pankov return 1; 1683a40ea1a7SYuri Pankov 1684a40ea1a7SYuri Pankov if (n->child != NULL) 1685a40ea1a7SYuri Pankov parse_mdoc_fname(mpage, n->child); 1686a40ea1a7SYuri Pankov 1687a40ea1a7SYuri Pankov return 0; 1688a40ea1a7SYuri Pankov } 1689a40ea1a7SYuri Pankov 1690a40ea1a7SYuri Pankov static int 1691a40ea1a7SYuri Pankov 
parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, 1692a40ea1a7SYuri Pankov const struct roff_node *n) 1693a40ea1a7SYuri Pankov { 1694a40ea1a7SYuri Pankov char *cp; 1695a40ea1a7SYuri Pankov 1696a40ea1a7SYuri Pankov if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) 1697a40ea1a7SYuri Pankov return 0; 1698a40ea1a7SYuri Pankov 1699a40ea1a7SYuri Pankov if (n->child != NULL && 1700a40ea1a7SYuri Pankov n->child->next == NULL && 1701a40ea1a7SYuri Pankov n->child->type == ROFFT_TEXT) 1702a40ea1a7SYuri Pankov return 1; 1703a40ea1a7SYuri Pankov 1704a40ea1a7SYuri Pankov cp = NULL; 1705a40ea1a7SYuri Pankov deroff(&cp, n); 1706a40ea1a7SYuri Pankov if (cp != NULL) { 1707a40ea1a7SYuri Pankov putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || 1708a40ea1a7SYuri Pankov n->type == ROFFT_BODY ? TYPE_Va : 0)); 1709a40ea1a7SYuri Pankov free(cp); 1710a40ea1a7SYuri Pankov } 1711a40ea1a7SYuri Pankov 1712a40ea1a7SYuri Pankov return 0; 1713a40ea1a7SYuri Pankov } 1714a40ea1a7SYuri Pankov 1715a40ea1a7SYuri Pankov static int 1716a40ea1a7SYuri Pankov parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, 1717a40ea1a7SYuri Pankov const struct roff_node *n) 1718a40ea1a7SYuri Pankov { 1719a40ea1a7SYuri Pankov char *cp; 1720a40ea1a7SYuri Pankov 1721a40ea1a7SYuri Pankov if (NULL == (n = n->child)) 1722a40ea1a7SYuri Pankov return 0; 1723a40ea1a7SYuri Pankov 1724a40ea1a7SYuri Pankov if (NULL == n->next) { 1725a40ea1a7SYuri Pankov putkey(mpage, n->string, TYPE_Xr); 1726a40ea1a7SYuri Pankov return 0; 1727a40ea1a7SYuri Pankov } 1728a40ea1a7SYuri Pankov 1729a40ea1a7SYuri Pankov mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); 1730a40ea1a7SYuri Pankov putkey(mpage, cp, TYPE_Xr); 1731a40ea1a7SYuri Pankov free(cp); 1732a40ea1a7SYuri Pankov return 0; 1733a40ea1a7SYuri Pankov } 1734a40ea1a7SYuri Pankov 1735a40ea1a7SYuri Pankov static int 1736a40ea1a7SYuri Pankov parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, 1737a40ea1a7SYuri Pankov const struct roff_node *n) 
1738a40ea1a7SYuri Pankov { 1739a40ea1a7SYuri Pankov 1740a40ea1a7SYuri Pankov if (n->type == ROFFT_BODY) 1741a40ea1a7SYuri Pankov deroff(&mpage->desc, n); 1742a40ea1a7SYuri Pankov return 0; 1743a40ea1a7SYuri Pankov } 1744a40ea1a7SYuri Pankov 1745a40ea1a7SYuri Pankov static int 1746a40ea1a7SYuri Pankov parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, 1747a40ea1a7SYuri Pankov const struct roff_node *n) 1748a40ea1a7SYuri Pankov { 1749a40ea1a7SYuri Pankov 1750a40ea1a7SYuri Pankov if (SEC_NAME == n->sec) 1751a40ea1a7SYuri Pankov putmdockey(mpage, n->child, NAME_TITLE, 0); 1752a40ea1a7SYuri Pankov else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { 1753a40ea1a7SYuri Pankov if (n->child == NULL) 1754a40ea1a7SYuri Pankov putkey(mpage, meta->name, NAME_SYN); 1755a40ea1a7SYuri Pankov else 1756a40ea1a7SYuri Pankov putmdockey(mpage, n->child, NAME_SYN, 0); 1757a40ea1a7SYuri Pankov } 1758a40ea1a7SYuri Pankov if ( ! (mpage->name_head_done || 1759a40ea1a7SYuri Pankov n->child == NULL || n->child->string == NULL || 1760a40ea1a7SYuri Pankov strcasecmp(n->child->string, meta->title))) { 1761a40ea1a7SYuri Pankov putkey(mpage, n->child->string, NAME_HEAD); 1762a40ea1a7SYuri Pankov mpage->name_head_done = 1; 1763a40ea1a7SYuri Pankov } 1764a40ea1a7SYuri Pankov return 0; 1765a40ea1a7SYuri Pankov } 1766a40ea1a7SYuri Pankov 1767a40ea1a7SYuri Pankov static int 1768a40ea1a7SYuri Pankov parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, 1769a40ea1a7SYuri Pankov const struct roff_node *n) 1770a40ea1a7SYuri Pankov { 1771a40ea1a7SYuri Pankov 1772a40ea1a7SYuri Pankov return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; 1773a40ea1a7SYuri Pankov } 1774a40ea1a7SYuri Pankov 1775a40ea1a7SYuri Pankov static int 1776a40ea1a7SYuri Pankov parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, 1777a40ea1a7SYuri Pankov const struct roff_node *n) 1778a40ea1a7SYuri Pankov { 1779a40ea1a7SYuri Pankov 1780a40ea1a7SYuri Pankov return n->type == ROFFT_HEAD; 
1781a40ea1a7SYuri Pankov } 1782a40ea1a7SYuri Pankov 1783a40ea1a7SYuri Pankov /* 1784a40ea1a7SYuri Pankov * Add a string to the hash table for the current manual. 1785a40ea1a7SYuri Pankov * Each string has a bitmask telling which macros it belongs to. 1786a40ea1a7SYuri Pankov * When we finish the manual, we'll dump the table. 1787a40ea1a7SYuri Pankov */ 1788a40ea1a7SYuri Pankov static void 1789a40ea1a7SYuri Pankov putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) 1790a40ea1a7SYuri Pankov { 1791a40ea1a7SYuri Pankov struct ohash *htab; 1792a40ea1a7SYuri Pankov struct str *s; 1793a40ea1a7SYuri Pankov const char *end; 1794a40ea1a7SYuri Pankov unsigned int slot; 1795a40ea1a7SYuri Pankov int i, mustfree; 1796a40ea1a7SYuri Pankov 1797a40ea1a7SYuri Pankov if (0 == sz) 1798a40ea1a7SYuri Pankov return; 1799a40ea1a7SYuri Pankov 1800a40ea1a7SYuri Pankov mustfree = render_string(&cp, &sz); 1801a40ea1a7SYuri Pankov 1802a40ea1a7SYuri Pankov if (TYPE_Nm & v) { 1803a40ea1a7SYuri Pankov htab = &names; 1804a40ea1a7SYuri Pankov v &= name_mask; 1805a40ea1a7SYuri Pankov if (v & NAME_FIRST) 1806a40ea1a7SYuri Pankov name_mask &= ~NAME_FIRST; 1807a40ea1a7SYuri Pankov if (debug > 1) 1808a40ea1a7SYuri Pankov say(mpage->mlinks->file, 1809a40ea1a7SYuri Pankov "Adding name %*s, bits=0x%llx", (int)sz, cp, 1810a40ea1a7SYuri Pankov (unsigned long long)v); 1811a40ea1a7SYuri Pankov } else { 1812a40ea1a7SYuri Pankov htab = &strings; 1813a40ea1a7SYuri Pankov if (debug > 1) 1814a40ea1a7SYuri Pankov for (i = 0; i < KEY_MAX; i++) 1815a40ea1a7SYuri Pankov if ((uint64_t)1 << i & v) 1816a40ea1a7SYuri Pankov say(mpage->mlinks->file, 1817a40ea1a7SYuri Pankov "Adding key %s=%*s", 1818a40ea1a7SYuri Pankov mansearch_keynames[i], (int)sz, cp); 1819a40ea1a7SYuri Pankov } 1820a40ea1a7SYuri Pankov 1821a40ea1a7SYuri Pankov end = cp + sz; 1822a40ea1a7SYuri Pankov slot = ohash_qlookupi(htab, cp, &end); 1823a40ea1a7SYuri Pankov s = ohash_find(htab, slot); 1824a40ea1a7SYuri Pankov 1825a40ea1a7SYuri 
Pankov if (NULL != s && mpage == s->mpage) { 1826a40ea1a7SYuri Pankov s->mask |= v; 1827a40ea1a7SYuri Pankov return; 1828a40ea1a7SYuri Pankov } else if (NULL == s) { 1829a40ea1a7SYuri Pankov s = mandoc_calloc(1, sizeof(struct str) + sz + 1); 1830a40ea1a7SYuri Pankov memcpy(s->key, cp, sz); 1831a40ea1a7SYuri Pankov ohash_insert(htab, slot, s); 1832a40ea1a7SYuri Pankov } 1833a40ea1a7SYuri Pankov s->mpage = mpage; 1834a40ea1a7SYuri Pankov s->mask = v; 1835a40ea1a7SYuri Pankov 1836a40ea1a7SYuri Pankov if (mustfree) 1837a40ea1a7SYuri Pankov free(cp); 1838a40ea1a7SYuri Pankov } 1839a40ea1a7SYuri Pankov 1840a40ea1a7SYuri Pankov /* 1841a40ea1a7SYuri Pankov * Take a Unicode codepoint and produce its UTF-8 encoding. 1842a40ea1a7SYuri Pankov * This isn't the best way to do this, but it works. 1843a40ea1a7SYuri Pankov * The magic numbers are from the UTF-8 packaging. 1844a40ea1a7SYuri Pankov * They're not as scary as they seem: read the UTF-8 spec for details. 1845a40ea1a7SYuri Pankov */ 1846a40ea1a7SYuri Pankov static size_t 1847a40ea1a7SYuri Pankov utf8(unsigned int cp, char out[7]) 1848a40ea1a7SYuri Pankov { 1849a40ea1a7SYuri Pankov size_t rc; 1850a40ea1a7SYuri Pankov 1851a40ea1a7SYuri Pankov rc = 0; 1852a40ea1a7SYuri Pankov if (cp <= 0x0000007F) { 1853a40ea1a7SYuri Pankov rc = 1; 1854a40ea1a7SYuri Pankov out[0] = (char)cp; 1855a40ea1a7SYuri Pankov } else if (cp <= 0x000007FF) { 1856a40ea1a7SYuri Pankov rc = 2; 1857a40ea1a7SYuri Pankov out[0] = (cp >> 6 & 31) | 192; 1858a40ea1a7SYuri Pankov out[1] = (cp & 63) | 128; 1859a40ea1a7SYuri Pankov } else if (cp <= 0x0000FFFF) { 1860a40ea1a7SYuri Pankov rc = 3; 1861a40ea1a7SYuri Pankov out[0] = (cp >> 12 & 15) | 224; 1862a40ea1a7SYuri Pankov out[1] = (cp >> 6 & 63) | 128; 1863a40ea1a7SYuri Pankov out[2] = (cp & 63) | 128; 1864a40ea1a7SYuri Pankov } else if (cp <= 0x001FFFFF) { 1865a40ea1a7SYuri Pankov rc = 4; 1866a40ea1a7SYuri Pankov out[0] = (cp >> 18 & 7) | 240; 1867a40ea1a7SYuri Pankov out[1] = (cp >> 12 & 63) | 128; 
1868a40ea1a7SYuri Pankov out[2] = (cp >> 6 & 63) | 128; 1869a40ea1a7SYuri Pankov out[3] = (cp & 63) | 128; 1870a40ea1a7SYuri Pankov } else if (cp <= 0x03FFFFFF) { 1871a40ea1a7SYuri Pankov rc = 5; 1872a40ea1a7SYuri Pankov out[0] = (cp >> 24 & 3) | 248; 1873a40ea1a7SYuri Pankov out[1] = (cp >> 18 & 63) | 128; 1874a40ea1a7SYuri Pankov out[2] = (cp >> 12 & 63) | 128; 1875a40ea1a7SYuri Pankov out[3] = (cp >> 6 & 63) | 128; 1876a40ea1a7SYuri Pankov out[4] = (cp & 63) | 128; 1877a40ea1a7SYuri Pankov } else if (cp <= 0x7FFFFFFF) { 1878a40ea1a7SYuri Pankov rc = 6; 1879a40ea1a7SYuri Pankov out[0] = (cp >> 30 & 1) | 252; 1880a40ea1a7SYuri Pankov out[1] = (cp >> 24 & 63) | 128; 1881a40ea1a7SYuri Pankov out[2] = (cp >> 18 & 63) | 128; 1882a40ea1a7SYuri Pankov out[3] = (cp >> 12 & 63) | 128; 1883a40ea1a7SYuri Pankov out[4] = (cp >> 6 & 63) | 128; 1884a40ea1a7SYuri Pankov out[5] = (cp & 63) | 128; 1885a40ea1a7SYuri Pankov } else 1886a40ea1a7SYuri Pankov return 0; 1887a40ea1a7SYuri Pankov 1888a40ea1a7SYuri Pankov out[rc] = '\0'; 1889a40ea1a7SYuri Pankov return rc; 1890a40ea1a7SYuri Pankov } 1891a40ea1a7SYuri Pankov 1892a40ea1a7SYuri Pankov /* 1893a40ea1a7SYuri Pankov * If the string contains escape sequences, 1894a40ea1a7SYuri Pankov * replace it with an allocated rendering and return 1, 1895a40ea1a7SYuri Pankov * such that the caller can free it after use. 1896a40ea1a7SYuri Pankov * Otherwise, do nothing and return 0. 
1897a40ea1a7SYuri Pankov */ 1898a40ea1a7SYuri Pankov static int 1899a40ea1a7SYuri Pankov render_string(char **public, size_t *psz) 1900a40ea1a7SYuri Pankov { 1901a40ea1a7SYuri Pankov const char *src, *scp, *addcp, *seq; 1902a40ea1a7SYuri Pankov char *dst; 1903a40ea1a7SYuri Pankov size_t ssz, dsz, addsz; 1904a40ea1a7SYuri Pankov char utfbuf[7], res[6]; 1905a40ea1a7SYuri Pankov int seqlen, unicode; 1906a40ea1a7SYuri Pankov 1907a40ea1a7SYuri Pankov res[0] = '\\'; 1908a40ea1a7SYuri Pankov res[1] = '\t'; 1909a40ea1a7SYuri Pankov res[2] = ASCII_NBRSP; 1910a40ea1a7SYuri Pankov res[3] = ASCII_HYPH; 1911a40ea1a7SYuri Pankov res[4] = ASCII_BREAK; 1912a40ea1a7SYuri Pankov res[5] = '\0'; 1913a40ea1a7SYuri Pankov 1914a40ea1a7SYuri Pankov src = scp = *public; 1915a40ea1a7SYuri Pankov ssz = *psz; 1916a40ea1a7SYuri Pankov dst = NULL; 1917a40ea1a7SYuri Pankov dsz = 0; 1918a40ea1a7SYuri Pankov 1919a40ea1a7SYuri Pankov while (scp < src + *psz) { 1920a40ea1a7SYuri Pankov 1921a40ea1a7SYuri Pankov /* Leave normal characters unchanged. */ 1922a40ea1a7SYuri Pankov 1923a40ea1a7SYuri Pankov if (strchr(res, *scp) == NULL) { 1924a40ea1a7SYuri Pankov if (dst != NULL) 1925a40ea1a7SYuri Pankov dst[dsz++] = *scp; 1926a40ea1a7SYuri Pankov scp++; 1927a40ea1a7SYuri Pankov continue; 1928a40ea1a7SYuri Pankov } 1929a40ea1a7SYuri Pankov 1930a40ea1a7SYuri Pankov /* 1931a40ea1a7SYuri Pankov * Found something that requires replacing, 1932a40ea1a7SYuri Pankov * make sure we have a destination buffer. 1933a40ea1a7SYuri Pankov */ 1934a40ea1a7SYuri Pankov 1935a40ea1a7SYuri Pankov if (dst == NULL) { 1936a40ea1a7SYuri Pankov dst = mandoc_malloc(ssz + 1); 1937a40ea1a7SYuri Pankov dsz = scp - src; 1938a40ea1a7SYuri Pankov memcpy(dst, src, dsz); 1939a40ea1a7SYuri Pankov } 1940a40ea1a7SYuri Pankov 1941a40ea1a7SYuri Pankov /* Handle single-char special characters. 
*/ 1942a40ea1a7SYuri Pankov 1943a40ea1a7SYuri Pankov switch (*scp) { 1944a40ea1a7SYuri Pankov case '\\': 1945a40ea1a7SYuri Pankov break; 1946a40ea1a7SYuri Pankov case '\t': 1947a40ea1a7SYuri Pankov case ASCII_NBRSP: 1948a40ea1a7SYuri Pankov dst[dsz++] = ' '; 1949a40ea1a7SYuri Pankov scp++; 1950a40ea1a7SYuri Pankov continue; 1951a40ea1a7SYuri Pankov case ASCII_HYPH: 1952a40ea1a7SYuri Pankov dst[dsz++] = '-'; 1953a40ea1a7SYuri Pankov /* FALLTHROUGH */ 1954a40ea1a7SYuri Pankov case ASCII_BREAK: 1955a40ea1a7SYuri Pankov scp++; 1956a40ea1a7SYuri Pankov continue; 1957a40ea1a7SYuri Pankov default: 1958a40ea1a7SYuri Pankov abort(); 1959a40ea1a7SYuri Pankov } 1960a40ea1a7SYuri Pankov 1961a40ea1a7SYuri Pankov /* 1962a40ea1a7SYuri Pankov * Found an escape sequence. 1963a40ea1a7SYuri Pankov * Read past the slash, then parse it. 1964a40ea1a7SYuri Pankov * Ignore everything except characters. 1965a40ea1a7SYuri Pankov */ 1966a40ea1a7SYuri Pankov 1967a40ea1a7SYuri Pankov scp++; 1968a40ea1a7SYuri Pankov if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) 1969a40ea1a7SYuri Pankov continue; 1970a40ea1a7SYuri Pankov 1971a40ea1a7SYuri Pankov /* 1972a40ea1a7SYuri Pankov * Render the special character 1973a40ea1a7SYuri Pankov * as either UTF-8 or ASCII. 
1974a40ea1a7SYuri Pankov */ 1975a40ea1a7SYuri Pankov 1976a40ea1a7SYuri Pankov if (write_utf8) { 1977a40ea1a7SYuri Pankov unicode = mchars_spec2cp(seq, seqlen); 1978a40ea1a7SYuri Pankov if (unicode <= 0) 1979a40ea1a7SYuri Pankov continue; 1980a40ea1a7SYuri Pankov addsz = utf8(unicode, utfbuf); 1981a40ea1a7SYuri Pankov if (addsz == 0) 1982a40ea1a7SYuri Pankov continue; 1983a40ea1a7SYuri Pankov addcp = utfbuf; 1984a40ea1a7SYuri Pankov } else { 1985a40ea1a7SYuri Pankov addcp = mchars_spec2str(seq, seqlen, &addsz); 1986a40ea1a7SYuri Pankov if (addcp == NULL) 1987a40ea1a7SYuri Pankov continue; 1988a40ea1a7SYuri Pankov if (*addcp == ASCII_NBRSP) { 1989a40ea1a7SYuri Pankov addcp = " "; 1990a40ea1a7SYuri Pankov addsz = 1; 1991a40ea1a7SYuri Pankov } 1992a40ea1a7SYuri Pankov } 1993a40ea1a7SYuri Pankov 1994a40ea1a7SYuri Pankov /* Copy the rendered glyph into the stream. */ 1995a40ea1a7SYuri Pankov 1996a40ea1a7SYuri Pankov ssz += addsz; 1997a40ea1a7SYuri Pankov dst = mandoc_realloc(dst, ssz + 1); 1998a40ea1a7SYuri Pankov memcpy(dst + dsz, addcp, addsz); 1999a40ea1a7SYuri Pankov dsz += addsz; 2000a40ea1a7SYuri Pankov } 2001a40ea1a7SYuri Pankov if (dst != NULL) { 2002a40ea1a7SYuri Pankov *public = dst; 2003a40ea1a7SYuri Pankov *psz = dsz; 2004a40ea1a7SYuri Pankov } 2005a40ea1a7SYuri Pankov 2006a40ea1a7SYuri Pankov /* Trim trailing whitespace and NUL-terminate. 
*/ 2007a40ea1a7SYuri Pankov 2008a40ea1a7SYuri Pankov while (*psz > 0 && (*public)[*psz - 1] == ' ') 2009a40ea1a7SYuri Pankov --*psz; 2010a40ea1a7SYuri Pankov if (dst != NULL) { 2011a40ea1a7SYuri Pankov (*public)[*psz] = '\0'; 2012a40ea1a7SYuri Pankov return 1; 2013a40ea1a7SYuri Pankov } else 2014a40ea1a7SYuri Pankov return 0; 2015a40ea1a7SYuri Pankov } 2016a40ea1a7SYuri Pankov 2017a40ea1a7SYuri Pankov static void 2018a40ea1a7SYuri Pankov dbadd_mlink(const struct mlink *mlink) 2019a40ea1a7SYuri Pankov { 2020a40ea1a7SYuri Pankov dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE); 2021a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec); 2022a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec); 2023a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch); 2024a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file); 2025a40ea1a7SYuri Pankov } 2026a40ea1a7SYuri Pankov 2027a40ea1a7SYuri Pankov /* 2028a40ea1a7SYuri Pankov * Flush the current page's terms (and their bits) into the database. 2029a40ea1a7SYuri Pankov * Also, handle escape sequences at the last possible moment. 
2030a40ea1a7SYuri Pankov */ 2031a40ea1a7SYuri Pankov static void 2032a40ea1a7SYuri Pankov dbadd(struct dba *dba, struct mpage *mpage) 2033a40ea1a7SYuri Pankov { 2034a40ea1a7SYuri Pankov struct mlink *mlink; 2035a40ea1a7SYuri Pankov struct str *key; 2036a40ea1a7SYuri Pankov char *cp; 2037a40ea1a7SYuri Pankov uint64_t mask; 2038a40ea1a7SYuri Pankov size_t i; 2039a40ea1a7SYuri Pankov unsigned int slot; 2040a40ea1a7SYuri Pankov int mustfree; 2041a40ea1a7SYuri Pankov 2042a40ea1a7SYuri Pankov mlink = mpage->mlinks; 2043a40ea1a7SYuri Pankov 2044a40ea1a7SYuri Pankov if (nodb) { 2045a40ea1a7SYuri Pankov for (key = ohash_first(&names, &slot); NULL != key; 2046a40ea1a7SYuri Pankov key = ohash_next(&names, &slot)) 2047a40ea1a7SYuri Pankov free(key); 2048a40ea1a7SYuri Pankov for (key = ohash_first(&strings, &slot); NULL != key; 2049a40ea1a7SYuri Pankov key = ohash_next(&strings, &slot)) 2050a40ea1a7SYuri Pankov free(key); 2051a40ea1a7SYuri Pankov if (0 == debug) 2052a40ea1a7SYuri Pankov return; 2053a40ea1a7SYuri Pankov while (NULL != mlink) { 2054a40ea1a7SYuri Pankov fputs(mlink->name, stdout); 2055a40ea1a7SYuri Pankov if (NULL == mlink->next || 2056a40ea1a7SYuri Pankov strcmp(mlink->dsec, mlink->next->dsec) || 2057a40ea1a7SYuri Pankov strcmp(mlink->fsec, mlink->next->fsec) || 2058a40ea1a7SYuri Pankov strcmp(mlink->arch, mlink->next->arch)) { 2059a40ea1a7SYuri Pankov putchar('('); 2060a40ea1a7SYuri Pankov if ('\0' == *mlink->dsec) 2061a40ea1a7SYuri Pankov fputs(mlink->fsec, stdout); 2062a40ea1a7SYuri Pankov else 2063a40ea1a7SYuri Pankov fputs(mlink->dsec, stdout); 2064a40ea1a7SYuri Pankov if ('\0' != *mlink->arch) 2065a40ea1a7SYuri Pankov printf("/%s", mlink->arch); 2066a40ea1a7SYuri Pankov putchar(')'); 2067a40ea1a7SYuri Pankov } 2068a40ea1a7SYuri Pankov mlink = mlink->next; 2069a40ea1a7SYuri Pankov if (NULL != mlink) 2070a40ea1a7SYuri Pankov fputs(", ", stdout); 2071a40ea1a7SYuri Pankov } 2072a40ea1a7SYuri Pankov printf(" - %s\n", mpage->desc); 2073a40ea1a7SYuri Pankov 
return; 2074a40ea1a7SYuri Pankov } 2075a40ea1a7SYuri Pankov 2076a40ea1a7SYuri Pankov if (debug) 2077a40ea1a7SYuri Pankov say(mlink->file, "Adding to database"); 2078a40ea1a7SYuri Pankov 2079a40ea1a7SYuri Pankov cp = mpage->desc; 2080a40ea1a7SYuri Pankov i = strlen(cp); 2081a40ea1a7SYuri Pankov mustfree = render_string(&cp, &i); 2082a40ea1a7SYuri Pankov mpage->dba = dba_page_new(dba->pages, 2083a40ea1a7SYuri Pankov *mpage->arch == '\0' ? mlink->arch : mpage->arch, 2084a40ea1a7SYuri Pankov cp, mlink->file, mpage->form); 2085a40ea1a7SYuri Pankov if (mustfree) 2086a40ea1a7SYuri Pankov free(cp); 2087a40ea1a7SYuri Pankov dba_page_add(mpage->dba, DBP_SECT, mpage->sec); 2088a40ea1a7SYuri Pankov 2089a40ea1a7SYuri Pankov while (mlink != NULL) { 2090a40ea1a7SYuri Pankov dbadd_mlink(mlink); 2091a40ea1a7SYuri Pankov mlink = mlink->next; 2092a40ea1a7SYuri Pankov } 2093a40ea1a7SYuri Pankov 2094a40ea1a7SYuri Pankov for (key = ohash_first(&names, &slot); NULL != key; 2095a40ea1a7SYuri Pankov key = ohash_next(&names, &slot)) { 2096a40ea1a7SYuri Pankov assert(key->mpage == mpage); 2097a40ea1a7SYuri Pankov dba_page_alias(mpage->dba, key->key, key->mask); 2098a40ea1a7SYuri Pankov free(key); 2099a40ea1a7SYuri Pankov } 2100a40ea1a7SYuri Pankov for (key = ohash_first(&strings, &slot); NULL != key; 2101a40ea1a7SYuri Pankov key = ohash_next(&strings, &slot)) { 2102a40ea1a7SYuri Pankov assert(key->mpage == mpage); 2103a40ea1a7SYuri Pankov i = 0; 2104a40ea1a7SYuri Pankov for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) { 2105a40ea1a7SYuri Pankov if (key->mask & mask) 2106a40ea1a7SYuri Pankov dba_macro_add(dba->macros, i, 2107a40ea1a7SYuri Pankov key->key, mpage->dba); 2108a40ea1a7SYuri Pankov i++; 2109a40ea1a7SYuri Pankov } 2110a40ea1a7SYuri Pankov free(key); 2111a40ea1a7SYuri Pankov } 2112a40ea1a7SYuri Pankov } 2113a40ea1a7SYuri Pankov 2114a40ea1a7SYuri Pankov static void 2115a40ea1a7SYuri Pankov dbprune(struct dba *dba) 2116a40ea1a7SYuri Pankov { 2117a40ea1a7SYuri Pankov struct dba_array 
*page, *files; 2118a40ea1a7SYuri Pankov char *file; 2119a40ea1a7SYuri Pankov 2120a40ea1a7SYuri Pankov dba_array_FOREACH(dba->pages, page) { 2121a40ea1a7SYuri Pankov files = dba_array_get(page, DBP_FILE); 2122a40ea1a7SYuri Pankov dba_array_FOREACH(files, file) { 2123a40ea1a7SYuri Pankov if (*file < ' ') 2124a40ea1a7SYuri Pankov file++; 2125a40ea1a7SYuri Pankov if (ohash_find(&mlinks, ohash_qlookup(&mlinks, 2126a40ea1a7SYuri Pankov file)) != NULL) { 2127a40ea1a7SYuri Pankov if (debug) 2128a40ea1a7SYuri Pankov say(file, "Deleting from database"); 2129a40ea1a7SYuri Pankov dba_array_del(dba->pages); 2130a40ea1a7SYuri Pankov break; 2131a40ea1a7SYuri Pankov } 2132a40ea1a7SYuri Pankov } 2133a40ea1a7SYuri Pankov } 2134a40ea1a7SYuri Pankov } 2135a40ea1a7SYuri Pankov 2136a40ea1a7SYuri Pankov /* 2137a40ea1a7SYuri Pankov * Write the database from memory to disk. 2138a40ea1a7SYuri Pankov */ 2139a40ea1a7SYuri Pankov static void 2140a40ea1a7SYuri Pankov dbwrite(struct dba *dba) 2141a40ea1a7SYuri Pankov { 21426640c13bSYuri Pankov struct stat sb1, sb2; 21436640c13bSYuri Pankov char tfn[33], *cp1, *cp2; 21446640c13bSYuri Pankov off_t i; 21456640c13bSYuri Pankov int fd1, fd2; 2146a40ea1a7SYuri Pankov 2147c66b8046SYuri Pankov /* 2148c66b8046SYuri Pankov * Do not write empty databases, and delete existing ones 2149c66b8046SYuri Pankov * when makewhatis -u causes them to become empty. 2150c66b8046SYuri Pankov */ 2151c66b8046SYuri Pankov 2152c66b8046SYuri Pankov dba_array_start(dba->pages); 2153c66b8046SYuri Pankov if (dba_array_next(dba->pages) == NULL) { 2154c66b8046SYuri Pankov if (unlink(MANDOC_DB) == -1 && errno != ENOENT) 2155c66b8046SYuri Pankov say(MANDOC_DB, "&unlink"); 2156c66b8046SYuri Pankov return; 2157c66b8046SYuri Pankov } 2158c66b8046SYuri Pankov 2159c66b8046SYuri Pankov /* 2160c66b8046SYuri Pankov * Build the database in a temporary file, 2161c66b8046SYuri Pankov * then atomically move it into place. 
2162c66b8046SYuri Pankov */ 2163c66b8046SYuri Pankov 2164a40ea1a7SYuri Pankov if (dba_write(MANDOC_DB "~", dba) != -1) { 2165a40ea1a7SYuri Pankov if (rename(MANDOC_DB "~", MANDOC_DB) == -1) { 2166a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2167a40ea1a7SYuri Pankov say(MANDOC_DB, "&rename"); 2168a40ea1a7SYuri Pankov unlink(MANDOC_DB "~"); 2169a40ea1a7SYuri Pankov } 2170a40ea1a7SYuri Pankov return; 2171a40ea1a7SYuri Pankov } 2172a40ea1a7SYuri Pankov 2173c66b8046SYuri Pankov /* 2174c66b8046SYuri Pankov * We lack write permission and cannot replace the database 2175c66b8046SYuri Pankov * file, but let's at least check whether the data changed. 2176c66b8046SYuri Pankov */ 2177c66b8046SYuri Pankov 2178a40ea1a7SYuri Pankov (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn)); 2179a40ea1a7SYuri Pankov if (mkdtemp(tfn) == NULL) { 2180a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2181a40ea1a7SYuri Pankov say("", "&%s", tfn); 2182a40ea1a7SYuri Pankov return; 2183a40ea1a7SYuri Pankov } 21846640c13bSYuri Pankov cp1 = cp2 = MAP_FAILED; 21856640c13bSYuri Pankov fd1 = fd2 = -1; 2186a40ea1a7SYuri Pankov (void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn)); 2187a40ea1a7SYuri Pankov if (dba_write(tfn, dba) == -1) { 2188a40ea1a7SYuri Pankov say(tfn, "&dba_write"); 21896640c13bSYuri Pankov goto err; 2190a40ea1a7SYuri Pankov } 21916640c13bSYuri Pankov if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) { 21926640c13bSYuri Pankov say(MANDOC_DB, "&open"); 21936640c13bSYuri Pankov goto err; 2194a40ea1a7SYuri Pankov } 21956640c13bSYuri Pankov if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) { 21966640c13bSYuri Pankov say(tfn, "&open"); 21976640c13bSYuri Pankov goto err; 21986640c13bSYuri Pankov } 21996640c13bSYuri Pankov if (fstat(fd1, &sb1) == -1) { 22006640c13bSYuri Pankov say(MANDOC_DB, "&fstat"); 22016640c13bSYuri Pankov goto err; 22026640c13bSYuri Pankov } 22036640c13bSYuri Pankov if (fstat(fd2, &sb2) == -1) { 22046640c13bSYuri Pankov say(tfn, "&fstat"); 22056640c13bSYuri 
Pankov goto err; 2206a40ea1a7SYuri Pankov } 22076640c13bSYuri Pankov if (sb1.st_size != sb2.st_size) 22086640c13bSYuri Pankov goto err; 22096640c13bSYuri Pankov if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE, 22106640c13bSYuri Pankov fd1, 0)) == MAP_FAILED) { 22116640c13bSYuri Pankov say(MANDOC_DB, "&mmap"); 22126640c13bSYuri Pankov goto err; 22136640c13bSYuri Pankov } 22146640c13bSYuri Pankov if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE, 22156640c13bSYuri Pankov fd2, 0)) == MAP_FAILED) { 22166640c13bSYuri Pankov say(tfn, "&mmap"); 22176640c13bSYuri Pankov goto err; 22186640c13bSYuri Pankov } 22196640c13bSYuri Pankov for (i = 0; i < sb1.st_size; i++) 22206640c13bSYuri Pankov if (cp1[i] != cp2[i]) 22216640c13bSYuri Pankov goto err; 22226640c13bSYuri Pankov goto out; 22236640c13bSYuri Pankov 22246640c13bSYuri Pankov err: 22256640c13bSYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 22266640c13bSYuri Pankov say(MANDOC_DB, "Data changed, but cannot replace database"); 2227a40ea1a7SYuri Pankov 2228a40ea1a7SYuri Pankov out: 22296640c13bSYuri Pankov if (cp1 != MAP_FAILED) 22306640c13bSYuri Pankov munmap(cp1, sb1.st_size); 22316640c13bSYuri Pankov if (cp2 != MAP_FAILED) 22326640c13bSYuri Pankov munmap(cp2, sb2.st_size); 22336640c13bSYuri Pankov if (fd1 != -1) 22346640c13bSYuri Pankov close(fd1); 22356640c13bSYuri Pankov if (fd2 != -1) 22366640c13bSYuri Pankov close(fd2); 22376640c13bSYuri Pankov unlink(tfn); 2238a40ea1a7SYuri Pankov *strrchr(tfn, '/') = '\0'; 22396640c13bSYuri Pankov rmdir(tfn); 2240a40ea1a7SYuri Pankov } 2241a40ea1a7SYuri Pankov 2242a40ea1a7SYuri Pankov static int 2243a40ea1a7SYuri Pankov set_basedir(const char *targetdir, int report_baddir) 2244a40ea1a7SYuri Pankov { 2245a40ea1a7SYuri Pankov static char startdir[PATH_MAX]; 2246a40ea1a7SYuri Pankov static int getcwd_status; /* 1 = ok, 2 = failure */ 2247a40ea1a7SYuri Pankov static int chdir_status; /* 1 = changed directory */ 2248a40ea1a7SYuri Pankov char *cp; 2249a40ea1a7SYuri 
Pankov 2250a40ea1a7SYuri Pankov /* 2251a40ea1a7SYuri Pankov * Remember the original working directory, if possible. 2252a40ea1a7SYuri Pankov * This will be needed if the second or a later directory 2253a40ea1a7SYuri Pankov * on the command line is given as a relative path. 2254a40ea1a7SYuri Pankov * Do not error out if the current directory is not 2255a40ea1a7SYuri Pankov * searchable: Maybe it won't be needed after all. 2256a40ea1a7SYuri Pankov */ 2257a40ea1a7SYuri Pankov if (0 == getcwd_status) { 2258a40ea1a7SYuri Pankov if (NULL == getcwd(startdir, sizeof(startdir))) { 2259a40ea1a7SYuri Pankov getcwd_status = 2; 2260a40ea1a7SYuri Pankov (void)strlcpy(startdir, strerror(errno), 2261a40ea1a7SYuri Pankov sizeof(startdir)); 2262a40ea1a7SYuri Pankov } else 2263a40ea1a7SYuri Pankov getcwd_status = 1; 2264a40ea1a7SYuri Pankov } 2265a40ea1a7SYuri Pankov 2266a40ea1a7SYuri Pankov /* 2267a40ea1a7SYuri Pankov * We are leaving the old base directory. 2268a40ea1a7SYuri Pankov * Do not use it any longer, not even for messages. 2269a40ea1a7SYuri Pankov */ 2270a40ea1a7SYuri Pankov *basedir = '\0'; 2271a40ea1a7SYuri Pankov 2272a40ea1a7SYuri Pankov /* 2273a40ea1a7SYuri Pankov * If and only if the directory was changed earlier and 2274a40ea1a7SYuri Pankov * the next directory to process is given as a relative path, 2275a40ea1a7SYuri Pankov * first go back, or bail out if that is impossible. 
2276a40ea1a7SYuri Pankov */ 2277a40ea1a7SYuri Pankov if (chdir_status && '/' != *targetdir) { 2278a40ea1a7SYuri Pankov if (2 == getcwd_status) { 2279a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2280a40ea1a7SYuri Pankov say("", "getcwd: %s", startdir); 2281a40ea1a7SYuri Pankov return 0; 2282a40ea1a7SYuri Pankov } 2283a40ea1a7SYuri Pankov if (-1 == chdir(startdir)) { 2284a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2285a40ea1a7SYuri Pankov say("", "&chdir %s", startdir); 2286a40ea1a7SYuri Pankov return 0; 2287a40ea1a7SYuri Pankov } 2288a40ea1a7SYuri Pankov } 2289a40ea1a7SYuri Pankov 2290a40ea1a7SYuri Pankov /* 2291a40ea1a7SYuri Pankov * Always resolve basedir to the canonicalized absolute 2292a40ea1a7SYuri Pankov * pathname and append a trailing slash, such that 2293a40ea1a7SYuri Pankov * we can reliably check whether files are inside. 2294a40ea1a7SYuri Pankov */ 2295a40ea1a7SYuri Pankov if (NULL == realpath(targetdir, basedir)) { 2296a40ea1a7SYuri Pankov if (report_baddir || errno != ENOENT) { 2297a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 2298a40ea1a7SYuri Pankov say("", "&%s: realpath", targetdir); 2299a40ea1a7SYuri Pankov } 2300a40ea1a7SYuri Pankov return 0; 2301a40ea1a7SYuri Pankov } else if (-1 == chdir(basedir)) { 2302a40ea1a7SYuri Pankov if (report_baddir || errno != ENOENT) { 2303a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 2304a40ea1a7SYuri Pankov say("", "&chdir"); 2305a40ea1a7SYuri Pankov } 2306a40ea1a7SYuri Pankov return 0; 2307a40ea1a7SYuri Pankov } 2308a40ea1a7SYuri Pankov chdir_status = 1; 2309a40ea1a7SYuri Pankov cp = strchr(basedir, '\0'); 2310a40ea1a7SYuri Pankov if ('/' != cp[-1]) { 2311a40ea1a7SYuri Pankov if (cp - basedir >= PATH_MAX - 1) { 2312a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2313a40ea1a7SYuri Pankov say("", "Filename too long"); 2314a40ea1a7SYuri Pankov return 0; 2315a40ea1a7SYuri Pankov } 2316a40ea1a7SYuri Pankov *cp++ = '/'; 2317a40ea1a7SYuri Pankov *cp = '\0'; 
2318a40ea1a7SYuri Pankov } 2319a40ea1a7SYuri Pankov return 1; 2320a40ea1a7SYuri Pankov } 2321a40ea1a7SYuri Pankov 2322a40ea1a7SYuri Pankov static void 2323a40ea1a7SYuri Pankov say(const char *file, const char *format, ...) 2324a40ea1a7SYuri Pankov { 2325a40ea1a7SYuri Pankov va_list ap; 2326a40ea1a7SYuri Pankov int use_errno; 2327a40ea1a7SYuri Pankov 2328a40ea1a7SYuri Pankov if ('\0' != *basedir) 2329a40ea1a7SYuri Pankov fprintf(stderr, "%s", basedir); 2330a40ea1a7SYuri Pankov if ('\0' != *basedir && '\0' != *file) 2331a40ea1a7SYuri Pankov fputc('/', stderr); 2332a40ea1a7SYuri Pankov if ('\0' != *file) 2333a40ea1a7SYuri Pankov fprintf(stderr, "%s", file); 2334a40ea1a7SYuri Pankov 2335a40ea1a7SYuri Pankov use_errno = 1; 2336a40ea1a7SYuri Pankov if (NULL != format) { 2337a40ea1a7SYuri Pankov switch (*format) { 2338a40ea1a7SYuri Pankov case '&': 2339a40ea1a7SYuri Pankov format++; 2340a40ea1a7SYuri Pankov break; 2341a40ea1a7SYuri Pankov case '\0': 2342a40ea1a7SYuri Pankov format = NULL; 2343a40ea1a7SYuri Pankov break; 2344a40ea1a7SYuri Pankov default: 2345a40ea1a7SYuri Pankov use_errno = 0; 2346a40ea1a7SYuri Pankov break; 2347a40ea1a7SYuri Pankov } 2348a40ea1a7SYuri Pankov } 2349a40ea1a7SYuri Pankov if (NULL != format) { 2350a40ea1a7SYuri Pankov if ('\0' != *basedir || '\0' != *file) 2351a40ea1a7SYuri Pankov fputs(": ", stderr); 2352a40ea1a7SYuri Pankov va_start(ap, format); 2353a40ea1a7SYuri Pankov vfprintf(stderr, format, ap); 2354a40ea1a7SYuri Pankov va_end(ap); 2355a40ea1a7SYuri Pankov } 2356a40ea1a7SYuri Pankov if (use_errno) { 2357a40ea1a7SYuri Pankov if ('\0' != *basedir || '\0' != *file || NULL != format) 2358a40ea1a7SYuri Pankov fputs(": ", stderr); 2359a40ea1a7SYuri Pankov perror(NULL); 2360a40ea1a7SYuri Pankov } else 2361a40ea1a7SYuri Pankov fputc('\n', stderr); 2362a40ea1a7SYuri Pankov } 2363