1*c66b8046SYuri Pankov /* $Id: mandocdb.c,v 1.253 2017/07/28 14:48:25 schwarze Exp $ */ 2a40ea1a7SYuri Pankov /* 3a40ea1a7SYuri Pankov * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4a40ea1a7SYuri Pankov * Copyright (c) 2011-2017 Ingo Schwarze <schwarze@openbsd.org> 5a40ea1a7SYuri Pankov * Copyright (c) 2016 Ed Maste <emaste@freebsd.org> 6a40ea1a7SYuri Pankov * 7a40ea1a7SYuri Pankov * Permission to use, copy, modify, and distribute this software for any 8a40ea1a7SYuri Pankov * purpose with or without fee is hereby granted, provided that the above 9a40ea1a7SYuri Pankov * copyright notice and this permission notice appear in all copies. 10a40ea1a7SYuri Pankov * 11a40ea1a7SYuri Pankov * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12a40ea1a7SYuri Pankov * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13a40ea1a7SYuri Pankov * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14a40ea1a7SYuri Pankov * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15a40ea1a7SYuri Pankov * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16a40ea1a7SYuri Pankov * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17a40ea1a7SYuri Pankov * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18a40ea1a7SYuri Pankov */ 19a40ea1a7SYuri Pankov #include "config.h" 20a40ea1a7SYuri Pankov 21a40ea1a7SYuri Pankov #include <sys/types.h> 22a40ea1a7SYuri Pankov #include <sys/stat.h> 23a40ea1a7SYuri Pankov #include <sys/wait.h> 24a40ea1a7SYuri Pankov 25a40ea1a7SYuri Pankov #include <assert.h> 26a40ea1a7SYuri Pankov #include <ctype.h> 27a40ea1a7SYuri Pankov #if HAVE_ERR 28a40ea1a7SYuri Pankov #include <err.h> 29a40ea1a7SYuri Pankov #endif 30a40ea1a7SYuri Pankov #include <errno.h> 31a40ea1a7SYuri Pankov #include <fcntl.h> 32a40ea1a7SYuri Pankov #if HAVE_FTS 33a40ea1a7SYuri Pankov #include <fts.h> 34a40ea1a7SYuri Pankov #else 35a40ea1a7SYuri Pankov #include "compat_fts.h" 36a40ea1a7SYuri Pankov #endif 37a40ea1a7SYuri Pankov #include <limits.h> 38a40ea1a7SYuri Pankov #if HAVE_SANDBOX_INIT 39a40ea1a7SYuri Pankov #include <sandbox.h> 40a40ea1a7SYuri Pankov #endif 41a40ea1a7SYuri Pankov #include <stdarg.h> 42a40ea1a7SYuri Pankov #include <stddef.h> 43a40ea1a7SYuri Pankov #include <stdio.h> 44a40ea1a7SYuri Pankov #include <stdint.h> 45a40ea1a7SYuri Pankov #include <stdlib.h> 46a40ea1a7SYuri Pankov #include <string.h> 47a40ea1a7SYuri Pankov #include <unistd.h> 48a40ea1a7SYuri Pankov 49a40ea1a7SYuri Pankov #include "mandoc_aux.h" 50a40ea1a7SYuri Pankov #include "mandoc_ohash.h" 51a40ea1a7SYuri Pankov #include "mandoc.h" 52a40ea1a7SYuri Pankov #include "roff.h" 53a40ea1a7SYuri Pankov #include "mdoc.h" 54a40ea1a7SYuri Pankov #include "man.h" 55a40ea1a7SYuri Pankov #include "manconf.h" 56a40ea1a7SYuri Pankov #include "mansearch.h" 57a40ea1a7SYuri Pankov #include "dba_array.h" 58a40ea1a7SYuri Pankov #include "dba.h" 59a40ea1a7SYuri Pankov 60a40ea1a7SYuri Pankov extern const char *const mansearch_keynames[]; 61a40ea1a7SYuri Pankov 62a40ea1a7SYuri Pankov enum op { 63a40ea1a7SYuri Pankov OP_DEFAULT = 0, /* new dbs from dir list or default config */ 64a40ea1a7SYuri Pankov OP_CONFFILE, /* new databases from custom config file */ 65a40ea1a7SYuri Pankov OP_UPDATE, /* delete/add entries in existing database */ 66a40ea1a7SYuri Pankov OP_DELETE, /* delete entries from existing database */ 67a40ea1a7SYuri Pankov OP_TEST /* change no databases, report potential problems */ 68a40ea1a7SYuri Pankov }; 69a40ea1a7SYuri Pankov 70a40ea1a7SYuri Pankov struct str { 71a40ea1a7SYuri Pankov const struct mpage *mpage; /* if set, the owning parse */ 72a40ea1a7SYuri Pankov uint64_t mask; /* bitmask in sequence */ 73a40ea1a7SYuri Pankov char key[]; /* rendered text */ 74a40ea1a7SYuri Pankov }; 75a40ea1a7SYuri Pankov 76a40ea1a7SYuri Pankov struct inodev { 77a40ea1a7SYuri Pankov ino_t st_ino; 78a40ea1a7SYuri Pankov dev_t st_dev; 79a40ea1a7SYuri Pankov }; 80a40ea1a7SYuri Pankov 81a40ea1a7SYuri Pankov struct mpage { 82a40ea1a7SYuri Pankov struct inodev inodev; /* used for hashing routine */ 83a40ea1a7SYuri Pankov struct dba_array *dba; 84a40ea1a7SYuri Pankov char *sec; /* section from file content */ 85a40ea1a7SYuri Pankov char *arch; /* architecture from file content */ 86a40ea1a7SYuri Pankov char *title; /* title from file content */ 87a40ea1a7SYuri Pankov char *desc; /* description from file content */ 88a40ea1a7SYuri Pankov struct mpage *next; /* singly linked list */ 89a40ea1a7SYuri Pankov struct mlink *mlinks; /* singly linked list */ 90a40ea1a7SYuri Pankov int name_head_done; 91a40ea1a7SYuri Pankov enum form form; /* format from file content */ 92a40ea1a7SYuri Pankov }; 93a40ea1a7SYuri Pankov 94a40ea1a7SYuri Pankov struct mlink { 95a40ea1a7SYuri Pankov char file[PATH_MAX]; /* filename rel. to manpath */ 96a40ea1a7SYuri Pankov char *dsec; /* section from directory */ 97a40ea1a7SYuri Pankov char *arch; /* architecture from directory */ 98a40ea1a7SYuri Pankov char *name; /* name from file name (not empty) */ 99a40ea1a7SYuri Pankov char *fsec; /* section from file name suffix */ 100a40ea1a7SYuri Pankov struct mlink *next; /* singly linked list */ 101a40ea1a7SYuri Pankov struct mpage *mpage; /* parent */ 102a40ea1a7SYuri Pankov int gzip; /* filename has a .gz suffix */ 103a40ea1a7SYuri Pankov enum form dform; /* format from directory */ 104a40ea1a7SYuri Pankov enum form fform; /* format from file name suffix */ 105a40ea1a7SYuri Pankov }; 106a40ea1a7SYuri Pankov 107a40ea1a7SYuri Pankov typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, 108a40ea1a7SYuri Pankov const struct roff_node *); 109a40ea1a7SYuri Pankov 110a40ea1a7SYuri Pankov struct mdoc_handler { 111a40ea1a7SYuri Pankov mdoc_fp fp; /* optional handler */ 112a40ea1a7SYuri Pankov uint64_t mask; /* set unless handler returns 0 */ 113a40ea1a7SYuri Pankov int taboo; /* node flags that must not be set */ 114a40ea1a7SYuri Pankov }; 115a40ea1a7SYuri Pankov 116a40ea1a7SYuri Pankov 117a40ea1a7SYuri Pankov int mandocdb(int, char *[]); 118a40ea1a7SYuri Pankov 119a40ea1a7SYuri Pankov static void dbadd(struct dba *, struct mpage *); 120a40ea1a7SYuri Pankov static void dbadd_mlink(const struct mlink *mlink); 121a40ea1a7SYuri Pankov static void dbprune(struct dba *); 122a40ea1a7SYuri Pankov static void dbwrite(struct dba *); 123a40ea1a7SYuri Pankov static void filescan(const char *); 124a40ea1a7SYuri Pankov #if HAVE_FTS_COMPARE_CONST 125a40ea1a7SYuri Pankov static int fts_compare(const FTSENT *const *, const FTSENT *const *); 126a40ea1a7SYuri Pankov #else 127a40ea1a7SYuri Pankov static int fts_compare(const FTSENT **, const FTSENT **); 128a40ea1a7SYuri Pankov #endif 129a40ea1a7SYuri Pankov static void mlink_add(struct mlink *, const struct stat *); 130a40ea1a7SYuri Pankov static void mlink_check(struct mpage *, struct mlink *); 131a40ea1a7SYuri Pankov static void mlink_free(struct mlink *); 132a40ea1a7SYuri Pankov static void mlinks_undupe(struct mpage *); 133a40ea1a7SYuri Pankov static void mpages_free(void); 134a40ea1a7SYuri Pankov static void mpages_merge(struct dba *, struct mparse *); 135a40ea1a7SYuri Pankov static void parse_cat(struct mpage *, int); 136a40ea1a7SYuri Pankov static void parse_man(struct mpage *, const struct roff_meta *, 137a40ea1a7SYuri Pankov const struct roff_node *); 138a40ea1a7SYuri Pankov static void parse_mdoc(struct mpage *, const struct roff_meta *, 139a40ea1a7SYuri Pankov const struct roff_node *); 140a40ea1a7SYuri Pankov static int parse_mdoc_head(struct mpage *, const struct roff_meta *, 141a40ea1a7SYuri Pankov const struct roff_node *); 142a40ea1a7SYuri Pankov static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, 143a40ea1a7SYuri Pankov const struct roff_node *); 144a40ea1a7SYuri Pankov static void parse_mdoc_fname(struct mpage *, const struct roff_node *); 145a40ea1a7SYuri Pankov static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, 146a40ea1a7SYuri Pankov const struct roff_node *); 147a40ea1a7SYuri Pankov static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, 148a40ea1a7SYuri Pankov const struct roff_node *); 149a40ea1a7SYuri Pankov static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, 150a40ea1a7SYuri Pankov const struct roff_node *); 151a40ea1a7SYuri Pankov static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, 152a40ea1a7SYuri Pankov const struct roff_node *); 153a40ea1a7SYuri Pankov static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, 154a40ea1a7SYuri Pankov const struct roff_node *); 155a40ea1a7SYuri Pankov static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, 156a40ea1a7SYuri Pankov const struct roff_node *); 157a40ea1a7SYuri Pankov static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, 158a40ea1a7SYuri Pankov const struct roff_node *); 159a40ea1a7SYuri Pankov static void putkey(const struct mpage *, char *, uint64_t); 160a40ea1a7SYuri Pankov static void putkeys(const struct mpage *, char *, size_t, uint64_t); 161a40ea1a7SYuri Pankov static void putmdockey(const struct mpage *, 162a40ea1a7SYuri Pankov const struct roff_node *, uint64_t, int); 163a40ea1a7SYuri Pankov static int render_string(char **, size_t *); 164a40ea1a7SYuri Pankov static void say(const char *, const char *, ...) 165a40ea1a7SYuri Pankov __attribute__((__format__ (__printf__, 2, 3))); 166a40ea1a7SYuri Pankov static int set_basedir(const char *, int); 167a40ea1a7SYuri Pankov static int treescan(void); 168a40ea1a7SYuri Pankov static size_t utf8(unsigned int, char [7]); 169a40ea1a7SYuri Pankov 170a40ea1a7SYuri Pankov static int nodb; /* no database changes */ 171a40ea1a7SYuri Pankov static int mparse_options; /* abort the parse early */ 172a40ea1a7SYuri Pankov static int use_all; /* use all found files */ 173a40ea1a7SYuri Pankov static int debug; /* print what we're doing */ 174a40ea1a7SYuri Pankov static int warnings; /* warn about crap */ 175a40ea1a7SYuri Pankov static int write_utf8; /* write UTF-8 output; else ASCII */ 176a40ea1a7SYuri Pankov static int exitcode; /* to be returned by main */ 177a40ea1a7SYuri Pankov static enum op op; /* operational mode */ 178a40ea1a7SYuri Pankov static char basedir[PATH_MAX]; /* current base directory */ 179a40ea1a7SYuri Pankov static struct mpage *mpage_head; /* list of distinct manual pages */ 180a40ea1a7SYuri Pankov static struct ohash mpages; /* table of distinct manual pages */ 181a40ea1a7SYuri Pankov static struct ohash mlinks; /* table of directory entries */ 182a40ea1a7SYuri Pankov static struct ohash names; /* table of all names */ 183a40ea1a7SYuri Pankov static struct ohash strings; /* table of all strings */ 184a40ea1a7SYuri Pankov static uint64_t name_mask; 185a40ea1a7SYuri Pankov 186*c66b8046SYuri Pankov static const struct mdoc_handler __mdocs[MDOC_MAX - MDOC_Dd] = { 187a40ea1a7SYuri Pankov { NULL, 0, NODE_NOPRT }, /* Dd */ 188a40ea1a7SYuri Pankov { NULL, 0, NODE_NOPRT }, /* Dt */ 189a40ea1a7SYuri Pankov { NULL, 0, NODE_NOPRT }, /* Os */ 190a40ea1a7SYuri Pankov { parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */ 191a40ea1a7SYuri Pankov { parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */ 192a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pp */ 193a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* D1 */ 194a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Dl */ 195a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bd */ 196a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ed */ 197a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bl */ 198a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* El */ 199a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* It */ 200a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ad */ 201a40ea1a7SYuri Pankov { NULL, TYPE_An, 0 }, /* An */ 202*c66b8046SYuri Pankov { NULL, 0, 0 }, /* Ap */ 203a40ea1a7SYuri Pankov { NULL, TYPE_Ar, 0 }, /* Ar */ 204a40ea1a7SYuri Pankov { NULL, TYPE_Cd, 0 }, /* Cd */ 205a40ea1a7SYuri Pankov { NULL, TYPE_Cm, 0 }, /* Cm */ 206a40ea1a7SYuri Pankov { NULL, TYPE_Dv, 0 }, /* Dv */ 207a40ea1a7SYuri Pankov { NULL, TYPE_Er, 0 }, /* Er */ 208a40ea1a7SYuri Pankov { NULL, TYPE_Ev, 0 }, /* Ev */ 209a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ex */ 210a40ea1a7SYuri Pankov { NULL, TYPE_Fa, 0 }, /* Fa */ 211a40ea1a7SYuri Pankov { parse_mdoc_Fd, 0, 0 }, /* Fd */ 212a40ea1a7SYuri Pankov { NULL, TYPE_Fl, 0 }, /* Fl */ 213a40ea1a7SYuri Pankov { parse_mdoc_Fn, 0, 0 }, /* Fn */ 214a40ea1a7SYuri Pankov { NULL, TYPE_Ft, 0 }, /* Ft */ 215a40ea1a7SYuri Pankov { NULL, TYPE_Ic, 0 }, /* Ic */ 216a40ea1a7SYuri Pankov { NULL, TYPE_In, 0 }, /* In */ 217a40ea1a7SYuri Pankov { NULL, TYPE_Li, 0 }, /* Li */ 218a40ea1a7SYuri Pankov { parse_mdoc_Nd, 0, 0 }, /* Nd */ 219a40ea1a7SYuri Pankov { parse_mdoc_Nm, 0, 0 }, /* Nm */ 220a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Op */ 221a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ot */ 222a40ea1a7SYuri Pankov { NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */ 223a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Rv */ 224a40ea1a7SYuri Pankov { NULL, TYPE_St, 0 }, /* St */ 225a40ea1a7SYuri Pankov { parse_mdoc_Va, TYPE_Va, 0 }, /* Va */ 226a40ea1a7SYuri Pankov { parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */ 227a40ea1a7SYuri Pankov { parse_mdoc_Xr, 0, 0 }, /* Xr */ 228a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %A */ 229a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %B */ 230a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %D */ 231a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %I */ 232a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %J */ 233a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %N */ 234a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %O */ 235a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %P */ 236a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %R */ 237a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %T */ 238a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %V */ 239a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ac */ 240a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ao */ 241a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Aq */ 242a40ea1a7SYuri Pankov { NULL, TYPE_At, 0 }, /* At */ 243a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bc */ 244a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bf */ 245a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bo */ 246a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bq */ 247a40ea1a7SYuri Pankov { NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */ 248a40ea1a7SYuri Pankov { NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */ 249a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Db */ 250a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Dc */ 251a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Do */ 252a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Dq */ 253a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ec */ 254a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ef */ 255a40ea1a7SYuri Pankov { NULL, TYPE_Em, 0 }, /* Em */ 256a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Eo */ 257a40ea1a7SYuri Pankov { NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */ 258a40ea1a7SYuri Pankov { NULL, TYPE_Ms, 0 }, /* Ms */ 259a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* No */ 260a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ns */ 261a40ea1a7SYuri Pankov { NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */ 262a40ea1a7SYuri Pankov { NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */ 263a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pc */ 264a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pf */ 265a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Po */ 266a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Pq */ 267a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Qc */ 268a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ql */ 269a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Qo */ 270a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Qq */ 271a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Re */ 272a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Rs */ 273a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sc */ 274a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* So */ 275a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sq */ 276a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sm */ 277a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Sx */ 278a40ea1a7SYuri Pankov { NULL, TYPE_Sy, 0 }, /* Sy */ 279a40ea1a7SYuri Pankov { NULL, TYPE_Tn, 0 }, /* Tn */ 280a40ea1a7SYuri Pankov { NULL, 0, NODE_NOSRC }, /* Ux */ 281a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Xc */ 282a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Xo */ 283a40ea1a7SYuri Pankov { parse_mdoc_Fo, 0, 0 }, /* Fo */ 284a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Fc */ 285a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Oo */ 286a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Oc */ 287a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bk */ 288a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ek */ 289a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bt */ 290a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Hf */ 291a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Fr */ 292a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ud */ 293a40ea1a7SYuri Pankov { NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */ 294a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Lp */ 295a40ea1a7SYuri Pankov { NULL, TYPE_Lk, 0 }, /* Lk */ 296a40ea1a7SYuri Pankov { NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */ 297a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Brq */ 298a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Bro */ 299a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Brc */ 300a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %C */ 301a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Es */ 302a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* En */ 303a40ea1a7SYuri Pankov { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */ 304a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %Q */ 305a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* %U */ 306a40ea1a7SYuri Pankov { NULL, 0, 0 }, /* Ta */ 307a40ea1a7SYuri Pankov }; 308*c66b8046SYuri Pankov static const struct mdoc_handler *const mdocs = __mdocs - MDOC_Dd; 309a40ea1a7SYuri Pankov 310a40ea1a7SYuri Pankov 311a40ea1a7SYuri Pankov int 312a40ea1a7SYuri Pankov mandocdb(int argc, char *argv[]) 313a40ea1a7SYuri Pankov { 314a40ea1a7SYuri Pankov struct manconf conf; 315a40ea1a7SYuri Pankov struct mparse *mp; 316a40ea1a7SYuri Pankov struct dba *dba; 317a40ea1a7SYuri Pankov const char *path_arg, *progname; 318a40ea1a7SYuri Pankov size_t j, sz; 319a40ea1a7SYuri Pankov int ch, i; 320a40ea1a7SYuri Pankov 321a40ea1a7SYuri Pankov #if HAVE_PLEDGE 322a40ea1a7SYuri Pankov if (pledge("stdio rpath wpath cpath fattr flock proc exec", NULL) == -1) { 323a40ea1a7SYuri Pankov warn("pledge"); 324a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_SYSERR; 325a40ea1a7SYuri Pankov } 326a40ea1a7SYuri Pankov #endif 327a40ea1a7SYuri Pankov 328a40ea1a7SYuri Pankov #if HAVE_SANDBOX_INIT 329a40ea1a7SYuri Pankov if (sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, NULL) == -1) { 330a40ea1a7SYuri Pankov warnx("sandbox_init"); 331a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_SYSERR; 332a40ea1a7SYuri Pankov } 333a40ea1a7SYuri Pankov #endif 334a40ea1a7SYuri Pankov 335a40ea1a7SYuri Pankov memset(&conf, 0, sizeof(conf)); 336a40ea1a7SYuri Pankov 337a40ea1a7SYuri Pankov /* 338a40ea1a7SYuri Pankov * We accept a few different invocations. 339a40ea1a7SYuri Pankov * The CHECKOP macro makes sure that invocation styles don't 340a40ea1a7SYuri Pankov * clobber each other. 341a40ea1a7SYuri Pankov */ 342a40ea1a7SYuri Pankov #define CHECKOP(_op, _ch) do \ 343a40ea1a7SYuri Pankov if (OP_DEFAULT != (_op)) { \ 344a40ea1a7SYuri Pankov warnx("-%c: Conflicting option", (_ch)); \ 345a40ea1a7SYuri Pankov goto usage; \ 346a40ea1a7SYuri Pankov } while (/*CONSTCOND*/0) 347a40ea1a7SYuri Pankov 348a40ea1a7SYuri Pankov path_arg = NULL; 349a40ea1a7SYuri Pankov op = OP_DEFAULT; 350a40ea1a7SYuri Pankov 351a40ea1a7SYuri Pankov while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v"))) 352a40ea1a7SYuri Pankov switch (ch) { 353a40ea1a7SYuri Pankov case 'a': 354a40ea1a7SYuri Pankov use_all = 1; 355a40ea1a7SYuri Pankov break; 356a40ea1a7SYuri Pankov case 'C': 357a40ea1a7SYuri Pankov CHECKOP(op, ch); 358a40ea1a7SYuri Pankov path_arg = optarg; 359a40ea1a7SYuri Pankov op = OP_CONFFILE; 360a40ea1a7SYuri Pankov break; 361a40ea1a7SYuri Pankov case 'D': 362a40ea1a7SYuri Pankov debug++; 363a40ea1a7SYuri Pankov break; 364a40ea1a7SYuri Pankov case 'd': 365a40ea1a7SYuri Pankov CHECKOP(op, ch); 366a40ea1a7SYuri Pankov path_arg = optarg; 367a40ea1a7SYuri Pankov op = OP_UPDATE; 368a40ea1a7SYuri Pankov break; 369a40ea1a7SYuri Pankov case 'n': 370a40ea1a7SYuri Pankov nodb = 1; 371a40ea1a7SYuri Pankov break; 372a40ea1a7SYuri Pankov case 'p': 373a40ea1a7SYuri Pankov warnings = 1; 374a40ea1a7SYuri Pankov break; 375a40ea1a7SYuri Pankov case 'Q': 376a40ea1a7SYuri Pankov mparse_options |= MPARSE_QUICK; 377a40ea1a7SYuri Pankov break; 378a40ea1a7SYuri Pankov case 'T': 379a40ea1a7SYuri Pankov if (strcmp(optarg, "utf8")) { 380a40ea1a7SYuri Pankov warnx("-T%s: Unsupported output format", 381a40ea1a7SYuri Pankov optarg); 382a40ea1a7SYuri Pankov goto usage; 383a40ea1a7SYuri Pankov } 384a40ea1a7SYuri Pankov write_utf8 = 1; 385a40ea1a7SYuri Pankov break; 386a40ea1a7SYuri Pankov case 't': 387a40ea1a7SYuri Pankov CHECKOP(op, ch); 388a40ea1a7SYuri Pankov dup2(STDOUT_FILENO, STDERR_FILENO); 389a40ea1a7SYuri Pankov op = OP_TEST; 390a40ea1a7SYuri Pankov nodb = warnings = 1; 391a40ea1a7SYuri Pankov break; 392a40ea1a7SYuri Pankov case 'u': 393a40ea1a7SYuri Pankov CHECKOP(op, ch); 394a40ea1a7SYuri Pankov path_arg = optarg; 395a40ea1a7SYuri Pankov op = OP_DELETE; 396a40ea1a7SYuri Pankov break; 397a40ea1a7SYuri Pankov case 'v': 398a40ea1a7SYuri Pankov /* Compatibility with espie@'s makewhatis. */ 399a40ea1a7SYuri Pankov break; 400a40ea1a7SYuri Pankov default: 401a40ea1a7SYuri Pankov goto usage; 402a40ea1a7SYuri Pankov } 403a40ea1a7SYuri Pankov 404a40ea1a7SYuri Pankov argc -= optind; 405a40ea1a7SYuri Pankov argv += optind; 406a40ea1a7SYuri Pankov 407a40ea1a7SYuri Pankov #if HAVE_PLEDGE 408a40ea1a7SYuri Pankov if (nodb) { 409a40ea1a7SYuri Pankov if (pledge("stdio rpath", NULL) == -1) { 410a40ea1a7SYuri Pankov warn("pledge"); 411a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_SYSERR; 412a40ea1a7SYuri Pankov } 413a40ea1a7SYuri Pankov } 414a40ea1a7SYuri Pankov #endif 415a40ea1a7SYuri Pankov 416a40ea1a7SYuri Pankov if (OP_CONFFILE == op && argc > 0) { 417a40ea1a7SYuri Pankov warnx("-C: Too many arguments"); 418a40ea1a7SYuri Pankov goto usage; 419a40ea1a7SYuri Pankov } 420a40ea1a7SYuri Pankov 421a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_OK; 422a40ea1a7SYuri Pankov mchars_alloc(); 423*c66b8046SYuri Pankov mp = mparse_alloc(mparse_options, MANDOCERR_MAX, NULL, 424*c66b8046SYuri Pankov MANDOC_OS_OTHER, NULL); 425a40ea1a7SYuri Pankov mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); 426a40ea1a7SYuri Pankov mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); 427a40ea1a7SYuri Pankov 428a40ea1a7SYuri Pankov if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { 429a40ea1a7SYuri Pankov 430a40ea1a7SYuri Pankov /* 431a40ea1a7SYuri Pankov * Most of these deal with a specific directory. 432a40ea1a7SYuri Pankov * Jump into that directory first. 433a40ea1a7SYuri Pankov */ 434a40ea1a7SYuri Pankov if (OP_TEST != op && 0 == set_basedir(path_arg, 1)) 435a40ea1a7SYuri Pankov goto out; 436a40ea1a7SYuri Pankov 437a40ea1a7SYuri Pankov dba = nodb ? dba_new(128) : dba_read(MANDOC_DB); 438a40ea1a7SYuri Pankov if (dba != NULL) { 439a40ea1a7SYuri Pankov /* 440a40ea1a7SYuri Pankov * The existing database is usable. Process 441a40ea1a7SYuri Pankov * all files specified on the command-line. 442a40ea1a7SYuri Pankov */ 443a40ea1a7SYuri Pankov #if HAVE_PLEDGE 444a40ea1a7SYuri Pankov if (!nodb) { 445a40ea1a7SYuri Pankov if (pledge("stdio rpath wpath cpath fattr flock", NULL) == -1) { 446a40ea1a7SYuri Pankov warn("pledge"); 447a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 448a40ea1a7SYuri Pankov goto out; 449a40ea1a7SYuri Pankov } 450a40ea1a7SYuri Pankov } 451a40ea1a7SYuri Pankov #endif 452a40ea1a7SYuri Pankov use_all = 1; 453a40ea1a7SYuri Pankov for (i = 0; i < argc; i++) 454a40ea1a7SYuri Pankov filescan(argv[i]); 455a40ea1a7SYuri Pankov if (nodb == 0) 456a40ea1a7SYuri Pankov dbprune(dba); 457a40ea1a7SYuri Pankov } else { 458a40ea1a7SYuri Pankov /* Database missing or corrupt. */ 459a40ea1a7SYuri Pankov if (op != OP_UPDATE || errno != ENOENT) 460a40ea1a7SYuri Pankov say(MANDOC_DB, "%s: Automatically recreating" 461a40ea1a7SYuri Pankov " from scratch", strerror(errno)); 462a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_OK; 463a40ea1a7SYuri Pankov op = OP_DEFAULT; 464a40ea1a7SYuri Pankov if (0 == treescan()) 465a40ea1a7SYuri Pankov goto out; 466a40ea1a7SYuri Pankov dba = dba_new(128); 467a40ea1a7SYuri Pankov } 468a40ea1a7SYuri Pankov if (OP_DELETE != op) 469a40ea1a7SYuri Pankov mpages_merge(dba, mp); 470a40ea1a7SYuri Pankov if (nodb == 0) 471a40ea1a7SYuri Pankov dbwrite(dba); 472a40ea1a7SYuri Pankov dba_free(dba); 473a40ea1a7SYuri Pankov } else { 474a40ea1a7SYuri Pankov /* 475a40ea1a7SYuri Pankov * If we have arguments, use them as our manpaths. 476a40ea1a7SYuri Pankov * If we don't, use man.conf(5). 477a40ea1a7SYuri Pankov */ 478a40ea1a7SYuri Pankov if (argc > 0) { 479a40ea1a7SYuri Pankov conf.manpath.paths = mandoc_reallocarray(NULL, 480a40ea1a7SYuri Pankov argc, sizeof(char *)); 481a40ea1a7SYuri Pankov conf.manpath.sz = (size_t)argc; 482a40ea1a7SYuri Pankov for (i = 0; i < argc; i++) 483a40ea1a7SYuri Pankov conf.manpath.paths[i] = mandoc_strdup(argv[i]); 484a40ea1a7SYuri Pankov } else 485a40ea1a7SYuri Pankov manconf_parse(&conf, path_arg, NULL, NULL); 486a40ea1a7SYuri Pankov 487a40ea1a7SYuri Pankov if (conf.manpath.sz == 0) { 488a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 489a40ea1a7SYuri Pankov say("", "Empty manpath"); 490a40ea1a7SYuri Pankov } 491a40ea1a7SYuri Pankov 492a40ea1a7SYuri Pankov /* 493a40ea1a7SYuri Pankov * First scan the tree rooted at a base directory, then 494a40ea1a7SYuri Pankov * build a new database and finally move it into place. 495a40ea1a7SYuri Pankov * Ignore zero-length directories and strip trailing 496a40ea1a7SYuri Pankov * slashes. 497a40ea1a7SYuri Pankov */ 498a40ea1a7SYuri Pankov for (j = 0; j < conf.manpath.sz; j++) { 499a40ea1a7SYuri Pankov sz = strlen(conf.manpath.paths[j]); 500a40ea1a7SYuri Pankov if (sz && conf.manpath.paths[j][sz - 1] == '/') 501a40ea1a7SYuri Pankov conf.manpath.paths[j][--sz] = '\0'; 502a40ea1a7SYuri Pankov if (0 == sz) 503a40ea1a7SYuri Pankov continue; 504a40ea1a7SYuri Pankov 505a40ea1a7SYuri Pankov if (j) { 506a40ea1a7SYuri Pankov mandoc_ohash_init(&mpages, 6, 507a40ea1a7SYuri Pankov offsetof(struct mpage, inodev)); 508a40ea1a7SYuri Pankov mandoc_ohash_init(&mlinks, 6, 509a40ea1a7SYuri Pankov offsetof(struct mlink, file)); 510a40ea1a7SYuri Pankov } 511a40ea1a7SYuri Pankov 512a40ea1a7SYuri Pankov if ( ! set_basedir(conf.manpath.paths[j], argc > 0)) 513a40ea1a7SYuri Pankov continue; 514a40ea1a7SYuri Pankov if (0 == treescan()) 515a40ea1a7SYuri Pankov continue; 516a40ea1a7SYuri Pankov dba = dba_new(128); 517a40ea1a7SYuri Pankov mpages_merge(dba, mp); 518a40ea1a7SYuri Pankov if (nodb == 0) 519a40ea1a7SYuri Pankov dbwrite(dba); 520a40ea1a7SYuri Pankov dba_free(dba); 521a40ea1a7SYuri Pankov 522a40ea1a7SYuri Pankov if (j + 1 < conf.manpath.sz) { 523a40ea1a7SYuri Pankov mpages_free(); 524a40ea1a7SYuri Pankov ohash_delete(&mpages); 525a40ea1a7SYuri Pankov ohash_delete(&mlinks); 526a40ea1a7SYuri Pankov } 527a40ea1a7SYuri Pankov } 528a40ea1a7SYuri Pankov } 529a40ea1a7SYuri Pankov out: 530a40ea1a7SYuri Pankov manconf_free(&conf); 531a40ea1a7SYuri Pankov mparse_free(mp); 532a40ea1a7SYuri Pankov mchars_free(); 533a40ea1a7SYuri Pankov mpages_free(); 534a40ea1a7SYuri Pankov ohash_delete(&mpages); 535a40ea1a7SYuri Pankov ohash_delete(&mlinks); 536a40ea1a7SYuri Pankov return exitcode; 537a40ea1a7SYuri Pankov usage: 538a40ea1a7SYuri Pankov progname = getprogname(); 539a40ea1a7SYuri Pankov fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" 540a40ea1a7SYuri Pankov " %s [-aDnpQ] [-Tutf8] dir ...\n" 541a40ea1a7SYuri Pankov " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" 542a40ea1a7SYuri Pankov " %s [-Dnp] -u dir [file ...]\n" 543a40ea1a7SYuri Pankov " %s [-Q] -t file ...\n", 544a40ea1a7SYuri Pankov progname, progname, progname, progname, progname); 545a40ea1a7SYuri Pankov 546a40ea1a7SYuri Pankov return (int)MANDOCLEVEL_BADARG; 547a40ea1a7SYuri Pankov } 548a40ea1a7SYuri Pankov 549a40ea1a7SYuri Pankov /* 550a40ea1a7SYuri Pankov * To get a singly linked list in alpha order while inserting entries 551a40ea1a7SYuri Pankov * at the beginning, process directory entries in reverse alpha order. 552a40ea1a7SYuri Pankov */ 553a40ea1a7SYuri Pankov static int 554a40ea1a7SYuri Pankov #if HAVE_FTS_COMPARE_CONST 555a40ea1a7SYuri Pankov fts_compare(const FTSENT *const *a, const FTSENT *const *b) 556a40ea1a7SYuri Pankov #else 557a40ea1a7SYuri Pankov fts_compare(const FTSENT **a, const FTSENT **b) 558a40ea1a7SYuri Pankov #endif 559a40ea1a7SYuri Pankov { 560a40ea1a7SYuri Pankov return -strcmp((*a)->fts_name, (*b)->fts_name); 561a40ea1a7SYuri Pankov } 562a40ea1a7SYuri Pankov 563a40ea1a7SYuri Pankov /* 564a40ea1a7SYuri Pankov * Scan a directory tree rooted at "basedir" for manpages. 565a40ea1a7SYuri Pankov * We use fts(), scanning directory parts along the way for clues to our 566a40ea1a7SYuri Pankov * section and architecture. 567a40ea1a7SYuri Pankov * 568a40ea1a7SYuri Pankov * If use_all has been specified, grok all files. 569a40ea1a7SYuri Pankov * If not, sanitise paths to the following: 570a40ea1a7SYuri Pankov * 571a40ea1a7SYuri Pankov * [./]man*[/<arch>]/<name>.<section> 572a40ea1a7SYuri Pankov * or 573a40ea1a7SYuri Pankov * [./]cat<section>[/<arch>]/<name>.0 574a40ea1a7SYuri Pankov * 575a40ea1a7SYuri Pankov * TODO: accommodate for multi-language directories. 576a40ea1a7SYuri Pankov */ 577a40ea1a7SYuri Pankov static int 578a40ea1a7SYuri Pankov treescan(void) 579a40ea1a7SYuri Pankov { 580a40ea1a7SYuri Pankov char buf[PATH_MAX]; 581a40ea1a7SYuri Pankov FTS *f; 582a40ea1a7SYuri Pankov FTSENT *ff; 583a40ea1a7SYuri Pankov struct mlink *mlink; 584a40ea1a7SYuri Pankov int gzip; 585a40ea1a7SYuri Pankov enum form dform; 586a40ea1a7SYuri Pankov char *dsec, *arch, *fsec, *cp; 587a40ea1a7SYuri Pankov const char *path; 588a40ea1a7SYuri Pankov const char *argv[2]; 589a40ea1a7SYuri Pankov 590a40ea1a7SYuri Pankov argv[0] = "."; 591a40ea1a7SYuri Pankov argv[1] = NULL; 592a40ea1a7SYuri Pankov 593a40ea1a7SYuri Pankov f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR, 594a40ea1a7SYuri Pankov fts_compare); 595a40ea1a7SYuri Pankov if (f == NULL) { 596a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 597a40ea1a7SYuri Pankov say("", "&fts_open"); 598a40ea1a7SYuri Pankov return 0; 599a40ea1a7SYuri Pankov } 600a40ea1a7SYuri Pankov 601a40ea1a7SYuri Pankov dsec = arch = NULL; 602a40ea1a7SYuri Pankov dform = FORM_NONE; 603a40ea1a7SYuri Pankov 604a40ea1a7SYuri Pankov while ((ff = fts_read(f)) != NULL) { 605a40ea1a7SYuri Pankov path = ff->fts_path + 2; 606a40ea1a7SYuri Pankov switch (ff->fts_info) { 607a40ea1a7SYuri Pankov 608a40ea1a7SYuri Pankov /* 609a40ea1a7SYuri Pankov * Symbolic links require various sanity checks, 610a40ea1a7SYuri Pankov * then get handled just like regular files. 611a40ea1a7SYuri Pankov */ 612a40ea1a7SYuri Pankov case FTS_SL: 613a40ea1a7SYuri Pankov if (realpath(path, buf) == NULL) { 614a40ea1a7SYuri Pankov if (warnings) 615a40ea1a7SYuri Pankov say(path, "&realpath"); 616a40ea1a7SYuri Pankov continue; 617a40ea1a7SYuri Pankov } 618a40ea1a7SYuri Pankov if (strstr(buf, basedir) != buf 619a40ea1a7SYuri Pankov #ifdef HOMEBREWDIR 620a40ea1a7SYuri Pankov && strstr(buf, HOMEBREWDIR) != buf 621a40ea1a7SYuri Pankov #endif 622a40ea1a7SYuri Pankov ) { 623a40ea1a7SYuri Pankov if (warnings) say("", 624a40ea1a7SYuri Pankov "%s: outside base directory", buf); 625a40ea1a7SYuri Pankov continue; 626a40ea1a7SYuri Pankov } 627a40ea1a7SYuri Pankov /* Use logical inode to avoid mpages dupe. */ 628a40ea1a7SYuri Pankov if (stat(path, ff->fts_statp) == -1) { 629a40ea1a7SYuri Pankov if (warnings) 630a40ea1a7SYuri Pankov say(path, "&stat"); 631a40ea1a7SYuri Pankov continue; 632a40ea1a7SYuri Pankov } 633a40ea1a7SYuri Pankov /* FALLTHROUGH */ 634a40ea1a7SYuri Pankov 635a40ea1a7SYuri Pankov /* 636a40ea1a7SYuri Pankov * If we're a regular file, add an mlink by using the 637a40ea1a7SYuri Pankov * stored directory data and handling the filename. 638a40ea1a7SYuri Pankov */ 639a40ea1a7SYuri Pankov case FTS_F: 640a40ea1a7SYuri Pankov if ( ! strcmp(path, MANDOC_DB)) 641a40ea1a7SYuri Pankov continue; 642a40ea1a7SYuri Pankov if ( ! use_all && ff->fts_level < 2) { 643a40ea1a7SYuri Pankov if (warnings) 644a40ea1a7SYuri Pankov say(path, "Extraneous file"); 645a40ea1a7SYuri Pankov continue; 646a40ea1a7SYuri Pankov } 647a40ea1a7SYuri Pankov gzip = 0; 648a40ea1a7SYuri Pankov fsec = NULL; 649a40ea1a7SYuri Pankov while (fsec == NULL) { 650a40ea1a7SYuri Pankov fsec = strrchr(ff->fts_name, '.'); 651a40ea1a7SYuri Pankov if (fsec == NULL || strcmp(fsec+1, "gz")) 652a40ea1a7SYuri Pankov break; 653a40ea1a7SYuri Pankov gzip = 1; 654a40ea1a7SYuri Pankov *fsec = '\0'; 655a40ea1a7SYuri Pankov fsec = NULL; 656a40ea1a7SYuri Pankov } 657a40ea1a7SYuri Pankov if (fsec == NULL) { 658a40ea1a7SYuri Pankov if ( ! use_all) { 659a40ea1a7SYuri Pankov if (warnings) 660a40ea1a7SYuri Pankov say(path, 661a40ea1a7SYuri Pankov "No filename suffix"); 662a40ea1a7SYuri Pankov continue; 663a40ea1a7SYuri Pankov } 664a40ea1a7SYuri Pankov } else if ( ! strcmp(++fsec, "html")) { 665a40ea1a7SYuri Pankov if (warnings) 666a40ea1a7SYuri Pankov say(path, "Skip html"); 667a40ea1a7SYuri Pankov continue; 668a40ea1a7SYuri Pankov } else if ( ! strcmp(fsec, "ps")) { 669a40ea1a7SYuri Pankov if (warnings) 670a40ea1a7SYuri Pankov say(path, "Skip ps"); 671a40ea1a7SYuri Pankov continue; 672a40ea1a7SYuri Pankov } else if ( ! strcmp(fsec, "pdf")) { 673a40ea1a7SYuri Pankov if (warnings) 674a40ea1a7SYuri Pankov say(path, "Skip pdf"); 675a40ea1a7SYuri Pankov continue; 676a40ea1a7SYuri Pankov } else if ( ! use_all && 677a40ea1a7SYuri Pankov ((dform == FORM_SRC && 678a40ea1a7SYuri Pankov strncmp(fsec, dsec, strlen(dsec))) || 679a40ea1a7SYuri Pankov (dform == FORM_CAT && strcmp(fsec, "0")))) { 680a40ea1a7SYuri Pankov if (warnings) 681a40ea1a7SYuri Pankov say(path, "Wrong filename suffix"); 682a40ea1a7SYuri Pankov continue; 683a40ea1a7SYuri Pankov } else 684a40ea1a7SYuri Pankov fsec[-1] = '\0'; 685a40ea1a7SYuri Pankov 686a40ea1a7SYuri Pankov mlink = mandoc_calloc(1, sizeof(struct mlink)); 687a40ea1a7SYuri Pankov if (strlcpy(mlink->file, path, 688a40ea1a7SYuri Pankov sizeof(mlink->file)) >= 689a40ea1a7SYuri Pankov sizeof(mlink->file)) { 690a40ea1a7SYuri Pankov say(path, "Filename too long"); 691a40ea1a7SYuri Pankov free(mlink); 692a40ea1a7SYuri Pankov continue; 693a40ea1a7SYuri Pankov } 694a40ea1a7SYuri Pankov mlink->dform = dform; 695a40ea1a7SYuri Pankov mlink->dsec = dsec; 696a40ea1a7SYuri Pankov mlink->arch = arch; 697a40ea1a7SYuri Pankov mlink->name = ff->fts_name; 698a40ea1a7SYuri Pankov mlink->fsec = fsec; 699a40ea1a7SYuri Pankov mlink->gzip = gzip; 700a40ea1a7SYuri Pankov mlink_add(mlink, ff->fts_statp); 701a40ea1a7SYuri Pankov continue; 702a40ea1a7SYuri Pankov 703a40ea1a7SYuri Pankov case FTS_D: 704a40ea1a7SYuri Pankov case FTS_DP: 705a40ea1a7SYuri Pankov break; 706a40ea1a7SYuri Pankov 707a40ea1a7SYuri Pankov default: 708a40ea1a7SYuri Pankov if (warnings) 709a40ea1a7SYuri Pankov say(path, "Not a regular file"); 710a40ea1a7SYuri Pankov continue; 711a40ea1a7SYuri Pankov } 712a40ea1a7SYuri Pankov 713a40ea1a7SYuri Pankov switch (ff->fts_level) { 714a40ea1a7SYuri Pankov case 0: 715a40ea1a7SYuri Pankov /* Ignore the root directory. */ 716a40ea1a7SYuri Pankov break; 717a40ea1a7SYuri Pankov case 1: 718a40ea1a7SYuri Pankov /* 719a40ea1a7SYuri Pankov * This might contain manX/ or catX/. 720a40ea1a7SYuri Pankov * Try to infer this from the name. 721a40ea1a7SYuri Pankov * If we're not in use_all, enforce it. 722a40ea1a7SYuri Pankov */ 723a40ea1a7SYuri Pankov cp = ff->fts_name; 724a40ea1a7SYuri Pankov if (ff->fts_info == FTS_DP) { 725a40ea1a7SYuri Pankov dform = FORM_NONE; 726a40ea1a7SYuri Pankov dsec = NULL; 727a40ea1a7SYuri Pankov break; 728a40ea1a7SYuri Pankov } 729a40ea1a7SYuri Pankov 730a40ea1a7SYuri Pankov if ( ! strncmp(cp, "man", 3)) { 731a40ea1a7SYuri Pankov dform = FORM_SRC; 732a40ea1a7SYuri Pankov dsec = cp + 3; 733a40ea1a7SYuri Pankov } else if ( ! strncmp(cp, "cat", 3)) { 734a40ea1a7SYuri Pankov dform = FORM_CAT; 735a40ea1a7SYuri Pankov dsec = cp + 3; 736a40ea1a7SYuri Pankov } else { 737a40ea1a7SYuri Pankov dform = FORM_NONE; 738a40ea1a7SYuri Pankov dsec = NULL; 739a40ea1a7SYuri Pankov } 740a40ea1a7SYuri Pankov 741a40ea1a7SYuri Pankov if (dsec != NULL || use_all) 742a40ea1a7SYuri Pankov break; 743a40ea1a7SYuri Pankov 744a40ea1a7SYuri Pankov if (warnings) 745a40ea1a7SYuri Pankov say(path, "Unknown directory part"); 746a40ea1a7SYuri Pankov fts_set(f, ff, FTS_SKIP); 747a40ea1a7SYuri Pankov break; 748a40ea1a7SYuri Pankov case 2: 749a40ea1a7SYuri Pankov /* 750a40ea1a7SYuri Pankov * Possibly our architecture. 751a40ea1a7SYuri Pankov * If we're descending, keep tabs on it. 752a40ea1a7SYuri Pankov */ 753a40ea1a7SYuri Pankov if (ff->fts_info != FTS_DP && dsec != NULL) 754a40ea1a7SYuri Pankov arch = ff->fts_name; 755a40ea1a7SYuri Pankov else 756a40ea1a7SYuri Pankov arch = NULL; 757a40ea1a7SYuri Pankov break; 758a40ea1a7SYuri Pankov default: 759a40ea1a7SYuri Pankov if (ff->fts_info == FTS_DP || use_all) 760a40ea1a7SYuri Pankov break; 761a40ea1a7SYuri Pankov if (warnings) 762a40ea1a7SYuri Pankov say(path, "Extraneous directory part"); 763a40ea1a7SYuri Pankov fts_set(f, ff, FTS_SKIP); 764a40ea1a7SYuri Pankov break; 765a40ea1a7SYuri Pankov } 766a40ea1a7SYuri Pankov } 767a40ea1a7SYuri Pankov 768a40ea1a7SYuri Pankov fts_close(f); 769a40ea1a7SYuri Pankov return 1; 770a40ea1a7SYuri Pankov } 771a40ea1a7SYuri Pankov 772a40ea1a7SYuri Pankov /* 773a40ea1a7SYuri Pankov * Add a file to the mlinks table. 774a40ea1a7SYuri Pankov * Do not verify that it's a "valid" looking manpage (we'll do that 775a40ea1a7SYuri Pankov * later). 776a40ea1a7SYuri Pankov * 777a40ea1a7SYuri Pankov * Try to infer the manual section, architecture, and page name from the 778a40ea1a7SYuri Pankov * path, assuming it looks like 779a40ea1a7SYuri Pankov * 780a40ea1a7SYuri Pankov * [./]man*[/<arch>]/<name>.<section> 781a40ea1a7SYuri Pankov * or 782a40ea1a7SYuri Pankov * [./]cat<section>[/<arch>]/<name>.0 783a40ea1a7SYuri Pankov * 784a40ea1a7SYuri Pankov * See treescan() for the fts(3) version of this. 785a40ea1a7SYuri Pankov */ 786a40ea1a7SYuri Pankov static void 787a40ea1a7SYuri Pankov filescan(const char *file) 788a40ea1a7SYuri Pankov { 789a40ea1a7SYuri Pankov char buf[PATH_MAX]; 790a40ea1a7SYuri Pankov struct stat st; 791a40ea1a7SYuri Pankov struct mlink *mlink; 792a40ea1a7SYuri Pankov char *p, *start; 793a40ea1a7SYuri Pankov 794a40ea1a7SYuri Pankov assert(use_all); 795a40ea1a7SYuri Pankov 796a40ea1a7SYuri Pankov if (0 == strncmp(file, "./", 2)) 797a40ea1a7SYuri Pankov file += 2; 798a40ea1a7SYuri Pankov 799a40ea1a7SYuri Pankov /* 800a40ea1a7SYuri Pankov * We have to do lstat(2) before realpath(3) loses 801a40ea1a7SYuri Pankov * the information whether this is a symbolic link. 802a40ea1a7SYuri Pankov * We need to know that because for symbolic links, 803a40ea1a7SYuri Pankov * we want to use the orginal file name, while for 804a40ea1a7SYuri Pankov * regular files, we want to use the real path. 805a40ea1a7SYuri Pankov */ 806a40ea1a7SYuri Pankov if (-1 == lstat(file, &st)) { 807a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 808a40ea1a7SYuri Pankov say(file, "&lstat"); 809a40ea1a7SYuri Pankov return; 810a40ea1a7SYuri Pankov } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) { 811a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 812a40ea1a7SYuri Pankov say(file, "Not a regular file"); 813a40ea1a7SYuri Pankov return; 814a40ea1a7SYuri Pankov } 815a40ea1a7SYuri Pankov 816a40ea1a7SYuri Pankov /* 817a40ea1a7SYuri Pankov * We have to resolve the file name to the real path 818a40ea1a7SYuri Pankov * in any case for the base directory check. 819a40ea1a7SYuri Pankov */ 820a40ea1a7SYuri Pankov if (NULL == realpath(file, buf)) { 821a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 822a40ea1a7SYuri Pankov say(file, "&realpath"); 823a40ea1a7SYuri Pankov return; 824a40ea1a7SYuri Pankov } 825a40ea1a7SYuri Pankov 826a40ea1a7SYuri Pankov if (OP_TEST == op) 827a40ea1a7SYuri Pankov start = buf; 828a40ea1a7SYuri Pankov else if (strstr(buf, basedir) == buf) 829a40ea1a7SYuri Pankov start = buf + strlen(basedir); 830a40ea1a7SYuri Pankov #ifdef HOMEBREWDIR 831a40ea1a7SYuri Pankov else if (strstr(buf, HOMEBREWDIR) == buf) 832a40ea1a7SYuri Pankov start = buf; 833a40ea1a7SYuri Pankov #endif 834a40ea1a7SYuri Pankov else { 835a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 836a40ea1a7SYuri Pankov say("", "%s: outside base directory", buf); 837a40ea1a7SYuri Pankov return; 838a40ea1a7SYuri Pankov } 839a40ea1a7SYuri Pankov 840a40ea1a7SYuri Pankov /* 841a40ea1a7SYuri Pankov * Now we are sure the file is inside our tree. 842a40ea1a7SYuri Pankov * If it is a symbolic link, ignore the real path 843a40ea1a7SYuri Pankov * and use the original name. 844a40ea1a7SYuri Pankov * This implies passing stuff like "cat1/../man1/foo.1" 845a40ea1a7SYuri Pankov * on the command line won't work. So don't do that. 846a40ea1a7SYuri Pankov * Note the stat(2) can still fail if the link target 847a40ea1a7SYuri Pankov * doesn't exist. 848a40ea1a7SYuri Pankov */ 849a40ea1a7SYuri Pankov if (S_IFLNK & st.st_mode) { 850a40ea1a7SYuri Pankov if (-1 == stat(buf, &st)) { 851a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 852a40ea1a7SYuri Pankov say(file, "&stat"); 853a40ea1a7SYuri Pankov return; 854a40ea1a7SYuri Pankov } 855a40ea1a7SYuri Pankov if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) { 856a40ea1a7SYuri Pankov say(file, "Filename too long"); 857a40ea1a7SYuri Pankov return; 858a40ea1a7SYuri Pankov } 859a40ea1a7SYuri Pankov start = buf; 860a40ea1a7SYuri Pankov if (OP_TEST != op && strstr(buf, basedir) == buf) 861a40ea1a7SYuri Pankov start += strlen(basedir); 862a40ea1a7SYuri Pankov } 863a40ea1a7SYuri Pankov 864a40ea1a7SYuri Pankov mlink = mandoc_calloc(1, sizeof(struct mlink)); 865a40ea1a7SYuri Pankov mlink->dform = FORM_NONE; 866a40ea1a7SYuri Pankov if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= 867a40ea1a7SYuri Pankov sizeof(mlink->file)) { 868a40ea1a7SYuri Pankov say(start, "Filename too long"); 869a40ea1a7SYuri Pankov free(mlink); 870a40ea1a7SYuri Pankov return; 871a40ea1a7SYuri Pankov } 872a40ea1a7SYuri Pankov 873a40ea1a7SYuri Pankov /* 874a40ea1a7SYuri Pankov * In test mode or when the original name is absolute 875a40ea1a7SYuri Pankov * but outside our tree, guess the base directory. 876a40ea1a7SYuri Pankov */ 877a40ea1a7SYuri Pankov 878a40ea1a7SYuri Pankov if (op == OP_TEST || (start == buf && *start == '/')) { 879a40ea1a7SYuri Pankov if (strncmp(buf, "man/", 4) == 0) 880a40ea1a7SYuri Pankov start = buf + 4; 881a40ea1a7SYuri Pankov else if ((start = strstr(buf, "/man/")) != NULL) 882a40ea1a7SYuri Pankov start += 5; 883a40ea1a7SYuri Pankov else 884a40ea1a7SYuri Pankov start = buf; 885a40ea1a7SYuri Pankov } 886a40ea1a7SYuri Pankov 887a40ea1a7SYuri Pankov /* 888a40ea1a7SYuri Pankov * First try to guess our directory structure. 889a40ea1a7SYuri Pankov * If we find a separator, try to look for man* or cat*. 890a40ea1a7SYuri Pankov * If we find one of these and what's underneath is a directory, 891a40ea1a7SYuri Pankov * assume it's an architecture. 892a40ea1a7SYuri Pankov */ 893a40ea1a7SYuri Pankov if (NULL != (p = strchr(start, '/'))) { 894a40ea1a7SYuri Pankov *p++ = '\0'; 895a40ea1a7SYuri Pankov if (0 == strncmp(start, "man", 3)) { 896a40ea1a7SYuri Pankov mlink->dform = FORM_SRC; 897a40ea1a7SYuri Pankov mlink->dsec = start + 3; 898a40ea1a7SYuri Pankov } else if (0 == strncmp(start, "cat", 3)) { 899a40ea1a7SYuri Pankov mlink->dform = FORM_CAT; 900a40ea1a7SYuri Pankov mlink->dsec = start + 3; 901a40ea1a7SYuri Pankov } 902a40ea1a7SYuri Pankov 903a40ea1a7SYuri Pankov start = p; 904a40ea1a7SYuri Pankov if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { 905a40ea1a7SYuri Pankov *p++ = '\0'; 906a40ea1a7SYuri Pankov mlink->arch = start; 907a40ea1a7SYuri Pankov start = p; 908a40ea1a7SYuri Pankov } 909a40ea1a7SYuri Pankov } 910a40ea1a7SYuri Pankov 911a40ea1a7SYuri Pankov /* 912a40ea1a7SYuri Pankov * Now check the file suffix. 913a40ea1a7SYuri Pankov * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. 914a40ea1a7SYuri Pankov */ 915a40ea1a7SYuri Pankov p = strrchr(start, '\0'); 916a40ea1a7SYuri Pankov while (p-- > start && '/' != *p && '.' != *p) 917a40ea1a7SYuri Pankov /* Loop. */ ; 918a40ea1a7SYuri Pankov 919a40ea1a7SYuri Pankov if ('.' == *p) { 920a40ea1a7SYuri Pankov *p++ = '\0'; 921a40ea1a7SYuri Pankov mlink->fsec = p; 922a40ea1a7SYuri Pankov } 923a40ea1a7SYuri Pankov 924a40ea1a7SYuri Pankov /* 925a40ea1a7SYuri Pankov * Now try to parse the name. 926a40ea1a7SYuri Pankov * Use the filename portion of the path. 927a40ea1a7SYuri Pankov */ 928a40ea1a7SYuri Pankov mlink->name = start; 929a40ea1a7SYuri Pankov if (NULL != (p = strrchr(start, '/'))) { 930a40ea1a7SYuri Pankov mlink->name = p + 1; 931a40ea1a7SYuri Pankov *p = '\0'; 932a40ea1a7SYuri Pankov } 933a40ea1a7SYuri Pankov mlink_add(mlink, &st); 934a40ea1a7SYuri Pankov } 935a40ea1a7SYuri Pankov 936a40ea1a7SYuri Pankov static void 937a40ea1a7SYuri Pankov mlink_add(struct mlink *mlink, const struct stat *st) 938a40ea1a7SYuri Pankov { 939a40ea1a7SYuri Pankov struct inodev inodev; 940a40ea1a7SYuri Pankov struct mpage *mpage; 941a40ea1a7SYuri Pankov unsigned int slot; 942a40ea1a7SYuri Pankov 943a40ea1a7SYuri Pankov assert(NULL != mlink->file); 944a40ea1a7SYuri Pankov 945a40ea1a7SYuri Pankov mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); 946a40ea1a7SYuri Pankov mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); 947a40ea1a7SYuri Pankov mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); 948a40ea1a7SYuri Pankov mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); 949a40ea1a7SYuri Pankov 950a40ea1a7SYuri Pankov if ('0' == *mlink->fsec) { 951a40ea1a7SYuri Pankov free(mlink->fsec); 952a40ea1a7SYuri Pankov mlink->fsec = mandoc_strdup(mlink->dsec); 953a40ea1a7SYuri Pankov mlink->fform = FORM_CAT; 954a40ea1a7SYuri Pankov } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) 955a40ea1a7SYuri Pankov mlink->fform = FORM_SRC; 956a40ea1a7SYuri Pankov else 957a40ea1a7SYuri Pankov mlink->fform = FORM_NONE; 958a40ea1a7SYuri Pankov 959a40ea1a7SYuri Pankov slot = ohash_qlookup(&mlinks, mlink->file); 960a40ea1a7SYuri Pankov assert(NULL == ohash_find(&mlinks, slot)); 961a40ea1a7SYuri Pankov ohash_insert(&mlinks, slot, mlink); 962a40ea1a7SYuri Pankov 963a40ea1a7SYuri Pankov memset(&inodev, 0, sizeof(inodev)); /* Clear padding. */ 964a40ea1a7SYuri Pankov inodev.st_ino = st->st_ino; 965a40ea1a7SYuri Pankov inodev.st_dev = st->st_dev; 966a40ea1a7SYuri Pankov slot = ohash_lookup_memory(&mpages, (char *)&inodev, 967a40ea1a7SYuri Pankov sizeof(struct inodev), inodev.st_ino); 968a40ea1a7SYuri Pankov mpage = ohash_find(&mpages, slot); 969a40ea1a7SYuri Pankov if (NULL == mpage) { 970a40ea1a7SYuri Pankov mpage = mandoc_calloc(1, sizeof(struct mpage)); 971a40ea1a7SYuri Pankov mpage->inodev.st_ino = inodev.st_ino; 972a40ea1a7SYuri Pankov mpage->inodev.st_dev = inodev.st_dev; 973a40ea1a7SYuri Pankov mpage->form = FORM_NONE; 974a40ea1a7SYuri Pankov mpage->next = mpage_head; 975a40ea1a7SYuri Pankov mpage_head = mpage; 976a40ea1a7SYuri Pankov ohash_insert(&mpages, slot, mpage); 977a40ea1a7SYuri Pankov } else 978a40ea1a7SYuri Pankov mlink->next = mpage->mlinks; 979a40ea1a7SYuri Pankov mpage->mlinks = mlink; 980a40ea1a7SYuri Pankov mlink->mpage = mpage; 981a40ea1a7SYuri Pankov } 982a40ea1a7SYuri Pankov 983a40ea1a7SYuri Pankov static void 984a40ea1a7SYuri Pankov mlink_free(struct mlink *mlink) 985a40ea1a7SYuri Pankov { 986a40ea1a7SYuri Pankov 987a40ea1a7SYuri Pankov free(mlink->dsec); 988a40ea1a7SYuri Pankov free(mlink->arch); 989a40ea1a7SYuri Pankov free(mlink->name); 990a40ea1a7SYuri Pankov free(mlink->fsec); 991a40ea1a7SYuri Pankov free(mlink); 992a40ea1a7SYuri Pankov } 993a40ea1a7SYuri Pankov 994a40ea1a7SYuri Pankov static void 995a40ea1a7SYuri Pankov mpages_free(void) 996a40ea1a7SYuri Pankov { 997a40ea1a7SYuri Pankov struct mpage *mpage; 998a40ea1a7SYuri Pankov struct mlink *mlink; 999a40ea1a7SYuri Pankov 1000a40ea1a7SYuri Pankov while ((mpage = mpage_head) != NULL) { 1001a40ea1a7SYuri Pankov while ((mlink = mpage->mlinks) != NULL) { 1002a40ea1a7SYuri Pankov mpage->mlinks = mlink->next; 1003a40ea1a7SYuri Pankov mlink_free(mlink); 1004a40ea1a7SYuri Pankov } 1005a40ea1a7SYuri Pankov mpage_head = mpage->next; 1006a40ea1a7SYuri Pankov free(mpage->sec); 1007a40ea1a7SYuri Pankov free(mpage->arch); 1008a40ea1a7SYuri Pankov free(mpage->title); 1009a40ea1a7SYuri Pankov free(mpage->desc); 1010a40ea1a7SYuri Pankov free(mpage); 1011a40ea1a7SYuri Pankov } 1012a40ea1a7SYuri Pankov } 1013a40ea1a7SYuri Pankov 1014a40ea1a7SYuri Pankov /* 1015a40ea1a7SYuri Pankov * For each mlink to the mpage, check whether the path looks like 1016a40ea1a7SYuri Pankov * it is formatted, and if it does, check whether a source manual 1017a40ea1a7SYuri Pankov * exists by the same name, ignoring the suffix. 1018a40ea1a7SYuri Pankov * If both conditions hold, drop the mlink. 1019a40ea1a7SYuri Pankov */ 1020a40ea1a7SYuri Pankov static void 1021a40ea1a7SYuri Pankov mlinks_undupe(struct mpage *mpage) 1022a40ea1a7SYuri Pankov { 1023a40ea1a7SYuri Pankov char buf[PATH_MAX]; 1024a40ea1a7SYuri Pankov struct mlink **prev; 1025a40ea1a7SYuri Pankov struct mlink *mlink; 1026a40ea1a7SYuri Pankov char *bufp; 1027a40ea1a7SYuri Pankov 1028a40ea1a7SYuri Pankov mpage->form = FORM_CAT; 1029a40ea1a7SYuri Pankov prev = &mpage->mlinks; 1030a40ea1a7SYuri Pankov while (NULL != (mlink = *prev)) { 1031a40ea1a7SYuri Pankov if (FORM_CAT != mlink->dform) { 1032a40ea1a7SYuri Pankov mpage->form = FORM_NONE; 1033a40ea1a7SYuri Pankov goto nextlink; 1034a40ea1a7SYuri Pankov } 1035a40ea1a7SYuri Pankov (void)strlcpy(buf, mlink->file, sizeof(buf)); 1036a40ea1a7SYuri Pankov bufp = strstr(buf, "cat"); 1037a40ea1a7SYuri Pankov assert(NULL != bufp); 1038a40ea1a7SYuri Pankov memcpy(bufp, "man", 3); 1039a40ea1a7SYuri Pankov if (NULL != (bufp = strrchr(buf, '.'))) 1040a40ea1a7SYuri Pankov *++bufp = '\0'; 1041a40ea1a7SYuri Pankov (void)strlcat(buf, mlink->dsec, sizeof(buf)); 1042a40ea1a7SYuri Pankov if (NULL == ohash_find(&mlinks, 1043a40ea1a7SYuri Pankov ohash_qlookup(&mlinks, buf))) 1044a40ea1a7SYuri Pankov goto nextlink; 1045a40ea1a7SYuri Pankov if (warnings) 1046a40ea1a7SYuri Pankov say(mlink->file, "Man source exists: %s", buf); 1047a40ea1a7SYuri Pankov if (use_all) 1048a40ea1a7SYuri Pankov goto nextlink; 1049a40ea1a7SYuri Pankov *prev = mlink->next; 1050a40ea1a7SYuri Pankov mlink_free(mlink); 1051a40ea1a7SYuri Pankov continue; 1052a40ea1a7SYuri Pankov nextlink: 1053a40ea1a7SYuri Pankov prev = &(*prev)->next; 1054a40ea1a7SYuri Pankov } 1055a40ea1a7SYuri Pankov } 1056a40ea1a7SYuri Pankov 1057a40ea1a7SYuri Pankov static void 1058a40ea1a7SYuri Pankov mlink_check(struct mpage *mpage, struct mlink *mlink) 1059a40ea1a7SYuri Pankov { 1060a40ea1a7SYuri Pankov struct str *str; 1061a40ea1a7SYuri Pankov unsigned int slot; 1062a40ea1a7SYuri Pankov 1063a40ea1a7SYuri Pankov /* 1064a40ea1a7SYuri Pankov * Check whether the manual section given in a file 1065a40ea1a7SYuri Pankov * agrees with the directory where the file is located. 1066a40ea1a7SYuri Pankov * Some manuals have suffixes like (3p) on their 1067a40ea1a7SYuri Pankov * section number either inside the file or in the 1068a40ea1a7SYuri Pankov * directory name, some are linked into more than one 1069a40ea1a7SYuri Pankov * section, like encrypt(1) = makekey(8). 1070a40ea1a7SYuri Pankov */ 1071a40ea1a7SYuri Pankov 1072a40ea1a7SYuri Pankov if (FORM_SRC == mpage->form && 1073a40ea1a7SYuri Pankov strcasecmp(mpage->sec, mlink->dsec)) 1074a40ea1a7SYuri Pankov say(mlink->file, "Section \"%s\" manual in %s directory", 1075a40ea1a7SYuri Pankov mpage->sec, mlink->dsec); 1076a40ea1a7SYuri Pankov 1077a40ea1a7SYuri Pankov /* 1078a40ea1a7SYuri Pankov * Manual page directories exist for each kernel 1079a40ea1a7SYuri Pankov * architecture as returned by machine(1). 1080a40ea1a7SYuri Pankov * However, many manuals only depend on the 1081a40ea1a7SYuri Pankov * application architecture as returned by arch(1). 1082a40ea1a7SYuri Pankov * For example, some (2/ARM) manuals are shared 1083a40ea1a7SYuri Pankov * across the "armish" and "zaurus" kernel 1084a40ea1a7SYuri Pankov * architectures. 1085a40ea1a7SYuri Pankov * A few manuals are even shared across completely 1086a40ea1a7SYuri Pankov * different architectures, for example fdformat(1) 1087a40ea1a7SYuri Pankov * on amd64, i386, and sparc64. 1088a40ea1a7SYuri Pankov */ 1089a40ea1a7SYuri Pankov 1090a40ea1a7SYuri Pankov if (strcasecmp(mpage->arch, mlink->arch)) 1091a40ea1a7SYuri Pankov say(mlink->file, "Architecture \"%s\" manual in " 1092a40ea1a7SYuri Pankov "\"%s\" directory", mpage->arch, mlink->arch); 1093a40ea1a7SYuri Pankov 1094a40ea1a7SYuri Pankov /* 1095a40ea1a7SYuri Pankov * XXX 1096a40ea1a7SYuri Pankov * parse_cat() doesn't set NAME_TITLE yet. 1097a40ea1a7SYuri Pankov */ 1098a40ea1a7SYuri Pankov 1099a40ea1a7SYuri Pankov if (FORM_CAT == mpage->form) 1100a40ea1a7SYuri Pankov return; 1101a40ea1a7SYuri Pankov 1102a40ea1a7SYuri Pankov /* 1103a40ea1a7SYuri Pankov * Check whether this mlink 1104a40ea1a7SYuri Pankov * appears as a name in the NAME section. 1105a40ea1a7SYuri Pankov */ 1106a40ea1a7SYuri Pankov 1107a40ea1a7SYuri Pankov slot = ohash_qlookup(&names, mlink->name); 1108a40ea1a7SYuri Pankov str = ohash_find(&names, slot); 1109a40ea1a7SYuri Pankov assert(NULL != str); 1110a40ea1a7SYuri Pankov if ( ! (NAME_TITLE & str->mask)) 1111a40ea1a7SYuri Pankov say(mlink->file, "Name missing in NAME section"); 1112a40ea1a7SYuri Pankov } 1113a40ea1a7SYuri Pankov 1114a40ea1a7SYuri Pankov /* 1115a40ea1a7SYuri Pankov * Run through the files in the global vector "mpages" 1116a40ea1a7SYuri Pankov * and add them to the database specified in "basedir". 1117a40ea1a7SYuri Pankov * 1118a40ea1a7SYuri Pankov * This handles the parsing scheme itself, using the cues of directory 1119a40ea1a7SYuri Pankov * and filename to determine whether the file is parsable or not. 1120a40ea1a7SYuri Pankov */ 1121a40ea1a7SYuri Pankov static void 1122a40ea1a7SYuri Pankov mpages_merge(struct dba *dba, struct mparse *mp) 1123a40ea1a7SYuri Pankov { 1124a40ea1a7SYuri Pankov struct mpage *mpage, *mpage_dest; 1125a40ea1a7SYuri Pankov struct mlink *mlink, *mlink_dest; 1126a40ea1a7SYuri Pankov struct roff_man *man; 1127a40ea1a7SYuri Pankov char *sodest; 1128a40ea1a7SYuri Pankov char *cp; 1129a40ea1a7SYuri Pankov int fd; 1130a40ea1a7SYuri Pankov 1131a40ea1a7SYuri Pankov for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) { 1132a40ea1a7SYuri Pankov mlinks_undupe(mpage); 1133a40ea1a7SYuri Pankov if ((mlink = mpage->mlinks) == NULL) 1134a40ea1a7SYuri Pankov continue; 1135a40ea1a7SYuri Pankov 1136a40ea1a7SYuri Pankov name_mask = NAME_MASK; 1137a40ea1a7SYuri Pankov mandoc_ohash_init(&names, 4, offsetof(struct str, key)); 1138a40ea1a7SYuri Pankov mandoc_ohash_init(&strings, 6, offsetof(struct str, key)); 1139a40ea1a7SYuri Pankov mparse_reset(mp); 1140a40ea1a7SYuri Pankov man = NULL; 1141a40ea1a7SYuri Pankov sodest = NULL; 1142a40ea1a7SYuri Pankov 1143a40ea1a7SYuri Pankov if ((fd = mparse_open(mp, mlink->file)) == -1) { 1144a40ea1a7SYuri Pankov say(mlink->file, "&open"); 1145a40ea1a7SYuri Pankov goto nextpage; 1146a40ea1a7SYuri Pankov } 1147a40ea1a7SYuri Pankov 1148a40ea1a7SYuri Pankov /* 1149a40ea1a7SYuri Pankov * Interpret the file as mdoc(7) or man(7) source 1150a40ea1a7SYuri Pankov * code, unless it is known to be formatted. 1151a40ea1a7SYuri Pankov */ 1152a40ea1a7SYuri Pankov if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) { 1153a40ea1a7SYuri Pankov mparse_readfd(mp, fd, mlink->file); 1154a40ea1a7SYuri Pankov close(fd); 1155a40ea1a7SYuri Pankov fd = -1; 1156a40ea1a7SYuri Pankov mparse_result(mp, &man, &sodest); 1157a40ea1a7SYuri Pankov } 1158a40ea1a7SYuri Pankov 1159a40ea1a7SYuri Pankov if (sodest != NULL) { 1160a40ea1a7SYuri Pankov mlink_dest = ohash_find(&mlinks, 1161a40ea1a7SYuri Pankov ohash_qlookup(&mlinks, sodest)); 1162a40ea1a7SYuri Pankov if (mlink_dest == NULL) { 1163a40ea1a7SYuri Pankov mandoc_asprintf(&cp, "%s.gz", sodest); 1164a40ea1a7SYuri Pankov mlink_dest = ohash_find(&mlinks, 1165a40ea1a7SYuri Pankov ohash_qlookup(&mlinks, cp)); 1166a40ea1a7SYuri Pankov free(cp); 1167a40ea1a7SYuri Pankov } 1168a40ea1a7SYuri Pankov if (mlink_dest != NULL) { 1169a40ea1a7SYuri Pankov 1170a40ea1a7SYuri Pankov /* The .so target exists. */ 1171a40ea1a7SYuri Pankov 1172a40ea1a7SYuri Pankov mpage_dest = mlink_dest->mpage; 1173a40ea1a7SYuri Pankov while (1) { 1174a40ea1a7SYuri Pankov mlink->mpage = mpage_dest; 1175a40ea1a7SYuri Pankov 1176a40ea1a7SYuri Pankov /* 1177a40ea1a7SYuri Pankov * If the target was already 1178a40ea1a7SYuri Pankov * processed, add the links 1179a40ea1a7SYuri Pankov * to the database now. 1180a40ea1a7SYuri Pankov * Otherwise, this will 1181a40ea1a7SYuri Pankov * happen when we come 1182a40ea1a7SYuri Pankov * to the target. 1183a40ea1a7SYuri Pankov */ 1184a40ea1a7SYuri Pankov 1185a40ea1a7SYuri Pankov if (mpage_dest->dba != NULL) 1186a40ea1a7SYuri Pankov dbadd_mlink(mlink); 1187a40ea1a7SYuri Pankov 1188a40ea1a7SYuri Pankov if (mlink->next == NULL) 1189a40ea1a7SYuri Pankov break; 1190a40ea1a7SYuri Pankov mlink = mlink->next; 1191a40ea1a7SYuri Pankov } 1192a40ea1a7SYuri Pankov 1193a40ea1a7SYuri Pankov /* Move all links to the target. */ 1194a40ea1a7SYuri Pankov 1195a40ea1a7SYuri Pankov mlink->next = mlink_dest->next; 1196a40ea1a7SYuri Pankov mlink_dest->next = mpage->mlinks; 1197a40ea1a7SYuri Pankov mpage->mlinks = NULL; 1198a40ea1a7SYuri Pankov } 1199a40ea1a7SYuri Pankov goto nextpage; 1200a40ea1a7SYuri Pankov } else if (man != NULL && man->macroset == MACROSET_MDOC) { 1201a40ea1a7SYuri Pankov mdoc_validate(man); 1202a40ea1a7SYuri Pankov mpage->form = FORM_SRC; 1203a40ea1a7SYuri Pankov mpage->sec = man->meta.msec; 1204a40ea1a7SYuri Pankov mpage->sec = mandoc_strdup( 1205a40ea1a7SYuri Pankov mpage->sec == NULL ? "" : mpage->sec); 1206a40ea1a7SYuri Pankov mpage->arch = man->meta.arch; 1207a40ea1a7SYuri Pankov mpage->arch = mandoc_strdup( 1208a40ea1a7SYuri Pankov mpage->arch == NULL ? "" : mpage->arch); 1209a40ea1a7SYuri Pankov mpage->title = mandoc_strdup(man->meta.title); 1210a40ea1a7SYuri Pankov } else if (man != NULL && man->macroset == MACROSET_MAN) { 1211a40ea1a7SYuri Pankov man_validate(man); 1212a40ea1a7SYuri Pankov if (*man->meta.msec != '\0' || 1213*c66b8046SYuri Pankov *man->meta.title != '\0') { 1214a40ea1a7SYuri Pankov mpage->form = FORM_SRC; 1215a40ea1a7SYuri Pankov mpage->sec = mandoc_strdup(man->meta.msec); 1216a40ea1a7SYuri Pankov mpage->arch = mandoc_strdup(mlink->arch); 1217a40ea1a7SYuri Pankov mpage->title = mandoc_strdup(man->meta.title); 1218a40ea1a7SYuri Pankov } else 1219a40ea1a7SYuri Pankov man = NULL; 1220a40ea1a7SYuri Pankov } 1221a40ea1a7SYuri Pankov 1222a40ea1a7SYuri Pankov assert(mpage->desc == NULL); 1223a40ea1a7SYuri Pankov if (man == NULL) { 1224a40ea1a7SYuri Pankov mpage->form = FORM_CAT; 1225a40ea1a7SYuri Pankov mpage->sec = mandoc_strdup(mlink->dsec); 1226a40ea1a7SYuri Pankov mpage->arch = mandoc_strdup(mlink->arch); 1227a40ea1a7SYuri Pankov mpage->title = mandoc_strdup(mlink->name); 1228a40ea1a7SYuri Pankov parse_cat(mpage, fd); 1229a40ea1a7SYuri Pankov } else if (man->macroset == MACROSET_MDOC) 1230a40ea1a7SYuri Pankov parse_mdoc(mpage, &man->meta, man->first); 1231a40ea1a7SYuri Pankov else 1232a40ea1a7SYuri Pankov parse_man(mpage, &man->meta, man->first); 1233a40ea1a7SYuri Pankov if (mpage->desc == NULL) { 1234a40ea1a7SYuri Pankov mpage->desc = mandoc_strdup(mlink->name); 1235a40ea1a7SYuri Pankov if (warnings) 1236a40ea1a7SYuri Pankov say(mlink->file, "No one-line description, " 1237a40ea1a7SYuri Pankov "using filename \"%s\"", mlink->name); 1238a40ea1a7SYuri Pankov } 1239a40ea1a7SYuri Pankov 1240a40ea1a7SYuri Pankov for (mlink = mpage->mlinks; 1241a40ea1a7SYuri Pankov mlink != NULL; 1242a40ea1a7SYuri Pankov mlink = mlink->next) { 1243a40ea1a7SYuri Pankov putkey(mpage, mlink->name, NAME_FILE); 1244a40ea1a7SYuri Pankov if (warnings && !use_all) 1245a40ea1a7SYuri Pankov mlink_check(mpage, mlink); 1246a40ea1a7SYuri Pankov } 1247a40ea1a7SYuri Pankov 1248a40ea1a7SYuri Pankov dbadd(dba, mpage); 1249a40ea1a7SYuri Pankov 1250a40ea1a7SYuri Pankov nextpage: 1251a40ea1a7SYuri Pankov ohash_delete(&strings); 1252a40ea1a7SYuri Pankov ohash_delete(&names); 1253a40ea1a7SYuri Pankov } 1254a40ea1a7SYuri Pankov } 1255a40ea1a7SYuri Pankov 1256a40ea1a7SYuri Pankov static void 1257a40ea1a7SYuri Pankov parse_cat(struct mpage *mpage, int fd) 1258a40ea1a7SYuri Pankov { 1259a40ea1a7SYuri Pankov FILE *stream; 1260a40ea1a7SYuri Pankov struct mlink *mlink; 1261a40ea1a7SYuri Pankov char *line, *p, *title, *sec; 1262a40ea1a7SYuri Pankov size_t linesz, plen, titlesz; 1263a40ea1a7SYuri Pankov ssize_t len; 1264a40ea1a7SYuri Pankov int offs; 1265a40ea1a7SYuri Pankov 1266a40ea1a7SYuri Pankov mlink = mpage->mlinks; 1267a40ea1a7SYuri Pankov stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r"); 1268a40ea1a7SYuri Pankov if (stream == NULL) { 1269a40ea1a7SYuri Pankov if (fd != -1) 1270a40ea1a7SYuri Pankov close(fd); 1271a40ea1a7SYuri Pankov if (warnings) 1272a40ea1a7SYuri Pankov say(mlink->file, "&fopen"); 1273a40ea1a7SYuri Pankov return; 1274a40ea1a7SYuri Pankov } 1275a40ea1a7SYuri Pankov 1276a40ea1a7SYuri Pankov line = NULL; 1277a40ea1a7SYuri Pankov linesz = 0; 1278a40ea1a7SYuri Pankov 1279a40ea1a7SYuri Pankov /* Parse the section number from the header line. */ 1280a40ea1a7SYuri Pankov 1281a40ea1a7SYuri Pankov while (getline(&line, &linesz, stream) != -1) { 1282a40ea1a7SYuri Pankov if (*line == '\n') 1283a40ea1a7SYuri Pankov continue; 1284a40ea1a7SYuri Pankov if ((sec = strchr(line, '(')) == NULL) 1285a40ea1a7SYuri Pankov break; 1286a40ea1a7SYuri Pankov if ((p = strchr(++sec, ')')) == NULL) 1287a40ea1a7SYuri Pankov break; 1288a40ea1a7SYuri Pankov free(mpage->sec); 1289a40ea1a7SYuri Pankov mpage->sec = mandoc_strndup(sec, p - sec); 1290a40ea1a7SYuri Pankov if (warnings && *mlink->dsec != '\0' && 1291a40ea1a7SYuri Pankov strcasecmp(mpage->sec, mlink->dsec)) 1292a40ea1a7SYuri Pankov say(mlink->file, 1293a40ea1a7SYuri Pankov "Section \"%s\" manual in %s directory", 1294a40ea1a7SYuri Pankov mpage->sec, mlink->dsec); 1295a40ea1a7SYuri Pankov break; 1296a40ea1a7SYuri Pankov } 1297a40ea1a7SYuri Pankov 1298a40ea1a7SYuri Pankov /* Skip to first blank line. */ 1299a40ea1a7SYuri Pankov 1300a40ea1a7SYuri Pankov while (line == NULL || *line != '\n') 1301a40ea1a7SYuri Pankov if (getline(&line, &linesz, stream) == -1) 1302a40ea1a7SYuri Pankov break; 1303a40ea1a7SYuri Pankov 1304a40ea1a7SYuri Pankov /* 1305a40ea1a7SYuri Pankov * Assume the first line that is not indented 1306a40ea1a7SYuri Pankov * is the first section header. Skip to it. 1307a40ea1a7SYuri Pankov */ 1308a40ea1a7SYuri Pankov 1309a40ea1a7SYuri Pankov while (getline(&line, &linesz, stream) != -1) 1310a40ea1a7SYuri Pankov if (*line != '\n' && *line != ' ') 1311a40ea1a7SYuri Pankov break; 1312a40ea1a7SYuri Pankov 1313a40ea1a7SYuri Pankov /* 1314a40ea1a7SYuri Pankov * Read up until the next section into a buffer. 1315a40ea1a7SYuri Pankov * Strip the leading and trailing newline from each read line, 1316a40ea1a7SYuri Pankov * appending a trailing space. 1317a40ea1a7SYuri Pankov * Ignore empty (whitespace-only) lines. 1318a40ea1a7SYuri Pankov */ 1319a40ea1a7SYuri Pankov 1320a40ea1a7SYuri Pankov titlesz = 0; 1321a40ea1a7SYuri Pankov title = NULL; 1322a40ea1a7SYuri Pankov 1323a40ea1a7SYuri Pankov while ((len = getline(&line, &linesz, stream)) != -1) { 1324a40ea1a7SYuri Pankov if (*line != ' ') 1325a40ea1a7SYuri Pankov break; 1326a40ea1a7SYuri Pankov offs = 0; 1327a40ea1a7SYuri Pankov while (isspace((unsigned char)line[offs])) 1328a40ea1a7SYuri Pankov offs++; 1329a40ea1a7SYuri Pankov if (line[offs] == '\0') 1330a40ea1a7SYuri Pankov continue; 1331a40ea1a7SYuri Pankov title = mandoc_realloc(title, titlesz + len - offs); 1332a40ea1a7SYuri Pankov memcpy(title + titlesz, line + offs, len - offs); 1333a40ea1a7SYuri Pankov titlesz += len - offs; 1334a40ea1a7SYuri Pankov title[titlesz - 1] = ' '; 1335a40ea1a7SYuri Pankov } 1336a40ea1a7SYuri Pankov free(line); 1337a40ea1a7SYuri Pankov 1338a40ea1a7SYuri Pankov /* 1339a40ea1a7SYuri Pankov * If no page content can be found, or the input line 1340a40ea1a7SYuri Pankov * is already the next section header, or there is no 1341a40ea1a7SYuri Pankov * trailing newline, reuse the page title as the page 1342a40ea1a7SYuri Pankov * description. 1343a40ea1a7SYuri Pankov */ 1344a40ea1a7SYuri Pankov 1345a40ea1a7SYuri Pankov if (NULL == title || '\0' == *title) { 1346a40ea1a7SYuri Pankov if (warnings) 1347a40ea1a7SYuri Pankov say(mlink->file, "Cannot find NAME section"); 1348a40ea1a7SYuri Pankov fclose(stream); 1349a40ea1a7SYuri Pankov free(title); 1350a40ea1a7SYuri Pankov return; 1351a40ea1a7SYuri Pankov } 1352a40ea1a7SYuri Pankov 1353a40ea1a7SYuri Pankov title[titlesz - 1] = '\0'; 1354a40ea1a7SYuri Pankov 1355a40ea1a7SYuri Pankov /* 1356a40ea1a7SYuri Pankov * Skip to the first dash. 1357a40ea1a7SYuri Pankov * Use the remaining line as the description (no more than 70 1358a40ea1a7SYuri Pankov * bytes). 1359a40ea1a7SYuri Pankov */ 1360a40ea1a7SYuri Pankov 1361a40ea1a7SYuri Pankov if (NULL != (p = strstr(title, "- "))) { 1362a40ea1a7SYuri Pankov for (p += 2; ' ' == *p || '\b' == *p; p++) 1363a40ea1a7SYuri Pankov /* Skip to next word. */ ; 1364a40ea1a7SYuri Pankov } else { 1365a40ea1a7SYuri Pankov if (warnings) 1366a40ea1a7SYuri Pankov say(mlink->file, "No dash in title line, " 1367a40ea1a7SYuri Pankov "reusing \"%s\" as one-line description", title); 1368a40ea1a7SYuri Pankov p = title; 1369a40ea1a7SYuri Pankov } 1370a40ea1a7SYuri Pankov 1371a40ea1a7SYuri Pankov plen = strlen(p); 1372a40ea1a7SYuri Pankov 1373a40ea1a7SYuri Pankov /* Strip backspace-encoding from line. */ 1374a40ea1a7SYuri Pankov 1375a40ea1a7SYuri Pankov while (NULL != (line = memchr(p, '\b', plen))) { 1376a40ea1a7SYuri Pankov len = line - p; 1377a40ea1a7SYuri Pankov if (0 == len) { 1378a40ea1a7SYuri Pankov memmove(line, line + 1, plen--); 1379a40ea1a7SYuri Pankov continue; 1380a40ea1a7SYuri Pankov } 1381a40ea1a7SYuri Pankov memmove(line - 1, line + 1, plen - len); 1382a40ea1a7SYuri Pankov plen -= 2; 1383a40ea1a7SYuri Pankov } 1384a40ea1a7SYuri Pankov 1385a40ea1a7SYuri Pankov mpage->desc = mandoc_strdup(p); 1386a40ea1a7SYuri Pankov fclose(stream); 1387a40ea1a7SYuri Pankov free(title); 1388a40ea1a7SYuri Pankov } 1389a40ea1a7SYuri Pankov 1390a40ea1a7SYuri Pankov /* 1391a40ea1a7SYuri Pankov * Put a type/word pair into the word database for this particular file. 1392a40ea1a7SYuri Pankov */ 1393a40ea1a7SYuri Pankov static void 1394a40ea1a7SYuri Pankov putkey(const struct mpage *mpage, char *value, uint64_t type) 1395a40ea1a7SYuri Pankov { 1396a40ea1a7SYuri Pankov putkeys(mpage, value, strlen(value), type); 1397a40ea1a7SYuri Pankov } 1398a40ea1a7SYuri Pankov 1399a40ea1a7SYuri Pankov /* 1400a40ea1a7SYuri Pankov * Grok all nodes at or below a certain mdoc node into putkey(). 1401a40ea1a7SYuri Pankov */ 1402a40ea1a7SYuri Pankov static void 1403a40ea1a7SYuri Pankov putmdockey(const struct mpage *mpage, 1404a40ea1a7SYuri Pankov const struct roff_node *n, uint64_t m, int taboo) 1405a40ea1a7SYuri Pankov { 1406a40ea1a7SYuri Pankov 1407a40ea1a7SYuri Pankov for ( ; NULL != n; n = n->next) { 1408a40ea1a7SYuri Pankov if (n->flags & taboo) 1409a40ea1a7SYuri Pankov continue; 1410a40ea1a7SYuri Pankov if (NULL != n->child) 1411a40ea1a7SYuri Pankov putmdockey(mpage, n->child, m, taboo); 1412a40ea1a7SYuri Pankov if (n->type == ROFFT_TEXT) 1413a40ea1a7SYuri Pankov putkey(mpage, n->string, m); 1414a40ea1a7SYuri Pankov } 1415a40ea1a7SYuri Pankov } 1416a40ea1a7SYuri Pankov 1417a40ea1a7SYuri Pankov static void 1418a40ea1a7SYuri Pankov parse_man(struct mpage *mpage, const struct roff_meta *meta, 1419a40ea1a7SYuri Pankov const struct roff_node *n) 1420a40ea1a7SYuri Pankov { 1421a40ea1a7SYuri Pankov const struct roff_node *head, *body; 1422a40ea1a7SYuri Pankov char *start, *title; 1423a40ea1a7SYuri Pankov char byte; 1424a40ea1a7SYuri Pankov size_t sz; 1425a40ea1a7SYuri Pankov 1426a40ea1a7SYuri Pankov if (n == NULL) 1427a40ea1a7SYuri Pankov return; 1428a40ea1a7SYuri Pankov 1429a40ea1a7SYuri Pankov /* 1430a40ea1a7SYuri Pankov * We're only searching for one thing: the first text child in 1431a40ea1a7SYuri Pankov * the BODY of a NAME section. Since we don't keep track of 1432a40ea1a7SYuri Pankov * sections in -man, run some hoops to find out whether we're in 1433a40ea1a7SYuri Pankov * the correct section or not. 1434a40ea1a7SYuri Pankov */ 1435a40ea1a7SYuri Pankov 1436a40ea1a7SYuri Pankov if (n->type == ROFFT_BODY && n->tok == MAN_SH) { 1437a40ea1a7SYuri Pankov body = n; 1438a40ea1a7SYuri Pankov if ((head = body->parent->head) != NULL && 1439a40ea1a7SYuri Pankov (head = head->child) != NULL && 1440a40ea1a7SYuri Pankov head->next == NULL && 1441a40ea1a7SYuri Pankov head->type == ROFFT_TEXT && 1442a40ea1a7SYuri Pankov strcmp(head->string, "NAME") == 0 && 1443a40ea1a7SYuri Pankov body->child != NULL) { 1444a40ea1a7SYuri Pankov 1445a40ea1a7SYuri Pankov /* 1446a40ea1a7SYuri Pankov * Suck the entire NAME section into memory. 1447a40ea1a7SYuri Pankov * Yes, we might run away. 1448a40ea1a7SYuri Pankov * But too many manuals have big, spread-out 1449a40ea1a7SYuri Pankov * NAME sections over many lines. 1450a40ea1a7SYuri Pankov */ 1451a40ea1a7SYuri Pankov 1452a40ea1a7SYuri Pankov title = NULL; 1453a40ea1a7SYuri Pankov deroff(&title, body); 1454a40ea1a7SYuri Pankov if (NULL == title) 1455a40ea1a7SYuri Pankov return; 1456a40ea1a7SYuri Pankov 1457a40ea1a7SYuri Pankov /* 1458a40ea1a7SYuri Pankov * Go through a special heuristic dance here. 1459a40ea1a7SYuri Pankov * Conventionally, one or more manual names are 1460a40ea1a7SYuri Pankov * comma-specified prior to a whitespace, then a 1461a40ea1a7SYuri Pankov * dash, then a description. Try to puzzle out 1462a40ea1a7SYuri Pankov * the name parts here. 1463a40ea1a7SYuri Pankov */ 1464a40ea1a7SYuri Pankov 1465a40ea1a7SYuri Pankov start = title; 1466a40ea1a7SYuri Pankov for ( ;; ) { 1467a40ea1a7SYuri Pankov sz = strcspn(start, " ,"); 1468a40ea1a7SYuri Pankov if ('\0' == start[sz]) 1469a40ea1a7SYuri Pankov break; 1470a40ea1a7SYuri Pankov 1471a40ea1a7SYuri Pankov byte = start[sz]; 1472a40ea1a7SYuri Pankov start[sz] = '\0'; 1473a40ea1a7SYuri Pankov 1474a40ea1a7SYuri Pankov /* 1475a40ea1a7SYuri Pankov * Assume a stray trailing comma in the 1476a40ea1a7SYuri Pankov * name list if a name begins with a dash. 1477a40ea1a7SYuri Pankov */ 1478a40ea1a7SYuri Pankov 1479a40ea1a7SYuri Pankov if ('-' == start[0] || 1480a40ea1a7SYuri Pankov ('\\' == start[0] && '-' == start[1])) 1481a40ea1a7SYuri Pankov break; 1482a40ea1a7SYuri Pankov 1483a40ea1a7SYuri Pankov putkey(mpage, start, NAME_TITLE); 1484a40ea1a7SYuri Pankov if ( ! (mpage->name_head_done || 1485a40ea1a7SYuri Pankov strcasecmp(start, meta->title))) { 1486a40ea1a7SYuri Pankov putkey(mpage, start, NAME_HEAD); 1487a40ea1a7SYuri Pankov mpage->name_head_done = 1; 1488a40ea1a7SYuri Pankov } 1489a40ea1a7SYuri Pankov 1490a40ea1a7SYuri Pankov if (' ' == byte) { 1491a40ea1a7SYuri Pankov start += sz + 1; 1492a40ea1a7SYuri Pankov break; 1493a40ea1a7SYuri Pankov } 1494a40ea1a7SYuri Pankov 1495a40ea1a7SYuri Pankov assert(',' == byte); 1496a40ea1a7SYuri Pankov start += sz + 1; 1497a40ea1a7SYuri Pankov while (' ' == *start) 1498a40ea1a7SYuri Pankov start++; 1499a40ea1a7SYuri Pankov } 1500a40ea1a7SYuri Pankov 1501a40ea1a7SYuri Pankov if (start == title) { 1502a40ea1a7SYuri Pankov putkey(mpage, start, NAME_TITLE); 1503a40ea1a7SYuri Pankov if ( ! (mpage->name_head_done || 1504a40ea1a7SYuri Pankov strcasecmp(start, meta->title))) { 1505a40ea1a7SYuri Pankov putkey(mpage, start, NAME_HEAD); 1506a40ea1a7SYuri Pankov mpage->name_head_done = 1; 1507a40ea1a7SYuri Pankov } 1508a40ea1a7SYuri Pankov free(title); 1509a40ea1a7SYuri Pankov return; 1510a40ea1a7SYuri Pankov } 1511a40ea1a7SYuri Pankov 1512a40ea1a7SYuri Pankov while (isspace((unsigned char)*start)) 1513a40ea1a7SYuri Pankov start++; 1514a40ea1a7SYuri Pankov 1515a40ea1a7SYuri Pankov if (0 == strncmp(start, "-", 1)) 1516a40ea1a7SYuri Pankov start += 1; 1517a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\-\\-", 4)) 1518a40ea1a7SYuri Pankov start += 4; 1519a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\-", 2)) 1520a40ea1a7SYuri Pankov start += 2; 1521a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\(en", 4)) 1522a40ea1a7SYuri Pankov start += 4; 1523a40ea1a7SYuri Pankov else if (0 == strncmp(start, "\\(em", 4)) 1524a40ea1a7SYuri Pankov start += 4; 1525a40ea1a7SYuri Pankov 1526a40ea1a7SYuri Pankov while (' ' == *start) 1527a40ea1a7SYuri Pankov start++; 1528a40ea1a7SYuri Pankov 1529a40ea1a7SYuri Pankov mpage->desc = mandoc_strdup(start); 1530a40ea1a7SYuri Pankov free(title); 1531a40ea1a7SYuri Pankov return; 1532a40ea1a7SYuri Pankov } 1533a40ea1a7SYuri Pankov } 1534a40ea1a7SYuri Pankov 1535a40ea1a7SYuri Pankov for (n = n->child; n; n = n->next) { 1536a40ea1a7SYuri Pankov if (NULL != mpage->desc) 1537a40ea1a7SYuri Pankov break; 1538a40ea1a7SYuri Pankov parse_man(mpage, meta, n); 1539a40ea1a7SYuri Pankov } 1540a40ea1a7SYuri Pankov } 1541a40ea1a7SYuri Pankov 1542a40ea1a7SYuri Pankov static void 1543a40ea1a7SYuri Pankov parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, 1544a40ea1a7SYuri Pankov const struct roff_node *n) 1545a40ea1a7SYuri Pankov { 1546a40ea1a7SYuri Pankov 1547*c66b8046SYuri Pankov for (n = n->child; n != NULL; n = n->next) { 1548*c66b8046SYuri Pankov if (n->tok == TOKEN_NONE || 1549*c66b8046SYuri Pankov n->tok < ROFF_MAX || 1550*c66b8046SYuri Pankov n->flags & mdocs[n->tok].taboo) 1551a40ea1a7SYuri Pankov continue; 1552*c66b8046SYuri Pankov assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); 1553a40ea1a7SYuri Pankov switch (n->type) { 1554a40ea1a7SYuri Pankov case ROFFT_ELEM: 1555a40ea1a7SYuri Pankov case ROFFT_BLOCK: 1556a40ea1a7SYuri Pankov case ROFFT_HEAD: 1557a40ea1a7SYuri Pankov case ROFFT_BODY: 1558a40ea1a7SYuri Pankov case ROFFT_TAIL: 1559*c66b8046SYuri Pankov if (mdocs[n->tok].fp != NULL && 1560*c66b8046SYuri Pankov (*mdocs[n->tok].fp)(mpage, meta, n) == 0) 1561*c66b8046SYuri Pankov break; 1562a40ea1a7SYuri Pankov if (mdocs[n->tok].mask) 1563a40ea1a7SYuri Pankov putmdockey(mpage, n->child, 1564a40ea1a7SYuri Pankov mdocs[n->tok].mask, mdocs[n->tok].taboo); 1565a40ea1a7SYuri Pankov break; 1566a40ea1a7SYuri Pankov default: 1567a40ea1a7SYuri Pankov continue; 1568a40ea1a7SYuri Pankov } 1569a40ea1a7SYuri Pankov if (NULL != n->child) 1570a40ea1a7SYuri Pankov parse_mdoc(mpage, meta, n); 1571a40ea1a7SYuri Pankov } 1572a40ea1a7SYuri Pankov } 1573a40ea1a7SYuri Pankov 1574a40ea1a7SYuri Pankov static int 1575a40ea1a7SYuri Pankov parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, 1576a40ea1a7SYuri Pankov const struct roff_node *n) 1577a40ea1a7SYuri Pankov { 1578a40ea1a7SYuri Pankov char *start, *end; 1579a40ea1a7SYuri Pankov size_t sz; 1580a40ea1a7SYuri Pankov 1581a40ea1a7SYuri Pankov if (SEC_SYNOPSIS != n->sec || 1582a40ea1a7SYuri Pankov NULL == (n = n->child) || 1583a40ea1a7SYuri Pankov n->type != ROFFT_TEXT) 1584a40ea1a7SYuri Pankov return 0; 1585a40ea1a7SYuri Pankov 1586a40ea1a7SYuri Pankov /* 1587a40ea1a7SYuri Pankov * Only consider those `Fd' macro fields that begin with an 1588a40ea1a7SYuri Pankov * "inclusion" token (versus, e.g., #define). 1589a40ea1a7SYuri Pankov */ 1590a40ea1a7SYuri Pankov 1591a40ea1a7SYuri Pankov if (strcmp("#include", n->string)) 1592a40ea1a7SYuri Pankov return 0; 1593a40ea1a7SYuri Pankov 1594a40ea1a7SYuri Pankov if ((n = n->next) == NULL || n->type != ROFFT_TEXT) 1595a40ea1a7SYuri Pankov return 0; 1596a40ea1a7SYuri Pankov 1597a40ea1a7SYuri Pankov /* 1598a40ea1a7SYuri Pankov * Strip away the enclosing angle brackets and make sure we're 1599a40ea1a7SYuri Pankov * not zero-length. 1600a40ea1a7SYuri Pankov */ 1601a40ea1a7SYuri Pankov 1602a40ea1a7SYuri Pankov start = n->string; 1603a40ea1a7SYuri Pankov if ('<' == *start || '"' == *start) 1604a40ea1a7SYuri Pankov start++; 1605a40ea1a7SYuri Pankov 1606a40ea1a7SYuri Pankov if (0 == (sz = strlen(start))) 1607a40ea1a7SYuri Pankov return 0; 1608a40ea1a7SYuri Pankov 1609a40ea1a7SYuri Pankov end = &start[(int)sz - 1]; 1610a40ea1a7SYuri Pankov if ('>' == *end || '"' == *end) 1611a40ea1a7SYuri Pankov end--; 1612a40ea1a7SYuri Pankov 1613a40ea1a7SYuri Pankov if (end > start) 1614a40ea1a7SYuri Pankov putkeys(mpage, start, end - start + 1, TYPE_In); 1615a40ea1a7SYuri Pankov return 0; 1616a40ea1a7SYuri Pankov } 1617a40ea1a7SYuri Pankov 1618a40ea1a7SYuri Pankov static void 1619a40ea1a7SYuri Pankov parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) 1620a40ea1a7SYuri Pankov { 1621a40ea1a7SYuri Pankov char *cp; 1622a40ea1a7SYuri Pankov size_t sz; 1623a40ea1a7SYuri Pankov 1624a40ea1a7SYuri Pankov if (n->type != ROFFT_TEXT) 1625a40ea1a7SYuri Pankov return; 1626a40ea1a7SYuri Pankov 1627a40ea1a7SYuri Pankov /* Skip function pointer punctuation. */ 1628a40ea1a7SYuri Pankov 1629a40ea1a7SYuri Pankov cp = n->string; 1630a40ea1a7SYuri Pankov while (*cp == '(' || *cp == '*') 1631a40ea1a7SYuri Pankov cp++; 1632a40ea1a7SYuri Pankov sz = strcspn(cp, "()"); 1633a40ea1a7SYuri Pankov 1634a40ea1a7SYuri Pankov putkeys(mpage, cp, sz, TYPE_Fn); 1635a40ea1a7SYuri Pankov if (n->sec == SEC_SYNOPSIS) 1636a40ea1a7SYuri Pankov putkeys(mpage, cp, sz, NAME_SYN); 1637a40ea1a7SYuri Pankov } 1638a40ea1a7SYuri Pankov 1639a40ea1a7SYuri Pankov static int 1640a40ea1a7SYuri Pankov parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, 1641a40ea1a7SYuri Pankov const struct roff_node *n) 1642a40ea1a7SYuri Pankov { 1643a40ea1a7SYuri Pankov 1644a40ea1a7SYuri Pankov if (n->child == NULL) 1645a40ea1a7SYuri Pankov return 0; 1646a40ea1a7SYuri Pankov 1647a40ea1a7SYuri Pankov parse_mdoc_fname(mpage, n->child); 1648a40ea1a7SYuri Pankov 1649a40ea1a7SYuri Pankov for (n = n->child->next; n != NULL; n = n->next) 1650a40ea1a7SYuri Pankov if (n->type == ROFFT_TEXT) 1651a40ea1a7SYuri Pankov putkey(mpage, n->string, TYPE_Fa); 1652a40ea1a7SYuri Pankov 1653a40ea1a7SYuri Pankov return 0; 1654a40ea1a7SYuri Pankov } 1655a40ea1a7SYuri Pankov 1656a40ea1a7SYuri Pankov static int 1657a40ea1a7SYuri Pankov parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, 1658a40ea1a7SYuri Pankov const struct roff_node *n) 1659a40ea1a7SYuri Pankov { 1660a40ea1a7SYuri Pankov 1661a40ea1a7SYuri Pankov if (n->type != ROFFT_HEAD) 1662a40ea1a7SYuri Pankov return 1; 1663a40ea1a7SYuri Pankov 1664a40ea1a7SYuri Pankov if (n->child != NULL) 1665a40ea1a7SYuri Pankov parse_mdoc_fname(mpage, n->child); 1666a40ea1a7SYuri Pankov 1667a40ea1a7SYuri Pankov return 0; 1668a40ea1a7SYuri Pankov } 1669a40ea1a7SYuri Pankov 1670a40ea1a7SYuri Pankov static int 1671a40ea1a7SYuri Pankov parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, 1672a40ea1a7SYuri Pankov const struct roff_node *n) 1673a40ea1a7SYuri Pankov { 1674a40ea1a7SYuri Pankov char *cp; 1675a40ea1a7SYuri Pankov 1676a40ea1a7SYuri Pankov if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) 1677a40ea1a7SYuri Pankov return 0; 1678a40ea1a7SYuri Pankov 1679a40ea1a7SYuri Pankov if (n->child != NULL && 1680a40ea1a7SYuri Pankov n->child->next == NULL && 1681a40ea1a7SYuri Pankov n->child->type == ROFFT_TEXT) 1682a40ea1a7SYuri Pankov return 1; 1683a40ea1a7SYuri Pankov 1684a40ea1a7SYuri Pankov cp = NULL; 1685a40ea1a7SYuri Pankov deroff(&cp, n); 1686a40ea1a7SYuri Pankov if (cp != NULL) { 1687a40ea1a7SYuri Pankov putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || 1688a40ea1a7SYuri Pankov n->type == ROFFT_BODY ? TYPE_Va : 0)); 1689a40ea1a7SYuri Pankov free(cp); 1690a40ea1a7SYuri Pankov } 1691a40ea1a7SYuri Pankov 1692a40ea1a7SYuri Pankov return 0; 1693a40ea1a7SYuri Pankov } 1694a40ea1a7SYuri Pankov 1695a40ea1a7SYuri Pankov static int 1696a40ea1a7SYuri Pankov parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, 1697a40ea1a7SYuri Pankov const struct roff_node *n) 1698a40ea1a7SYuri Pankov { 1699a40ea1a7SYuri Pankov char *cp; 1700a40ea1a7SYuri Pankov 1701a40ea1a7SYuri Pankov if (NULL == (n = n->child)) 1702a40ea1a7SYuri Pankov return 0; 1703a40ea1a7SYuri Pankov 1704a40ea1a7SYuri Pankov if (NULL == n->next) { 1705a40ea1a7SYuri Pankov putkey(mpage, n->string, TYPE_Xr); 1706a40ea1a7SYuri Pankov return 0; 1707a40ea1a7SYuri Pankov } 1708a40ea1a7SYuri Pankov 1709a40ea1a7SYuri Pankov mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); 1710a40ea1a7SYuri Pankov putkey(mpage, cp, TYPE_Xr); 1711a40ea1a7SYuri Pankov free(cp); 1712a40ea1a7SYuri Pankov return 0; 1713a40ea1a7SYuri Pankov } 1714a40ea1a7SYuri Pankov 1715a40ea1a7SYuri Pankov static int 1716a40ea1a7SYuri Pankov parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, 1717a40ea1a7SYuri Pankov const struct roff_node *n) 1718a40ea1a7SYuri Pankov { 1719a40ea1a7SYuri Pankov 1720a40ea1a7SYuri Pankov if (n->type == ROFFT_BODY) 1721a40ea1a7SYuri Pankov deroff(&mpage->desc, n); 1722a40ea1a7SYuri Pankov return 0; 1723a40ea1a7SYuri Pankov } 1724a40ea1a7SYuri Pankov 1725a40ea1a7SYuri Pankov static int 1726a40ea1a7SYuri Pankov parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, 1727a40ea1a7SYuri Pankov const struct roff_node *n) 1728a40ea1a7SYuri Pankov { 1729a40ea1a7SYuri Pankov 1730a40ea1a7SYuri Pankov if (SEC_NAME == n->sec) 1731a40ea1a7SYuri Pankov putmdockey(mpage, n->child, NAME_TITLE, 0); 1732a40ea1a7SYuri Pankov else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { 1733a40ea1a7SYuri Pankov if (n->child == NULL) 1734a40ea1a7SYuri Pankov putkey(mpage, meta->name, NAME_SYN); 1735a40ea1a7SYuri Pankov else 1736a40ea1a7SYuri Pankov putmdockey(mpage, n->child, NAME_SYN, 0); 1737a40ea1a7SYuri Pankov } 1738a40ea1a7SYuri Pankov if ( ! (mpage->name_head_done || 1739a40ea1a7SYuri Pankov n->child == NULL || n->child->string == NULL || 1740a40ea1a7SYuri Pankov strcasecmp(n->child->string, meta->title))) { 1741a40ea1a7SYuri Pankov putkey(mpage, n->child->string, NAME_HEAD); 1742a40ea1a7SYuri Pankov mpage->name_head_done = 1; 1743a40ea1a7SYuri Pankov } 1744a40ea1a7SYuri Pankov return 0; 1745a40ea1a7SYuri Pankov } 1746a40ea1a7SYuri Pankov 1747a40ea1a7SYuri Pankov static int 1748a40ea1a7SYuri Pankov parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, 1749a40ea1a7SYuri Pankov const struct roff_node *n) 1750a40ea1a7SYuri Pankov { 1751a40ea1a7SYuri Pankov 1752a40ea1a7SYuri Pankov return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; 1753a40ea1a7SYuri Pankov } 1754a40ea1a7SYuri Pankov 1755a40ea1a7SYuri Pankov static int 1756a40ea1a7SYuri Pankov parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, 1757a40ea1a7SYuri Pankov const struct roff_node *n) 1758a40ea1a7SYuri Pankov { 1759a40ea1a7SYuri Pankov 1760a40ea1a7SYuri Pankov return n->type == ROFFT_HEAD; 1761a40ea1a7SYuri Pankov } 1762a40ea1a7SYuri Pankov 1763a40ea1a7SYuri Pankov /* 1764a40ea1a7SYuri Pankov * Add a string to the hash table for the current manual. 1765a40ea1a7SYuri Pankov * Each string has a bitmask telling which macros it belongs to. 1766a40ea1a7SYuri Pankov * When we finish the manual, we'll dump the table. 1767a40ea1a7SYuri Pankov */ 1768a40ea1a7SYuri Pankov static void 1769a40ea1a7SYuri Pankov putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) 1770a40ea1a7SYuri Pankov { 1771a40ea1a7SYuri Pankov struct ohash *htab; 1772a40ea1a7SYuri Pankov struct str *s; 1773a40ea1a7SYuri Pankov const char *end; 1774a40ea1a7SYuri Pankov unsigned int slot; 1775a40ea1a7SYuri Pankov int i, mustfree; 1776a40ea1a7SYuri Pankov 1777a40ea1a7SYuri Pankov if (0 == sz) 1778a40ea1a7SYuri Pankov return; 1779a40ea1a7SYuri Pankov 1780a40ea1a7SYuri Pankov mustfree = render_string(&cp, &sz); 1781a40ea1a7SYuri Pankov 1782a40ea1a7SYuri Pankov if (TYPE_Nm & v) { 1783a40ea1a7SYuri Pankov htab = &names; 1784a40ea1a7SYuri Pankov v &= name_mask; 1785a40ea1a7SYuri Pankov if (v & NAME_FIRST) 1786a40ea1a7SYuri Pankov name_mask &= ~NAME_FIRST; 1787a40ea1a7SYuri Pankov if (debug > 1) 1788a40ea1a7SYuri Pankov say(mpage->mlinks->file, 1789a40ea1a7SYuri Pankov "Adding name %*s, bits=0x%llx", (int)sz, cp, 1790a40ea1a7SYuri Pankov (unsigned long long)v); 1791a40ea1a7SYuri Pankov } else { 1792a40ea1a7SYuri Pankov htab = &strings; 1793a40ea1a7SYuri Pankov if (debug > 1) 1794a40ea1a7SYuri Pankov for (i = 0; i < KEY_MAX; i++) 1795a40ea1a7SYuri Pankov if ((uint64_t)1 << i & v) 1796a40ea1a7SYuri Pankov say(mpage->mlinks->file, 1797a40ea1a7SYuri Pankov "Adding key %s=%*s", 1798a40ea1a7SYuri Pankov mansearch_keynames[i], (int)sz, cp); 1799a40ea1a7SYuri Pankov } 1800a40ea1a7SYuri Pankov 1801a40ea1a7SYuri Pankov end = cp + sz; 1802a40ea1a7SYuri Pankov slot = ohash_qlookupi(htab, cp, &end); 1803a40ea1a7SYuri Pankov s = ohash_find(htab, slot); 1804a40ea1a7SYuri Pankov 1805a40ea1a7SYuri Pankov if (NULL != s && mpage == s->mpage) { 1806a40ea1a7SYuri Pankov s->mask |= v; 1807a40ea1a7SYuri Pankov return; 1808a40ea1a7SYuri Pankov } else if (NULL == s) { 1809a40ea1a7SYuri Pankov s = mandoc_calloc(1, sizeof(struct str) + sz + 1); 1810a40ea1a7SYuri Pankov memcpy(s->key, cp, sz); 1811a40ea1a7SYuri Pankov ohash_insert(htab, slot, s); 1812a40ea1a7SYuri Pankov } 1813a40ea1a7SYuri Pankov s->mpage = mpage; 1814a40ea1a7SYuri Pankov s->mask = v; 1815a40ea1a7SYuri Pankov 1816a40ea1a7SYuri Pankov if (mustfree) 1817a40ea1a7SYuri Pankov free(cp); 1818a40ea1a7SYuri Pankov } 1819a40ea1a7SYuri Pankov 1820a40ea1a7SYuri Pankov /* 1821a40ea1a7SYuri Pankov * Take a Unicode codepoint and produce its UTF-8 encoding. 1822a40ea1a7SYuri Pankov * This isn't the best way to do this, but it works. 1823a40ea1a7SYuri Pankov * The magic numbers are from the UTF-8 packaging. 1824a40ea1a7SYuri Pankov * They're not as scary as they seem: read the UTF-8 spec for details. 1825a40ea1a7SYuri Pankov */ 1826a40ea1a7SYuri Pankov static size_t 1827a40ea1a7SYuri Pankov utf8(unsigned int cp, char out[7]) 1828a40ea1a7SYuri Pankov { 1829a40ea1a7SYuri Pankov size_t rc; 1830a40ea1a7SYuri Pankov 1831a40ea1a7SYuri Pankov rc = 0; 1832a40ea1a7SYuri Pankov if (cp <= 0x0000007F) { 1833a40ea1a7SYuri Pankov rc = 1; 1834a40ea1a7SYuri Pankov out[0] = (char)cp; 1835a40ea1a7SYuri Pankov } else if (cp <= 0x000007FF) { 1836a40ea1a7SYuri Pankov rc = 2; 1837a40ea1a7SYuri Pankov out[0] = (cp >> 6 & 31) | 192; 1838a40ea1a7SYuri Pankov out[1] = (cp & 63) | 128; 1839a40ea1a7SYuri Pankov } else if (cp <= 0x0000FFFF) { 1840a40ea1a7SYuri Pankov rc = 3; 1841a40ea1a7SYuri Pankov out[0] = (cp >> 12 & 15) | 224; 1842a40ea1a7SYuri Pankov out[1] = (cp >> 6 & 63) | 128; 1843a40ea1a7SYuri Pankov out[2] = (cp & 63) | 128; 1844a40ea1a7SYuri Pankov } else if (cp <= 0x001FFFFF) { 1845a40ea1a7SYuri Pankov rc = 4; 1846a40ea1a7SYuri Pankov out[0] = (cp >> 18 & 7) | 240; 1847a40ea1a7SYuri Pankov out[1] = (cp >> 12 & 63) | 128; 1848a40ea1a7SYuri Pankov out[2] = (cp >> 6 & 63) | 128; 1849a40ea1a7SYuri Pankov out[3] = (cp & 63) | 128; 1850a40ea1a7SYuri Pankov } else if (cp <= 0x03FFFFFF) { 1851a40ea1a7SYuri Pankov rc = 5; 1852a40ea1a7SYuri Pankov out[0] = (cp >> 24 & 3) | 248; 1853a40ea1a7SYuri Pankov out[1] = (cp >> 18 & 63) | 128; 1854a40ea1a7SYuri Pankov out[2] = (cp >> 12 & 63) | 128; 1855a40ea1a7SYuri Pankov out[3] = (cp >> 6 & 63) | 128; 1856a40ea1a7SYuri Pankov out[4] = (cp & 63) | 128; 1857a40ea1a7SYuri Pankov } else if (cp <= 0x7FFFFFFF) { 1858a40ea1a7SYuri Pankov rc = 6; 1859a40ea1a7SYuri Pankov out[0] = (cp >> 30 & 1) | 252; 1860a40ea1a7SYuri Pankov out[1] = (cp >> 24 & 63) | 128; 1861a40ea1a7SYuri Pankov out[2] = (cp >> 18 & 63) | 128; 1862a40ea1a7SYuri Pankov out[3] = (cp >> 12 & 63) | 128; 1863a40ea1a7SYuri Pankov out[4] = (cp >> 6 & 63) | 128; 1864a40ea1a7SYuri Pankov out[5] = (cp & 63) | 128; 1865a40ea1a7SYuri Pankov } else 1866a40ea1a7SYuri Pankov return 0; 1867a40ea1a7SYuri Pankov 1868a40ea1a7SYuri Pankov out[rc] = '\0'; 1869a40ea1a7SYuri Pankov return rc; 1870a40ea1a7SYuri Pankov } 1871a40ea1a7SYuri Pankov 1872a40ea1a7SYuri Pankov /* 1873a40ea1a7SYuri Pankov * If the string contains escape sequences, 1874a40ea1a7SYuri Pankov * replace it with an allocated rendering and return 1, 1875a40ea1a7SYuri Pankov * such that the caller can free it after use. 1876a40ea1a7SYuri Pankov * Otherwise, do nothing and return 0. 1877a40ea1a7SYuri Pankov */ 1878a40ea1a7SYuri Pankov static int 1879a40ea1a7SYuri Pankov render_string(char **public, size_t *psz) 1880a40ea1a7SYuri Pankov { 1881a40ea1a7SYuri Pankov const char *src, *scp, *addcp, *seq; 1882a40ea1a7SYuri Pankov char *dst; 1883a40ea1a7SYuri Pankov size_t ssz, dsz, addsz; 1884a40ea1a7SYuri Pankov char utfbuf[7], res[6]; 1885a40ea1a7SYuri Pankov int seqlen, unicode; 1886a40ea1a7SYuri Pankov 1887a40ea1a7SYuri Pankov res[0] = '\\'; 1888a40ea1a7SYuri Pankov res[1] = '\t'; 1889a40ea1a7SYuri Pankov res[2] = ASCII_NBRSP; 1890a40ea1a7SYuri Pankov res[3] = ASCII_HYPH; 1891a40ea1a7SYuri Pankov res[4] = ASCII_BREAK; 1892a40ea1a7SYuri Pankov res[5] = '\0'; 1893a40ea1a7SYuri Pankov 1894a40ea1a7SYuri Pankov src = scp = *public; 1895a40ea1a7SYuri Pankov ssz = *psz; 1896a40ea1a7SYuri Pankov dst = NULL; 1897a40ea1a7SYuri Pankov dsz = 0; 1898a40ea1a7SYuri Pankov 1899a40ea1a7SYuri Pankov while (scp < src + *psz) { 1900a40ea1a7SYuri Pankov 1901a40ea1a7SYuri Pankov /* Leave normal characters unchanged. */ 1902a40ea1a7SYuri Pankov 1903a40ea1a7SYuri Pankov if (strchr(res, *scp) == NULL) { 1904a40ea1a7SYuri Pankov if (dst != NULL) 1905a40ea1a7SYuri Pankov dst[dsz++] = *scp; 1906a40ea1a7SYuri Pankov scp++; 1907a40ea1a7SYuri Pankov continue; 1908a40ea1a7SYuri Pankov } 1909a40ea1a7SYuri Pankov 1910a40ea1a7SYuri Pankov /* 1911a40ea1a7SYuri Pankov * Found something that requires replacing, 1912a40ea1a7SYuri Pankov * make sure we have a destination buffer. 1913a40ea1a7SYuri Pankov */ 1914a40ea1a7SYuri Pankov 1915a40ea1a7SYuri Pankov if (dst == NULL) { 1916a40ea1a7SYuri Pankov dst = mandoc_malloc(ssz + 1); 1917a40ea1a7SYuri Pankov dsz = scp - src; 1918a40ea1a7SYuri Pankov memcpy(dst, src, dsz); 1919a40ea1a7SYuri Pankov } 1920a40ea1a7SYuri Pankov 1921a40ea1a7SYuri Pankov /* Handle single-char special characters. */ 1922a40ea1a7SYuri Pankov 1923a40ea1a7SYuri Pankov switch (*scp) { 1924a40ea1a7SYuri Pankov case '\\': 1925a40ea1a7SYuri Pankov break; 1926a40ea1a7SYuri Pankov case '\t': 1927a40ea1a7SYuri Pankov case ASCII_NBRSP: 1928a40ea1a7SYuri Pankov dst[dsz++] = ' '; 1929a40ea1a7SYuri Pankov scp++; 1930a40ea1a7SYuri Pankov continue; 1931a40ea1a7SYuri Pankov case ASCII_HYPH: 1932a40ea1a7SYuri Pankov dst[dsz++] = '-'; 1933a40ea1a7SYuri Pankov /* FALLTHROUGH */ 1934a40ea1a7SYuri Pankov case ASCII_BREAK: 1935a40ea1a7SYuri Pankov scp++; 1936a40ea1a7SYuri Pankov continue; 1937a40ea1a7SYuri Pankov default: 1938a40ea1a7SYuri Pankov abort(); 1939a40ea1a7SYuri Pankov } 1940a40ea1a7SYuri Pankov 1941a40ea1a7SYuri Pankov /* 1942a40ea1a7SYuri Pankov * Found an escape sequence. 1943a40ea1a7SYuri Pankov * Read past the slash, then parse it. 1944a40ea1a7SYuri Pankov * Ignore everything except characters. 1945a40ea1a7SYuri Pankov */ 1946a40ea1a7SYuri Pankov 1947a40ea1a7SYuri Pankov scp++; 1948a40ea1a7SYuri Pankov if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) 1949a40ea1a7SYuri Pankov continue; 1950a40ea1a7SYuri Pankov 1951a40ea1a7SYuri Pankov /* 1952a40ea1a7SYuri Pankov * Render the special character 1953a40ea1a7SYuri Pankov * as either UTF-8 or ASCII. 1954a40ea1a7SYuri Pankov */ 1955a40ea1a7SYuri Pankov 1956a40ea1a7SYuri Pankov if (write_utf8) { 1957a40ea1a7SYuri Pankov unicode = mchars_spec2cp(seq, seqlen); 1958a40ea1a7SYuri Pankov if (unicode <= 0) 1959a40ea1a7SYuri Pankov continue; 1960a40ea1a7SYuri Pankov addsz = utf8(unicode, utfbuf); 1961a40ea1a7SYuri Pankov if (addsz == 0) 1962a40ea1a7SYuri Pankov continue; 1963a40ea1a7SYuri Pankov addcp = utfbuf; 1964a40ea1a7SYuri Pankov } else { 1965a40ea1a7SYuri Pankov addcp = mchars_spec2str(seq, seqlen, &addsz); 1966a40ea1a7SYuri Pankov if (addcp == NULL) 1967a40ea1a7SYuri Pankov continue; 1968a40ea1a7SYuri Pankov if (*addcp == ASCII_NBRSP) { 1969a40ea1a7SYuri Pankov addcp = " "; 1970a40ea1a7SYuri Pankov addsz = 1; 1971a40ea1a7SYuri Pankov } 1972a40ea1a7SYuri Pankov } 1973a40ea1a7SYuri Pankov 1974a40ea1a7SYuri Pankov /* Copy the rendered glyph into the stream. */ 1975a40ea1a7SYuri Pankov 1976a40ea1a7SYuri Pankov ssz += addsz; 1977a40ea1a7SYuri Pankov dst = mandoc_realloc(dst, ssz + 1); 1978a40ea1a7SYuri Pankov memcpy(dst + dsz, addcp, addsz); 1979a40ea1a7SYuri Pankov dsz += addsz; 1980a40ea1a7SYuri Pankov } 1981a40ea1a7SYuri Pankov if (dst != NULL) { 1982a40ea1a7SYuri Pankov *public = dst; 1983a40ea1a7SYuri Pankov *psz = dsz; 1984a40ea1a7SYuri Pankov } 1985a40ea1a7SYuri Pankov 1986a40ea1a7SYuri Pankov /* Trim trailing whitespace and NUL-terminate. */ 1987a40ea1a7SYuri Pankov 1988a40ea1a7SYuri Pankov while (*psz > 0 && (*public)[*psz - 1] == ' ') 1989a40ea1a7SYuri Pankov --*psz; 1990a40ea1a7SYuri Pankov if (dst != NULL) { 1991a40ea1a7SYuri Pankov (*public)[*psz] = '\0'; 1992a40ea1a7SYuri Pankov return 1; 1993a40ea1a7SYuri Pankov } else 1994a40ea1a7SYuri Pankov return 0; 1995a40ea1a7SYuri Pankov } 1996a40ea1a7SYuri Pankov 1997a40ea1a7SYuri Pankov static void 1998a40ea1a7SYuri Pankov dbadd_mlink(const struct mlink *mlink) 1999a40ea1a7SYuri Pankov { 2000a40ea1a7SYuri Pankov dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE); 2001a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec); 2002a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec); 2003a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch); 2004a40ea1a7SYuri Pankov dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file); 2005a40ea1a7SYuri Pankov } 2006a40ea1a7SYuri Pankov 2007a40ea1a7SYuri Pankov /* 2008a40ea1a7SYuri Pankov * Flush the current page's terms (and their bits) into the database. 2009a40ea1a7SYuri Pankov * Also, handle escape sequences at the last possible moment. 2010a40ea1a7SYuri Pankov */ 2011a40ea1a7SYuri Pankov static void 2012a40ea1a7SYuri Pankov dbadd(struct dba *dba, struct mpage *mpage) 2013a40ea1a7SYuri Pankov { 2014a40ea1a7SYuri Pankov struct mlink *mlink; 2015a40ea1a7SYuri Pankov struct str *key; 2016a40ea1a7SYuri Pankov char *cp; 2017a40ea1a7SYuri Pankov uint64_t mask; 2018a40ea1a7SYuri Pankov size_t i; 2019a40ea1a7SYuri Pankov unsigned int slot; 2020a40ea1a7SYuri Pankov int mustfree; 2021a40ea1a7SYuri Pankov 2022a40ea1a7SYuri Pankov mlink = mpage->mlinks; 2023a40ea1a7SYuri Pankov 2024a40ea1a7SYuri Pankov if (nodb) { 2025a40ea1a7SYuri Pankov for (key = ohash_first(&names, &slot); NULL != key; 2026a40ea1a7SYuri Pankov key = ohash_next(&names, &slot)) 2027a40ea1a7SYuri Pankov free(key); 2028a40ea1a7SYuri Pankov for (key = ohash_first(&strings, &slot); NULL != key; 2029a40ea1a7SYuri Pankov key = ohash_next(&strings, &slot)) 2030a40ea1a7SYuri Pankov free(key); 2031a40ea1a7SYuri Pankov if (0 == debug) 2032a40ea1a7SYuri Pankov return; 2033a40ea1a7SYuri Pankov while (NULL != mlink) { 2034a40ea1a7SYuri Pankov fputs(mlink->name, stdout); 2035a40ea1a7SYuri Pankov if (NULL == mlink->next || 2036a40ea1a7SYuri Pankov strcmp(mlink->dsec, mlink->next->dsec) || 2037a40ea1a7SYuri Pankov strcmp(mlink->fsec, mlink->next->fsec) || 2038a40ea1a7SYuri Pankov strcmp(mlink->arch, mlink->next->arch)) { 2039a40ea1a7SYuri Pankov putchar('('); 2040a40ea1a7SYuri Pankov if ('\0' == *mlink->dsec) 2041a40ea1a7SYuri Pankov fputs(mlink->fsec, stdout); 2042a40ea1a7SYuri Pankov else 2043a40ea1a7SYuri Pankov fputs(mlink->dsec, stdout); 2044a40ea1a7SYuri Pankov if ('\0' != *mlink->arch) 2045a40ea1a7SYuri Pankov printf("/%s", mlink->arch); 2046a40ea1a7SYuri Pankov putchar(')'); 2047a40ea1a7SYuri Pankov } 2048a40ea1a7SYuri Pankov mlink = mlink->next; 2049a40ea1a7SYuri Pankov if (NULL != mlink) 2050a40ea1a7SYuri Pankov fputs(", ", stdout); 2051a40ea1a7SYuri Pankov } 2052a40ea1a7SYuri Pankov printf(" - %s\n", mpage->desc); 2053a40ea1a7SYuri Pankov return; 2054a40ea1a7SYuri Pankov } 2055a40ea1a7SYuri Pankov 2056a40ea1a7SYuri Pankov if (debug) 2057a40ea1a7SYuri Pankov say(mlink->file, "Adding to database"); 2058a40ea1a7SYuri Pankov 2059a40ea1a7SYuri Pankov cp = mpage->desc; 2060a40ea1a7SYuri Pankov i = strlen(cp); 2061a40ea1a7SYuri Pankov mustfree = render_string(&cp, &i); 2062a40ea1a7SYuri Pankov mpage->dba = dba_page_new(dba->pages, 2063a40ea1a7SYuri Pankov *mpage->arch == '\0' ? mlink->arch : mpage->arch, 2064a40ea1a7SYuri Pankov cp, mlink->file, mpage->form); 2065a40ea1a7SYuri Pankov if (mustfree) 2066a40ea1a7SYuri Pankov free(cp); 2067a40ea1a7SYuri Pankov dba_page_add(mpage->dba, DBP_SECT, mpage->sec); 2068a40ea1a7SYuri Pankov 2069a40ea1a7SYuri Pankov while (mlink != NULL) { 2070a40ea1a7SYuri Pankov dbadd_mlink(mlink); 2071a40ea1a7SYuri Pankov mlink = mlink->next; 2072a40ea1a7SYuri Pankov } 2073a40ea1a7SYuri Pankov 2074a40ea1a7SYuri Pankov for (key = ohash_first(&names, &slot); NULL != key; 2075a40ea1a7SYuri Pankov key = ohash_next(&names, &slot)) { 2076a40ea1a7SYuri Pankov assert(key->mpage == mpage); 2077a40ea1a7SYuri Pankov dba_page_alias(mpage->dba, key->key, key->mask); 2078a40ea1a7SYuri Pankov free(key); 2079a40ea1a7SYuri Pankov } 2080a40ea1a7SYuri Pankov for (key = ohash_first(&strings, &slot); NULL != key; 2081a40ea1a7SYuri Pankov key = ohash_next(&strings, &slot)) { 2082a40ea1a7SYuri Pankov assert(key->mpage == mpage); 2083a40ea1a7SYuri Pankov i = 0; 2084a40ea1a7SYuri Pankov for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) { 2085a40ea1a7SYuri Pankov if (key->mask & mask) 2086a40ea1a7SYuri Pankov dba_macro_add(dba->macros, i, 2087a40ea1a7SYuri Pankov key->key, mpage->dba); 2088a40ea1a7SYuri Pankov i++; 2089a40ea1a7SYuri Pankov } 2090a40ea1a7SYuri Pankov free(key); 2091a40ea1a7SYuri Pankov } 2092a40ea1a7SYuri Pankov } 2093a40ea1a7SYuri Pankov 2094a40ea1a7SYuri Pankov static void 2095a40ea1a7SYuri Pankov dbprune(struct dba *dba) 2096a40ea1a7SYuri Pankov { 2097a40ea1a7SYuri Pankov struct dba_array *page, *files; 2098a40ea1a7SYuri Pankov char *file; 2099a40ea1a7SYuri Pankov 2100a40ea1a7SYuri Pankov dba_array_FOREACH(dba->pages, page) { 2101a40ea1a7SYuri Pankov files = dba_array_get(page, DBP_FILE); 2102a40ea1a7SYuri Pankov dba_array_FOREACH(files, file) { 2103a40ea1a7SYuri Pankov if (*file < ' ') 2104a40ea1a7SYuri Pankov file++; 2105a40ea1a7SYuri Pankov if (ohash_find(&mlinks, ohash_qlookup(&mlinks, 2106a40ea1a7SYuri Pankov file)) != NULL) { 2107a40ea1a7SYuri Pankov if (debug) 2108a40ea1a7SYuri Pankov say(file, "Deleting from database"); 2109a40ea1a7SYuri Pankov dba_array_del(dba->pages); 2110a40ea1a7SYuri Pankov break; 2111a40ea1a7SYuri Pankov } 2112a40ea1a7SYuri Pankov } 2113a40ea1a7SYuri Pankov } 2114a40ea1a7SYuri Pankov } 2115a40ea1a7SYuri Pankov 2116a40ea1a7SYuri Pankov /* 2117a40ea1a7SYuri Pankov * Write the database from memory to disk. 2118a40ea1a7SYuri Pankov */ 2119a40ea1a7SYuri Pankov static void 2120a40ea1a7SYuri Pankov dbwrite(struct dba *dba) 2121a40ea1a7SYuri Pankov { 2122a40ea1a7SYuri Pankov char tfn[32]; 2123a40ea1a7SYuri Pankov int status; 2124a40ea1a7SYuri Pankov pid_t child; 2125a40ea1a7SYuri Pankov 2126*c66b8046SYuri Pankov /* 2127*c66b8046SYuri Pankov * Do not write empty databases, and delete existing ones 2128*c66b8046SYuri Pankov * when makewhatis -u causes them to become empty. 2129*c66b8046SYuri Pankov */ 2130*c66b8046SYuri Pankov 2131*c66b8046SYuri Pankov dba_array_start(dba->pages); 2132*c66b8046SYuri Pankov if (dba_array_next(dba->pages) == NULL) { 2133*c66b8046SYuri Pankov if (unlink(MANDOC_DB) == -1 && errno != ENOENT) 2134*c66b8046SYuri Pankov say(MANDOC_DB, "&unlink"); 2135*c66b8046SYuri Pankov return; 2136*c66b8046SYuri Pankov } 2137*c66b8046SYuri Pankov 2138*c66b8046SYuri Pankov /* 2139*c66b8046SYuri Pankov * Build the database in a temporary file, 2140*c66b8046SYuri Pankov * then atomically move it into place. 2141*c66b8046SYuri Pankov */ 2142*c66b8046SYuri Pankov 2143a40ea1a7SYuri Pankov if (dba_write(MANDOC_DB "~", dba) != -1) { 2144a40ea1a7SYuri Pankov if (rename(MANDOC_DB "~", MANDOC_DB) == -1) { 2145a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2146a40ea1a7SYuri Pankov say(MANDOC_DB, "&rename"); 2147a40ea1a7SYuri Pankov unlink(MANDOC_DB "~"); 2148a40ea1a7SYuri Pankov } 2149a40ea1a7SYuri Pankov return; 2150a40ea1a7SYuri Pankov } 2151a40ea1a7SYuri Pankov 2152*c66b8046SYuri Pankov /* 2153*c66b8046SYuri Pankov * We lack write permission and cannot replace the database 2154*c66b8046SYuri Pankov * file, but let's at least check whether the data changed. 2155*c66b8046SYuri Pankov */ 2156*c66b8046SYuri Pankov 2157a40ea1a7SYuri Pankov (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn)); 2158a40ea1a7SYuri Pankov if (mkdtemp(tfn) == NULL) { 2159a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2160a40ea1a7SYuri Pankov say("", "&%s", tfn); 2161a40ea1a7SYuri Pankov return; 2162a40ea1a7SYuri Pankov } 2163a40ea1a7SYuri Pankov 2164a40ea1a7SYuri Pankov (void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn)); 2165a40ea1a7SYuri Pankov if (dba_write(tfn, dba) == -1) { 2166a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2167a40ea1a7SYuri Pankov say(tfn, "&dba_write"); 2168a40ea1a7SYuri Pankov goto out; 2169a40ea1a7SYuri Pankov } 2170a40ea1a7SYuri Pankov 2171a40ea1a7SYuri Pankov switch (child = fork()) { 2172a40ea1a7SYuri Pankov case -1: 2173a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2174a40ea1a7SYuri Pankov say("", "&fork cmp"); 2175a40ea1a7SYuri Pankov return; 2176a40ea1a7SYuri Pankov case 0: 2177a40ea1a7SYuri Pankov execlp("cmp", "cmp", "-s", tfn, MANDOC_DB, (char *)NULL); 2178a40ea1a7SYuri Pankov say("", "&exec cmp"); 2179a40ea1a7SYuri Pankov exit(0); 2180a40ea1a7SYuri Pankov default: 2181a40ea1a7SYuri Pankov break; 2182a40ea1a7SYuri Pankov } 2183a40ea1a7SYuri Pankov if (waitpid(child, &status, 0) == -1) { 2184a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2185a40ea1a7SYuri Pankov say("", "&wait cmp"); 2186a40ea1a7SYuri Pankov } else if (WIFSIGNALED(status)) { 2187a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2188a40ea1a7SYuri Pankov say("", "cmp died from signal %d", WTERMSIG(status)); 2189a40ea1a7SYuri Pankov } else if (WEXITSTATUS(status)) { 2190a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2191a40ea1a7SYuri Pankov say(MANDOC_DB, 2192a40ea1a7SYuri Pankov "Data changed, but cannot replace database"); 2193a40ea1a7SYuri Pankov } 2194a40ea1a7SYuri Pankov 2195a40ea1a7SYuri Pankov out: 2196a40ea1a7SYuri Pankov *strrchr(tfn, '/') = '\0'; 2197a40ea1a7SYuri Pankov switch (child = fork()) { 2198a40ea1a7SYuri Pankov case -1: 2199a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2200a40ea1a7SYuri Pankov say("", "&fork rm"); 2201a40ea1a7SYuri Pankov return; 2202a40ea1a7SYuri Pankov case 0: 2203a40ea1a7SYuri Pankov execlp("rm", "rm", "-rf", tfn, (char *)NULL); 2204a40ea1a7SYuri Pankov say("", "&exec rm"); 2205a40ea1a7SYuri Pankov exit((int)MANDOCLEVEL_SYSERR); 2206a40ea1a7SYuri Pankov default: 2207a40ea1a7SYuri Pankov break; 2208a40ea1a7SYuri Pankov } 2209a40ea1a7SYuri Pankov if (waitpid(child, &status, 0) == -1) { 2210a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2211a40ea1a7SYuri Pankov say("", "&wait rm"); 2212a40ea1a7SYuri Pankov } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { 2213a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2214a40ea1a7SYuri Pankov say("", "%s: Cannot remove temporary directory", tfn); 2215a40ea1a7SYuri Pankov } 2216a40ea1a7SYuri Pankov } 2217a40ea1a7SYuri Pankov 2218a40ea1a7SYuri Pankov static int 2219a40ea1a7SYuri Pankov set_basedir(const char *targetdir, int report_baddir) 2220a40ea1a7SYuri Pankov { 2221a40ea1a7SYuri Pankov static char startdir[PATH_MAX]; 2222a40ea1a7SYuri Pankov static int getcwd_status; /* 1 = ok, 2 = failure */ 2223a40ea1a7SYuri Pankov static int chdir_status; /* 1 = changed directory */ 2224a40ea1a7SYuri Pankov char *cp; 2225a40ea1a7SYuri Pankov 2226a40ea1a7SYuri Pankov /* 2227a40ea1a7SYuri Pankov * Remember the original working directory, if possible. 2228a40ea1a7SYuri Pankov * This will be needed if the second or a later directory 2229a40ea1a7SYuri Pankov * on the command line is given as a relative path. 2230a40ea1a7SYuri Pankov * Do not error out if the current directory is not 2231a40ea1a7SYuri Pankov * searchable: Maybe it won't be needed after all. 2232a40ea1a7SYuri Pankov */ 2233a40ea1a7SYuri Pankov if (0 == getcwd_status) { 2234a40ea1a7SYuri Pankov if (NULL == getcwd(startdir, sizeof(startdir))) { 2235a40ea1a7SYuri Pankov getcwd_status = 2; 2236a40ea1a7SYuri Pankov (void)strlcpy(startdir, strerror(errno), 2237a40ea1a7SYuri Pankov sizeof(startdir)); 2238a40ea1a7SYuri Pankov } else 2239a40ea1a7SYuri Pankov getcwd_status = 1; 2240a40ea1a7SYuri Pankov } 2241a40ea1a7SYuri Pankov 2242a40ea1a7SYuri Pankov /* 2243a40ea1a7SYuri Pankov * We are leaving the old base directory. 2244a40ea1a7SYuri Pankov * Do not use it any longer, not even for messages. 2245a40ea1a7SYuri Pankov */ 2246a40ea1a7SYuri Pankov *basedir = '\0'; 2247a40ea1a7SYuri Pankov 2248a40ea1a7SYuri Pankov /* 2249a40ea1a7SYuri Pankov * If and only if the directory was changed earlier and 2250a40ea1a7SYuri Pankov * the next directory to process is given as a relative path, 2251a40ea1a7SYuri Pankov * first go back, or bail out if that is impossible. 2252a40ea1a7SYuri Pankov */ 2253a40ea1a7SYuri Pankov if (chdir_status && '/' != *targetdir) { 2254a40ea1a7SYuri Pankov if (2 == getcwd_status) { 2255a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2256a40ea1a7SYuri Pankov say("", "getcwd: %s", startdir); 2257a40ea1a7SYuri Pankov return 0; 2258a40ea1a7SYuri Pankov } 2259a40ea1a7SYuri Pankov if (-1 == chdir(startdir)) { 2260a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2261a40ea1a7SYuri Pankov say("", "&chdir %s", startdir); 2262a40ea1a7SYuri Pankov return 0; 2263a40ea1a7SYuri Pankov } 2264a40ea1a7SYuri Pankov } 2265a40ea1a7SYuri Pankov 2266a40ea1a7SYuri Pankov /* 2267a40ea1a7SYuri Pankov * Always resolve basedir to the canonicalized absolute 2268a40ea1a7SYuri Pankov * pathname and append a trailing slash, such that 2269a40ea1a7SYuri Pankov * we can reliably check whether files are inside. 2270a40ea1a7SYuri Pankov */ 2271a40ea1a7SYuri Pankov if (NULL == realpath(targetdir, basedir)) { 2272a40ea1a7SYuri Pankov if (report_baddir || errno != ENOENT) { 2273a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 2274a40ea1a7SYuri Pankov say("", "&%s: realpath", targetdir); 2275a40ea1a7SYuri Pankov } 2276a40ea1a7SYuri Pankov return 0; 2277a40ea1a7SYuri Pankov } else if (-1 == chdir(basedir)) { 2278a40ea1a7SYuri Pankov if (report_baddir || errno != ENOENT) { 2279a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_BADARG; 2280a40ea1a7SYuri Pankov say("", "&chdir"); 2281a40ea1a7SYuri Pankov } 2282a40ea1a7SYuri Pankov return 0; 2283a40ea1a7SYuri Pankov } 2284a40ea1a7SYuri Pankov chdir_status = 1; 2285a40ea1a7SYuri Pankov cp = strchr(basedir, '\0'); 2286a40ea1a7SYuri Pankov if ('/' != cp[-1]) { 2287a40ea1a7SYuri Pankov if (cp - basedir >= PATH_MAX - 1) { 2288a40ea1a7SYuri Pankov exitcode = (int)MANDOCLEVEL_SYSERR; 2289a40ea1a7SYuri Pankov say("", "Filename too long"); 2290a40ea1a7SYuri Pankov return 0; 2291a40ea1a7SYuri Pankov } 2292a40ea1a7SYuri Pankov *cp++ = '/'; 2293a40ea1a7SYuri Pankov *cp = '\0'; 2294a40ea1a7SYuri Pankov } 2295a40ea1a7SYuri Pankov return 1; 2296a40ea1a7SYuri Pankov } 2297a40ea1a7SYuri Pankov 2298a40ea1a7SYuri Pankov static void 2299a40ea1a7SYuri Pankov say(const char *file, const char *format, ...) 2300a40ea1a7SYuri Pankov { 2301a40ea1a7SYuri Pankov va_list ap; 2302a40ea1a7SYuri Pankov int use_errno; 2303a40ea1a7SYuri Pankov 2304a40ea1a7SYuri Pankov if ('\0' != *basedir) 2305a40ea1a7SYuri Pankov fprintf(stderr, "%s", basedir); 2306a40ea1a7SYuri Pankov if ('\0' != *basedir && '\0' != *file) 2307a40ea1a7SYuri Pankov fputc('/', stderr); 2308a40ea1a7SYuri Pankov if ('\0' != *file) 2309a40ea1a7SYuri Pankov fprintf(stderr, "%s", file); 2310a40ea1a7SYuri Pankov 2311a40ea1a7SYuri Pankov use_errno = 1; 2312a40ea1a7SYuri Pankov if (NULL != format) { 2313a40ea1a7SYuri Pankov switch (*format) { 2314a40ea1a7SYuri Pankov case '&': 2315a40ea1a7SYuri Pankov format++; 2316a40ea1a7SYuri Pankov break; 2317a40ea1a7SYuri Pankov case '\0': 2318a40ea1a7SYuri Pankov format = NULL; 2319a40ea1a7SYuri Pankov break; 2320a40ea1a7SYuri Pankov default: 2321a40ea1a7SYuri Pankov use_errno = 0; 2322a40ea1a7SYuri Pankov break; 2323a40ea1a7SYuri Pankov } 2324a40ea1a7SYuri Pankov } 2325a40ea1a7SYuri Pankov if (NULL != format) { 2326a40ea1a7SYuri Pankov if ('\0' != *basedir || '\0' != *file) 2327a40ea1a7SYuri Pankov fputs(": ", stderr); 2328a40ea1a7SYuri Pankov va_start(ap, format); 2329a40ea1a7SYuri Pankov vfprintf(stderr, format, ap); 2330a40ea1a7SYuri Pankov va_end(ap); 2331a40ea1a7SYuri Pankov } 2332a40ea1a7SYuri Pankov if (use_errno) { 2333a40ea1a7SYuri Pankov if ('\0' != *basedir || '\0' != *file || NULL != format) 2334a40ea1a7SYuri Pankov fputs(": ", stderr); 2335a40ea1a7SYuri Pankov perror(NULL); 2336a40ea1a7SYuri Pankov } else 2337a40ea1a7SYuri Pankov fputc('\n', stderr); 2338a40ea1a7SYuri Pankov } 2339