xref: /illumos-gate/usr/src/cmd/mandoc/tag.c (revision 4d131170)
/* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
/*
 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Functions to tag syntax tree nodes.
 * For internal use by mandoc(1) validation modules only.
 */
20371584c2SYuri Pankov #include "config.h"
21371584c2SYuri Pankov 
22371584c2SYuri Pankov #include <sys/types.h>
23371584c2SYuri Pankov 
24*4d131170SRobert Mustacchi #include <assert.h>
25cec8643bSMichal Nowak #include <limits.h>
26371584c2SYuri Pankov #include <stddef.h>
27371584c2SYuri Pankov #include <stdint.h>
28371584c2SYuri Pankov #include <stdlib.h>
29371584c2SYuri Pankov #include <string.h>
30371584c2SYuri Pankov 
31371584c2SYuri Pankov #include "mandoc_aux.h"
32371584c2SYuri Pankov #include "mandoc_ohash.h"
33*4d131170SRobert Mustacchi #include "roff.h"
34*4d131170SRobert Mustacchi #include "mdoc.h"
35*4d131170SRobert Mustacchi #include "roff_int.h"
36371584c2SYuri Pankov #include "tag.h"
37371584c2SYuri Pankov 
/*
 * One hash table entry per tag string.
 * The key s is stored inline at the end of the entry,
 * so entries are allocated with room for the string.
 */
struct tag_entry {
	struct roff_node **nodes;	/* nodes currently carrying this tag */
	size_t	 maxnodes;		/* number of slots allocated in nodes */
	size_t	 nnodes;		/* number of slots used in nodes */
	int	 prio;			/* priority of the stored nodes */
	char	 s[];			/* NUL-terminated tag string (hash key) */
};
45371584c2SYuri Pankov 
46*4d131170SRobert Mustacchi static void		 tag_move_href(struct roff_man *,
47*4d131170SRobert Mustacchi 				struct roff_node *, const char *);
48*4d131170SRobert Mustacchi static void		 tag_move_id(struct roff_node *);
49371584c2SYuri Pankov 
50371584c2SYuri Pankov static struct ohash	 tag_data;
51371584c2SYuri Pankov 
52371584c2SYuri Pankov 
53371584c2SYuri Pankov /*
54*4d131170SRobert Mustacchi  * Set up the ohash table to collect nodes
55*4d131170SRobert Mustacchi  * where various marked-up terms are documented.
56371584c2SYuri Pankov  */
57*4d131170SRobert Mustacchi void
tag_alloc(void)58*4d131170SRobert Mustacchi tag_alloc(void)
59371584c2SYuri Pankov {
60*4d131170SRobert Mustacchi 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
61*4d131170SRobert Mustacchi }
62371584c2SYuri Pankov 
63*4d131170SRobert Mustacchi void
tag_free(void)64*4d131170SRobert Mustacchi tag_free(void)
65*4d131170SRobert Mustacchi {
66*4d131170SRobert Mustacchi 	struct tag_entry	*entry;
67*4d131170SRobert Mustacchi 	unsigned int		 slot;
68371584c2SYuri Pankov 
69*4d131170SRobert Mustacchi 	if (tag_data.info.free == NULL)
70*4d131170SRobert Mustacchi 		return;
71*4d131170SRobert Mustacchi 	entry = ohash_first(&tag_data, &slot);
72*4d131170SRobert Mustacchi 	while (entry != NULL) {
73*4d131170SRobert Mustacchi 		free(entry->nodes);
74*4d131170SRobert Mustacchi 		free(entry);
75*4d131170SRobert Mustacchi 		entry = ohash_next(&tag_data, &slot);
76*4d131170SRobert Mustacchi 	}
77*4d131170SRobert Mustacchi 	ohash_delete(&tag_data);
78*4d131170SRobert Mustacchi 	tag_data.info.free = NULL;
79371584c2SYuri Pankov }
80371584c2SYuri Pankov 
81371584c2SYuri Pankov /*
82*4d131170SRobert Mustacchi  * Set a node where a term is defined,
83cec8643bSMichal Nowak  * unless it is already defined at a lower priority.
84371584c2SYuri Pankov  */
85371584c2SYuri Pankov void
tag_put(const char * s,int prio,struct roff_node * n)86*4d131170SRobert Mustacchi tag_put(const char *s, int prio, struct roff_node *n)
87371584c2SYuri Pankov {
88371584c2SYuri Pankov 	struct tag_entry	*entry;
89*4d131170SRobert Mustacchi 	struct roff_node	*nold;
90cec8643bSMichal Nowak 	const char		*se;
91371584c2SYuri Pankov 	size_t			 len;
92371584c2SYuri Pankov 	unsigned int		 slot;
93371584c2SYuri Pankov 
94*4d131170SRobert Mustacchi 	assert(prio <= TAG_FALLBACK);
95cec8643bSMichal Nowak 
96*4d131170SRobert Mustacchi 	if (s == NULL) {
97*4d131170SRobert Mustacchi 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
98*4d131170SRobert Mustacchi 			return;
99*4d131170SRobert Mustacchi 		s = n->child->string;
100*4d131170SRobert Mustacchi 		switch (s[0]) {
101*4d131170SRobert Mustacchi 		case '-':
102*4d131170SRobert Mustacchi 			s++;
103*4d131170SRobert Mustacchi 			break;
104*4d131170SRobert Mustacchi 		case '\\':
105*4d131170SRobert Mustacchi 			switch (s[1]) {
106*4d131170SRobert Mustacchi 			case '&':
107*4d131170SRobert Mustacchi 			case '-':
108*4d131170SRobert Mustacchi 			case 'e':
109*4d131170SRobert Mustacchi 				s += 2;
110*4d131170SRobert Mustacchi 				break;
111*4d131170SRobert Mustacchi 			default:
112*4d131170SRobert Mustacchi 				break;
113*4d131170SRobert Mustacchi 			}
114*4d131170SRobert Mustacchi 			break;
115*4d131170SRobert Mustacchi 		default:
116*4d131170SRobert Mustacchi 			break;
117*4d131170SRobert Mustacchi 		}
118*4d131170SRobert Mustacchi 	}
119cec8643bSMichal Nowak 
120cec8643bSMichal Nowak 	/*
121*4d131170SRobert Mustacchi 	 * Skip whitespace and escapes and whatever follows,
122cec8643bSMichal Nowak 	 * and if there is any, downgrade the priority.
123cec8643bSMichal Nowak 	 */
124cec8643bSMichal Nowak 
125*4d131170SRobert Mustacchi 	len = strcspn(s, " \t\\");
126cec8643bSMichal Nowak 	if (len == 0)
127371584c2SYuri Pankov 		return;
128a40ea1a7SYuri Pankov 
129cec8643bSMichal Nowak 	se = s + len;
130*4d131170SRobert Mustacchi 	if (*se != '\0' && prio < TAG_WEAK)
131*4d131170SRobert Mustacchi 		prio = TAG_WEAK;
132cec8643bSMichal Nowak 
133cec8643bSMichal Nowak 	slot = ohash_qlookupi(&tag_data, s, &se);
134371584c2SYuri Pankov 	entry = ohash_find(&tag_data, slot);
135a40ea1a7SYuri Pankov 
136*4d131170SRobert Mustacchi 	/* Build a new entry. */
137a40ea1a7SYuri Pankov 
138*4d131170SRobert Mustacchi 	if (entry == NULL) {
139cec8643bSMichal Nowak 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
140371584c2SYuri Pankov 		memcpy(entry->s, s, len);
141cec8643bSMichal Nowak 		entry->s[len] = '\0';
142*4d131170SRobert Mustacchi 		entry->nodes = NULL;
143*4d131170SRobert Mustacchi 		entry->maxnodes = entry->nnodes = 0;
144371584c2SYuri Pankov 		ohash_insert(&tag_data, slot, entry);
145*4d131170SRobert Mustacchi 	}
146a40ea1a7SYuri Pankov 
147*4d131170SRobert Mustacchi 	/*
148*4d131170SRobert Mustacchi 	 * Lower priority numbers take precedence.
149*4d131170SRobert Mustacchi 	 * If a better entry is already present, ignore the new one.
150*4d131170SRobert Mustacchi 	 */
151a40ea1a7SYuri Pankov 
152*4d131170SRobert Mustacchi 	else if (entry->prio < prio)
153a40ea1a7SYuri Pankov 			return;
154a40ea1a7SYuri Pankov 
155*4d131170SRobert Mustacchi 	/*
156*4d131170SRobert Mustacchi 	 * If the existing entry is worse, clear it.
157*4d131170SRobert Mustacchi 	 * In addition, a tag with priority TAG_FALLBACK
158*4d131170SRobert Mustacchi 	 * is only used if the tag occurs exactly once.
159*4d131170SRobert Mustacchi 	 */
160a40ea1a7SYuri Pankov 
161*4d131170SRobert Mustacchi 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
162*4d131170SRobert Mustacchi 		while (entry->nnodes > 0) {
163*4d131170SRobert Mustacchi 			nold = entry->nodes[--entry->nnodes];
164*4d131170SRobert Mustacchi 			nold->flags &= ~NODE_ID;
165*4d131170SRobert Mustacchi 			free(nold->tag);
166*4d131170SRobert Mustacchi 			nold->tag = NULL;
167*4d131170SRobert Mustacchi 		}
168*4d131170SRobert Mustacchi 		if (prio == TAG_FALLBACK) {
169*4d131170SRobert Mustacchi 			entry->prio = TAG_DELETE;
170a40ea1a7SYuri Pankov 			return;
171*4d131170SRobert Mustacchi 		}
172a40ea1a7SYuri Pankov 	}
173a40ea1a7SYuri Pankov 
174*4d131170SRobert Mustacchi 	/* Remember the new node. */
175a40ea1a7SYuri Pankov 
176*4d131170SRobert Mustacchi 	if (entry->maxnodes == entry->nnodes) {
177*4d131170SRobert Mustacchi 		entry->maxnodes += 4;
178*4d131170SRobert Mustacchi 		entry->nodes = mandoc_reallocarray(entry->nodes,
179*4d131170SRobert Mustacchi 		    entry->maxnodes, sizeof(*entry->nodes));
180a40ea1a7SYuri Pankov 	}
181*4d131170SRobert Mustacchi 	entry->nodes[entry->nnodes++] = n;
182371584c2SYuri Pankov 	entry->prio = prio;
183*4d131170SRobert Mustacchi 	n->flags |= NODE_ID;
184*4d131170SRobert Mustacchi 	if (n->child == NULL || n->child->string != s || *se != '\0') {
185*4d131170SRobert Mustacchi 		assert(n->tag == NULL);
186*4d131170SRobert Mustacchi 		n->tag = mandoc_strndup(s, len);
187*4d131170SRobert Mustacchi 	}
188*4d131170SRobert Mustacchi }
189*4d131170SRobert Mustacchi 
190*4d131170SRobert Mustacchi int
tag_exists(const char * tag)191*4d131170SRobert Mustacchi tag_exists(const char *tag)
192*4d131170SRobert Mustacchi {
193*4d131170SRobert Mustacchi 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
194371584c2SYuri Pankov }
195371584c2SYuri Pankov 
196371584c2SYuri Pankov /*
197*4d131170SRobert Mustacchi  * For in-line elements, move the link target
198*4d131170SRobert Mustacchi  * to the enclosing paragraph when appropriate.
199371584c2SYuri Pankov  */
200*4d131170SRobert Mustacchi static void
tag_move_id(struct roff_node * n)201*4d131170SRobert Mustacchi tag_move_id(struct roff_node *n)
202371584c2SYuri Pankov {
203*4d131170SRobert Mustacchi 	struct roff_node *np;
204371584c2SYuri Pankov 
205*4d131170SRobert Mustacchi 	np = n;
206*4d131170SRobert Mustacchi 	for (;;) {
207*4d131170SRobert Mustacchi 		if (np->prev != NULL)
208*4d131170SRobert Mustacchi 			np = np->prev;
209*4d131170SRobert Mustacchi 		else if ((np = np->parent) == NULL)
210*4d131170SRobert Mustacchi 			return;
211*4d131170SRobert Mustacchi 		switch (np->tok) {
212*4d131170SRobert Mustacchi 		case MDOC_It:
213*4d131170SRobert Mustacchi 			switch (np->parent->parent->norm->Bl.type) {
214*4d131170SRobert Mustacchi 			case LIST_column:
215*4d131170SRobert Mustacchi 				/* Target the ROFFT_BLOCK = <tr>. */
216*4d131170SRobert Mustacchi 				np = np->parent;
217*4d131170SRobert Mustacchi 				break;
218*4d131170SRobert Mustacchi 			case LIST_diag:
219*4d131170SRobert Mustacchi 			case LIST_hang:
220*4d131170SRobert Mustacchi 			case LIST_inset:
221*4d131170SRobert Mustacchi 			case LIST_ohang:
222*4d131170SRobert Mustacchi 			case LIST_tag:
223*4d131170SRobert Mustacchi 				/* Target the ROFFT_HEAD = <dt>. */
224*4d131170SRobert Mustacchi 				np = np->parent->head;
225*4d131170SRobert Mustacchi 				break;
226*4d131170SRobert Mustacchi 			default:
227*4d131170SRobert Mustacchi 				/* Target the ROFF_BODY = <li>. */
228*4d131170SRobert Mustacchi 				break;
229*4d131170SRobert Mustacchi 			}
230*4d131170SRobert Mustacchi 			/* FALLTHROUGH */
231*4d131170SRobert Mustacchi 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
232*4d131170SRobert Mustacchi 			if (np->tag == NULL) {
233*4d131170SRobert Mustacchi 				np->tag = mandoc_strdup(n->tag == NULL ?
234*4d131170SRobert Mustacchi 				    n->child->string : n->tag);
235*4d131170SRobert Mustacchi 				np->flags |= NODE_ID;
236*4d131170SRobert Mustacchi 				n->flags &= ~NODE_ID;
237*4d131170SRobert Mustacchi 			}
238*4d131170SRobert Mustacchi 			return;
239*4d131170SRobert Mustacchi 		case MDOC_Sh:
240*4d131170SRobert Mustacchi 		case MDOC_Ss:
241*4d131170SRobert Mustacchi 		case MDOC_Bd:
242*4d131170SRobert Mustacchi 		case MDOC_Bl:
243*4d131170SRobert Mustacchi 		case MDOC_D1:
244*4d131170SRobert Mustacchi 		case MDOC_Dl:
245*4d131170SRobert Mustacchi 		case MDOC_Rs:
246*4d131170SRobert Mustacchi 			/* Do not move past major blocks. */
247*4d131170SRobert Mustacchi 			return;
248*4d131170SRobert Mustacchi 		default:
249*4d131170SRobert Mustacchi 			/*
250*4d131170SRobert Mustacchi 			 * Move past in-line content and partial
251*4d131170SRobert Mustacchi 			 * blocks, for example .It Xo or .It Bq Er.
252*4d131170SRobert Mustacchi 			 */
253*4d131170SRobert Mustacchi 			break;
254*4d131170SRobert Mustacchi 		}
255371584c2SYuri Pankov 	}
256371584c2SYuri Pankov }
257371584c2SYuri Pankov 
258*4d131170SRobert Mustacchi /*
259*4d131170SRobert Mustacchi  * When a paragraph is tagged and starts with text,
260*4d131170SRobert Mustacchi  * move the permalink to the first few words.
261*4d131170SRobert Mustacchi  */
262*4d131170SRobert Mustacchi static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)263*4d131170SRobert Mustacchi tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
264371584c2SYuri Pankov {
265*4d131170SRobert Mustacchi 	char	*cp;
266*4d131170SRobert Mustacchi 
267*4d131170SRobert Mustacchi 	if (n == NULL || n->type != ROFFT_TEXT ||
268*4d131170SRobert Mustacchi 	    *n->string == '\0' || *n->string == ' ')
269*4d131170SRobert Mustacchi 		return;
270*4d131170SRobert Mustacchi 
271*4d131170SRobert Mustacchi 	cp = n->string;
272*4d131170SRobert Mustacchi 	while (cp != NULL && cp - n->string < 5)
273*4d131170SRobert Mustacchi 		cp = strchr(cp + 1, ' ');
274*4d131170SRobert Mustacchi 
275*4d131170SRobert Mustacchi 	/* If the first text node is longer, split it. */
276*4d131170SRobert Mustacchi 
277*4d131170SRobert Mustacchi 	if (cp != NULL && cp[1] != '\0') {
278*4d131170SRobert Mustacchi 		man->last = n;
279*4d131170SRobert Mustacchi 		man->next = ROFF_NEXT_SIBLING;
280*4d131170SRobert Mustacchi 		roff_word_alloc(man, n->line,
281*4d131170SRobert Mustacchi 		    n->pos + (cp - n->string), cp + 1);
282*4d131170SRobert Mustacchi 		man->last->flags = n->flags & ~NODE_LINE;
283*4d131170SRobert Mustacchi 		*cp = '\0';
284371584c2SYuri Pankov 	}
285*4d131170SRobert Mustacchi 
286*4d131170SRobert Mustacchi 	assert(n->tag == NULL);
287*4d131170SRobert Mustacchi 	n->tag = mandoc_strdup(tag);
288*4d131170SRobert Mustacchi 	n->flags |= NODE_HREF;
289371584c2SYuri Pankov }
290371584c2SYuri Pankov 
291*4d131170SRobert Mustacchi /*
292*4d131170SRobert Mustacchi  * When all tags have been set, decide where to put
293*4d131170SRobert Mustacchi  * the associated permalinks, and maybe move some tags
294*4d131170SRobert Mustacchi  * to the beginning of the respective paragraphs.
295*4d131170SRobert Mustacchi  */
296*4d131170SRobert Mustacchi void
tag_postprocess(struct roff_man * man,struct roff_node * n)297*4d131170SRobert Mustacchi tag_postprocess(struct roff_man *man, struct roff_node *n)
298371584c2SYuri Pankov {
299*4d131170SRobert Mustacchi 	if (n->flags & NODE_ID) {
300*4d131170SRobert Mustacchi 		switch (n->tok) {
301*4d131170SRobert Mustacchi 		case MDOC_Pp:
302*4d131170SRobert Mustacchi 			tag_move_href(man, n->next, n->tag);
303*4d131170SRobert Mustacchi 			break;
304*4d131170SRobert Mustacchi 		case MDOC_Bd:
305*4d131170SRobert Mustacchi 		case MDOC_D1:
306*4d131170SRobert Mustacchi 		case MDOC_Dl:
307*4d131170SRobert Mustacchi 			tag_move_href(man, n->child, n->tag);
308*4d131170SRobert Mustacchi 			break;
309*4d131170SRobert Mustacchi 		case MDOC_Bl:
310*4d131170SRobert Mustacchi 			/* XXX No permalink for now. */
311*4d131170SRobert Mustacchi 			break;
312*4d131170SRobert Mustacchi 		default:
313*4d131170SRobert Mustacchi 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
314*4d131170SRobert Mustacchi 				tag_move_id(n);
315*4d131170SRobert Mustacchi 			if (n->tok != MDOC_Tg)
316*4d131170SRobert Mustacchi 				n->flags |= NODE_HREF;
317*4d131170SRobert Mustacchi 			else if ((n->flags & NODE_ID) == 0) {
318*4d131170SRobert Mustacchi 				n->flags |= NODE_NOPRT;
319*4d131170SRobert Mustacchi 				free(n->tag);
320*4d131170SRobert Mustacchi 				n->tag = NULL;
321*4d131170SRobert Mustacchi 			}
322*4d131170SRobert Mustacchi 			break;
323*4d131170SRobert Mustacchi 		}
324*4d131170SRobert Mustacchi 	}
325*4d131170SRobert Mustacchi 	for (n = n->child; n != NULL; n = n->next)
326*4d131170SRobert Mustacchi 		tag_postprocess(man, n);
327371584c2SYuri Pankov }
328