xref: /illumos-gate/usr/src/cmd/mandoc/tag.c (revision 4d131170)
1 /* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
2 /*
3  * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Functions to tag syntax tree nodes.
18  * For internal use by mandoc(1) validation modules only.
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <limits.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "roff.h"
34 #include "mdoc.h"
35 #include "roff_int.h"
36 #include "tag.h"
37 
/*
 * One hash table entry per tag name.
 * The name is stored inline after the fixed members, so an entry
 * is allocated with room for the string plus its terminating NUL.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* nodes carrying this tag */
	size_t			  maxnodes;	/* slots allocated in nodes */
	size_t			  nnodes;	/* slots in use in nodes */
	int			  prio;		/* priority of stored nodes */
	char			  s[];		/* NUL-terminated tag name */
};
45 
46 static void		 tag_move_href(struct roff_man *,
47 				struct roff_node *, const char *);
48 static void		 tag_move_id(struct roff_node *);
49 
50 static struct ohash	 tag_data;
51 
52 
53 /*
54  * Set up the ohash table to collect nodes
55  * where various marked-up terms are documented.
56  */
57 void
tag_alloc(void)58 tag_alloc(void)
59 {
60 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
61 }
62 
63 void
tag_free(void)64 tag_free(void)
65 {
66 	struct tag_entry	*entry;
67 	unsigned int		 slot;
68 
69 	if (tag_data.info.free == NULL)
70 		return;
71 	entry = ohash_first(&tag_data, &slot);
72 	while (entry != NULL) {
73 		free(entry->nodes);
74 		free(entry);
75 		entry = ohash_next(&tag_data, &slot);
76 	}
77 	ohash_delete(&tag_data);
78 	tag_data.info.free = NULL;
79 }
80 
81 /*
82  * Set a node where a term is defined,
83  * unless it is already defined at a lower priority.
84  */
85 void
tag_put(const char * s,int prio,struct roff_node * n)86 tag_put(const char *s, int prio, struct roff_node *n)
87 {
88 	struct tag_entry	*entry;
89 	struct roff_node	*nold;
90 	const char		*se;
91 	size_t			 len;
92 	unsigned int		 slot;
93 
94 	assert(prio <= TAG_FALLBACK);
95 
96 	if (s == NULL) {
97 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
98 			return;
99 		s = n->child->string;
100 		switch (s[0]) {
101 		case '-':
102 			s++;
103 			break;
104 		case '\\':
105 			switch (s[1]) {
106 			case '&':
107 			case '-':
108 			case 'e':
109 				s += 2;
110 				break;
111 			default:
112 				break;
113 			}
114 			break;
115 		default:
116 			break;
117 		}
118 	}
119 
120 	/*
121 	 * Skip whitespace and escapes and whatever follows,
122 	 * and if there is any, downgrade the priority.
123 	 */
124 
125 	len = strcspn(s, " \t\\");
126 	if (len == 0)
127 		return;
128 
129 	se = s + len;
130 	if (*se != '\0' && prio < TAG_WEAK)
131 		prio = TAG_WEAK;
132 
133 	slot = ohash_qlookupi(&tag_data, s, &se);
134 	entry = ohash_find(&tag_data, slot);
135 
136 	/* Build a new entry. */
137 
138 	if (entry == NULL) {
139 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
140 		memcpy(entry->s, s, len);
141 		entry->s[len] = '\0';
142 		entry->nodes = NULL;
143 		entry->maxnodes = entry->nnodes = 0;
144 		ohash_insert(&tag_data, slot, entry);
145 	}
146 
147 	/*
148 	 * Lower priority numbers take precedence.
149 	 * If a better entry is already present, ignore the new one.
150 	 */
151 
152 	else if (entry->prio < prio)
153 			return;
154 
155 	/*
156 	 * If the existing entry is worse, clear it.
157 	 * In addition, a tag with priority TAG_FALLBACK
158 	 * is only used if the tag occurs exactly once.
159 	 */
160 
161 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
162 		while (entry->nnodes > 0) {
163 			nold = entry->nodes[--entry->nnodes];
164 			nold->flags &= ~NODE_ID;
165 			free(nold->tag);
166 			nold->tag = NULL;
167 		}
168 		if (prio == TAG_FALLBACK) {
169 			entry->prio = TAG_DELETE;
170 			return;
171 		}
172 	}
173 
174 	/* Remember the new node. */
175 
176 	if (entry->maxnodes == entry->nnodes) {
177 		entry->maxnodes += 4;
178 		entry->nodes = mandoc_reallocarray(entry->nodes,
179 		    entry->maxnodes, sizeof(*entry->nodes));
180 	}
181 	entry->nodes[entry->nnodes++] = n;
182 	entry->prio = prio;
183 	n->flags |= NODE_ID;
184 	if (n->child == NULL || n->child->string != s || *se != '\0') {
185 		assert(n->tag == NULL);
186 		n->tag = mandoc_strndup(s, len);
187 	}
188 }
189 
190 int
tag_exists(const char * tag)191 tag_exists(const char *tag)
192 {
193 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
194 }
195 
196 /*
197  * For in-line elements, move the link target
198  * to the enclosing paragraph when appropriate.
199  */
200 static void
tag_move_id(struct roff_node * n)201 tag_move_id(struct roff_node *n)
202 {
203 	struct roff_node *np;
204 
205 	np = n;
206 	for (;;) {
207 		if (np->prev != NULL)
208 			np = np->prev;
209 		else if ((np = np->parent) == NULL)
210 			return;
211 		switch (np->tok) {
212 		case MDOC_It:
213 			switch (np->parent->parent->norm->Bl.type) {
214 			case LIST_column:
215 				/* Target the ROFFT_BLOCK = <tr>. */
216 				np = np->parent;
217 				break;
218 			case LIST_diag:
219 			case LIST_hang:
220 			case LIST_inset:
221 			case LIST_ohang:
222 			case LIST_tag:
223 				/* Target the ROFFT_HEAD = <dt>. */
224 				np = np->parent->head;
225 				break;
226 			default:
227 				/* Target the ROFF_BODY = <li>. */
228 				break;
229 			}
230 			/* FALLTHROUGH */
231 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
232 			if (np->tag == NULL) {
233 				np->tag = mandoc_strdup(n->tag == NULL ?
234 				    n->child->string : n->tag);
235 				np->flags |= NODE_ID;
236 				n->flags &= ~NODE_ID;
237 			}
238 			return;
239 		case MDOC_Sh:
240 		case MDOC_Ss:
241 		case MDOC_Bd:
242 		case MDOC_Bl:
243 		case MDOC_D1:
244 		case MDOC_Dl:
245 		case MDOC_Rs:
246 			/* Do not move past major blocks. */
247 			return;
248 		default:
249 			/*
250 			 * Move past in-line content and partial
251 			 * blocks, for example .It Xo or .It Bq Er.
252 			 */
253 			break;
254 		}
255 	}
256 }
257 
258 /*
259  * When a paragraph is tagged and starts with text,
260  * move the permalink to the first few words.
261  */
262 static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)263 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
264 {
265 	char	*cp;
266 
267 	if (n == NULL || n->type != ROFFT_TEXT ||
268 	    *n->string == '\0' || *n->string == ' ')
269 		return;
270 
271 	cp = n->string;
272 	while (cp != NULL && cp - n->string < 5)
273 		cp = strchr(cp + 1, ' ');
274 
275 	/* If the first text node is longer, split it. */
276 
277 	if (cp != NULL && cp[1] != '\0') {
278 		man->last = n;
279 		man->next = ROFF_NEXT_SIBLING;
280 		roff_word_alloc(man, n->line,
281 		    n->pos + (cp - n->string), cp + 1);
282 		man->last->flags = n->flags & ~NODE_LINE;
283 		*cp = '\0';
284 	}
285 
286 	assert(n->tag == NULL);
287 	n->tag = mandoc_strdup(tag);
288 	n->flags |= NODE_HREF;
289 }
290 
291 /*
292  * When all tags have been set, decide where to put
293  * the associated permalinks, and maybe move some tags
294  * to the beginning of the respective paragraphs.
295  */
296 void
tag_postprocess(struct roff_man * man,struct roff_node * n)297 tag_postprocess(struct roff_man *man, struct roff_node *n)
298 {
299 	if (n->flags & NODE_ID) {
300 		switch (n->tok) {
301 		case MDOC_Pp:
302 			tag_move_href(man, n->next, n->tag);
303 			break;
304 		case MDOC_Bd:
305 		case MDOC_D1:
306 		case MDOC_Dl:
307 			tag_move_href(man, n->child, n->tag);
308 			break;
309 		case MDOC_Bl:
310 			/* XXX No permalink for now. */
311 			break;
312 		default:
313 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
314 				tag_move_id(n);
315 			if (n->tok != MDOC_Tg)
316 				n->flags |= NODE_HREF;
317 			else if ((n->flags & NODE_ID) == 0) {
318 				n->flags |= NODE_NOPRT;
319 				free(n->tag);
320 				n->tag = NULL;
321 			}
322 			break;
323 		}
324 	}
325 	for (n = n->child; n != NULL; n = n->next)
326 		tag_postprocess(man, n);
327 }
328