1*4d131170SRobert Mustacchi /* $Id: html.c,v 1.275 2021/09/09 14:47:24 schwarze Exp $ */
295c635efSGarrett D'Amore /*
3260e9a87SYuri Pankov * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4*4d131170SRobert Mustacchi * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
595c635efSGarrett D'Amore *
695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any
795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above
895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies.
995c635efSGarrett D'Amore *
10371584c2SYuri Pankov * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12371584c2SYuri Pankov * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*4d131170SRobert Mustacchi *
18*4d131170SRobert Mustacchi * Common functions for mandoc(1) HTML formatters.
19*4d131170SRobert Mustacchi * For use by individual formatters and by the main program.
2095c635efSGarrett D'Amore */
2195c635efSGarrett D'Amore #include "config.h"
2295c635efSGarrett D'Amore
2395c635efSGarrett D'Amore #include <sys/types.h>
24cec8643bSMichal Nowak #include <sys/stat.h>
2595c635efSGarrett D'Amore
2695c635efSGarrett D'Amore #include <assert.h>
2795c635efSGarrett D'Amore #include <ctype.h>
2895c635efSGarrett D'Amore #include <stdarg.h>
296640c13bSYuri Pankov #include <stddef.h>
3095c635efSGarrett D'Amore #include <stdio.h>
3195c635efSGarrett D'Amore #include <stdint.h>
3295c635efSGarrett D'Amore #include <stdlib.h>
3395c635efSGarrett D'Amore #include <string.h>
3495c635efSGarrett D'Amore #include <unistd.h>
3595c635efSGarrett D'Amore
36260e9a87SYuri Pankov #include "mandoc_aux.h"
376640c13bSYuri Pankov #include "mandoc_ohash.h"
38c66b8046SYuri Pankov #include "mandoc.h"
39c66b8046SYuri Pankov #include "roff.h"
4095c635efSGarrett D'Amore #include "out.h"
4195c635efSGarrett D'Amore #include "html.h"
42371584c2SYuri Pankov #include "manconf.h"
4395c635efSGarrett D'Amore #include "main.h"
4495c635efSGarrett D'Amore
/*
 * Flag bits describing how one HTML element type is treated
 * by the output routines; they are OR-ed into htmldata.flags.
 */
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */

/* Convenience combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Static description of one HTML element type. */
struct htmldata {
	const char	 *name;		/* Element name as written in tags. */
	int		  flags;	/* Bitwise OR of the HTML_* flags. */
};
6195c635efSGarrett D'Amore
6295c635efSGarrett D'Amore static const struct htmldata htmltags[TAG_MAX] = {
63a40ea1a7SYuri Pankov {"html", HTML_NLALL},
64a40ea1a7SYuri Pankov {"head", HTML_NLALL | HTML_INDENT},
65*4d131170SRobert Mustacchi {"meta", HTML_NOSTACK | HTML_NLALL},
66*4d131170SRobert Mustacchi {"link", HTML_NOSTACK | HTML_NLALL},
67*4d131170SRobert Mustacchi {"style", HTML_NLALL | HTML_INDENT},
68a40ea1a7SYuri Pankov {"title", HTML_NLAROUND},
69*4d131170SRobert Mustacchi {"body", HTML_NLALL},
70a40ea1a7SYuri Pankov {"div", HTML_NLAROUND},
71cec8643bSMichal Nowak {"section", HTML_NLALL},
72a40ea1a7SYuri Pankov {"table", HTML_NLALL | HTML_INDENT},
73a40ea1a7SYuri Pankov {"tr", HTML_NLALL | HTML_INDENT},
74a40ea1a7SYuri Pankov {"td", HTML_NLAROUND},
75a40ea1a7SYuri Pankov {"li", HTML_NLAROUND | HTML_INDENT},
76a40ea1a7SYuri Pankov {"ul", HTML_NLALL | HTML_INDENT},
77a40ea1a7SYuri Pankov {"ol", HTML_NLALL | HTML_INDENT},
78a40ea1a7SYuri Pankov {"dl", HTML_NLALL | HTML_INDENT},
79a40ea1a7SYuri Pankov {"dt", HTML_NLAROUND},
80a40ea1a7SYuri Pankov {"dd", HTML_NLAROUND | HTML_INDENT},
81*4d131170SRobert Mustacchi {"h1", HTML_TOPHRASE | HTML_NLAROUND},
82*4d131170SRobert Mustacchi {"h2", HTML_TOPHRASE | HTML_NLAROUND},
83*4d131170SRobert Mustacchi {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
84*4d131170SRobert Mustacchi {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
85*4d131170SRobert Mustacchi {"a", HTML_INPHRASE | HTML_TOPHRASE},
86*4d131170SRobert Mustacchi {"b", HTML_INPHRASE | HTML_TOPHRASE},
87*4d131170SRobert Mustacchi {"cite", HTML_INPHRASE | HTML_TOPHRASE},
88*4d131170SRobert Mustacchi {"code", HTML_INPHRASE | HTML_TOPHRASE},
89*4d131170SRobert Mustacchi {"i", HTML_INPHRASE | HTML_TOPHRASE},
90*4d131170SRobert Mustacchi {"small", HTML_INPHRASE | HTML_TOPHRASE},
91*4d131170SRobert Mustacchi {"span", HTML_INPHRASE | HTML_TOPHRASE},
92*4d131170SRobert Mustacchi {"var", HTML_INPHRASE | HTML_TOPHRASE},
93*4d131170SRobert Mustacchi {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
94*4d131170SRobert Mustacchi {"hr", HTML_INPHRASE | HTML_NOSTACK},
95*4d131170SRobert Mustacchi {"mark", HTML_INPHRASE },
96*4d131170SRobert Mustacchi {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
97a40ea1a7SYuri Pankov {"mrow", 0},
98a40ea1a7SYuri Pankov {"mi", 0},
99c66b8046SYuri Pankov {"mn", 0},
100a40ea1a7SYuri Pankov {"mo", 0},
101a40ea1a7SYuri Pankov {"msup", 0},
102a40ea1a7SYuri Pankov {"msub", 0},
103a40ea1a7SYuri Pankov {"msubsup", 0},
104a40ea1a7SYuri Pankov {"mfrac", 0},
105a40ea1a7SYuri Pankov {"msqrt", 0},
106a40ea1a7SYuri Pankov {"mfenced", 0},
107a40ea1a7SYuri Pankov {"mtable", 0},
108a40ea1a7SYuri Pankov {"mtr", 0},
109a40ea1a7SYuri Pankov {"mtd", 0},
110a40ea1a7SYuri Pankov {"munderover", 0},
111a40ea1a7SYuri Pankov {"munder", 0},
112a40ea1a7SYuri Pankov {"mover", 0},
11395c635efSGarrett D'Amore };
11495c635efSGarrett D'Amore
1156640c13bSYuri Pankov /* Avoid duplicate HTML id= attributes. */
116*4d131170SRobert Mustacchi
/*
 * Record stored in the id_unique table: one per distinct id= string.
 * The string is kept in a flexible array member, so records are
 * allocated with room for the string behind the fixed part.
 */
struct id_entry {
	/* Ordinal number of the latest occurrence. */
	int	 ord;
	/* The id= attribute without any ordinal suffix. */
	char	 id[];
};
1216640c13bSYuri Pankov static struct ohash id_unique;
12295c635efSGarrett D'Amore
123cec8643bSMichal Nowak static void html_reset_internal(struct html *);
124a40ea1a7SYuri Pankov static void print_byte(struct html *, char);
125a40ea1a7SYuri Pankov static void print_endword(struct html *);
126a40ea1a7SYuri Pankov static void print_indent(struct html *);
127a40ea1a7SYuri Pankov static void print_word(struct html *, const char *);
128a40ea1a7SYuri Pankov
129260e9a87SYuri Pankov static void print_ctag(struct html *, struct tag *);
130a40ea1a7SYuri Pankov static int print_escape(struct html *, char);
131a40ea1a7SYuri Pankov static int print_encode(struct html *, const char *, const char *, int);
132a40ea1a7SYuri Pankov static void print_href(struct html *, const char *, const char *, int);
133*4d131170SRobert Mustacchi static void print_metaf(struct html *);
13495c635efSGarrett D'Amore
135260e9a87SYuri Pankov
136260e9a87SYuri Pankov void *
html_alloc(const struct manoutput * outopts)137371584c2SYuri Pankov html_alloc(const struct manoutput *outopts)
13895c635efSGarrett D'Amore {
13995c635efSGarrett D'Amore struct html *h;
14095c635efSGarrett D'Amore
14195c635efSGarrett D'Amore h = mandoc_calloc(1, sizeof(struct html));
14295c635efSGarrett D'Amore
143a40ea1a7SYuri Pankov h->tag = NULL;
144*4d131170SRobert Mustacchi h->metac = h->metal = ESCAPE_FONTROMAN;
145371584c2SYuri Pankov h->style = outopts->style;
146cec8643bSMichal Nowak if ((h->base_man1 = outopts->man) == NULL)
147cec8643bSMichal Nowak h->base_man2 = NULL;
148cec8643bSMichal Nowak else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
149cec8643bSMichal Nowak *h->base_man2++ = '\0';
150371584c2SYuri Pankov h->base_includes = outopts->includes;
151371584c2SYuri Pankov if (outopts->fragment)
152371584c2SYuri Pankov h->oflags |= HTML_FRAGMENT;
153cec8643bSMichal Nowak if (outopts->toc)
154cec8643bSMichal Nowak h->oflags |= HTML_TOC;
15595c635efSGarrett D'Amore
156*4d131170SRobert Mustacchi mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1576640c13bSYuri Pankov
158371584c2SYuri Pankov return h;
15995c635efSGarrett D'Amore }
16095c635efSGarrett D'Amore
161cec8643bSMichal Nowak static void
html_reset_internal(struct html * h)162cec8643bSMichal Nowak html_reset_internal(struct html *h)
16395c635efSGarrett D'Amore {
16495c635efSGarrett D'Amore struct tag *tag;
165*4d131170SRobert Mustacchi struct id_entry *entry;
1666640c13bSYuri Pankov unsigned int slot;
16795c635efSGarrett D'Amore
168a40ea1a7SYuri Pankov while ((tag = h->tag) != NULL) {
169a40ea1a7SYuri Pankov h->tag = tag->next;
17095c635efSGarrett D'Amore free(tag);
17195c635efSGarrett D'Amore }
172*4d131170SRobert Mustacchi entry = ohash_first(&id_unique, &slot);
173*4d131170SRobert Mustacchi while (entry != NULL) {
174*4d131170SRobert Mustacchi free(entry);
175*4d131170SRobert Mustacchi entry = ohash_next(&id_unique, &slot);
1766640c13bSYuri Pankov }
1776640c13bSYuri Pankov ohash_delete(&id_unique);
17895c635efSGarrett D'Amore }
17995c635efSGarrett D'Amore
180cec8643bSMichal Nowak void
html_reset(void * p)181cec8643bSMichal Nowak html_reset(void *p)
182cec8643bSMichal Nowak {
183cec8643bSMichal Nowak html_reset_internal(p);
184*4d131170SRobert Mustacchi mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
185cec8643bSMichal Nowak }
186cec8643bSMichal Nowak
/*
 * Dispose of the formatter state "p": release the tag stack and
 * the id_unique table, then free the state structure itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
193cec8643bSMichal Nowak
19495c635efSGarrett D'Amore void
print_gen_head(struct html * h)19595c635efSGarrett D'Amore print_gen_head(struct html *h)
19695c635efSGarrett D'Amore {
197260e9a87SYuri Pankov struct tag *t;
19895c635efSGarrett D'Amore
199a40ea1a7SYuri Pankov print_otag(h, TAG_META, "?", "charset", "utf-8");
200*4d131170SRobert Mustacchi print_otag(h, TAG_META, "??", "name", "viewport",
201*4d131170SRobert Mustacchi "content", "width=device-width, initial-scale=1.0");
2026640c13bSYuri Pankov if (h->style != NULL) {
2036640c13bSYuri Pankov print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
2046640c13bSYuri Pankov h->style, "type", "text/css", "media", "all");
2056640c13bSYuri Pankov return;
2066640c13bSYuri Pankov }
20795c635efSGarrett D'Amore
208260e9a87SYuri Pankov /*
2096640c13bSYuri Pankov * Print a minimal embedded style sheet.
210260e9a87SYuri Pankov */
211a40ea1a7SYuri Pankov
212a40ea1a7SYuri Pankov t = print_otag(h, TAG_STYLE, "");
213a40ea1a7SYuri Pankov print_text(h, "table.head, table.foot { width: 100%; }");
214a40ea1a7SYuri Pankov print_endline(h);
215a40ea1a7SYuri Pankov print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
216a40ea1a7SYuri Pankov print_endline(h);
217a40ea1a7SYuri Pankov print_text(h, "td.head-vol { text-align: center; }");
218a40ea1a7SYuri Pankov print_endline(h);
219*4d131170SRobert Mustacchi print_text(h, ".Nd, .Bf, .Op { display: inline; }");
2206640c13bSYuri Pankov print_endline(h);
221*4d131170SRobert Mustacchi print_text(h, ".Pa, .Ad { font-style: italic; }");
2226640c13bSYuri Pankov print_endline(h);
223*4d131170SRobert Mustacchi print_text(h, ".Ms { font-weight: bold; }");
2246640c13bSYuri Pankov print_endline(h);
225*4d131170SRobert Mustacchi print_text(h, ".Bl-diag ");
2266640c13bSYuri Pankov print_byte(h, '>');
2276640c13bSYuri Pankov print_text(h, " dt { font-weight: bold; }");
2286640c13bSYuri Pankov print_endline(h);
229*4d131170SRobert Mustacchi print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
230*4d131170SRobert Mustacchi "{ font-weight: bold; font-family: inherit; }");
231260e9a87SYuri Pankov print_tagq(h, t);
23295c635efSGarrett D'Amore }
23395c635efSGarrett D'Amore
234*4d131170SRobert Mustacchi int
html_setfont(struct html * h,enum mandoc_esc font)235*4d131170SRobert Mustacchi html_setfont(struct html *h, enum mandoc_esc font)
23695c635efSGarrett D'Amore {
237*4d131170SRobert Mustacchi switch (font) {
238260e9a87SYuri Pankov case ESCAPE_FONTPREV:
23995c635efSGarrett D'Amore font = h->metal;
24095c635efSGarrett D'Amore break;
241260e9a87SYuri Pankov case ESCAPE_FONTITALIC:
242260e9a87SYuri Pankov case ESCAPE_FONTBOLD:
243260e9a87SYuri Pankov case ESCAPE_FONTBI:
244*4d131170SRobert Mustacchi case ESCAPE_FONTROMAN:
245*4d131170SRobert Mustacchi case ESCAPE_FONTCR:
246*4d131170SRobert Mustacchi case ESCAPE_FONTCB:
247*4d131170SRobert Mustacchi case ESCAPE_FONTCI:
248cec8643bSMichal Nowak break;
249260e9a87SYuri Pankov case ESCAPE_FONT:
250*4d131170SRobert Mustacchi font = ESCAPE_FONTROMAN;
25195c635efSGarrett D'Amore break;
25295c635efSGarrett D'Amore default:
253*4d131170SRobert Mustacchi return 0;
25495c635efSGarrett D'Amore }
255*4d131170SRobert Mustacchi h->metal = h->metac;
256*4d131170SRobert Mustacchi h->metac = font;
257*4d131170SRobert Mustacchi return 1;
258*4d131170SRobert Mustacchi }
25995c635efSGarrett D'Amore
260*4d131170SRobert Mustacchi static void
print_metaf(struct html * h)261*4d131170SRobert Mustacchi print_metaf(struct html *h)
262*4d131170SRobert Mustacchi {
26395c635efSGarrett D'Amore if (h->metaf) {
26495c635efSGarrett D'Amore print_tagq(h, h->metaf);
26595c635efSGarrett D'Amore h->metaf = NULL;
26695c635efSGarrett D'Amore }
267*4d131170SRobert Mustacchi switch (h->metac) {
268*4d131170SRobert Mustacchi case ESCAPE_FONTITALIC:
269a40ea1a7SYuri Pankov h->metaf = print_otag(h, TAG_I, "");
270698f87a4SGarrett D'Amore break;
271*4d131170SRobert Mustacchi case ESCAPE_FONTBOLD:
272a40ea1a7SYuri Pankov h->metaf = print_otag(h, TAG_B, "");
273698f87a4SGarrett D'Amore break;
274*4d131170SRobert Mustacchi case ESCAPE_FONTBI:
275a40ea1a7SYuri Pankov h->metaf = print_otag(h, TAG_B, "");
276a40ea1a7SYuri Pankov print_otag(h, TAG_I, "");
277698f87a4SGarrett D'Amore break;
278*4d131170SRobert Mustacchi case ESCAPE_FONTCR:
279*4d131170SRobert Mustacchi h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
280*4d131170SRobert Mustacchi break;
281*4d131170SRobert Mustacchi case ESCAPE_FONTCB:
282cec8643bSMichal Nowak h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
283*4d131170SRobert Mustacchi print_otag(h, TAG_B, "");
284*4d131170SRobert Mustacchi break;
285*4d131170SRobert Mustacchi case ESCAPE_FONTCI:
286*4d131170SRobert Mustacchi h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
287*4d131170SRobert Mustacchi print_otag(h, TAG_I, "");
288cec8643bSMichal Nowak break;
289698f87a4SGarrett D'Amore default:
290698f87a4SGarrett D'Amore break;
291698f87a4SGarrett D'Amore }
29295c635efSGarrett D'Amore }
29395c635efSGarrett D'Amore
294cec8643bSMichal Nowak void
html_close_paragraph(struct html * h)295cec8643bSMichal Nowak html_close_paragraph(struct html *h)
296cec8643bSMichal Nowak {
297*4d131170SRobert Mustacchi struct tag *this, *next;
298*4d131170SRobert Mustacchi int flags;
299cec8643bSMichal Nowak
300*4d131170SRobert Mustacchi this = h->tag;
301*4d131170SRobert Mustacchi for (;;) {
302*4d131170SRobert Mustacchi next = this->next;
303*4d131170SRobert Mustacchi flags = htmltags[this->tag].flags;
304*4d131170SRobert Mustacchi if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
305*4d131170SRobert Mustacchi print_ctag(h, this);
306*4d131170SRobert Mustacchi if ((flags & HTML_INPHRASE) == 0)
307cec8643bSMichal Nowak break;
308*4d131170SRobert Mustacchi this = next;
309cec8643bSMichal Nowak }
310cec8643bSMichal Nowak }
311cec8643bSMichal Nowak
312cec8643bSMichal Nowak /*
313cec8643bSMichal Nowak * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
314cec8643bSMichal Nowak * TOKEN_NONE does not switch. The old mode is returned.
315cec8643bSMichal Nowak */
316cec8643bSMichal Nowak enum roff_tok
html_fillmode(struct html * h,enum roff_tok want)317cec8643bSMichal Nowak html_fillmode(struct html *h, enum roff_tok want)
318cec8643bSMichal Nowak {
319cec8643bSMichal Nowak struct tag *t;
320cec8643bSMichal Nowak enum roff_tok had;
321cec8643bSMichal Nowak
322cec8643bSMichal Nowak for (t = h->tag; t != NULL; t = t->next)
323cec8643bSMichal Nowak if (t->tag == TAG_PRE)
324cec8643bSMichal Nowak break;
325cec8643bSMichal Nowak
326cec8643bSMichal Nowak had = t == NULL ? ROFF_fi : ROFF_nf;
327cec8643bSMichal Nowak
328cec8643bSMichal Nowak if (want != had) {
329cec8643bSMichal Nowak switch (want) {
330cec8643bSMichal Nowak case ROFF_fi:
331cec8643bSMichal Nowak print_tagq(h, t);
332cec8643bSMichal Nowak break;
333cec8643bSMichal Nowak case ROFF_nf:
334cec8643bSMichal Nowak html_close_paragraph(h);
335cec8643bSMichal Nowak print_otag(h, TAG_PRE, "");
336cec8643bSMichal Nowak break;
337cec8643bSMichal Nowak case TOKEN_NONE:
338cec8643bSMichal Nowak break;
339cec8643bSMichal Nowak default:
340cec8643bSMichal Nowak abort();
341cec8643bSMichal Nowak }
342cec8643bSMichal Nowak }
343cec8643bSMichal Nowak return had;
344cec8643bSMichal Nowak }
345cec8643bSMichal Nowak
346*4d131170SRobert Mustacchi /*
347*4d131170SRobert Mustacchi * Allocate a string to be used for the "id=" attribute of an HTML
348*4d131170SRobert Mustacchi * element and/or as a segment identifier for a URI in an <a> element.
349*4d131170SRobert Mustacchi * The function may fail and return NULL if the node lacks text data
350*4d131170SRobert Mustacchi * to create the attribute from.
351*4d131170SRobert Mustacchi * The caller is responsible for free(3)ing the returned string.
352*4d131170SRobert Mustacchi *
353*4d131170SRobert Mustacchi * If the "unique" argument is non-zero, the "id_unique" ohash table
354*4d131170SRobert Mustacchi * is used for de-duplication. If the "unique" argument is 1,
355*4d131170SRobert Mustacchi * it is the first time the function is called for this tag and
356*4d131170SRobert Mustacchi * location, so if an ordinal suffix is needed, it is incremented.
357*4d131170SRobert Mustacchi * If the "unique" argument is 2, it is the second time the function
358*4d131170SRobert Mustacchi * is called for this tag and location, so the ordinal suffix
359*4d131170SRobert Mustacchi * remains unchanged.
360*4d131170SRobert Mustacchi */
361c66b8046SYuri Pankov char *
html_make_id(const struct roff_node * n,int unique)3626640c13bSYuri Pankov html_make_id(const struct roff_node *n, int unique)
363c66b8046SYuri Pankov {
364c66b8046SYuri Pankov const struct roff_node *nch;
365*4d131170SRobert Mustacchi struct id_entry *entry;
366*4d131170SRobert Mustacchi char *buf, *cp;
367*4d131170SRobert Mustacchi size_t len;
3686640c13bSYuri Pankov unsigned int slot;
369c66b8046SYuri Pankov
370*4d131170SRobert Mustacchi if (n->tag != NULL)
371*4d131170SRobert Mustacchi buf = mandoc_strdup(n->tag);
372*4d131170SRobert Mustacchi else {
373*4d131170SRobert Mustacchi switch (n->tok) {
374*4d131170SRobert Mustacchi case MDOC_Sh:
375*4d131170SRobert Mustacchi case MDOC_Ss:
376*4d131170SRobert Mustacchi case MDOC_Sx:
377*4d131170SRobert Mustacchi case MAN_SH:
378*4d131170SRobert Mustacchi case MAN_SS:
379*4d131170SRobert Mustacchi for (nch = n->child; nch != NULL; nch = nch->next)
380*4d131170SRobert Mustacchi if (nch->type != ROFFT_TEXT)
381*4d131170SRobert Mustacchi return NULL;
382*4d131170SRobert Mustacchi buf = NULL;
383*4d131170SRobert Mustacchi deroff(&buf, n);
384*4d131170SRobert Mustacchi if (buf == NULL)
385*4d131170SRobert Mustacchi return NULL;
386*4d131170SRobert Mustacchi break;
387*4d131170SRobert Mustacchi default:
388*4d131170SRobert Mustacchi if (n->child == NULL || n->child->type != ROFFT_TEXT)
389*4d131170SRobert Mustacchi return NULL;
390*4d131170SRobert Mustacchi buf = mandoc_strdup(n->child->string);
391*4d131170SRobert Mustacchi break;
392*4d131170SRobert Mustacchi }
393*4d131170SRobert Mustacchi }
394c66b8046SYuri Pankov
3956640c13bSYuri Pankov /*
3966640c13bSYuri Pankov * In ID attributes, only use ASCII characters that are
3976640c13bSYuri Pankov * permitted in URL-fragment strings according to the
3986640c13bSYuri Pankov * explicit list at:
3996640c13bSYuri Pankov * https://url.spec.whatwg.org/#url-fragment-string
400*4d131170SRobert Mustacchi * In addition, reserve '~' for ordinal suffixes.
4016640c13bSYuri Pankov */
402c66b8046SYuri Pankov
403c66b8046SYuri Pankov for (cp = buf; *cp != '\0'; cp++)
4046640c13bSYuri Pankov if (isalnum((unsigned char)*cp) == 0 &&
405*4d131170SRobert Mustacchi strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
406c66b8046SYuri Pankov *cp = '_';
407c66b8046SYuri Pankov
4086640c13bSYuri Pankov if (unique == 0)
4096640c13bSYuri Pankov return buf;
4106640c13bSYuri Pankov
4116640c13bSYuri Pankov /* Avoid duplicate HTML id= attributes. */
4126640c13bSYuri Pankov
4136640c13bSYuri Pankov slot = ohash_qlookup(&id_unique, buf);
414*4d131170SRobert Mustacchi if ((entry = ohash_find(&id_unique, slot)) == NULL) {
415*4d131170SRobert Mustacchi len = strlen(buf) + 1;
416*4d131170SRobert Mustacchi entry = mandoc_malloc(sizeof(*entry) + len);
417*4d131170SRobert Mustacchi entry->ord = 1;
418*4d131170SRobert Mustacchi memcpy(entry->id, buf, len);
419*4d131170SRobert Mustacchi ohash_insert(&id_unique, slot, entry);
420*4d131170SRobert Mustacchi } else if (unique == 1)
421*4d131170SRobert Mustacchi entry->ord++;
422*4d131170SRobert Mustacchi
423*4d131170SRobert Mustacchi if (entry->ord > 1) {
424*4d131170SRobert Mustacchi cp = buf;
425*4d131170SRobert Mustacchi mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
426*4d131170SRobert Mustacchi free(cp);
42795c635efSGarrett D'Amore }
4286640c13bSYuri Pankov return buf;
42995c635efSGarrett D'Amore }
43095c635efSGarrett D'Amore
431260e9a87SYuri Pankov static int
print_escape(struct html * h,char c)432a40ea1a7SYuri Pankov print_escape(struct html *h, char c)
433260e9a87SYuri Pankov {
434260e9a87SYuri Pankov
435260e9a87SYuri Pankov switch (c) {
436260e9a87SYuri Pankov case '<':
437a40ea1a7SYuri Pankov print_word(h, "<");
438260e9a87SYuri Pankov break;
439260e9a87SYuri Pankov case '>':
440a40ea1a7SYuri Pankov print_word(h, ">");
441260e9a87SYuri Pankov break;
442260e9a87SYuri Pankov case '&':
443a40ea1a7SYuri Pankov print_word(h, "&");
444260e9a87SYuri Pankov break;
445260e9a87SYuri Pankov case '"':
446a40ea1a7SYuri Pankov print_word(h, """);
447260e9a87SYuri Pankov break;
448260e9a87SYuri Pankov case ASCII_NBRSP:
449a40ea1a7SYuri Pankov print_word(h, " ");
450260e9a87SYuri Pankov break;
451260e9a87SYuri Pankov case ASCII_HYPH:
452a40ea1a7SYuri Pankov print_byte(h, '-');
453371584c2SYuri Pankov break;
454260e9a87SYuri Pankov case ASCII_BREAK:
455260e9a87SYuri Pankov break;
456260e9a87SYuri Pankov default:
457371584c2SYuri Pankov return 0;
458260e9a87SYuri Pankov }
459371584c2SYuri Pankov return 1;
460260e9a87SYuri Pankov }
461260e9a87SYuri Pankov
46295c635efSGarrett D'Amore static int
print_encode(struct html * h,const char * p,const char * pend,int norecurse)463a40ea1a7SYuri Pankov print_encode(struct html *h, const char *p, const char *pend, int norecurse)
46495c635efSGarrett D'Amore {
465a40ea1a7SYuri Pankov char numbuf[16];
46695c635efSGarrett D'Amore const char *seq;
467c66b8046SYuri Pankov size_t sz;
468c66b8046SYuri Pankov int c, len, breakline, nospace;
46995c635efSGarrett D'Amore enum mandoc_esc esc;
470c66b8046SYuri Pankov static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
471260e9a87SYuri Pankov ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
47295c635efSGarrett D'Amore
473a40ea1a7SYuri Pankov if (pend == NULL)
474a40ea1a7SYuri Pankov pend = strchr(p, '\0');
475a40ea1a7SYuri Pankov
476c66b8046SYuri Pankov breakline = 0;
47795c635efSGarrett D'Amore nospace = 0;
47895c635efSGarrett D'Amore
479a40ea1a7SYuri Pankov while (p < pend) {
480698f87a4SGarrett D'Amore if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
481698f87a4SGarrett D'Amore h->flags &= ~HTML_SKIPCHAR;
482698f87a4SGarrett D'Amore p++;
483698f87a4SGarrett D'Amore continue;
484698f87a4SGarrett D'Amore }
485698f87a4SGarrett D'Amore
486a40ea1a7SYuri Pankov for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
487c66b8046SYuri Pankov print_byte(h, *p);
488c66b8046SYuri Pankov
489c66b8046SYuri Pankov if (breakline &&
490c66b8046SYuri Pankov (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
491cec8643bSMichal Nowak print_otag(h, TAG_BR, "");
492c66b8046SYuri Pankov breakline = 0;
493c66b8046SYuri Pankov while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
494c66b8046SYuri Pankov p++;
495c66b8046SYuri Pankov continue;
496c66b8046SYuri Pankov }
49795c635efSGarrett D'Amore
498a40ea1a7SYuri Pankov if (p >= pend)
49995c635efSGarrett D'Amore break;
50095c635efSGarrett D'Amore
501c66b8046SYuri Pankov if (*p == ' ') {
502c66b8046SYuri Pankov print_endword(h);
503c66b8046SYuri Pankov p++;
504c66b8046SYuri Pankov continue;
505c66b8046SYuri Pankov }
506c66b8046SYuri Pankov
507a40ea1a7SYuri Pankov if (print_escape(h, *p++))
50895c635efSGarrett D'Amore continue;
50995c635efSGarrett D'Amore
51095c635efSGarrett D'Amore esc = mandoc_escape(&p, &seq, &len);
511698f87a4SGarrett D'Amore switch (esc) {
512260e9a87SYuri Pankov case ESCAPE_FONT:
513260e9a87SYuri Pankov case ESCAPE_FONTPREV:
514260e9a87SYuri Pankov case ESCAPE_FONTBOLD:
515260e9a87SYuri Pankov case ESCAPE_FONTITALIC:
516260e9a87SYuri Pankov case ESCAPE_FONTBI:
517260e9a87SYuri Pankov case ESCAPE_FONTROMAN:
518*4d131170SRobert Mustacchi case ESCAPE_FONTCR:
519*4d131170SRobert Mustacchi case ESCAPE_FONTCB:
520*4d131170SRobert Mustacchi case ESCAPE_FONTCI:
521cec8643bSMichal Nowak if (0 == norecurse) {
522cec8643bSMichal Nowak h->flags |= HTML_NOSPACE;
523*4d131170SRobert Mustacchi if (html_setfont(h, esc))
524*4d131170SRobert Mustacchi print_metaf(h);
525cec8643bSMichal Nowak h->flags &= ~HTML_NOSPACE;
526cec8643bSMichal Nowak }
527698f87a4SGarrett D'Amore continue;
528260e9a87SYuri Pankov case ESCAPE_SKIPCHAR:
529698f87a4SGarrett D'Amore h->flags |= HTML_SKIPCHAR;
530698f87a4SGarrett D'Amore continue;
531cec8643bSMichal Nowak case ESCAPE_ERROR:
532cec8643bSMichal Nowak continue;
533698f87a4SGarrett D'Amore default:
534698f87a4SGarrett D'Amore break;
535698f87a4SGarrett D'Amore }
536698f87a4SGarrett D'Amore
537698f87a4SGarrett D'Amore if (h->flags & HTML_SKIPCHAR) {
538698f87a4SGarrett D'Amore h->flags &= ~HTML_SKIPCHAR;
539698f87a4SGarrett D'Amore continue;
540698f87a4SGarrett D'Amore }
541698f87a4SGarrett D'Amore
54295c635efSGarrett D'Amore switch (esc) {
543260e9a87SYuri Pankov case ESCAPE_UNICODE:
544260e9a87SYuri Pankov /* Skip past "u" header. */
54595c635efSGarrett D'Amore c = mchars_num2uc(seq + 1, len - 1);
54695c635efSGarrett D'Amore break;
547260e9a87SYuri Pankov case ESCAPE_NUMBERED:
54895c635efSGarrett D'Amore c = mchars_num2char(seq, len);
549260e9a87SYuri Pankov if (c < 0)
550260e9a87SYuri Pankov continue;
55195c635efSGarrett D'Amore break;
552260e9a87SYuri Pankov case ESCAPE_SPECIAL:
553371584c2SYuri Pankov c = mchars_spec2cp(seq, len);
554260e9a87SYuri Pankov if (c <= 0)
555260e9a87SYuri Pankov continue;
55695c635efSGarrett D'Amore break;
557cec8643bSMichal Nowak case ESCAPE_UNDEF:
558cec8643bSMichal Nowak c = *seq;
559cec8643bSMichal Nowak break;
560cec8643bSMichal Nowak case ESCAPE_DEVICE:
561cec8643bSMichal Nowak print_word(h, "html");
562cec8643bSMichal Nowak continue;
563c66b8046SYuri Pankov case ESCAPE_BREAK:
564c66b8046SYuri Pankov breakline = 1;
565c66b8046SYuri Pankov continue;
566260e9a87SYuri Pankov case ESCAPE_NOSPACE:
56795c635efSGarrett D'Amore if ('\0' == *p)
56895c635efSGarrett D'Amore nospace = 1;
569260e9a87SYuri Pankov continue;
570260e9a87SYuri Pankov case ESCAPE_OVERSTRIKE:
571260e9a87SYuri Pankov if (len == 0)
572260e9a87SYuri Pankov continue;
573260e9a87SYuri Pankov c = seq[len - 1];
57495c635efSGarrett D'Amore break;
57595c635efSGarrett D'Amore default:
576260e9a87SYuri Pankov continue;
57795c635efSGarrett D'Amore }
578260e9a87SYuri Pankov if ((c < 0x20 && c != 0x09) ||
579260e9a87SYuri Pankov (c > 0x7E && c < 0xA0))
580260e9a87SYuri Pankov c = 0xFFFD;
581a40ea1a7SYuri Pankov if (c > 0x7E) {
582c66b8046SYuri Pankov (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
583a40ea1a7SYuri Pankov print_word(h, numbuf);
584a40ea1a7SYuri Pankov } else if (print_escape(h, c) == 0)
585a40ea1a7SYuri Pankov print_byte(h, c);
58695c635efSGarrett D'Amore }
58795c635efSGarrett D'Amore
588371584c2SYuri Pankov return nospace;
58995c635efSGarrett D'Amore }
59095c635efSGarrett D'Amore
59195c635efSGarrett D'Amore static void
print_href(struct html * h,const char * name,const char * sec,int man)592a40ea1a7SYuri Pankov print_href(struct html *h, const char *name, const char *sec, int man)
59395c635efSGarrett D'Amore {
594cec8643bSMichal Nowak struct stat sb;
595a40ea1a7SYuri Pankov const char *p, *pp;
596cec8643bSMichal Nowak char *filename;
597cec8643bSMichal Nowak
598cec8643bSMichal Nowak if (man) {
599cec8643bSMichal Nowak pp = h->base_man1;
600cec8643bSMichal Nowak if (h->base_man2 != NULL) {
601cec8643bSMichal Nowak mandoc_asprintf(&filename, "%s.%s", name, sec);
602cec8643bSMichal Nowak if (stat(filename, &sb) == -1)
603cec8643bSMichal Nowak pp = h->base_man2;
604cec8643bSMichal Nowak free(filename);
605cec8643bSMichal Nowak }
606cec8643bSMichal Nowak } else
607cec8643bSMichal Nowak pp = h->base_includes;
608a40ea1a7SYuri Pankov
609a40ea1a7SYuri Pankov while ((p = strchr(pp, '%')) != NULL) {
610a40ea1a7SYuri Pankov print_encode(h, pp, p, 1);
611a40ea1a7SYuri Pankov if (man && p[1] == 'S') {
612a40ea1a7SYuri Pankov if (sec == NULL)
613a40ea1a7SYuri Pankov print_byte(h, '1');
614a40ea1a7SYuri Pankov else
615a40ea1a7SYuri Pankov print_encode(h, sec, NULL, 1);
616a40ea1a7SYuri Pankov } else if ((man && p[1] == 'N') ||
617a40ea1a7SYuri Pankov (man == 0 && p[1] == 'I'))
618a40ea1a7SYuri Pankov print_encode(h, name, NULL, 1);
619a40ea1a7SYuri Pankov else
620a40ea1a7SYuri Pankov print_encode(h, p, p + 2, 1);
621a40ea1a7SYuri Pankov pp = p + 2;
622a40ea1a7SYuri Pankov }
623a40ea1a7SYuri Pankov if (*pp != '\0')
624a40ea1a7SYuri Pankov print_encode(h, pp, NULL, 1);
62595c635efSGarrett D'Amore }
62695c635efSGarrett D'Amore
62795c635efSGarrett D'Amore struct tag *
print_otag(struct html * h,enum htmltag tag,const char * fmt,...)628a40ea1a7SYuri Pankov print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
62995c635efSGarrett D'Amore {
630a40ea1a7SYuri Pankov va_list ap;
63195c635efSGarrett D'Amore struct tag *t;
632a40ea1a7SYuri Pankov const char *attr;
633a40ea1a7SYuri Pankov char *arg1, *arg2;
634cec8643bSMichal Nowak int style_written, tflags;
635a40ea1a7SYuri Pankov
636a40ea1a7SYuri Pankov tflags = htmltags[tag].flags;
63795c635efSGarrett D'Amore
638*4d131170SRobert Mustacchi /* Flow content is not allowed in phrasing context. */
639*4d131170SRobert Mustacchi
640*4d131170SRobert Mustacchi if ((tflags & HTML_INPHRASE) == 0) {
641*4d131170SRobert Mustacchi for (t = h->tag; t != NULL; t = t->next) {
642*4d131170SRobert Mustacchi if (t->closed)
643*4d131170SRobert Mustacchi continue;
644*4d131170SRobert Mustacchi assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
645*4d131170SRobert Mustacchi break;
646*4d131170SRobert Mustacchi }
647*4d131170SRobert Mustacchi
648*4d131170SRobert Mustacchi /*
649*4d131170SRobert Mustacchi * Always wrap phrasing elements in a paragraph
650*4d131170SRobert Mustacchi * unless already contained in some flow container;
651*4d131170SRobert Mustacchi * never put them directly into a section.
652*4d131170SRobert Mustacchi */
653*4d131170SRobert Mustacchi
654*4d131170SRobert Mustacchi } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
655*4d131170SRobert Mustacchi print_otag(h, TAG_P, "c", "Pp");
656*4d131170SRobert Mustacchi
657a40ea1a7SYuri Pankov /* Push this tag onto the stack of open scopes. */
65895c635efSGarrett D'Amore
659a40ea1a7SYuri Pankov if ((tflags & HTML_NOSTACK) == 0) {
66095c635efSGarrett D'Amore t = mandoc_malloc(sizeof(struct tag));
66195c635efSGarrett D'Amore t->tag = tag;
662a40ea1a7SYuri Pankov t->next = h->tag;
663cec8643bSMichal Nowak t->refcnt = 0;
664cec8643bSMichal Nowak t->closed = 0;
665a40ea1a7SYuri Pankov h->tag = t;
66695c635efSGarrett D'Amore } else
66795c635efSGarrett D'Amore t = NULL;
66895c635efSGarrett D'Amore
669a40ea1a7SYuri Pankov if (tflags & HTML_NLBEFORE)
670a40ea1a7SYuri Pankov print_endline(h);
671a40ea1a7SYuri Pankov if (h->col == 0)
672a40ea1a7SYuri Pankov print_indent(h);
673a40ea1a7SYuri Pankov else if ((h->flags & HTML_NOSPACE) == 0) {
674a40ea1a7SYuri Pankov if (h->flags & HTML_KEEP)
675c66b8046SYuri Pankov print_word(h, " ");
676a40ea1a7SYuri Pankov else {
677a40ea1a7SYuri Pankov if (h->flags & HTML_PREKEEP)
678a40ea1a7SYuri Pankov h->flags |= HTML_KEEP;
679a40ea1a7SYuri Pankov print_endword(h);
68095c635efSGarrett D'Amore }
681a40ea1a7SYuri Pankov }
68295c635efSGarrett D'Amore
68395c635efSGarrett D'Amore if ( ! (h->flags & HTML_NONOSPACE))
68495c635efSGarrett D'Amore h->flags &= ~HTML_NOSPACE;
68595c635efSGarrett D'Amore else
68695c635efSGarrett D'Amore h->flags |= HTML_NOSPACE;
68795c635efSGarrett D'Amore
68895c635efSGarrett D'Amore /* Print out the tag name and attributes. */
68995c635efSGarrett D'Amore
690a40ea1a7SYuri Pankov print_byte(h, '<');
691a40ea1a7SYuri Pankov print_word(h, htmltags[tag].name);
692a40ea1a7SYuri Pankov
693a40ea1a7SYuri Pankov va_start(ap, fmt);
694a40ea1a7SYuri Pankov
695cec8643bSMichal Nowak while (*fmt != '\0' && *fmt != 's') {
696a40ea1a7SYuri Pankov
6976640c13bSYuri Pankov /* Parse attributes and arguments. */
698a40ea1a7SYuri Pankov
699a40ea1a7SYuri Pankov arg1 = va_arg(ap, char *);
7006640c13bSYuri Pankov arg2 = NULL;
701a40ea1a7SYuri Pankov switch (*fmt++) {
702a40ea1a7SYuri Pankov case 'c':
703a40ea1a7SYuri Pankov attr = "class";
704a40ea1a7SYuri Pankov break;
705a40ea1a7SYuri Pankov case 'h':
706a40ea1a7SYuri Pankov attr = "href";
707a40ea1a7SYuri Pankov break;
708a40ea1a7SYuri Pankov case 'i':
709a40ea1a7SYuri Pankov attr = "id";
710a40ea1a7SYuri Pankov break;
711a40ea1a7SYuri Pankov case '?':
712a40ea1a7SYuri Pankov attr = arg1;
713a40ea1a7SYuri Pankov arg1 = va_arg(ap, char *);
714a40ea1a7SYuri Pankov break;
715a40ea1a7SYuri Pankov default:
716a40ea1a7SYuri Pankov abort();
717a40ea1a7SYuri Pankov }
718a40ea1a7SYuri Pankov if (*fmt == 'M')
719a40ea1a7SYuri Pankov arg2 = va_arg(ap, char *);
720a40ea1a7SYuri Pankov if (arg1 == NULL)
721a40ea1a7SYuri Pankov continue;
722a40ea1a7SYuri Pankov
7236640c13bSYuri Pankov /* Print the attributes. */
724a40ea1a7SYuri Pankov
725a40ea1a7SYuri Pankov print_byte(h, ' ');
726a40ea1a7SYuri Pankov print_word(h, attr);
727a40ea1a7SYuri Pankov print_byte(h, '=');
728a40ea1a7SYuri Pankov print_byte(h, '"');
729a40ea1a7SYuri Pankov switch (*fmt) {
730a40ea1a7SYuri Pankov case 'I':
731a40ea1a7SYuri Pankov print_href(h, arg1, NULL, 0);
732a40ea1a7SYuri Pankov fmt++;
733a40ea1a7SYuri Pankov break;
734c66b8046SYuri Pankov case 'M':
735c66b8046SYuri Pankov print_href(h, arg1, arg2, 1);
736c66b8046SYuri Pankov fmt++;
737c66b8046SYuri Pankov break;
738a40ea1a7SYuri Pankov case 'R':
739a40ea1a7SYuri Pankov print_byte(h, '#');
740c66b8046SYuri Pankov print_encode(h, arg1, NULL, 1);
741c66b8046SYuri Pankov fmt++;
742c66b8046SYuri Pankov break;
743a40ea1a7SYuri Pankov default:
744cec8643bSMichal Nowak print_encode(h, arg1, NULL, 1);
745a40ea1a7SYuri Pankov break;
746a40ea1a7SYuri Pankov }
747a40ea1a7SYuri Pankov print_byte(h, '"');
7486640c13bSYuri Pankov }
749cec8643bSMichal Nowak
750cec8643bSMichal Nowak style_written = 0;
751cec8643bSMichal Nowak while (*fmt++ == 's') {
752cec8643bSMichal Nowak arg1 = va_arg(ap, char *);
753cec8643bSMichal Nowak arg2 = va_arg(ap, char *);
754cec8643bSMichal Nowak if (arg2 == NULL)
755cec8643bSMichal Nowak continue;
756cec8643bSMichal Nowak print_byte(h, ' ');
757cec8643bSMichal Nowak if (style_written == 0) {
758cec8643bSMichal Nowak print_word(h, "style=\"");
759cec8643bSMichal Nowak style_written = 1;
760cec8643bSMichal Nowak }
761cec8643bSMichal Nowak print_word(h, arg1);
762cec8643bSMichal Nowak print_byte(h, ':');
763cec8643bSMichal Nowak print_byte(h, ' ');
764cec8643bSMichal Nowak print_word(h, arg2);
765cec8643bSMichal Nowak print_byte(h, ';');
766cec8643bSMichal Nowak }
767cec8643bSMichal Nowak if (style_written)
768cec8643bSMichal Nowak print_byte(h, '"');
769cec8643bSMichal Nowak
770a40ea1a7SYuri Pankov va_end(ap);
77195c635efSGarrett D'Amore
772260e9a87SYuri Pankov /* Accommodate for "well-formed" singleton escaping. */
77395c635efSGarrett D'Amore
774*4d131170SRobert Mustacchi if (htmltags[tag].flags & HTML_NOSTACK)
775a40ea1a7SYuri Pankov print_byte(h, '/');
77695c635efSGarrett D'Amore
777a40ea1a7SYuri Pankov print_byte(h, '>');
77895c635efSGarrett D'Amore
779a40ea1a7SYuri Pankov if (tflags & HTML_NLBEGIN)
780a40ea1a7SYuri Pankov print_endline(h);
781a40ea1a7SYuri Pankov else
782a40ea1a7SYuri Pankov h->flags |= HTML_NOSPACE;
78395c635efSGarrett D'Amore
784a40ea1a7SYuri Pankov if (tflags & HTML_INDENT)
785a40ea1a7SYuri Pankov h->indent++;
786a40ea1a7SYuri Pankov if (tflags & HTML_NOINDENT)
787a40ea1a7SYuri Pankov h->noindent++;
78895c635efSGarrett D'Amore
789371584c2SYuri Pankov return t;
79095c635efSGarrett D'Amore }
79195c635efSGarrett D'Amore
792*4d131170SRobert Mustacchi /*
793*4d131170SRobert Mustacchi * Print an element with an optional "id=" attribute.
794*4d131170SRobert Mustacchi * If the element has phrasing content and an "id=" attribute,
795*4d131170SRobert Mustacchi * also add a permalink: outside if it can be in phrasing context,
796*4d131170SRobert Mustacchi * inside otherwise.
797*4d131170SRobert Mustacchi */
798*4d131170SRobert Mustacchi struct tag *
print_otag_id(struct html * h,enum htmltag elemtype,const char * cattr,struct roff_node * n)799*4d131170SRobert Mustacchi print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
800*4d131170SRobert Mustacchi struct roff_node *n)
801*4d131170SRobert Mustacchi {
802*4d131170SRobert Mustacchi struct roff_node *nch;
803*4d131170SRobert Mustacchi struct tag *ret, *t;
804*4d131170SRobert Mustacchi char *id, *href;
805*4d131170SRobert Mustacchi
806*4d131170SRobert Mustacchi ret = NULL;
807*4d131170SRobert Mustacchi id = href = NULL;
808*4d131170SRobert Mustacchi if (n->flags & NODE_ID)
809*4d131170SRobert Mustacchi id = html_make_id(n, 1);
810*4d131170SRobert Mustacchi if (n->flags & NODE_HREF)
811*4d131170SRobert Mustacchi href = id == NULL ? html_make_id(n, 2) : id;
812*4d131170SRobert Mustacchi if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
813*4d131170SRobert Mustacchi ret = print_otag(h, TAG_A, "chR", "permalink", href);
814*4d131170SRobert Mustacchi t = print_otag(h, elemtype, "ci", cattr, id);
815*4d131170SRobert Mustacchi if (ret == NULL) {
816*4d131170SRobert Mustacchi ret = t;
817*4d131170SRobert Mustacchi if (href != NULL && (nch = n->child) != NULL) {
818*4d131170SRobert Mustacchi /* man(7) is safe, it tags phrasing content only. */
819*4d131170SRobert Mustacchi if (n->tok > MDOC_MAX ||
820*4d131170SRobert Mustacchi htmltags[elemtype].flags & HTML_TOPHRASE)
821*4d131170SRobert Mustacchi nch = NULL;
822*4d131170SRobert Mustacchi else /* For mdoc(7), beware of nested blocks. */
823*4d131170SRobert Mustacchi while (nch != NULL && nch->type == ROFFT_TEXT)
824*4d131170SRobert Mustacchi nch = nch->next;
825*4d131170SRobert Mustacchi if (nch == NULL)
826*4d131170SRobert Mustacchi print_otag(h, TAG_A, "chR", "permalink", href);
827*4d131170SRobert Mustacchi }
828*4d131170SRobert Mustacchi }
829*4d131170SRobert Mustacchi free(id);
830*4d131170SRobert Mustacchi if (id == NULL)
831*4d131170SRobert Mustacchi free(href);
832*4d131170SRobert Mustacchi return ret;
833*4d131170SRobert Mustacchi }
834*4d131170SRobert Mustacchi
83595c635efSGarrett D'Amore static void
print_ctag(struct html * h,struct tag * tag)836260e9a87SYuri Pankov print_ctag(struct html *h, struct tag *tag)
83795c635efSGarrett D'Amore {
838a40ea1a7SYuri Pankov int tflags;
839260e9a87SYuri Pankov
840cec8643bSMichal Nowak if (tag->closed == 0) {
841cec8643bSMichal Nowak tag->closed = 1;
842cec8643bSMichal Nowak if (tag == h->metaf)
843cec8643bSMichal Nowak h->metaf = NULL;
844cec8643bSMichal Nowak if (tag == h->tblt)
845cec8643bSMichal Nowak h->tblt = NULL;
846cec8643bSMichal Nowak
847cec8643bSMichal Nowak tflags = htmltags[tag->tag].flags;
848cec8643bSMichal Nowak if (tflags & HTML_INDENT)
849cec8643bSMichal Nowak h->indent--;
850cec8643bSMichal Nowak if (tflags & HTML_NOINDENT)
851cec8643bSMichal Nowak h->noindent--;
852cec8643bSMichal Nowak if (tflags & HTML_NLEND)
853cec8643bSMichal Nowak print_endline(h);
854cec8643bSMichal Nowak print_indent(h);
855cec8643bSMichal Nowak print_byte(h, '<');
856cec8643bSMichal Nowak print_byte(h, '/');
857cec8643bSMichal Nowak print_word(h, htmltags[tag->tag].name);
858cec8643bSMichal Nowak print_byte(h, '>');
859cec8643bSMichal Nowak if (tflags & HTML_NLAFTER)
860cec8643bSMichal Nowak print_endline(h);
861cec8643bSMichal Nowak }
862cec8643bSMichal Nowak if (tag->refcnt == 0) {
863cec8643bSMichal Nowak h->tag = tag->next;
864cec8643bSMichal Nowak free(tag);
865cec8643bSMichal Nowak }
86695c635efSGarrett D'Amore }
86795c635efSGarrett D'Amore
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
87495c635efSGarrett D'Amore
8756640c13bSYuri Pankov void
print_gen_comment(struct html * h,struct roff_node * n)8766640c13bSYuri Pankov print_gen_comment(struct html *h, struct roff_node *n)
8776640c13bSYuri Pankov {
8786640c13bSYuri Pankov int wantblank;
8796640c13bSYuri Pankov
8806640c13bSYuri Pankov print_word(h, "<!-- This is an automatically generated file."
8816640c13bSYuri Pankov " Do not edit.");
8826640c13bSYuri Pankov h->indent = 1;
8836640c13bSYuri Pankov wantblank = 0;
8846640c13bSYuri Pankov while (n != NULL && n->type == ROFFT_COMMENT) {
8856640c13bSYuri Pankov if (strstr(n->string, "-->") == NULL &&
8866640c13bSYuri Pankov (wantblank || *n->string != '\0')) {
8876640c13bSYuri Pankov print_endline(h);
8886640c13bSYuri Pankov print_indent(h);
8896640c13bSYuri Pankov print_word(h, n->string);
8906640c13bSYuri Pankov wantblank = *n->string != '\0';
8916640c13bSYuri Pankov }
8926640c13bSYuri Pankov n = n->next;
8936640c13bSYuri Pankov }
8946640c13bSYuri Pankov if (wantblank)
8956640c13bSYuri Pankov print_endline(h);
8966640c13bSYuri Pankov print_word(h, " -->");
8976640c13bSYuri Pankov print_endline(h);
8986640c13bSYuri Pankov h->indent = 0;
8996640c13bSYuri Pankov }
9006640c13bSYuri Pankov
/*
 * Print one word of text without a permalink:
 * shorthand for print_tagged_text() without a node.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*nonode = NULL;

	print_tagged_text(h, word, nonode);
}
906*4d131170SRobert Mustacchi
907*4d131170SRobert Mustacchi void
print_tagged_text(struct html * h,const char * word,struct roff_node * n)908*4d131170SRobert Mustacchi print_tagged_text(struct html *h, const char *word, struct roff_node *n)
909*4d131170SRobert Mustacchi {
910*4d131170SRobert Mustacchi struct tag *t;
911*4d131170SRobert Mustacchi char *href;
912*4d131170SRobert Mustacchi
913*4d131170SRobert Mustacchi /*
914*4d131170SRobert Mustacchi * Always wrap text in a paragraph unless already contained in
915*4d131170SRobert Mustacchi * some flow container; never put it directly into a section.
916*4d131170SRobert Mustacchi */
917*4d131170SRobert Mustacchi
918*4d131170SRobert Mustacchi if (h->tag->tag == TAG_SECTION)
919*4d131170SRobert Mustacchi print_otag(h, TAG_P, "c", "Pp");
920*4d131170SRobert Mustacchi
921*4d131170SRobert Mustacchi /* Output whitespace before this text? */
922*4d131170SRobert Mustacchi
923a40ea1a7SYuri Pankov if (h->col && (h->flags & HTML_NOSPACE) == 0) {
92495c635efSGarrett D'Amore if ( ! (HTML_KEEP & h->flags)) {
92595c635efSGarrett D'Amore if (HTML_PREKEEP & h->flags)
92695c635efSGarrett D'Amore h->flags |= HTML_KEEP;
927a40ea1a7SYuri Pankov print_endword(h);
92895c635efSGarrett D'Amore } else
929c66b8046SYuri Pankov print_word(h, " ");
93095c635efSGarrett D'Amore }
93195c635efSGarrett D'Amore
932*4d131170SRobert Mustacchi /*
933*4d131170SRobert Mustacchi * Optionally switch fonts, optionally write a permalink, then
934*4d131170SRobert Mustacchi * print the text, optionally surrounded by HTML whitespace.
935*4d131170SRobert Mustacchi */
936*4d131170SRobert Mustacchi
937*4d131170SRobert Mustacchi assert(h->metaf == NULL);
938*4d131170SRobert Mustacchi print_metaf(h);
939*4d131170SRobert Mustacchi print_indent(h);
940*4d131170SRobert Mustacchi
941*4d131170SRobert Mustacchi if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
942*4d131170SRobert Mustacchi t = print_otag(h, TAG_A, "chR", "permalink", href);
943*4d131170SRobert Mustacchi free(href);
944*4d131170SRobert Mustacchi } else
945*4d131170SRobert Mustacchi t = NULL;
94695c635efSGarrett D'Amore
947a40ea1a7SYuri Pankov if ( ! print_encode(h, word, NULL, 0)) {
94895c635efSGarrett D'Amore if ( ! (h->flags & HTML_NONOSPACE))
94995c635efSGarrett D'Amore h->flags &= ~HTML_NOSPACE;
950260e9a87SYuri Pankov h->flags &= ~HTML_NONEWLINE;
95195c635efSGarrett D'Amore } else
952260e9a87SYuri Pankov h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
95395c635efSGarrett D'Amore
954*4d131170SRobert Mustacchi if (h->metaf != NULL) {
95595c635efSGarrett D'Amore print_tagq(h, h->metaf);
95695c635efSGarrett D'Amore h->metaf = NULL;
957*4d131170SRobert Mustacchi } else if (t != NULL)
958*4d131170SRobert Mustacchi print_tagq(h, t);
95995c635efSGarrett D'Amore
96095c635efSGarrett D'Amore h->flags &= ~HTML_IGNDELIM;
96195c635efSGarrett D'Amore }
96295c635efSGarrett D'Amore
96395c635efSGarrett D'Amore void
print_tagq(struct html * h,const struct tag * until)96495c635efSGarrett D'Amore print_tagq(struct html *h, const struct tag *until)
96595c635efSGarrett D'Amore {
966cec8643bSMichal Nowak struct tag *this, *next;
96795c635efSGarrett D'Amore
968cec8643bSMichal Nowak for (this = h->tag; this != NULL; this = next) {
969cec8643bSMichal Nowak next = this == until ? NULL : this->next;
970cec8643bSMichal Nowak print_ctag(h, this);
97195c635efSGarrett D'Amore }
97295c635efSGarrett D'Amore }
97395c635efSGarrett D'Amore
974cec8643bSMichal Nowak /*
975cec8643bSMichal Nowak * Close out all open elements up to but excluding suntil.
976cec8643bSMichal Nowak * Note that a paragraph just inside stays open together with it
977cec8643bSMichal Nowak * because paragraphs include subsequent phrasing content.
978cec8643bSMichal Nowak */
97995c635efSGarrett D'Amore void
print_stagq(struct html * h,const struct tag * suntil)98095c635efSGarrett D'Amore print_stagq(struct html *h, const struct tag *suntil)
98195c635efSGarrett D'Amore {
982cec8643bSMichal Nowak struct tag *this, *next;
98395c635efSGarrett D'Amore
984cec8643bSMichal Nowak for (this = h->tag; this != NULL; this = next) {
985cec8643bSMichal Nowak next = this->next;
986cec8643bSMichal Nowak if (this == suntil || (next == suntil &&
987cec8643bSMichal Nowak (this->tag == TAG_P || this->tag == TAG_PRE)))
988cec8643bSMichal Nowak break;
989cec8643bSMichal Nowak print_ctag(h, this);
99095c635efSGarrett D'Amore }
99195c635efSGarrett D'Amore }
99295c635efSGarrett D'Amore
993260e9a87SYuri Pankov
994a40ea1a7SYuri Pankov /***********************************************************************
995a40ea1a7SYuri Pankov * Low level output functions.
996a40ea1a7SYuri Pankov * They implement line breaking using a short static buffer.
997a40ea1a7SYuri Pankov ***********************************************************************/
99895c635efSGarrett D'Amore
999a40ea1a7SYuri Pankov /*
1000a40ea1a7SYuri Pankov * Buffer one HTML output byte.
1001a40ea1a7SYuri Pankov * If the buffer is full, flush and deactivate it and start a new line.
1002a40ea1a7SYuri Pankov * If the buffer is inactive, print directly.
1003a40ea1a7SYuri Pankov */
1004a40ea1a7SYuri Pankov static void
print_byte(struct html * h,char c)1005a40ea1a7SYuri Pankov print_byte(struct html *h, char c)
100695c635efSGarrett D'Amore {
1007a40ea1a7SYuri Pankov if ((h->flags & HTML_BUFFER) == 0) {
1008a40ea1a7SYuri Pankov putchar(c);
1009a40ea1a7SYuri Pankov h->col++;
1010a40ea1a7SYuri Pankov return;
1011a40ea1a7SYuri Pankov }
101295c635efSGarrett D'Amore
1013a40ea1a7SYuri Pankov if (h->col + h->bufcol < sizeof(h->buf)) {
1014a40ea1a7SYuri Pankov h->buf[h->bufcol++] = c;
1015a40ea1a7SYuri Pankov return;
1016a40ea1a7SYuri Pankov }
1017260e9a87SYuri Pankov
1018a40ea1a7SYuri Pankov putchar('\n');
1019a40ea1a7SYuri Pankov h->col = 0;
1020a40ea1a7SYuri Pankov print_indent(h);
1021a40ea1a7SYuri Pankov putchar(' ');
1022a40ea1a7SYuri Pankov putchar(' ');
1023a40ea1a7SYuri Pankov fwrite(h->buf, h->bufcol, 1, stdout);
1024a40ea1a7SYuri Pankov putchar(c);
1025a40ea1a7SYuri Pankov h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1026a40ea1a7SYuri Pankov h->bufcol = 0;
1027a40ea1a7SYuri Pankov h->flags &= ~HTML_BUFFER;
102895c635efSGarrett D'Amore }
102995c635efSGarrett D'Amore
1030a40ea1a7SYuri Pankov /*
1031a40ea1a7SYuri Pankov * If something was printed on the current output line, end it.
1032a40ea1a7SYuri Pankov * Not to be called right after print_indent().
1033a40ea1a7SYuri Pankov */
103495c635efSGarrett D'Amore void
print_endline(struct html * h)1035a40ea1a7SYuri Pankov print_endline(struct html *h)
103695c635efSGarrett D'Amore {
1037a40ea1a7SYuri Pankov if (h->col == 0)
1038a40ea1a7SYuri Pankov return;
103995c635efSGarrett D'Amore
1040a40ea1a7SYuri Pankov if (h->bufcol) {
1041a40ea1a7SYuri Pankov putchar(' ');
1042a40ea1a7SYuri Pankov fwrite(h->buf, h->bufcol, 1, stdout);
1043a40ea1a7SYuri Pankov h->bufcol = 0;
1044a40ea1a7SYuri Pankov }
1045a40ea1a7SYuri Pankov putchar('\n');
1046a40ea1a7SYuri Pankov h->col = 0;
1047a40ea1a7SYuri Pankov h->flags |= HTML_NOSPACE;
1048a40ea1a7SYuri Pankov h->flags &= ~HTML_BUFFER;
104995c635efSGarrett D'Amore }
105095c635efSGarrett D'Amore
1051a40ea1a7SYuri Pankov /*
1052a40ea1a7SYuri Pankov * Flush the HTML output buffer.
1053a40ea1a7SYuri Pankov * If it is inactive, activate it.
1054a40ea1a7SYuri Pankov */
105595c635efSGarrett D'Amore static void
print_endword(struct html * h)1056a40ea1a7SYuri Pankov print_endword(struct html *h)
105795c635efSGarrett D'Amore {
1058a40ea1a7SYuri Pankov if (h->noindent) {
1059a40ea1a7SYuri Pankov print_byte(h, ' ');
1060a40ea1a7SYuri Pankov return;
1061a40ea1a7SYuri Pankov }
106295c635efSGarrett D'Amore
1063a40ea1a7SYuri Pankov if ((h->flags & HTML_BUFFER) == 0) {
1064a40ea1a7SYuri Pankov h->col++;
1065a40ea1a7SYuri Pankov h->flags |= HTML_BUFFER;
1066a40ea1a7SYuri Pankov } else if (h->bufcol) {
1067a40ea1a7SYuri Pankov putchar(' ');
1068a40ea1a7SYuri Pankov fwrite(h->buf, h->bufcol, 1, stdout);
1069a40ea1a7SYuri Pankov h->col += h->bufcol + 1;
107095c635efSGarrett D'Amore }
1071a40ea1a7SYuri Pankov h->bufcol = 0;
107295c635efSGarrett D'Amore }
107395c635efSGarrett D'Amore
1074a40ea1a7SYuri Pankov /*
1075a40ea1a7SYuri Pankov * If at the beginning of a new output line,
1076a40ea1a7SYuri Pankov * perform indentation and mark the line as containing output.
1077a40ea1a7SYuri Pankov * Make sure to really produce some output right afterwards,
1078a40ea1a7SYuri Pankov * but do not use print_otag() for producing it.
1079a40ea1a7SYuri Pankov */
1080a40ea1a7SYuri Pankov static void
print_indent(struct html * h)1081a40ea1a7SYuri Pankov print_indent(struct html *h)
108295c635efSGarrett D'Amore {
1083a40ea1a7SYuri Pankov size_t i;
108495c635efSGarrett D'Amore
1085*4d131170SRobert Mustacchi if (h->col || h->noindent)
1086a40ea1a7SYuri Pankov return;
1087260e9a87SYuri Pankov
1088*4d131170SRobert Mustacchi h->col = h->indent * 2;
1089*4d131170SRobert Mustacchi for (i = 0; i < h->col; i++)
1090*4d131170SRobert Mustacchi putchar(' ');
109195c635efSGarrett D'Amore }
109295c635efSGarrett D'Amore
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered
 * depending on the current HTML output buffer state.
 */
static void
print_word(struct html *h, const char *cp)
{
	const char	*bp;

	for (bp = cp; *bp != '\0'; bp++)
		print_byte(h, *bp);
}
1103