xref: /illumos-gate/usr/src/cmd/mandoc/tbl_data.c (revision 4d131170)
1*4d131170SRobert Mustacchi /*	$Id: tbl_data.c,v 1.59 2021/09/10 13:24:38 schwarze Exp $ */
295c635efSGarrett D'Amore /*
395c635efSGarrett D'Amore  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*4d131170SRobert Mustacchi  * Copyright (c) 2011,2015,2017-2019,2021 Ingo Schwarze <schwarze@openbsd.org>
595c635efSGarrett D'Amore  *
695c635efSGarrett D'Amore  * Permission to use, copy, modify, and distribute this software for any
795c635efSGarrett D'Amore  * purpose with or without fee is hereby granted, provided that the above
895c635efSGarrett D'Amore  * copyright notice and this permission notice appear in all copies.
995c635efSGarrett D'Amore  *
1095c635efSGarrett D'Amore  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1195c635efSGarrett D'Amore  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1295c635efSGarrett D'Amore  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1395c635efSGarrett D'Amore  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1495c635efSGarrett D'Amore  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1595c635efSGarrett D'Amore  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1695c635efSGarrett D'Amore  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1795c635efSGarrett D'Amore  */
1895c635efSGarrett D'Amore #include "config.h"
19260e9a87SYuri Pankov 
20260e9a87SYuri Pankov #include <sys/types.h>
2195c635efSGarrett D'Amore 
2295c635efSGarrett D'Amore #include <assert.h>
2395c635efSGarrett D'Amore #include <ctype.h>
24*4d131170SRobert Mustacchi #include <stdint.h>
25cec8643bSMichal Nowak #include <stdio.h>
2695c635efSGarrett D'Amore #include <stdlib.h>
2795c635efSGarrett D'Amore #include <string.h>
2895c635efSGarrett D'Amore #include <time.h>
2995c635efSGarrett D'Amore 
30260e9a87SYuri Pankov #include "mandoc_aux.h"
31cec8643bSMichal Nowak #include "mandoc.h"
32cec8643bSMichal Nowak #include "tbl.h"
3395c635efSGarrett D'Amore #include "libmandoc.h"
34cec8643bSMichal Nowak #include "tbl_int.h"
3595c635efSGarrett D'Amore 
/* Parse one data cell of line p, starting at *pos, into the data row dp. */
static	void		 getdata(struct tbl_node *tbl, struct tbl_span *dp,
				int ln, const char *p, int *pos);
/* Allocate a data row for layout row rp and append it to the table. */
static	struct tbl_span	*newspan(struct tbl_node *tbl, int line,
				struct tbl_row *rp);
4095c635efSGarrett D'Amore 
41260e9a87SYuri Pankov 
42260e9a87SYuri Pankov static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)43260e9a87SYuri Pankov getdata(struct tbl_node *tbl, struct tbl_span *dp,
4495c635efSGarrett D'Amore 		int ln, const char *p, int *pos)
4595c635efSGarrett D'Amore {
46cec8643bSMichal Nowak 	struct tbl_dat	*dat, *pdat;
4795c635efSGarrett D'Amore 	struct tbl_cell	*cp;
48cec8643bSMichal Nowak 	struct tbl_span	*pdp;
49*4d131170SRobert Mustacchi 	const char	*ccp;
50*4d131170SRobert Mustacchi 	int		 startpos, endpos;
5195c635efSGarrett D'Amore 
52cec8643bSMichal Nowak 	/*
53cec8643bSMichal Nowak 	 * Determine the length of the string in the cell
54cec8643bSMichal Nowak 	 * and advance the parse point to the end of the cell.
55cec8643bSMichal Nowak 	 */
56cec8643bSMichal Nowak 
57*4d131170SRobert Mustacchi 	startpos = *pos;
58*4d131170SRobert Mustacchi 	ccp = p + startpos;
59*4d131170SRobert Mustacchi 	while (*ccp != '\0' && *ccp != tbl->opts.tab)
60*4d131170SRobert Mustacchi 		if (*ccp++ == '\\')
61*4d131170SRobert Mustacchi 			mandoc_escape(&ccp, NULL, NULL);
62*4d131170SRobert Mustacchi 	*pos = ccp - p;
63cec8643bSMichal Nowak 
64260e9a87SYuri Pankov 	/* Advance to the next layout cell, skipping spanners. */
6595c635efSGarrett D'Amore 
66260e9a87SYuri Pankov 	cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
67260e9a87SYuri Pankov 	while (cp != NULL && cp->pos == TBL_CELL_SPAN)
6895c635efSGarrett D'Amore 		cp = cp->next;
6995c635efSGarrett D'Amore 
7095c635efSGarrett D'Amore 	/*
71c66b8046SYuri Pankov 	 * If the current layout row is out of cells, allocate
72c66b8046SYuri Pankov 	 * a new cell if another row of the table has at least
73c66b8046SYuri Pankov 	 * this number of columns, or discard the input if we
74c66b8046SYuri Pankov 	 * are beyond the last column of the table as a whole.
7595c635efSGarrett D'Amore 	 */
7695c635efSGarrett D'Amore 
77260e9a87SYuri Pankov 	if (cp == NULL) {
78c66b8046SYuri Pankov 		if (dp->layout->last->col + 1 < dp->opts->cols) {
79c66b8046SYuri Pankov 			cp = mandoc_calloc(1, sizeof(*cp));
80c66b8046SYuri Pankov 			cp->pos = TBL_CELL_LEFT;
81*4d131170SRobert Mustacchi 			cp->font = ESCAPE_FONTROMAN;
82*4d131170SRobert Mustacchi 			cp->spacing = SIZE_MAX;
83c66b8046SYuri Pankov 			dp->layout->last->next = cp;
84c66b8046SYuri Pankov 			cp->col = dp->layout->last->col + 1;
85c66b8046SYuri Pankov 			dp->layout->last = cp;
86c66b8046SYuri Pankov 		} else {
87cec8643bSMichal Nowak 			mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
88*4d131170SRobert Mustacchi 			    ln, startpos, "%s", p + startpos);
89cec8643bSMichal Nowak 			while (p[*pos] != '\0')
90c66b8046SYuri Pankov 				(*pos)++;
91c66b8046SYuri Pankov 			return;
92c66b8046SYuri Pankov 		}
9395c635efSGarrett D'Amore 	}
9495c635efSGarrett D'Amore 
95cec8643bSMichal Nowak 	dat = mandoc_malloc(sizeof(*dat));
9695c635efSGarrett D'Amore 	dat->layout = cp;
97cec8643bSMichal Nowak 	dat->next = NULL;
98cec8643bSMichal Nowak 	dat->string = NULL;
99cec8643bSMichal Nowak 	dat->hspans = 0;
100cec8643bSMichal Nowak 	dat->vspans = 0;
101cec8643bSMichal Nowak 	dat->block = 0;
10295c635efSGarrett D'Amore 	dat->pos = TBL_DATA_NONE;
103cec8643bSMichal Nowak 
104cec8643bSMichal Nowak 	/*
105cec8643bSMichal Nowak 	 * Increment the number of vertical spans in a data cell above,
106cec8643bSMichal Nowak 	 * if this cell vertically extends one or more cells above.
107cec8643bSMichal Nowak 	 * The iteration must be done over data rows,
108cec8643bSMichal Nowak 	 * not over layout rows, because one layout row
109cec8643bSMichal Nowak 	 * can be reused for more than one data row.
110cec8643bSMichal Nowak 	 */
111cec8643bSMichal Nowak 
112cec8643bSMichal Nowak 	if (cp->pos == TBL_CELL_DOWN ||
113*4d131170SRobert Mustacchi 	    (*pos - startpos == 2 &&
114*4d131170SRobert Mustacchi 	     p[startpos] == '\\' && p[startpos + 1] == '^')) {
115cec8643bSMichal Nowak 		pdp = dp;
116cec8643bSMichal Nowak 		while ((pdp = pdp->prev) != NULL) {
117cec8643bSMichal Nowak 			pdat = pdp->first;
118cec8643bSMichal Nowak 			while (pdat != NULL &&
119cec8643bSMichal Nowak 			    pdat->layout->col < dat->layout->col)
120cec8643bSMichal Nowak 				pdat = pdat->next;
121cec8643bSMichal Nowak 			if (pdat == NULL)
122cec8643bSMichal Nowak 				break;
123cec8643bSMichal Nowak 			if (pdat->layout->pos != TBL_CELL_DOWN &&
124cec8643bSMichal Nowak 			    strcmp(pdat->string, "\\^") != 0) {
125cec8643bSMichal Nowak 				pdat->vspans++;
126cec8643bSMichal Nowak 				break;
127cec8643bSMichal Nowak 			}
128cec8643bSMichal Nowak 		}
129cec8643bSMichal Nowak 	}
130cec8643bSMichal Nowak 
131cec8643bSMichal Nowak 	/*
132cec8643bSMichal Nowak 	 * Count the number of horizontal spans to the right of this cell.
133cec8643bSMichal Nowak 	 * This is purely a matter of the layout, independent of the data.
134cec8643bSMichal Nowak 	 */
135cec8643bSMichal Nowak 
136260e9a87SYuri Pankov 	for (cp = cp->next; cp != NULL; cp = cp->next)
137260e9a87SYuri Pankov 		if (cp->pos == TBL_CELL_SPAN)
138cec8643bSMichal Nowak 			dat->hspans++;
13995c635efSGarrett D'Amore 		else
14095c635efSGarrett D'Amore 			break;
14195c635efSGarrett D'Amore 
142cec8643bSMichal Nowak 	/* Append the new data cell to the data row. */
143cec8643bSMichal Nowak 
144260e9a87SYuri Pankov 	if (dp->last == NULL)
145260e9a87SYuri Pankov 		dp->first = dat;
146260e9a87SYuri Pankov 	else
14795c635efSGarrett D'Amore 		dp->last->next = dat;
148260e9a87SYuri Pankov 	dp->last = dat;
14995c635efSGarrett D'Amore 
150*4d131170SRobert Mustacchi 	/* Strip leading and trailing spaces, if requested. */
151*4d131170SRobert Mustacchi 
152*4d131170SRobert Mustacchi 	endpos = *pos;
153*4d131170SRobert Mustacchi 	if (dp->opts->opts & TBL_OPT_NOSPACE) {
154*4d131170SRobert Mustacchi 		while (p[startpos] == ' ')
155*4d131170SRobert Mustacchi 			startpos++;
156*4d131170SRobert Mustacchi 		while (endpos > startpos && p[endpos - 1] == ' ')
157*4d131170SRobert Mustacchi 			endpos--;
158*4d131170SRobert Mustacchi 	}
159*4d131170SRobert Mustacchi 
16095c635efSGarrett D'Amore 	/*
16195c635efSGarrett D'Amore 	 * Check for a continued-data scope opening.  This consists of a
16295c635efSGarrett D'Amore 	 * trailing `T{' at the end of the line.  Subsequent lines,
16395c635efSGarrett D'Amore 	 * until a standalone `T}', are included in our cell.
16495c635efSGarrett D'Amore 	 */
16595c635efSGarrett D'Amore 
166*4d131170SRobert Mustacchi 	if (endpos - startpos == 2 &&
167*4d131170SRobert Mustacchi 	    p[startpos] == 'T' && p[startpos + 1] == '{') {
16895c635efSGarrett D'Amore 		tbl->part = TBL_PART_CDATA;
169260e9a87SYuri Pankov 		return;
17095c635efSGarrett D'Amore 	}
17195c635efSGarrett D'Amore 
172*4d131170SRobert Mustacchi 	dat->string = mandoc_strndup(p + startpos, endpos - startpos);
17395c635efSGarrett D'Amore 
174cec8643bSMichal Nowak 	if (p[*pos] != '\0')
17595c635efSGarrett D'Amore 		(*pos)++;
17695c635efSGarrett D'Amore 
17795c635efSGarrett D'Amore 	if ( ! strcmp(dat->string, "_"))
17895c635efSGarrett D'Amore 		dat->pos = TBL_DATA_HORIZ;
17995c635efSGarrett D'Amore 	else if ( ! strcmp(dat->string, "="))
18095c635efSGarrett D'Amore 		dat->pos = TBL_DATA_DHORIZ;
18195c635efSGarrett D'Amore 	else if ( ! strcmp(dat->string, "\\_"))
18295c635efSGarrett D'Amore 		dat->pos = TBL_DATA_NHORIZ;
18395c635efSGarrett D'Amore 	else if ( ! strcmp(dat->string, "\\="))
18495c635efSGarrett D'Amore 		dat->pos = TBL_DATA_NDHORIZ;
18595c635efSGarrett D'Amore 	else
18695c635efSGarrett D'Amore 		dat->pos = TBL_DATA_DATA;
18795c635efSGarrett D'Amore 
188260e9a87SYuri Pankov 	if ((dat->layout->pos == TBL_CELL_HORIZ ||
189260e9a87SYuri Pankov 	    dat->layout->pos == TBL_CELL_DHORIZ ||
190260e9a87SYuri Pankov 	    dat->layout->pos == TBL_CELL_DOWN) &&
191260e9a87SYuri Pankov 	    dat->pos == TBL_DATA_DATA && *dat->string != '\0')
192260e9a87SYuri Pankov 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
193*4d131170SRobert Mustacchi 		    ln, startpos, "%s", dat->string);
19495c635efSGarrett D'Amore }
19595c635efSGarrett D'Amore 
196c66b8046SYuri Pankov void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)197260e9a87SYuri Pankov tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
19895c635efSGarrett D'Amore {
19995c635efSGarrett D'Amore 	struct tbl_dat	*dat;
200260e9a87SYuri Pankov 	size_t		 sz;
20195c635efSGarrett D'Amore 
20295c635efSGarrett D'Amore 	dat = tbl->last_span->last;
20395c635efSGarrett D'Amore 
20495c635efSGarrett D'Amore 	if (p[pos] == 'T' && p[pos + 1] == '}') {
20595c635efSGarrett D'Amore 		pos += 2;
206*4d131170SRobert Mustacchi 		if (tbl->opts.opts & TBL_OPT_NOSPACE)
207*4d131170SRobert Mustacchi 			while (p[pos] == ' ')
208*4d131170SRobert Mustacchi 				pos++;
20995c635efSGarrett D'Amore 		if (p[pos] == tbl->opts.tab) {
21095c635efSGarrett D'Amore 			tbl->part = TBL_PART_DATA;
21195c635efSGarrett D'Amore 			pos++;
212371584c2SYuri Pankov 			while (p[pos] != '\0')
213371584c2SYuri Pankov 				getdata(tbl, tbl->last_span, ln, p, &pos);
214c66b8046SYuri Pankov 			return;
215260e9a87SYuri Pankov 		} else if (p[pos] == '\0') {
21695c635efSGarrett D'Amore 			tbl->part = TBL_PART_DATA;
217c66b8046SYuri Pankov 			return;
21895c635efSGarrett D'Amore 		}
21995c635efSGarrett D'Amore 
22095c635efSGarrett D'Amore 		/* Fallthrough: T} is part of a word. */
22195c635efSGarrett D'Amore 	}
22295c635efSGarrett D'Amore 
22395c635efSGarrett D'Amore 	dat->pos = TBL_DATA_DATA;
224c66b8046SYuri Pankov 	dat->block = 1;
22595c635efSGarrett D'Amore 
226260e9a87SYuri Pankov 	if (dat->string != NULL) {
227260e9a87SYuri Pankov 		sz = strlen(p + pos) + strlen(dat->string) + 2;
22895c635efSGarrett D'Amore 		dat->string = mandoc_realloc(dat->string, sz);
229260e9a87SYuri Pankov 		(void)strlcat(dat->string, " ", sz);
230260e9a87SYuri Pankov 		(void)strlcat(dat->string, p + pos, sz);
23195c635efSGarrett D'Amore 	} else
232260e9a87SYuri Pankov 		dat->string = mandoc_strdup(p + pos);
23395c635efSGarrett D'Amore 
234260e9a87SYuri Pankov 	if (dat->layout->pos == TBL_CELL_DOWN)
235cec8643bSMichal Nowak 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
236cec8643bSMichal Nowak 		    ln, pos, "%s", dat->string);
23795c635efSGarrett D'Amore }
23895c635efSGarrett D'Amore 
23995c635efSGarrett D'Amore static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)24095c635efSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
24195c635efSGarrett D'Amore {
24295c635efSGarrett D'Amore 	struct tbl_span	*dp;
24395c635efSGarrett D'Amore 
244260e9a87SYuri Pankov 	dp = mandoc_calloc(1, sizeof(*dp));
24595c635efSGarrett D'Amore 	dp->line = line;
246698f87a4SGarrett D'Amore 	dp->opts = &tbl->opts;
24795c635efSGarrett D'Amore 	dp->layout = rp;
248260e9a87SYuri Pankov 	dp->prev = tbl->last_span;
24995c635efSGarrett D'Amore 
250260e9a87SYuri Pankov 	if (dp->prev == NULL) {
251260e9a87SYuri Pankov 		tbl->first_span = dp;
25295c635efSGarrett D'Amore 		tbl->current_span = NULL;
253260e9a87SYuri Pankov 	} else
254260e9a87SYuri Pankov 		dp->prev->next = dp;
255260e9a87SYuri Pankov 	tbl->last_span = dp;
25695c635efSGarrett D'Amore 
257371584c2SYuri Pankov 	return dp;
25895c635efSGarrett D'Amore }
25995c635efSGarrett D'Amore 
260260e9a87SYuri Pankov void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)261260e9a87SYuri Pankov tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
26295c635efSGarrett D'Amore {
26395c635efSGarrett D'Amore 	struct tbl_row	*rp;
264c66b8046SYuri Pankov 	struct tbl_cell	*cp;
265c66b8046SYuri Pankov 	struct tbl_span	*sp;
26695c635efSGarrett D'Amore 
267*4d131170SRobert Mustacchi 	for (sp = tbl->last_span; sp != NULL; sp = sp->prev)
268*4d131170SRobert Mustacchi 		if (sp->pos == TBL_SPAN_DATA)
269*4d131170SRobert Mustacchi 			break;
270*4d131170SRobert Mustacchi 	rp = sp == NULL ? tbl->first_row :
271*4d131170SRobert Mustacchi 	    sp->layout->next == NULL ? sp->layout : sp->layout->next;
272c66b8046SYuri Pankov 	assert(rp != NULL);
27395c635efSGarrett D'Amore 
274cec8643bSMichal Nowak 	if (p[1] == '\0') {
275cec8643bSMichal Nowak 		switch (p[0]) {
276cec8643bSMichal Nowak 		case '.':
277cec8643bSMichal Nowak 			/*
278cec8643bSMichal Nowak 			 * Empty request lines must be handled here
279cec8643bSMichal Nowak 			 * and cannot be discarded in roff_parseln()
280cec8643bSMichal Nowak 			 * because in the layout section, they
281cec8643bSMichal Nowak 			 * are significant and end the layout.
282cec8643bSMichal Nowak 			 */
283cec8643bSMichal Nowak 			return;
284cec8643bSMichal Nowak 		case '_':
285cec8643bSMichal Nowak 			sp = newspan(tbl, ln, rp);
286cec8643bSMichal Nowak 			sp->pos = TBL_SPAN_HORIZ;
287cec8643bSMichal Nowak 			return;
288cec8643bSMichal Nowak 		case '=':
289cec8643bSMichal Nowak 			sp = newspan(tbl, ln, rp);
290cec8643bSMichal Nowak 			sp->pos = TBL_SPAN_DHORIZ;
291cec8643bSMichal Nowak 			return;
292cec8643bSMichal Nowak 		default:
293cec8643bSMichal Nowak 			break;
294cec8643bSMichal Nowak 		}
29595c635efSGarrett D'Amore 	}
29695c635efSGarrett D'Amore 
297c66b8046SYuri Pankov 	/*
298c66b8046SYuri Pankov 	 * If the layout row contains nothing but horizontal lines,
299c66b8046SYuri Pankov 	 * allocate an empty span for it and assign the current span
300c66b8046SYuri Pankov 	 * to the next layout row accepting data.
301c66b8046SYuri Pankov 	 */
302c66b8046SYuri Pankov 
303c66b8046SYuri Pankov 	while (rp->next != NULL) {
304c66b8046SYuri Pankov 		if (rp->last->col + 1 < tbl->opts.cols)
305c66b8046SYuri Pankov 			break;
306c66b8046SYuri Pankov 		for (cp = rp->first; cp != NULL; cp = cp->next)
307c66b8046SYuri Pankov 			if (cp->pos != TBL_CELL_HORIZ &&
308c66b8046SYuri Pankov 			    cp->pos != TBL_CELL_DHORIZ)
309c66b8046SYuri Pankov 				break;
310c66b8046SYuri Pankov 		if (cp != NULL)
311c66b8046SYuri Pankov 			break;
312c66b8046SYuri Pankov 		sp = newspan(tbl, ln, rp);
313c66b8046SYuri Pankov 		sp->pos = TBL_SPAN_DATA;
314c66b8046SYuri Pankov 		rp = rp->next;
315c66b8046SYuri Pankov 	}
316c66b8046SYuri Pankov 
317c66b8046SYuri Pankov 	/* Process a real data row. */
31895c635efSGarrett D'Amore 
319c66b8046SYuri Pankov 	sp = newspan(tbl, ln, rp);
320c66b8046SYuri Pankov 	sp->pos = TBL_SPAN_DATA;
321260e9a87SYuri Pankov 	while (p[pos] != '\0')
322c66b8046SYuri Pankov 		getdata(tbl, sp, ln, p, &pos);
32395c635efSGarrett D'Amore }
324