/* $Id: chars.c,v 1.79 2020/02/13 16:18:29 schwarze Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011, 2014, 2015, 2017, 2018, 2020
 *               Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1995c635efSGarrett D'Amore #include "config.h"
20260e9a87SYuri Pankov
21260e9a87SYuri Pankov #include <sys/types.h>
2295c635efSGarrett D'Amore
2395c635efSGarrett D'Amore #include <assert.h>
2495c635efSGarrett D'Amore #include <ctype.h>
25371584c2SYuri Pankov #include <stddef.h>
26371584c2SYuri Pankov #include <stdint.h>
27cec8643bSMichal Nowak #include <stdio.h>
2895c635efSGarrett D'Amore #include <stdlib.h>
2995c635efSGarrett D'Amore #include <string.h>
3095c635efSGarrett D'Amore
3195c635efSGarrett D'Amore #include "mandoc.h"
32260e9a87SYuri Pankov #include "mandoc_aux.h"
33371584c2SYuri Pankov #include "mandoc_ohash.h"
3495c635efSGarrett D'Amore #include "libmandoc.h"
3595c635efSGarrett D'Amore
/*
 * One entry of the special-character table: a roff escape name
 * together with its ASCII rendering and its Unicode codepoint.
 */
struct ln {
	const char	 roffcode[16];	/* escape name; hash key, stored inline */
	const char	*ascii;		/* string handed out by mchars_spec2str() */
	int		 unicode;	/* codepoint handed out by mchars_spec2cp() */
};
4195c635efSGarrett D'Amore
42371584c2SYuri Pankov /* Special break control characters. */
43371584c2SYuri Pankov static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
44371584c2SYuri Pankov static const char ascii_break[2] = { ASCII_BREAK, '\0' };
45371584c2SYuri Pankov
46371584c2SYuri Pankov static struct ln lines[] = {
47371584c2SYuri Pankov
48371584c2SYuri Pankov /* Spacing. */
49371584c2SYuri Pankov { " ", ascii_nbrsp, 0x00a0 },
50371584c2SYuri Pankov { "~", ascii_nbrsp, 0x00a0 },
51*4d131170SRobert Mustacchi { "0", ascii_nbrsp, 0x00a0 },
52371584c2SYuri Pankov { ":", ascii_break, 0 },
53371584c2SYuri Pankov
54371584c2SYuri Pankov /* Lines. */
55371584c2SYuri Pankov { "ba", "|", 0x007c },
56371584c2SYuri Pankov { "br", "|", 0x2502 },
57371584c2SYuri Pankov { "ul", "_", 0x005f },
58cec8643bSMichal Nowak { "_", "_", 0x005f },
59c66b8046SYuri Pankov { "ru", "_", 0x005f },
60371584c2SYuri Pankov { "rn", "-", 0x203e },
61371584c2SYuri Pankov { "bb", "|", 0x00a6 },
62371584c2SYuri Pankov { "sl", "/", 0x002f },
63371584c2SYuri Pankov { "rs", "\\", 0x005c },
64371584c2SYuri Pankov
65371584c2SYuri Pankov /* Text markers. */
66371584c2SYuri Pankov { "ci", "O", 0x25cb },
67371584c2SYuri Pankov { "bu", "+\bo", 0x2022 },
686640c13bSYuri Pankov { "dd", "<**>", 0x2021 },
696640c13bSYuri Pankov { "dg", "<*>", 0x2020 },
70371584c2SYuri Pankov { "lz", "<>", 0x25ca },
71371584c2SYuri Pankov { "sq", "[]", 0x25a1 },
726640c13bSYuri Pankov { "ps", "<paragraph>", 0x00b6 },
736640c13bSYuri Pankov { "sc", "<section>", 0x00a7 },
74371584c2SYuri Pankov { "lh", "<=", 0x261c },
75371584c2SYuri Pankov { "rh", "=>", 0x261e },
76371584c2SYuri Pankov { "at", "@", 0x0040 },
77371584c2SYuri Pankov { "sh", "#", 0x0023 },
786640c13bSYuri Pankov { "CR", "<cr>", 0x21b5 },
79371584c2SYuri Pankov { "OK", "\\/", 0x2713 },
80cec8643bSMichal Nowak { "CL", "C", 0x2663 },
81cec8643bSMichal Nowak { "SP", "S", 0x2660 },
82cec8643bSMichal Nowak { "HE", "H", 0x2665 },
83cec8643bSMichal Nowak { "DI", "D", 0x2666 },
84371584c2SYuri Pankov
85371584c2SYuri Pankov /* Legal symbols. */
86371584c2SYuri Pankov { "co", "(C)", 0x00a9 },
87371584c2SYuri Pankov { "rg", "(R)", 0x00ae },
88371584c2SYuri Pankov { "tm", "tm", 0x2122 },
89371584c2SYuri Pankov
90371584c2SYuri Pankov /* Punctuation. */
91371584c2SYuri Pankov { "em", "--", 0x2014 },
92371584c2SYuri Pankov { "en", "-", 0x2013 },
93371584c2SYuri Pankov { "hy", "-", 0x2010 },
94371584c2SYuri Pankov { "e", "\\", 0x005c },
95371584c2SYuri Pankov { ".", ".", 0x002e },
96371584c2SYuri Pankov { "r!", "!", 0x00a1 },
97371584c2SYuri Pankov { "r?", "?", 0x00bf },
98371584c2SYuri Pankov
99371584c2SYuri Pankov /* Quotes. */
100371584c2SYuri Pankov { "Bq", ",,", 0x201e },
101371584c2SYuri Pankov { "bq", ",", 0x201a },
102371584c2SYuri Pankov { "lq", "\"", 0x201c },
103371584c2SYuri Pankov { "rq", "\"", 0x201d },
104a40ea1a7SYuri Pankov { "Lq", "\"", 0x201c },
105a40ea1a7SYuri Pankov { "Rq", "\"", 0x201d },
106371584c2SYuri Pankov { "oq", "`", 0x2018 },
107371584c2SYuri Pankov { "cq", "\'", 0x2019 },
108371584c2SYuri Pankov { "aq", "\'", 0x0027 },
109371584c2SYuri Pankov { "dq", "\"", 0x0022 },
110371584c2SYuri Pankov { "Fo", "<<", 0x00ab },
111371584c2SYuri Pankov { "Fc", ">>", 0x00bb },
112371584c2SYuri Pankov { "fo", "<", 0x2039 },
113371584c2SYuri Pankov { "fc", ">", 0x203a },
114371584c2SYuri Pankov
115371584c2SYuri Pankov /* Brackets. */
116371584c2SYuri Pankov { "lB", "[", 0x005b },
117371584c2SYuri Pankov { "rB", "]", 0x005d },
118371584c2SYuri Pankov { "lC", "{", 0x007b },
119371584c2SYuri Pankov { "rC", "}", 0x007d },
120371584c2SYuri Pankov { "la", "<", 0x27e8 },
121371584c2SYuri Pankov { "ra", ">", 0x27e9 },
122371584c2SYuri Pankov { "bv", "|", 0x23aa },
123371584c2SYuri Pankov { "braceex", "|", 0x23aa },
124371584c2SYuri Pankov { "bracketlefttp", "|", 0x23a1 },
125371584c2SYuri Pankov { "bracketleftbt", "|", 0x23a3 },
126371584c2SYuri Pankov { "bracketleftex", "|", 0x23a2 },
127371584c2SYuri Pankov { "bracketrighttp", "|", 0x23a4 },
128371584c2SYuri Pankov { "bracketrightbt", "|", 0x23a6 },
129371584c2SYuri Pankov { "bracketrightex", "|", 0x23a5 },
130371584c2SYuri Pankov { "lt", ",-", 0x23a7 },
131371584c2SYuri Pankov { "bracelefttp", ",-", 0x23a7 },
132371584c2SYuri Pankov { "lk", "{", 0x23a8 },
133371584c2SYuri Pankov { "braceleftmid", "{", 0x23a8 },
134371584c2SYuri Pankov { "lb", "`-", 0x23a9 },
135371584c2SYuri Pankov { "braceleftbt", "`-", 0x23a9 },
136371584c2SYuri Pankov { "braceleftex", "|", 0x23aa },
137371584c2SYuri Pankov { "rt", "-.", 0x23ab },
138371584c2SYuri Pankov { "bracerighttp", "-.", 0x23ab },
139371584c2SYuri Pankov { "rk", "}", 0x23ac },
140371584c2SYuri Pankov { "bracerightmid", "}", 0x23ac },
141371584c2SYuri Pankov { "rb", "-\'", 0x23ad },
142371584c2SYuri Pankov { "bracerightbt", "-\'", 0x23ad },
143371584c2SYuri Pankov { "bracerightex", "|", 0x23aa },
144371584c2SYuri Pankov { "parenlefttp", "/", 0x239b },
145371584c2SYuri Pankov { "parenleftbt", "\\", 0x239d },
146371584c2SYuri Pankov { "parenleftex", "|", 0x239c },
147371584c2SYuri Pankov { "parenrighttp", "\\", 0x239e },
148371584c2SYuri Pankov { "parenrightbt", "/", 0x23a0 },
149371584c2SYuri Pankov { "parenrightex", "|", 0x239f },
150371584c2SYuri Pankov
151371584c2SYuri Pankov /* Arrows and lines. */
152371584c2SYuri Pankov { "<-", "<-", 0x2190 },
153371584c2SYuri Pankov { "->", "->", 0x2192 },
154371584c2SYuri Pankov { "<>", "<->", 0x2194 },
155371584c2SYuri Pankov { "da", "|\bv", 0x2193 },
156371584c2SYuri Pankov { "ua", "|\b^", 0x2191 },
157371584c2SYuri Pankov { "va", "^v", 0x2195 },
158371584c2SYuri Pankov { "lA", "<=", 0x21d0 },
159371584c2SYuri Pankov { "rA", "=>", 0x21d2 },
160371584c2SYuri Pankov { "hA", "<=>", 0x21d4 },
161371584c2SYuri Pankov { "uA", "=\b^", 0x21d1 },
162371584c2SYuri Pankov { "dA", "=\bv", 0x21d3 },
163371584c2SYuri Pankov { "vA", "^=v", 0x21d5 },
164c66b8046SYuri Pankov { "an", "-", 0x23af },
165371584c2SYuri Pankov
166371584c2SYuri Pankov /* Logic. */
167371584c2SYuri Pankov { "AN", "^", 0x2227 },
168371584c2SYuri Pankov { "OR", "v", 0x2228 },
169371584c2SYuri Pankov { "no", "~", 0x00ac },
170371584c2SYuri Pankov { "tno", "~", 0x00ac },
1716640c13bSYuri Pankov { "te", "<there\037exists>", 0x2203 },
1726640c13bSYuri Pankov { "fa", "<for\037all>", 0x2200 },
1736640c13bSYuri Pankov { "st", "<such\037that>", 0x220b },
1746640c13bSYuri Pankov { "tf", "<therefore>", 0x2234 },
1756640c13bSYuri Pankov { "3d", "<therefore>", 0x2234 },
176371584c2SYuri Pankov { "or", "|", 0x007c },
177371584c2SYuri Pankov
178371584c2SYuri Pankov /* Mathematicals. */
179371584c2SYuri Pankov { "pl", "+", 0x002b },
180371584c2SYuri Pankov { "mi", "-", 0x2212 },
181371584c2SYuri Pankov { "-", "-", 0x002d },
182371584c2SYuri Pankov { "-+", "-+", 0x2213 },
183371584c2SYuri Pankov { "+-", "+-", 0x00b1 },
184371584c2SYuri Pankov { "t+-", "+-", 0x00b1 },
185371584c2SYuri Pankov { "pc", ".", 0x00b7 },
186371584c2SYuri Pankov { "md", ".", 0x22c5 },
187371584c2SYuri Pankov { "mu", "x", 0x00d7 },
188371584c2SYuri Pankov { "tmu", "x", 0x00d7 },
189371584c2SYuri Pankov { "c*", "O\bx", 0x2297 },
190371584c2SYuri Pankov { "c+", "O\b+", 0x2295 },
1916640c13bSYuri Pankov { "di", "/", 0x00f7 },
1926640c13bSYuri Pankov { "tdi", "/", 0x00f7 },
193371584c2SYuri Pankov { "f/", "/", 0x2044 },
194371584c2SYuri Pankov { "**", "*", 0x2217 },
195371584c2SYuri Pankov { "<=", "<=", 0x2264 },
196371584c2SYuri Pankov { ">=", ">=", 0x2265 },
197371584c2SYuri Pankov { "<<", "<<", 0x226a },
198371584c2SYuri Pankov { ">>", ">>", 0x226b },
199371584c2SYuri Pankov { "eq", "=", 0x003d },
200371584c2SYuri Pankov { "!=", "!=", 0x2260 },
201371584c2SYuri Pankov { "==", "==", 0x2261 },
202371584c2SYuri Pankov { "ne", "!==", 0x2262 },
203371584c2SYuri Pankov { "ap", "~", 0x223c },
204371584c2SYuri Pankov { "|=", "-~", 0x2243 },
205371584c2SYuri Pankov { "=~", "=~", 0x2245 },
206371584c2SYuri Pankov { "~~", "~~", 0x2248 },
207371584c2SYuri Pankov { "~=", "~=", 0x2248 },
2086640c13bSYuri Pankov { "pt", "<proportional\037to>", 0x221d },
209371584c2SYuri Pankov { "es", "{}", 0x2205 },
2106640c13bSYuri Pankov { "mo", "<element\037of>", 0x2208 },
2116640c13bSYuri Pankov { "nm", "<not\037element\037of>", 0x2209 },
2126640c13bSYuri Pankov { "sb", "<proper\037subset>", 0x2282 },
2136640c13bSYuri Pankov { "nb", "<not\037subset>", 0x2284 },
2146640c13bSYuri Pankov { "sp", "<proper\037superset>", 0x2283 },
2156640c13bSYuri Pankov { "nc", "<not\037superset>", 0x2285 },
2166640c13bSYuri Pankov { "ib", "<subset\037or\037equal>", 0x2286 },
2176640c13bSYuri Pankov { "ip", "<superset\037or\037equal>", 0x2287 },
2186640c13bSYuri Pankov { "ca", "<intersection>", 0x2229 },
2196640c13bSYuri Pankov { "cu", "<union>", 0x222a },
2206640c13bSYuri Pankov { "/_", "<angle>", 0x2220 },
2216640c13bSYuri Pankov { "pp", "<perpendicular>", 0x22a5 },
2226640c13bSYuri Pankov { "is", "<integral>", 0x222b },
2236640c13bSYuri Pankov { "integral", "<integral>", 0x222b },
2246640c13bSYuri Pankov { "sum", "<sum>", 0x2211 },
2256640c13bSYuri Pankov { "product", "<product>", 0x220f },
2266640c13bSYuri Pankov { "coproduct", "<coproduct>", 0x2210 },
2276640c13bSYuri Pankov { "gr", "<nabla>", 0x2207 },
2286640c13bSYuri Pankov { "sr", "<sqrt>", 0x221a },
2296640c13bSYuri Pankov { "sqrt", "<sqrt>", 0x221a },
230371584c2SYuri Pankov { "lc", "|~", 0x2308 },
231371584c2SYuri Pankov { "rc", "~|", 0x2309 },
232371584c2SYuri Pankov { "lf", "|_", 0x230a },
233371584c2SYuri Pankov { "rf", "_|", 0x230b },
2346640c13bSYuri Pankov { "if", "<infinity>", 0x221e },
2356640c13bSYuri Pankov { "Ah", "<Aleph>", 0x2135 },
2366640c13bSYuri Pankov { "Im", "<Im>", 0x2111 },
2376640c13bSYuri Pankov { "Re", "<Re>", 0x211c },
238cec8643bSMichal Nowak { "wp", "p", 0x2118 },
2396640c13bSYuri Pankov { "pd", "<del>", 0x2202 },
240371584c2SYuri Pankov { "-h", "/h", 0x210f },
241c66b8046SYuri Pankov { "hbar", "/h", 0x210f },
242371584c2SYuri Pankov { "12", "1/2", 0x00bd },
243371584c2SYuri Pankov { "14", "1/4", 0x00bc },
244371584c2SYuri Pankov { "34", "3/4", 0x00be },
245c66b8046SYuri Pankov { "18", "1/8", 0x215B },
246c66b8046SYuri Pankov { "38", "3/8", 0x215C },
247c66b8046SYuri Pankov { "58", "5/8", 0x215D },
248c66b8046SYuri Pankov { "78", "7/8", 0x215E },
2496640c13bSYuri Pankov { "S1", "^1", 0x00B9 },
2506640c13bSYuri Pankov { "S2", "^2", 0x00B2 },
2516640c13bSYuri Pankov { "S3", "^3", 0x00B3 },
252371584c2SYuri Pankov
253371584c2SYuri Pankov /* Ligatures. */
254371584c2SYuri Pankov { "ff", "ff", 0xfb00 },
255371584c2SYuri Pankov { "fi", "fi", 0xfb01 },
256371584c2SYuri Pankov { "fl", "fl", 0xfb02 },
257371584c2SYuri Pankov { "Fi", "ffi", 0xfb03 },
258371584c2SYuri Pankov { "Fl", "ffl", 0xfb04 },
259371584c2SYuri Pankov { "AE", "AE", 0x00c6 },
260371584c2SYuri Pankov { "ae", "ae", 0x00e6 },
261371584c2SYuri Pankov { "OE", "OE", 0x0152 },
262371584c2SYuri Pankov { "oe", "oe", 0x0153 },
263371584c2SYuri Pankov { "ss", "ss", 0x00df },
264371584c2SYuri Pankov { "IJ", "IJ", 0x0132 },
265371584c2SYuri Pankov { "ij", "ij", 0x0133 },
266371584c2SYuri Pankov
267371584c2SYuri Pankov /* Accents. */
268371584c2SYuri Pankov { "a\"", "\"", 0x02dd },
269371584c2SYuri Pankov { "a-", "-", 0x00af },
270371584c2SYuri Pankov { "a.", ".", 0x02d9 },
271371584c2SYuri Pankov { "a^", "^", 0x005e },
272371584c2SYuri Pankov { "aa", "\'", 0x00b4 },
273371584c2SYuri Pankov { "\'", "\'", 0x00b4 },
274371584c2SYuri Pankov { "ga", "`", 0x0060 },
275371584c2SYuri Pankov { "`", "`", 0x0060 },
276371584c2SYuri Pankov { "ab", "'\b`", 0x02d8 },
277371584c2SYuri Pankov { "ac", ",", 0x00b8 },
278371584c2SYuri Pankov { "ad", "\"", 0x00a8 },
279371584c2SYuri Pankov { "ah", "v", 0x02c7 },
280371584c2SYuri Pankov { "ao", "o", 0x02da },
281371584c2SYuri Pankov { "a~", "~", 0x007e },
282371584c2SYuri Pankov { "ho", ",", 0x02db },
283371584c2SYuri Pankov { "ha", "^", 0x005e },
284371584c2SYuri Pankov { "ti", "~", 0x007e },
285cec8643bSMichal Nowak { "u02DC", "~", 0x02dc },
286371584c2SYuri Pankov
287371584c2SYuri Pankov /* Accented letters. */
288371584c2SYuri Pankov { "'A", "'\bA", 0x00c1 },
289371584c2SYuri Pankov { "'E", "'\bE", 0x00c9 },
290371584c2SYuri Pankov { "'I", "'\bI", 0x00cd },
291371584c2SYuri Pankov { "'O", "'\bO", 0x00d3 },
292371584c2SYuri Pankov { "'U", "'\bU", 0x00da },
293cec8643bSMichal Nowak { "'Y", "'\bY", 0x00dd },
294371584c2SYuri Pankov { "'a", "'\ba", 0x00e1 },
295371584c2SYuri Pankov { "'e", "'\be", 0x00e9 },
296371584c2SYuri Pankov { "'i", "'\bi", 0x00ed },
297371584c2SYuri Pankov { "'o", "'\bo", 0x00f3 },
298371584c2SYuri Pankov { "'u", "'\bu", 0x00fa },
299cec8643bSMichal Nowak { "'y", "'\by", 0x00fd },
300371584c2SYuri Pankov { "`A", "`\bA", 0x00c0 },
301371584c2SYuri Pankov { "`E", "`\bE", 0x00c8 },
302371584c2SYuri Pankov { "`I", "`\bI", 0x00cc },
303371584c2SYuri Pankov { "`O", "`\bO", 0x00d2 },
304371584c2SYuri Pankov { "`U", "`\bU", 0x00d9 },
305371584c2SYuri Pankov { "`a", "`\ba", 0x00e0 },
306371584c2SYuri Pankov { "`e", "`\be", 0x00e8 },
307371584c2SYuri Pankov { "`i", "`\bi", 0x00ec },
308371584c2SYuri Pankov { "`o", "`\bo", 0x00f2 },
309371584c2SYuri Pankov { "`u", "`\bu", 0x00f9 },
310371584c2SYuri Pankov { "~A", "~\bA", 0x00c3 },
311371584c2SYuri Pankov { "~N", "~\bN", 0x00d1 },
312371584c2SYuri Pankov { "~O", "~\bO", 0x00d5 },
313371584c2SYuri Pankov { "~a", "~\ba", 0x00e3 },
314371584c2SYuri Pankov { "~n", "~\bn", 0x00f1 },
315371584c2SYuri Pankov { "~o", "~\bo", 0x00f5 },
316371584c2SYuri Pankov { ":A", "\"\bA", 0x00c4 },
317371584c2SYuri Pankov { ":E", "\"\bE", 0x00cb },
318371584c2SYuri Pankov { ":I", "\"\bI", 0x00cf },
319371584c2SYuri Pankov { ":O", "\"\bO", 0x00d6 },
320371584c2SYuri Pankov { ":U", "\"\bU", 0x00dc },
321371584c2SYuri Pankov { ":a", "\"\ba", 0x00e4 },
322371584c2SYuri Pankov { ":e", "\"\be", 0x00eb },
323371584c2SYuri Pankov { ":i", "\"\bi", 0x00ef },
324371584c2SYuri Pankov { ":o", "\"\bo", 0x00f6 },
325371584c2SYuri Pankov { ":u", "\"\bu", 0x00fc },
326371584c2SYuri Pankov { ":y", "\"\by", 0x00ff },
327371584c2SYuri Pankov { "^A", "^\bA", 0x00c2 },
328371584c2SYuri Pankov { "^E", "^\bE", 0x00ca },
329371584c2SYuri Pankov { "^I", "^\bI", 0x00ce },
330371584c2SYuri Pankov { "^O", "^\bO", 0x00d4 },
331371584c2SYuri Pankov { "^U", "^\bU", 0x00db },
332371584c2SYuri Pankov { "^a", "^\ba", 0x00e2 },
333371584c2SYuri Pankov { "^e", "^\be", 0x00ea },
334371584c2SYuri Pankov { "^i", "^\bi", 0x00ee },
335371584c2SYuri Pankov { "^o", "^\bo", 0x00f4 },
336371584c2SYuri Pankov { "^u", "^\bu", 0x00fb },
337371584c2SYuri Pankov { ",C", ",\bC", 0x00c7 },
338371584c2SYuri Pankov { ",c", ",\bc", 0x00e7 },
339371584c2SYuri Pankov { "/L", "/\bL", 0x0141 },
340371584c2SYuri Pankov { "/l", "/\bl", 0x0142 },
341371584c2SYuri Pankov { "/O", "/\bO", 0x00d8 },
342371584c2SYuri Pankov { "/o", "/\bo", 0x00f8 },
343371584c2SYuri Pankov { "oA", "o\bA", 0x00c5 },
344371584c2SYuri Pankov { "oa", "o\ba", 0x00e5 },
345371584c2SYuri Pankov
346371584c2SYuri Pankov /* Special letters. */
3476640c13bSYuri Pankov { "-D", "Dh", 0x00d0 },
3486640c13bSYuri Pankov { "Sd", "dh", 0x00f0 },
349371584c2SYuri Pankov { "TP", "Th", 0x00de },
350371584c2SYuri Pankov { "Tp", "th", 0x00fe },
351371584c2SYuri Pankov { ".i", "i", 0x0131 },
352371584c2SYuri Pankov { ".j", "j", 0x0237 },
353371584c2SYuri Pankov
354371584c2SYuri Pankov /* Currency. */
355371584c2SYuri Pankov { "Do", "$", 0x0024 },
356371584c2SYuri Pankov { "ct", "/\bc", 0x00a2 },
357371584c2SYuri Pankov { "Eu", "EUR", 0x20ac },
358371584c2SYuri Pankov { "eu", "EUR", 0x20ac },
359371584c2SYuri Pankov { "Ye", "=\bY", 0x00a5 },
360cec8643bSMichal Nowak { "Po", "-\bL", 0x00a3 },
361371584c2SYuri Pankov { "Cs", "o\bx", 0x00a4 },
362371584c2SYuri Pankov { "Fn", ",\bf", 0x0192 },
363371584c2SYuri Pankov
364371584c2SYuri Pankov /* Units. */
3656640c13bSYuri Pankov { "de", "<degree>", 0x00b0 },
3666640c13bSYuri Pankov { "%0", "<permille>", 0x2030 },
367371584c2SYuri Pankov { "fm", "\'", 0x2032 },
368371584c2SYuri Pankov { "sd", "''", 0x2033 },
3696640c13bSYuri Pankov { "mc", "<micro>", 0x00b5 },
370c66b8046SYuri Pankov { "Of", "_\ba", 0x00aa },
371c66b8046SYuri Pankov { "Om", "_\bo", 0x00ba },
372371584c2SYuri Pankov
373371584c2SYuri Pankov /* Greek characters. */
374371584c2SYuri Pankov { "*A", "A", 0x0391 },
375371584c2SYuri Pankov { "*B", "B", 0x0392 },
3766640c13bSYuri Pankov { "*G", "<Gamma>", 0x0393 },
3776640c13bSYuri Pankov { "*D", "<Delta>", 0x0394 },
378371584c2SYuri Pankov { "*E", "E", 0x0395 },
379371584c2SYuri Pankov { "*Z", "Z", 0x0396 },
380371584c2SYuri Pankov { "*Y", "H", 0x0397 },
3816640c13bSYuri Pankov { "*H", "<Theta>", 0x0398 },
382371584c2SYuri Pankov { "*I", "I", 0x0399 },
383371584c2SYuri Pankov { "*K", "K", 0x039a },
3846640c13bSYuri Pankov { "*L", "<Lambda>", 0x039b },
385371584c2SYuri Pankov { "*M", "M", 0x039c },
386371584c2SYuri Pankov { "*N", "N", 0x039d },
3876640c13bSYuri Pankov { "*C", "<Xi>", 0x039e },
388371584c2SYuri Pankov { "*O", "O", 0x039f },
3896640c13bSYuri Pankov { "*P", "<Pi>", 0x03a0 },
390371584c2SYuri Pankov { "*R", "P", 0x03a1 },
3916640c13bSYuri Pankov { "*S", "<Sigma>", 0x03a3 },
392371584c2SYuri Pankov { "*T", "T", 0x03a4 },
393371584c2SYuri Pankov { "*U", "Y", 0x03a5 },
3946640c13bSYuri Pankov { "*F", "<Phi>", 0x03a6 },
395371584c2SYuri Pankov { "*X", "X", 0x03a7 },
3966640c13bSYuri Pankov { "*Q", "<Psi>", 0x03a8 },
3976640c13bSYuri Pankov { "*W", "<Omega>", 0x03a9 },
3986640c13bSYuri Pankov { "*a", "<alpha>", 0x03b1 },
3996640c13bSYuri Pankov { "*b", "<beta>", 0x03b2 },
4006640c13bSYuri Pankov { "*g", "<gamma>", 0x03b3 },
4016640c13bSYuri Pankov { "*d", "<delta>", 0x03b4 },
4026640c13bSYuri Pankov { "*e", "<epsilon>", 0x03b5 },
4036640c13bSYuri Pankov { "*z", "<zeta>", 0x03b6 },
4046640c13bSYuri Pankov { "*y", "<eta>", 0x03b7 },
4056640c13bSYuri Pankov { "*h", "<theta>", 0x03b8 },
4066640c13bSYuri Pankov { "*i", "<iota>", 0x03b9 },
4076640c13bSYuri Pankov { "*k", "<kappa>", 0x03ba },
4086640c13bSYuri Pankov { "*l", "<lambda>", 0x03bb },
4096640c13bSYuri Pankov { "*m", "<mu>", 0x03bc },
4106640c13bSYuri Pankov { "*n", "<nu>", 0x03bd },
4116640c13bSYuri Pankov { "*c", "<xi>", 0x03be },
412371584c2SYuri Pankov { "*o", "o", 0x03bf },
4136640c13bSYuri Pankov { "*p", "<pi>", 0x03c0 },
4146640c13bSYuri Pankov { "*r", "<rho>", 0x03c1 },
4156640c13bSYuri Pankov { "*s", "<sigma>", 0x03c3 },
4166640c13bSYuri Pankov { "*t", "<tau>", 0x03c4 },
4176640c13bSYuri Pankov { "*u", "<upsilon>", 0x03c5 },
4186640c13bSYuri Pankov { "*f", "<phi>", 0x03d5 },
4196640c13bSYuri Pankov { "*x", "<chi>", 0x03c7 },
4206640c13bSYuri Pankov { "*q", "<psi>", 0x03c8 },
4216640c13bSYuri Pankov { "*w", "<omega>", 0x03c9 },
4226640c13bSYuri Pankov { "+h", "<theta>", 0x03d1 },
4236640c13bSYuri Pankov { "+f", "<phi>", 0x03c6 },
4246640c13bSYuri Pankov { "+p", "<pi>", 0x03d6 },
4256640c13bSYuri Pankov { "+e", "<epsilon>", 0x03f5 },
4266640c13bSYuri Pankov { "ts", "<sigma>", 0x03c2 },
42795c635efSGarrett D'Amore };
42895c635efSGarrett D'Amore
429371584c2SYuri Pankov static struct ohash mchars;
43095c635efSGarrett D'Amore
431260e9a87SYuri Pankov
43295c635efSGarrett D'Amore void
mchars_free(void)433371584c2SYuri Pankov mchars_free(void)
43495c635efSGarrett D'Amore {
43595c635efSGarrett D'Amore
436371584c2SYuri Pankov ohash_delete(&mchars);
43795c635efSGarrett D'Amore }
43895c635efSGarrett D'Amore
439371584c2SYuri Pankov void
mchars_alloc(void)44095c635efSGarrett D'Amore mchars_alloc(void)
44195c635efSGarrett D'Amore {
442371584c2SYuri Pankov size_t i;
443371584c2SYuri Pankov unsigned int slot;
444371584c2SYuri Pankov
445371584c2SYuri Pankov mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
446371584c2SYuri Pankov for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
447371584c2SYuri Pankov slot = ohash_qlookup(&mchars, lines[i].roffcode);
448371584c2SYuri Pankov assert(ohash_find(&mchars, slot) == NULL);
449371584c2SYuri Pankov ohash_insert(&mchars, slot, lines + i);
45095c635efSGarrett D'Amore }
45195c635efSGarrett D'Amore }
45295c635efSGarrett D'Amore
45395c635efSGarrett D'Amore int
mchars_spec2cp(const char * p,size_t sz)454371584c2SYuri Pankov mchars_spec2cp(const char *p, size_t sz)
45595c635efSGarrett D'Amore {
45695c635efSGarrett D'Amore const struct ln *ln;
457371584c2SYuri Pankov const char *end;
45895c635efSGarrett D'Amore
459371584c2SYuri Pankov end = p + sz;
460371584c2SYuri Pankov ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
461cec8643bSMichal Nowak return ln != NULL ? ln->unicode : -1;
46295c635efSGarrett D'Amore }
46395c635efSGarrett D'Amore
/*
 * Convert the sz bytes starting at p to an integer with
 * mandoc_strntoi() in base 10.  Return the value if it lies
 * in the range 0 to 255 inclusive, or -1 otherwise.
 */
int
mchars_num2char(const char *p, size_t sz)
{
	int	 i;

	i = mandoc_strntoi(p, sz, 10);
	return i >= 0 && i < 256 ? i : -1;
}
47295c635efSGarrett D'Amore
/*
 * Convert the sz bytes starting at p to an integer with
 * mandoc_strntoi() in base 16 and return it.
 * A result outside the range 0 to 0x10FFFF trips the assertion,
 * so callers are expected to pass pre-validated input only.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	int	 i;

	i = mandoc_strntoi(p, sz, 16);
	assert(i >= 0 && i <= 0x10FFFF);
	return i;
}
48295c635efSGarrett D'Amore
48395c635efSGarrett D'Amore const char *
mchars_spec2str(const char * p,size_t sz,size_t * rsz)484371584c2SYuri Pankov mchars_spec2str(const char *p, size_t sz, size_t *rsz)
48595c635efSGarrett D'Amore {
48695c635efSGarrett D'Amore const struct ln *ln;
487371584c2SYuri Pankov const char *end;
48895c635efSGarrett D'Amore
489371584c2SYuri Pankov end = p + sz;
490371584c2SYuri Pankov ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
491cec8643bSMichal Nowak if (ln == NULL)
492cec8643bSMichal Nowak return NULL;
49395c635efSGarrett D'Amore
49495c635efSGarrett D'Amore *rsz = strlen(ln->ascii);
495371584c2SYuri Pankov return ln->ascii;
49695c635efSGarrett D'Amore }
49795c635efSGarrett D'Amore
498260e9a87SYuri Pankov const char *
mchars_uc2str(int uc)499260e9a87SYuri Pankov mchars_uc2str(int uc)
500260e9a87SYuri Pankov {
501371584c2SYuri Pankov size_t i;
502260e9a87SYuri Pankov
503371584c2SYuri Pankov for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
504260e9a87SYuri Pankov if (uc == lines[i].unicode)
505371584c2SYuri Pankov return lines[i].ascii;
506371584c2SYuri Pankov return "<?>";
50795c635efSGarrett D'Amore }
508