1*6a6cfa5dSJason King /*
2*6a6cfa5dSJason King  * This file and its contents are supplied under the terms of the
3*6a6cfa5dSJason King  * Common Development and Distribution License ("CDDL"), version 1.0.
4*6a6cfa5dSJason King  * You may only use this file in accordance with the terms of version
5*6a6cfa5dSJason King  * 1.0 of the CDDL.
6*6a6cfa5dSJason King  *
7*6a6cfa5dSJason King  * A full copy of the text of the CDDL should have accompanied this
8*6a6cfa5dSJason King  * source.  A copy of the CDDL is also available via the Internet at
9*6a6cfa5dSJason King  * http://www.illumos.org/license/CDDL.
10*6a6cfa5dSJason King  */
11*6a6cfa5dSJason King 
12*6a6cfa5dSJason King /*
13*6a6cfa5dSJason King  * Copyright 2019, Joyent, Inc.
14*6a6cfa5dSJason King  */
15*6a6cfa5dSJason King 
16*6a6cfa5dSJason King #include <errno.h>
17*6a6cfa5dSJason King #include <libcustr.h>
18*6a6cfa5dSJason King #include <limits.h>
19*6a6cfa5dSJason King #include <string.h>
20*6a6cfa5dSJason King #include <sys/ctype.h>	/* We want the C locale ISXXX() versions */
21*6a6cfa5dSJason King #include <sys/debug.h>
22*6a6cfa5dSJason King #include <stdio.h>
23*6a6cfa5dSJason King #include <sys/sysmacros.h>
24*6a6cfa5dSJason King 
25*6a6cfa5dSJason King #include "strview.h"
26*6a6cfa5dSJason King #include "demangle_int.h"
27*6a6cfa5dSJason King 
28*6a6cfa5dSJason King /*
29*6a6cfa5dSJason King  * Unfortunately, there is currently no official specification for the rust
30*6a6cfa5dSJason King  * name mangling.  This is an attempt to document the understanding of the
31*6a6cfa5dSJason King  * mangling used here.  It is based off examination of
32*6a6cfa5dSJason King  *     https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
33*6a6cfa5dSJason King  *
34*6a6cfa5dSJason King  * A mangled rust name is:
35*6a6cfa5dSJason King  *     <prefix> <name> <hash> E
36*6a6cfa5dSJason King  *
 * <prefix>	::=	_ZN
 *			__ZN
39*6a6cfa5dSJason King  *
40*6a6cfa5dSJason King  * <name>	::= <name-segment>+
41*6a6cfa5dSJason King  *
42*6a6cfa5dSJason King  * <name-segment> ::= <len> <name-chars>{len}
43*6a6cfa5dSJason King  *
 * <len>	::= [1-9][0-9]*
45*6a6cfa5dSJason King  *
46*6a6cfa5dSJason King  * <name-chars>	::=	<[A-Za-z]> <[A-Za-z0-9]>*
47*6a6cfa5dSJason King  *			<separator>
48*6a6cfa5dSJason King  *			<special>
49*6a6cfa5dSJason King  *
50*6a6cfa5dSJason King  * <separator>	::=	'..'	# '::'
51*6a6cfa5dSJason King  *
 * <special>	::=	$SP$	# '@'
53*6a6cfa5dSJason King  *			$BP$	# '*'
54*6a6cfa5dSJason King  *			$RF$	# '&'
55*6a6cfa5dSJason King  *			$LT$	# '<'
56*6a6cfa5dSJason King  *			$GT$	# '>'
57*6a6cfa5dSJason King  *			$LP$	# '('
58*6a6cfa5dSJason King  *			$RP$	# ')'
59*6a6cfa5dSJason King  *			$C$	# ','
60*6a6cfa5dSJason King  *			$u7e$	# '~'
61*6a6cfa5dSJason King  *			$u20$	# ' '
62*6a6cfa5dSJason King  *			$u27$	# '\''
63*6a6cfa5dSJason King  *			$u3d$	# '='
64*6a6cfa5dSJason King  *			$u5b$	# '['
65*6a6cfa5dSJason King  *			$u5d$	# ']'
66*6a6cfa5dSJason King  *			$u7b$	# '{'
67*6a6cfa5dSJason King  *			$u7d$	# '}'
68*6a6cfa5dSJason King  *			$u3b$	# ';'
69*6a6cfa5dSJason King  *			$u2b$	# '+'
70*6a6cfa5dSJason King  *			$u22$	# '"'
71*6a6cfa5dSJason King  *
72*6a6cfa5dSJason King  * <hash>	:= <len> h <hex-digits>+
73*6a6cfa5dSJason King  *
74*6a6cfa5dSJason King  * <hex-digits>	:= <[0-9a-f]>
75*6a6cfa5dSJason King  */
76*6a6cfa5dSJason King 
/*
 * Working state for a single rust_demangle() call.  It is also handed to the
 * custr allocator callbacks (via cua_arg) so they can reach rds_ops.
 */
typedef struct rustdem_state {
	const char	*rds_str;	/* the mangled input string */
	custr_t		*rds_demangled;	/* demangled output built so far */
	sysdem_ops_t	*rds_ops;	/* caller supplied allocator ops */
	int		rds_error;	/* 0, or errno value for the failure */
} rustdem_state_t;
83*6a6cfa5dSJason King 
/*
 * Escape sequences that can appear inside a name segment, and the single
 * character each one is replaced with in the demangled output.  The table
 * is searched linearly by rustdem_parse_special().
 */
static const struct rust_charmap {
	const char	*ruc_seq;	/* escape sequence, including the '$'s */
	char		ruc_ch;		/* character emitted in its place */
} rust_charmap[] = {
	{ "$SP$", '@' },
	{ "$BP$", '*' },
	{ "$RF$", '&' },
	{ "$LT$", '<' },
	{ "$GT$", '>' },
	{ "$LP$", '(' },
	{ "$RP$", ')' },
	{ "$C$", ',' },
	{ "$u7e$", '~' },
	{ "$u20$", ' ' },
	{ "$u27$", '\'' },
	{ "$u3d$", '=' },
	{ "$u5b$", '[' },
	{ "$u5d$", ']' },
	{ "$u7b$", '{' },
	{ "$u7d$", '}' },
	{ "$u3b$", ';' },
	{ "$u2b$", '+' },
	{ "$u22$", '"' }
};
/* Number of entries in rust_charmap[] */
static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap);
109*6a6cfa5dSJason King 
/* custr allocator callbacks; they allocate through the caller's ops */
static void *rustdem_alloc(custr_alloc_t *, size_t);
static void rustdem_free(custr_alloc_t *, void *, size_t);

/* Output and input helpers */
static boolean_t rustdem_append_c(rustdem_state_t *, char);
static boolean_t rustdem_all_ascii(const strview_t *);

/*
 * Parsers.  Each returns B_TRUE on success and B_FALSE on failure; most
 * record the reason for a failure in rds_error.
 */
static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *);
static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *);
static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *);
static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *);
static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *);
static boolean_t rustdem_add_sep(rustdem_state_t *);
122*6a6cfa5dSJason King 
123*6a6cfa5dSJason King char *
124*6a6cfa5dSJason King rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops)
125*6a6cfa5dSJason King {
126*6a6cfa5dSJason King 	rustdem_state_t st = {
127*6a6cfa5dSJason King 		.rds_str = s,
128*6a6cfa5dSJason King 		.rds_ops = ops,
129*6a6cfa5dSJason King 	};
130*6a6cfa5dSJason King 	custr_alloc_ops_t custr_ops = {
131*6a6cfa5dSJason King 		.custr_ao_alloc = rustdem_alloc,
132*6a6cfa5dSJason King 		.custr_ao_free = rustdem_free
133*6a6cfa5dSJason King 	};
134*6a6cfa5dSJason King 	custr_alloc_t custr_alloc = {
135*6a6cfa5dSJason King 		.cua_version = CUSTR_VERSION
136*6a6cfa5dSJason King 	};
137*6a6cfa5dSJason King 	strview_t sv;
138*6a6cfa5dSJason King 	int ret;
139*6a6cfa5dSJason King 
140*6a6cfa5dSJason King 	if (custr_alloc_init(&custr_alloc, &custr_ops) != 0)
141*6a6cfa5dSJason King 		return (NULL);
142*6a6cfa5dSJason King 	custr_alloc.cua_arg = &st;
143*6a6cfa5dSJason King 
144*6a6cfa5dSJason King 	sv_init_str(&sv, s, s + slen);
145*6a6cfa5dSJason King 
146*6a6cfa5dSJason King 	if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') {
147*6a6cfa5dSJason King 		DEMDEBUG("ERROR: string is either too small or does not end "
148*6a6cfa5dSJason King 		    "with 'E'");
149*6a6cfa5dSJason King 		errno = EINVAL;
150*6a6cfa5dSJason King 		return (NULL);
151*6a6cfa5dSJason King 	}
152*6a6cfa5dSJason King 
153*6a6cfa5dSJason King 	if (!rustdem_parse_prefix(&st, &sv)) {
154*6a6cfa5dSJason King 		DEMDEBUG("ERROR: could not parse prefix");
155*6a6cfa5dSJason King 		errno = EINVAL;
156*6a6cfa5dSJason King 		return (NULL);
157*6a6cfa5dSJason King 	}
158*6a6cfa5dSJason King 	DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv));
159*6a6cfa5dSJason King 
160*6a6cfa5dSJason King 	if (!rustdem_all_ascii(&sv)) {
161*6a6cfa5dSJason King 		/* rustdem_all_ascii() provides debug output */
162*6a6cfa5dSJason King 		errno = EINVAL;
163*6a6cfa5dSJason King 		return (NULL);
164*6a6cfa5dSJason King 	}
165*6a6cfa5dSJason King 
166*6a6cfa5dSJason King 	if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0)
167*6a6cfa5dSJason King 		return (NULL);
168*6a6cfa5dSJason King 
169*6a6cfa5dSJason King 	while (sv_remaining(&sv) > 1) {
170*6a6cfa5dSJason King 		if (rustdem_parse_name(&st, &sv))
171*6a6cfa5dSJason King 			continue;
172*6a6cfa5dSJason King 		if (st.rds_error != 0)
173*6a6cfa5dSJason King 			goto fail;
174*6a6cfa5dSJason King 	}
175*6a6cfa5dSJason King 
176*6a6cfa5dSJason King 	if (st.rds_error != 0 || !sv_consume_if_c(&sv, 'E'))
177*6a6cfa5dSJason King 		goto fail;
178*6a6cfa5dSJason King 
179*6a6cfa5dSJason King 	char *res = xstrdup(ops, custr_cstr(st.rds_demangled));
180*6a6cfa5dSJason King 	if (res == NULL) {
181*6a6cfa5dSJason King 		st.rds_error = errno;
182*6a6cfa5dSJason King 		goto fail;
183*6a6cfa5dSJason King 	}
184*6a6cfa5dSJason King 
185*6a6cfa5dSJason King 	custr_free(st.rds_demangled);
186*6a6cfa5dSJason King 	DEMDEBUG("result = '%s'", res);
187*6a6cfa5dSJason King 	return (res);
188*6a6cfa5dSJason King 
189*6a6cfa5dSJason King fail:
190*6a6cfa5dSJason King 	custr_free(st.rds_demangled);
191*6a6cfa5dSJason King 	errno = st.rds_error;
192*6a6cfa5dSJason King 	return (NULL);
193*6a6cfa5dSJason King }
194*6a6cfa5dSJason King 
195*6a6cfa5dSJason King static boolean_t
196*6a6cfa5dSJason King rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
197*6a6cfa5dSJason King {
198*6a6cfa5dSJason King 	strview_t pfx;
199*6a6cfa5dSJason King 
200*6a6cfa5dSJason King 	sv_init_sv(&pfx, svp);
201*6a6cfa5dSJason King 
202*6a6cfa5dSJason King 	DEMDEBUG("checking for '_ZN' or '__ZN' in '%.*s'", SV_PRINT(&pfx));
203*6a6cfa5dSJason King 
204*6a6cfa5dSJason King 	if (st->rds_error != 0)
205*6a6cfa5dSJason King 		return (B_FALSE);
206*6a6cfa5dSJason King 
207*6a6cfa5dSJason King 	if (!sv_consume_if_c(&pfx, '_'))
208*6a6cfa5dSJason King 		return (B_FALSE);
209*6a6cfa5dSJason King 
210*6a6cfa5dSJason King 	(void) sv_consume_if_c(&pfx, '_');
211*6a6cfa5dSJason King 
212*6a6cfa5dSJason King 	if (!sv_consume_if_c(&pfx, 'Z') || !sv_consume_if_c(&pfx, 'N'))
213*6a6cfa5dSJason King 		return (B_FALSE);
214*6a6cfa5dSJason King 
215*6a6cfa5dSJason King 	/* Update svp with new position */
216*6a6cfa5dSJason King 	sv_init_sv(svp, &pfx);
217*6a6cfa5dSJason King 	return (B_TRUE);
218*6a6cfa5dSJason King }
219*6a6cfa5dSJason King 
220*6a6cfa5dSJason King static boolean_t
221*6a6cfa5dSJason King rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
222*6a6cfa5dSJason King {
223*6a6cfa5dSJason King 	strview_t sv;
224*6a6cfa5dSJason King 	strview_t name;
225*6a6cfa5dSJason King 	uint64_t len;
226*6a6cfa5dSJason King 	size_t rem;
227*6a6cfa5dSJason King 	boolean_t last = B_FALSE;
228*6a6cfa5dSJason King 
229*6a6cfa5dSJason King 	if (st->rds_error != 0 || sv_remaining(svp) == 0)
230*6a6cfa5dSJason King 		return (B_FALSE);
231*6a6cfa5dSJason King 
232*6a6cfa5dSJason King 	sv_init_sv(&sv, svp);
233*6a6cfa5dSJason King 
234*6a6cfa5dSJason King 	if (!rustdem_parse_num(st, &sv, &len)) {
235*6a6cfa5dSJason King 		DEMDEBUG("ERROR: no leading length");
236*6a6cfa5dSJason King 		st->rds_error = EINVAL;
237*6a6cfa5dSJason King 		return (B_FALSE);
238*6a6cfa5dSJason King 	}
239*6a6cfa5dSJason King 
240*6a6cfa5dSJason King 	rem = sv_remaining(&sv);
241*6a6cfa5dSJason King 
242*6a6cfa5dSJason King 	if (rem < len || len > SIZE_MAX) {
243*6a6cfa5dSJason King 		st->rds_error = EINVAL;
244*6a6cfa5dSJason King 		return (B_FALSE);
245*6a6cfa5dSJason King 	}
246*6a6cfa5dSJason King 
247*6a6cfa5dSJason King 	/* Is this the last segment before the terminating E? */
248*6a6cfa5dSJason King 	if (rem == len + 1) {
249*6a6cfa5dSJason King 		VERIFY3U(sv_peek(&sv, -1), ==, 'E');
250*6a6cfa5dSJason King 		last = B_TRUE;
251*6a6cfa5dSJason King 	}
252*6a6cfa5dSJason King 
253*6a6cfa5dSJason King 	if (!first && !rustdem_add_sep(st))
254*6a6cfa5dSJason King 		return (B_FALSE);
255*6a6cfa5dSJason King 
256*6a6cfa5dSJason King 	/* Reduce length of seg to the length we parsed */
257*6a6cfa5dSJason King 	(void) sv_init_sv_range(&name, &sv, len);
258*6a6cfa5dSJason King 
259*6a6cfa5dSJason King 	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
260*6a6cfa5dSJason King 
261*6a6cfa5dSJason King 	/*
262*6a6cfa5dSJason King 	 * A rust hash starts with 'h', and is the last component of a name
263*6a6cfa5dSJason King 	 * before the terminating 'E'
264*6a6cfa5dSJason King 	 */
265*6a6cfa5dSJason King 	if (sv_peek(&name, 0) == 'h' && last) {
266*6a6cfa5dSJason King 		if (!rustdem_parse_hash(st, &name))
267*6a6cfa5dSJason King 			return (B_FALSE);
268*6a6cfa5dSJason King 		goto done;
269*6a6cfa5dSJason King 	}
270*6a6cfa5dSJason King 
271*6a6cfa5dSJason King 	while (sv_remaining(&name) > 0) {
272*6a6cfa5dSJason King 		switch (sv_peek(&name, 0)) {
273*6a6cfa5dSJason King 		case '$':
274*6a6cfa5dSJason King 			if (rustdem_parse_special(st, &name))
275*6a6cfa5dSJason King 				continue;
276*6a6cfa5dSJason King 			break;
277*6a6cfa5dSJason King 		case '_':
278*6a6cfa5dSJason King 			if (sv_peek(&name, 1) == '$') {
279*6a6cfa5dSJason King 				/*
280*6a6cfa5dSJason King 				 * Only consume/ignore '_'.  Leave
281*6a6cfa5dSJason King 				 * $ for next round.
282*6a6cfa5dSJason King 				 */
283*6a6cfa5dSJason King 				sv_consume_n(&name, 1);
284*6a6cfa5dSJason King 				continue;
285*6a6cfa5dSJason King 			}
286*6a6cfa5dSJason King 			break;
287*6a6cfa5dSJason King 		case '.':
288*6a6cfa5dSJason King 			/* Convert '..' to '::' */
289*6a6cfa5dSJason King 			if (sv_peek(&name, 1) != '.')
290*6a6cfa5dSJason King 				break;
291*6a6cfa5dSJason King 
292*6a6cfa5dSJason King 			if (!rustdem_add_sep(st))
293*6a6cfa5dSJason King 				return (B_FALSE);
294*6a6cfa5dSJason King 
295*6a6cfa5dSJason King 			sv_consume_n(&name, 2);
296*6a6cfa5dSJason King 			continue;
297*6a6cfa5dSJason King 		default:
298*6a6cfa5dSJason King 			break;
299*6a6cfa5dSJason King 		}
300*6a6cfa5dSJason King 
301*6a6cfa5dSJason King 		if (custr_appendc(st->rds_demangled,
302*6a6cfa5dSJason King 		    sv_consume_c(&name)) != 0) {
303*6a6cfa5dSJason King 			st->rds_error = ENOMEM;
304*6a6cfa5dSJason King 			return (B_FALSE);
305*6a6cfa5dSJason King 		}
306*6a6cfa5dSJason King 	}
307*6a6cfa5dSJason King 
308*6a6cfa5dSJason King done:
309*6a6cfa5dSJason King 	DEMDEBUG("%s: consumed '%.*s'", __func__, (int)len, svp->sv_first);
310*6a6cfa5dSJason King 	sv_consume_n(&sv, len);
311*6a6cfa5dSJason King 	sv_init_sv(svp, &sv);
312*6a6cfa5dSJason King 	return (B_TRUE);
313*6a6cfa5dSJason King }
314*6a6cfa5dSJason King 
315*6a6cfa5dSJason King static boolean_t
316*6a6cfa5dSJason King rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
317*6a6cfa5dSJason King {
318*6a6cfa5dSJason King 	strview_t name;
319*6a6cfa5dSJason King 	boolean_t first = B_TRUE;
320*6a6cfa5dSJason King 
321*6a6cfa5dSJason King 	if (st->rds_error != 0)
322*6a6cfa5dSJason King 		return (B_FALSE);
323*6a6cfa5dSJason King 
324*6a6cfa5dSJason King 	sv_init_sv(&name, svp);
325*6a6cfa5dSJason King 
326*6a6cfa5dSJason King 	if (sv_remaining(&name) == 0)
327*6a6cfa5dSJason King 		return (B_FALSE);
328*6a6cfa5dSJason King 
329*6a6cfa5dSJason King 	while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') {
330*6a6cfa5dSJason King 		if (!rustdem_parse_name_segment(st, &name, first))
331*6a6cfa5dSJason King 			return (B_FALSE);
332*6a6cfa5dSJason King 		first = B_FALSE;
333*6a6cfa5dSJason King 	}
334*6a6cfa5dSJason King 
335*6a6cfa5dSJason King 	sv_init_sv(svp, &name);
336*6a6cfa5dSJason King 	return (B_TRUE);
337*6a6cfa5dSJason King }
338*6a6cfa5dSJason King 
339*6a6cfa5dSJason King static boolean_t
340*6a6cfa5dSJason King rustdem_parse_hash(rustdem_state_t *st, strview_t *svp)
341*6a6cfa5dSJason King {
342*6a6cfa5dSJason King 	strview_t sv;
343*6a6cfa5dSJason King 
344*6a6cfa5dSJason King 	sv_init_sv(&sv, svp);
345*6a6cfa5dSJason King 
346*6a6cfa5dSJason King 	VERIFY(sv_consume_if_c(&sv, 'h'));
347*6a6cfa5dSJason King 	if (!rustdem_append_c(st, 'h'))
348*6a6cfa5dSJason King 		return (B_FALSE);
349*6a6cfa5dSJason King 
350*6a6cfa5dSJason King 	while (sv_remaining(&sv) > 0) {
351*6a6cfa5dSJason King 		char c = sv_consume_c(&sv);
352*6a6cfa5dSJason King 
353*6a6cfa5dSJason King 		switch (c) {
354*6a6cfa5dSJason King 		/*
355*6a6cfa5dSJason King 		 * The upper-case hex digits (A-F) are excluded as valid
356*6a6cfa5dSJason King 		 * hash values for several reasons:
357*6a6cfa5dSJason King 		 *
358*6a6cfa5dSJason King 		 * 1. It would result in two different possible names for
359*6a6cfa5dSJason King 		 * the same function, leading to ambiguity in linking (among
360*6a6cfa5dSJason King 		 * other things).
361*6a6cfa5dSJason King 		 *
362*6a6cfa5dSJason King 		 * 2. It would cause potential ambiguity in parsing -- is a
363*6a6cfa5dSJason King 		 * trailing 'E' part of the hash, or the terminating character
364*6a6cfa5dSJason King 		 * in the mangled name?
365*6a6cfa5dSJason King 		 *
366*6a6cfa5dSJason King 		 * 3. No examples were able to be found in the wild where
367*6a6cfa5dSJason King 		 * uppercase digits are used, and other rust demanglers all
368*6a6cfa5dSJason King 		 * seem to assume the hash must contain lower-case hex digits.
369*6a6cfa5dSJason King 		 */
370*6a6cfa5dSJason King 		case '0': case '1': case '2': case '3':
371*6a6cfa5dSJason King 		case '4': case '5': case '6': case '7':
372*6a6cfa5dSJason King 		case '8': case '9': case 'a': case 'b':
373*6a6cfa5dSJason King 		case 'c': case 'd': case 'e': case 'f':
374*6a6cfa5dSJason King 			if (!rustdem_append_c(st, c))
375*6a6cfa5dSJason King 				return (B_FALSE);
376*6a6cfa5dSJason King 			break;
377*6a6cfa5dSJason King 		default:
378*6a6cfa5dSJason King 			return (B_FALSE);
379*6a6cfa5dSJason King 		}
380*6a6cfa5dSJason King 	}
381*6a6cfa5dSJason King 
382*6a6cfa5dSJason King 	sv_init_sv(svp, &sv);
383*6a6cfa5dSJason King 	return (B_TRUE);
384*6a6cfa5dSJason King }
385*6a6cfa5dSJason King 
/*
 * The largest number of digits rustdem_parse_num() accepts in a number;
 * anything longer is rejected with ERANGE.  Even a 10 digit value would
 * imply a name 1Gb or larger in size.  It seems unlikely to the point of
 * absurdity any such value could ever possibly be valid (or even have
 * compiled properly).  Bounding the digit count also prevents the uint64_t
 * conversion from possibly overflowing, since an accepted value must always
 * be below 10 * UINT32_MAX.
 */
#define	MAX_DIGITS 10
394*6a6cfa5dSJason King 
395*6a6cfa5dSJason King static boolean_t
396*6a6cfa5dSJason King rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp,
397*6a6cfa5dSJason King     uint64_t *restrict valp)
398*6a6cfa5dSJason King {
399*6a6cfa5dSJason King 	strview_t snum;
400*6a6cfa5dSJason King 	uint64_t v = 0;
401*6a6cfa5dSJason King 	size_t ndigits = 0;
402*6a6cfa5dSJason King 	char c;
403*6a6cfa5dSJason King 
404*6a6cfa5dSJason King 	if (st->rds_error != 0)
405*6a6cfa5dSJason King 		return (B_FALSE);
406*6a6cfa5dSJason King 
407*6a6cfa5dSJason King 	sv_init_sv(&snum, svp);
408*6a6cfa5dSJason King 
409*6a6cfa5dSJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum));
410*6a6cfa5dSJason King 
411*6a6cfa5dSJason King 	c = sv_peek(&snum, 0);
412*6a6cfa5dSJason King 	if (!ISDIGIT(c)) {
413*6a6cfa5dSJason King 		DEMDEBUG("%s: ERROR no digits in str\n", __func__);
414*6a6cfa5dSJason King 		st->rds_error = EINVAL;
415*6a6cfa5dSJason King 		return (B_FALSE);
416*6a6cfa5dSJason King 	}
417*6a6cfa5dSJason King 
418*6a6cfa5dSJason King 	/*
419*6a6cfa5dSJason King 	 * Since there is currently no official specification on rust name
420*6a6cfa5dSJason King 	 * mangling, only that it has been stated that rust follows what
421*6a6cfa5dSJason King 	 * C++ mangling does.  In the Itanium C++ ABI (what practically
422*6a6cfa5dSJason King 	 * every non-Windows C++ implementation uses these days), it
423*6a6cfa5dSJason King 	 * explicitly disallows leading 0s in numeric values (except for
424*6a6cfa5dSJason King 	 * substition and template indexes, which aren't relevant here).
425*6a6cfa5dSJason King 	 * We enforce the same restriction -- if a rust implementation allowed
426*6a6cfa5dSJason King 	 * leading zeros in numbers (basically segment lengths) it'd
427*6a6cfa5dSJason King 	 * cause all sorts of ambiguity problems with names that likely lead
428*6a6cfa5dSJason King 	 * to much bigger problems with linking and such, so this seems
429*6a6cfa5dSJason King 	 * reasonable.
430*6a6cfa5dSJason King 	 */
431*6a6cfa5dSJason King 	if (c == '0') {
432*6a6cfa5dSJason King 		DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__);
433*6a6cfa5dSJason King 		st->rds_error = EINVAL;
434*6a6cfa5dSJason King 		return (B_FALSE);
435*6a6cfa5dSJason King 	}
436*6a6cfa5dSJason King 
437*6a6cfa5dSJason King 	while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) {
438*6a6cfa5dSJason King 		c = sv_consume_c(&snum);
439*6a6cfa5dSJason King 
440*6a6cfa5dSJason King 		if (!ISDIGIT(c))
441*6a6cfa5dSJason King 			break;
442*6a6cfa5dSJason King 
443*6a6cfa5dSJason King 		v *= 10;
444*6a6cfa5dSJason King 		v += c - '0';
445*6a6cfa5dSJason King 		ndigits++;
446*6a6cfa5dSJason King 	}
447*6a6cfa5dSJason King 
448*6a6cfa5dSJason King 	if (ndigits > MAX_DIGITS) {
449*6a6cfa5dSJason King 		DEMDEBUG("%s: value %llu is too large\n", __func__, v);
450*6a6cfa5dSJason King 		st->rds_error = ERANGE;
451*6a6cfa5dSJason King 		return (B_FALSE);
452*6a6cfa5dSJason King 	}
453*6a6cfa5dSJason King 
454*6a6cfa5dSJason King 	DEMDEBUG("%s: num=%llu", __func__, v);
455*6a6cfa5dSJason King 
456*6a6cfa5dSJason King 	*valp = v;
457*6a6cfa5dSJason King 	sv_consume_n(svp, ndigits);
458*6a6cfa5dSJason King 	return (B_TRUE);
459*6a6cfa5dSJason King }
460*6a6cfa5dSJason King 
461*6a6cfa5dSJason King static boolean_t
462*6a6cfa5dSJason King rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp)
463*6a6cfa5dSJason King {
464*6a6cfa5dSJason King 	if (st->rds_error != 0)
465*6a6cfa5dSJason King 		return (B_FALSE);
466*6a6cfa5dSJason King 
467*6a6cfa5dSJason King 	if (sv_peek(svp, 0) != '$')
468*6a6cfa5dSJason King 		return (B_FALSE);
469*6a6cfa5dSJason King 
470*6a6cfa5dSJason King 	for (size_t i = 0; i < rust_charmap_sz; i++) {
471*6a6cfa5dSJason King 		if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) {
472*6a6cfa5dSJason King 			if (!rustdem_append_c(st, rust_charmap[i].ruc_ch))
473*6a6cfa5dSJason King 				return (B_FALSE);
474*6a6cfa5dSJason King 			return (B_TRUE);
475*6a6cfa5dSJason King 		}
476*6a6cfa5dSJason King 	}
477*6a6cfa5dSJason King 	return (B_FALSE);
478*6a6cfa5dSJason King }
479*6a6cfa5dSJason King 
480*6a6cfa5dSJason King static boolean_t
481*6a6cfa5dSJason King rustdem_add_sep(rustdem_state_t *st)
482*6a6cfa5dSJason King {
483*6a6cfa5dSJason King 	if (st->rds_error != 0)
484*6a6cfa5dSJason King 		return (B_FALSE);
485*6a6cfa5dSJason King 
486*6a6cfa5dSJason King 	if (!rustdem_append_c(st, ':') ||
487*6a6cfa5dSJason King 	    !rustdem_append_c(st, ':'))
488*6a6cfa5dSJason King 		return (B_FALSE);
489*6a6cfa5dSJason King 
490*6a6cfa5dSJason King 	return (B_TRUE);
491*6a6cfa5dSJason King }
492*6a6cfa5dSJason King 
493*6a6cfa5dSJason King static boolean_t
494*6a6cfa5dSJason King rustdem_append_c(rustdem_state_t *st, char c)
495*6a6cfa5dSJason King {
496*6a6cfa5dSJason King 	if (st->rds_error != 0)
497*6a6cfa5dSJason King 		return (B_FALSE);
498*6a6cfa5dSJason King 
499*6a6cfa5dSJason King 	if (custr_appendc(st->rds_demangled, c) == 0)
500*6a6cfa5dSJason King 		return (B_TRUE);
501*6a6cfa5dSJason King 
502*6a6cfa5dSJason King 	st->rds_error = errno;
503*6a6cfa5dSJason King 	return (B_FALSE);
504*6a6cfa5dSJason King }
505*6a6cfa5dSJason King 
506*6a6cfa5dSJason King static boolean_t
507*6a6cfa5dSJason King rustdem_all_ascii(const strview_t *svp)
508*6a6cfa5dSJason King {
509*6a6cfa5dSJason King 	strview_t p;
510*6a6cfa5dSJason King 
511*6a6cfa5dSJason King 	sv_init_sv(&p, svp);
512*6a6cfa5dSJason King 
513*6a6cfa5dSJason King 	while (sv_remaining(&p) > 0) {
514*6a6cfa5dSJason King 		char c = sv_consume_c(&p);
515*6a6cfa5dSJason King 
516*6a6cfa5dSJason King 		/*
517*6a6cfa5dSJason King 		 * #including <sys/ctype.h> conflicts with <ctype.h>.  Since
518*6a6cfa5dSJason King 		 * we want the C locale macros (ISDIGIT, etc), it also means
519*6a6cfa5dSJason King 		 * we can't use isascii(3C).
520*6a6cfa5dSJason King 		 */
521*6a6cfa5dSJason King 		if ((c & 0x80) != 0) {
522*6a6cfa5dSJason King 			DEMDEBUG("%s: found non-ascii character 0x%02hhx at "
523*6a6cfa5dSJason King 			    "offset %tu", __func__, c,
524*6a6cfa5dSJason King 			    (ptrdiff_t)(p.sv_first - svp->sv_first));
525*6a6cfa5dSJason King 			return (B_FALSE);
526*6a6cfa5dSJason King 		}
527*6a6cfa5dSJason King 	}
528*6a6cfa5dSJason King 	return (B_TRUE);
529*6a6cfa5dSJason King }
530*6a6cfa5dSJason King 
531*6a6cfa5dSJason King static void *
532*6a6cfa5dSJason King rustdem_alloc(custr_alloc_t *cao, size_t len)
533*6a6cfa5dSJason King {
534*6a6cfa5dSJason King 	rustdem_state_t *st = cao->cua_arg;
535*6a6cfa5dSJason King 	return (zalloc(st->rds_ops, len));
536*6a6cfa5dSJason King }
537*6a6cfa5dSJason King 
538*6a6cfa5dSJason King static void
539*6a6cfa5dSJason King rustdem_free(custr_alloc_t *cao, void *p, size_t len)
540*6a6cfa5dSJason King {
541*6a6cfa5dSJason King 	rustdem_state_t *st = cao->cua_arg;
542*6a6cfa5dSJason King 	xfree(st->rds_ops, p, len);
543*6a6cfa5dSJason King }
544