1*1cd08393SJason King /*
2*1cd08393SJason King  * This file and its contents are supplied under the terms of the
3*1cd08393SJason King  * Common Development and Distribution License ("CDDL"), version 1.0.
4*1cd08393SJason King  * You may only use this file in accordance with the terms of version
5*1cd08393SJason King  * 1.0 of the CDDL.
6*1cd08393SJason King  *
7*1cd08393SJason King  * A full copy of the text of the CDDL should have accompanied this
8*1cd08393SJason King  * source.  A copy of the CDDL is also available via the Internet at
9*1cd08393SJason King  * http://www.illumos.org/license/CDDL.
10*1cd08393SJason King  */
11*1cd08393SJason King 
12*1cd08393SJason King /*
13*1cd08393SJason King  * Copyright 2019 Joyent, Inc.
14*1cd08393SJason King  * Copyright 2021 Jason King
15*1cd08393SJason King  */
16*1cd08393SJason King 
17*1cd08393SJason King /* BEGIN CSTYLED */
18*1cd08393SJason King 
19*1cd08393SJason King /*
20*1cd08393SJason King  * This implements the 'symbol_name_mangling_v2' demangling for rust as
21*1cd08393SJason King  * described in Rust RFC 2603 as opposed to the original (now called
22*1cd08393SJason King  * legacy) mangling older versions of rust used (implemented in rust.c).
23*1cd08393SJason King  *
24*1cd08393SJason King  * The specification can be viewed at:
25*1cd08393SJason King  *     https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md
26*1cd08393SJason King  */
27*1cd08393SJason King 
28*1cd08393SJason King /* END CSTYLED */
29*1cd08393SJason King 
30*1cd08393SJason King #include <errno.h>
31*1cd08393SJason King #include <libcustr.h>
32*1cd08393SJason King #include <stdarg.h>
33*1cd08393SJason King #include <stdio.h>
34*1cd08393SJason King #include <stdlib.h>
35*1cd08393SJason King #include <string.h>
36*1cd08393SJason King 
37*1cd08393SJason King #include "rust.h"
38*1cd08393SJason King 
/*
 * Help track amount of additional output added to rs_demangled across
 * a function call (to allow that portion to be output for debugging).
 *
 * SAVE_LEN() records the current length of rs_demangled in _len.  The
 * expansion is fully parenthesized so it behaves as a single expression
 * regardless of the context it is used in.
 *
 * CSTR_END() deliberately expands to _two_ comma separated arguments
 * (an int length followed by a pointer to the text added since _len was
 * saved) suitable for a "%.*s" conversion, so it cannot be wrapped in an
 * outer set of parentheses.
 */
#define	SAVE_LEN(_st, _len) ((_len) = custr_len((_st)->rs_demangled))
#define	CSTR_END(_st, _len)					\
	((int)(custr_len((_st)->rs_demangled) - (_len))),	\
	custr_cstr((_st)->rs_demangled) + (_len)
47*1cd08393SJason King 
/*
 * Classification of the type of a const value, as returned by
 * rustv0_classify_const_type().  CTC_INVALID is the only negative value.
 */
typedef enum const_type_class {
	CTC_INVALID	= -1,
	CTC_UNSIGNED	= 0,
	CTC_SIGNED	= 1,
	CTC_CHAR	= 2,
	CTC_BOOL	= 3,
} const_type_class_t;
55*1cd08393SJason King 
/*
 * Sometimes, parsing something is optional.  In this case a failure to
 * parse is fine, however we still want to consider a fatal error as
 * failure.
 *
 * _f (the parse expression) is always evaluated first.  The result is
 * true if _f succeeded, or if _f failed without leaving an error set on
 * _st; it is false only when _f failed and HAS_ERROR(_st) is true.
 */
#define	OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st))
62*1cd08393SJason King 
63*1cd08393SJason King static boolean_t rustv0_valid_sym(const strview_t *);
64*1cd08393SJason King static const_type_class_t rustv0_classify_const_type(char);
65*1cd08393SJason King static boolean_t rustv0_parse_hex_num(rust_state_t *restrict,
66*1cd08393SJason King     strview_t *restrict, uint64_t *restrict);
67*1cd08393SJason King static boolean_t rustv0_parse_base62(rust_state_t *restrict,
68*1cd08393SJason King     strview_t *restrict, uint64_t *restrict);
69*1cd08393SJason King 
70*1cd08393SJason King static boolean_t rustv0_parse_undisambiguated_identifier(
71*1cd08393SJason King     rust_state_t *restrict, strview_t *restrict, boolean_t);
72*1cd08393SJason King static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict,
73*1cd08393SJason King     strview_t *restrict, uint64_t *restrict);
74*1cd08393SJason King 
75*1cd08393SJason King static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict,
76*1cd08393SJason King     boolean_t);
77*1cd08393SJason King static boolean_t rustv0_parse_impl_path(rust_state_t *restrict,
78*1cd08393SJason King     strview_t *restrict, boolean_t);
79*1cd08393SJason King static boolean_t rustv0_parse_nested_path(rust_state_t *restrict,
80*1cd08393SJason King     strview_t *restrict, boolean_t);
81*1cd08393SJason King static boolean_t rustv0_parse_basic_type(rust_state_t *restrict,
82*1cd08393SJason King     strview_t *restrict);
83*1cd08393SJason King static boolean_t rustv0_parse_backref(rust_state_t *restrict,
84*1cd08393SJason King     strview_t *restrict,
85*1cd08393SJason King     boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t),
86*1cd08393SJason King     boolean_t);
87*1cd08393SJason King static boolean_t rustv0_parse_lifetime(rust_state_t *restrict,
88*1cd08393SJason King     strview_t *restrict);
89*1cd08393SJason King static boolean_t rustv0_parse_const(rust_state_t *restrict,
90*1cd08393SJason King     strview_t *restrict, boolean_t);
91*1cd08393SJason King static boolean_t rustv0_parse_fnsig(rust_state_t *restrict,
92*1cd08393SJason King     strview_t *restrict);
93*1cd08393SJason King static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict,
94*1cd08393SJason King     strview_t *restrict);
95*1cd08393SJason King static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict,
96*1cd08393SJason King     strview_t *restrict, boolean_t);
97*1cd08393SJason King 
98*1cd08393SJason King boolean_t
rust_demangle_v0(rust_state_t * restrict st,strview_t * restrict sv)99*1cd08393SJason King rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv)
100*1cd08393SJason King {
101*1cd08393SJason King 	boolean_t save_skip;
102*1cd08393SJason King 	boolean_t ret;
103*1cd08393SJason King 
104*1cd08393SJason King 	/* Make sure all the characters are valid */
105*1cd08393SJason King 	if (!rustv0_valid_sym(sv)) {
106*1cd08393SJason King 		st->rs_error = EINVAL;
107*1cd08393SJason King 		return (B_FALSE);
108*1cd08393SJason King 	}
109*1cd08393SJason King 
110*1cd08393SJason King 	/*
111*1cd08393SJason King 	 * <symbol-name> = "_R" [<decimal-number>] <path>
112*1cd08393SJason King 	 *	[<instantiating-crate>]
113*1cd08393SJason King 	 *
114*1cd08393SJason King 	 * We've already parsed the prefix in rust_demangle(), as well
115*1cd08393SJason King 	 * as made sure there's no [<decimal-number>] present, so
116*1cd08393SJason King 	 * start with <path>.
117*1cd08393SJason King 	 */
118*1cd08393SJason King 	if (!rustv0_parse_path(st, sv, B_TRUE))
119*1cd08393SJason King 		return (B_FALSE);
120*1cd08393SJason King 
121*1cd08393SJason King 	/* [<instantiating crate>] -- parse but don't save */
122*1cd08393SJason King 	SKIP_BEGIN(st, save_skip);
123*1cd08393SJason King 	ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE));
124*1cd08393SJason King 	SKIP_END(st, save_skip);
125*1cd08393SJason King 	if (!ret)
126*1cd08393SJason King 		return (B_FALSE);
127*1cd08393SJason King 
128*1cd08393SJason King 	/* If nothing's left, we know we're done */
129*1cd08393SJason King 	if (sv_remaining(sv) == 0)
130*1cd08393SJason King 		return (!HAS_ERROR(st));
131*1cd08393SJason King 
132*1cd08393SJason King 	/*
133*1cd08393SJason King 	 * LLVM sometimes will suffix symbols starting with a '.'
134*1cd08393SJason King 	 * followed by extra data. For things that start with
135*1cd08393SJason King 	 * ".llvm.", we discard the rest of the string.  For
136*1cd08393SJason King 	 * other things that start with '.', we copy the
137*1cd08393SJason King 	 * results to the final string. This matches
138*1cd08393SJason King 	 * what the rust native demangler crate does, and
139*1cd08393SJason King 	 * we don't see a reason to deviate from their
140*1cd08393SJason King 	 * behavior.
141*1cd08393SJason King 	 */
142*1cd08393SJason King 	if (sv_consume_if(sv, ".llvm."))
143*1cd08393SJason King 		return (!HAS_ERROR(st));
144*1cd08393SJason King 
145*1cd08393SJason King 	if (sv_peek(sv, 0) != '.') {
146*1cd08393SJason King 		DEMDEBUG("%s: Unexpected trailing data at the end of the "
147*1cd08393SJason King 		    "name: '%.*s'", __func__, SV_PRINT(sv));
148*1cd08393SJason King 		st->rs_error = EINVAL;
149*1cd08393SJason King 		return (B_FALSE);
150*1cd08393SJason King 	}
151*1cd08393SJason King 
152*1cd08393SJason King 	return (rust_append_sv(st, sv_remaining(sv), sv));
153*1cd08393SJason King }
154*1cd08393SJason King 
155*1cd08393SJason King /*
156*1cd08393SJason King  * Parse an optional list terminated by 'E'. Each result of 'fn' is
157*1cd08393SJason King  * separated by 'sep' in the output.
158*1cd08393SJason King  */
159*1cd08393SJason King static boolean_t
rustv0_parse_opt_list(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t),const char * restrict sep,boolean_t bval,size_t * restrict countp)160*1cd08393SJason King rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv,
161*1cd08393SJason King     boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t),
162*1cd08393SJason King     const char *restrict sep, boolean_t bval, size_t *restrict countp)
163*1cd08393SJason King {
164*1cd08393SJason King 	size_t count = 0;
165*1cd08393SJason King 
166*1cd08393SJason King 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
167*1cd08393SJason King 
168*1cd08393SJason King 	while (sv_remaining(sv) > 0) {
169*1cd08393SJason King 		if (sv_consume_if_c(sv, 'E')) {
170*1cd08393SJason King 			if (countp != NULL)
171*1cd08393SJason King 				*countp += count;
172*1cd08393SJason King 			return (B_TRUE);
173*1cd08393SJason King 		}
174*1cd08393SJason King 
175*1cd08393SJason King 		if (count > 0 && !rust_append(st, sep))
176*1cd08393SJason King 			return (B_FALSE);
177*1cd08393SJason King 
178*1cd08393SJason King 		if (!fn(st, sv, bval))
179*1cd08393SJason King 			return (B_FALSE);
180*1cd08393SJason King 
181*1cd08393SJason King 		count++;
182*1cd08393SJason King 	}
183*1cd08393SJason King 
184*1cd08393SJason King 	/*
185*1cd08393SJason King 	 * An optional list should terminate with an 'E'.  If we get here,
186*1cd08393SJason King 	 * we ran out of charaters and didn't terminate as we should.
187*1cd08393SJason King 	 */
188*1cd08393SJason King 	return (B_FALSE);
189*1cd08393SJason King }
190*1cd08393SJason King 
191*1cd08393SJason King static boolean_t
rustv0_parse_uint_type(rust_state_t * restrict st,strview_t * sv)192*1cd08393SJason King rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv)
193*1cd08393SJason King {
194*1cd08393SJason King 	const char *str = NULL;
195*1cd08393SJason King 	strview_t save;
196*1cd08393SJason King 	char c;
197*1cd08393SJason King 
198*1cd08393SJason King 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
199*1cd08393SJason King 		return (B_FALSE);
200*1cd08393SJason King 
201*1cd08393SJason King 	sv_init_sv(&save, sv);
202*1cd08393SJason King 
203*1cd08393SJason King 	switch (c = sv_consume_c(sv)) {
204*1cd08393SJason King 	case 'h':
205*1cd08393SJason King 		str = "u8";
206*1cd08393SJason King 		break;
207*1cd08393SJason King 	case 't':
208*1cd08393SJason King 		str = "u16";
209*1cd08393SJason King 		break;
210*1cd08393SJason King 	case 'm':
211*1cd08393SJason King 		str = "u32";
212*1cd08393SJason King 		break;
213*1cd08393SJason King 	case 'y':
214*1cd08393SJason King 		str = "u64";
215*1cd08393SJason King 		break;
216*1cd08393SJason King 	case 'o':
217*1cd08393SJason King 		str = "u128";
218*1cd08393SJason King 		break;
219*1cd08393SJason King 	case 'j':	/* usize */
220*1cd08393SJason King 		str = "usize";
221*1cd08393SJason King 		break;
222*1cd08393SJason King 	default:
223*1cd08393SJason King 		sv_init_sv(sv, &save);
224*1cd08393SJason King 		return (B_FALSE);
225*1cd08393SJason King 	}
226*1cd08393SJason King 
227*1cd08393SJason King 	DEMDEBUG("%s: %c -> %s", __func__, c, str);
228*1cd08393SJason King 	return (rust_append(st, str));
229*1cd08393SJason King }
230*1cd08393SJason King 
231*1cd08393SJason King static boolean_t
rustv0_parse_basic_type(rust_state_t * restrict st,strview_t * restrict sv)232*1cd08393SJason King rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv)
233*1cd08393SJason King {
234*1cd08393SJason King 	const char *str = NULL;
235*1cd08393SJason King 	strview_t save;
236*1cd08393SJason King 	char c;
237*1cd08393SJason King 
238*1cd08393SJason King 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
239*1cd08393SJason King 		return (B_FALSE);
240*1cd08393SJason King 
241*1cd08393SJason King 	if (rustv0_parse_uint_type(st, sv))
242*1cd08393SJason King 		return (B_TRUE);
243*1cd08393SJason King 
244*1cd08393SJason King 	sv_init_sv(&save, sv);
245*1cd08393SJason King 
246*1cd08393SJason King 	switch (c = sv_consume_c(sv)) {
247*1cd08393SJason King 	case 'a':
248*1cd08393SJason King 		str = "i8";
249*1cd08393SJason King 		break;
250*1cd08393SJason King 	case 'b':
251*1cd08393SJason King 		str = "bool";
252*1cd08393SJason King 		break;
253*1cd08393SJason King 	case 'c':
254*1cd08393SJason King 		str = "char";
255*1cd08393SJason King 		break;
256*1cd08393SJason King 	case 'd':
257*1cd08393SJason King 		str = "f64";
258*1cd08393SJason King 		break;
259*1cd08393SJason King 	case 'e':
260*1cd08393SJason King 		str = "str";
261*1cd08393SJason King 		break;
262*1cd08393SJason King 	case 'f':
263*1cd08393SJason King 		str = "f32";
264*1cd08393SJason King 		break;
265*1cd08393SJason King 	case 'i':
266*1cd08393SJason King 		str = "isize";
267*1cd08393SJason King 		break;
268*1cd08393SJason King 	case 'l':
269*1cd08393SJason King 		str = "i32";
270*1cd08393SJason King 		break;
271*1cd08393SJason King 	case 'n':
272*1cd08393SJason King 		str = "i128";
273*1cd08393SJason King 		break;
274*1cd08393SJason King 	case 'p':
275*1cd08393SJason King 		str = "_";
276*1cd08393SJason King 		break;
277*1cd08393SJason King 	case 's':
278*1cd08393SJason King 		str = "i16";
279*1cd08393SJason King 		break;
280*1cd08393SJason King 	case 'u':
281*1cd08393SJason King 		str = "()";
282*1cd08393SJason King 		break;
283*1cd08393SJason King 	case 'v':
284*1cd08393SJason King 		str = "...";
285*1cd08393SJason King 		break;
286*1cd08393SJason King 	case 'x':
287*1cd08393SJason King 		str = "i64";
288*1cd08393SJason King 		break;
289*1cd08393SJason King 	case 'z':
290*1cd08393SJason King 		str = "!";
291*1cd08393SJason King 		break;
292*1cd08393SJason King 	default:
293*1cd08393SJason King 		sv_init_sv(sv, &save);
294*1cd08393SJason King 		return (B_FALSE);
295*1cd08393SJason King 	}
296*1cd08393SJason King 
297*1cd08393SJason King 	DEMDEBUG("%s: %c -> %s", __func__, c, str);
298*1cd08393SJason King 	return (rust_append(st, str));
299*1cd08393SJason King }
300*1cd08393SJason King 
301*1cd08393SJason King static boolean_t
rustv0_parse_type(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)302*1cd08393SJason King rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv,
303*1cd08393SJason King     boolean_t dummy __unused)
304*1cd08393SJason King {
305*1cd08393SJason King 	strview_t save;
306*1cd08393SJason King 	size_t len, tuple_elem_count;
307*1cd08393SJason King 	boolean_t ret;
308*1cd08393SJason King 	char c;
309*1cd08393SJason King 
310*1cd08393SJason King 	if (HAS_ERROR(st))
311*1cd08393SJason King 		return (B_FALSE);
312*1cd08393SJason King 
313*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
314*1cd08393SJason King 
315*1cd08393SJason King 	if (sv_remaining(sv) == 0)
316*1cd08393SJason King 		return (B_FALSE);
317*1cd08393SJason King 
318*1cd08393SJason King 	SAVE_LEN(st, len);
319*1cd08393SJason King 	sv_init_sv(&save, sv);
320*1cd08393SJason King 
321*1cd08393SJason King 	switch (c = sv_consume_c(sv)) {
322*1cd08393SJason King 	case 'A':
323*1cd08393SJason King 		ret = rust_appendc(st, '[') &&
324*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE) &&
325*1cd08393SJason King 		    rust_append(st, "; ") &&
326*1cd08393SJason King 		    rustv0_parse_const(st, sv, B_FALSE) &&
327*1cd08393SJason King 		    rust_appendc(st, ']');
328*1cd08393SJason King 		break;
329*1cd08393SJason King 	case 'S':
330*1cd08393SJason King 		ret = rust_appendc(st, '[') &&
331*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE) &&
332*1cd08393SJason King 		    rust_appendc(st, ']');
333*1cd08393SJason King 		break;
334*1cd08393SJason King 	case 'T':
335*1cd08393SJason King 		tuple_elem_count = 0;
336*1cd08393SJason King 		ret = rust_appendc(st, '(') &&
337*1cd08393SJason King 		    rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ",
338*1cd08393SJason King 		    B_FALSE, &tuple_elem_count) &&
339*1cd08393SJason King 		    rust_append(st, (tuple_elem_count == 1) ? ",)" : ")");
340*1cd08393SJason King 		break;
341*1cd08393SJason King 	case 'R':
342*1cd08393SJason King 	case 'Q':
343*1cd08393SJason King 		/* `&mut T` or `&'... mut T` */
344*1cd08393SJason King 		if (!(ret = rust_appendc(st, '&')))
345*1cd08393SJason King 			break;
346*1cd08393SJason King 
347*1cd08393SJason King 		/*
348*1cd08393SJason King 		 * lifetime is optional, but we need to add a trailing
349*1cd08393SJason King 		 * space if present (so we cannot use the OPTIONAL macro).
350*1cd08393SJason King 		 */
351*1cd08393SJason King 		if (rustv0_parse_lifetime(st, sv)) {
352*1cd08393SJason King 			if (!(ret = rust_appendc(st, ' ')))
353*1cd08393SJason King 				break;
354*1cd08393SJason King 		} else if (HAS_ERROR(st)) {
355*1cd08393SJason King 			break;
356*1cd08393SJason King 		}
357*1cd08393SJason King 
358*1cd08393SJason King 		ret = rust_append(st, (c == 'Q') ? "mut " : "") &&
359*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE);
360*1cd08393SJason King 		break;
361*1cd08393SJason King 	case 'P':
362*1cd08393SJason King 		ret = rust_append(st, "*const ") &&
363*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE);
364*1cd08393SJason King 		break;
365*1cd08393SJason King 	case 'O':
366*1cd08393SJason King 		ret = rust_append(st, "*mut ") &&
367*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE);
368*1cd08393SJason King 		break;
369*1cd08393SJason King 	case 'F':
370*1cd08393SJason King 		ret = rustv0_parse_fnsig(st, sv);
371*1cd08393SJason King 		break;
372*1cd08393SJason King 	case 'D':
373*1cd08393SJason King 		ret = rust_append(st, "dyn ") &&
374*1cd08393SJason King 		    rustv0_parse_dynbounds(st, sv);
375*1cd08393SJason King 		if (!ret)
376*1cd08393SJason King 			break;
377*1cd08393SJason King 
378*1cd08393SJason King 		/*
379*1cd08393SJason King 		 * Rust RFC2603 shows the lifetime as required, however
380*1cd08393SJason King 		 * it appears this is optional.
381*1cd08393SJason King 		 */
382*1cd08393SJason King 		DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv));
383*1cd08393SJason King 
384*1cd08393SJason King 		/*
385*1cd08393SJason King 		 * We only want to print a non-zero (non "'_")
386*1cd08393SJason King 		 * lifetime.
387*1cd08393SJason King 		 */
388*1cd08393SJason King 		if (sv_consume_if(sv, "L_"))
389*1cd08393SJason King 			break;
390*1cd08393SJason King 
391*1cd08393SJason King 		/*
392*1cd08393SJason King 		 * But if there is a lifetime we want to print,
393*1cd08393SJason King 		 * we want to prepend " + " before it.
394*1cd08393SJason King 		 */
395*1cd08393SJason King 		if (sv_peek(sv, 0) == 'L' &&
396*1cd08393SJason King 		    !(ret = rust_append(st, " + ")))
397*1cd08393SJason King 			break;
398*1cd08393SJason King 
399*1cd08393SJason King 		ret = rustv0_parse_lifetime(st, sv);
400*1cd08393SJason King 		break;
401*1cd08393SJason King 	default:
402*1cd08393SJason King 		sv_init_sv(sv, &save);
403*1cd08393SJason King 
404*1cd08393SJason King 		ret = rustv0_parse_backref(st, sv, rustv0_parse_type,
405*1cd08393SJason King 		    B_FALSE) ||
406*1cd08393SJason King 		    rustv0_parse_basic_type(st, sv);
407*1cd08393SJason King 		if (ret)
408*1cd08393SJason King 			break;
409*1cd08393SJason King 
410*1cd08393SJason King 		ret = rustv0_parse_path(st, sv, B_FALSE);
411*1cd08393SJason King 		break;
412*1cd08393SJason King 	}
413*1cd08393SJason King 
414*1cd08393SJason King 	DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len),
415*1cd08393SJason King 	    ret ? "success" : "fail");
416*1cd08393SJason King 
417*1cd08393SJason King 	return (ret);
418*1cd08393SJason King }
419*1cd08393SJason King 
420*1cd08393SJason King /*
421*1cd08393SJason King  * <path> = "C" <identifier>		crate root
422*1cd08393SJason King  *	| "M" <impl-path> <type>	<T>
423*1cd08393SJason King  *	| "X" <impl-path> <type> <path>	<T as Trait> (trait impl)
424*1cd08393SJason King  *	| "Y" <type> <path>		<T as Trait> (trait definition)
425*1cd08393SJason King  *	| "N" <ns> <path> <identifier>	...::ident (nested path)
426*1cd08393SJason King  *	| "I" <path> {<generic-arg>} "E" ...<T, U>
427*1cd08393SJason King  *	| <backref>
428*1cd08393SJason King  */
429*1cd08393SJason King static boolean_t
rustv0_parse_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)430*1cd08393SJason King rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv,
431*1cd08393SJason King     boolean_t in_value)
432*1cd08393SJason King {
433*1cd08393SJason King 	strview_t save;
434*1cd08393SJason King 	uint64_t disamb = 0;
435*1cd08393SJason King 	size_t len;
436*1cd08393SJason King 	boolean_t ret = B_FALSE;
437*1cd08393SJason King 	boolean_t save_skip;
438*1cd08393SJason King 	boolean_t args_stay_save = st->rs_args_stay_open;
439*1cd08393SJason King 	boolean_t args_open_save = st->rs_args_is_open;
440*1cd08393SJason King 
441*1cd08393SJason King 	if (HAS_ERROR(st))
442*1cd08393SJason King 		return (B_FALSE);
443*1cd08393SJason King 
444*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
445*1cd08393SJason King 
446*1cd08393SJason King 	if (sv_remaining(sv) == 0)
447*1cd08393SJason King 		return (B_FALSE);
448*1cd08393SJason King 
449*1cd08393SJason King 	SAVE_LEN(st, len);
450*1cd08393SJason King 	sv_init_sv(&save, sv);
451*1cd08393SJason King 
452*1cd08393SJason King 	switch (sv_consume_c(sv)) {
453*1cd08393SJason King 	case 'C':
454*1cd08393SJason King 		if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
455*1cd08393SJason King 			goto done;
456*1cd08393SJason King 
457*1cd08393SJason King 		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
458*1cd08393SJason King 			goto done;
459*1cd08393SJason King 
460*1cd08393SJason King 		if (st->rs_verbose &&
461*1cd08393SJason King 		    !rust_append_printf(st, "[%" PRIx64 "]", disamb))
462*1cd08393SJason King 			goto done;
463*1cd08393SJason King 		break;
464*1cd08393SJason King 	case 'M':
465*1cd08393SJason King 		SKIP_BEGIN(st, save_skip);
466*1cd08393SJason King 		if (!rustv0_parse_impl_path(st, sv, in_value)) {
467*1cd08393SJason King 			SKIP_END(st, save_skip);
468*1cd08393SJason King 			goto done;
469*1cd08393SJason King 		}
470*1cd08393SJason King 		SKIP_END(st, save_skip);
471*1cd08393SJason King 
472*1cd08393SJason King 		if (!rust_appendc(st, '<') ||
473*1cd08393SJason King 		    !rustv0_parse_type(st, sv, B_FALSE) ||
474*1cd08393SJason King 		    !rust_appendc(st, '>'))
475*1cd08393SJason King 			goto done;
476*1cd08393SJason King 		break;
477*1cd08393SJason King 	case 'X':
478*1cd08393SJason King 		SKIP_BEGIN(st, save_skip);
479*1cd08393SJason King 		if (!rustv0_parse_impl_path(st, sv, in_value)) {
480*1cd08393SJason King 			SKIP_END(st, save_skip);
481*1cd08393SJason King 			goto done;
482*1cd08393SJason King 		}
483*1cd08393SJason King 		SKIP_END(st, save_skip);
484*1cd08393SJason King 		/*FALLTHRU*/
485*1cd08393SJason King 	case 'Y':
486*1cd08393SJason King 		if (!rust_appendc(st, '<') ||
487*1cd08393SJason King 		    !rustv0_parse_type(st, sv, B_FALSE) ||
488*1cd08393SJason King 		    !rust_append(st, " as ") ||
489*1cd08393SJason King 		    !rustv0_parse_path(st, sv, B_FALSE) ||
490*1cd08393SJason King 		    !rust_appendc(st, '>'))
491*1cd08393SJason King 			goto done;
492*1cd08393SJason King 		break;
493*1cd08393SJason King 	case 'N':
494*1cd08393SJason King 		if (!rustv0_parse_nested_path(st, sv, in_value))
495*1cd08393SJason King 			goto done;
496*1cd08393SJason King 		break;
497*1cd08393SJason King 	case 'I':
498*1cd08393SJason King 		st->rs_args_stay_open = B_FALSE;
499*1cd08393SJason King 		st->rs_args_is_open = B_FALSE;
500*1cd08393SJason King 
501*1cd08393SJason King 		if (!rustv0_parse_path(st, sv, in_value))
502*1cd08393SJason King 			goto done;
503*1cd08393SJason King 
504*1cd08393SJason King 		if (in_value && !rust_append(st, "::"))
505*1cd08393SJason King 			goto done;
506*1cd08393SJason King 
507*1cd08393SJason King 		if (!rust_appendc(st, '<') ||
508*1cd08393SJason King 		    !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg,
509*1cd08393SJason King 		    ", ", B_FALSE, NULL))
510*1cd08393SJason King 			goto done;
511*1cd08393SJason King 
512*1cd08393SJason King 		st->rs_args_stay_open = args_stay_save;
513*1cd08393SJason King 		st->rs_args_is_open = args_open_save;
514*1cd08393SJason King 
515*1cd08393SJason King 		/*
516*1cd08393SJason King 		 * If we were asked to not close our list, then don't and
517*1cd08393SJason King 		 * indicate that the list is open.
518*1cd08393SJason King 		 */
519*1cd08393SJason King 		if (st->rs_args_stay_open) {
520*1cd08393SJason King 			st->rs_args_stay_open = B_FALSE;
521*1cd08393SJason King 			st->rs_args_is_open = B_TRUE;
522*1cd08393SJason King 		} else if (!rust_appendc(st, '>')) {
523*1cd08393SJason King 			goto done;
524*1cd08393SJason King 		}
525*1cd08393SJason King 		break;
526*1cd08393SJason King 	default:
527*1cd08393SJason King 		/*
528*1cd08393SJason King 		 * Didn't recognize the letter, so it has to be a path. Restore
529*1cd08393SJason King 		 * sv to state prior to switch and continue.
530*1cd08393SJason King 		 */
531*1cd08393SJason King 		sv_init_sv(sv, &save);
532*1cd08393SJason King 		if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value))
533*1cd08393SJason King 			goto done;
534*1cd08393SJason King 	}
535*1cd08393SJason King 
536*1cd08393SJason King 	ret = B_TRUE;
537*1cd08393SJason King 
538*1cd08393SJason King done:
539*1cd08393SJason King 	DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len),
540*1cd08393SJason King 	    ret ? "success" : "fail");
541*1cd08393SJason King 
542*1cd08393SJason King 	return (ret);
543*1cd08393SJason King }
544*1cd08393SJason King 
545*1cd08393SJason King static boolean_t
rustv0_parse_impl_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)546*1cd08393SJason King rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv,
547*1cd08393SJason King     boolean_t in_value)
548*1cd08393SJason King {
549*1cd08393SJason King 	uint64_t val = 0;
550*1cd08393SJason King 
551*1cd08393SJason King 	return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) &&
552*1cd08393SJason King 	    rustv0_parse_path(st, sv, in_value));
553*1cd08393SJason King }
554*1cd08393SJason King 
555*1cd08393SJason King /*
556*1cd08393SJason King  * A bit of a hack -- when printing a nested path, we need to know
557*1cd08393SJason King  * if the identifier is there or not in order to correctly format
558*1cd08393SJason King  * the output preceeding it (when present). This peeks ahead and
559*1cd08393SJason King  * determines this.
560*1cd08393SJason King  */
561*1cd08393SJason King static boolean_t
rustv0_has_name(rust_state_t * restrict st,strview_t * restrict sv,boolean_t * has_namep)562*1cd08393SJason King rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv,
563*1cd08393SJason King     boolean_t *has_namep)
564*1cd08393SJason King {
565*1cd08393SJason King 	strview_t save;
566*1cd08393SJason King 
567*1cd08393SJason King 	if (HAS_ERROR(st))
568*1cd08393SJason King 		return (B_FALSE);
569*1cd08393SJason King 
570*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
571*1cd08393SJason King 
572*1cd08393SJason King 	if (sv_remaining(sv) == 0)
573*1cd08393SJason King 		return (B_FALSE);
574*1cd08393SJason King 
575*1cd08393SJason King 	sv_init_sv(&save, sv);
576*1cd08393SJason King 
577*1cd08393SJason King 	/* For checking the length, we don't care if it's punycode or not */
578*1cd08393SJason King 	(void) sv_consume_if_c(&save, 'u');
579*1cd08393SJason King 
580*1cd08393SJason King 	if (sv_remaining(sv) == 0) {
581*1cd08393SJason King 		st->rs_error = EINVAL;
582*1cd08393SJason King 		return (B_FALSE);
583*1cd08393SJason King 	}
584*1cd08393SJason King 
585*1cd08393SJason King 	if (sv_consume_if_c(&save, '0')) {
586*1cd08393SJason King 		*has_namep = B_FALSE;
587*1cd08393SJason King 		return (B_TRUE);
588*1cd08393SJason King 	}
589*1cd08393SJason King 
590*1cd08393SJason King 	*has_namep = B_TRUE;
591*1cd08393SJason King 	return (B_TRUE);
592*1cd08393SJason King }
593*1cd08393SJason King 
594*1cd08393SJason King static boolean_t
rustv0_parse_nested_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)595*1cd08393SJason King rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv,
596*1cd08393SJason King     boolean_t in_value)
597*1cd08393SJason King {
598*1cd08393SJason King 	uint64_t disambiguator = 0;
599*1cd08393SJason King 	size_t len = 0;
600*1cd08393SJason King 	char ns;
601*1cd08393SJason King 	boolean_t ret = B_FALSE;
602*1cd08393SJason King 	boolean_t has_name;
603*1cd08393SJason King 
604*1cd08393SJason King 	if (HAS_ERROR(st))
605*1cd08393SJason King 		return (B_FALSE);
606*1cd08393SJason King 
607*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
608*1cd08393SJason King 
609*1cd08393SJason King 	if (sv_remaining(sv) == 0)
610*1cd08393SJason King 		return (B_FALSE);
611*1cd08393SJason King 
612*1cd08393SJason King 	SAVE_LEN(st, len);
613*1cd08393SJason King 
614*1cd08393SJason King 	ns = sv_consume_c(sv);
615*1cd08393SJason King 
616*1cd08393SJason King 	if (!rustv0_parse_path(st, sv, in_value))
617*1cd08393SJason King 		goto done;
618*1cd08393SJason King 
619*1cd08393SJason King 	if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator)))
620*1cd08393SJason King 		goto done;
621*1cd08393SJason King 
622*1cd08393SJason King 	if (!rustv0_has_name(st, sv, &has_name))
623*1cd08393SJason King 		goto done;
624*1cd08393SJason King 
625*1cd08393SJason King 	if (ISUPPER(ns)) {
626*1cd08393SJason King 		if (!rust_append(st, "::{"))
627*1cd08393SJason King 			goto done;
628*1cd08393SJason King 
629*1cd08393SJason King 		switch (ns) {
630*1cd08393SJason King 		case 'C':
631*1cd08393SJason King 			if (!rust_append(st, "closure"))
632*1cd08393SJason King 				goto done;
633*1cd08393SJason King 			break;
634*1cd08393SJason King 		case 'S':
635*1cd08393SJason King 			if (!rust_append(st, "shim"))
636*1cd08393SJason King 				goto done;
637*1cd08393SJason King 			break;
638*1cd08393SJason King 		default:
639*1cd08393SJason King 			if (!rust_appendc(st, ns))
640*1cd08393SJason King 				goto done;
641*1cd08393SJason King 			break;
642*1cd08393SJason King 		}
643*1cd08393SJason King 
644*1cd08393SJason King 		if (has_name && !rust_appendc(st, ':'))
645*1cd08393SJason King 			goto done;
646*1cd08393SJason King 
647*1cd08393SJason King 		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
648*1cd08393SJason King 			goto done;
649*1cd08393SJason King 
650*1cd08393SJason King 		ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator);
651*1cd08393SJason King 	} else {
652*1cd08393SJason King 		if (has_name) {
653*1cd08393SJason King 			if (!(ret = rust_append(st, "::")))
654*1cd08393SJason King 				goto done;
655*1cd08393SJason King 		}
656*1cd08393SJason King 		ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE);
657*1cd08393SJason King 	}
658*1cd08393SJason King 
659*1cd08393SJason King done:
660*1cd08393SJason King 	DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len),
661*1cd08393SJason King 	    ret ? "success" : "fail");
662*1cd08393SJason King 
663*1cd08393SJason King 	return (ret);
664*1cd08393SJason King }
665*1cd08393SJason King 
666*1cd08393SJason King /*
667*1cd08393SJason King  * <disambiguator> = "s" <base-64-number>
668*1cd08393SJason King  *
669*1cd08393SJason King  */
670*1cd08393SJason King static boolean_t
rustv0_parse_disambiguator(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * valp)671*1cd08393SJason King rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv,
672*1cd08393SJason King     uint64_t *valp)
673*1cd08393SJason King {
674*1cd08393SJason King 	if (HAS_ERROR(st) || sv_remaining(sv) < 2)
675*1cd08393SJason King 		return (B_FALSE);
676*1cd08393SJason King 
677*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
678*1cd08393SJason King 
679*1cd08393SJason King 	*valp = 0;
680*1cd08393SJason King 
681*1cd08393SJason King 	if (!sv_consume_if_c(sv, 's'))
682*1cd08393SJason King 		return (B_FALSE);
683*1cd08393SJason King 
684*1cd08393SJason King 	if (!rustv0_parse_base62(st, sv, valp)) {
685*1cd08393SJason King 		st->rs_error = EINVAL;
686*1cd08393SJason King 		return (B_FALSE);
687*1cd08393SJason King 	}
688*1cd08393SJason King 
689*1cd08393SJason King 	/*
690*1cd08393SJason King 	 * Rust RFC 2603 details this in Appendix A, but not the main
691*1cd08393SJason King 	 * portion of the RFC. If no disambiguator is present, the value
692*1cd08393SJason King 	 * is 0, if the decoded value is 0, the index is 1, ...
693*1cd08393SJason King 	 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we
694*1cd08393SJason King 	 * only need to add one here to complete the adjustment.
695*1cd08393SJason King 	 */
696*1cd08393SJason King 	*valp = *valp + 1;
697*1cd08393SJason King 
698*1cd08393SJason King 	DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp);
699*1cd08393SJason King 	return (B_TRUE);
700*1cd08393SJason King }
701*1cd08393SJason King 
702*1cd08393SJason King /* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */
703*1cd08393SJason King static boolean_t
rustv0_parse_undisambiguated_identifier(rust_state_t * restrict st,strview_t * restrict sv,boolean_t repl_underscore)704*1cd08393SJason King rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st,
705*1cd08393SJason King     strview_t *restrict sv, boolean_t repl_underscore)
706*1cd08393SJason King {
707*1cd08393SJason King 	uint64_t len = 0;
708*1cd08393SJason King 	boolean_t puny = B_FALSE;
709*1cd08393SJason King 
710*1cd08393SJason King 	if (HAS_ERROR(st))
711*1cd08393SJason King 		return (B_FALSE);
712*1cd08393SJason King 
713*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
714*1cd08393SJason King 
715*1cd08393SJason King 	if (sv_remaining(sv) == 0)
716*1cd08393SJason King 		return (B_FALSE);
717*1cd08393SJason King 
718*1cd08393SJason King 	if (sv_consume_if_c(sv, 'u'))
719*1cd08393SJason King 		puny = B_TRUE;
720*1cd08393SJason King 
721*1cd08393SJason King 	if (!rust_parse_base10(st, sv, &len))
722*1cd08393SJason King 		return (B_FALSE);
723*1cd08393SJason King 
724*1cd08393SJason King 	/* skip optional separator '_' */
725*1cd08393SJason King 	(void) sv_consume_if_c(sv, '_');
726*1cd08393SJason King 
727*1cd08393SJason King 	if (sv_remaining(sv) < len) {
728*1cd08393SJason King 		DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") "
729*1cd08393SJason King 		    "> remaining bytes (%zu)", __func__, len,
730*1cd08393SJason King 		    sv_remaining(sv));
731*1cd08393SJason King 		return (B_FALSE);
732*1cd08393SJason King 	}
733*1cd08393SJason King 
734*1cd08393SJason King 	/* 0 length identifiers are acceptable */
735*1cd08393SJason King 	if (len == 0)
736*1cd08393SJason King 		return (B_TRUE);
737*1cd08393SJason King 
738*1cd08393SJason King 	if (puny) {
739*1cd08393SJason King 		strview_t ident;
740*1cd08393SJason King 
741*1cd08393SJason King 		sv_init_sv_range(&ident, sv, len);
742*1cd08393SJason King 		if (!rustv0_puny_decode(st, &ident, repl_underscore))
743*1cd08393SJason King 			return (B_FALSE);
744*1cd08393SJason King 
745*1cd08393SJason King 		sv_consume_n(sv, len);
746*1cd08393SJason King 		return (B_TRUE);
747*1cd08393SJason King 	}
748*1cd08393SJason King 
749*1cd08393SJason King 	/*
750*1cd08393SJason King 	 * rust identifiers do not contain '-'. However ABI identifiers
751*1cd08393SJason King 	 * are allowed to contain them (e.g. extern "foo-bar" fn ...).
752*1cd08393SJason King 	 * They are substituted with '_' in the mangled output. If we
753*1cd08393SJason King 	 * do not need to reverse this, we can just append 'len' bytes
754*1cd08393SJason King 	 * of sv.  Otherwise we need to go through and reverse this
755*1cd08393SJason King 	 * substitution.
756*1cd08393SJason King 	 */
757*1cd08393SJason King 	if (!repl_underscore)
758*1cd08393SJason King 		return (rust_append_sv(st, len, sv));
759*1cd08393SJason King 
760*1cd08393SJason King 	/*
761*1cd08393SJason King 	 * We checked earlier that len < sv_remaining(sv); so this loop
762*1cd08393SJason King 	 * cannot overrun.
763*1cd08393SJason King 	 */
764*1cd08393SJason King 	for (size_t i = 0; i < len; i++) {
765*1cd08393SJason King 		char c = sv_consume_c(sv);
766*1cd08393SJason King 
767*1cd08393SJason King 		if (c == '_')
768*1cd08393SJason King 			c = '-';
769*1cd08393SJason King 
770*1cd08393SJason King 		if (!rust_appendc(st, c))
771*1cd08393SJason King 			return (B_FALSE);
772*1cd08393SJason King 	}
773*1cd08393SJason King 
774*1cd08393SJason King 	return (B_TRUE);
775*1cd08393SJason King }
776*1cd08393SJason King 
777*1cd08393SJason King /* <backref> = "B" <base-62-number> */
778*1cd08393SJason King static boolean_t
rustv0_parse_backref(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t b),boolean_t bval)779*1cd08393SJason King rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv,
780*1cd08393SJason King     boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b),
781*1cd08393SJason King     boolean_t bval)
782*1cd08393SJason King {
783*1cd08393SJason King 	strview_t backref;
784*1cd08393SJason King 	strview_t target;
785*1cd08393SJason King 	uint64_t idx = 0;
786*1cd08393SJason King 	size_t save_len;
787*1cd08393SJason King 	size_t len;
788*1cd08393SJason King 
789*1cd08393SJason King 	if (HAS_ERROR(st))
790*1cd08393SJason King 		return (B_FALSE);