1*1cd08393SJason King /*
2*1cd08393SJason King  * This file and its contents are supplied under the terms of the
3*1cd08393SJason King  * Common Development and Distribution License ("CDDL"), version 1.0.
4*1cd08393SJason King  * You may only use this file in accordance with the terms of version
5*1cd08393SJason King  * 1.0 of the CDDL.
6*1cd08393SJason King  *
7*1cd08393SJason King  * A full copy of the text of the CDDL should have accompanied this
8*1cd08393SJason King  * source.  A copy of the CDDL is also available via the Internet at
9*1cd08393SJason King  * http://www.illumos.org/license/CDDL.
10*1cd08393SJason King  */
11*1cd08393SJason King 
12*1cd08393SJason King /*
13*1cd08393SJason King  * Copyright 2019 Joyent, Inc.
14*1cd08393SJason King  * Copyright 2021 Jason King
15*1cd08393SJason King  */
16*1cd08393SJason King 
17*1cd08393SJason King /* BEGIN CSTYLED */
18*1cd08393SJason King 
19*1cd08393SJason King /*
20*1cd08393SJason King  * This implements the 'symbol_name_mangling_v2' demangling for rust as
21*1cd08393SJason King  * described in Rust RFC 2603 as opposed to the original (now called
22*1cd08393SJason King  * legacy) mangling older versions of rust used (implemented in rust.c).
23*1cd08393SJason King  *
24*1cd08393SJason King  * The specification can be viewed at:
25*1cd08393SJason King  *     https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md
26*1cd08393SJason King  */
27*1cd08393SJason King 
28*1cd08393SJason King /* END CSTYLED */
29*1cd08393SJason King 
30*1cd08393SJason King #include <errno.h>
31*1cd08393SJason King #include <libcustr.h>
32*1cd08393SJason King #include <stdarg.h>
33*1cd08393SJason King #include <stdio.h>
34*1cd08393SJason King #include <stdlib.h>
35*1cd08393SJason King #include <string.h>
36*1cd08393SJason King 
37*1cd08393SJason King #include "rust.h"
38*1cd08393SJason King 
/*
 * Help track amount of additional output added to rs_demangled across
 * a function call (to allow that portion to be output for debugging).
 *
 * SAVE_LEN() stores the current length of rs_demangled into _len.
 *
 * CSTR_END() expands to TWO comma separated expressions: the number of
 * bytes (as an int) appended to rs_demangled since _len was saved, and a
 * pointer to the start of that appended text.  It is meant to supply the
 * argument pair for a "%.*s" conversion in a debug message.
 */
#define	SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled)
#define	CSTR_END(_st, _len)					\
	((int)(custr_len((_st)->rs_demangled) - (_len))),	\
	custr_cstr((_st)->rs_demangled) + (_len)
/*
 * The class of type a const value may have; this is the result type of
 * rustv0_classify_const_type().  CTC_INVALID is the only negative value.
 */
typedef enum const_type_class {
	CTC_INVALID = -1,
	CTC_UNSIGNED,
	CTC_SIGNED,
	CTC_CHAR,
	CTC_BOOL,
} const_type_class_t;
55*1cd08393SJason King 
/*
 * Sometimes, parsing something is optional.  In this case a failure to
 * parse is fine, however we still want to consider a fatal error as
 * failure.
 *
 * OPTIONAL() is true when the parse expression _f succeeded, or when it
 * failed but HAS_ERROR(_st) is false afterwards.  _f is evaluated first,
 * and HAS_ERROR() is only consulted when _f fails.
 */
#define	OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st))
62*1cd08393SJason King 
/*
 * Forward declarations.  The parsing functions are mutually recursive
 * (e.g. a path may contain a type, which may in turn contain a path), so
 * several of them are referenced before they are defined.
 */

/* Input validation and number parsing */
static boolean_t rustv0_valid_sym(const strview_t *);
static const_type_class_t rustv0_classify_const_type(char);
static boolean_t rustv0_parse_hex_num(rust_state_t *restrict,
    strview_t *restrict, uint64_t *restrict);
static boolean_t rustv0_parse_base62(rust_state_t *restrict,
    strview_t *restrict, uint64_t *restrict);

/* Identifiers */
static boolean_t rustv0_parse_undisambiguated_identifier(
    rust_state_t *restrict, strview_t *restrict, boolean_t);
static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict,
    strview_t *restrict, uint64_t *restrict);

/* Paths, types and the remaining productions */
static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict,
    boolean_t);
static boolean_t rustv0_parse_impl_path(rust_state_t *restrict,
    strview_t *restrict, boolean_t);
static boolean_t rustv0_parse_nested_path(rust_state_t *restrict,
    strview_t *restrict, boolean_t);
static boolean_t rustv0_parse_basic_type(rust_state_t *restrict,
    strview_t *restrict);
static boolean_t rustv0_parse_backref(rust_state_t *restrict,
    strview_t *restrict,
    boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t),
    boolean_t);
static boolean_t rustv0_parse_lifetime(rust_state_t *restrict,
    strview_t *restrict);
static boolean_t rustv0_parse_const(rust_state_t *restrict,
    strview_t *restrict, boolean_t);
static boolean_t rustv0_parse_fnsig(rust_state_t *restrict,
    strview_t *restrict);
static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict,
    strview_t *restrict);
static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict,
    strview_t *restrict, boolean_t);
97*1cd08393SJason King 
98*1cd08393SJason King boolean_t
rust_demangle_v0(rust_state_t * restrict st,strview_t * restrict sv)99*1cd08393SJason King rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv)
100*1cd08393SJason King {
101*1cd08393SJason King 	boolean_t save_skip;
102*1cd08393SJason King 	boolean_t ret;
103*1cd08393SJason King 
104*1cd08393SJason King 	/* Make sure all the characters are valid */
105*1cd08393SJason King 	if (!rustv0_valid_sym(sv)) {
106*1cd08393SJason King 		st->rs_error = EINVAL;
107*1cd08393SJason King 		return (B_FALSE);
108*1cd08393SJason King 	}
109*1cd08393SJason King 
110*1cd08393SJason King 	/*
111*1cd08393SJason King 	 * <symbol-name> = "_R" [<decimal-number>] <path>
112*1cd08393SJason King 	 *	[<instantiating-crate>]
113*1cd08393SJason King 	 *
114*1cd08393SJason King 	 * We've already parsed the prefix in rust_demangle(), as well
115*1cd08393SJason King 	 * as made sure there's no [<decimal-number>] present, so
116*1cd08393SJason King 	 * start with <path>.
117*1cd08393SJason King 	 */
118*1cd08393SJason King 	if (!rustv0_parse_path(st, sv, B_TRUE))
119*1cd08393SJason King 		return (B_FALSE);
120*1cd08393SJason King 
121*1cd08393SJason King 	/* [<instantiating crate>] -- parse but don't save */
122*1cd08393SJason King 	SKIP_BEGIN(st, save_skip);
123*1cd08393SJason King 	ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE));
124*1cd08393SJason King 	SKIP_END(st, save_skip);
125*1cd08393SJason King 	if (!ret)
126*1cd08393SJason King 		return (B_FALSE);
127*1cd08393SJason King 
128*1cd08393SJason King 	/* If nothing's left, we know we're done */
129*1cd08393SJason King 	if (sv_remaining(sv) == 0)
130*1cd08393SJason King 		return (!HAS_ERROR(st));
131*1cd08393SJason King 
132*1cd08393SJason King 	/*
133*1cd08393SJason King 	 * LLVM sometimes will suffix symbols starting with a '.'
134*1cd08393SJason King 	 * followed by extra data. For things that start with
135*1cd08393SJason King 	 * ".llvm.", we discard the rest of the string.  For
136*1cd08393SJason King 	 * other things that start with '.', we copy the
137*1cd08393SJason King 	 * results to the final string. This matches
138*1cd08393SJason King 	 * what the rust native demangler crate does, and
139*1cd08393SJason King 	 * we don't see a reason to deviate from their
140*1cd08393SJason King 	 * behavior.
141*1cd08393SJason King 	 */
142*1cd08393SJason King 	if (sv_consume_if(sv, ".llvm."))
143*1cd08393SJason King 		return (!HAS_ERROR(st));
144*1cd08393SJason King 
145*1cd08393SJason King 	if (sv_peek(sv, 0) != '.') {
146*1cd08393SJason King 		DEMDEBUG("%s: Unexpected trailing data at the end of the "
147*1cd08393SJason King 		    "name: '%.*s'", __func__, SV_PRINT(sv));
148*1cd08393SJason King 		st->rs_error = EINVAL;
149*1cd08393SJason King 		return (B_FALSE);
150*1cd08393SJason King 	}
151*1cd08393SJason King 
152*1cd08393SJason King 	return (rust_append_sv(st, sv_remaining(sv), sv));
153*1cd08393SJason King }
154*1cd08393SJason King 
155*1cd08393SJason King /*
156*1cd08393SJason King  * Parse an optional list terminated by 'E'. Each result of 'fn' is
157*1cd08393SJason King  * separated by 'sep' in the output.
158*1cd08393SJason King  */
159*1cd08393SJason King static boolean_t
rustv0_parse_opt_list(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t),const char * restrict sep,boolean_t bval,size_t * restrict countp)160*1cd08393SJason King rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv,
161*1cd08393SJason King     boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t),
162*1cd08393SJason King     const char *restrict sep, boolean_t bval, size_t *restrict countp)
163*1cd08393SJason King {
164*1cd08393SJason King 	size_t count = 0;
165*1cd08393SJason King 
166*1cd08393SJason King 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
167*1cd08393SJason King 
168*1cd08393SJason King 	while (sv_remaining(sv) > 0) {
169*1cd08393SJason King 		if (sv_consume_if_c(sv, 'E')) {
170*1cd08393SJason King 			if (countp != NULL)
171*1cd08393SJason King 				*countp += count;
172*1cd08393SJason King 			return (B_TRUE);
173*1cd08393SJason King 		}
174*1cd08393SJason King 
175*1cd08393SJason King 		if (count > 0 && !rust_append(st, sep))
176*1cd08393SJason King 			return (B_FALSE);
177*1cd08393SJason King 
178*1cd08393SJason King 		if (!fn(st, sv, bval))
179*1cd08393SJason King 			return (B_FALSE);
180*1cd08393SJason King 
181*1cd08393SJason King 		count++;
182*1cd08393SJason King 	}
183*1cd08393SJason King 
184*1cd08393SJason King 	/*
185*1cd08393SJason King 	 * An optional list should terminate with an 'E'.  If we get here,
186*1cd08393SJason King 	 * we ran out of charaters and didn't terminate as we should.
187*1cd08393SJason King 	 */
188*1cd08393SJason King 	return (B_FALSE);
189*1cd08393SJason King }
190*1cd08393SJason King 
191*1cd08393SJason King static boolean_t
rustv0_parse_uint_type(rust_state_t * restrict st,strview_t * sv)192*1cd08393SJason King rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv)
193*1cd08393SJason King {
194*1cd08393SJason King 	const char *str = NULL;
195*1cd08393SJason King 	strview_t save;
196*1cd08393SJason King 	char c;
197*1cd08393SJason King 
198*1cd08393SJason King 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
199*1cd08393SJason King 		return (B_FALSE);
200*1cd08393SJason King 
201*1cd08393SJason King 	sv_init_sv(&save, sv);
202*1cd08393SJason King 
203*1cd08393SJason King 	switch (c = sv_consume_c(sv)) {
204*1cd08393SJason King 	case 'h':
205*1cd08393SJason King 		str = "u8";
206*1cd08393SJason King 		break;
207*1cd08393SJason King 	case 't':
208*1cd08393SJason King 		str = "u16";
209*1cd08393SJason King 		break;
210*1cd08393SJason King 	case 'm':
211*1cd08393SJason King 		str = "u32";
212*1cd08393SJason King 		break;
213*1cd08393SJason King 	case 'y':
214*1cd08393SJason King 		str = "u64";
215*1cd08393SJason King 		break;
216*1cd08393SJason King 	case 'o':
217*1cd08393SJason King 		str = "u128";
218*1cd08393SJason King 		break;
219*1cd08393SJason King 	case 'j':	/* usize */
220*1cd08393SJason King 		str = "usize";
221*1cd08393SJason King 		break;
222*1cd08393SJason King 	default:
223*1cd08393SJason King 		sv_init_sv(sv, &save);
224*1cd08393SJason King 		return (B_FALSE);
225*1cd08393SJason King 	}
226*1cd08393SJason King 
227*1cd08393SJason King 	DEMDEBUG("%s: %c -> %s", __func__, c, str);
228*1cd08393SJason King 	return (rust_append(st, str));
229*1cd08393SJason King }
230*1cd08393SJason King 
231*1cd08393SJason King static boolean_t
rustv0_parse_basic_type(rust_state_t * restrict st,strview_t * restrict sv)232*1cd08393SJason King rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv)
233*1cd08393SJason King {
234*1cd08393SJason King 	const char *str = NULL;
235*1cd08393SJason King 	strview_t save;
236*1cd08393SJason King 	char c;
237*1cd08393SJason King 
238*1cd08393SJason King 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
239*1cd08393SJason King 		return (B_FALSE);
240*1cd08393SJason King 
241*1cd08393SJason King 	if (rustv0_parse_uint_type(st, sv))
242*1cd08393SJason King 		return (B_TRUE);
243*1cd08393SJason King 
244*1cd08393SJason King 	sv_init_sv(&save, sv);
245*1cd08393SJason King 
246*1cd08393SJason King 	switch (c = sv_consume_c(sv)) {
247*1cd08393SJason King 	case 'a':
248*1cd08393SJason King 		str = "i8";
249*1cd08393SJason King 		break;
250*1cd08393SJason King 	case 'b':
251*1cd08393SJason King 		str = "bool";
252*1cd08393SJason King 		break;
253*1cd08393SJason King 	case 'c':
254*1cd08393SJason King 		str = "char";
255*1cd08393SJason King 		break;
256*1cd08393SJason King 	case 'd':
257*1cd08393SJason King 		str = "f64";
258*1cd08393SJason King 		break;
259*1cd08393SJason King 	case 'e':
260*1cd08393SJason King 		str = "str";
261*1cd08393SJason King 		break;
262*1cd08393SJason King 	case 'f':
263*1cd08393SJason King 		str = "f32";
264*1cd08393SJason King 		break;
265*1cd08393SJason King 	case 'i':
266*1cd08393SJason King 		str = "isize";
267*1cd08393SJason King 		break;
268*1cd08393SJason King 	case 'l':
269*1cd08393SJason King 		str = "i32";
270*1cd08393SJason King 		break;
271*1cd08393SJason King 	case 'n':
272*1cd08393SJason King 		str = "i128";
273*1cd08393SJason King 		break;
274*1cd08393SJason King 	case 'p':
275*1cd08393SJason King 		str = "_";
276*1cd08393SJason King 		break;
277*1cd08393SJason King 	case 's':
278*1cd08393SJason King 		str = "i16";
279*1cd08393SJason King 		break;
280*1cd08393SJason King 	case 'u':
281*1cd08393SJason King 		str = "()";
282*1cd08393SJason King 		break;
283*1cd08393SJason King 	case 'v':
284*1cd08393SJason King 		str = "...";
285*1cd08393SJason King 		break;
286*1cd08393SJason King 	case 'x':
287*1cd08393SJason King 		str = "i64";
288*1cd08393SJason King 		break;
289*1cd08393SJason King 	case 'z':
290*1cd08393SJason King 		str = "!";
291*1cd08393SJason King 		break;
292*1cd08393SJason King 	default:
293*1cd08393SJason King 		sv_init_sv(sv, &save);
294*1cd08393SJason King 		return (B_FALSE);
295*1cd08393SJason King 	}
296*1cd08393SJason King 
297*1cd08393SJason King 	DEMDEBUG("%s: %c -> %s", __func__, c, str);
298*1cd08393SJason King 	return (rust_append(st, str));
299*1cd08393SJason King }
300*1cd08393SJason King 
301*1cd08393SJason King static boolean_t
rustv0_parse_type(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)302*1cd08393SJason King rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv,
303*1cd08393SJason King     boolean_t dummy __unused)
304*1cd08393SJason King {
305*1cd08393SJason King 	strview_t save;
306*1cd08393SJason King 	size_t len, tuple_elem_count;
307*1cd08393SJason King 	boolean_t ret;
308*1cd08393SJason King 	char c;
309*1cd08393SJason King 
310*1cd08393SJason King 	if (HAS_ERROR(st))
311*1cd08393SJason King 		return (B_FALSE);
312*1cd08393SJason King 
313*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
314*1cd08393SJason King 
315*1cd08393SJason King 	if (sv_remaining(sv) == 0)
316*1cd08393SJason King 		return (B_FALSE);
317*1cd08393SJason King 
318*1cd08393SJason King 	SAVE_LEN(st, len);
319*1cd08393SJason King 	sv_init_sv(&save, sv);
320*1cd08393SJason King 
321*1cd08393SJason King 	switch (c = sv_consume_c(sv)) {
322*1cd08393SJason King 	case 'A':
323*1cd08393SJason King 		ret = rust_appendc(st, '[') &&
324*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE) &&
325*1cd08393SJason King 		    rust_append(st, "; ") &&
326*1cd08393SJason King 		    rustv0_parse_const(st, sv, B_FALSE) &&
327*1cd08393SJason King 		    rust_appendc(st, ']');
328*1cd08393SJason King 		break;
329*1cd08393SJason King 	case 'S':
330*1cd08393SJason King 		ret = rust_appendc(st, '[') &&
331*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE) &&
332*1cd08393SJason King 		    rust_appendc(st, ']');
333*1cd08393SJason King 		break;
334*1cd08393SJason King 	case 'T':
335*1cd08393SJason King 		tuple_elem_count = 0;
336*1cd08393SJason King 		ret = rust_appendc(st, '(') &&
337*1cd08393SJason King 		    rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ",
338*1cd08393SJason King 		    B_FALSE, &tuple_elem_count) &&
339*1cd08393SJason King 		    rust_append(st, (tuple_elem_count == 1) ? ",)" : ")");
340*1cd08393SJason King 		break;
341*1cd08393SJason King 	case 'R':
342*1cd08393SJason King 	case 'Q':
343*1cd08393SJason King 		/* `&mut T` or `&'... mut T` */
344*1cd08393SJason King 		if (!(ret = rust_appendc(st, '&')))
345*1cd08393SJason King 			break;
346*1cd08393SJason King 
347*1cd08393SJason King 		/*
348*1cd08393SJason King 		 * lifetime is optional, but we need to add a trailing
349*1cd08393SJason King 		 * space if present (so we cannot use the OPTIONAL macro).
350*1cd08393SJason King 		 */
351*1cd08393SJason King 		if (rustv0_parse_lifetime(st, sv)) {
352*1cd08393SJason King 			if (!(ret = rust_appendc(st, ' ')))
353*1cd08393SJason King 				break;
354*1cd08393SJason King 		} else if (HAS_ERROR(st)) {
355*1cd08393SJason King 			break;
356*1cd08393SJason King 		}
357*1cd08393SJason King 
358*1cd08393SJason King 		ret = rust_append(st, (c == 'Q') ? "mut " : "") &&
359*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE);
360*1cd08393SJason King 		break;
361*1cd08393SJason King 	case 'P':
362*1cd08393SJason King 		ret = rust_append(st, "*const ") &&
363*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE);
364*1cd08393SJason King 		break;
365*1cd08393SJason King 	case 'O':
366*1cd08393SJason King 		ret = rust_append(st, "*mut ") &&
367*1cd08393SJason King 		    rustv0_parse_type(st, sv, B_FALSE);
368*1cd08393SJason King 		break;
369*1cd08393SJason King 	case 'F':
370*1cd08393SJason King 		ret = rustv0_parse_fnsig(st, sv);
371*1cd08393SJason King 		break;
372*1cd08393SJason King 	case 'D':
373*1cd08393SJason King 		ret = rust_append(st, "dyn ") &&
374*1cd08393SJason King 		    rustv0_parse_dynbounds(st, sv);
375*1cd08393SJason King 		if (!ret)
376*1cd08393SJason King 			break;
377*1cd08393SJason King 
378*1cd08393SJason King 		/*
379*1cd08393SJason King 		 * Rust RFC2603 shows the lifetime as required, however
380*1cd08393SJason King 		 * it appears this is optional.
381*1cd08393SJason King 		 */
382*1cd08393SJason King 		DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv));
383*1cd08393SJason King 
384*1cd08393SJason King 		/*
385*1cd08393SJason King 		 * We only want to print a non-zero (non "'_")
386*1cd08393SJason King 		 * lifetime.
387*1cd08393SJason King 		 */
388*1cd08393SJason King 		if (sv_consume_if(sv, "L_"))
389*1cd08393SJason King 			break;
390*1cd08393SJason King 
391*1cd08393SJason King 		/*
392*1cd08393SJason King 		 * But if there is a lifetime we want to print,
393*1cd08393SJason King 		 * we want to prepend " + " before it.
394*1cd08393SJason King 		 */
395*1cd08393SJason King 		if (sv_peek(sv, 0) == 'L' &&
396*1cd08393SJason King 		    !(ret = rust_append(st, " + ")))
397*1cd08393SJason King 			break;
398*1cd08393SJason King 
399*1cd08393SJason King 		ret = rustv0_parse_lifetime(st, sv);
400*1cd08393SJason King 		break;
401*1cd08393SJason King 	default:
402*1cd08393SJason King 		sv_init_sv(sv, &save);
403*1cd08393SJason King 
404*1cd08393SJason King 		ret = rustv0_parse_backref(st, sv, rustv0_parse_type,
405*1cd08393SJason King 		    B_FALSE) ||
406*1cd08393SJason King 		    rustv0_parse_basic_type(st, sv);
407*1cd08393SJason King 		if (ret)
408*1cd08393SJason King 			break;
409*1cd08393SJason King 
410*1cd08393SJason King 		ret = rustv0_parse_path(st, sv, B_FALSE);
411*1cd08393SJason King 		break;
412*1cd08393SJason King 	}
413*1cd08393SJason King 
414*1cd08393SJason King 	DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len),
415*1cd08393SJason King 	    ret ? "success" : "fail");
416*1cd08393SJason King 
417*1cd08393SJason King 	return (ret);
418*1cd08393SJason King }
419*1cd08393SJason King 
420*1cd08393SJason King /*
421*1cd08393SJason King  * <path> = "C" <identifier>		crate root
422*1cd08393SJason King  *	| "M" <impl-path> <type>	<T>
423*1cd08393SJason King  *	| "X" <impl-path> <type> <path>	<T as Trait> (trait impl)
424*1cd08393SJason King  *	| "Y" <type> <path>		<T as Trait> (trait definition)
425*1cd08393SJason King  *	| "N" <ns> <path> <identifier>	...::ident (nested path)
426*1cd08393SJason King  *	| "I" <path> {<generic-arg>} "E" ...<T, U>
427*1cd08393SJason King  *	| <backref>
428*1cd08393SJason King  */
429*1cd08393SJason King static boolean_t
rustv0_parse_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)430*1cd08393SJason King rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv,
431*1cd08393SJason King     boolean_t in_value)
432*1cd08393SJason King {
433*1cd08393SJason King 	strview_t save;
434*1cd08393SJason King 	uint64_t disamb = 0;
435*1cd08393SJason King 	size_t len;
436*1cd08393SJason King 	boolean_t ret = B_FALSE;
437*1cd08393SJason King 	boolean_t save_skip;
438*1cd08393SJason King 	boolean_t args_stay_save = st->rs_args_stay_open;
439*1cd08393SJason King 	boolean_t args_open_save = st->rs_args_is_open;
440*1cd08393SJason King 
441*1cd08393SJason King 	if (HAS_ERROR(st))
442*1cd08393SJason King 		return (B_FALSE);
443*1cd08393SJason King 
444*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
445*1cd08393SJason King 
446*1cd08393SJason King 	if (sv_remaining(sv) == 0)
447*1cd08393SJason King 		return (B_FALSE);
448*1cd08393SJason King 
449*1cd08393SJason King 	SAVE_LEN(st, len);
450*1cd08393SJason King 	sv_init_sv(&save, sv);
451*1cd08393SJason King 
452*1cd08393SJason King 	switch (sv_consume_c(sv)) {
453*1cd08393SJason King 	case 'C':
454*1cd08393SJason King 		if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
455*1cd08393SJason King 			goto done;
456*1cd08393SJason King 
457*1cd08393SJason King 		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
458*1cd08393SJason King 			goto done;
459*1cd08393SJason King 
460*1cd08393SJason King 		if (st->rs_verbose &&
461*1cd08393SJason King 		    !rust_append_printf(st, "[%" PRIx64 "]", disamb))
462*1cd08393SJason King 			goto done;
463*1cd08393SJason King 		break;
464*1cd08393SJason King 	case 'M':
465*1cd08393SJason King 		SKIP_BEGIN(st, save_skip);
466*1cd08393SJason King 		if (!rustv0_parse_impl_path(st, sv, in_value)) {
467*1cd08393SJason King 			SKIP_END(st, save_skip);
468*1cd08393SJason King 			goto done;
469*1cd08393SJason King 		}
470*1cd08393SJason King 		SKIP_END(st, save_skip);
471*1cd08393SJason King 
472*1cd08393SJason King 		if (!rust_appendc(st, '<') ||
473*1cd08393SJason King 		    !rustv0_parse_type(st, sv, B_FALSE) ||
474*1cd08393SJason King 		    !rust_appendc(st, '>'))
475*1cd08393SJason King 			goto done;
476*1cd08393SJason King 		break;
477*1cd08393SJason King 	case 'X':
478*1cd08393SJason King 		SKIP_BEGIN(st, save_skip);
479*1cd08393SJason King 		if (!rustv0_parse_impl_path(st, sv, in_value)) {
480*1cd08393SJason King 			SKIP_END(st, save_skip);
481*1cd08393SJason King 			goto done;
482*1cd08393SJason King 		}
483*1cd08393SJason King 		SKIP_END(st, save_skip);
484*1cd08393SJason King 		/*FALLTHRU*/
485*1cd08393SJason King 	case 'Y':
486*1cd08393SJason King 		if (!rust_appendc(st, '<') ||
487*1cd08393SJason King 		    !rustv0_parse_type(st, sv, B_FALSE) ||
488*1cd08393SJason King 		    !rust_append(st, " as ") ||
489*1cd08393SJason King 		    !rustv0_parse_path(st, sv, B_FALSE) ||
490*1cd08393SJason King 		    !rust_appendc(st, '>'))
491*1cd08393SJason King 			goto done;
492*1cd08393SJason King 		break;
493*1cd08393SJason King 	case 'N':
494*1cd08393SJason King 		if (!rustv0_parse_nested_path(st, sv, in_value))
495*1cd08393SJason King 			goto done;
496*1cd08393SJason King 		break;
497*1cd08393SJason King 	case 'I':
498*1cd08393SJason King 		st->rs_args_stay_open = B_FALSE;
499*1cd08393SJason King 		st->rs_args_is_open = B_FALSE;
500*1cd08393SJason King 
501*1cd08393SJason King 		if (!rustv0_parse_path(st, sv, in_value))
502*1cd08393SJason King 			goto done;
503*1cd08393SJason King 
504*1cd08393SJason King 		if (in_value && !rust_append(st, "::"))
505*1cd08393SJason King 			goto done;
506*1cd08393SJason King 
507*1cd08393SJason King 		if (!rust_appendc(st, '<') ||
508*1cd08393SJason King 		    !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg,
509*1cd08393SJason King 		    ", ", B_FALSE, NULL))
510*1cd08393SJason King 			goto done;
511*1cd08393SJason King 
512*1cd08393SJason King 		st->rs_args_stay_open = args_stay_save;
513*1cd08393SJason King 		st->rs_args_is_open = args_open_save;
514*1cd08393SJason King 
515*1cd08393SJason King 		/*
516*1cd08393SJason King 		 * If we were asked to not close our list, then don't and
517*1cd08393SJason King 		 * indicate that the list is open.
518*1cd08393SJason King 		 */
519*1cd08393SJason King 		if (st->rs_args_stay_open) {
520*1cd08393SJason King 			st->rs_args_stay_open = B_FALSE;
521*1cd08393SJason King 			st->rs_args_is_open = B_TRUE;
522*1cd08393SJason King 		} else if (!rust_appendc(st, '>')) {
523*1cd08393SJason King 			goto done;
524*1cd08393SJason King 		}
525*1cd08393SJason King 		break;
526*1cd08393SJason King 	default:
527*1cd08393SJason King 		/*
528*1cd08393SJason King 		 * Didn't recognize the letter, so it has to be a path. Restore
529*1cd08393SJason King 		 * sv to state prior to switch and continue.
530*1cd08393SJason King 		 */
531*1cd08393SJason King 		sv_init_sv(sv, &save);
532*1cd08393SJason King 		if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value))
533*1cd08393SJason King 			goto done;
534*1cd08393SJason King 	}
535*1cd08393SJason King 
536*1cd08393SJason King 	ret = B_TRUE;
537*1cd08393SJason King 
538*1cd08393SJason King done:
539*1cd08393SJason King 	DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len),
540*1cd08393SJason King 	    ret ? "success" : "fail");
541*1cd08393SJason King 
542*1cd08393SJason King 	return (ret);
543*1cd08393SJason King }
544*1cd08393SJason King 
545*1cd08393SJason King static boolean_t
rustv0_parse_impl_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)546*1cd08393SJason King rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv,
547*1cd08393SJason King     boolean_t in_value)
548*1cd08393SJason King {
549*1cd08393SJason King 	uint64_t val = 0;
550*1cd08393SJason King 
551*1cd08393SJason King 	return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) &&
552*1cd08393SJason King 	    rustv0_parse_path(st, sv, in_value));
553*1cd08393SJason King }
554*1cd08393SJason King 
555*1cd08393SJason King /*
556*1cd08393SJason King  * A bit of a hack -- when printing a nested path, we need to know
557*1cd08393SJason King  * if the identifier is there or not in order to correctly format
558*1cd08393SJason King  * the output preceeding it (when present). This peeks ahead and
559*1cd08393SJason King  * determines this.
560*1cd08393SJason King  */
561*1cd08393SJason King static boolean_t
rustv0_has_name(rust_state_t * restrict st,strview_t * restrict sv,boolean_t * has_namep)562*1cd08393SJason King rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv,
563*1cd08393SJason King     boolean_t *has_namep)
564*1cd08393SJason King {
565*1cd08393SJason King 	strview_t save;
566*1cd08393SJason King 
567*1cd08393SJason King 	if (HAS_ERROR(st))
568*1cd08393SJason King 		return (B_FALSE);
569*1cd08393SJason King 
570*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
571*1cd08393SJason King 
572*1cd08393SJason King 	if (sv_remaining(sv) == 0)
573*1cd08393SJason King 		return (B_FALSE);
574*1cd08393SJason King 
575*1cd08393SJason King 	sv_init_sv(&save, sv);
576*1cd08393SJason King 
577*1cd08393SJason King 	/* For checking the length, we don't care if it's punycode or not */
578*1cd08393SJason King 	(void) sv_consume_if_c(&save, 'u');
579*1cd08393SJason King 
580*1cd08393SJason King 	if (sv_remaining(sv) == 0) {
581*1cd08393SJason King 		st->rs_error = EINVAL;
582*1cd08393SJason King 		return (B_FALSE);
583*1cd08393SJason King 	}
584*1cd08393SJason King 
585*1cd08393SJason King 	if (sv_consume_if_c(&save, '0')) {
586*1cd08393SJason King 		*has_namep = B_FALSE;
587*1cd08393SJason King 		return (B_TRUE);
588*1cd08393SJason King 	}
589*1cd08393SJason King 
590*1cd08393SJason King 	*has_namep = B_TRUE;
591*1cd08393SJason King 	return (B_TRUE);
592*1cd08393SJason King }
593*1cd08393SJason King 
594*1cd08393SJason King static boolean_t
rustv0_parse_nested_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)595*1cd08393SJason King rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv,
596*1cd08393SJason King     boolean_t in_value)
597*1cd08393SJason King {
598*1cd08393SJason King 	uint64_t disambiguator = 0;
599*1cd08393SJason King 	size_t len = 0;
600*1cd08393SJason King 	char ns;
601*1cd08393SJason King 	boolean_t ret = B_FALSE;
602*1cd08393SJason King 	boolean_t has_name;
603*1cd08393SJason King 
604*1cd08393SJason King 	if (HAS_ERROR(st))
605*1cd08393SJason King 		return (B_FALSE);
606*1cd08393SJason King 
607*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
608*1cd08393SJason King 
609*1cd08393SJason King 	if (sv_remaining(sv) == 0)
610*1cd08393SJason King 		return (B_FALSE);
611*1cd08393SJason King 
612*1cd08393SJason King 	SAVE_LEN(st, len);
613*1cd08393SJason King 
614*1cd08393SJason King 	ns = sv_consume_c(sv);
615*1cd08393SJason King 
616*1cd08393SJason King 	if (!rustv0_parse_path(st, sv, in_value))
617*1cd08393SJason King 		goto done;
618*1cd08393SJason King 
619*1cd08393SJason King 	if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator)))
620*1cd08393SJason King 		goto done;
621*1cd08393SJason King 
622*1cd08393SJason King 	if (!rustv0_has_name(st, sv, &has_name))
623*1cd08393SJason King 		goto done;
624*1cd08393SJason King 
625*1cd08393SJason King 	if (ISUPPER(ns)) {
626*1cd08393SJason King 		if (!rust_append(st, "::{"))
627*1cd08393SJason King 			goto done;
628*1cd08393SJason King 
629*1cd08393SJason King 		switch (ns) {
630*1cd08393SJason King 		case 'C':
631*1cd08393SJason King 			if (!rust_append(st, "closure"))
632*1cd08393SJason King 				goto done;
633*1cd08393SJason King 			break;
634*1cd08393SJason King 		case 'S':
635*1cd08393SJason King 			if (!rust_append(st, "shim"))
636*1cd08393SJason King 				goto done;
637*1cd08393SJason King 			break;
638*1cd08393SJason King 		default:
639*1cd08393SJason King 			if (!rust_appendc(st, ns))
640*1cd08393SJason King 				goto done;
641*1cd08393SJason King 			break;
642*1cd08393SJason King 		}
643*1cd08393SJason King 
644*1cd08393SJason King 		if (has_name && !rust_appendc(st, ':'))
645*1cd08393SJason King 			goto done;
646*1cd08393SJason King 
647*1cd08393SJason King 		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
648*1cd08393SJason King 			goto done;
649*1cd08393SJason King 
650*1cd08393SJason King 		ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator);
651*1cd08393SJason King 	} else {
652*1cd08393SJason King 		if (has_name) {
653*1cd08393SJason King 			if (!(ret = rust_append(st, "::")))
654*1cd08393SJason King 				goto done;
655*1cd08393SJason King 		}
656*1cd08393SJason King 		ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE);
657*1cd08393SJason King 	}
658*1cd08393SJason King 
659*1cd08393SJason King done:
660*1cd08393SJason King 	DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len),
661*1cd08393SJason King 	    ret ? "success" : "fail");
662*1cd08393SJason King 
663*1cd08393SJason King 	return (ret);
664*1cd08393SJason King }
665*1cd08393SJason King 
666*1cd08393SJason King /*
667*1cd08393SJason King  * <disambiguator> = "s" <base-64-number>
668*1cd08393SJason King  *
669*1cd08393SJason King  */
670*1cd08393SJason King static boolean_t
rustv0_parse_disambiguator(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * valp)671*1cd08393SJason King rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv,
672*1cd08393SJason King     uint64_t *valp)
673*1cd08393SJason King {
674*1cd08393SJason King 	if (HAS_ERROR(st) || sv_remaining(sv) < 2)
675*1cd08393SJason King 		return (B_FALSE);
676*1cd08393SJason King 
677*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
678*1cd08393SJason King 
679*1cd08393SJason King 	*valp = 0;
680*1cd08393SJason King 
681*1cd08393SJason King 	if (!sv_consume_if_c(sv, 's'))
682*1cd08393SJason King 		return (B_FALSE);
683*1cd08393SJason King 
684*1cd08393SJason King 	if (!rustv0_parse_base62(st, sv, valp)) {
685*1cd08393SJason King 		st->rs_error = EINVAL;
686*1cd08393SJason King 		return (B_FALSE);
687*1cd08393SJason King 	}
688*1cd08393SJason King 
689*1cd08393SJason King 	/*
690*1cd08393SJason King 	 * Rust RFC 2603 details this in Appendix A, but not the main
691*1cd08393SJason King 	 * portion of the RFC. If no disambiguator is present, the value
692*1cd08393SJason King 	 * is 0, if the decoded value is 0, the index is 1, ...
693*1cd08393SJason King 	 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we
694*1cd08393SJason King 	 * only need to add one here to complete the adjustment.
695*1cd08393SJason King 	 */
696*1cd08393SJason King 	*valp = *valp + 1;
697*1cd08393SJason King 
698*1cd08393SJason King 	DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp);
699*1cd08393SJason King 	return (B_TRUE);
700*1cd08393SJason King }
701*1cd08393SJason King 
702*1cd08393SJason King /* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */
703*1cd08393SJason King static boolean_t
rustv0_parse_undisambiguated_identifier(rust_state_t * restrict st,strview_t * restrict sv,boolean_t repl_underscore)704*1cd08393SJason King rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st,
705*1cd08393SJason King     strview_t *restrict sv, boolean_t repl_underscore)
706*1cd08393SJason King {
707*1cd08393SJason King 	uint64_t len = 0;
708*1cd08393SJason King 	boolean_t puny = B_FALSE;
709*1cd08393SJason King 
710*1cd08393SJason King 	if (HAS_ERROR(st))
711*1cd08393SJason King 		return (B_FALSE);
712*1cd08393SJason King 
713*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
714*1cd08393SJason King 
715*1cd08393SJason King 	if (sv_remaining(sv) == 0)
716*1cd08393SJason King 		return (B_FALSE);
717*1cd08393SJason King 
718*1cd08393SJason King 	if (sv_consume_if_c(sv, 'u'))
719*1cd08393SJason King 		puny = B_TRUE;
720*1cd08393SJason King 
721*1cd08393SJason King 	if (!rust_parse_base10(st, sv, &len))
722*1cd08393SJason King 		return (B_FALSE);
723*1cd08393SJason King 
724*1cd08393SJason King 	/* skip optional separator '_' */
725*1cd08393SJason King 	(void) sv_consume_if_c(sv, '_');
726*1cd08393SJason King 
727*1cd08393SJason King 	if (sv_remaining(sv) < len) {
728*1cd08393SJason King 		DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") "
729*1cd08393SJason King 		    "> remaining bytes (%zu)", __func__, len,
730*1cd08393SJason King 		    sv_remaining(sv));
731*1cd08393SJason King 		return (B_FALSE);
732*1cd08393SJason King 	}
733*1cd08393SJason King 
734*1cd08393SJason King 	/* 0 length identifiers are acceptable */
735*1cd08393SJason King 	if (len == 0)
736*1cd08393SJason King 		return (B_TRUE);
737*1cd08393SJason King 
738*1cd08393SJason King 	if (puny) {
739*1cd08393SJason King 		strview_t ident;
740*1cd08393SJason King 
741*1cd08393SJason King 		sv_init_sv_range(&ident, sv, len);
742*1cd08393SJason King 		if (!rustv0_puny_decode(st, &ident, repl_underscore))
743*1cd08393SJason King 			return (B_FALSE);
744*1cd08393SJason King 
745*1cd08393SJason King 		sv_consume_n(sv, len);
746*1cd08393SJason King 		return (B_TRUE);
747*1cd08393SJason King 	}
748*1cd08393SJason King 
749*1cd08393SJason King 	/*
750*1cd08393SJason King 	 * rust identifiers do not contain '-'. However ABI identifiers
751*1cd08393SJason King 	 * are allowed to contain them (e.g. extern "foo-bar" fn ...).
752*1cd08393SJason King 	 * They are substituted with '_' in the mangled output. If we
753*1cd08393SJason King 	 * do not need to reverse this, we can just append 'len' bytes
754*1cd08393SJason King 	 * of sv.  Otherwise we need to go through and reverse this
755*1cd08393SJason King 	 * substitution.
756*1cd08393SJason King 	 */
757*1cd08393SJason King 	if (!repl_underscore)
758*1cd08393SJason King 		return (rust_append_sv(st, len, sv));
759*1cd08393SJason King 
760*1cd08393SJason King 	/*
761*1cd08393SJason King 	 * We checked earlier that len < sv_remaining(sv); so this loop
762*1cd08393SJason King 	 * cannot overrun.
763*1cd08393SJason King 	 */
764*1cd08393SJason King 	for (size_t i = 0; i < len; i++) {
765*1cd08393SJason King 		char c = sv_consume_c(sv);
766*1cd08393SJason King 
767*1cd08393SJason King 		if (c == '_')
768*1cd08393SJason King 			c = '-';
769*1cd08393SJason King 
770*1cd08393SJason King 		if (!rust_appendc(st, c))
771*1cd08393SJason King 			return (B_FALSE);
772*1cd08393SJason King 	}
773*1cd08393SJason King 
774*1cd08393SJason King 	return (B_TRUE);
775*1cd08393SJason King }
776*1cd08393SJason King 
777*1cd08393SJason King /* <backref> = "B" <base-62-number> */
778*1cd08393SJason King static boolean_t
rustv0_parse_backref(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t b),boolean_t bval)779*1cd08393SJason King rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv,
780*1cd08393SJason King     boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b),
781*1cd08393SJason King     boolean_t bval)
782*1cd08393SJason King {
783*1cd08393SJason King 	strview_t backref;
784*1cd08393SJason King 	strview_t target;
785*1cd08393SJason King 	uint64_t idx = 0;
786*1cd08393SJason King 	size_t save_len;
787*1cd08393SJason King 	size_t len;
788*1cd08393SJason King 
789*1cd08393SJason King 	if (HAS_ERROR(st))
790*1cd08393SJason King 		return (B_FALSE);
791*1cd08393SJason King 
792*1cd08393SJason King 	sv_init_sv(&backref, sv);
793*1cd08393SJason King 
794*1cd08393SJason King 	if (!sv_consume_if_c(sv, 'B'))
795*1cd08393SJason King 		return (B_FALSE);
796*1cd08393SJason King 
797*1cd08393SJason King 	DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv));
798*1cd08393SJason King 
799*1cd08393SJason King 	if (!rustv0_parse_base62(st, sv, &idx)) {
800*1cd08393SJason King 		st->rs_error = EINVAL;
801*1cd08393SJason King 		return (B_FALSE);
802*1cd08393SJason King 	}
803*1cd08393SJason King 
804*1cd08393SJason King 	/*
805*1cd08393SJason King 	 * Determine how many bytes we've consumed (up to the start of
806*1cd08393SJason King 	 * the current backref token).
807*1cd08393SJason King 	 */
808*1cd08393SJason King 	VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first);
809*1cd08393SJason King 	len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first);
810*1cd08393SJason King 
811*1cd08393SJason King 	/*
812*1cd08393SJason King 	 * The backref can only refer to an index prior to the start of
813*1cd08393SJason King 	 * the current backref token -- that is must always refer back in
814*1cd08393SJason King 	 * the string, never to the current position or beyond.
815*1cd08393SJason King 	 */
816*1cd08393SJason King 	if (idx >= len) {
817*1cd08393SJason King 		DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") "
818*1cd08393SJason King 		    "is out of range [0, %zu)", __func__, idx, len);
819*1cd08393SJason King 		st->rs_error = ERANGE;
820*1cd08393SJason King 		return (B_FALSE);
821*1cd08393SJason King 	}
822*1cd08393SJason King 
823*1cd08393SJason King 	/*
824*1cd08393SJason King 	 * Create a strview_t of the original string (sans prefix) by
825*1cd08393SJason King 	 * copying from st->rs_orig. The length of the target strview_t is
826*1cd08393SJason King 	 * capped to end immediately prior to this backref token. Since we
827*1cd08393SJason King 	 * enforce that backrefs must always refer to already processed
828*1cd08393SJason King 	 * portions of the string (i.e. must always refer backwards), and the
829*1cd08393SJason King 	 * length of the strview_t is set to end prior to the start of this
830*1cd08393SJason King 	 * backref token, we guarantee processing of a backref will always
831*1cd08393SJason King 	 * terminate before it can possibly encounter this backref token
832*1cd08393SJason King 	 * and cause a loop -- either the processing terminates normally or
833*1cd08393SJason King 	 * it reaches the end of the capped strview_t.
834*1cd08393SJason King 	 */
835*1cd08393SJason King 	sv_init_sv_range(&target, &st->rs_orig, len);
836*1cd08393SJason King 
837*1cd08393SJason King 	/*
838*1cd08393SJason King 	 * Consume all the input in the target strview_t up to the index
839*1cd08393SJason King 	 */
840*1cd08393SJason King 	sv_consume_n(&target, idx);
841*1cd08393SJason King 
842*1cd08393SJason King 	DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__,
843*1cd08393SJason King 	    idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : "");
844*1cd08393SJason King 
845*1cd08393SJason King 	/*
846*1cd08393SJason King 	 * If we're skipping the output, there's no reason to bother reparsing
847*1cd08393SJason King 	 * the output -- we're not going to save it. We still setup everything
848*1cd08393SJason King 	 * so that the debug output is still emitted.
849*1cd08393SJason King 	 */
850*1cd08393SJason King 	if (st->rs_skip)
851*1cd08393SJason King 		return (B_TRUE);
852*1cd08393SJason King 
853*1cd08393SJason King 	SAVE_LEN(st, save_len);
854*1cd08393SJason King 	if (!fn(st, &target, bval))
855*1cd08393SJason King 		return (B_FALSE);
856*1cd08393SJason King 
857*1cd08393SJason King 	DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len));
858*1cd08393SJason King 	return (B_TRUE);
859*1cd08393SJason King }
860*1cd08393SJason King 
861*1cd08393SJason King static boolean_t
rustv0_append_lifetime(rust_state_t * restrict st,uint64_t lifetime)862*1cd08393SJason King rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime)
863*1cd08393SJason King {
864*1cd08393SJason King 	uint64_t bound_lt;
865*1cd08393SJason King 
866*1cd08393SJason King 	if (HAS_ERROR(st))
867*1cd08393SJason King 		return (B_FALSE);
868*1cd08393SJason King 
869*1cd08393SJason King 	if (!rust_appendc(st, '\''))
870*1cd08393SJason King 		return (B_FALSE);
871*1cd08393SJason King 
872*1cd08393SJason King 	if (lifetime == 0)
873*1cd08393SJason King 		return (rust_appendc(st, '_'));
874*1cd08393SJason King 
875*1cd08393SJason King 	if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) {
876*1cd08393SJason King 		DEMDEBUG("%s: ERROR: lifetime value %" PRIu64
877*1cd08393SJason King 		    " > current depth %" PRIu64, __func__, lifetime,
878*1cd08393SJason King 		    st->rs_lt_depth);
879*1cd08393SJason King 		st->rs_lt_depth = ERANGE;
880*1cd08393SJason King 		return (B_FALSE);
881*1cd08393SJason King 	}
882*1cd08393SJason King 
883*1cd08393SJason King 	/*
884*1cd08393SJason King 	 * Use 'a, 'b, ...
885*1cd08393SJason King 	 */
886*1cd08393SJason King 	if (bound_lt < 26) {
887*1cd08393SJason King 		char c = (char)bound_lt + 'a';
888*1cd08393SJason King 		return (rust_append_printf(st, "%c", c));
889*1cd08393SJason King 	}
890*1cd08393SJason King 
891*1cd08393SJason King 	/*
892*1cd08393SJason King 	 * Otherwise, use '_123, '_456, ...
893*1cd08393SJason King 	 */
894*1cd08393SJason King 	return (rust_append_printf(st, "_%" PRIu64, bound_lt));
895*1cd08393SJason King }
896*1cd08393SJason King 
897*1cd08393SJason King static boolean_t
rustv0_parse_lifetime(rust_state_t * restrict st,strview_t * restrict sv)898*1cd08393SJason King rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv)
899*1cd08393SJason King {
900*1cd08393SJason King 	uint64_t lifetime;
901*1cd08393SJason King 
902*1cd08393SJason King 	if (!sv_consume_if_c(sv, 'L'))
903*1cd08393SJason King 		return (B_FALSE);
904*1cd08393SJason King 
905*1cd08393SJason King 	if (!rustv0_parse_base62(st, sv, &lifetime))
906*1cd08393SJason King 		return (B_FALSE);
907*1cd08393SJason King 
908*1cd08393SJason King 	return (rustv0_append_lifetime(st, lifetime));
909*1cd08393SJason King }
910*1cd08393SJason King 
911*1cd08393SJason King static boolean_t
rustv0_parse_const_data(rust_state_t * restrict st,const_type_class_t type_class,strview_t * restrict sv)912*1cd08393SJason King rustv0_parse_const_data(rust_state_t *restrict st,
913*1cd08393SJason King     const_type_class_t type_class, strview_t *restrict sv)
914*1cd08393SJason King {
915*1cd08393SJason King 	uint64_t val = 0;
916*1cd08393SJason King 	size_t save_len;
917*1cd08393SJason King 	boolean_t neg = B_FALSE;
918*1cd08393SJason King 	boolean_t ret = B_FALSE;
919*1cd08393SJason King 
920*1cd08393SJason King 	VERIFY3S(type_class, !=, CTC_INVALID);
921*1cd08393SJason King 
922*1cd08393SJason King 	if (HAS_ERROR(st))
923*1cd08393SJason King 		return (B_FALSE);
924*1cd08393SJason King 
925*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
926*1cd08393SJason King 	SAVE_LEN(st, save_len);
927*1cd08393SJason King 
928*1cd08393SJason King 	if (sv_remaining(sv) == 0)
929*1cd08393SJason King 		return (B_FALSE);
930*1cd08393SJason King 
931*1cd08393SJason King 	if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n'))
932*1cd08393SJason King 		neg = B_TRUE;
933*1cd08393SJason King 
934*1cd08393SJason King 	ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) &&
935*1cd08393SJason King 	    sv_consume_if_c(sv, '_');
936*1cd08393SJason King 	if (!ret)
937*1cd08393SJason King 		goto done;
938*1cd08393SJason King 
939*1cd08393SJason King 	switch (type_class) {
940*1cd08393SJason King 	case CTC_SIGNED:
941*1cd08393SJason King 	case CTC_UNSIGNED:
942*1cd08393SJason King 		ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val);
943*1cd08393SJason King 		break;
944*1cd08393SJason King 	case CTC_BOOL:
945*1cd08393SJason King 		if (val > 1) {
946*1cd08393SJason King 			DEMDEBUG("%s: invalid bool val %" PRIu64, __func__,
947*1cd08393SJason King 			    val);
948*1cd08393SJason King 			ret = B_FALSE;
949*1cd08393SJason King 			break;
950*1cd08393SJason King 		}
951*1cd08393SJason King 		ret = rust_append_printf(st, "%s",
952*1cd08393SJason King 		    (val == 0) ? "false" : "true");
953*1cd08393SJason King 		break;
954*1cd08393SJason King 	case CTC_CHAR:
955*1cd08393SJason King 		if (val > UINT32_MAX) {
956*1cd08393SJason King 			DEMDEBUG("%s: char value %" PRIu64 " out of range",
957*1cd08393SJason King 			    __func__, val);
958*1cd08393SJason King 			ret = B_FALSE;
959*1cd08393SJason King 			break;
960*1cd08393SJason King 		}
961*1cd08393SJason King 
962*1cd08393SJason King 		ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) &&
963*1cd08393SJason King 		    rust_appendc(st, '\'');
964*1cd08393SJason King 		break;
965*1cd08393SJason King 	default:
966*1cd08393SJason King 		ret = B_FALSE;
967*1cd08393SJason King 	}
968*1cd08393SJason King 
969*1cd08393SJason King done:
970*1cd08393SJason King 	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len),
971*1cd08393SJason King 	    ret ? "success" : "fail");
972*1cd08393SJason King 
973*1cd08393SJason King 	return (ret);
974*1cd08393SJason King }
975*1cd08393SJason King 
976*1cd08393SJason King static boolean_t
rustv0_parse_const(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)977*1cd08393SJason King rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv,
978*1cd08393SJason King     boolean_t dummy __unused)
979*1cd08393SJason King {
980*1cd08393SJason King 	strview_t type;
981*1cd08393SJason King 	size_t start_len;
982*1cd08393SJason King 	const_type_class_t ctype_class;
983*1cd08393SJason King 	char ctype;
984*1cd08393SJason King 	boolean_t save_skip;
985*1cd08393SJason King 	boolean_t ret;
986*1cd08393SJason King 
987*1cd08393SJason King 	if (HAS_ERROR(st))
988*1cd08393SJason King 		return (B_FALSE);
989*1cd08393SJason King 
990*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
991*1cd08393SJason King 	SAVE_LEN(st, start_len);
992*1cd08393SJason King 
993*1cd08393SJason King 	if (sv_remaining(sv) == 0)
994*1cd08393SJason King 		return (B_FALSE);
995*1cd08393SJason King 
996*1cd08393SJason King 	if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE))
997*1cd08393SJason King 		return (B_TRUE);
998*1cd08393SJason King 
999*1cd08393SJason King 	if (sv_consume_if_c(sv, 'p')) {
1000*1cd08393SJason King 		ret = rust_appendc(st, '_');
1001*1cd08393SJason King 		goto done;
1002*1cd08393SJason King 	}
1003*1cd08393SJason King 
1004*1cd08393SJason King 	ctype = sv_peek(sv, 0);
1005*1cd08393SJason King 	ctype_class = rustv0_classify_const_type(ctype);
1006*1cd08393SJason King 	if (ctype_class == CTC_INVALID) {
1007*1cd08393SJason King 		DEMDEBUG("%s: const type isn't a valid const generic type",
1008*1cd08393SJason King 		    __func__);
1009*1cd08393SJason King 		return (B_FALSE);
1010*1cd08393SJason King 	}
1011*1cd08393SJason King 
1012*1cd08393SJason King 	/*
1013*1cd08393SJason King 	 * This isn't spelled out clearly in Rust RFC 2603, but currently
1014*1cd08393SJason King 	 * only unsigned int types are allowed at this point. However, we
1015*1cd08393SJason King 	 * have a bit of a potential tricky situation. Unlike formatting
1016*1cd08393SJason King 	 * the other tokens, if we want to display the type, we do so
1017*1cd08393SJason King 	 * _after_ the value, even though the type appears first.
1018*1cd08393SJason King 	 *
1019*1cd08393SJason King 	 * This is bit of a hack, but we save off the input position from
1020*1cd08393SJason King 	 * sv before the parse the type. We then parse it without saving
1021*1cd08393SJason King 	 * the resulting value, then parse and output the constant. If
1022*1cd08393SJason King 	 * we wish to then display the type, we can go back and parse
1023*1cd08393SJason King 	 * the type again, this time saving the result.
1024*1cd08393SJason King 	 */
1025*1cd08393SJason King 	sv_init_sv(&type, sv);
1026*1cd08393SJason King 
1027*1cd08393SJason King 	SKIP_BEGIN(st, save_skip);
1028*1cd08393SJason King 	ret = rustv0_parse_type(st, sv, B_FALSE);
1029*1cd08393SJason King 	SKIP_END(st, save_skip);
1030*1cd08393SJason King 
1031*1cd08393SJason King 	if (!ret) {
1032*1cd08393SJason King 		DEMDEBUG("%s: const type isn't valid", __func__);
1033*1cd08393SJason King 		return (B_FALSE);
1034*1cd08393SJason King 	}
1035*1cd08393SJason King 
1036*1cd08393SJason King 	if (sv_consume_if_c(sv, 'p')) {
1037*1cd08393SJason King 		ret = rust_appendc(st, '_');
1038*1cd08393SJason King 	} else {
1039*1cd08393SJason King 		ret = rustv0_parse_const_data(st, ctype_class, sv);
1040*1cd08393SJason King 	}
1041*1cd08393SJason King 	if (!ret)
1042*1cd08393SJason King 		goto done;
1043*1cd08393SJason King 
1044*1cd08393SJason King 	if (st->rs_show_const_type) {
1045*1cd08393SJason King 		ret = rust_append(st, ": ") &&
1046*1cd08393SJason King 		    rustv0_parse_uint_type(st, &type);
1047*1cd08393SJason King 	}
1048*1cd08393SJason King 
1049*1cd08393SJason King done:
1050*1cd08393SJason King 	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len),
1051*1cd08393SJason King 	    ret ? "success" : "fail");
1052*1cd08393SJason King 	return (ret);
1053*1cd08393SJason King }
1054*1cd08393SJason King 
1055*1cd08393SJason King static boolean_t
rustv0_parse_abi(rust_state_t * restrict st,strview_t * restrict sv)1056*1cd08393SJason King rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv)
1057*1cd08393SJason King {
1058*1cd08393SJason King 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
1059*1cd08393SJason King 
1060*1cd08393SJason King 	if (sv_consume_if_c(sv, 'C'))
1061*1cd08393SJason King 		return (rust_appendc(st, 'C'));
1062*1cd08393SJason King 
1063*1cd08393SJason King 	return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE));
1064*1cd08393SJason King }
1065*1cd08393SJason King 
1066*1cd08393SJason King static boolean_t
rustv0_parse_binder(rust_state_t * restrict st,strview_t * restrict sv)1067*1cd08393SJason King rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv)
1068*1cd08393SJason King {
1069*1cd08393SJason King 	uint64_t n, i;
1070*1cd08393SJason King 
1071*1cd08393SJason King 	if (!sv_consume_if_c(sv, 'G'))
1072*1cd08393SJason King 		return (B_FALSE);
1073*1cd08393SJason King 
1074*1cd08393SJason King 	if (!rustv0_parse_base62(st, sv, &n))
1075*1cd08393SJason King 		return (B_FALSE);
1076*1cd08393SJason King 	n += 1;
1077*1cd08393SJason King 
1078*1cd08393SJason King 	if (!rust_append(st, "for<"))
1079*1cd08393SJason King 		return (B_FALSE);
1080*1cd08393SJason King 
1081*1cd08393SJason King 	for (i = 0; i < n; i++) {
1082*1cd08393SJason King 		if (i > 0 && !rust_append(st, ", "))
1083*1cd08393SJason King 			return (B_FALSE);
1084*1cd08393SJason King 
1085*1cd08393SJason King 		st->rs_lt_depth++;
1086*1cd08393SJason King 		if (!rustv0_append_lifetime(st, 1))
1087*1cd08393SJason King 			return (B_FALSE);
1088*1cd08393SJason King 	}
1089*1cd08393SJason King 
1090*1cd08393SJason King 	if (!rust_append(st, "> "))
1091*1cd08393SJason King 		return (B_FALSE);
1092*1cd08393SJason King 
1093*1cd08393SJason King 	return (B_TRUE);
1094*1cd08393SJason King }
1095*1cd08393SJason King 
1096*1cd08393SJason King /*
1097*1cd08393SJason King  * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type>
1098*1cd08393SJason King  *
1099*1cd08393SJason King  * Note that while the Rust RFC states the binder is manditory, based on
1100*1cd08393SJason King  * actual examples, and comparing with the rust-based demangler, it is in
1101*1cd08393SJason King  * fact optional.
1102*1cd08393SJason King  */
1103*1cd08393SJason King static boolean_t
rustv0_parse_fnsig(rust_state_t * restrict st,strview_t * restrict sv)1104*1cd08393SJason King rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv)
1105*1cd08393SJason King {
1106*1cd08393SJason King 	uint64_t save_lt = st->rs_lt_depth;
1107*1cd08393SJason King 
1108*1cd08393SJason King 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
1109*1cd08393SJason King 
1110*1cd08393SJason King 	if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
1111*1cd08393SJason King 		return (B_FALSE);
1112*1cd08393SJason King 
1113*1cd08393SJason King 	if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe "))
1114*1cd08393SJason King 		return (B_FALSE);
1115*1cd08393SJason King 
1116*1cd08393SJason King 	if (sv_consume_if_c(sv, 'K') &&
1117*1cd08393SJason King 	    (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) ||
1118*1cd08393SJason King 	    !rust_append(st, "\" ")))
1119*1cd08393SJason King 		return (B_FALSE);
1120*1cd08393SJason King 
1121*1cd08393SJason King 	if (!rust_append(st, "fn("))
1122*1cd08393SJason King 		return (B_FALSE);
1123*1cd08393SJason King 
1124*1cd08393SJason King 	if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE,
1125*1cd08393SJason King 	    NULL)) {
1126*1cd08393SJason King 		return (B_FALSE);
1127*1cd08393SJason King 	}
1128*1cd08393SJason King 
1129*1cd08393SJason King 	if (!rust_appendc(st, ')'))
1130*1cd08393SJason King 		return (B_FALSE);
1131*1cd08393SJason King 
1132*1cd08393SJason King 	/* If the return type is (), don't print it */
1133*1cd08393SJason King 	if (!sv_consume_if_c(sv, 'u')) {
1134*1cd08393SJason King 		if (!rust_append(st, " -> "))
1135*1cd08393SJason King 			return (B_FALSE);
1136*1cd08393SJason King 
1137*1cd08393SJason King 		if (!rustv0_parse_type(st, sv, B_FALSE))
1138*1cd08393SJason King 			return (B_FALSE);
1139*1cd08393SJason King 	}
1140*1cd08393SJason King 
1141*1cd08393SJason King 	st->rs_lt_depth = save_lt;
1142*1cd08393SJason King 
1143*1cd08393SJason King 	return (B_TRUE);
1144*1cd08393SJason King }
1145*1cd08393SJason King 
1146*1cd08393SJason King /*
1147*1cd08393SJason King  * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
1148*1cd08393SJason King  */
1149*1cd08393SJason King static boolean_t
rustv0_parse_dyn_trait_assoc_binding(rust_state_t * restrict st,strview_t * restrict sv,boolean_t open)1150*1cd08393SJason King rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st,
1151*1cd08393SJason King     strview_t *restrict sv, boolean_t open)
1152*1cd08393SJason King {
1153*1cd08393SJason King 	size_t save_len;
1154*1cd08393SJason King 
1155*1cd08393SJason King 	if (HAS_ERROR(st))
1156*1cd08393SJason King 		return (B_FALSE);
1157*1cd08393SJason King 
1158*1cd08393SJason King 	if (sv_remaining(sv) == 0)
1159*1cd08393SJason King 		return (B_FALSE);
1160*1cd08393SJason King 
1161*1cd08393SJason King 	if (!sv_consume_if_c(sv, 'p'))
1162*1cd08393SJason King 		return (B_FALSE);
1163*1cd08393SJason King 
1164*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1165*1cd08393SJason King 	SAVE_LEN(st, save_len);
1166*1cd08393SJason King 
1167*1cd08393SJason King 	if (!rust_append(st, open ? ", " : "<"))
1168*1cd08393SJason King 		return (B_FALSE);
1169*1cd08393SJason King 
1170*1cd08393SJason King 	if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) {
1171*1cd08393SJason King 		st->rs_error = EINVAL;
1172*1cd08393SJason King 		return (B_FALSE);
1173*1cd08393SJason King 	}
1174*1cd08393SJason King 
1175*1cd08393SJason King 	if (!rust_append(st, " = "))
1176*1cd08393SJason King 		return (B_FALSE);
1177*1cd08393SJason King 
1178*1cd08393SJason King 	if (!rustv0_parse_type(st, sv, B_FALSE)) {
1179*1cd08393SJason King 		st->rs_error = EINVAL;
1180*1cd08393SJason King 		return (B_FALSE);
1181*1cd08393SJason King 	}
1182*1cd08393SJason King 
1183*1cd08393SJason King 	DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len));
1184*1cd08393SJason King 
1185*1cd08393SJason King 	return (B_TRUE);
1186*1cd08393SJason King }
1187*1cd08393SJason King 
1188*1cd08393SJason King static boolean_t
rustv0_parse_dyn_trait(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)1189*1cd08393SJason King rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv,
1190*1cd08393SJason King     boolean_t dummy __unused)
1191*1cd08393SJason King {
1192*1cd08393SJason King 	boolean_t stay_save = st->rs_args_stay_open;
1193*1cd08393SJason King 	boolean_t open_save = st->rs_args_is_open;
1194*1cd08393SJason King 	boolean_t open = B_FALSE;
1195*1cd08393SJason King 
1196*1cd08393SJason King 	if (HAS_ERROR(st))
1197*1cd08393SJason King 		return (B_FALSE);
1198*1cd08393SJason King 
1199*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1200*1cd08393SJason King 
1201*1cd08393SJason King 	/*
1202*1cd08393SJason King 	 * This is a bit subtle, but when formatting a trait in trait,
1203*1cd08393SJason King 	 * we want something like this:
1204*1cd08393SJason King 	 *
1205*1cd08393SJason King 	 *	dyn Trait<T, U, Assoc=X>
1206*1cd08393SJason King 	 *
1207*1cd08393SJason King 	 * instead of
1208*1cd08393SJason King 	 *
1209*1cd08393SJason King 	 *	dyn Trait<T, U, <Assoc=X>>
1210*1cd08393SJason King 	 *
1211*1cd08393SJason King 	 * So when parsing the path, if we encounter generic arguments, we want
1212*1cd08393SJason King 	 * the arg list to remain open at the end of processing the path so
1213*1cd08393SJason King 	 * we can append the bindings to it. We set rs_args_stay_open to B_TRUE
1214*1cd08393SJason King 	 * to indidcate to rustv0_parse_path() that a generic argument list
1215*1cd08393SJason King 	 * should not be closed (i.e. don't append a '>' at the end of the
1216*1cd08393SJason King 	 * list). If rustv0_parse_path() encounters a list of generic arguments,
1217*1cd08393SJason King 	 * it will also set rs->args_is_open to indiciate it opened the list.
1218*1cd08393SJason King 	 * We save this in 'open' so that when we process the associated
1219*1cd08393SJason King 	 * bindings, we know if we need to open the list on the first binding
1220*1cd08393SJason King 	 * or not -- we don't want 'dyn Trait<>' if there are no bindings,
1221*1cd08393SJason King 	 * just 'dyn Trait'.
1222*1cd08393SJason King 	 */
1223*1cd08393SJason King 	st->rs_args_stay_open = B_TRUE;
1224*1cd08393SJason King 	st->rs_args_is_open = B_FALSE;
1225*1cd08393SJason King 
1226*1cd08393SJason King 	if (!rustv0_parse_path(st, sv, B_FALSE)) {
1227*1cd08393SJason King 		st->rs_args_stay_open = stay_save;
1228*1cd08393SJason King 		st->rs_args_is_open = open_save;
1229*1cd08393SJason King 		return (B_FALSE);
1230*1cd08393SJason King 	}
1231*1cd08393SJason King 
1232*1cd08393SJason King 	open = st->rs_args_is_open;
1233*1cd08393SJason King 
1234*1cd08393SJason King 	st->rs_args_stay_open = stay_save;
1235*1cd08393SJason King 	st->rs_args_is_open = open_save;
1236*1cd08393SJason King 
1237*1cd08393SJason King 	while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) {
1238*1cd08393SJason King 		open = B_TRUE;
1239*1cd08393SJason King 	}
1240*1cd08393SJason King 
1241*1cd08393SJason King 	if (HAS_ERROR(st))
1242*1cd08393SJason King 		return (B_FALSE);
1243*1cd08393SJason King 
1244*1cd08393SJason King 	if (open && !rust_appendc(st, '>'))
1245*1cd08393SJason King 		return (B_FALSE);
1246*1cd08393SJason King 
1247*1cd08393SJason King 	return (!HAS_ERROR(st));
1248*1cd08393SJason King }
1249*1cd08393SJason King 
1250*1cd08393SJason King static boolean_t
rustv0_parse_dynbounds(rust_state_t * restrict st,strview_t * restrict sv)1251*1cd08393SJason King rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv)
1252*1cd08393SJason King {
1253*1cd08393SJason King 	uint64_t save_lt = st->rs_lt_depth;
1254*1cd08393SJason King 
1255*1cd08393SJason King 	if (HAS_ERROR(st))
1256*1cd08393SJason King 		return (B_FALSE);
1257*1cd08393SJason King 
1258*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1259*1cd08393SJason King 
1260*1cd08393SJason King 	/*
1261*1cd08393SJason King 	 * This is another case where Rust RFC2603 seems to disagree with
1262*1cd08393SJason King 	 * the implementation. The RFC implies this is mandatory, while
1263*1cd08393SJason King 	 * the implementations treat it as optional.
1264*1cd08393SJason King 	 */
1265*1cd08393SJason King 	if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
1266*1cd08393SJason King 		return (B_FALSE);
1267*1cd08393SJason King 
1268*1cd08393SJason King 	if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ",
1269*1cd08393SJason King 	    B_FALSE, NULL))
1270*1cd08393SJason King 		return (B_FALSE);
1271*1cd08393SJason King 
1272*1cd08393SJason King 	st->rs_lt_depth = save_lt;
1273*1cd08393SJason King 
1274*1cd08393SJason King 	return (B_TRUE);
1275*1cd08393SJason King }
1276*1cd08393SJason King 
1277*1cd08393SJason King static boolean_t
rustv0_parse_generic_arg(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)1278*1cd08393SJason King rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv,
1279*1cd08393SJason King     boolean_t dummy __unused)
1280*1cd08393SJason King {
1281*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1282*1cd08393SJason King 
1283*1cd08393SJason King 	if (sv_consume_if_c(sv, 'K'))
1284*1cd08393SJason King 		return (rustv0_parse_const(st, sv, B_FALSE));
1285*1cd08393SJason King 
1286*1cd08393SJason King 	if (rustv0_parse_lifetime(st, sv))
1287*1cd08393SJason King 		return (B_TRUE);
1288*1cd08393SJason King 
1289*1cd08393SJason King 	return (rustv0_parse_type(st, sv, B_FALSE));
1290*1cd08393SJason King }
1291*1cd08393SJason King 
1292*1cd08393SJason King /*
1293*1cd08393SJason King  * Parse a hex value into *valp. Note that rust only uses lower case
1294*1cd08393SJason King  * hex values.
1295*1cd08393SJason King  */
1296*1cd08393SJason King static boolean_t
rustv0_parse_hex_num(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)1297*1cd08393SJason King rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv,
1298*1cd08393SJason King     uint64_t *restrict valp)
1299*1cd08393SJason King {
1300*1cd08393SJason King 	uint64_t val = 0;
1301*1cd08393SJason King 	size_t ndigits = 0;
1302*1cd08393SJason King 
1303*1cd08393SJason King 	if (HAS_ERROR(st))
1304*1cd08393SJason King 		return (B_FALSE);
1305*1cd08393SJason King 
1306*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1307*1cd08393SJason King 
1308*1cd08393SJason King 	if (sv_remaining(sv) == 0)
1309*1cd08393SJason King 		return (B_FALSE);
1310*1cd08393SJason King 
1311*1cd08393SJason King 	/*
1312*1cd08393SJason King 	 * Unfortunately, Rust RFC 2603 also doesn't not explicty define
1313*1cd08393SJason King 	 * {hex-digits}. We follow what decimal digits does, and treat a
1314*1cd08393SJason King 	 * leading 0 as a terminator.
1315*1cd08393SJason King 	 */
1316*1cd08393SJason King 	while (sv_remaining(sv) > 0) {
1317*1cd08393SJason King 		char c = sv_peek(sv, 0);
1318*1cd08393SJason King 
1319*1cd08393SJason King 		if (ISDIGIT(c)) {
1320*1cd08393SJason King 			val *= 16;
1321*1cd08393SJason King 			val += c - '0';
1322*1cd08393SJason King 		} else if (c >= 'a' && c <= 'f') {
1323*1cd08393SJason King 			val *= 16;
1324*1cd08393SJason King 			val += c - 'a' + 10;
1325*1cd08393SJason King 		} else {
1326*1cd08393SJason King 			break;
1327*1cd08393SJason King 		}
1328*1cd08393SJason King 
1329*1cd08393SJason King 		sv_consume_n(sv, 1);
1330*1cd08393SJason King 
1331*1cd08393SJason King 		if (++ndigits == 1 && val == 0)
1332*1cd08393SJason King 			break;
1333*1cd08393SJason King 	}
1334*1cd08393SJason King 
1335*1cd08393SJason King 	if (ndigits > 0)
1336*1cd08393SJason King 		*valp = val;
1337*1cd08393SJason King 
1338*1cd08393SJason King 	return ((ndigits > 0) ? B_TRUE : B_FALSE);
1339*1cd08393SJason King }
1340*1cd08393SJason King 
1341*1cd08393SJason King /*
1342*1cd08393SJason King  * Parse a base62 number into *valp.  The number is explicitly terminated
1343*1cd08393SJason King  * by a '_'.  The values are also offset by 0 -- that is '_' == 0,
1344*1cd08393SJason King  * '0_' == 1, ...
1345*1cd08393SJason King  */
1346*1cd08393SJason King static boolean_t
rustv0_parse_base62(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)1347*1cd08393SJason King rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv,
1348*1cd08393SJason King     uint64_t *restrict valp)
1349*1cd08393SJason King {
1350*1cd08393SJason King 	uint64_t val = 0;
1351*1cd08393SJason King 	char c;
1352*1cd08393SJason King 
1353*1cd08393SJason King 	if (HAS_ERROR(st))
1354*1cd08393SJason King 		return (B_FALSE);
1355*1cd08393SJason King 
1356*1cd08393SJason King 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1357*1cd08393SJason King 
1358*1cd08393SJason King 	if (sv_remaining(sv) == 0)
1359*1cd08393SJason King 		return (B_FALSE);
1360*1cd08393SJason King 
1361*1cd08393SJason King 	/* A terminating '_' without any digits is 0 */
1362*1cd08393SJason King 	if (sv_consume_if_c(sv, '_')) {
1363*1cd08393SJason King 		*valp = 0;
1364*1cd08393SJason King 		return (B_TRUE);
1365*1cd08393SJason King 	}
1366*1cd08393SJason King 
1367*1cd08393SJason King 	/* Need at least one valid digit if > 0 */
1368*1cd08393SJason King 	if (!ISALNUM(sv_peek(sv, 0)))
1369*1cd08393SJason King 		return (B_FALSE);
1370*1cd08393SJason King 
1371*1cd08393SJason King 	while (sv_remaining(sv) > 0) {
1372*1cd08393SJason King 		c = sv_consume_c(sv);
1373*1cd08393SJason King 
1374*1cd08393SJason King 		if (c == '_') {
1375*1cd08393SJason King 			/*
1376*1cd08393SJason King 			 * Because a lone '_' was already handled earlier,
1377*1cd08393SJason King 			 * we know we've had at least one other digit and
1378*1cd08393SJason King 			 * can increment the value and return.
1379*1cd08393SJason King 			 */
1380*1cd08393SJason King 			*valp = val + 1;
1381*1cd08393SJason King 			return (B_TRUE);
1382*1cd08393SJason King 		} else if (ISDIGIT(c)) {
1383*1cd08393SJason King 			val *= 62;
1384*1cd08393SJason King 			val += c - '0';
1385*1cd08393SJason King 		} else if (ISLOWER(c)) {
1386*1cd08393SJason King 			val *= 62;
1387*1cd08393SJason King 			val += c - 'a' + 10;
1388*1cd08393SJason King 		} else if (ISUPPER(c)) {
1389*1cd08393SJason King 			val *= 62;
1390*1cd08393SJason King 			val += c - 'A' + 36;
1391*1cd08393SJason King 		} else {
1392*1cd08393SJason King 			return (B_FALSE);
1393*1cd08393SJason King 		}
1394*1cd08393SJason King 	}
1395*1cd08393SJason King 
1396*1cd08393SJason King 	/* We reached the end of the string without a terminating _ */
1397*1cd08393SJason King 	return (B_FALSE);
1398*1cd08393SJason King }
1399*1cd08393SJason King 
1400*1cd08393SJason King static const_type_class_t
rustv0_classify_const_type(char type)1401*1cd08393SJason King rustv0_classify_const_type(char type)
1402*1cd08393SJason King {
1403*1cd08393SJason King 	switch (type) {
1404*1cd08393SJason King 	case 'h': case 't': case 'm': case 'y': case 'o': case 'j':
1405*1cd08393SJason King 		return (CTC_UNSIGNED);
1406*1cd08393SJason King 	case 'a': case 'i': case 'l': case 'n': case 's': case 'x':
1407*1cd08393SJason King 		return (CTC_SIGNED);
1408*1cd08393SJason King 	case 'b':
1409*1cd08393SJason King 		return (CTC_BOOL);
1410*1cd08393SJason King 	case 'c':
1411*1cd08393SJason King 		return (CTC_CHAR);
1412*1cd08393SJason King 	default:
1413*1cd08393SJason King 		return (CTC_INVALID);
1414*1cd08393SJason King 	}
1415*1cd08393SJason King }
1416*1cd08393SJason King 
1417*1cd08393SJason King /*
1418*1cd08393SJason King  * Make sure the name is a plausible mangled rust symbol.
1419*1cd08393SJason King  * Non-ASCII are never allowed.  Rust itself uses [_0-9A-Za-z], however
1420*1cd08393SJason King  * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO).
1421*1cd08393SJason King  * As such we proceed in two phases. We first only allow [_0-9A-Z-az] until
1422*1cd08393SJason King  * we encounter a '.'. At that point, any ASCII character is allowed.
1423*1cd08393SJason King  */
1424*1cd08393SJason King static boolean_t
rustv0_valid_sym(const strview_t * sv)1425*1cd08393SJason King rustv0_valid_sym(const strview_t *sv)
1426*1cd08393SJason King {
1427*1cd08393SJason King 	size_t i;
1428*1cd08393SJason King 	boolean_t check_rust = B_TRUE;
1429*1cd08393SJason King 
1430*1cd08393SJason King 	for (i = 0; i < sv->sv_rem; i++) {
1431*1cd08393SJason King 		char c = sv->sv_first[i];
1432*1cd08393SJason King 
1433*1cd08393SJason King 		if (ISALNUM(c) || c == '_')
1434*1cd08393SJason King 			continue;
1435*1cd08393SJason King 
1436*1cd08393SJason King 		if (c == '.') {
1437*1cd08393SJason King 			check_rust = B_FALSE;
1438*1cd08393SJason King 			continue;
1439*1cd08393SJason King 		}
1440*1cd08393SJason King 
1441*1cd08393SJason King 		if (check_rust || (c & 0x80) != 0) {
1442*1cd08393SJason King 			DEMDEBUG("%s: ERROR found invalid character '%c' "
1443*1cd08393SJason King 			    "in '%.*s' at index %zu",
1444*1cd08393SJason King 			    __func__, c, SV_PRINT(sv), i);
1445*1cd08393SJason King 			return (B_FALSE);
1446*1cd08393SJason King 		}
1447*1cd08393SJason King 	}
1448*1cd08393SJason King 	return (B_TRUE);
1449*1cd08393SJason King }
1450