1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Joyent, Inc.
14  * Copyright 2021 Jason King
15  */
16 
17 /* BEGIN CSTYLED */
18 
19 /*
20  * This implements the 'symbol_name_mangling_v2' demangling for rust as
21  * described in Rust RFC 2603 as opposed to the original (now called
22  * legacy) mangling older versions of rust used (implemented in rust.c).
23  *
24  * The specification can be viewed at:
25  *     https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md
26  */
27 
28 /* END CSTYLED */
29 
30 #include <errno.h>
31 #include <libcustr.h>
32 #include <stdarg.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "rust.h"
38 
/*
 * Help track amount of additional output added to rs_demangled across
 * a function call (to allow that portion to be output for debugging).
 *
 * SAVE_LEN() stores the current length of rs_demangled into _len.
 *
 * CSTR_END() expands to TWO comma separated expressions: the number of
 * bytes appended to rs_demangled since _len was saved (cast to int), and
 * a pointer to the first of those bytes.  It is therefore only usable as
 * the argument pair consumed by a "%.*s" conversion.
 */
#define	SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled)
#define	CSTR_END(_st, _len)					\
	((int)(custr_len((_st)->rs_demangled) - (_len))),	\
	custr_cstr((_st)->rs_demangled) + (_len)
47 
/*
 * Classification of the type of a const value.  The class selects how
 * rustv0_parse_const_data() formats the decoded number.
 */
typedef enum const_type_class {
	CTC_INVALID = -1,	/* rejected by rustv0_parse_const() */
	CTC_UNSIGNED,		/* printed as a decimal number */
	CTC_SIGNED,		/* decimal number, optional 'n' => negative */
	CTC_CHAR,		/* printed as a quoted UTF-8 character */
	CTC_BOOL,		/* 0 => "false", 1 => "true" */
} const_type_class_t;
55 
/*
 * Sometimes, parsing something is optional.  In this case a failure to
 * parse is fine, however we still want to consider a fatal error as
 * failure.
 *
 * The expression is true when _f succeeded, or when _f failed without an
 * error being recorded in _st (HAS_ERROR() is false).
 */
#define	OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st))
62 
63 static boolean_t rustv0_valid_sym(const strview_t *);
64 static const_type_class_t rustv0_classify_const_type(char);
65 static boolean_t rustv0_parse_hex_num(rust_state_t *restrict,
66     strview_t *restrict, uint64_t *restrict);
67 static boolean_t rustv0_parse_base62(rust_state_t *restrict,
68     strview_t *restrict, uint64_t *restrict);
69 
70 static boolean_t rustv0_parse_undisambiguated_identifier(
71     rust_state_t *restrict, strview_t *restrict, boolean_t);
72 static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict,
73     strview_t *restrict, uint64_t *restrict);
74 
75 static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict,
76     boolean_t);
77 static boolean_t rustv0_parse_impl_path(rust_state_t *restrict,
78     strview_t *restrict, boolean_t);
79 static boolean_t rustv0_parse_nested_path(rust_state_t *restrict,
80     strview_t *restrict, boolean_t);
81 static boolean_t rustv0_parse_basic_type(rust_state_t *restrict,
82     strview_t *restrict);
83 static boolean_t rustv0_parse_backref(rust_state_t *restrict,
84     strview_t *restrict,
85     boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t),
86     boolean_t);
87 static boolean_t rustv0_parse_lifetime(rust_state_t *restrict,
88     strview_t *restrict);
89 static boolean_t rustv0_parse_const(rust_state_t *restrict,
90     strview_t *restrict, boolean_t);
91 static boolean_t rustv0_parse_fnsig(rust_state_t *restrict,
92     strview_t *restrict);
93 static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict,
94     strview_t *restrict);
95 static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict,
96     strview_t *restrict, boolean_t);
97 
98 boolean_t
rust_demangle_v0(rust_state_t * restrict st,strview_t * restrict sv)99 rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv)
100 {
101 	boolean_t save_skip;
102 	boolean_t ret;
103 
104 	/* Make sure all the characters are valid */
105 	if (!rustv0_valid_sym(sv)) {
106 		st->rs_error = EINVAL;
107 		return (B_FALSE);
108 	}
109 
110 	/*
111 	 * <symbol-name> = "_R" [<decimal-number>] <path>
112 	 *	[<instantiating-crate>]
113 	 *
114 	 * We've already parsed the prefix in rust_demangle(), as well
115 	 * as made sure there's no [<decimal-number>] present, so
116 	 * start with <path>.
117 	 */
118 	if (!rustv0_parse_path(st, sv, B_TRUE))
119 		return (B_FALSE);
120 
121 	/* [<instantiating crate>] -- parse but don't save */
122 	SKIP_BEGIN(st, save_skip);
123 	ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE));
124 	SKIP_END(st, save_skip);
125 	if (!ret)
126 		return (B_FALSE);
127 
128 	/* If nothing's left, we know we're done */
129 	if (sv_remaining(sv) == 0)
130 		return (!HAS_ERROR(st));
131 
132 	/*
133 	 * LLVM sometimes will suffix symbols starting with a '.'
134 	 * followed by extra data. For things that start with
135 	 * ".llvm.", we discard the rest of the string.  For
136 	 * other things that start with '.', we copy the
137 	 * results to the final string. This matches
138 	 * what the rust native demangler crate does, and
139 	 * we don't see a reason to deviate from their
140 	 * behavior.
141 	 */
142 	if (sv_consume_if(sv, ".llvm."))
143 		return (!HAS_ERROR(st));
144 
145 	if (sv_peek(sv, 0) != '.') {
146 		DEMDEBUG("%s: Unexpected trailing data at the end of the "
147 		    "name: '%.*s'", __func__, SV_PRINT(sv));
148 		st->rs_error = EINVAL;
149 		return (B_FALSE);
150 	}
151 
152 	return (rust_append_sv(st, sv_remaining(sv), sv));
153 }
154 
155 /*
156  * Parse an optional list terminated by 'E'. Each result of 'fn' is
157  * separated by 'sep' in the output.
158  */
159 static boolean_t
rustv0_parse_opt_list(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t),const char * restrict sep,boolean_t bval,size_t * restrict countp)160 rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv,
161     boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t),
162     const char *restrict sep, boolean_t bval, size_t *restrict countp)
163 {
164 	size_t count = 0;
165 
166 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
167 
168 	while (sv_remaining(sv) > 0) {
169 		if (sv_consume_if_c(sv, 'E')) {
170 			if (countp != NULL)
171 				*countp += count;
172 			return (B_TRUE);
173 		}
174 
175 		if (count > 0 && !rust_append(st, sep))
176 			return (B_FALSE);
177 
178 		if (!fn(st, sv, bval))
179 			return (B_FALSE);
180 
181 		count++;
182 	}
183 
184 	/*
185 	 * An optional list should terminate with an 'E'.  If we get here,
186 	 * we ran out of charaters and didn't terminate as we should.
187 	 */
188 	return (B_FALSE);
189 }
190 
191 static boolean_t
rustv0_parse_uint_type(rust_state_t * restrict st,strview_t * sv)192 rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv)
193 {
194 	const char *str = NULL;
195 	strview_t save;
196 	char c;
197 
198 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
199 		return (B_FALSE);
200 
201 	sv_init_sv(&save, sv);
202 
203 	switch (c = sv_consume_c(sv)) {
204 	case 'h':
205 		str = "u8";
206 		break;
207 	case 't':
208 		str = "u16";
209 		break;
210 	case 'm':
211 		str = "u32";
212 		break;
213 	case 'y':
214 		str = "u64";
215 		break;
216 	case 'o':
217 		str = "u128";
218 		break;
219 	case 'j':	/* usize */
220 		str = "usize";
221 		break;
222 	default:
223 		sv_init_sv(sv, &save);
224 		return (B_FALSE);
225 	}
226 
227 	DEMDEBUG("%s: %c -> %s", __func__, c, str);
228 	return (rust_append(st, str));
229 }
230 
231 static boolean_t
rustv0_parse_basic_type(rust_state_t * restrict st,strview_t * restrict sv)232 rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv)
233 {
234 	const char *str = NULL;
235 	strview_t save;
236 	char c;
237 
238 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
239 		return (B_FALSE);
240 
241 	if (rustv0_parse_uint_type(st, sv))
242 		return (B_TRUE);
243 
244 	sv_init_sv(&save, sv);
245 
246 	switch (c = sv_consume_c(sv)) {
247 	case 'a':
248 		str = "i8";
249 		break;
250 	case 'b':
251 		str = "bool";
252 		break;
253 	case 'c':
254 		str = "char";
255 		break;
256 	case 'd':
257 		str = "f64";
258 		break;
259 	case 'e':
260 		str = "str";
261 		break;
262 	case 'f':
263 		str = "f32";
264 		break;
265 	case 'i':
266 		str = "isize";
267 		break;
268 	case 'l':
269 		str = "i32";
270 		break;
271 	case 'n':
272 		str = "i128";
273 		break;
274 	case 'p':
275 		str = "_";
276 		break;
277 	case 's':
278 		str = "i16";
279 		break;
280 	case 'u':
281 		str = "()";
282 		break;
283 	case 'v':
284 		str = "...";
285 		break;
286 	case 'x':
287 		str = "i64";
288 		break;
289 	case 'z':
290 		str = "!";
291 		break;
292 	default:
293 		sv_init_sv(sv, &save);
294 		return (B_FALSE);
295 	}
296 
297 	DEMDEBUG("%s: %c -> %s", __func__, c, str);
298 	return (rust_append(st, str));
299 }
300 
301 static boolean_t
rustv0_parse_type(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)302 rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv,
303     boolean_t dummy __unused)
304 {
305 	strview_t save;
306 	size_t len, tuple_elem_count;
307 	boolean_t ret;
308 	char c;
309 
310 	if (HAS_ERROR(st))
311 		return (B_FALSE);
312 
313 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
314 
315 	if (sv_remaining(sv) == 0)
316 		return (B_FALSE);
317 
318 	SAVE_LEN(st, len);
319 	sv_init_sv(&save, sv);
320 
321 	switch (c = sv_consume_c(sv)) {
322 	case 'A':
323 		ret = rust_appendc(st, '[') &&
324 		    rustv0_parse_type(st, sv, B_FALSE) &&
325 		    rust_append(st, "; ") &&
326 		    rustv0_parse_const(st, sv, B_FALSE) &&
327 		    rust_appendc(st, ']');
328 		break;
329 	case 'S':
330 		ret = rust_appendc(st, '[') &&
331 		    rustv0_parse_type(st, sv, B_FALSE) &&
332 		    rust_appendc(st, ']');
333 		break;
334 	case 'T':
335 		tuple_elem_count = 0;
336 		ret = rust_appendc(st, '(') &&
337 		    rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ",
338 		    B_FALSE, &tuple_elem_count) &&
339 		    rust_append(st, (tuple_elem_count == 1) ? ",)" : ")");
340 		break;
341 	case 'R':
342 	case 'Q':
343 		/* `&mut T` or `&'... mut T` */
344 		if (!(ret = rust_appendc(st, '&')))
345 			break;
346 
347 		/*
348 		 * lifetime is optional, but we need to add a trailing
349 		 * space if present (so we cannot use the OPTIONAL macro).
350 		 */
351 		if (rustv0_parse_lifetime(st, sv)) {
352 			if (!(ret = rust_appendc(st, ' ')))
353 				break;
354 		} else if (HAS_ERROR(st)) {
355 			break;
356 		}
357 
358 		ret = rust_append(st, (c == 'Q') ? "mut " : "") &&
359 		    rustv0_parse_type(st, sv, B_FALSE);
360 		break;
361 	case 'P':
362 		ret = rust_append(st, "*const ") &&
363 		    rustv0_parse_type(st, sv, B_FALSE);
364 		break;
365 	case 'O':
366 		ret = rust_append(st, "*mut ") &&
367 		    rustv0_parse_type(st, sv, B_FALSE);
368 		break;
369 	case 'F':
370 		ret = rustv0_parse_fnsig(st, sv);
371 		break;
372 	case 'D':
373 		ret = rust_append(st, "dyn ") &&
374 		    rustv0_parse_dynbounds(st, sv);
375 		if (!ret)
376 			break;
377 
378 		/*
379 		 * Rust RFC2603 shows the lifetime as required, however
380 		 * it appears this is optional.
381 		 */
382 		DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv));
383 
384 		/*
385 		 * We only want to print a non-zero (non "'_")
386 		 * lifetime.
387 		 */
388 		if (sv_consume_if(sv, "L_"))
389 			break;
390 
391 		/*
392 		 * But if there is a lifetime we want to print,
393 		 * we want to prepend " + " before it.
394 		 */
395 		if (sv_peek(sv, 0) == 'L' &&
396 		    !(ret = rust_append(st, " + ")))
397 			break;
398 
399 		ret = rustv0_parse_lifetime(st, sv);
400 		break;
401 	default:
402 		sv_init_sv(sv, &save);
403 
404 		ret = rustv0_parse_backref(st, sv, rustv0_parse_type,
405 		    B_FALSE) ||
406 		    rustv0_parse_basic_type(st, sv);
407 		if (ret)
408 			break;
409 
410 		ret = rustv0_parse_path(st, sv, B_FALSE);
411 		break;
412 	}
413 
414 	DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len),
415 	    ret ? "success" : "fail");
416 
417 	return (ret);
418 }
419 
/*
 * <path> = "C" <identifier>		crate root
 *	| "M" <impl-path> <type>	<T>
 *	| "X" <impl-path> <type> <path>	<T as Trait> (trait impl)
 *	| "Y" <type> <path>		<T as Trait> (trait definition)
 *	| "N" <ns> <path> <identifier>	...::ident (nested path)
 *	| "I" <path> {<generic-arg>} "E" ...<T, U>
 *	| <backref>
 *
 * Parse a <path> and append its demangled form.  When in_value is true,
 * a generic argument list ("I") is introduced with "::<" instead of "<".
 * Returns B_TRUE on success, B_FALSE otherwise.
 */
static boolean_t
rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv,
    boolean_t in_value)
{
	strview_t save;
	uint64_t disamb = 0;
	size_t len;
	boolean_t ret = B_FALSE;
	boolean_t save_skip;
	/* Generic argument list state on entry; restored in the 'I' case */
	boolean_t args_stay_save = st->rs_args_stay_open;
	boolean_t args_open_save = st->rs_args_is_open;

	if (HAS_ERROR(st))
		return (B_FALSE);

	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));

	if (sv_remaining(sv) == 0)
		return (B_FALSE);

	SAVE_LEN(st, len);
	sv_init_sv(&save, sv);

	switch (sv_consume_c(sv)) {
	case 'C':
		/* crate root: optional disambiguator, then the crate name */
		if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
			goto done;

		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
			goto done;

		/* The disambiguator is only shown (in hex) when verbose */
		if (st->rs_verbose &&
		    !rust_append_printf(st, "[%" PRIx64 "]", disamb))
			goto done;
		break;
	case 'M':
		/* The impl path is parsed with output skipped */
		SKIP_BEGIN(st, save_skip);
		if (!rustv0_parse_impl_path(st, sv, in_value)) {
			SKIP_END(st, save_skip);
			goto done;
		}
		SKIP_END(st, save_skip);

		if (!rust_appendc(st, '<') ||
		    !rustv0_parse_type(st, sv, B_FALSE) ||
		    !rust_appendc(st, '>'))
			goto done;
		break;
	case 'X':
		/*
		 * Same as 'Y' below, but preceded by an impl path that is
		 * parsed with output skipped.
		 */
		SKIP_BEGIN(st, save_skip);
		if (!rustv0_parse_impl_path(st, sv, in_value)) {
			SKIP_END(st, save_skip);
			goto done;
		}
		SKIP_END(st, save_skip);
		/*FALLTHRU*/
	case 'Y':
		/* <type as path> */
		if (!rust_appendc(st, '<') ||
		    !rustv0_parse_type(st, sv, B_FALSE) ||
		    !rust_append(st, " as ") ||
		    !rustv0_parse_path(st, sv, B_FALSE) ||
		    !rust_appendc(st, '>'))
			goto done;
		break;
	case 'N':
		if (!rustv0_parse_nested_path(st, sv, in_value))
			goto done;
		break;
	case 'I':
		/*
		 * Clear the argument list state while parsing the inner
		 * path and the arguments; the values seen on entry are
		 * put back once the list has been parsed successfully.
		 */
		st->rs_args_stay_open = B_FALSE;
		st->rs_args_is_open = B_FALSE;

		if (!rustv0_parse_path(st, sv, in_value))
			goto done;

		if (in_value && !rust_append(st, "::"))
			goto done;

		if (!rust_appendc(st, '<') ||
		    !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg,
		    ", ", B_FALSE, NULL))
			goto done;

		st->rs_args_stay_open = args_stay_save;
		st->rs_args_is_open = args_open_save;

		/*
		 * If we were asked to not close our list, then don't and
		 * indicate that the list is open.
		 */
		if (st->rs_args_stay_open) {
			st->rs_args_stay_open = B_FALSE;
			st->rs_args_is_open = B_TRUE;
		} else if (!rust_appendc(st, '>')) {
			goto done;
		}
		break;
	default:
		/*
		 * Didn't recognize the letter, so the only remaining
		 * alternative is a <backref>. Restore sv to state prior to
		 * switch and continue.
		 */
		sv_init_sv(sv, &save);
		if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value))
			goto done;
	}

	ret = B_TRUE;

done:
	DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len),
	    ret ? "success" : "fail");

	return (ret);
}
544 
545 static boolean_t
rustv0_parse_impl_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)546 rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv,
547     boolean_t in_value)
548 {
549 	uint64_t val = 0;
550 
551 	return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) &&
552 	    rustv0_parse_path(st, sv, in_value));
553 }
554 
555 /*
556  * A bit of a hack -- when printing a nested path, we need to know
557  * if the identifier is there or not in order to correctly format
558  * the output preceeding it (when present). This peeks ahead and
559  * determines this.
560  */
561 static boolean_t
rustv0_has_name(rust_state_t * restrict st,strview_t * restrict sv,boolean_t * has_namep)562 rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv,
563     boolean_t *has_namep)
564 {
565 	strview_t save;
566 
567 	if (HAS_ERROR(st))
568 		return (B_FALSE);
569 
570 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
571 
572 	if (sv_remaining(sv) == 0)
573 		return (B_FALSE);
574 
575 	sv_init_sv(&save, sv);
576 
577 	/* For checking the length, we don't care if it's punycode or not */
578 	(void) sv_consume_if_c(&save, 'u');
579 
580 	if (sv_remaining(sv) == 0) {
581 		st->rs_error = EINVAL;
582 		return (B_FALSE);
583 	}
584 
585 	if (sv_consume_if_c(&save, '0')) {
586 		*has_namep = B_FALSE;
587 		return (B_TRUE);
588 	}
589 
590 	*has_namep = B_TRUE;
591 	return (B_TRUE);
592 }
593 
594 static boolean_t
rustv0_parse_nested_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)595 rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv,
596     boolean_t in_value)
597 {
598 	uint64_t disambiguator = 0;
599 	size_t len = 0;
600 	char ns;
601 	boolean_t ret = B_FALSE;
602 	boolean_t has_name;
603 
604 	if (HAS_ERROR(st))
605 		return (B_FALSE);
606 
607 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
608 
609 	if (sv_remaining(sv) == 0)
610 		return (B_FALSE);
611 
612 	SAVE_LEN(st, len);
613 
614 	ns = sv_consume_c(sv);
615 
616 	if (!rustv0_parse_path(st, sv, in_value))
617 		goto done;
618 
619 	if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator)))
620 		goto done;
621 
622 	if (!rustv0_has_name(st, sv, &has_name))
623 		goto done;
624 
625 	if (ISUPPER(ns)) {
626 		if (!rust_append(st, "::{"))
627 			goto done;
628 
629 		switch (ns) {
630 		case 'C':
631 			if (!rust_append(st, "closure"))
632 				goto done;
633 			break;
634 		case 'S':
635 			if (!rust_append(st, "shim"))
636 				goto done;
637 			break;
638 		default:
639 			if (!rust_appendc(st, ns))
640 				goto done;
641 			break;
642 		}
643 
644 		if (has_name && !rust_appendc(st, ':'))
645 			goto done;
646 
647 		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
648 			goto done;
649 
650 		ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator);
651 	} else {
652 		if (has_name) {
653 			if (!(ret = rust_append(st, "::")))
654 				goto done;
655 		}
656 		ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE);
657 	}
658 
659 done:
660 	DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len),
661 	    ret ? "success" : "fail");
662 
663 	return (ret);
664 }
665 
666 /*
667  * <disambiguator> = "s" <base-64-number>
668  *
669  */
670 static boolean_t
rustv0_parse_disambiguator(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * valp)671 rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv,
672     uint64_t *valp)
673 {
674 	if (HAS_ERROR(st) || sv_remaining(sv) < 2)
675 		return (B_FALSE);
676 
677 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
678 
679 	*valp = 0;
680 
681 	if (!sv_consume_if_c(sv, 's'))
682 		return (B_FALSE);
683 
684 	if (!rustv0_parse_base62(st, sv, valp)) {
685 		st->rs_error = EINVAL;
686 		return (B_FALSE);
687 	}
688 
689 	/*
690 	 * Rust RFC 2603 details this in Appendix A, but not the main
691 	 * portion of the RFC. If no disambiguator is present, the value
692 	 * is 0, if the decoded value is 0, the index is 1, ...
693 	 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we
694 	 * only need to add one here to complete the adjustment.
695 	 */
696 	*valp = *valp + 1;
697 
698 	DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp);
699 	return (B_TRUE);
700 }
701 
702 /* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */
703 static boolean_t
rustv0_parse_undisambiguated_identifier(rust_state_t * restrict st,strview_t * restrict sv,boolean_t repl_underscore)704 rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st,
705     strview_t *restrict sv, boolean_t repl_underscore)
706 {
707 	uint64_t len = 0;
708 	boolean_t puny = B_FALSE;
709 
710 	if (HAS_ERROR(st))
711 		return (B_FALSE);
712 
713 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
714 
715 	if (sv_remaining(sv) == 0)
716 		return (B_FALSE);
717 
718 	if (sv_consume_if_c(sv, 'u'))
719 		puny = B_TRUE;
720 
721 	if (!rust_parse_base10(st, sv, &len))
722 		return (B_FALSE);
723 
724 	/* skip optional separator '_' */
725 	(void) sv_consume_if_c(sv, '_');
726 
727 	if (sv_remaining(sv) < len) {
728 		DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") "
729 		    "> remaining bytes (%zu)", __func__, len,
730 		    sv_remaining(sv));
731 		return (B_FALSE);
732 	}
733 
734 	/* 0 length identifiers are acceptable */
735 	if (len == 0)
736 		return (B_TRUE);
737 
738 	if (puny) {
739 		strview_t ident;
740 
741 		sv_init_sv_range(&ident, sv, len);
742 		if (!rustv0_puny_decode(st, &ident, repl_underscore))
743 			return (B_FALSE);
744 
745 		sv_consume_n(sv, len);
746 		return (B_TRUE);
747 	}
748 
749 	/*
750 	 * rust identifiers do not contain '-'. However ABI identifiers
751 	 * are allowed to contain them (e.g. extern "foo-bar" fn ...).
752 	 * They are substituted with '_' in the mangled output. If we
753 	 * do not need to reverse this, we can just append 'len' bytes
754 	 * of sv.  Otherwise we need to go through and reverse this
755 	 * substitution.
756 	 */
757 	if (!repl_underscore)
758 		return (rust_append_sv(st, len, sv));
759 
760 	/*
761 	 * We checked earlier that len < sv_remaining(sv); so this loop
762 	 * cannot overrun.
763 	 */
764 	for (size_t i = 0; i < len; i++) {
765 		char c = sv_consume_c(sv);
766 
767 		if (c == '_')
768 			c = '-';
769 
770 		if (!rust_appendc(st, c))
771 			return (B_FALSE);
772 	}
773 
774 	return (B_TRUE);
775 }
776 
777 /* <backref> = "B" <base-62-number> */
778 static boolean_t
rustv0_parse_backref(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t b),boolean_t bval)779 rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv,
780     boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b),
781     boolean_t bval)
782 {
783 	strview_t backref;
784 	strview_t target;
785 	uint64_t idx = 0;
786 	size_t save_len;
787 	size_t len;
788 
789 	if (HAS_ERROR(st))
790 		return (B_FALSE);
791 
792 	sv_init_sv(&backref, sv);
793 
794 	if (!sv_consume_if_c(sv, 'B'))
795 		return (B_FALSE);
796 
797 	DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv));
798 
799 	if (!rustv0_parse_base62(st, sv, &idx)) {
800 		st->rs_error = EINVAL;
801 		return (B_FALSE);
802 	}
803 
804 	/*
805 	 * Determine how many bytes we've consumed (up to the start of
806 	 * the current backref token).
807 	 */
808 	VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first);
809 	len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first);
810 
811 	/*
812 	 * The backref can only refer to an index prior to the start of
813 	 * the current backref token -- that is must always refer back in
814 	 * the string, never to the current position or beyond.
815 	 */
816 	if (idx >= len) {
817 		DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") "
818 		    "is out of range [0, %zu)", __func__, idx, len);
819 		st->rs_error = ERANGE;
820 		return (B_FALSE);
821 	}
822 
823 	/*
824 	 * Create a strview_t of the original string (sans prefix) by
825 	 * copying from st->rs_orig. The length of the target strview_t is
826 	 * capped to end immediately prior to this backref token. Since we
827 	 * enforce that backrefs must always refer to already processed
828 	 * portions of the string (i.e. must always refer backwards), and the
829 	 * length of the strview_t is set to end prior to the start of this
830 	 * backref token, we guarantee processing of a backref will always
831 	 * terminate before it can possibly encounter this backref token
832 	 * and cause a loop -- either the processing terminates normally or
833 	 * it reaches the end of the capped strview_t.
834 	 */
835 	sv_init_sv_range(&target, &st->rs_orig, len);
836 
837 	/*
838 	 * Consume all the input in the target strview_t up to the index
839 	 */
840 	sv_consume_n(&target, idx);
841 
842 	DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__,
843 	    idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : "");
844 
845 	/*
846 	 * If we're skipping the output, there's no reason to bother reparsing
847 	 * the output -- we're not going to save it. We still setup everything
848 	 * so that the debug output is still emitted.
849 	 */
850 	if (st->rs_skip)
851 		return (B_TRUE);
852 
853 	SAVE_LEN(st, save_len);
854 	if (!fn(st, &target, bval))
855 		return (B_FALSE);
856 
857 	DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len));
858 	return (B_TRUE);
859 }
860 
861 static boolean_t
rustv0_append_lifetime(rust_state_t * restrict st,uint64_t lifetime)862 rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime)
863 {
864 	uint64_t bound_lt;
865 
866 	if (HAS_ERROR(st))
867 		return (B_FALSE);
868 
869 	if (!rust_appendc(st, '\''))
870 		return (B_FALSE);
871 
872 	if (lifetime == 0)
873 		return (rust_appendc(st, '_'));
874 
875 	if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) {
876 		DEMDEBUG("%s: ERROR: lifetime value %" PRIu64
877 		    " > current depth %" PRIu64, __func__, lifetime,
878 		    st->rs_lt_depth);
879 		st->rs_lt_depth = ERANGE;
880 		return (B_FALSE);
881 	}
882 
883 	/*
884 	 * Use 'a, 'b, ...
885 	 */
886 	if (bound_lt < 26) {
887 		char c = (char)bound_lt + 'a';
888 		return (rust_append_printf(st, "%c", c));
889 	}
890 
891 	/*
892 	 * Otherwise, use '_123, '_456, ...
893 	 */
894 	return (rust_append_printf(st, "_%" PRIu64, bound_lt));
895 }
896 
897 static boolean_t
rustv0_parse_lifetime(rust_state_t * restrict st,strview_t * restrict sv)898 rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv)
899 {
900 	uint64_t lifetime;
901 
902 	if (!sv_consume_if_c(sv, 'L'))
903 		return (B_FALSE);
904 
905 	if (!rustv0_parse_base62(st, sv, &lifetime))
906 		return (B_FALSE);
907 
908 	return (rustv0_append_lifetime(st, lifetime));
909 }
910 
911 static boolean_t
rustv0_parse_const_data(rust_state_t * restrict st,const_type_class_t type_class,strview_t * restrict sv)912 rustv0_parse_const_data(rust_state_t *restrict st,
913     const_type_class_t type_class, strview_t *restrict sv)
914 {
915 	uint64_t val = 0;
916 	size_t save_len;
917 	boolean_t neg = B_FALSE;
918 	boolean_t ret = B_FALSE;
919 
920 	VERIFY3S(type_class, !=, CTC_INVALID);
921 
922 	if (HAS_ERROR(st))
923 		return (B_FALSE);
924 
925 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
926 	SAVE_LEN(st, save_len);
927 
928 	if (sv_remaining(sv) == 0)
929 		return (B_FALSE);
930 
931 	if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n'))
932 		neg = B_TRUE;
933 
934 	ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) &&
935 	    sv_consume_if_c(sv, '_');
936 	if (!ret)
937 		goto done;
938 
939 	switch (type_class) {
940 	case CTC_SIGNED:
941 	case CTC_UNSIGNED:
942 		ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val);
943 		break;
944 	case CTC_BOOL:
945 		if (val > 1) {
946 			DEMDEBUG("%s: invalid bool val %" PRIu64, __func__,
947 			    val);
948 			ret = B_FALSE;
949 			break;
950 		}
951 		ret = rust_append_printf(st, "%s",
952 		    (val == 0) ? "false" : "true");
953 		break;
954 	case CTC_CHAR:
955 		if (val > UINT32_MAX) {
956 			DEMDEBUG("%s: char value %" PRIu64 " out of range",
957 			    __func__, val);
958 			ret = B_FALSE;
959 			break;
960 		}
961 
962 		ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) &&
963 		    rust_appendc(st, '\'');
964 		break;
965 	default:
966 		ret = B_FALSE;
967 	}
968 
969 done:
970 	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len),
971 	    ret ? "success" : "fail");
972 
973 	return (ret);
974 }
975 
976 static boolean_t
rustv0_parse_const(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)977 rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv,
978     boolean_t dummy __unused)
979 {
980 	strview_t type;
981 	size_t start_len;
982 	const_type_class_t ctype_class;
983 	char ctype;
984 	boolean_t save_skip;
985 	boolean_t ret;
986 
987 	if (HAS_ERROR(st))
988 		return (B_FALSE);
989 
990 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
991 	SAVE_LEN(st, start_len);
992 
993 	if (sv_remaining(sv) == 0)
994 		return (B_FALSE);
995 
996 	if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE))
997 		return (B_TRUE);
998 
999 	if (sv_consume_if_c(sv, 'p')) {
1000 		ret = rust_appendc(st, '_');
1001 		goto done;
1002 	}
1003 
1004 	ctype = sv_peek(sv, 0);
1005 	ctype_class = rustv0_classify_const_type(ctype);
1006 	if (ctype_class == CTC_INVALID) {
1007 		DEMDEBUG("%s: const type isn't a valid const generic type",
1008 		    __func__);
1009 		return (B_FALSE);
1010 	}
1011 
1012 	/*
1013 	 * This isn't spelled out clearly in Rust RFC 2603, but currently
1014 	 * only unsigned int types are allowed at this point. However, we
1015 	 * have a bit of a potential tricky situation. Unlike formatting
1016 	 * the other tokens, if we want to display the type, we do so
1017 	 * _after_ the value, even though the type appears first.
1018 	 *
1019 	 * This is bit of a hack, but we save off the input position from
1020 	 * sv before the parse the type. We then parse it without saving
1021 	 * the resulting value, then parse and output the constant. If
1022 	 * we wish to then display the type, we can go back and parse
1023 	 * the type again, this time saving the result.
1024 	 */
1025 	sv_init_sv(&type, sv);
1026 
1027 	SKIP_BEGIN(st, save_skip);
1028 	ret = rustv0_parse_type(st, sv, B_FALSE);
1029 	SKIP_END(st, save_skip);
1030 
1031 	if (!ret) {
1032 		DEMDEBUG("%s: const type isn't valid", __func__);
1033 		return (B_FALSE);
1034 	}
1035 
1036 	if (sv_consume_if_c(sv, 'p')) {
1037 		ret = rust_appendc(st, '_');
1038 	} else {
1039 		ret = rustv0_parse_const_data(st, ctype_class, sv);
1040 	}
1041 	if (!ret)
1042 		goto done;
1043 
1044 	if (st->rs_show_const_type) {
1045 		ret = rust_append(st, ": ") &&
1046 		    rustv0_parse_uint_type(st, &type);
1047 	}
1048 
1049 done:
1050 	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len),
1051 	    ret ? "success" : "fail");
1052 	return (ret);
1053 }
1054 
1055 static boolean_t
rustv0_parse_abi(rust_state_t * restrict st,strview_t * restrict sv)1056 rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv)
1057 {
1058 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
1059 
1060 	if (sv_consume_if_c(sv, 'C'))
1061 		return (rust_appendc(st, 'C'));
1062 
1063 	return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE));
1064 }
1065 
1066 static boolean_t
rustv0_parse_binder(rust_state_t * restrict st,strview_t * restrict sv)1067 rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv)
1068 {
1069 	uint64_t n, i;
1070 
1071 	if (!sv_consume_if_c(sv, 'G'))
1072 		return (B_FALSE);
1073 
1074 	if (!rustv0_parse_base62(st, sv, &n))
1075 		return (B_FALSE);
1076 	n += 1;
1077 
1078 	if (!rust_append(st, "for<"))
1079 		return (B_FALSE);
1080 
1081 	for (i = 0; i < n; i++) {
1082 		if (i > 0 && !rust_append(st, ", "))
1083 			return (B_FALSE);
1084 
1085 		st->rs_lt_depth++;
1086 		if (!rustv0_append_lifetime(st, 1))
1087 			return (B_FALSE);
1088 	}
1089 
1090 	if (!rust_append(st, "> "))
1091 		return (B_FALSE);
1092 
1093 	return (B_TRUE);
1094 }
1095 
1096 /*
1097  * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type>
1098  *
1099  * Note that while the Rust RFC states the binder is manditory, based on
1100  * actual examples, and comparing with the rust-based demangler, it is in
1101  * fact optional.
1102  */
1103 static boolean_t
rustv0_parse_fnsig(rust_state_t * restrict st,strview_t * restrict sv)1104 rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv)
1105 {
1106 	uint64_t save_lt = st->rs_lt_depth;
1107 
1108 	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
1109 
1110 	if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
1111 		return (B_FALSE);
1112 
1113 	if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe "))
1114 		return (B_FALSE);
1115 
1116 	if (sv_consume_if_c(sv, 'K') &&
1117 	    (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) ||
1118 	    !rust_append(st, "\" ")))
1119 		return (B_FALSE);
1120 
1121 	if (!rust_append(st, "fn("))
1122 		return (B_FALSE);
1123 
1124 	if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE,
1125 	    NULL)) {
1126 		return (B_FALSE);
1127 	}
1128 
1129 	if (!rust_appendc(st, ')'))
1130 		return (B_FALSE);
1131 
1132 	/* If the return type is (), don't print it */
1133 	if (!sv_consume_if_c(sv, 'u')) {
1134 		if (!rust_append(st, " -> "))
1135 			return (B_FALSE);
1136 
1137 		if (!rustv0_parse_type(st, sv, B_FALSE))
1138 			return (B_FALSE);
1139 	}
1140 
1141 	st->rs_lt_depth = save_lt;
1142 
1143 	return (B_TRUE);
1144 }
1145 
1146 /*
1147  * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
1148  */
1149 static boolean_t
rustv0_parse_dyn_trait_assoc_binding(rust_state_t * restrict st,strview_t * restrict sv,boolean_t open)1150 rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st,
1151     strview_t *restrict sv, boolean_t open)
1152 {
1153 	size_t save_len;
1154 
1155 	if (HAS_ERROR(st))
1156 		return (B_FALSE);
1157 
1158 	if (sv_remaining(sv) == 0)
1159 		return (B_FALSE);
1160 
1161 	if (!sv_consume_if_c(sv, 'p'))
1162 		return (B_FALSE);
1163 
1164 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1165 	SAVE_LEN(st, save_len);
1166 
1167 	if (!rust_append(st, open ? ", " : "<"))
1168 		return (B_FALSE);
1169 
1170 	if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) {
1171 		st->rs_error = EINVAL;
1172 		return (B_FALSE);
1173 	}
1174 
1175 	if (!rust_append(st, " = "))
1176 		return (B_FALSE);
1177 
1178 	if (!rustv0_parse_type(st, sv, B_FALSE)) {
1179 		st->rs_error = EINVAL;
1180 		return (B_FALSE);
1181 	}
1182 
1183 	DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len));
1184 
1185 	return (B_TRUE);
1186 }
1187 
1188 static boolean_t
rustv0_parse_dyn_trait(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)1189 rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv,
1190     boolean_t dummy __unused)
1191 {
1192 	boolean_t stay_save = st->rs_args_stay_open;
1193 	boolean_t open_save = st->rs_args_is_open;
1194 	boolean_t open = B_FALSE;
1195 
1196 	if (HAS_ERROR(st))
1197 		return (B_FALSE);
1198 
1199 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1200 
1201 	/*
1202 	 * This is a bit subtle, but when formatting a trait in trait,
1203 	 * we want something like this:
1204 	 *
1205 	 *	dyn Trait<T, U, Assoc=X>
1206 	 *
1207 	 * instead of
1208 	 *
1209 	 *	dyn Trait<T, U, <Assoc=X>>
1210 	 *
1211 	 * So when parsing the path, if we encounter generic arguments, we want
1212 	 * the arg list to remain open at the end of processing the path so
1213 	 * we can append the bindings to it. We set rs_args_stay_open to B_TRUE
1214 	 * to indidcate to rustv0_parse_path() that a generic argument list
1215 	 * should not be closed (i.e. don't append a '>' at the end of the
1216 	 * list). If rustv0_parse_path() encounters a list of generic arguments,
1217 	 * it will also set rs->args_is_open to indiciate it opened the list.
1218 	 * We save this in 'open' so that when we process the associated
1219 	 * bindings, we know if we need to open the list on the first binding
1220 	 * or not -- we don't want 'dyn Trait<>' if there are no bindings,
1221 	 * just 'dyn Trait'.
1222 	 */
1223 	st->rs_args_stay_open = B_TRUE;
1224 	st->rs_args_is_open = B_FALSE;
1225 
1226 	if (!rustv0_parse_path(st, sv, B_FALSE)) {
1227 		st->rs_args_stay_open = stay_save;
1228 		st->rs_args_is_open = open_save;
1229 		return (B_FALSE);
1230 	}
1231 
1232 	open = st->rs_args_is_open;
1233 
1234 	st->rs_args_stay_open = stay_save;
1235 	st->rs_args_is_open = open_save;
1236 
1237 	while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) {
1238 		open = B_TRUE;
1239 	}
1240 
1241 	if (HAS_ERROR(st))
1242 		return (B_FALSE);
1243 
1244 	if (open && !rust_appendc(st, '>'))
1245 		return (B_FALSE);
1246 
1247 	return (!HAS_ERROR(st));
1248 }
1249 
1250 static boolean_t
rustv0_parse_dynbounds(rust_state_t * restrict st,strview_t * restrict sv)1251 rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv)
1252 {
1253 	uint64_t save_lt = st->rs_lt_depth;
1254 
1255 	if (HAS_ERROR(st))
1256 		return (B_FALSE);
1257 
1258 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1259 
1260 	/*
1261 	 * This is another case where Rust RFC2603 seems to disagree with
1262 	 * the implementation. The RFC implies this is mandatory, while
1263 	 * the implementations treat it as optional.
1264 	 */
1265 	if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
1266 		return (B_FALSE);
1267 
1268 	if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ",
1269 	    B_FALSE, NULL))
1270 		return (B_FALSE);
1271 
1272 	st->rs_lt_depth = save_lt;
1273 
1274 	return (B_TRUE);
1275 }
1276 
1277 static boolean_t
rustv0_parse_generic_arg(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)1278 rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv,
1279     boolean_t dummy __unused)
1280 {
1281 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1282 
1283 	if (sv_consume_if_c(sv, 'K'))
1284 		return (rustv0_parse_const(st, sv, B_FALSE));
1285 
1286 	if (rustv0_parse_lifetime(st, sv))
1287 		return (B_TRUE);
1288 
1289 	return (rustv0_parse_type(st, sv, B_FALSE));
1290 }
1291 
1292 /*
1293  * Parse a hex value into *valp. Note that rust only uses lower case
1294  * hex values.
1295  */
1296 static boolean_t
rustv0_parse_hex_num(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)1297 rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv,
1298     uint64_t *restrict valp)
1299 {
1300 	uint64_t val = 0;
1301 	size_t ndigits = 0;
1302 
1303 	if (HAS_ERROR(st))
1304 		return (B_FALSE);
1305 
1306 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1307 
1308 	if (sv_remaining(sv) == 0)
1309 		return (B_FALSE);
1310 
1311 	/*
1312 	 * Unfortunately, Rust RFC 2603 also doesn't not explicty define
1313 	 * {hex-digits}. We follow what decimal digits does, and treat a
1314 	 * leading 0 as a terminator.
1315 	 */
1316 	while (sv_remaining(sv) > 0) {
1317 		char c = sv_peek(sv, 0);
1318 
1319 		if (ISDIGIT(c)) {
1320 			val *= 16;
1321 			val += c - '0';
1322 		} else if (c >= 'a' && c <= 'f') {
1323 			val *= 16;
1324 			val += c - 'a' + 10;
1325 		} else {
1326 			break;
1327 		}
1328 
1329 		sv_consume_n(sv, 1);
1330 
1331 		if (++ndigits == 1 && val == 0)
1332 			break;
1333 	}
1334 
1335 	if (ndigits > 0)
1336 		*valp = val;
1337 
1338 	return ((ndigits > 0) ? B_TRUE : B_FALSE);
1339 }
1340 
1341 /*
1342  * Parse a base62 number into *valp.  The number is explicitly terminated
1343  * by a '_'.  The values are also offset by 0 -- that is '_' == 0,
1344  * '0_' == 1, ...
1345  */
1346 static boolean_t
rustv0_parse_base62(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)1347 rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv,
1348     uint64_t *restrict valp)
1349 {
1350 	uint64_t val = 0;
1351 	char c;
1352 
1353 	if (HAS_ERROR(st))
1354 		return (B_FALSE);
1355 
1356 	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1357 
1358 	if (sv_remaining(sv) == 0)
1359 		return (B_FALSE);
1360 
1361 	/* A terminating '_' without any digits is 0 */
1362 	if (sv_consume_if_c(sv, '_')) {
1363 		*valp = 0;
1364 		return (B_TRUE);
1365 	}
1366 
1367 	/* Need at least one valid digit if > 0 */
1368 	if (!ISALNUM(sv_peek(sv, 0)))
1369 		return (B_FALSE);
1370 
1371 	while (sv_remaining(sv) > 0) {
1372 		c = sv_consume_c(sv);
1373 
1374 		if (c == '_') {
1375 			/*
1376 			 * Because a lone '_' was already handled earlier,
1377 			 * we know we've had at least one other digit and
1378 			 * can increment the value and return.
1379 			 */
1380 			*valp = val + 1;
1381 			return (B_TRUE);
1382 		} else if (ISDIGIT(c)) {
1383 			val *= 62;
1384 			val += c - '0';
1385 		} else if (ISLOWER(c)) {
1386 			val *= 62;
1387 			val += c - 'a' + 10;
1388 		} else if (ISUPPER(c)) {
1389 			val *= 62;
1390 			val += c - 'A' + 36;
1391 		} else {
1392 			return (B_FALSE);
1393 		}
1394 	}
1395 
1396 	/* We reached the end of the string without a terminating _ */
1397 	return (B_FALSE);
1398 }
1399 
1400 static const_type_class_t
rustv0_classify_const_type(char type)1401 rustv0_classify_const_type(char type)
1402 {
1403 	switch (type) {
1404 	case 'h': case 't': case 'm': case 'y': case 'o': case 'j':
1405 		return (CTC_UNSIGNED);
1406 	case 'a': case 'i': case 'l': case 'n': case 's': case 'x':
1407 		return (CTC_SIGNED);
1408 	case 'b':
1409 		return (CTC_BOOL);
1410 	case 'c':
1411 		return (CTC_CHAR);
1412 	default:
1413 		return (CTC_INVALID);
1414 	}
1415 }
1416 
1417 /*
1418  * Make sure the name is a plausible mangled rust symbol.
1419  * Non-ASCII are never allowed.  Rust itself uses [_0-9A-Za-z], however
1420  * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO).
1421  * As such we proceed in two phases. We first only allow [_0-9A-Z-az] until
1422  * we encounter a '.'. At that point, any ASCII character is allowed.
1423  */
1424 static boolean_t
rustv0_valid_sym(const strview_t * sv)1425 rustv0_valid_sym(const strview_t *sv)
1426 {
1427 	size_t i;
1428 	boolean_t check_rust = B_TRUE;
1429 
1430 	for (i = 0; i < sv->sv_rem; i++) {
1431 		char c = sv->sv_first[i];
1432 
1433 		if (ISALNUM(c) || c == '_')
1434 			continue;
1435 
1436 		if (c == '.') {
1437 			check_rust = B_FALSE;
1438 			continue;
1439 		}
1440 
1441 		if (check_rust || (c & 0x80) != 0) {
1442 			DEMDEBUG("%s: ERROR found invalid character '%c' "
1443 			    "in '%.*s' at index %zu",
1444 			    __func__, c, SV_PRINT(sv), i);
1445 			return (B_FALSE);
1446 		}
1447 	}
1448 	return (B_TRUE);
1449 }
1450