/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2019 Joyent, Inc. * Copyright 2021 Jason King */ /* BEGIN CSTYLED */ /* * This implements the 'symbol_name_mangling_v2' demangling for rust as * described in Rust RFC 2603 as opposed to the original (now called * legacy) mangling older versions of rust used (implemented in rust.c). * * The specification can be viewed at: * https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md */ /* END CSTYLED */ #include #include #include #include #include #include #include "rust.h" /* * Help track amount of additional output added to rs_demangled across * a function call (to allow that portion to be output for debugging) */ #define SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled) #define CSTR_END(_st, _len) \ ((int)(custr_len((_st)->rs_demangled) - (_len))), \ custr_cstr((_st)->rs_demangled) + (_len) typedef enum const_type_class { CTC_INVALID = -1, CTC_UNSIGNED, CTC_SIGNED, CTC_CHAR, CTC_BOOL, } const_type_class_t; /* * Sometimes, parsing something is optional. In this case a failure to * parse is fine, however we still want to consider a fatal error as * failure. */ #define OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st)) static boolean_t rustv0_valid_sym(const strview_t *); static const_type_class_t rustv0_classify_const_type(char); static boolean_t rustv0_parse_hex_num(rust_state_t *restrict, strview_t *restrict, uint64_t *restrict); static boolean_t rustv0_parse_base62(rust_state_t *restrict, strview_t *restrict, uint64_t *restrict); static boolean_t rustv0_parse_undisambiguated_identifier( rust_state_t *restrict, strview_t *restrict, boolean_t); static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict, strview_t *restrict, uint64_t *restrict); static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict, boolean_t); static boolean_t rustv0_parse_impl_path(rust_state_t *restrict, strview_t *restrict, boolean_t); static boolean_t rustv0_parse_nested_path(rust_state_t *restrict, strview_t *restrict, boolean_t); static boolean_t rustv0_parse_basic_type(rust_state_t *restrict, strview_t *restrict); static boolean_t rustv0_parse_backref(rust_state_t *restrict, strview_t *restrict, boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t), boolean_t); static boolean_t rustv0_parse_lifetime(rust_state_t *restrict, strview_t *restrict); static boolean_t rustv0_parse_const(rust_state_t *restrict, strview_t *restrict, boolean_t); static boolean_t rustv0_parse_fnsig(rust_state_t *restrict, strview_t *restrict); static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict, strview_t *restrict); static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict, strview_t *restrict, boolean_t); boolean_t rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv) { boolean_t save_skip; boolean_t ret; /* Make sure all the characters are valid */ if (!rustv0_valid_sym(sv)) { st->rs_error = EINVAL; return (B_FALSE); } /* * = "_R" [] * [] * * We've already parsed the prefix in rust_demangle(), as well * as made sure there's no [] present, so * start with . */ if (!rustv0_parse_path(st, sv, B_TRUE)) return (B_FALSE); /* [] -- parse but don't save */ SKIP_BEGIN(st, save_skip); ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE)); SKIP_END(st, save_skip); if (!ret) return (B_FALSE); /* If nothing's left, we know we're done */ if (sv_remaining(sv) == 0) return (!HAS_ERROR(st)); /* * LLVM sometimes will suffix symbols starting with a '.' * followed by extra data. For things that start with * ".llvm.", we discard the rest of the string. For * other things that start with '.', we copy the * results to the final string. This matches * what the rust native demangler crate does, and * we don't see a reason to deviate from their * behavior. */ if (sv_consume_if(sv, ".llvm.")) return (!HAS_ERROR(st)); if (sv_peek(sv, 0) != '.') { DEMDEBUG("%s: Unexpected trailing data at the end of the " "name: '%.*s'", __func__, SV_PRINT(sv)); st->rs_error = EINVAL; return (B_FALSE); } return (rust_append_sv(st, sv_remaining(sv), sv)); } /* * Parse an optional list terminated by 'E'. Each result of 'fn' is * separated by 'sep' in the output. */ static boolean_t rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv, boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t), const char *restrict sep, boolean_t bval, size_t *restrict countp) { size_t count = 0; DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); while (sv_remaining(sv) > 0) { if (sv_consume_if_c(sv, 'E')) { if (countp != NULL) *countp += count; return (B_TRUE); } if (count > 0 && !rust_append(st, sep)) return (B_FALSE); if (!fn(st, sv, bval)) return (B_FALSE); count++; } /* * An optional list should terminate with an 'E'. If we get here, * we ran out of charaters and didn't terminate as we should. */ return (B_FALSE); } static boolean_t rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv) { const char *str = NULL; strview_t save; char c; if (HAS_ERROR(st) || sv_remaining(sv) == 0) return (B_FALSE); sv_init_sv(&save, sv); switch (c = sv_consume_c(sv)) { case 'h': str = "u8"; break; case 't': str = "u16"; break; case 'm': str = "u32"; break; case 'y': str = "u64"; break; case 'o': str = "u128"; break; case 'j': /* usize */ str = "usize"; break; default: sv_init_sv(sv, &save); return (B_FALSE); } DEMDEBUG("%s: %c -> %s", __func__, c, str); return (rust_append(st, str)); } static boolean_t rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv) { const char *str = NULL; strview_t save; char c; if (HAS_ERROR(st) || sv_remaining(sv) == 0) return (B_FALSE); if (rustv0_parse_uint_type(st, sv)) return (B_TRUE); sv_init_sv(&save, sv); switch (c = sv_consume_c(sv)) { case 'a': str = "i8"; break; case 'b': str = "bool"; break; case 'c': str = "char"; break; case 'd': str = "f64"; break; case 'e': str = "str"; break; case 'f': str = "f32"; break; case 'i': str = "isize"; break; case 'l': str = "i32"; break; case 'n': str = "i128"; break; case 'p': str = "_"; break; case 's': str = "i16"; break; case 'u': str = "()"; break; case 'v': str = "..."; break; case 'x': str = "i64"; break; case 'z': str = "!"; break; default: sv_init_sv(sv, &save); return (B_FALSE); } DEMDEBUG("%s: %c -> %s", __func__, c, str); return (rust_append(st, str)); } static boolean_t rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv, boolean_t dummy __unused) { strview_t save; size_t len, tuple_elem_count; boolean_t ret; char c; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); SAVE_LEN(st, len); sv_init_sv(&save, sv); switch (c = sv_consume_c(sv)) { case 'A': ret = rust_appendc(st, '[') && rustv0_parse_type(st, sv, B_FALSE) && rust_append(st, "; ") && rustv0_parse_const(st, sv, B_FALSE) && rust_appendc(st, ']'); break; case 'S': ret = rust_appendc(st, '[') && rustv0_parse_type(st, sv, B_FALSE) && rust_appendc(st, ']'); break; case 'T': tuple_elem_count = 0; ret = rust_appendc(st, '(') && rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE, &tuple_elem_count) && rust_append(st, (tuple_elem_count == 1) ? ",)" : ")"); break; case 'R': case 'Q': /* `&mut T` or `&'... mut T` */ if (!(ret = rust_appendc(st, '&'))) break; /* * lifetime is optional, but we need to add a trailing * space if present (so we cannot use the OPTIONAL macro). */ if (rustv0_parse_lifetime(st, sv)) { if (!(ret = rust_appendc(st, ' '))) break; } else if (HAS_ERROR(st)) { break; } ret = rust_append(st, (c == 'Q') ? "mut " : "") && rustv0_parse_type(st, sv, B_FALSE); break; case 'P': ret = rust_append(st, "*const ") && rustv0_parse_type(st, sv, B_FALSE); break; case 'O': ret = rust_append(st, "*mut ") && rustv0_parse_type(st, sv, B_FALSE); break; case 'F': ret = rustv0_parse_fnsig(st, sv); break; case 'D': ret = rust_append(st, "dyn ") && rustv0_parse_dynbounds(st, sv); if (!ret) break; /* * Rust RFC2603 shows the lifetime as required, however * it appears this is optional. */ DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv)); /* * We only want to print a non-zero (non "'_") * lifetime. */ if (sv_consume_if(sv, "L_")) break; /* * But if there is a lifetime we want to print, * we want to prepend " + " before it. */ if (sv_peek(sv, 0) == 'L' && !(ret = rust_append(st, " + "))) break; ret = rustv0_parse_lifetime(st, sv); break; default: sv_init_sv(sv, &save); ret = rustv0_parse_backref(st, sv, rustv0_parse_type, B_FALSE) || rustv0_parse_basic_type(st, sv); if (ret) break; ret = rustv0_parse_path(st, sv, B_FALSE); break; } DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len), ret ? "success" : "fail"); return (ret); } /* * = "C" crate root * | "M" * | "X" (trait impl) * | "Y" (trait definition) * | "N" ...::ident (nested path) * | "I" {} "E" ... * | */ static boolean_t rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv, boolean_t in_value) { strview_t save; uint64_t disamb = 0; size_t len; boolean_t ret = B_FALSE; boolean_t save_skip; boolean_t args_stay_save = st->rs_args_stay_open; boolean_t args_open_save = st->rs_args_is_open; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); SAVE_LEN(st, len); sv_init_sv(&save, sv); switch (sv_consume_c(sv)) { case 'C': if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb))) goto done; if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) goto done; if (st->rs_verbose && !rust_append_printf(st, "[%" PRIx64 "]", disamb)) goto done; break; case 'M': SKIP_BEGIN(st, save_skip); if (!rustv0_parse_impl_path(st, sv, in_value)) { SKIP_END(st, save_skip); goto done; } SKIP_END(st, save_skip); if (!rust_appendc(st, '<') || !rustv0_parse_type(st, sv, B_FALSE) || !rust_appendc(st, '>')) goto done; break; case 'X': SKIP_BEGIN(st, save_skip); if (!rustv0_parse_impl_path(st, sv, in_value)) { SKIP_END(st, save_skip); goto done; } SKIP_END(st, save_skip); /*FALLTHRU*/ case 'Y': if (!rust_appendc(st, '<') || !rustv0_parse_type(st, sv, B_FALSE) || !rust_append(st, " as ") || !rustv0_parse_path(st, sv, B_FALSE) || !rust_appendc(st, '>')) goto done; break; case 'N': if (!rustv0_parse_nested_path(st, sv, in_value)) goto done; break; case 'I': st->rs_args_stay_open = B_FALSE; st->rs_args_is_open = B_FALSE; if (!rustv0_parse_path(st, sv, in_value)) goto done; if (in_value && !rust_append(st, "::")) goto done; if (!rust_appendc(st, '<') || !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg, ", ", B_FALSE, NULL)) goto done; st->rs_args_stay_open = args_stay_save; st->rs_args_is_open = args_open_save; /* * If we were asked to not close our list, then don't and * indicate that the list is open. */ if (st->rs_args_stay_open) { st->rs_args_stay_open = B_FALSE; st->rs_args_is_open = B_TRUE; } else if (!rust_appendc(st, '>')) { goto done; } break; default: /* * Didn't recognize the letter, so it has to be a path. Restore * sv to state prior to switch and continue. */ sv_init_sv(sv, &save); if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value)) goto done; } ret = B_TRUE; done: DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len), ret ? "success" : "fail"); return (ret); } static boolean_t rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv, boolean_t in_value) { uint64_t val = 0; return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) && rustv0_parse_path(st, sv, in_value)); } /* * A bit of a hack -- when printing a nested path, we need to know * if the identifier is there or not in order to correctly format * the output preceeding it (when present). This peeks ahead and * determines this. */ static boolean_t rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv, boolean_t *has_namep) { strview_t save; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); sv_init_sv(&save, sv); /* For checking the length, we don't care if it's punycode or not */ (void) sv_consume_if_c(&save, 'u'); if (sv_remaining(sv) == 0) { st->rs_error = EINVAL; return (B_FALSE); } if (sv_consume_if_c(&save, '0')) { *has_namep = B_FALSE; return (B_TRUE); } *has_namep = B_TRUE; return (B_TRUE); } static boolean_t rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv, boolean_t in_value) { uint64_t disambiguator = 0; size_t len = 0; char ns; boolean_t ret = B_FALSE; boolean_t has_name; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); SAVE_LEN(st, len); ns = sv_consume_c(sv); if (!rustv0_parse_path(st, sv, in_value)) goto done; if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator))) goto done; if (!rustv0_has_name(st, sv, &has_name)) goto done; if (ISUPPER(ns)) { if (!rust_append(st, "::{")) goto done; switch (ns) { case 'C': if (!rust_append(st, "closure")) goto done; break; case 'S': if (!rust_append(st, "shim")) goto done; break; default: if (!rust_appendc(st, ns)) goto done; break; } if (has_name && !rust_appendc(st, ':')) goto done; if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) goto done; ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator); } else { if (has_name) { if (!(ret = rust_append(st, "::"))) goto done; } ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE); } done: DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len), ret ? "success" : "fail"); return (ret); } /* * = "s" * */ static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv, uint64_t *valp) { if (HAS_ERROR(st) || sv_remaining(sv) < 2) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); *valp = 0; if (!sv_consume_if_c(sv, 's')) return (B_FALSE); if (!rustv0_parse_base62(st, sv, valp)) { st->rs_error = EINVAL; return (B_FALSE); } /* * Rust RFC 2603 details this in Appendix A, but not the main * portion of the RFC. If no disambiguator is present, the value * is 0, if the decoded value is 0, the index is 1, ... * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we * only need to add one here to complete the adjustment. */ *valp = *valp + 1; DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp); return (B_TRUE); } /* = ["u"] ["_"] */ static boolean_t rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st, strview_t *restrict sv, boolean_t repl_underscore) { uint64_t len = 0; boolean_t puny = B_FALSE; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); if (sv_consume_if_c(sv, 'u')) puny = B_TRUE; if (!rust_parse_base10(st, sv, &len)) return (B_FALSE); /* skip optional separator '_' */ (void) sv_consume_if_c(sv, '_'); if (sv_remaining(sv) < len) { DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") " "> remaining bytes (%zu)", __func__, len, sv_remaining(sv)); return (B_FALSE); } /* 0 length identifiers are acceptable */ if (len == 0) return (B_TRUE); if (puny) { strview_t ident; sv_init_sv_range(&ident, sv, len); if (!rustv0_puny_decode(st, &ident, repl_underscore)) return (B_FALSE); sv_consume_n(sv, len); return (B_TRUE); } /* * rust identifiers do not contain '-'. However ABI identifiers * are allowed to contain them (e.g. extern "foo-bar" fn ...). * They are substituted with '_' in the mangled output. If we * do not need to reverse this, we can just append 'len' bytes * of sv. Otherwise we need to go through and reverse this * substitution. */ if (!repl_underscore) return (rust_append_sv(st, len, sv)); /* * We checked earlier that len < sv_remaining(sv); so this loop * cannot overrun. */ for (size_t i = 0; i < len; i++) { char c = sv_consume_c(sv); if (c == '_') c = '-'; if (!rust_appendc(st, c)) return (B_FALSE); } return (B_TRUE); } /* = "B" */ static boolean_t rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv, boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b), boolean_t bval) { strview_t backref; strview_t target; uint64_t idx = 0; size_t save_len; size_t len; if (HAS_ERROR(st)) return (B_FALSE); sv_init_sv(&backref, sv); if (!sv_consume_if_c(sv, 'B')) return (B_FALSE); DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv)); if (!rustv0_parse_base62(st, sv, &idx)) { st->rs_error = EINVAL; return (B_FALSE); } /* * Determine how many bytes we've consumed (up to the start of * the current backref token). */ VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first); len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first); /* * The backref can only refer to an index prior to the start of * the current backref token -- that is must always refer back in * the string, never to the current position or beyond. */ if (idx >= len) { DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") " "is out of range [0, %zu)", __func__, idx, len); st->rs_error = ERANGE; return (B_FALSE); } /* * Create a strview_t of the original string (sans prefix) by * copying from st->rs_orig. The length of the target strview_t is * capped to end immediately prior to this backref token. Since we * enforce that backrefs must always refer to already processed * portions of the string (i.e. must always refer backwards), and the * length of the strview_t is set to end prior to the start of this * backref token, we guarantee processing of a backref will always * terminate before it can possibly encounter this backref token * and cause a loop -- either the processing terminates normally or * it reaches the end of the capped strview_t. */ sv_init_sv_range(&target, &st->rs_orig, len); /* * Consume all the input in the target strview_t up to the index */ sv_consume_n(&target, idx); DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__, idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : ""); /* * If we're skipping the output, there's no reason to bother reparsing * the output -- we're not going to save it. We still setup everything * so that the debug output is still emitted. */ if (st->rs_skip) return (B_TRUE); SAVE_LEN(st, save_len); if (!fn(st, &target, bval)) return (B_FALSE); DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len)); return (B_TRUE); } static boolean_t rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime) { uint64_t bound_lt; if (HAS_ERROR(st)) return (B_FALSE); if (!rust_appendc(st, '\'')) return (B_FALSE); if (lifetime == 0) return (rust_appendc(st, '_')); if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) { DEMDEBUG("%s: ERROR: lifetime value %" PRIu64 " > current depth %" PRIu64, __func__, lifetime, st->rs_lt_depth); st->rs_lt_depth = ERANGE; return (B_FALSE); } /* * Use 'a, 'b, ... */ if (bound_lt < 26) { char c = (char)bound_lt + 'a'; return (rust_append_printf(st, "%c", c)); } /* * Otherwise, use '_123, '_456, ... */ return (rust_append_printf(st, "_%" PRIu64, bound_lt)); } static boolean_t rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv) { uint64_t lifetime; if (!sv_consume_if_c(sv, 'L')) return (B_FALSE); if (!rustv0_parse_base62(st, sv, &lifetime)) return (B_FALSE); return (rustv0_append_lifetime(st, lifetime)); } static boolean_t rustv0_parse_const_data(rust_state_t *restrict st, const_type_class_t type_class, strview_t *restrict sv) { uint64_t val = 0; size_t save_len; boolean_t neg = B_FALSE; boolean_t ret = B_FALSE; VERIFY3S(type_class, !=, CTC_INVALID); if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); SAVE_LEN(st, save_len); if (sv_remaining(sv) == 0) return (B_FALSE); if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n')) neg = B_TRUE; ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) && sv_consume_if_c(sv, '_'); if (!ret) goto done; switch (type_class) { case CTC_SIGNED: case CTC_UNSIGNED: ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val); break; case CTC_BOOL: if (val > 1) { DEMDEBUG("%s: invalid bool val %" PRIu64, __func__, val); ret = B_FALSE; break; } ret = rust_append_printf(st, "%s", (val == 0) ? "false" : "true"); break; case CTC_CHAR: if (val > UINT32_MAX) { DEMDEBUG("%s: char value %" PRIu64 " out of range", __func__, val); ret = B_FALSE; break; } ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) && rust_appendc(st, '\''); break; default: ret = B_FALSE; } done: DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len), ret ? "success" : "fail"); return (ret); } static boolean_t rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv, boolean_t dummy __unused) { strview_t type; size_t start_len; const_type_class_t ctype_class; char ctype; boolean_t save_skip; boolean_t ret; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); SAVE_LEN(st, start_len); if (sv_remaining(sv) == 0) return (B_FALSE); if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE)) return (B_TRUE); if (sv_consume_if_c(sv, 'p')) { ret = rust_appendc(st, '_'); goto done; } ctype = sv_peek(sv, 0); ctype_class = rustv0_classify_const_type(ctype); if (ctype_class == CTC_INVALID) { DEMDEBUG("%s: const type isn't a valid const generic type", __func__); return (B_FALSE); } /* * This isn't spelled out clearly in Rust RFC 2603, but currently * only unsigned int types are allowed at this point. However, we * have a bit of a potential tricky situation. Unlike formatting * the other tokens, if we want to display the type, we do so * _after_ the value, even though the type appears first. * * This is bit of a hack, but we save off the input position from * sv before the parse the type. We then parse it without saving * the resulting value, then parse and output the constant. If * we wish to then display the type, we can go back and parse * the type again, this time saving the result. */ sv_init_sv(&type, sv); SKIP_BEGIN(st, save_skip); ret = rustv0_parse_type(st, sv, B_FALSE); SKIP_END(st, save_skip); if (!ret) { DEMDEBUG("%s: const type isn't valid", __func__); return (B_FALSE); } if (sv_consume_if_c(sv, 'p')) { ret = rust_appendc(st, '_'); } else { ret = rustv0_parse_const_data(st, ctype_class, sv); } if (!ret) goto done; if (st->rs_show_const_type) { ret = rust_append(st, ": ") && rustv0_parse_uint_type(st, &type); } done: DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len), ret ? "success" : "fail"); return (ret); } static boolean_t rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv) { DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); if (sv_consume_if_c(sv, 'C')) return (rust_appendc(st, 'C')); return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE)); } static boolean_t rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv) { uint64_t n, i; if (!sv_consume_if_c(sv, 'G')) return (B_FALSE); if (!rustv0_parse_base62(st, sv, &n)) return (B_FALSE); n += 1; if (!rust_append(st, "for<")) return (B_FALSE); for (i = 0; i < n; i++) { if (i > 0 && !rust_append(st, ", ")) return (B_FALSE); st->rs_lt_depth++; if (!rustv0_append_lifetime(st, 1)) return (B_FALSE); } if (!rust_append(st, "> ")) return (B_FALSE); return (B_TRUE); } /* * := [] ["U"] ["K" ] {type} "E" * * Note that while the Rust RFC states the binder is manditory, based on * actual examples, and comparing with the rust-based demangler, it is in * fact optional. */ static boolean_t rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv) { uint64_t save_lt = st->rs_lt_depth; DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) return (B_FALSE); if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe ")) return (B_FALSE); if (sv_consume_if_c(sv, 'K') && (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) || !rust_append(st, "\" "))) return (B_FALSE); if (!rust_append(st, "fn(")) return (B_FALSE); if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE, NULL)) { return (B_FALSE); } if (!rust_appendc(st, ')')) return (B_FALSE); /* If the return type is (), don't print it */ if (!sv_consume_if_c(sv, 'u')) { if (!rust_append(st, " -> ")) return (B_FALSE); if (!rustv0_parse_type(st, sv, B_FALSE)) return (B_FALSE); } st->rs_lt_depth = save_lt; return (B_TRUE); } /* * = "p" */ static boolean_t rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st, strview_t *restrict sv, boolean_t open) { size_t save_len; if (HAS_ERROR(st)) return (B_FALSE); if (sv_remaining(sv) == 0) return (B_FALSE); if (!sv_consume_if_c(sv, 'p')) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); SAVE_LEN(st, save_len); if (!rust_append(st, open ? ", " : "<")) return (B_FALSE); if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) { st->rs_error = EINVAL; return (B_FALSE); } if (!rust_append(st, " = ")) return (B_FALSE); if (!rustv0_parse_type(st, sv, B_FALSE)) { st->rs_error = EINVAL; return (B_FALSE); } DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len)); return (B_TRUE); } static boolean_t rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv, boolean_t dummy __unused) { boolean_t stay_save = st->rs_args_stay_open; boolean_t open_save = st->rs_args_is_open; boolean_t open = B_FALSE; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); /* * This is a bit subtle, but when formatting a trait in trait, * we want something like this: * * dyn Trait * * instead of * * dyn Trait> * * So when parsing the path, if we encounter generic arguments, we want * the arg list to remain open at the end of processing the path so * we can append the bindings to it. We set rs_args_stay_open to B_TRUE * to indidcate to rustv0_parse_path() that a generic argument list * should not be closed (i.e. don't append a '>' at the end of the * list). If rustv0_parse_path() encounters a list of generic arguments, * it will also set rs->args_is_open to indiciate it opened the list. * We save this in 'open' so that when we process the associated * bindings, we know if we need to open the list on the first binding * or not -- we don't want 'dyn Trait<>' if there are no bindings, * just 'dyn Trait'. */ st->rs_args_stay_open = B_TRUE; st->rs_args_is_open = B_FALSE; if (!rustv0_parse_path(st, sv, B_FALSE)) { st->rs_args_stay_open = stay_save; st->rs_args_is_open = open_save; return (B_FALSE); } open = st->rs_args_is_open; st->rs_args_stay_open = stay_save; st->rs_args_is_open = open_save; while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) { open = B_TRUE; } if (HAS_ERROR(st)) return (B_FALSE); if (open && !rust_appendc(st, '>')) return (B_FALSE); return (!HAS_ERROR(st)); } static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv) { uint64_t save_lt = st->rs_lt_depth; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); /* * This is another case where Rust RFC2603 seems to disagree with * the implementation. The RFC implies this is mandatory, while * the implementations treat it as optional. */ if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) return (B_FALSE); if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ", B_FALSE, NULL)) return (B_FALSE); st->rs_lt_depth = save_lt; return (B_TRUE); } static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv, boolean_t dummy __unused) { DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_consume_if_c(sv, 'K')) return (rustv0_parse_const(st, sv, B_FALSE)); if (rustv0_parse_lifetime(st, sv)) return (B_TRUE); return (rustv0_parse_type(st, sv, B_FALSE)); } /* * Parse a hex value into *valp. Note that rust only uses lower case * hex values. */ static boolean_t rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv, uint64_t *restrict valp) { uint64_t val = 0; size_t ndigits = 0; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); /* * Unfortunately, Rust RFC 2603 also doesn't not explicty define * {hex-digits}. We follow what decimal digits does, and treat a * leading 0 as a terminator. */ while (sv_remaining(sv) > 0) { char c = sv_peek(sv, 0); if (ISDIGIT(c)) { val *= 16; val += c - '0'; } else if (c >= 'a' && c <= 'f') { val *= 16; val += c - 'a' + 10; } else { break; } sv_consume_n(sv, 1); if (++ndigits == 1 && val == 0) break; } if (ndigits > 0) *valp = val; return ((ndigits > 0) ? B_TRUE : B_FALSE); } /* * Parse a base62 number into *valp. The number is explicitly terminated * by a '_'. The values are also offset by 0 -- that is '_' == 0, * '0_' == 1, ... */ static boolean_t rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv, uint64_t *restrict valp) { uint64_t val = 0; char c; if (HAS_ERROR(st)) return (B_FALSE); DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); if (sv_remaining(sv) == 0) return (B_FALSE); /* A terminating '_' without any digits is 0 */ if (sv_consume_if_c(sv, '_')) { *valp = 0; return (B_TRUE); } /* Need at least one valid digit if > 0 */ if (!ISALNUM(sv_peek(sv, 0))) return (B_FALSE); while (sv_remaining(sv) > 0) { c = sv_consume_c(sv); if (c == '_') { /* * Because a lone '_' was already handled earlier, * we know we've had at least one other digit and * can increment the value and return. */ *valp = val + 1; return (B_TRUE); } else if (ISDIGIT(c)) { val *= 62; val += c - '0'; } else if (ISLOWER(c)) { val *= 62; val += c - 'a' + 10; } else if (ISUPPER(c)) { val *= 62; val += c - 'A' + 36; } else { return (B_FALSE); } } /* We reached the end of the string without a terminating _ */ return (B_FALSE); } static const_type_class_t rustv0_classify_const_type(char type) { switch (type) { case 'h': case 't': case 'm': case 'y': case 'o': case 'j': return (CTC_UNSIGNED); case 'a': case 'i': case 'l': case 'n': case 's': case 'x': return (CTC_SIGNED); case 'b': return (CTC_BOOL); case 'c': return (CTC_CHAR); default: return (CTC_INVALID); } } /* * Make sure the name is a plausible mangled rust symbol. * Non-ASCII are never allowed. Rust itself uses [_0-9A-Za-z], however * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO). * As such we proceed in two phases. We first only allow [_0-9A-Z-az] until * we encounter a '.'. At that point, any ASCII character is allowed. */ static boolean_t rustv0_valid_sym(const strview_t *sv) { size_t i; boolean_t check_rust = B_TRUE; for (i = 0; i < sv->sv_rem; i++) { char c = sv->sv_first[i]; if (ISALNUM(c) || c == '_') continue; if (c == '.') { check_rust = B_FALSE; continue; } if (check_rust || (c & 0x80) != 0) { DEMDEBUG("%s: ERROR found invalid character '%c' " "in '%.*s' at index %zu", __func__, c, SV_PRINT(sv), i); return (B_FALSE); } } return (B_TRUE); }