1*6a6cfa5dSJason King /* 2*6a6cfa5dSJason King * This file and its contents are supplied under the terms of the 3*6a6cfa5dSJason King * Common Development and Distribution License ("CDDL"), version 1.0. 4*6a6cfa5dSJason King * You may only use this file in accordance with the terms of version 5*6a6cfa5dSJason King * 1.0 of the CDDL. 6*6a6cfa5dSJason King * 7*6a6cfa5dSJason King * A full copy of the text of the CDDL should have accompanied this 8*6a6cfa5dSJason King * source. A copy of the CDDL is also available via the Internet at 9*6a6cfa5dSJason King * http://www.illumos.org/license/CDDL. 10*6a6cfa5dSJason King */ 11*6a6cfa5dSJason King 12*6a6cfa5dSJason King /* 13*6a6cfa5dSJason King * Copyright 2019, Joyent, Inc. 14*6a6cfa5dSJason King */ 15*6a6cfa5dSJason King 16*6a6cfa5dSJason King #include <errno.h> 17*6a6cfa5dSJason King #include <libcustr.h> 18*6a6cfa5dSJason King #include <limits.h> 19*6a6cfa5dSJason King #include <string.h> 20*6a6cfa5dSJason King #include <sys/ctype.h> /* We want the C locale ISXXX() versions */ 21*6a6cfa5dSJason King #include <sys/debug.h> 22*6a6cfa5dSJason King #include <stdio.h> 23*6a6cfa5dSJason King #include <sys/sysmacros.h> 24*6a6cfa5dSJason King 25*6a6cfa5dSJason King #include "strview.h" 26*6a6cfa5dSJason King #include "demangle_int.h" 27*6a6cfa5dSJason King 28*6a6cfa5dSJason King /* 29*6a6cfa5dSJason King * Unfortunately, there is currently no official specification for the rust 30*6a6cfa5dSJason King * name mangling. This is an attempt to document the understanding of the 31*6a6cfa5dSJason King * mangling used here. It is based off examination of 32*6a6cfa5dSJason King * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ 33*6a6cfa5dSJason King * 34*6a6cfa5dSJason King * A mangled rust name is: 35*6a6cfa5dSJason King * <prefix> <name> <hash> E 36*6a6cfa5dSJason King * 37*6a6cfa5dSJason King * <prefix> ::= _Z 38*6a6cfa5dSJason King * __Z 39*6a6cfa5dSJason King * 40*6a6cfa5dSJason King * <name> ::= <name-segment>+ 41*6a6cfa5dSJason King * 42*6a6cfa5dSJason King * <name-segment> ::= <len> <name-chars>{len} 43*6a6cfa5dSJason King * 44*6a6cfa5dSJason King * <len> ::= [1-9][0-9]+ 45*6a6cfa5dSJason King * 46*6a6cfa5dSJason King * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>* 47*6a6cfa5dSJason King * <separator> 48*6a6cfa5dSJason King * <special> 49*6a6cfa5dSJason King * 50*6a6cfa5dSJason King * <separator> ::= '..' # '::' 51*6a6cfa5dSJason King * 52*6a6cfa5dSJason King * <special> ::= $SP$ # ' ' 53*6a6cfa5dSJason King * $BP$ # '*' 54*6a6cfa5dSJason King * $RF$ # '&' 55*6a6cfa5dSJason King * $LT$ # '<' 56*6a6cfa5dSJason King * $GT$ # '>' 57*6a6cfa5dSJason King * $LP$ # '(' 58*6a6cfa5dSJason King * $RP$ # ')' 59*6a6cfa5dSJason King * $C$ # ',' 60*6a6cfa5dSJason King * $u7e$ # '~' 61*6a6cfa5dSJason King * $u20$ # ' ' 62*6a6cfa5dSJason King * $u27$ # '\'' 63*6a6cfa5dSJason King * $u3d$ # '=' 64*6a6cfa5dSJason King * $u5b$ # '[' 65*6a6cfa5dSJason King * $u5d$ # ']' 66*6a6cfa5dSJason King * $u7b$ # '{' 67*6a6cfa5dSJason King * $u7d$ # '}' 68*6a6cfa5dSJason King * $u3b$ # ';' 69*6a6cfa5dSJason King * $u2b$ # '+' 70*6a6cfa5dSJason King * $u22$ # '"' 71*6a6cfa5dSJason King * 72*6a6cfa5dSJason King * <hash> := <len> h <hex-digits>+ 73*6a6cfa5dSJason King * 74*6a6cfa5dSJason King * <hex-digits> := <[0-9a-f]> 75*6a6cfa5dSJason King */ 76*6a6cfa5dSJason King 77*6a6cfa5dSJason King typedef struct rustdem_state { 78*6a6cfa5dSJason King const char *rds_str; 79*6a6cfa5dSJason King custr_t *rds_demangled; 80*6a6cfa5dSJason King sysdem_ops_t *rds_ops; 81*6a6cfa5dSJason King int rds_error; 82*6a6cfa5dSJason King } rustdem_state_t; 83*6a6cfa5dSJason King 84*6a6cfa5dSJason King static const struct rust_charmap { 85*6a6cfa5dSJason King const char *ruc_seq; 86*6a6cfa5dSJason King char ruc_ch; 87*6a6cfa5dSJason King } rust_charmap[] = { 88*6a6cfa5dSJason King { "$SP$", '@' }, 89*6a6cfa5dSJason King { "$BP$", '*' }, 90*6a6cfa5dSJason King { "$RF$", '&' }, 91*6a6cfa5dSJason King { "$LT$", '<' }, 92*6a6cfa5dSJason King { "$GT$", '>' }, 93*6a6cfa5dSJason King { "$LP$", '(' }, 94*6a6cfa5dSJason King { "$RP$", ')' }, 95*6a6cfa5dSJason King { "$C$", ',' }, 96*6a6cfa5dSJason King { "$u7e$", '~' }, 97*6a6cfa5dSJason King { "$u20$", ' ' }, 98*6a6cfa5dSJason King { "$u27$", '\'' }, 99*6a6cfa5dSJason King { "$u3d$", '=' }, 100*6a6cfa5dSJason King { "$u5b$", '[' }, 101*6a6cfa5dSJason King { "$u5d$", ']' }, 102*6a6cfa5dSJason King { "$u7b$", '{' }, 103*6a6cfa5dSJason King { "$u7d$", '}' }, 104*6a6cfa5dSJason King { "$u3b$", ';' }, 105*6a6cfa5dSJason King { "$u2b$", '+' }, 106*6a6cfa5dSJason King { "$u22$", '"' } 107*6a6cfa5dSJason King }; 108*6a6cfa5dSJason King static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); 109*6a6cfa5dSJason King 110*6a6cfa5dSJason King static void *rustdem_alloc(custr_alloc_t *, size_t); 111*6a6cfa5dSJason King static void rustdem_free(custr_alloc_t *, void *, size_t); 112*6a6cfa5dSJason King 113*6a6cfa5dSJason King static boolean_t rustdem_append_c(rustdem_state_t *, char); 114*6a6cfa5dSJason King static boolean_t rustdem_all_ascii(const strview_t *); 115*6a6cfa5dSJason King 116*6a6cfa5dSJason King static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *); 117*6a6cfa5dSJason King static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *); 118*6a6cfa5dSJason King static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *); 119*6a6cfa5dSJason King static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *); 120*6a6cfa5dSJason King static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *); 121*6a6cfa5dSJason King static boolean_t rustdem_add_sep(rustdem_state_t *); 122*6a6cfa5dSJason King 123*6a6cfa5dSJason King char * 124*6a6cfa5dSJason King rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops) 125*6a6cfa5dSJason King { 126*6a6cfa5dSJason King rustdem_state_t st = { 127*6a6cfa5dSJason King .rds_str = s, 128*6a6cfa5dSJason King .rds_ops = ops, 129*6a6cfa5dSJason King }; 130*6a6cfa5dSJason King custr_alloc_ops_t custr_ops = { 131*6a6cfa5dSJason King .custr_ao_alloc = rustdem_alloc, 132*6a6cfa5dSJason King .custr_ao_free = rustdem_free 133*6a6cfa5dSJason King }; 134*6a6cfa5dSJason King custr_alloc_t custr_alloc = { 135*6a6cfa5dSJason King .cua_version = CUSTR_VERSION 136*6a6cfa5dSJason King }; 137*6a6cfa5dSJason King strview_t sv; 138*6a6cfa5dSJason King int ret; 139*6a6cfa5dSJason King 140*6a6cfa5dSJason King if (custr_alloc_init(&custr_alloc, &custr_ops) != 0) 141*6a6cfa5dSJason King return (NULL); 142*6a6cfa5dSJason King custr_alloc.cua_arg = &st; 143*6a6cfa5dSJason King 144*6a6cfa5dSJason King sv_init_str(&sv, s, s + slen); 145*6a6cfa5dSJason King 146*6a6cfa5dSJason King if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') { 147*6a6cfa5dSJason King DEMDEBUG("ERROR: string is either too small or does not end " 148*6a6cfa5dSJason King "with 'E'"); 149*6a6cfa5dSJason King errno = EINVAL; 150*6a6cfa5dSJason King return (NULL); 151*6a6cfa5dSJason King } 152*6a6cfa5dSJason King 153*6a6cfa5dSJason King if (!rustdem_parse_prefix(&st, &sv)) { 154*6a6cfa5dSJason King DEMDEBUG("ERROR: could not parse prefix"); 155*6a6cfa5dSJason King errno = EINVAL; 156*6a6cfa5dSJason King return (NULL); 157*6a6cfa5dSJason King } 158*6a6cfa5dSJason King DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv)); 159*6a6cfa5dSJason King 160*6a6cfa5dSJason King if (!rustdem_all_ascii(&sv)) { 161*6a6cfa5dSJason King /* rustdem_all_ascii() provides debug output */ 162*6a6cfa5dSJason King errno = EINVAL; 163*6a6cfa5dSJason King return (NULL); 164*6a6cfa5dSJason King } 165*6a6cfa5dSJason King 166*6a6cfa5dSJason King if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0) 167*6a6cfa5dSJason King return (NULL); 168*6a6cfa5dSJason King 169*6a6cfa5dSJason King while (sv_remaining(&sv) > 1) { 170*6a6cfa5dSJason King if (rustdem_parse_name(&st, &sv)) 171*6a6cfa5dSJason King continue; 172*6a6cfa5dSJason King if (st.rds_error != 0) 173*6a6cfa5dSJason King goto fail; 174*6a6cfa5dSJason King } 175*6a6cfa5dSJason King 176*6a6cfa5dSJason King if (st.rds_error != 0 || !sv_consume_if_c(&sv, 'E')) 177*6a6cfa5dSJason King goto fail; 178*6a6cfa5dSJason King 179*6a6cfa5dSJason King char *res = xstrdup(ops, custr_cstr(st.rds_demangled)); 180*6a6cfa5dSJason King if (res == NULL) { 181*6a6cfa5dSJason King st.rds_error = errno; 182*6a6cfa5dSJason King goto fail; 183*6a6cfa5dSJason King } 184*6a6cfa5dSJason King 185*6a6cfa5dSJason King custr_free(st.rds_demangled); 186*6a6cfa5dSJason King DEMDEBUG("result = '%s'", res); 187*6a6cfa5dSJason King return (res); 188*6a6cfa5dSJason King 189*6a6cfa5dSJason King fail: 190*6a6cfa5dSJason King custr_free(st.rds_demangled); 191*6a6cfa5dSJason King errno = st.rds_error; 192*6a6cfa5dSJason King return (NULL); 193*6a6cfa5dSJason King } 194*6a6cfa5dSJason King 195*6a6cfa5dSJason King static boolean_t 196*6a6cfa5dSJason King rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp) 197*6a6cfa5dSJason King { 198*6a6cfa5dSJason King strview_t pfx; 199*6a6cfa5dSJason King 200*6a6cfa5dSJason King sv_init_sv(&pfx, svp); 201*6a6cfa5dSJason King 202*6a6cfa5dSJason King DEMDEBUG("checking for '_ZN' or '__ZN' in '%.*s'", SV_PRINT(&pfx)); 203*6a6cfa5dSJason King 204*6a6cfa5dSJason King if (st->rds_error != 0) 205*6a6cfa5dSJason King return (B_FALSE); 206*6a6cfa5dSJason King 207*6a6cfa5dSJason King if (!sv_consume_if_c(&pfx, '_')) 208*6a6cfa5dSJason King return (B_FALSE); 209*6a6cfa5dSJason King 210*6a6cfa5dSJason King (void) sv_consume_if_c(&pfx, '_'); 211*6a6cfa5dSJason King 212*6a6cfa5dSJason King if (!sv_consume_if_c(&pfx, 'Z') || !sv_consume_if_c(&pfx, 'N')) 213*6a6cfa5dSJason King return (B_FALSE); 214*6a6cfa5dSJason King 215*6a6cfa5dSJason King /* Update svp with new position */ 216*6a6cfa5dSJason King sv_init_sv(svp, &pfx); 217*6a6cfa5dSJason King return (B_TRUE); 218*6a6cfa5dSJason King } 219*6a6cfa5dSJason King 220*6a6cfa5dSJason King static boolean_t 221*6a6cfa5dSJason King rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first) 222*6a6cfa5dSJason King { 223*6a6cfa5dSJason King strview_t sv; 224*6a6cfa5dSJason King strview_t name; 225*6a6cfa5dSJason King uint64_t len; 226*6a6cfa5dSJason King size_t rem; 227*6a6cfa5dSJason King boolean_t last = B_FALSE; 228*6a6cfa5dSJason King 229*6a6cfa5dSJason King if (st->rds_error != 0 || sv_remaining(svp) == 0) 230*6a6cfa5dSJason King return (B_FALSE); 231*6a6cfa5dSJason King 232*6a6cfa5dSJason King sv_init_sv(&sv, svp); 233*6a6cfa5dSJason King 234*6a6cfa5dSJason King if (!rustdem_parse_num(st, &sv, &len)) { 235*6a6cfa5dSJason King DEMDEBUG("ERROR: no leading length"); 236*6a6cfa5dSJason King st->rds_error = EINVAL; 237*6a6cfa5dSJason King return (B_FALSE); 238*6a6cfa5dSJason King } 239*6a6cfa5dSJason King 240*6a6cfa5dSJason King rem = sv_remaining(&sv); 241*6a6cfa5dSJason King 242*6a6cfa5dSJason King if (rem < len || len > SIZE_MAX) { 243*6a6cfa5dSJason King st->rds_error = EINVAL; 244*6a6cfa5dSJason King return (B_FALSE); 245*6a6cfa5dSJason King } 246*6a6cfa5dSJason King 247*6a6cfa5dSJason King /* Is this the last segment before the terminating E? */ 248*6a6cfa5dSJason King if (rem == len + 1) { 249*6a6cfa5dSJason King VERIFY3U(sv_peek(&sv, -1), ==, 'E'); 250*6a6cfa5dSJason King last = B_TRUE; 251*6a6cfa5dSJason King } 252*6a6cfa5dSJason King 253*6a6cfa5dSJason King if (!first && !rustdem_add_sep(st)) 254*6a6cfa5dSJason King return (B_FALSE); 255*6a6cfa5dSJason King 256*6a6cfa5dSJason King /* Reduce length of seg to the length we parsed */ 257*6a6cfa5dSJason King (void) sv_init_sv_range(&name, &sv, len); 258*6a6cfa5dSJason King 259*6a6cfa5dSJason King DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); 260*6a6cfa5dSJason King 261*6a6cfa5dSJason King /* 262*6a6cfa5dSJason King * A rust hash starts with 'h', and is the last component of a name 263*6a6cfa5dSJason King * before the terminating 'E' 264*6a6cfa5dSJason King */ 265*6a6cfa5dSJason King if (sv_peek(&name, 0) == 'h' && last) { 266*6a6cfa5dSJason King if (!rustdem_parse_hash(st, &name)) 267*6a6cfa5dSJason King return (B_FALSE); 268*6a6cfa5dSJason King goto done; 269*6a6cfa5dSJason King } 270*6a6cfa5dSJason King 271*6a6cfa5dSJason King while (sv_remaining(&name) > 0) { 272*6a6cfa5dSJason King switch (sv_peek(&name, 0)) { 273*6a6cfa5dSJason King case '$': 274*6a6cfa5dSJason King if (rustdem_parse_special(st, &name)) 275*6a6cfa5dSJason King continue; 276*6a6cfa5dSJason King break; 277*6a6cfa5dSJason King case '_': 278*6a6cfa5dSJason King if (sv_peek(&name, 1) == '$') { 279*6a6cfa5dSJason King /* 280*6a6cfa5dSJason King * Only consume/ignore '_'. Leave 281*6a6cfa5dSJason King * $ for next round. 282*6a6cfa5dSJason King */ 283*6a6cfa5dSJason King sv_consume_n(&name, 1); 284*6a6cfa5dSJason King continue; 285*6a6cfa5dSJason King } 286*6a6cfa5dSJason King break; 287*6a6cfa5dSJason King case '.': 288*6a6cfa5dSJason King /* Convert '..' to '::' */ 289*6a6cfa5dSJason King if (sv_peek(&name, 1) != '.') 290*6a6cfa5dSJason King break; 291*6a6cfa5dSJason King 292*6a6cfa5dSJason King if (!rustdem_add_sep(st)) 293*6a6cfa5dSJason King return (B_FALSE); 294*6a6cfa5dSJason King 295*6a6cfa5dSJason King sv_consume_n(&name, 2); 296*6a6cfa5dSJason King continue; 297*6a6cfa5dSJason King default: 298*6a6cfa5dSJason King break; 299*6a6cfa5dSJason King } 300*6a6cfa5dSJason King 301*6a6cfa5dSJason King if (custr_appendc(st->rds_demangled, 302*6a6cfa5dSJason King sv_consume_c(&name)) != 0) { 303*6a6cfa5dSJason King st->rds_error = ENOMEM; 304*6a6cfa5dSJason King return (B_FALSE); 305*6a6cfa5dSJason King } 306*6a6cfa5dSJason King } 307*6a6cfa5dSJason King 308*6a6cfa5dSJason King done: 309*6a6cfa5dSJason King DEMDEBUG("%s: consumed '%.*s'", __func__, (int)len, svp->sv_first); 310*6a6cfa5dSJason King sv_consume_n(&sv, len); 311*6a6cfa5dSJason King sv_init_sv(svp, &sv); 312*6a6cfa5dSJason King return (B_TRUE); 313*6a6cfa5dSJason King } 314*6a6cfa5dSJason King 315*6a6cfa5dSJason King static boolean_t 316*6a6cfa5dSJason King rustdem_parse_name(rustdem_state_t *st, strview_t *svp) 317*6a6cfa5dSJason King { 318*6a6cfa5dSJason King strview_t name; 319*6a6cfa5dSJason King boolean_t first = B_TRUE; 320*6a6cfa5dSJason King 321*6a6cfa5dSJason King if (st->rds_error != 0) 322*6a6cfa5dSJason King return (B_FALSE); 323*6a6cfa5dSJason King 324*6a6cfa5dSJason King sv_init_sv(&name, svp); 325*6a6cfa5dSJason King 326*6a6cfa5dSJason King if (sv_remaining(&name) == 0) 327*6a6cfa5dSJason King return (B_FALSE); 328*6a6cfa5dSJason King 329*6a6cfa5dSJason King while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') { 330*6a6cfa5dSJason King if (!rustdem_parse_name_segment(st, &name, first)) 331*6a6cfa5dSJason King return (B_FALSE); 332*6a6cfa5dSJason King first = B_FALSE; 333*6a6cfa5dSJason King } 334*6a6cfa5dSJason King 335*6a6cfa5dSJason King sv_init_sv(svp, &name); 336*6a6cfa5dSJason King return (B_TRUE); 337*6a6cfa5dSJason King } 338*6a6cfa5dSJason King 339*6a6cfa5dSJason King static boolean_t 340*6a6cfa5dSJason King rustdem_parse_hash(rustdem_state_t *st, strview_t *svp) 341*6a6cfa5dSJason King { 342*6a6cfa5dSJason King strview_t sv; 343*6a6cfa5dSJason King 344*6a6cfa5dSJason King sv_init_sv(&sv, svp); 345*6a6cfa5dSJason King 346*6a6cfa5dSJason King VERIFY(sv_consume_if_c(&sv, 'h')); 347*6a6cfa5dSJason King if (!rustdem_append_c(st, 'h')) 348*6a6cfa5dSJason King return (B_FALSE); 349*6a6cfa5dSJason King 350*6a6cfa5dSJason King while (sv_remaining(&sv) > 0) { 351*6a6cfa5dSJason King char c = sv_consume_c(&sv); 352*6a6cfa5dSJason King 353*6a6cfa5dSJason King switch (c) { 354*6a6cfa5dSJason King /* 355*6a6cfa5dSJason King * The upper-case hex digits (A-F) are excluded as valid 356*6a6cfa5dSJason King * hash values for several reasons: 357*6a6cfa5dSJason King * 358*6a6cfa5dSJason King * 1. It would result in two different possible names for 359*6a6cfa5dSJason King * the same function, leading to ambiguity in linking (among 360*6a6cfa5dSJason King * other things). 361*6a6cfa5dSJason King * 362*6a6cfa5dSJason King * 2. It would cause potential ambiguity in parsing -- is a 363*6a6cfa5dSJason King * trailing 'E' part of the hash, or the terminating character 364*6a6cfa5dSJason King * in the mangled name? 365*6a6cfa5dSJason King * 366*6a6cfa5dSJason King * 3. No examples were able to be found in the wild where 367*6a6cfa5dSJason King * uppercase digits are used, and other rust demanglers all 368*6a6cfa5dSJason King * seem to assume the hash must contain lower-case hex digits. 369*6a6cfa5dSJason King */ 370*6a6cfa5dSJason King case '0': case '1': case '2': case '3': 371*6a6cfa5dSJason King case '4': case '5': case '6': case '7': 372*6a6cfa5dSJason King case '8': case '9': case 'a': case 'b': 373*6a6cfa5dSJason King case 'c': case 'd': case 'e': case 'f': 374*6a6cfa5dSJason King if (!rustdem_append_c(st, c)) 375*6a6cfa5dSJason King return (B_FALSE); 376*6a6cfa5dSJason King break; 377*6a6cfa5dSJason King default: 378*6a6cfa5dSJason King return (B_FALSE); 379*6a6cfa5dSJason King } 380*6a6cfa5dSJason King } 381*6a6cfa5dSJason King 382*6a6cfa5dSJason King sv_init_sv(svp, &sv); 383*6a6cfa5dSJason King return (B_TRUE); 384*6a6cfa5dSJason King } 385*6a6cfa5dSJason King 386*6a6cfa5dSJason King /* 387*6a6cfa5dSJason King * A 10 digit value would imply a name 1Gb or larger in size. It seems 388*6a6cfa5dSJason King * unlikely to the point of absurdity any such value could every possibly 389*6a6cfa5dSJason King * be valid (or even have compiled properly). This also prevents the 390*6a6cfa5dSJason King * uint64_t conversion from possibly overflowing since the value must always 391*6a6cfa5dSJason King * be below 10 * UINT32_MAX. 392*6a6cfa5dSJason King */ 393*6a6cfa5dSJason King #define MAX_DIGITS 10 394*6a6cfa5dSJason King 395*6a6cfa5dSJason King static boolean_t 396*6a6cfa5dSJason King rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp, 397*6a6cfa5dSJason King uint64_t *restrict valp) 398*6a6cfa5dSJason King { 399*6a6cfa5dSJason King strview_t snum; 400*6a6cfa5dSJason King uint64_t v = 0; 401*6a6cfa5dSJason King size_t ndigits = 0; 402*6a6cfa5dSJason King char c; 403*6a6cfa5dSJason King 404*6a6cfa5dSJason King if (st->rds_error != 0) 405*6a6cfa5dSJason King return (B_FALSE); 406*6a6cfa5dSJason King 407*6a6cfa5dSJason King sv_init_sv(&snum, svp); 408*6a6cfa5dSJason King 409*6a6cfa5dSJason King DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum)); 410*6a6cfa5dSJason King 411*6a6cfa5dSJason King c = sv_peek(&snum, 0); 412*6a6cfa5dSJason King if (!ISDIGIT(c)) { 413*6a6cfa5dSJason King DEMDEBUG("%s: ERROR no digits in str\n", __func__); 414*6a6cfa5dSJason King st->rds_error = EINVAL; 415*6a6cfa5dSJason King return (B_FALSE); 416*6a6cfa5dSJason King } 417*6a6cfa5dSJason King 418*6a6cfa5dSJason King /* 419*6a6cfa5dSJason King * Since there is currently no official specification on rust name 420*6a6cfa5dSJason King * mangling, only that it has been stated that rust follows what 421*6a6cfa5dSJason King * C++ mangling does. In the Itanium C++ ABI (what practically 422*6a6cfa5dSJason King * every non-Windows C++ implementation uses these days), it 423*6a6cfa5dSJason King * explicitly disallows leading 0s in numeric values (except for 424*6a6cfa5dSJason King * substition and template indexes, which aren't relevant here). 425*6a6cfa5dSJason King * We enforce the same restriction -- if a rust implementation allowed 426*6a6cfa5dSJason King * leading zeros in numbers (basically segment lengths) it'd 427*6a6cfa5dSJason King * cause all sorts of ambiguity problems with names that likely lead 428*6a6cfa5dSJason King * to much bigger problems with linking and such, so this seems 429*6a6cfa5dSJason King * reasonable. 430*6a6cfa5dSJason King */ 431*6a6cfa5dSJason King if (c == '0') { 432*6a6cfa5dSJason King DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__); 433*6a6cfa5dSJason King st->rds_error = EINVAL; 434*6a6cfa5dSJason King return (B_FALSE); 435*6a6cfa5dSJason King } 436*6a6cfa5dSJason King 437*6a6cfa5dSJason King while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) { 438*6a6cfa5dSJason King c = sv_consume_c(&snum); 439*6a6cfa5dSJason King 440*6a6cfa5dSJason King if (!ISDIGIT(c)) 441*6a6cfa5dSJason King break; 442*6a6cfa5dSJason King 443*6a6cfa5dSJason King v *= 10; 444*6a6cfa5dSJason King v += c - '0'; 445*6a6cfa5dSJason King ndigits++; 446*6a6cfa5dSJason King } 447*6a6cfa5dSJason King 448*6a6cfa5dSJason King if (ndigits > MAX_DIGITS) { 449*6a6cfa5dSJason King DEMDEBUG("%s: value %llu is too large\n", __func__, v); 450*6a6cfa5dSJason King st->rds_error = ERANGE; 451*6a6cfa5dSJason King return (B_FALSE); 452*6a6cfa5dSJason King } 453*6a6cfa5dSJason King 454*6a6cfa5dSJason King DEMDEBUG("%s: num=%llu", __func__, v); 455*6a6cfa5dSJason King 456*6a6cfa5dSJason King *valp = v; 457*6a6cfa5dSJason King sv_consume_n(svp, ndigits); 458*6a6cfa5dSJason King return (B_TRUE); 459*6a6cfa5dSJason King } 460*6a6cfa5dSJason King 461*6a6cfa5dSJason King static boolean_t 462*6a6cfa5dSJason King rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp) 463*6a6cfa5dSJason King { 464*6a6cfa5dSJason King if (st->rds_error != 0) 465*6a6cfa5dSJason King return (B_FALSE); 466*6a6cfa5dSJason King 467*6a6cfa5dSJason King if (sv_peek(svp, 0) != '$') 468*6a6cfa5dSJason King return (B_FALSE); 469*6a6cfa5dSJason King 470*6a6cfa5dSJason King for (size_t i = 0; i < rust_charmap_sz; i++) { 471*6a6cfa5dSJason King if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { 472*6a6cfa5dSJason King if (!rustdem_append_c(st, rust_charmap[i].ruc_ch)) 473*6a6cfa5dSJason King return (B_FALSE); 474*6a6cfa5dSJason King return (B_TRUE); 475*6a6cfa5dSJason King } 476*6a6cfa5dSJason King } 477*6a6cfa5dSJason King return (B_FALSE); 478*6a6cfa5dSJason King } 479*6a6cfa5dSJason King 480*6a6cfa5dSJason King static boolean_t 481*6a6cfa5dSJason King rustdem_add_sep(rustdem_state_t *st) 482*6a6cfa5dSJason King { 483*6a6cfa5dSJason King if (st->rds_error != 0) 484*6a6cfa5dSJason King return (B_FALSE); 485*6a6cfa5dSJason King 486*6a6cfa5dSJason King if (!rustdem_append_c(st, ':') || 487*6a6cfa5dSJason King !rustdem_append_c(st, ':')) 488*6a6cfa5dSJason King return (B_FALSE); 489*6a6cfa5dSJason King 490*6a6cfa5dSJason King return (B_TRUE); 491*6a6cfa5dSJason King } 492*6a6cfa5dSJason King 493*6a6cfa5dSJason King static boolean_t 494*6a6cfa5dSJason King rustdem_append_c(rustdem_state_t *st, char c) 495*6a6cfa5dSJason King { 496*6a6cfa5dSJason King if (st->rds_error != 0) 497*6a6cfa5dSJason King return (B_FALSE); 498*6a6cfa5dSJason King 499*6a6cfa5dSJason King if (custr_appendc(st->rds_demangled, c) == 0) 500*6a6cfa5dSJason King return (B_TRUE); 501*6a6cfa5dSJason King 502*6a6cfa5dSJason King st->rds_error = errno; 503*6a6cfa5dSJason King return (B_FALSE); 504*6a6cfa5dSJason King } 505*6a6cfa5dSJason King 506*6a6cfa5dSJason King static boolean_t 507*6a6cfa5dSJason King rustdem_all_ascii(const strview_t *svp) 508*6a6cfa5dSJason King { 509*6a6cfa5dSJason King strview_t p; 510*6a6cfa5dSJason King 511*6a6cfa5dSJason King sv_init_sv(&p, svp); 512*6a6cfa5dSJason King 513*6a6cfa5dSJason King while (sv_remaining(&p) > 0) { 514*6a6cfa5dSJason King char c = sv_consume_c(&p); 515*6a6cfa5dSJason King 516*6a6cfa5dSJason King /* 517*6a6cfa5dSJason King * #including <sys/ctype.h> conflicts with <ctype.h>. Since 518*6a6cfa5dSJason King * we want the C locale macros (ISDIGIT, etc), it also means 519*6a6cfa5dSJason King * we can't use isascii(3C). 520*6a6cfa5dSJason King */ 521*6a6cfa5dSJason King if ((c & 0x80) != 0) { 522*6a6cfa5dSJason King DEMDEBUG("%s: found non-ascii character 0x%02hhx at " 523*6a6cfa5dSJason King "offset %tu", __func__, c, 524*6a6cfa5dSJason King (ptrdiff_t)(p.sv_first - svp->sv_first)); 525*6a6cfa5dSJason King return (B_FALSE); 526*6a6cfa5dSJason King } 527*6a6cfa5dSJason King } 528*6a6cfa5dSJason King return (B_TRUE); 529*6a6cfa5dSJason King } 530*6a6cfa5dSJason King 531*6a6cfa5dSJason King static void * 532*6a6cfa5dSJason King rustdem_alloc(custr_alloc_t *cao, size_t len) 533*6a6cfa5dSJason King { 534*6a6cfa5dSJason King rustdem_state_t *st = cao->cua_arg; 535*6a6cfa5dSJason King return (zalloc(st->rds_ops, len)); 536*6a6cfa5dSJason King } 537*6a6cfa5dSJason King 538*6a6cfa5dSJason King static void 539*6a6cfa5dSJason King rustdem_free(custr_alloc_t *cao, void *p, size_t len) 540*6a6cfa5dSJason King { 541*6a6cfa5dSJason King rustdem_state_t *st = cao->cua_arg; 542*6a6cfa5dSJason King xfree(st->rds_ops, p, len); 543*6a6cfa5dSJason King } 544