16a6cfa5dSJason King /*
26a6cfa5dSJason King * This file and its contents are supplied under the terms of the
36a6cfa5dSJason King * Common Development and Distribution License ("CDDL"), version 1.0.
46a6cfa5dSJason King * You may only use this file in accordance with the terms of version
56a6cfa5dSJason King * 1.0 of the CDDL.
66a6cfa5dSJason King *
76a6cfa5dSJason King * A full copy of the text of the CDDL should have accompanied this
86a6cfa5dSJason King * source. A copy of the CDDL is also available via the Internet at
96a6cfa5dSJason King * http://www.illumos.org/license/CDDL.
106a6cfa5dSJason King */
116a6cfa5dSJason King
126a6cfa5dSJason King /*
13f5ac8590SJason King * Copyright 2021 Jason King
14*1cd08393SJason King * Copyright 2019 Joyent, Inc.
156a6cfa5dSJason King */
166a6cfa5dSJason King
176a6cfa5dSJason King #include <errno.h>
18*1cd08393SJason King #include <langinfo.h>
196a6cfa5dSJason King #include <libcustr.h>
206a6cfa5dSJason King #include <limits.h>
21*1cd08393SJason King #include <stdarg.h>
226a6cfa5dSJason King #include <string.h>
236a6cfa5dSJason King
246a6cfa5dSJason King #include "demangle_int.h"
25*1cd08393SJason King #include "rust.h"
266a6cfa5dSJason King
27*1cd08393SJason King static void *
rust_cualloc(custr_alloc_t * cua,size_t len)28*1cd08393SJason King rust_cualloc(custr_alloc_t *cua, size_t len)
296a6cfa5dSJason King {
30*1cd08393SJason King rust_state_t *st = cua->cua_arg;
31*1cd08393SJason King return (zalloc(st->rs_ops, len));
326a6cfa5dSJason King }
336a6cfa5dSJason King
34*1cd08393SJason King static void
rust_cufree(custr_alloc_t * cua,void * p,size_t len)35*1cd08393SJason King rust_cufree(custr_alloc_t *cua, void *p, size_t len)
366a6cfa5dSJason King {
37*1cd08393SJason King rust_state_t *st = cua->cua_arg;
38*1cd08393SJason King xfree(st->rs_ops, p, len);
39*1cd08393SJason King }
406a6cfa5dSJason King
41*1cd08393SJason King static const custr_alloc_ops_t rust_custr_ops = {
42*1cd08393SJason King .custr_ao_alloc = rust_cualloc,
43*1cd08393SJason King .custr_ao_free = rust_cufree
44*1cd08393SJason King };
456a6cfa5dSJason King
46*1cd08393SJason King boolean_t
rust_appendc(rust_state_t * st,char c)47*1cd08393SJason King rust_appendc(rust_state_t *st, char c)
48*1cd08393SJason King {
49*1cd08393SJason King custr_t *cus = st->rs_demangled;
506a6cfa5dSJason King
51*1cd08393SJason King if (HAS_ERROR(st))
526a6cfa5dSJason King return (B_FALSE);
536a6cfa5dSJason King
54*1cd08393SJason King if (st->rs_skip)
55*1cd08393SJason King return (B_TRUE);
566a6cfa5dSJason King
57*1cd08393SJason King switch (c) {
58*1cd08393SJason King case '\a':
59*1cd08393SJason King return (rust_append(st, "\\a"));
60*1cd08393SJason King case '\b':
61*1cd08393SJason King return (rust_append(st, "\\b"));
62*1cd08393SJason King case '\f':
63*1cd08393SJason King return (rust_append(st, "\\f"));
64*1cd08393SJason King case '\n':
65*1cd08393SJason King return (rust_append(st, "\\n"));
66*1cd08393SJason King case '\r':
67*1cd08393SJason King return (rust_append(st, "\\r"));
68*1cd08393SJason King case '\t':
69*1cd08393SJason King return (rust_append(st, "\\t"));
70*1cd08393SJason King case '\v':
71*1cd08393SJason King return (rust_append(st, "\\v"));
72*1cd08393SJason King case '\\':
73*1cd08393SJason King return (rust_append(st, "\\\\"));
74*1cd08393SJason King }
75*1cd08393SJason King
76*1cd08393SJason King if (c < ' ')
77*1cd08393SJason King return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
786a6cfa5dSJason King
79*1cd08393SJason King if (custr_appendc(cus, c) != 0) {
80*1cd08393SJason King SET_ERROR(st);
816a6cfa5dSJason King return (B_FALSE);
82*1cd08393SJason King }
836a6cfa5dSJason King
846a6cfa5dSJason King return (B_TRUE);
856a6cfa5dSJason King }
866a6cfa5dSJason King
87*1cd08393SJason King /*
88*1cd08393SJason King * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets
89*1cd08393SJason King * appended as '\u<hex codepoint>' otherwise the character itself is
90*1cd08393SJason King * added.
91*1cd08393SJason King */
92*1cd08393SJason King boolean_t
rust_append_utf8_c(rust_state_t * st,uint32_t val)93*1cd08393SJason King rust_append_utf8_c(rust_state_t *st, uint32_t val)
946a6cfa5dSJason King {
95*1cd08393SJason King custr_t *cus = st->rs_demangled;
96*1cd08393SJason King uint_t n = 0;
97*1cd08393SJason King uint8_t c[4] = { 0 };
986a6cfa5dSJason King
99*1cd08393SJason King if (HAS_ERROR(st))
1006a6cfa5dSJason King return (B_FALSE);
1016a6cfa5dSJason King
102*1cd08393SJason King if (!st->rs_isutf8) {
103*1cd08393SJason King if (val < 0x80)
104*1cd08393SJason King return (rust_appendc(st, (char)val));
105*1cd08393SJason King if (val < 0x10000)
106*1cd08393SJason King return (rust_append_printf(st, "\\u%04" PRIx32, val));
107*1cd08393SJason King return (rust_append_printf(st, "\\U%08" PRIx32, val));
1086a6cfa5dSJason King }
1096a6cfa5dSJason King
110*1cd08393SJason King if (val < 0x80) {
111*1cd08393SJason King return (rust_appendc(st, (char)val));
112*1cd08393SJason King } else if (val < 0x800) {
113*1cd08393SJason King c[0] = 0xc0 | ((val >> 6) & 0x1f);
114*1cd08393SJason King c[1] = 0x80 | (val & 0x3f);
115*1cd08393SJason King n = 2;
116*1cd08393SJason King } else if (val < 0x10000) {
117*1cd08393SJason King c[0] = 0xe0 | ((val >> 12) & 0x0f);
118*1cd08393SJason King c[1] = 0x80 | ((val >> 6) & 0x3f);
119*1cd08393SJason King c[2] = 0x80 | (val & 0x3f);
120*1cd08393SJason King n = 3;
121*1cd08393SJason King } else if (val < 0x110000) {
122*1cd08393SJason King c[0] = 0xf0 | ((val >> 18) & 0x7);
123*1cd08393SJason King c[1] = 0x80 | ((val >> 12) & 0x3f);
124*1cd08393SJason King c[2] = 0x80 | ((val >> 6) & 0x3f);
125*1cd08393SJason King c[3] = 0x80 | (val & 0x3f);
126*1cd08393SJason King n = 4;
127*1cd08393SJason King } else {
128*1cd08393SJason King DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
129*1cd08393SJason King val);
1306a6cfa5dSJason King return (B_FALSE);
1316a6cfa5dSJason King }
1326a6cfa5dSJason King
133*1cd08393SJason King for (uint_t i = 0; i < n; i++) {
134*1cd08393SJason King if (custr_appendc(cus, c[i]) != 0) {
135*1cd08393SJason King SET_ERROR(st);
136*1cd08393SJason King return (B_FALSE);
137*1cd08393SJason King }
1386a6cfa5dSJason King }
1396a6cfa5dSJason King
140*1cd08393SJason King return (B_TRUE);
141*1cd08393SJason King }
1426a6cfa5dSJason King
143*1cd08393SJason King boolean_t
rust_append(rust_state_t * st,const char * s)144*1cd08393SJason King rust_append(rust_state_t *st, const char *s)
145*1cd08393SJason King {
146*1cd08393SJason King custr_t *cus = st->rs_demangled;
1476a6cfa5dSJason King
148*1cd08393SJason King if (HAS_ERROR(st))
149*1cd08393SJason King return (B_FALSE);
1506a6cfa5dSJason King
151*1cd08393SJason King if (st->rs_skip)
152*1cd08393SJason King return (B_TRUE);
1536a6cfa5dSJason King
154*1cd08393SJason King if (custr_append(cus, s) != 0) {
155*1cd08393SJason King SET_ERROR(st);
156*1cd08393SJason King return (B_FALSE);
1576a6cfa5dSJason King }
1586a6cfa5dSJason King
1596a6cfa5dSJason King return (B_TRUE);
1606a6cfa5dSJason King }
1616a6cfa5dSJason King
162*1cd08393SJason King boolean_t
rust_append_sv(rust_state_t * restrict st,uint64_t n,strview_t * restrict sv)163*1cd08393SJason King rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
1646a6cfa5dSJason King {
165*1cd08393SJason King if (HAS_ERROR(st))
1666a6cfa5dSJason King return (B_FALSE);
1676a6cfa5dSJason King
168*1cd08393SJason King if (st->rs_skip) {
169*1cd08393SJason King sv_consume_n(sv, (size_t)n);
170*1cd08393SJason King return (B_TRUE);
171*1cd08393SJason King }
172f5ac8590SJason King
173*1cd08393SJason King if (n > sv_remaining(sv)) {
174*1cd08393SJason King DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
175*1cd08393SJason King "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
176*1cd08393SJason King st->rs_error = ERANGE;
1776a6cfa5dSJason King return (B_FALSE);
178f5ac8590SJason King }
179f5ac8590SJason King
180*1cd08393SJason King if (n > INT_MAX) {
181*1cd08393SJason King DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
182*1cd08393SJason King st->rs_error = ERANGE;
183f5ac8590SJason King return (B_FALSE);
184f5ac8590SJason King }
1856a6cfa5dSJason King
186*1cd08393SJason King if (custr_append_printf(st->rs_demangled, "%.*s",
187*1cd08393SJason King (int)n, sv->sv_first) != 0) {
188*1cd08393SJason King SET_ERROR(st);
189*1cd08393SJason King return (B_FALSE);
1906a6cfa5dSJason King }
191*1cd08393SJason King sv_consume_n(sv, (size_t)n);
1926a6cfa5dSJason King
1936a6cfa5dSJason King return (B_TRUE);
1946a6cfa5dSJason King }
1956a6cfa5dSJason King
196*1cd08393SJason King boolean_t
rust_append_printf(rust_state_t * st,const char * fmt,...)197*1cd08393SJason King rust_append_printf(rust_state_t *st, const char *fmt, ...)
1986a6cfa5dSJason King {
199*1cd08393SJason King va_list ap;
200*1cd08393SJason King int ret;
2016a6cfa5dSJason King
202*1cd08393SJason King if (HAS_ERROR(st))
2036a6cfa5dSJason King return (B_FALSE);
2046a6cfa5dSJason King
205*1cd08393SJason King if (st->rs_skip)
206*1cd08393SJason King return (B_TRUE);
2076a6cfa5dSJason King
208*1cd08393SJason King va_start(ap, fmt);
209*1cd08393SJason King ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
210*1cd08393SJason King va_end(ap);
2116a6cfa5dSJason King
212*1cd08393SJason King if (ret == 0)
213*1cd08393SJason King return (B_TRUE);
214*1cd08393SJason King SET_ERROR(st);
215*1cd08393SJason King return (B_FALSE);
2166a6cfa5dSJason King }
2176a6cfa5dSJason King
218*1cd08393SJason King boolean_t
rust_parse_base10(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)219*1cd08393SJason King rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
2206a6cfa5dSJason King uint64_t *restrict valp)
2216a6cfa5dSJason King {
2226a6cfa5dSJason King uint64_t v = 0;
2236a6cfa5dSJason King char c;
2246a6cfa5dSJason King
225*1cd08393SJason King if (HAS_ERROR(st) || sv_remaining(sv) == 0)
2266a6cfa5dSJason King return (B_FALSE);
2276a6cfa5dSJason King
228*1cd08393SJason King c = sv_peek(sv, 0);
2296a6cfa5dSJason King
2306a6cfa5dSJason King /*
231*1cd08393SJason King * Since the legacy rust encoding states that it follows the
232*1cd08393SJason King * Itanium C++ mangling format, we match the behavior of the
233*1cd08393SJason King * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
234*1cd08393SJason King *
235*1cd08393SJason King * For Rust encoding v0, RFC2603 currently has omitted the
236*1cd08393SJason King * actual definition of <decimal-number>. However examination of
237*1cd08393SJason King * other implementations written in tandem with the mangling
238*1cd08393SJason King * implementation suggest that <decimal-number> can be expressed
239*1cd08393SJason King * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
240*1cd08393SJason King * terminates the token, while any other leading digit allows
241*1cd08393SJason King * parsing to continue until a non-digit is encountered, the
242*1cd08393SJason King * end of the string is encountered, or overflow is encountered.
2436a6cfa5dSJason King */
2446a6cfa5dSJason King if (c == '0') {
245*1cd08393SJason King if (st->rs_encver == RUSTENC_V0) {
246*1cd08393SJason King sv_consume_n(sv, 1);
247*1cd08393SJason King *valp = 0;
248*1cd08393SJason King return (B_TRUE);
249*1cd08393SJason King }
250*1cd08393SJason King
251*1cd08393SJason King DEMDEBUG("%s: ERROR number starts with leading 0\n",
252*1cd08393SJason King __func__);
253*1cd08393SJason King st->rs_error = EINVAL;
254*1cd08393SJason King return (B_FALSE);
255*1cd08393SJason King } else if (!ISDIGIT(c)) {
2566a6cfa5dSJason King return (B_FALSE);
2576a6cfa5dSJason King }
2586a6cfa5dSJason King
259*1cd08393SJason King while (sv_remaining(sv) > 0) {
260*1cd08393SJason King uint64_t cval;
2616a6cfa5dSJason King
262*1cd08393SJason King c = sv_peek(sv, 0);
2636a6cfa5dSJason King if (!ISDIGIT(c))
2646a6cfa5dSJason King break;
265*1cd08393SJason King sv_consume_n(sv, 1);
2666a6cfa5dSJason King
267*1cd08393SJason King cval = c - '0';
2686a6cfa5dSJason King
269*1cd08393SJason King if (mul_overflow(v, 10, &v)) {
270*1cd08393SJason King DEMDEBUG("%s: multiplication overflowed\n", __func__);
271*1cd08393SJason King st->rs_error = EOVERFLOW;
272*1cd08393SJason King return (B_FALSE);
273*1cd08393SJason King }
2746a6cfa5dSJason King
275*1cd08393SJason King if (add_overflow(v, cval, &v)) {
276*1cd08393SJason King DEMDEBUG("%s: addition overflowed\n", __func__);
277*1cd08393SJason King st->rs_error = EOVERFLOW;
278*1cd08393SJason King return (B_FALSE);
279*1cd08393SJason King }
280*1cd08393SJason King }
2816a6cfa5dSJason King
2826a6cfa5dSJason King *valp = v;
2836a6cfa5dSJason King return (B_TRUE);
2846a6cfa5dSJason King }
2856a6cfa5dSJason King
2866a6cfa5dSJason King static boolean_t
rust_parse_prefix(rust_state_t * restrict st,strview_t * restrict sv)287*1cd08393SJason King rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
2886a6cfa5dSJason King {
289*1cd08393SJason King DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
290*1cd08393SJason King
291*1cd08393SJason King if (HAS_ERROR(st))
2926a6cfa5dSJason King return (B_FALSE);
2936a6cfa5dSJason King
294*1cd08393SJason King if (!sv_consume_if_c(sv, '_'))
2956a6cfa5dSJason King return (B_FALSE);
2966a6cfa5dSJason King
297*1cd08393SJason King /*
298*1cd08393SJason King * MacOS prepends an additional '_' -- allow that in case
299*1cd08393SJason King * we're given symbols from a MacOS object.
300*1cd08393SJason King */
301*1cd08393SJason King (void) sv_consume_if_c(sv, '_');
302*1cd08393SJason King
303*1cd08393SJason King if (sv_consume_if_c(sv, 'Z')) {
304*1cd08393SJason King /*
305*1cd08393SJason King * Legacy names must start with '[_]_Z'
306*1cd08393SJason King */
307*1cd08393SJason King st->rs_encver = RUSTENC_LEGACY;
308*1cd08393SJason King DEMDEBUG("name is encoded using the rust legacy mangling "
309*1cd08393SJason King "scheme");
310*1cd08393SJason King } else if (sv_consume_if_c(sv, 'R')) {
311*1cd08393SJason King uint64_t ver = 0;
312*1cd08393SJason King
313*1cd08393SJason King /*
314*1cd08393SJason King * The non-legacy encoding is versioned. After the initial
315*1cd08393SJason King * 'R' is the version. This isn't spelled out clearly in the
316*1cd08393SJason King * RFC, but many numeric values encoded take an approach of
317*1cd08393SJason King * a value of 0 is omitted, and any digits represent the
318*1cd08393SJason King * value - 1. In other words, in this case, no digits means
319*1cd08393SJason King * version 0, '_R0...' would be version 1, 'R1...' would
320*1cd08393SJason King * be version 2, etc. Currently only version 0 is defined,
321*1cd08393SJason King * but we try to provide a (hopefully) useful message
322*1cd08393SJason King * when debugging, even if we can't use the version value
323*1cd08393SJason King * beyond that.
324*1cd08393SJason King */
325*1cd08393SJason King if (rust_parse_base10(st, sv, &ver)) {
326*1cd08393SJason King DEMDEBUG("%s: ERROR: an unsupported encoding version "
327*1cd08393SJason King "(%" PRIu64 ") was encountered", ver + 1);
328*1cd08393SJason King st->rs_error = ENOTSUP;
329*1cd08393SJason King return (B_FALSE);
3306a6cfa5dSJason King }
331*1cd08393SJason King
332*1cd08393SJason King st->rs_encver = RUSTENC_V0;
333*1cd08393SJason King DEMDEBUG("name is encoded using the v0 mangling scheme");
334*1cd08393SJason King } else {
335*1cd08393SJason King DEMDEBUG("did not find a valid rust prefix");
336*1cd08393SJason King return (B_FALSE);
3376a6cfa5dSJason King }
338*1cd08393SJason King
339*1cd08393SJason King sv_init_sv(&st->rs_orig, sv);
340*1cd08393SJason King return (B_TRUE);
341*1cd08393SJason King }
342*1cd08393SJason King
343*1cd08393SJason King static void
rust_fini_state(rust_state_t * st)344*1cd08393SJason King rust_fini_state(rust_state_t *st)
345*1cd08393SJason King {
346*1cd08393SJason King custr_free(st->rs_demangled);
347*1cd08393SJason King custr_alloc_fini(&st->rs_cualloc);
3486a6cfa5dSJason King }
3496a6cfa5dSJason King
3506a6cfa5dSJason King static boolean_t
rust_init_state(rust_state_t * restrict st,const char * s,sysdem_ops_t * ops)351*1cd08393SJason King rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
3526a6cfa5dSJason King {
353*1cd08393SJason King const char *codeset;
354*1cd08393SJason King
355*1cd08393SJason King (void) memset(st, 0, sizeof (*st));
356*1cd08393SJason King
357*1cd08393SJason King st->rs_str = s;
358*1cd08393SJason King st->rs_ops = ops;
359*1cd08393SJason King
360*1cd08393SJason King st->rs_cualloc.cua_version = CUSTR_VERSION;
361*1cd08393SJason King if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
3626a6cfa5dSJason King return (B_FALSE);
363*1cd08393SJason King st->rs_cualloc.cua_arg = st;
3646a6cfa5dSJason King
365*1cd08393SJason King if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
366*1cd08393SJason King custr_alloc_fini(&st->rs_cualloc);
3676a6cfa5dSJason King return (B_FALSE);
368*1cd08393SJason King }
369*1cd08393SJason King
370*1cd08393SJason King codeset = nl_langinfo(CODESET);
371*1cd08393SJason King if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
372*1cd08393SJason King st->rs_isutf8 = B_TRUE;
3736a6cfa5dSJason King
3746a6cfa5dSJason King return (B_TRUE);
3756a6cfa5dSJason King }
3766a6cfa5dSJason King
377*1cd08393SJason King char *
rust_demangle(const char * s,size_t len,sysdem_ops_t * ops)378*1cd08393SJason King rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
3796a6cfa5dSJason King {
380*1cd08393SJason King rust_state_t st;
381*1cd08393SJason King strview_t sv = { 0 };
382*1cd08393SJason King boolean_t success = B_FALSE;
383*1cd08393SJason King int e = 0;
384*1cd08393SJason King char *out = NULL;
3856a6cfa5dSJason King
386*1cd08393SJason King if (!rust_init_state(&st, s, ops))
387*1cd08393SJason King return (NULL);
3886a6cfa5dSJason King
389*1cd08393SJason King sv_init_str(&sv, s, s + len);
3906a6cfa5dSJason King
391*1cd08393SJason King if (!rust_parse_prefix(&st, &sv)) {
392*1cd08393SJason King if (st.rs_error == 0)
393*1cd08393SJason King st.rs_error = EINVAL;
394*1cd08393SJason King goto done;
395*1cd08393SJason King }
3966a6cfa5dSJason King
397*1cd08393SJason King DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
3986a6cfa5dSJason King
399*1cd08393SJason King switch (st.rs_encver) {
400*1cd08393SJason King case RUSTENC_LEGACY:
401*1cd08393SJason King success = rust_demangle_legacy(&st, &sv);
402*1cd08393SJason King break;
403*1cd08393SJason King case RUSTENC_V0:
404*1cd08393SJason King success = rust_demangle_v0(&st, &sv);
405*1cd08393SJason King break;
406*1cd08393SJason King }
4076a6cfa5dSJason King
408*1cd08393SJason King done:
409*1cd08393SJason King if (success) {
410*1cd08393SJason King out = xstrdup(ops, custr_cstr(st.rs_demangled));
411*1cd08393SJason King if (out == NULL)
412*1cd08393SJason King SET_ERROR(&st);
413*1cd08393SJason King } else {
414*1cd08393SJason King DEMDEBUG("%s: failed, str='%s'", __func__,
415*1cd08393SJason King custr_cstr(st.rs_demangled));
416*1cd08393SJason King
417*1cd08393SJason King st.rs_error = EINVAL;
4186a6cfa5dSJason King }
4196a6cfa5dSJason King
420*1cd08393SJason King e = st.rs_error;
421*1cd08393SJason King rust_fini_state(&st);
422*1cd08393SJason King if (e > 0)
423*1cd08393SJason King errno = e;
4246a6cfa5dSJason King
425*1cd08393SJason King return (out);
4266a6cfa5dSJason King }
427