xref: /illumos-gate/usr/src/lib/libctf/common/ctf_dwarf.c (revision 11551c95ce2b1db0e052ae7a25787421afdef4da)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2012 Jason King.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 /*
31  * Copyright 2020 Joyent, Inc.
32  * Copyright 2020 Robert Mustacchi
33  */
34 
35 /*
36  * CTF DWARF conversion theory.
37  *
38  * DWARF data contains a series of compilation units. Each compilation unit
39  * generally refers to an object file or what once was, in the case of linked
40  * binaries and shared objects. Each compilation unit has a series of what DWARF
41  * calls a DIE (Debugging Information Entry). The set of entries that we care
42  * about have type information stored in a series of attributes. Each DIE also
43  * has a tag that identifies the kind of attributes that it has.
44  *
45  * A given DIE may itself have children. For example, a DIE that represents a
46  * structure has children which represent members. Whenever we encounter a DIE
47  * that has children or other values or types associated with it, we recursively
48  * process those children first so that way we can then refer to the generated
49  * CTF type id while processing its parent. This reduces the amount of unknowns
50  * and fixups that we need. It also ensures that we don't accidentally add types
51  * that an overzealous compiler might add to the DWARF data but aren't used by
52  * anything in the system.
53  *
54  * Once we do a conversion, we store a mapping in an AVL tree that goes from the
55  * DWARF's die offset, which is relative to the given compilation unit, to a
56  * ctf_id_t.
57  *
58  * Unfortunately, some compilers actually will emit duplicate entries for a
59  * given type that look similar, but aren't quite. To that end, we go through
60  * and do a variant on a merge once we're done processing a single compilation
61  * unit which deduplicates all of the types that are in the unit.
62  *
63  * Finally, if we encounter an object that has multiple compilation units, then
64  * we'll convert all of the compilation units separately and then do a merge, so
65  * that way we can result in one single ctf_file_t that represents everything
66  * for the object.
67  *
68  * Conversion Steps
69  * ----------------
70  *
71  * Because a given object we've been given to convert may have multiple
72  * compilation units, we break the work into two halves. The first half
73  * processes each compilation unit (potentially in parallel) and then the second
74  * half optionally merges all of the dies in the first half. First, we'll cover
75  * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers
76  * the work done in ctf_dwarf_convert_one().
77  *
78  * An individual ctf_cu_t, which represents a compilation unit, is converted to
79  * CTF in a series of multiple passes.
80  *
81  * Pass 1: During the first pass we walk all of the top-level dies and if we
82  * find a function, variable, struct, union, enum or typedef, we recursively
83  * transform all of its types. We don't recurse or process everything, because
84  * we don't want to add some of the types that compilers may add which are
85  * effectively unused.
86  *
87  * During pass 1, if we encounter any structures or unions we mark them for
88  * fixing up later. This is necessary because we may not be able to determine
89  * the full size of a structure at the beginning of time. This will happen if
90  * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
91  * this possibility we defer adding members to structures or even converting
92  * them during pass 1 and save that for pass 2. Adding all of the base
93  * structures without any of their members helps deal with any circular
94  * dependencies that we might encounter.
95  *
96  * Pass 2: This pass is used to do the first half of fixing up structures and
97  * unions. Rather than walk the entire type space again, we actually walk the
98  * list of structures and unions that we marked for later fixing up. Here, we
99  * iterate over every structure and add members to the underlying ctf_file_t,
100  * but not to the structs themselves. One might wonder why we don't, and the
101  * main reason is that libctf requires a ctf_update() be done before adding the
102  * members to structures or unions.
103  *
104  * Pass 3: This pass is used to do the second half of fixing up structures and
105  * unions. During this part we always go through and add members to structures
106  * and unions that we added to the container in the previous pass. In addition,
107  * we set the structure and union's actual size, which may have additional
108  * padding added by the compiler; it isn't simply the last offset. DWARF always
109  * guarantees an attribute exists for this. Importantly no ctf_id_t's change
110  * during pass 2.
111  *
112  * Pass 4: The next phase is to add CTF entries for all of the symbols and
113  * variables that are present in this die. During pass 1 we added entries to a
114  * map for each variable and function. During this pass, we iterate over the
115  * symbol table and when we encounter a symbol that we have in our lists of
116  * translated information which matches, we then add it to the ctf_file_t.
117  *
118  * Pass 5: Here we go and look for any weak symbols and functions and see if
119  * they match anything that we recognize. If so, then we add type information
120  * for them at this point based on the matching type.
121  *
122  * Pass 6: This pass is actually a variant on a merge. The traditional merge
123  * process expects there to be no duplicate types. As such, at the end of
124  * conversion, we do a dedup on all of the types in the system. The
125  * deduplication process is described in lib/libctf/common/ctf_merge.c.
126  *
127  * Once pass 6 is done, we've finished processing the individual compilation
128  * unit.
129  *
130  * The following steps reflect the general process of doing a conversion.
131  *
132  * 1) Walk the dwarf section and determine the number of compilation units
133  * 2) Create a ctf_cu_t for each compilation unit
134  * 3) Add all ctf_cu_t's to a workq
135  * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
136  *    is comprised of several steps, which were already enumerated.
137  * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics
138  *    of the merge are discussed in lib/libctf/common/ctf_merge.c.
139  * 6) Free everything up and return a ctf_file_t to the user. If we only had a
140  *    single compilation unit, then we give that to the user. Otherwise, we
141  *    return the merged ctf_file_t.
142  *
143  * Threading
144  * ---------
145  *
146  * The process has been designed to be amenable to threading. Each compilation
147  * unit has its own type stream, therefore the logical place to divide and
148  * conquer is at the compilation unit. Each ctf_cu_t has been built to be able
149  * to be processed independently of the others. It has its own libdwarf handle,
150  * as a given libdwarf handle may only be used by a single thread at a time.
151  * This allows the various ctf_cu_t's to be processed in parallel by different
152  * threads.
153  *
154  * All of the ctf_cu_t's are loaded into a workq which allows for a number of
155  * threads to be specified and used as a thread pool to process all of the
156  * queued work. We set the number of threads to use in the workq equal to the
157  * number of threads that the user has specified.
158  *
159  * After all of the compilation units have been drained, we use the same number
160  * of threads when performing a merge of multiple compilation units, if they
161  * exist.
162  *
163  * While all of these different parts do support and allow for multiple threads,
164  * it's important that when only a single thread is specified, that it be the
165  * calling thread. This allows the conversion routines to be used in a context
166  * that doesn't allow additional threads, such as rtld.
167  *
168  * Common DWARF Mechanics and Notes
169  * --------------------------------
170  *
171  * At this time, we really only support DWARFv2, though support for DWARFv4 is
172  * mostly there. There is no intent to support DWARFv3.
173  *
174  * Generally types for something are stored in the DW_AT_type attribute. For
175  * example, a function's return type will be stored in the local DW_AT_type
176  * attribute while the arguments will be in child DIEs. There are also various
177  * times when we don't have any DW_AT_type. In that case, the lack of a type
178  * implies, at least for C, that its C type is void. Because DWARF doesn't emit
179  * one, we have a synthetic void type that we create and manipulate instead and
180  * pass it off to consumers on an as-needed basis. If nothing has a void type,
181  * it will not be emitted.
182  *
183  * Architecture Specific Parts
184  * ---------------------------
185  *
186  * The CTF tooling encodes various information about the various architectures
187  * in the system. Importantly, the tool assumes that every architecture has a
188  * data model where long and pointer are the same size. This is currently the
189  * case, as the two data models illumos supports are ILP32 and LP64.
190  *
191  * In addition, we encode the mapping of various floating point sizes to various
192  * types for each architecture. If a new architecture is being added, it should
193  * be added to the list. The general design of the ctf conversion tools is to be
194  * architecture independent. eg. any of the tools here should be able to convert
195  * any architecture's DWARF into ctf; however, this has not been rigorously
196  * tested and more importantly, the ctf routines don't currently write out the
197  * data in an endian-aware form, they only use that of the currently running
198  * library.
199  */
200 
201 #include <libctf_impl.h>
202 #include <sys/avl.h>
203 #include <sys/debug.h>
204 #include <gelf.h>
205 #include <libdwarf.h>
206 #include <dwarf.h>
207 #include <libgen.h>
208 #include <workq.h>
209 #include <errno.h>
210 
/* Numeric value of DWARF version 2; presumably compared against a CU header. */
#define	DWARF_VERSION_TWO	2
/* Name string "..."; presumably used to recognize variadic arguments. */
#define	DWARF_VARARGS_NAME	"..."
213 
214 /*
215  * Dwarf may refer recursively to other types that we've already processed. To
216  * see if we've already converted them, we look them up in an AVL tree that's
217  * sorted by the DWARF id.
218  */
219 typedef struct ctf_dwmap {
220 	avl_node_t	cdm_avl;
221 	Dwarf_Off	cdm_off;
222 	Dwarf_Die	cdm_die;
223 	ctf_id_t	cdm_id;
224 	boolean_t	cdm_fix;
225 } ctf_dwmap_t;
226 
227 typedef struct ctf_dwvar {
228 	ctf_list_t	cdv_list;
229 	char		*cdv_name;
230 	ctf_id_t	cdv_type;
231 	boolean_t	cdv_global;
232 } ctf_dwvar_t;
233 
234 typedef struct ctf_dwfunc {
235 	ctf_list_t	cdf_list;
236 	char		*cdf_name;
237 	ctf_funcinfo_t	cdf_fip;
238 	ctf_id_t	*cdf_argv;
239 	boolean_t	cdf_global;
240 } ctf_dwfunc_t;
241 
242 typedef struct ctf_dwbitf {
243 	ctf_list_t	cdb_list;
244 	ctf_id_t	cdb_base;
245 	uint_t		cdb_nbits;
246 	ctf_id_t	cdb_id;
247 } ctf_dwbitf_t;
248 
249 /*
250  * The ctf_cu_t represents a single top-level DWARF die unit. While generally,
251  * the typical object file has only a single die, if we're asked to convert
252  * something that's been linked from multiple sources, multiple dies will exist.
253  */
254 typedef struct ctf_die {
255 	Elf		*cu_elf;	/* shared libelf handle */
256 	char		*cu_name;	/* basename of the DIE */
257 	ctf_merge_t	*cu_cmh;	/* merge handle */
258 	ctf_list_t	cu_vars;	/* List of variables */
259 	ctf_list_t	cu_funcs;	/* List of functions */
260 	ctf_list_t	cu_bitfields;	/* Bit field members */
261 	Dwarf_Debug	cu_dwarf;	/* libdwarf handle */
262 	Dwarf_Die	cu_cu;		/* libdwarf compilation unit */
263 	Dwarf_Off	cu_cuoff;	/* cu's offset */
264 	Dwarf_Off	cu_maxoff;	/* maximum offset */
265 	ctf_file_t	*cu_ctfp;	/* output CTF file */
266 	avl_tree_t	cu_map;		/* map die offsets to CTF types */
267 	char		*cu_errbuf;	/* error message buffer */
268 	size_t		cu_errlen;	/* error message buffer length */
269 	size_t		cu_ptrsz;	/* object's pointer size */
270 	boolean_t	cu_bigend;	/* is it big endian */
271 	boolean_t	cu_doweaks;	/* should we convert weak symbols? */
272 	uint_t		cu_mach;	/* machine type */
273 	ctf_id_t	cu_voidtid;	/* void pointer */
274 	ctf_id_t	cu_longtid;	/* id for a 'long' */
275 } ctf_cu_t;
276 
277 static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
278 static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
279 static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);
280 
281 static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
282     boolean_t);
283 static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
284     ctf_id_t *);
285 
286 /*
287  * This is a generic way to set a CTF Conversion backend error depending on what
288  * we were doing. Unless it was one of a specific set of errors that don't
289  * indicate a programming / translation bug, eg. ENOMEM, then we transform it
290  * into a CTF backend error and fill in the error buffer.
291  */
292 static int
293 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
294 {
295 	va_list ap;
296 	int ret;
297 	size_t off = 0;
298 	ssize_t rem = cup->cu_errlen;
299 	if (cfp != NULL)
300 		err = ctf_errno(cfp);
301 
302 	if (err == ENOMEM)
303 		return (err);
304 
305 	ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
306 	if (ret < 0)
307 		goto err;
308 	off += ret;
309 	rem = MAX(rem - ret, 0);
310 
311 	va_start(ap, fmt);
312 	ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap);
313 	va_end(ap);
314 	if (ret < 0)
315 		goto err;
316 
317 	off += ret;
318 	rem = MAX(rem - ret, 0);
319 	if (fmt[strlen(fmt) - 1] != '\n') {
320 		(void) snprintf(cup->cu_errbuf + off, rem,
321 		    ": %s\n", ctf_errmsg(err));
322 	}
323 	va_end(ap);
324 	return (ECTF_CONVBKERR);
325 
326 err:
327 	cup->cu_errbuf[0] = '\0';
328 	return (ECTF_CONVBKERR);
329 }
330 
331 /*
332  * DWARF often opts to put no explicit type to describe a void type. eg. if we
333  * have a reference type whose DW_AT_type member doesn't exist, then we should
334  * instead assume it points to void. Because this isn't represented, we
335  * instead cause it to come into existence.
336  */
337 static ctf_id_t
338 ctf_dwarf_void(ctf_cu_t *cup)
339 {
340 	if (cup->cu_voidtid == CTF_ERR) {
341 		ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
342 		cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT,
343 		    "void", &enc);
344 		if (cup->cu_voidtid == CTF_ERR) {
345 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
346 			    "failed to create void type: %s\n",
347 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
348 		}
349 	}
350 
351 	return (cup->cu_voidtid);
352 }
353 
354 /*
355  * There are many different forms that an array index may take. However, we just
356  * always force it to be of a type long no matter what. Therefore we use this to
357  * have a single instance of long across everything.
358  */
359 static ctf_id_t
360 ctf_dwarf_long(ctf_cu_t *cup)
361 {
362 	if (cup->cu_longtid == CTF_ERR) {
363 		ctf_encoding_t enc;
364 
365 		enc.cte_format = CTF_INT_SIGNED;
366 		enc.cte_offset = 0;
367 		/* All illumos systems are LP */
368 		enc.cte_bits = cup->cu_ptrsz * 8;
369 		cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
370 		    "long", &enc);
371 		if (cup->cu_longtid == CTF_ERR) {
372 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
373 			    "failed to create long type: %s\n",
374 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
375 		}
376 
377 	}
378 
379 	return (cup->cu_longtid);
380 }
381 
382 static int
383 ctf_dwmap_comp(const void *a, const void *b)
384 {
385 	const ctf_dwmap_t *ca = a;
386 	const ctf_dwmap_t *cb = b;
387 
388 	if (ca->cdm_off > cb->cdm_off)
389 		return (1);
390 	if (ca->cdm_off < cb->cdm_off)
391 		return (-1);
392 	return (0);
393 }
394 
395 static int
396 ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix)
397 {
398 	int ret;
399 	avl_index_t index;
400 	ctf_dwmap_t *dwmap;
401 	Dwarf_Off off;
402 
403 	VERIFY(id > 0 && id < CTF_MAX_TYPE);
404 
405 	if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0)
406 		return (ret);
407 
408 	if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
409 		return (ENOMEM);
410 
411 	dwmap->cdm_die = die;
412 	dwmap->cdm_off = off;
413 	dwmap->cdm_id = id;
414 	dwmap->cdm_fix = fix;
415 
416 	ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id);
417 	VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL);
418 	avl_insert(&cup->cu_map, dwmap, index);
419 	return (0);
420 }
421 
422 static int
423 ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
424     Dwarf_Attribute *attrp)
425 {
426 	int ret;
427 	Dwarf_Error derr;
428 
429 	if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
430 		return (0);
431 	if (ret == DW_DLV_NO_ENTRY) {
432 		*attrp = NULL;
433 		return (ENOENT);
434 	}
435 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
436 	    "failed to get attribute for type: %s\n",
437 	    dwarf_errmsg(derr));
438 	return (ECTF_CONVBKERR);
439 }
440 
441 static int
442 ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
443 {
444 	int ret;
445 	Dwarf_Attribute attr;
446 	Dwarf_Error derr;
447 
448 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
449 		return (ret);
450 
451 	if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
452 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
453 		return (0);
454 	}
455 
456 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
457 	    "failed to get unsigned attribute for type: %s\n",
458 	    dwarf_errmsg(derr));
459 	return (ECTF_CONVBKERR);
460 }
461 
462 static int
463 ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
464     Dwarf_Die *diep)
465 {
466 	int ret;
467 	Dwarf_Off off;
468 	Dwarf_Error derr;
469 
470 	if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0)
471 		return (ret);
472 
473 	off += cup->cu_cuoff;
474 	if ((ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr)) !=
475 	    DW_DLV_OK) {
476 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
477 		    "failed to get die from offset %" DW_PR_DUu ": %s\n",
478 		    off, dwarf_errmsg(derr));
479 		return (ECTF_CONVBKERR);
480 	}
481 
482 	return (0);
483 }
484 
485 static int
486 ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
487     Dwarf_Signed *valp)
488 {
489 	int ret;
490 	Dwarf_Attribute attr;
491 	Dwarf_Error derr;
492 
493 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
494 		return (ret);
495 
496 	if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
497 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
498 		return (0);
499 	}
500 
501 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
502 	    "failed to get unsigned attribute for type: %s\n",
503 	    dwarf_errmsg(derr));
504 	return (ECTF_CONVBKERR);
505 }
506 
507 static int
508 ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
509     Dwarf_Unsigned *valp)
510 {
511 	int ret;
512 	Dwarf_Attribute attr;
513 	Dwarf_Error derr;
514 
515 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
516 		return (ret);
517 
518 	if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
519 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
520 		return (0);
521 	}
522 
523 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
524 	    "failed to get unsigned attribute for type: %s\n",
525 	    dwarf_errmsg(derr));
526 	return (ECTF_CONVBKERR);
527 }
528 
529 static int
530 ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
531     Dwarf_Bool *val)
532 {
533 	int ret;
534 	Dwarf_Attribute attr;
535 	Dwarf_Error derr;
536 
537 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
538 		return (ret);
539 
540 	if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
541 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
542 		return (0);
543 	}
544 
545 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
546 	    "failed to get boolean attribute for type: %s\n",
547 	    dwarf_errmsg(derr));
548 
549 	return (ECTF_CONVBKERR);
550 }
551 
552 static int
553 ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp)
554 {
555 	int ret;
556 	char *s;
557 	Dwarf_Attribute attr;
558 	Dwarf_Error derr;
559 
560 	*strp = NULL;
561 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
562 		return (ret);
563 
564 	if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
565 		if ((*strp = ctf_strdup(s)) == NULL)
566 			ret = ENOMEM;
567 		else
568 			ret = 0;
569 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
570 		return (ret);
571 	}
572 
573 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
574 	    "failed to get string attribute for type: %s\n",
575 	    dwarf_errmsg(derr));
576 	return (ECTF_CONVBKERR);
577 }
578 
579 static int
580 ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp)
581 {
582 	int ret;
583 	Dwarf_Error derr;
584 	Dwarf_Attribute attr;
585 	Dwarf_Locdesc *loc;
586 	Dwarf_Signed locnum;
587 
588 	if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location,
589 	    &attr)) != 0)
590 		return (ret);
591 
592 	if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
593 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
594 		    "failed to obtain location list for member offset: %s",
595 		    dwarf_errmsg(derr));
596 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
597 		return (ECTF_CONVBKERR);
598 	}
599 	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
600 
601 	if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
602 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
603 		    "failed to parse location structure for member");
604 		dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
605 		dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
606 		return (ECTF_CONVBKERR);
607 	}
608 
609 	*valp = loc->ld_s->lr_number;
610 
611 	dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
612 	dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
613 	return (0);
614 }
615 
616 
617 static int
618 ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp)
619 {
620 	Dwarf_Error derr;
621 
622 	if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
623 		return (0);
624 
625 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
626 	    "failed to get die offset: %s\n",
627 	    dwarf_errmsg(derr));
628 	return (ECTF_CONVBKERR);
629 }
630 
631 /* simpler variant for debugging output */
632 static Dwarf_Off
633 ctf_die_offset(Dwarf_Die die)
634 {
635 	Dwarf_Off off = -1;
636 	Dwarf_Error derr;
637 
638 	(void) dwarf_dieoffset(die, &off, &derr);
639 	return (off);
640 }
641 
642 static int
643 ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp)
644 {
645 	Dwarf_Error derr;
646 
647 	if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
648 		return (0);
649 
650 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
651 	    "failed to get tag type: %s\n",
652 	    dwarf_errmsg(derr));
653 	return (ECTF_CONVBKERR);
654 }
655 
656 static int
657 ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp)
658 {
659 	Dwarf_Error derr;
660 	int ret;
661 
662 	*sibp = NULL;
663 	ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr);
664 	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
665 		return (0);
666 
667 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
668 	    "failed to sibling from die: %s\n",
669 	    dwarf_errmsg(derr));
670 	return (ECTF_CONVBKERR);
671 }
672 
673 static int
674 ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp)
675 {
676 	Dwarf_Error derr;
677 	int ret;
678 
679 	*childp = NULL;
680 	ret = dwarf_child(base, childp, &derr);
681 	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
682 		return (0);
683 
684 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
685 	    "failed to child from die: %s\n",
686 	    dwarf_errmsg(derr));
687 	return (ECTF_CONVBKERR);
688 }
689 
690 /*
691  * Compilers disagree on what to do to determine if something has global
692  * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
693  * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
694  * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
695  */
696 static int
697 ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp)
698 {
699 	int ret;
700 	Dwarf_Signed vis;
701 	Dwarf_Bool ext;
702 
703 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) {
704 		*igp = vis == DW_VIS_exported;
705 		return (0);
706 	} else if (ret != ENOENT) {
707 		return (ret);
708 	}
709 
710 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) {
711 		if (ret == ENOENT) {
712 			*igp = B_FALSE;
713 			return (0);
714 		}
715 		return (ret);
716 	}
717 	*igp = ext != 0 ? B_TRUE : B_FALSE;
718 	return (0);
719 }
720 
721 static int
722 ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen)
723 {
724 	GElf_Ehdr ehdr;
725 
726 	if (gelf_getehdr(elf, &ehdr) == NULL) {
727 		(void) snprintf(errbuf, errlen,
728 		    "failed to get ELF header: %s\n",
729 		    elf_errmsg(elf_errno()));
730 		return (ECTF_CONVBKERR);
731 	}
732 
733 	cup->cu_mach = ehdr.e_machine;
734 
735 	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
736 		cup->cu_ptrsz = 4;
737 		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0);
738 	} else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
739 		cup->cu_ptrsz = 8;
740 		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0);
741 	} else {
742 		(void) snprintf(errbuf, errlen,
743 		    "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
744 		return (ECTF_CONVBKERR);
745 	}
746 
747 	if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
748 		cup->cu_bigend = B_FALSE;
749 	} else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
750 		cup->cu_bigend = B_TRUE;
751 	} else {
752 		(void) snprintf(errbuf, errlen,
753 		    "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]);
754 		return (ECTF_CONVBKERR);
755 	}
756 
757 	return (0);
758 }
759 
760 typedef struct ctf_dwarf_fpent {
761 	size_t	cdfe_size;
762 	uint_t	cdfe_enc[3];
763 } ctf_dwarf_fpent_t;
764 
765 typedef struct ctf_dwarf_fpmap {
766 	uint_t			cdf_mach;
767 	ctf_dwarf_fpent_t	cdf_ents[4];
768 } ctf_dwarf_fpmap_t;
769 
770 static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
771 	{ EM_SPARC, {
772 		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
773 		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
774 		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
775 		{ 0, { 0 } }
776 	} },
777 	{ EM_SPARC32PLUS, {
778 		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
779 		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
780 		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
781 		{ 0, { 0 } }
782 	} },
783 	{ EM_SPARCV9, {
784 		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
785 		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
786 		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
787 		{ 0, { 0 } }
788 	} },
789 	{ EM_386, {
790 		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
791 		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
792 		{ 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
793 		{ 0, { 0 } }
794 	} },
795 	{ EM_X86_64, {
796 		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
797 		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
798 		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
799 		{ 0, { 0 } }
800 	} },
801 	{ EM_NONE }
802 };
803 
804 /*
805  * We want to normalize the type names that are used between compilers in the
806  * case of complex. gcc prefixes things with types like 'long complex' where as
807  * clang only calls them 'complex' in the dwarf even if in the C they are long
808  * complex or similar.
809  */
810 static int
811 ctf_dwarf_fixup_complex(ctf_cu_t *cup, ctf_encoding_t *enc, char **namep)
812 {
813 	const char *name;
814 	*namep = NULL;
815 
816 	switch (enc->cte_format) {
817 	case CTF_FP_CPLX:
818 		name = "complex float";
819 		break;
820 	case CTF_FP_DCPLX:
821 		name = "complex double";
822 		break;
823 	case CTF_FP_LDCPLX:
824 		name = "complex long double";
825 		break;
826 	default:
827 		return (0);
828 	}
829 
830 	*namep = ctf_strdup(name);
831 	if (*namep == NULL) {
832 		return (ENOMEM);
833 	}
834 
835 	return (0);
836 }
837 
838 static int
839 ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc)
840 {
841 	const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
842 	const ctf_dwarf_fpent_t *ent;
843 	uint_t col = 0, mult = 1;
844 
845 	for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
846 		if (map->cdf_mach == cup->cu_mach)
847 			break;
848 	}
849 
850 	if (map->cdf_mach == EM_NONE) {
851 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
852 		    "Unsupported machine type: %d\n", cup->cu_mach);
853 		return (ENOTSUP);
854 	}
855 
856 	if (type == DW_ATE_complex_float) {
857 		mult = 2;
858 		col = 1;
859 	} else if (type == DW_ATE_imaginary_float ||
860 	    type == DW_ATE_SUN_imaginary_float) {
861 		col = 2;
862 	}
863 
864 	ent = &map->cdf_ents[0];
865 	for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
866 		if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
867 			enc->cte_format = ent->cdfe_enc[col];
868 			return (0);
869 		}
870 	}
871 
872 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
873 	    "failed to find valid fp mapping for encoding %d, size %d bits\n",
874 	    type, enc->cte_bits);
875 	return (EINVAL);
876 }
877 
878 static int
879 ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp,
880     ctf_encoding_t *enc)
881 {
882 	int ret;
883 	Dwarf_Signed type;
884 
885 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0)
886 		return (ret);
887 
888 	switch (type) {
889 	case DW_ATE_unsigned:
890 	case DW_ATE_address:
891 		*kindp = CTF_K_INTEGER;
892 		enc->cte_format = 0;
893 		break;
894 	case DW_ATE_unsigned_char:
895 		*kindp = CTF_K_INTEGER;
896 		enc->cte_format = CTF_INT_CHAR;
897 		break;
898 	case DW_ATE_signed:
899 		*kindp = CTF_K_INTEGER;
900 		enc->cte_format = CTF_INT_SIGNED;
901 		break;
902 	case DW_ATE_signed_char:
903 		*kindp = CTF_K_INTEGER;
904 		enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
905 		break;
906 	case DW_ATE_boolean:
907 		*kindp = CTF_K_INTEGER;
908 		enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
909 		break;
910 	case DW_ATE_float:
911 	case DW_ATE_complex_float:
912 	case DW_ATE_imaginary_float:
913 	case DW_ATE_SUN_imaginary_float:
914 	case DW_ATE_SUN_interval_float:
915 		*kindp = CTF_K_FLOAT;
916 		if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0)
917 			return (ret);
918 		break;
919 	default:
920 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
921 		    "encountered unknown DWARF encoding: %d", type);
922 		return (ECTF_CONVBKERR);
923 	}
924 
925 	return (0);
926 }
927 
928 /*
929  * Different compilers (at least GCC and Studio) use different names for types.
930  * This parses the types and attempts to unify them. If this fails, we just fall
931  * back to using the DWARF itself.
932  */
933 static int
934 ctf_dwarf_parse_int(const char *name, int *kindp, ctf_encoding_t *enc,
935     char **newnamep)
936 {
937 	char buf[256];
938 	char *base, *c, *last;
939 	int nlong = 0, nshort = 0, nchar = 0, nint = 0;
940 	int sign = 1;
941 
942 	if (strlen(name) + 1 > sizeof (buf))
943 		return (EINVAL);
944 
945 	(void) strlcpy(buf, name, sizeof (buf));
946 	for (c = strtok_r(buf, " ", &last); c != NULL;
947 	    c = strtok_r(NULL, " ", &last)) {
948 		if (strcmp(c, "signed") == 0) {
949 			sign = 1;
950 		} else if (strcmp(c, "unsigned") == 0) {
951 			sign = 0;
952 		} else if (strcmp(c, "long") == 0) {
953 			nlong++;
954 		} else if (strcmp(c, "char") == 0) {
955 			nchar++;
956 		} else if (strcmp(c, "short") == 0) {
957 			nshort++;
958 		} else if (strcmp(c, "int") == 0) {
959 			nint++;
960 		} else {
961 			/*
962 			 * If we don't recognize any of the tokens, we'll tell
963 			 * the caller to fall back to the dwarf-provided
964 			 * encoding information.
965 			 */
966 			return (EINVAL);
967 		}
968 	}
969 
970 	if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
971 		return (EINVAL);
972 
973 	if (nchar > 0) {
974 		if (nlong > 0 || nshort > 0 || nint > 0)
975 			return (EINVAL);
976 		base = "char";
977 	} else if (nshort > 0) {
978 		if (nlong > 0)
979 			return (EINVAL);
980 		base = "short";
981 	} else if (nlong > 0) {
982 		base = "long";
983 	} else {
984 		base = "int";
985 	}
986 
987 	if (nchar > 0)
988 		enc->cte_format = CTF_INT_CHAR;
989 	else
990 		enc->cte_format = 0;
991 
992 	if (sign > 0)
993 		enc->cte_format |= CTF_INT_SIGNED;
994 
995 	(void) snprintf(buf, sizeof (buf), "%s%s%s",
996 	    (sign ? "" : "unsigned "),
997 	    (nlong > 1 ? "long " : ""),
998 	    base);
999 
1000 	*newnamep = ctf_strdup(buf);
1001 	if (*newnamep == NULL)
1002 		return (ENOMEM);
1003 	*kindp = CTF_K_INTEGER;
1004 	return (0);
1005 }
1006 
1007 static int
1008 ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot,
1009     Dwarf_Off off)
1010 {
1011 	int ret;
1012 	char *name, *nname = NULL;
1013 	Dwarf_Unsigned sz;
1014 	int kind;
1015 	ctf_encoding_t enc;
1016 	ctf_id_t id;
1017 
1018 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0)
1019 		return (ret);
1020 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) {
1021 		goto out;
1022 	}
1023 	ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
1024 	    off, sz);
1025 
1026 	bzero(&enc, sizeof (ctf_encoding_t));
1027 	enc.cte_bits = sz * 8;
1028 	if ((ret = ctf_dwarf_parse_int(name, &kind, &enc, &nname)) == 0) {
1029 		ctf_free(name, strlen(name) + 1);
1030 		name = nname;
1031 	} else {
1032 		if (ret != EINVAL) {
1033 			goto out;
1034 		}
1035 		ctf_dprintf("falling back to dwarf for base type %s\n", name);
1036 		if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0) {
1037 			goto out;
1038 		}
1039 
1040 		if (kind == CTF_K_FLOAT && (ret = ctf_dwarf_fixup_complex(cup,
1041 		    &enc, &nname)) != 0) {
1042 			goto out;
1043 		} else if (nname != NULL) {
1044 			ctf_free(name, strlen(name) + 1);
1045 			name = nname;
1046 		}
1047 	}
1048 
1049 	id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind);
1050 	if (id == CTF_ERR) {
1051 		ret = ctf_errno(cup->cu_ctfp);
1052 	} else {
1053 		*idp = id;
1054 		ret = ctf_dwmap_add(cup, id, die, B_FALSE);
1055 	}
1056 out:
1057 	ctf_free(name, strlen(name) + 1);
1058 	return (ret);
1059 }
1060 
1061 /*
1062  * Getting a member's offset is a surprisingly intricate dance. It works as
1063  * follows:
1064  *
1065  * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
1066  * have a DW_AT_data_member_location. We won't have both. Thus we check first
1067  * for DW_AT_data_bit_offset, and if it exists, we're set.
1068  *
1069  * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then
1070  * we have to grab the data location and use the following dance:
1071  *
1072  * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
1073  * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
1074  * When it's been omitted, we then have to say that the size is that of the
1075  * underlying type, which forces that to be after a ctf_update(). Here, we have
1076  * to do different things based on whether or not we're using big endian or
1077  * little endian to obtain the proper offset.
1078  */
1079 static int
1080 ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid,
1081     ulong_t *offp)
1082 {
1083 	int ret;
1084 	Dwarf_Unsigned loc, bitsz, bytesz;
1085 	Dwarf_Signed bitoff;
1086 	size_t off;
1087 	ssize_t tsz;
1088 
1089 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset,
1090 	    &loc)) == 0) {
1091 		*offp = loc;
1092 		return (0);
1093 	} else if (ret != ENOENT) {
1094 		return (ret);
1095 	}
1096 
1097 	if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0)
1098 		return (ret);
1099 	off = loc * 8;
1100 
1101 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset,
1102 	    &bitoff)) != 0) {
1103 		if (ret != ENOENT)
1104 			return (ret);
1105 		*offp = off;
1106 		return (0);
1107 	}
1108 
1109 	/* At this point we have to have DW_AT_bit_size */
1110 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0)
1111 		return (ret);
1112 
1113 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size,
1114 	    &bytesz)) != 0) {
1115 		if (ret != ENOENT)
1116 			return (ret);
1117 		if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) {
1118 			int e = ctf_errno(cup->cu_ctfp);
1119 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1120 			    "failed to get type size: %s", ctf_errmsg(e));
1121 			return (ECTF_CONVBKERR);
1122 		}
1123 	} else {
1124 		tsz = bytesz;
1125 	}
1126 	tsz *= 8;
1127 	if (cup->cu_bigend == B_TRUE) {
1128 		*offp = off + bitoff;
1129 	} else {
1130 		*offp = off + tsz - bitoff - bitsz;
1131 	}
1132 
1133 	return (0);
1134 }
1135 
1136 /*
1137  * We need to determine if the member in question is a bitfield. If it is, then
1138  * we need to go through and create a new type that's based on the actual base
1139  * type, but has a different size. We also rename the type as a result to help
1140  * deal with future collisions.
1141  *
1142  * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
1143  * bit size member and it does not equal the byte size member, then we need to
1144  * create a bitfield type based on this.
1145  *
1146  * Note: When we support DWARFv4, there may be a chance that we need to also
1147  * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
1148  */
1149 static int
1150 ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp)
1151 {
1152 	int ret;
1153 	Dwarf_Unsigned bitsz;
1154 	ctf_encoding_t e;
1155 	ctf_dwbitf_t *cdb;
1156 	ctf_dtdef_t *dtd;
1157 	ctf_id_t base = *idp;
1158 	int kind;
1159 
1160 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) {
1161 		if (ret == ENOENT)
1162 			return (0);
1163 		return (ret);
1164 	}
1165 
1166 	ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
1167 	/*
1168 	 * Given that we now have a bitsize, time to go do something about it.
1169 	 * We're going to create a new type based on the current one, but first
1170 	 * we need to find the base type. This means we need to traverse any
1171 	 * typedef's, consts, and volatiles until we get to what should be
1172 	 * something of type integer or enumeration.
1173 	 */
1174 	VERIFY(bitsz < UINT32_MAX);
1175 	dtd = ctf_dtd_lookup(cup->cu_ctfp, base);
1176 	VERIFY(dtd != NULL);
1177 	kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1178 	while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
1179 	    kind == CTF_K_VOLATILE) {
1180 		dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type);
1181 		VERIFY(dtd != NULL);
1182 		kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1183 	}
1184 	ctf_dprintf("got kind %d\n", kind);
1185 	VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
1186 
1187 	/*
1188 	 * As surprising as it may be, it is strictly possible to create a
1189 	 * bitfield that is based on an enum. Of course, the C standard leaves
1190 	 * enums sizing as an ABI concern more or less. To that effect, today on
1191 	 * all illumos platforms the size of an enum is generally that of an
1192 	 * int as our supported data models and ABIs all agree on that. So what
1193 	 * we'll do is fake up a CTF encoding here to use. In this case, we'll
1194 	 * treat it as an unsigned value of whatever size the underlying enum
1195 	 * currently has (which is in the ctt_size member of its dynamic type
1196 	 * data).
1197 	 */
1198 	if (kind == CTF_K_INTEGER) {
1199 		e = dtd->dtd_u.dtu_enc;
1200 	} else {
1201 		bzero(&e, sizeof (ctf_encoding_t));
1202 		e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
1203 	}
1204 
1205 	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL;
1206 	    cdb = ctf_list_next(cdb)) {
1207 		if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
1208 			break;
1209 	}
1210 
1211 	/*
1212 	 * Create a new type if none exists. We name all types in a way that is
1213 	 * guaranteed not to conflict with the corresponding C type. We do this
1214 	 * by using the ':' operator.
1215 	 */
1216 	if (cdb == NULL) {
1217 		size_t namesz;
1218 		char *name;
1219 
1220 		e.cte_bits = bitsz;
1221 		namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
1222 		    (uint32_t)bitsz);
1223 		name = ctf_alloc(namesz + 1);
1224 		if (name == NULL)
1225 			return (ENOMEM);
1226 		cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
1227 		if (cdb == NULL) {
1228 			ctf_free(name, namesz + 1);
1229 			return (ENOMEM);
1230 		}
1231 		(void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
1232 		    (uint32_t)bitsz);
1233 
1234 		cdb->cdb_base = base;
1235 		cdb->cdb_nbits = bitsz;
1236 		cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
1237 		    name, &e);
1238 		if (cdb->cdb_id == CTF_ERR) {
1239 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1240 			    "failed to get add bitfield type %s: %s", name,
1241 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1242 			ctf_free(name, namesz + 1);
1243 			ctf_free(cdb, sizeof (ctf_dwbitf_t));
1244 			return (ECTF_CONVBKERR);
1245 		}
1246 		ctf_free(name, namesz + 1);
1247 		ctf_list_append(&cup->cu_bitfields, cdb);
1248 	}
1249 
1250 	*idp = cdb->cdb_id;
1251 
1252 	return (0);
1253 }
1254 
1255 static int
1256 ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add)
1257 {
1258 	int ret, kind;
1259 	Dwarf_Die child, memb;
1260 	Dwarf_Unsigned size;
1261 
1262 	kind = ctf_type_kind(cup->cu_ctfp, base);
1263 	VERIFY(kind != CTF_ERR);
1264 	VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
1265 
1266 	/*
1267 	 * Members are in children. However, gcc also allows empty ones.
1268 	 */
1269 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1270 		return (ret);
1271 	if (child == NULL)
1272 		return (0);
1273 
1274 	memb = child;
1275 	while (memb != NULL) {
1276 		Dwarf_Die sib, tdie;
1277 		Dwarf_Half tag;
1278 		ctf_id_t mid;
1279 		char *mname;
1280 		ulong_t memboff = 0;
1281 
1282 		if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0)
1283 			return (ret);
1284 
1285 		if (tag != DW_TAG_member)
1286 			goto next;
1287 
1288 		if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0)
1289 			return (ret);
1290 
1291 		if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid,
1292 		    CTF_ADD_NONROOT)) != 0)
1293 			return (ret);
1294 		ctf_dprintf("Got back type id: %d\n", mid);
1295 
1296 		/*
1297 		 * If we're not adding a member, just go ahead and return.
1298 		 */
1299 		if (add == B_FALSE) {
1300 			if ((ret = ctf_dwarf_member_bitfield(cup, memb,
1301 			    &mid)) != 0)
1302 				return (ret);
1303 			goto next;
1304 		}
1305 
1306 		if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name,
1307 		    &mname)) != 0 && ret != ENOENT)
1308 			return (ret);
1309 		if (ret == ENOENT)
1310 			mname = NULL;
1311 
1312 		if (kind == CTF_K_UNION) {
1313 			memboff = 0;
1314 		} else if ((ret = ctf_dwarf_member_offset(cup, memb, mid,
1315 		    &memboff)) != 0) {
1316 			if (mname != NULL)
1317 				ctf_free(mname, strlen(mname) + 1);
1318 			return (ret);
1319 		}
1320 
1321 		if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0)
1322 			return (ret);
1323 
1324 		ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff);
1325 		if (ret == CTF_ERR) {
1326 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1327 			    "failed to add member %s: %s",
1328 			    mname, ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1329 			if (mname != NULL)
1330 				ctf_free(mname, strlen(mname) + 1);
1331 			return (ECTF_CONVBKERR);
1332 		}
1333 
1334 		if (mname != NULL)
1335 			ctf_free(mname, strlen(mname) + 1);
1336 
1337 next:
1338 		if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0)
1339 			return (ret);
1340 		memb = sib;
1341 	}
1342 
1343 	/*
1344 	 * If we're not adding members, then we don't know the final size of the
1345 	 * structure, so end here.
1346 	 */
1347 	if (add == B_FALSE)
1348 		return (0);
1349 
1350 	/* Finally set the size of the structure to the actual byte size */
1351 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0)
1352 		return (ret);
1353 	if ((ctf_set_size(cup->cu_ctfp, base, size)) == CTF_ERR) {
1354 		int e = ctf_errno(cup->cu_ctfp);
1355 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1356 		    "failed to set type size for %d to 0x%x: %s", base,
1357 		    (uint32_t)size, ctf_errmsg(e));
1358 		return (ECTF_CONVBKERR);
1359 	}
1360 
1361 	return (0);
1362 }
1363 
1364 static int
1365 ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1366     int kind, int isroot)
1367 {
1368 	int ret;
1369 	char *name;
1370 	ctf_id_t base;
1371 	Dwarf_Die child;
1372 	Dwarf_Bool decl;
1373 
1374 	/*
1375 	 * Deal with the terribly annoying case of anonymous structs and unions.
1376 	 * If they don't have a name, set the name to the empty string.
1377 	 */
1378 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1379 	    ret != ENOENT)
1380 		return (ret);
1381 	if (ret == ENOENT)
1382 		name = NULL;
1383 
1384 	/*
1385 	 * We need to check if we just have a declaration here. If we do, then
1386 	 * instead of creating an actual structure or union, we're just going to
1387 	 * go ahead and create a forward. During a dedup or merge, the forward
1388 	 * will be replaced with the real thing.
1389 	 */
1390 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration,
1391 	    &decl)) != 0) {
1392 		if (ret != ENOENT)
1393 			return (ret);
1394 		decl = 0;
1395 	}
1396 
1397 	if (decl != 0) {
1398 		base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind);
1399 	} else if (kind == CTF_K_STRUCT) {
1400 		base = ctf_add_struct(cup->cu_ctfp, isroot, name);
1401 	} else {
1402 		base = ctf_add_union(cup->cu_ctfp, isroot, name);
1403 	}
1404 	ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
1405 	if (name != NULL)
1406 		ctf_free(name, strlen(name) + 1);
1407 	if (base == CTF_ERR)
1408 		return (ctf_errno(cup->cu_ctfp));
1409 	*idp = base;
1410 
1411 	/*
1412 	 * If it's just a declaration, we're not going to mark it for fix up or
1413 	 * do anything else.
1414 	 */
1415 	if (decl == B_TRUE)
1416 		return (ctf_dwmap_add(cup, base, die, B_FALSE));
1417 	if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0)
1418 		return (ret);
1419 
1420 	/*
1421 	 * The children of a structure or union are generally members. However,
1422 	 * some compilers actually insert structs and unions there and not as a
1423 	 * top-level die. Therefore, to make sure we honor our pass 1 contract
1424 	 * of having all the base types, but not members, we need to walk this
1425 	 * for instances of a DW_TAG_union_type.
1426 	 */
1427 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1428 		return (ret);
1429 
1430 	while (child != NULL) {
1431 		Dwarf_Half tag;
1432 		Dwarf_Die sib;
1433 
1434 		if ((ret = ctf_dwarf_tag(cup, child, &tag)) != 0)
1435 			return (ret);
1436 
1437 		switch (tag) {
1438 		case DW_TAG_union_type:
1439 		case DW_TAG_structure_type:
1440 			ret = ctf_dwarf_convert_type(cup, child, NULL,
1441 			    CTF_ADD_NONROOT);
1442 			if (ret != 0) {
1443 				return (ret);
1444 			}
1445 			break;
1446 		default:
1447 			break;
1448 		}
1449 
1450 		if ((ret = ctf_dwarf_sib(cup, child, &sib)) != 0)
1451 			return (ret);
1452 		child = sib;
1453 	}
1454 
1455 	return (0);
1456 }
1457 
1458 static int
1459 ctf_dwarf_array_upper_bound(ctf_cu_t *cup, Dwarf_Die range, ctf_arinfo_t *ar)
1460 {
1461 	Dwarf_Attribute attr;
1462 	Dwarf_Unsigned uval;
1463 	Dwarf_Signed sval;
1464 	Dwarf_Half form;
1465 	Dwarf_Error derr;
1466 	const char *formstr = NULL;
1467 	uint_t adj = 0;
1468 	int ret = 0;
1469 
1470 	ctf_dprintf("setting array upper bound\n");
1471 
1472 	ar->ctr_nelems = 0;
1473 
1474 	/*
1475 	 * Different compilers use different attributes to indicate the size of
1476 	 * an array. GCC has traditionally used DW_AT_upper_bound, while Clang
1477 	 * uses DW_AT_count. They have slightly different semantics. DW_AT_count
1478 	 * indicates the total number of elements that are present, while
1479 	 * DW_AT_upper_bound indicates the last index, hence we need to add one
1480 	 * to that index to get the count.
1481 	 *
1482 	 * We first search for DW_AT_count and then for DW_AT_upper_bound. If we
1483 	 * find neither, then we treat the lack of this as a zero element array.
1484 	 * Our value is initialized assuming we find a DW_AT_count value.
1485 	 */
1486 	ret = ctf_dwarf_attribute(cup, range, DW_AT_count, &attr);
1487 	if (ret != 0 && ret != ENOENT) {
1488 		return (ret);
1489 	} else if (ret == ENOENT) {
1490 		ret = ctf_dwarf_attribute(cup, range, DW_AT_upper_bound, &attr);
1491 		if (ret != 0 && ret != ENOENT) {
1492 			return (ret);
1493 		} else if (ret == ENOENT) {
1494 			return (0);
1495 		} else {
1496 			adj = 1;
1497 		}
1498 	}
1499 
1500 	if (dwarf_whatform(attr, &form, &derr) != DW_DLV_OK) {
1501 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1502 		    "failed to get DW_AT_upper_bound attribute form: %s\n",
1503 		    dwarf_errmsg(derr));
1504 		ret = ECTF_CONVBKERR;
1505 		goto done;
1506 	}
1507 
1508 	/*
1509 	 * Compilers can indicate array bounds using signed or unsigned values.
1510 	 * Additionally, some compilers may also store the array bounds
1511 	 * using as DW_FORM_data{1,2,4,8} (which DWARF treats as raw data and
1512 	 * expects the caller to understand how to interpret the value).
1513 	 *
1514 	 * GCC 4.4.4 appears to always use unsigned values to encode the
1515 	 * array size (using '(unsigned)-1' to represent a zero-length or
1516 	 * unknown length array). Later versions of GCC use a signed value of
1517 	 * -1 for zero/unknown length arrays, and unsigned values to encode
1518 	 * known array sizes.
1519 	 *
1520 	 * Both dwarf_formsdata() and dwarf_formudata() will retrieve values
1521 	 * as their respective signed/unsigned forms, but both will also
1522 	 * retreive DW_FORM_data{1,2,4,8} values and treat them as signed or
1523 	 * unsigned integers (i.e. dwarf_formsdata() treats DW_FORM_dataXX
1524 	 * as signed integers and dwarf_formudata() treats DW_FORM_dataXX as
1525 	 * unsigned integers). Both will return an error if the form is not
1526 	 * their respective signed/unsigned form, or DW_FORM_dataXX.
1527 	 *
1528 	 * To obtain the upper bound, we use the appropriate
1529 	 * dwarf_form[su]data() function based on the form of DW_AT_upper_bound.
1530 	 * Additionally, we let dwarf_formudata() handle the DW_FORM_dataXX
1531 	 * forms (via the default option in the switch). If the value is in an
1532 	 * unexpected form (i.e. not DW_FORM_udata or DW_FORM_dataXX),
1533 	 * dwarf_formudata() will return failure (i.e. not DW_DLV_OK) and set
1534 	 * derr with the specific error value.
1535 	 */
1536 	switch (form) {
1537 	case DW_FORM_sdata:
1538 		if (dwarf_formsdata(attr, &sval, &derr) == DW_DLV_OK) {
1539 			ar->ctr_nelems = sval + adj;
1540 			goto done;
1541 		}
1542 		break;
1543 	case DW_FORM_udata:
1544 	default:
1545 		if (dwarf_formudata(attr, &uval, &derr) == DW_DLV_OK) {
1546 			ar->ctr_nelems = uval + adj;
1547 			goto done;
1548 		}
1549 		break;
1550 	}
1551 
1552 	if (dwarf_get_FORM_name(form, &formstr) != DW_DLV_OK)
1553 		formstr = "unknown DWARF form";
1554 
1555 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1556 	    "failed to get %s (%hu) value for DW_AT_upper_bound: %s\n",
1557 	    formstr, form, dwarf_errmsg(derr));
1558 	ret = ECTF_CONVBKERR;
1559 
1560 done:
1561 	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
1562 	return (ret);
1563 }
1564 
1565 static int
1566 ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp,
1567     ctf_id_t base, int isroot)
1568 {
1569 	int ret;
1570 	Dwarf_Die sib;
1571 	ctf_arinfo_t ar;
1572 
1573 	ctf_dprintf("creating array range\n");
1574 
1575 	if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0)
1576 		return (ret);
1577 	if (sib != NULL) {
1578 		ctf_id_t id;
1579 		if ((ret = ctf_dwarf_create_array_range(cup, sib, &id,
1580 		    base, CTF_ADD_NONROOT)) != 0)
1581 			return (ret);
1582 		ar.ctr_contents = id;
1583 	} else {
1584 		ar.ctr_contents = base;
1585 	}
1586 
1587 	if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR)
1588 		return (ctf_errno(cup->cu_ctfp));
1589 
1590 	if ((ret = ctf_dwarf_array_upper_bound(cup, range, &ar)) != 0)
1591 		return (ret);
1592 
1593 	if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR)
1594 		return (ctf_errno(cup->cu_ctfp));
1595 
1596 	return (0);
1597 }
1598 
1599 /*
1600  * Try and create an array type. First, the kind of the array is specified in
1601  * the DW_AT_type entry. Next, the number of entries is stored in a more
1602  * complicated form, we should have a child that has the DW_TAG_subrange type.
1603  */
1604 static int
1605 ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1606 {
1607 	int ret;
1608 	Dwarf_Die tdie, rdie;
1609 	ctf_id_t tid;
1610 	Dwarf_Half rtag;
1611 
1612 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1613 		return (ret);
1614 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1615 	    CTF_ADD_NONROOT)) != 0)
1616 		return (ret);
1617 
1618 	if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0)
1619 		return (ret);
1620 	if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0)
1621 		return (ret);
1622 	if (rtag != DW_TAG_subrange_type) {
1623 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1624 		    "encountered array without DW_TAG_subrange_type child\n");
1625 		return (ECTF_CONVBKERR);
1626 	}
1627 
1628 	/*
1629 	 * The compiler may opt to describe a multi-dimensional array as one
1630 	 * giant array or it may opt to instead encode it as a series of
1631 	 * subranges. If it's the latter, then for each subrange we introduce a
1632 	 * type. We can always use the base type.
1633 	 */
1634 	if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid,
1635 	    isroot)) != 0)
1636 		return (ret);
1637 	ctf_dprintf("Got back id %d\n", *idp);
1638 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1639 }
1640 
1641 /*
1642  * Given "const int const_array3[11]", GCC7 at least will create a DIE tree of
1643  * DW_TAG_const_type:DW_TAG_array_type:DW_Tag_const_type:<member_type>.
1644  *
1645  * Given C's syntax, this renders out as "const const int const_array3[11]".  To
1646  * get closer to round-tripping (and make the unit tests work), we'll peek for
1647  * this case, and avoid adding the extraneous qualifier if we see that the
1648  * underlying array referent already has the same qualifier.
1649  *
1650  * This is unfortunately less trivial than it could be: this issue applies to
1651  * qualifier sets like "const volatile", as well as multi-dimensional arrays, so
1652  * we need to descend down those.
1653  *
1654  * Returns CTF_ERR on error, or a boolean value otherwise.
1655  */
1656 static int
1657 needed_array_qualifier(ctf_cu_t *cup, int kind, ctf_id_t ref_id)
1658 {
1659 	const ctf_type_t *t;
1660 	ctf_arinfo_t arinfo;
1661 	int akind;
1662 
1663 	if (kind != CTF_K_CONST && kind != CTF_K_VOLATILE &&
1664 	    kind != CTF_K_RESTRICT)
1665 		return (1);
1666 
1667 	if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, ref_id)) == NULL)
1668 		return (CTF_ERR);
1669 
1670 	if (LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info) != CTF_K_ARRAY)
1671 		return (1);
1672 
1673 	if (ctf_dyn_array_info(cup->cu_ctfp, ref_id, &arinfo) != 0)
1674 		return (CTF_ERR);
1675 
1676 	ctf_id_t id = arinfo.ctr_contents;
1677 
1678 	for (;;) {
1679 		if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, id)) == NULL)
1680 			return (CTF_ERR);
1681 
1682 		akind = LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info);
1683 
1684 		if (akind == kind)
1685 			break;
1686 
1687 		if (akind == CTF_K_ARRAY) {
1688 			if (ctf_dyn_array_info(cup->cu_ctfp,
1689 			    id, &arinfo) != 0)
1690 				return (CTF_ERR);
1691 			id = arinfo.ctr_contents;
1692 			continue;
1693 		}
1694 
1695 		if (akind != CTF_K_CONST && akind != CTF_K_VOLATILE &&
1696 		    akind != CTF_K_RESTRICT)
1697 			break;
1698 
1699 		id = t->ctt_type;
1700 	}
1701 
1702 	if (kind == akind) {
1703 		ctf_dprintf("ignoring extraneous %s qualifier for array %d\n",
1704 		    ctf_kind_name(cup->cu_ctfp, kind), ref_id);
1705 	}
1706 
1707 	return (kind != akind);
1708 }
1709 
1710 static int
1711 ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1712     int kind, int isroot)
1713 {
1714 	int ret;
1715 	ctf_id_t id;
1716 	Dwarf_Die tdie;
1717 	char *name;
1718 	size_t namelen;
1719 
1720 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1721 	    ret != ENOENT)
1722 		return (ret);
1723 	if (ret == ENOENT) {
1724 		name = NULL;
1725 		namelen = 0;
1726 	} else {
1727 		namelen = strlen(name);
1728 	}
1729 
1730 	ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
1731 
1732 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
1733 		if (ret != ENOENT) {
1734 			ctf_free(name, namelen);
1735 			return (ret);
1736 		}
1737 		if ((id = ctf_dwarf_void(cup)) == CTF_ERR) {
1738 			ctf_free(name, namelen);
1739 			return (ctf_errno(cup->cu_ctfp));
1740 		}
1741 	} else {
1742 		if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
1743 		    CTF_ADD_NONROOT)) != 0) {
1744 			ctf_free(name, namelen);
1745 			return (ret);
1746 		}
1747 	}
1748 
1749 	if ((ret = needed_array_qualifier(cup, kind, id)) <= 0) {
1750 		if (ret != 0) {
1751 			ret = (ctf_errno(cup->cu_ctfp));
1752 		} else {
1753 			*idp = id;
1754 		}
1755 
1756 		ctf_free(name, namelen);
1757 		return (ret);
1758 	}
1759 
1760 	if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) ==
1761 	    CTF_ERR) {
1762 		ctf_free(name, namelen);
1763 		return (ctf_errno(cup->cu_ctfp));
1764 	}
1765 
1766 	ctf_free(name, namelen);
1767 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1768 }
1769 
1770 static int
1771 ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1772 {
1773 	size_t size = 0;
1774 	Dwarf_Die child;
1775 	Dwarf_Unsigned dw;
1776 	ctf_id_t id;
1777 	char *name;
1778 	int ret;
1779 
1780 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1781 	    ret != ENOENT)
1782 		return (ret);
1783 	if (ret == ENOENT)
1784 		name = NULL;
1785 
1786 	/*
1787 	 * Enumerations may have a size associated with them, particularly if
1788 	 * they're packed. Note, a Dwarf_Unsigned is larger than a size_t on an
1789 	 * ILP32 system.
1790 	 */
1791 	if (ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &dw) == 0 &&
1792 	    dw < SIZE_MAX) {
1793 		size = (size_t)dw;
1794 	}
1795 
1796 	id = ctf_add_enum(cup->cu_ctfp, isroot, name, size);
1797 	ctf_dprintf("added enum %s (%d)\n", name, id);
1798 	if (name != NULL)
1799 		ctf_free(name, strlen(name) + 1);
1800 	if (id == CTF_ERR)
1801 		return (ctf_errno(cup->cu_ctfp));
1802 	*idp = id;
1803 	if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0)
1804 		return (ret);
1805 
1806 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) {
1807 		if (ret == ENOENT)
1808 			ret = 0;
1809 		return (ret);
1810 	}
1811 
1812 	while (child != NULL) {
1813 		Dwarf_Half tag;
1814 		Dwarf_Signed sval;
1815 		Dwarf_Unsigned uval;
1816 		Dwarf_Die arg = child;
1817 		int eval;
1818 
1819 		if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0)
1820 			return (ret);
1821 
1822 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1823 			return (ret);
1824 
1825 		if (tag != DW_TAG_enumerator) {
1826 			if ((ret = ctf_dwarf_convert_type(cup, arg, NULL,
1827 			    CTF_ADD_NONROOT)) != 0)
1828 				return (ret);
1829 			continue;
1830 		}
1831 
1832 		/*
1833 		 * DWARF v4 section 5.7 tells us we'll always have names.
1834 		 */
1835 		if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0)
1836 			return (ret);
1837 
1838 		/*
1839 		 * We have to be careful here: newer GCCs generate DWARF where
1840 		 * an unsigned value will happily pass ctf_dwarf_signed().
1841 		 * Since negative values will fail ctf_dwarf_unsigned(), we try
1842 		 * that first to make sure we get the right value.
1843 		 */
1844 		if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value,
1845 		    &uval)) == 0) {
1846 			eval = (int)uval;
1847 		} else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value,
1848 		    &sval)) == 0) {
1849 			eval = sval;
1850 		}
1851 
1852 		if (ret != 0) {
1853 			if (ret != ENOENT)
1854 				return (ret);
1855 
1856 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1857 			    "encountered enumeration without constant value\n");
1858 			return (ECTF_CONVBKERR);
1859 		}
1860 
1861 		ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval);
1862 		if (ret == CTF_ERR) {
1863 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1864 			    "failed to add enumarator %s (%d) to %d\n",
1865 			    name, eval, id);
1866 			ctf_free(name, strlen(name) + 1);
1867 			return (ctf_errno(cup->cu_ctfp));
1868 		}
1869 		ctf_free(name, strlen(name) + 1);
1870 	}
1871 
1872 	return (0);
1873 }
1874 
1875 /*
1876  * For a function pointer, walk over and process all of its children, unless we
1877  * encounter one that's just a declaration. In which case, we error on it.
1878  */
1879 static int
1880 ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1881 {
1882 	int ret;
1883 	Dwarf_Bool b;
1884 	ctf_funcinfo_t fi;
1885 	Dwarf_Die retdie;
1886 	ctf_id_t *argv = NULL;
1887 
1888 	bzero(&fi, sizeof (ctf_funcinfo_t));
1889 
1890 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
1891 		if (ret != ENOENT)
1892 			return (ret);
1893 	} else {
1894 		if (b != 0)
1895 			return (EPROTOTYPE);
1896 	}
1897 
1898 	/*
1899 	 * Return type is in DW_AT_type, if none, it returns void.
1900 	 */
1901 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) {
1902 		if (ret != ENOENT)
1903 			return (ret);
1904 		if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR)
1905 			return (ctf_errno(cup->cu_ctfp));
1906 	} else {
1907 		if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return,
1908 		    CTF_ADD_NONROOT)) != 0)
1909 			return (ret);
1910 	}
1911 
1912 	if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) {
1913 		return (ret);
1914 	}
1915 
1916 	if (fi.ctc_argc != 0) {
1917 		argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
1918 		if (argv == NULL)
1919 			return (ENOMEM);
1920 
1921 		if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) {
1922 			ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1923 			return (ret);
1924 		}
1925 	}
1926 
1927 	if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) ==
1928 	    CTF_ERR) {
1929 		ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1930 		return (ctf_errno(cup->cu_ctfp));
1931 	}
1932 
1933 	ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1934 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1935 }
1936 
1937 static int
1938 ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1939     int isroot)
1940 {
1941 	int ret;
1942 	Dwarf_Off offset;
1943 	Dwarf_Half tag;
1944 	ctf_dwmap_t lookup, *map;
1945 	ctf_id_t id;
1946 
1947 	if (idp == NULL)
1948 		idp = &id;
1949 
1950 	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
1951 		return (ret);
1952 
1953 	if (offset > cup->cu_maxoff) {
1954 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1955 		    "die offset %llu beyond maximum for header %llu\n",
1956 		    offset, cup->cu_maxoff);
1957 		return (ECTF_CONVBKERR);
1958 	}
1959 
1960 	/*
1961 	 * If we've already added an entry for this offset, then we're done.
1962 	 */
1963 	lookup.cdm_off = offset;
1964 	if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) {
1965 		*idp = map->cdm_id;
1966 		return (0);
1967 	}
1968 
1969 	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
1970 		return (ret);
1971 
1972 	ret = ENOTSUP;
1973 	switch (tag) {
1974 	case DW_TAG_base_type:
1975 		ctf_dprintf("base\n");
1976 		ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset);
1977 		break;
1978 	case DW_TAG_array_type:
1979 		ctf_dprintf("array\n");
1980 		ret = ctf_dwarf_create_array(cup, die, idp, isroot);
1981 		break;
1982 	case DW_TAG_enumeration_type:
1983 		ctf_dprintf("enum\n");
1984 		ret = ctf_dwarf_create_enum(cup, die, idp, isroot);
1985 		break;
1986 	case DW_TAG_pointer_type:
1987 		ctf_dprintf("pointer\n");
1988 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER,
1989 		    isroot);
1990 		break;
1991 	case DW_TAG_structure_type:
1992 		ctf_dprintf("struct\n");
1993 		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT,
1994 		    isroot);
1995 		break;
1996 	case DW_TAG_subroutine_type:
1997 		ctf_dprintf("fptr\n");
1998 		ret = ctf_dwarf_create_fptr(cup, die, idp, isroot);
1999 		break;
2000 	case DW_TAG_typedef:
2001 		ctf_dprintf("typedef\n");
2002 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF,
2003 		    isroot);
2004 		break;
2005 	case DW_TAG_union_type:
2006 		ctf_dprintf("union\n");
2007 		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION,
2008 		    isroot);
2009 		break;
2010 	case DW_TAG_const_type:
2011 		ctf_dprintf("const\n");
2012 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST,
2013 		    isroot);
2014 		break;
2015 	case DW_TAG_volatile_type:
2016 		ctf_dprintf("volatile\n");
2017 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE,
2018 		    isroot);
2019 		break;
2020 	case DW_TAG_restrict_type:
2021 		ctf_dprintf("restrict\n");
2022 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT,
2023 		    isroot);
2024 		break;
2025 	default:
2026 		ctf_dprintf("ignoring tag type %x\n", tag);
2027 		*idp = CTF_ERR;
2028 		ret = 0;
2029 		break;
2030 	}
2031 	ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
2032 	    ret);
2033 
2034 	return (ret);
2035 }
2036 
2037 static int
2038 ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die)
2039 {
2040 	int ret;
2041 	Dwarf_Die child;
2042 
2043 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2044 		return (ret);
2045 
2046 	if (child == NULL)
2047 		return (0);
2048 
2049 	return (ctf_dwarf_convert_die(cup, die));
2050 }
2051 
2052 static int
2053 ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2054     boolean_t fptr)
2055 {
2056 	int ret;
2057 	Dwarf_Die child, sib, arg;
2058 
2059 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2060 		return (ret);
2061 
2062 	arg = child;
2063 	while (arg != NULL) {
2064 		Dwarf_Half tag;
2065 
2066 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2067 			return (ret);
2068 
2069 		/*
2070 		 * We have to check for a varargs type declaration. This will
2071 		 * happen in one of two ways. If we have a function pointer
2072 		 * type, then it'll be done with a tag of type
2073 		 * DW_TAG_unspecified_parameters. However, it only means we have
2074 		 * a variable number of arguments, if we have more than one
2075 		 * argument found so far. Otherwise, when we have a function
2076 		 * type, it instead uses a formal parameter whose name is '...'
2077 		 * to indicate a variable arguments member.
2078 		 *
2079 		 * Also, if we have a function pointer, then we have to expect
2080 		 * that we might not get a name at all.
2081 		 */
2082 		if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
2083 			char *name;
2084 			if ((ret = ctf_dwarf_string(cup, die, DW_AT_name,
2085 			    &name)) != 0)
2086 				return (ret);
2087 			if (strcmp(name, DWARF_VARARGS_NAME) == 0)
2088 				fip->ctc_flags |= CTF_FUNC_VARARG;
2089 			else
2090 				fip->ctc_argc++;
2091 			ctf_free(name, strlen(name) + 1);
2092 		} else if (tag == DW_TAG_formal_parameter) {
2093 			fip->ctc_argc++;
2094 		} else if (tag == DW_TAG_unspecified_parameters &&
2095 		    fip->ctc_argc > 0) {
2096 			fip->ctc_flags |= CTF_FUNC_VARARG;
2097 		}
2098 		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2099 			return (ret);
2100 		arg = sib;
2101 	}
2102 
2103 	return (0);
2104 }
2105 
2106 static int
2107 ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2108     ctf_id_t *argv)
2109 {
2110 	int ret;
2111 	int i = 0;
2112 	Dwarf_Die child, sib, arg;
2113 
2114 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2115 		return (ret);
2116 
2117 	arg = child;
2118 	while (arg != NULL) {
2119 		Dwarf_Half tag;
2120 
2121 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2122 			return (ret);
2123 		if (tag == DW_TAG_formal_parameter) {
2124 			Dwarf_Die tdie;
2125 
2126 			if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type,
2127 			    &tdie)) != 0)
2128 				return (ret);
2129 
2130 			if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i],
2131 			    CTF_ADD_ROOT)) != 0)
2132 				return (ret);
2133 			i++;
2134 
2135 			/*
2136 			 * Once we hit argc entries, we're done. This ensures we
2137 			 * don't accidentally hit a varargs which should be the
2138 			 * last entry.
2139 			 */
2140 			if (i == fip->ctc_argc)
2141 				break;
2142 		}
2143 
2144 		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2145 			return (ret);
2146 		arg = sib;
2147 	}
2148 
2149 	return (0);
2150 }
2151 
2152 static int
2153 ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die)
2154 {
2155 	ctf_dwfunc_t *cdf;
2156 	Dwarf_Die tdie;
2157 	Dwarf_Bool b;
2158 	char *name;
2159 	int ret;
2160 
2161 	/*
2162 	 * Functions that don't have a name are generally functions that have
2163 	 * been inlined and thus most information about them has been lost. If
2164 	 * we can't get a name, then instead of returning ENOENT, we silently
2165 	 * swallow the error.
2166 	 */
2167 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) {
2168 		if (ret == ENOENT)
2169 			return (0);
2170 		return (ret);
2171 	}
2172 
2173 	ctf_dprintf("beginning work on function %s (die %llx)\n",
2174 	    name, ctf_die_offset(die));
2175 
2176 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
2177 		if (ret != ENOENT)
2178 			return (ret);
2179 	} else if (b != 0) {
2180 		/*
2181 		 * GCC7 at least creates empty DW_AT_declarations for functions
2182 		 * defined in headers.  As they lack details on the function
2183 		 * prototype, we need to ignore them.  If we later actually
2184 		 * see the relevant function's definition, we will see another
2185 		 * DW_TAG_subprogram that is more complete.
2186 		 */
2187 		ctf_dprintf("ignoring declaration of function %s (die %llx)\n",
2188 		    name, ctf_die_offset(die));
2189 		return (0);
2190 	}
2191 
2192 	if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
2193 		ctf_free(name, strlen(name) + 1);
2194 		return (ENOMEM);
2195 	}
2196 	bzero(cdf, sizeof (ctf_dwfunc_t));
2197 	cdf->cdf_name = name;
2198 
2199 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) {
2200 		if ((ret = ctf_dwarf_convert_type(cup, tdie,
2201 		    &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
2202 			ctf_free(name, strlen(name) + 1);
2203 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2204 			return (ret);
2205 		}
2206 	} else if (ret != ENOENT) {
2207 		ctf_free(name, strlen(name) + 1);
2208 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2209 		return (ret);
2210 	} else {
2211 		if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) ==
2212 		    CTF_ERR) {
2213 			ctf_free(name, strlen(name) + 1);
2214 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2215 			return (ctf_errno(cup->cu_ctfp));
2216 		}
2217 	}
2218 
2219 	/*
2220 	 * A function has a number of children, some of which may not be ones we
2221 	 * care about. Children that we care about have a type of
2222 	 * DW_TAG_formal_parameter. We're going to do two passes, the first to
2223 	 * count the arguments, the second to process them. Afterwards, we
2224 	 * should be good to go ahead and add this function.
2225 	 *
2226 	 * Note, we already got the return type by going in and grabbing it out
2227 	 * of the DW_AT_type.
2228 	 */
2229 	if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip,
2230 	    B_FALSE)) != 0) {
2231 		ctf_free(name, strlen(name) + 1);
2232 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2233 		return (ret);
2234 	}
2235 
2236 	ctf_dprintf("beginning to convert function arguments %s\n", name);
2237 	if (cdf->cdf_fip.ctc_argc != 0) {
2238 		uint_t argc = cdf->cdf_fip.ctc_argc;
2239 		cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
2240 		if (cdf->cdf_argv == NULL) {
2241 			ctf_free(name, strlen(name) + 1);
2242 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2243 			return (ENOMEM);
2244 		}
2245 		if ((ret = ctf_dwarf_convert_fargs(cup, die,
2246 		    &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
2247 			ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
2248 			ctf_free(name, strlen(name) + 1);
2249 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2250 			return (ret);
2251 		}
2252 	} else {
2253 		cdf->cdf_argv = NULL;
2254 	}
2255 
2256 	if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) {
2257 		ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
2258 		    cdf->cdf_fip.ctc_argc);
2259 		ctf_free(name, strlen(name) + 1);
2260 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2261 		return (ret);
2262 	}
2263 
2264 	ctf_list_append(&cup->cu_funcs, cdf);
2265 	return (ret);
2266 }
2267 
2268 /*
2269  * Convert variables, but only if they're not prototypes and have names.
2270  */
2271 static int
2272 ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die)
2273 {
2274 	int ret;
2275 	char *name;
2276 	Dwarf_Bool b;
2277 	Dwarf_Die tdie;
2278 	ctf_id_t id;
2279 	ctf_dwvar_t *cdv;
2280 
2281 	/* Skip "Non-Defining Declarations" */
2282 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) {
2283 		if (b != 0)
2284 			return (0);
2285 	} else if (ret != ENOENT) {
2286 		return (ret);
2287 	}
2288 
2289 	/*
2290 	 * If we find a DIE of "Declarations Completing Non-Defining
2291 	 * Declarations", we will use the referenced type's DIE.  This isn't
2292 	 * quite correct, e.g. DW_AT_decl_line will be the forward declaration
2293 	 * not this site.  It's sufficient for what we need, however: in
2294 	 * particular, we should find DW_AT_external as needed there.
2295 	 */
2296 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification,
2297 	    &tdie)) == 0) {
2298 		Dwarf_Off offset;
2299 		if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0)
2300 			return (ret);
2301 		ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n",
2302 		    ctf_die_offset(die), ctf_die_offset(tdie));
2303 		die = tdie;
2304 	} else if (ret != ENOENT) {
2305 		return (ret);
2306 	}
2307 
2308 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
2309 	    ret != ENOENT)
2310 		return (ret);
2311 	if (ret == ENOENT)
2312 		return (0);
2313 
2314 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
2315 		ctf_free(name, strlen(name) + 1);
2316 		return (ret);
2317 	}
2318 
2319 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
2320 	    CTF_ADD_ROOT)) != 0)
2321 		return (ret);
2322 
2323 	if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
2324 		ctf_free(name, strlen(name) + 1);
2325 		return (ENOMEM);
2326 	}
2327 
2328 	cdv->cdv_name = name;
2329 	cdv->cdv_type = id;
2330 
2331 	if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) {
2332 		ctf_free(cdv, sizeof (ctf_dwvar_t));
2333 		ctf_free(name, strlen(name) + 1);
2334 		return (ret);
2335 	}
2336 
2337 	ctf_list_append(&cup->cu_vars, cdv);
2338 	return (0);
2339 }
2340 
2341 /*
2342  * Walk through our set of top-level types and process them.
2343  */
2344 static int
2345 ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die)
2346 {
2347 	int ret;
2348 	Dwarf_Off offset;
2349 	Dwarf_Half tag;
2350 
2351 	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
2352 		return (ret);
2353 
2354 	if (offset > cup->cu_maxoff) {
2355 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2356 		    "die offset %llu beyond maximum for header %llu\n",
2357 		    offset, cup->cu_maxoff);
2358 		return (ECTF_CONVBKERR);
2359 	}
2360 
2361 	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2362 		return (ret);
2363 
2364 	ret = 0;
2365 	switch (tag) {
2366 	case DW_TAG_subprogram:
2367 		ctf_dprintf("top level func\n");
2368 		ret = ctf_dwarf_convert_function(cup, die);
2369 		break;
2370 	case DW_TAG_variable:
2371 		ctf_dprintf("top level var\n");
2372 		ret = ctf_dwarf_convert_variable(cup, die);
2373 		break;
2374 	case DW_TAG_lexical_block:
2375 		ctf_dprintf("top level block\n");
2376 		ret = ctf_dwarf_walk_lexical(cup, die);
2377 		break;
2378 	case DW_TAG_enumeration_type:
2379 	case DW_TAG_structure_type:
2380 	case DW_TAG_typedef:
2381 	case DW_TAG_union_type:
2382 		ctf_dprintf("top level type\n");
2383 		ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE);
2384 		break;
2385 	default:
2386 		break;
2387 	}
2388 
2389 	return (ret);
2390 }
2391 
2392 
2393 /*
2394  * We're given a node. At this node we need to convert it and then proceed to
2395  * convert any siblings that are associaed with this die.
2396  */
2397 static int
2398 ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die)
2399 {
2400 	while (die != NULL) {
2401 		int ret;
2402 		Dwarf_Die sib;
2403 
2404 		if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0)
2405 			return (ret);
2406 
2407 		if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0)
2408 			return (ret);
2409 		die = sib;
2410 	}
2411 	return (0);
2412 }
2413 
2414 static int
2415 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2416 {
2417 	ctf_dwmap_t *map;
2418 
2419 	for (map = avl_first(&cup->cu_map); map != NULL;
2420 	    map = AVL_NEXT(&cup->cu_map, map)) {
2421 		int ret;
2422 		if (map->cdm_fix == B_FALSE)
2423 			continue;
2424 		if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2425 		    addpass)) != 0)
2426 			return (ret);
2427 	}
2428 
2429 	return (0);
2430 }
2431 
2432 /*
2433  * The DWARF information about a symbol and the information in the symbol table
2434  * may not be the same due to symbol reduction that is performed by ld due to a
2435  * mapfile or other such directive. We process weak symbols at a later time.
2436  *
2437  * The following are the rules that we employ:
2438  *
2439  * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2440  * with the same name.
2441  *
2442  * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2443  * with the same name and the same file. This case may happen due to mapfile
2444  * reduction.
2445  *
2446  * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2447  * with the same name and the same file.
2448  *
2449  * 4. A DWARF function that has the same name as the symbol table entry, but the
2450  * files do not match. This is considered a 'fuzzy' match. This may also happen
2451  * due to a mapfile reduction. Fuzzy matching is only used when we know that the
2452  * file in question refers to the primary object. This is because when a symbol
2453  * is reduced in a mapfile, it's always going to be tagged as a local value in
2454  * the generated output and it is considered as to belong to the primary file
2455  * which is the first STT_FILE symbol we see.
2456  */
2457 static boolean_t
2458 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2459     uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2460     boolean_t dwarf_global, boolean_t *is_fuzzy)
2461 {
2462 	*is_fuzzy = B_FALSE;
2463 
2464 	if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2465 		return (B_FALSE);
2466 	}
2467 
2468 	if (strcmp(symtab_name, dwarf_name) != 0) {
2469 		return (B_FALSE);
2470 	}
2471 
2472 	if (symtab_bind == STB_GLOBAL) {
2473 		return (dwarf_global);
2474 	}
2475 
2476 	if (strcmp(symtab_file, dwarf_file) == 0) {
2477 		return (B_TRUE);
2478 	}
2479 
2480 	if (dwarf_global) {
2481 		*is_fuzzy = B_TRUE;
2482 		return (B_TRUE);
2483 	}
2484 
2485 	return (B_FALSE);
2486 }
2487 
2488 static ctf_dwfunc_t *
2489 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2490     uint_t bind, boolean_t primary)
2491 {
2492 	ctf_dwfunc_t *cdf, *fuzzy = NULL;
2493 
2494 	if (bind == STB_WEAK)
2495 		return (NULL);
2496 
2497 	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2498 		return (NULL);
2499 
2500 	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2501 	    cdf = ctf_list_next(cdf)) {
2502 		boolean_t is_fuzzy = B_FALSE;
2503 
2504 		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2505 		    cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2506 			if (is_fuzzy) {
2507 				if (primary) {
2508 					fuzzy = cdf;
2509 				}
2510 				continue;
2511 			} else {
2512 				return (cdf);
2513 			}
2514 		}
2515 	}
2516 
2517 	return (fuzzy);
2518 }
2519 
2520 static ctf_dwvar_t *
2521 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2522     uint_t bind, boolean_t primary)
2523 {
2524 	ctf_dwvar_t *cdv, *fuzzy = NULL;
2525 
2526 	if (bind == STB_WEAK)
2527 		return (NULL);
2528 
2529 	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2530 		return (NULL);
2531 
2532 	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2533 	    cdv = ctf_list_next(cdv)) {
2534 		boolean_t is_fuzzy = B_FALSE;
2535 
2536 		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2537 		    cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2538 			if (is_fuzzy) {
2539 				if (primary) {
2540 					fuzzy = cdv;
2541 				}
2542 			} else {
2543 				return (cdv);
2544 			}
2545 		}
2546 	}
2547 
2548 	return (fuzzy);
2549 }
2550 
2551 static int
2552 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2553     const char *file, const char *name, boolean_t primary, void *arg)
2554 {
2555 	int ret;
2556 	uint_t bind, type;
2557 	ctf_cu_t *cup = arg;
2558 
2559 	bind = GELF_ST_BIND(symp->st_info);
2560 	type = GELF_ST_TYPE(symp->st_info);
2561 
2562 	/*
2563 	 * Come back to weak symbols in another pass
2564 	 */
2565 	if (bind == STB_WEAK)
2566 		return (0);
2567 
2568 	if (type == STT_OBJECT) {
2569 		ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2570 		    bind, primary);
2571 		if (cdv == NULL)
2572 			return (0);
2573 		ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2574 		ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2575 	} else {
2576 		ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2577 		    bind, primary);
2578 		if (cdf == NULL)
2579 			return (0);
2580 		ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2581 		    cdf->cdf_argv);
2582 		ctf_dprintf("added function %s\n", name);
2583 	}
2584 
2585 	if (ret == CTF_ERR) {
2586 		return (ctf_errno(cup->cu_ctfp));
2587 	}
2588 
2589 	return (0);
2590 }
2591 
2592 static int
2593 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2594 {
2595 	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2596 }
2597 
/*
 * If we have a weak symbol, attempt to find the strong symbol it will resolve
 * to.  Note: the code where this actually happens is in sym_process() in
 * cmd/sgs/libld/common/syms.c
 *
 * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
 * a candidate, it must:
 *
 * - have the same type (function, object)
 * - have the same value (address)
 * - have the same size
 * - not be another weak symbol
 * - belong to the same section (checked via section index)
 *
 * To perform this check, we first iterate over the symbol table. For each weak
 * symbol that we encounter, we then do a second walk over the symbol table,
 * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
 * either a local or global symbol. If we find a global symbol then we go with
 * it and stop searching for additional matches.
 *
 * If instead, we find a local symbol, things are more complicated. The first
 * thing we do is to try and see if we have file information about both symbols
 * (STT_FILE). If they both have file information and it matches, then we treat
 * that as a good match and stop searching for additional matches.
 *
 * Otherwise, this means we have a non-matching file and a local symbol. We
 * treat this as a candidate and if we find a better match (one of the two cases
 * above), use that instead. There are two different ways this can happen.
 * Either this is a completely different symbol, or it's a once-global symbol
 * that was scoped to local via a mapfile.  In the latter case, curfile is
 * likely inaccurate since the linker does not preserve the needed curfile in
 * the order of the symbol table (see the comments about locally scoped symbols
 * in libld's update_osym()).  As we can't tell this case from the former one,
 * we use this symbol iff no other matching symbol is found.
 *
 * What we really need here is a SUNW section containing weak<->strong mappings
 * that we can consume.
 */
typedef struct ctf_dwarf_weak_arg {
	/* The weak symbol that we are trying to find a strong match for. */
	const Elf64_Sym *cweak_symp;
	/* File name reported alongside the weak symbol; may be NULL. */
	const char *cweak_file;
	/* Set once a local symbol whose file does not match was seen. */
	boolean_t cweak_candidate;
	/* Symbol table index of the most recent match or candidate. */
	ulong_t cweak_idx;
} ctf_dwarf_weak_arg_t;
2642 
2643 static int
2644 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2645     const char *name, boolean_t primary, void *arg)
2646 {
2647 	ctf_dwarf_weak_arg_t *cweak = arg;
2648 
2649 	const Elf64_Sym *wsymp = cweak->cweak_symp;
2650 
2651 	ctf_dprintf("comparing weak to %s\n", name);
2652 
2653 	if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2654 		return (0);
2655 	}
2656 
2657 	if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2658 		return (0);
2659 	}
2660 
2661 	if (wsymp->st_value != symp->st_value) {
2662 		return (0);
2663 	}
2664 
2665 	if (wsymp->st_size != symp->st_size) {
2666 		return (0);
2667 	}
2668 
2669 	if (wsymp->st_shndx != symp->st_shndx) {
2670 		return (0);
2671 	}
2672 
2673 	/*
2674 	 * Check if it's a weak candidate.
2675 	 */
2676 	if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
2677 	    (file == NULL || cweak->cweak_file == NULL ||
2678 	    strcmp(file, cweak->cweak_file) != 0)) {
2679 		cweak->cweak_candidate = B_TRUE;
2680 		cweak->cweak_idx = idx;
2681 		return (0);
2682 	}
2683 
2684 	/*
2685 	 * Found a match, break.
2686 	 */
2687 	cweak->cweak_idx = idx;
2688 	return (1);
2689 }
2690 
2691 static int
2692 ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2693 {
2694 	ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx);
2695 
2696 	/*
2697 	 * If we matched something that for some reason didn't have type data,
2698 	 * we don't consider that a fatal error and silently swallow it.
2699 	 */
2700 	if (id == CTF_ERR) {
2701 		if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT)
2702 			return (0);
2703 		else
2704 			return (ctf_errno(cup->cu_ctfp));
2705 	}
2706 
2707 	if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR)
2708 		return (ctf_errno(cup->cu_ctfp));
2709 
2710 	return (0);
2711 }
2712 
2713 static int
2714 ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2715 {
2716 	int ret;
2717 	ctf_funcinfo_t fip;
2718 	ctf_id_t *args = NULL;
2719 
2720 	if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) {
2721 		if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT)
2722 			return (0);
2723 		else
2724 			return (ctf_errno(cup->cu_ctfp));
2725 	}
2726 
2727 	if (fip.ctc_argc != 0) {
2728 		args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
2729 		if (args == NULL)
2730 			return (ENOMEM);
2731 
2732 		if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2733 		    CTF_ERR) {
2734 			ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2735 			return (ctf_errno(cup->cu_ctfp));
2736 		}
2737 	}
2738 
2739 	ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2740 	if (args != NULL)
2741 		ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2742 	if (ret == CTF_ERR)
2743 		return (ctf_errno(cup->cu_ctfp));
2744 
2745 	return (0);
2746 }
2747 
2748 static int
2749 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2750     const char *name, boolean_t primary, void *arg)
2751 {
2752 	int ret, type;
2753 	ctf_dwarf_weak_arg_t cweak;
2754 	ctf_cu_t *cup = arg;
2755 
2756 	/*
2757 	 * We only care about weak symbols.
2758 	 */
2759 	if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2760 		return (0);
2761 
2762 	type = GELF_ST_TYPE(symp->st_info);
2763 	ASSERT(type == STT_OBJECT || type == STT_FUNC);
2764 
2765 	/*
2766 	 * For each weak symbol we encounter, we need to do a second iteration
2767 	 * to try and find a match. We should probably think about other
2768 	 * techniques to try and save us time in the future.
2769 	 */
2770 	cweak.cweak_symp = symp;
2771 	cweak.cweak_file = file;
2772 	cweak.cweak_candidate = B_FALSE;
2773 	cweak.cweak_idx = 0;
2774 
2775 	ctf_dprintf("Trying to find weak equiv for %s\n", name);
2776 
2777 	ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2778 	VERIFY(ret == 0 || ret == 1);
2779 
2780 	/*
2781 	 * Nothing was ever found, we're not going to add anything for this
2782 	 * entry.
2783 	 */
2784 	if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2785 		ctf_dprintf("found no weak match for %s\n", name);
2786 		return (0);
2787 	}
2788 
2789 	/*
2790 	 * Now, finally go and add the type based on the match.
2791 	 */
2792 	ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2793 	if (type == STT_OBJECT) {
2794 		ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2795 	} else {
2796 		ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2797 	}
2798 
2799 	return (ret);
2800 }
2801 
2802 static int
2803 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2804 {
2805 	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2806 }
2807 
2808 /* ARGSUSED */
2809 static int
2810 ctf_dwarf_convert_one(void *arg, void *unused)
2811 {
2812 	int ret;
2813 	ctf_file_t *dedup;
2814 	ctf_cu_t *cup = arg;
2815 
2816 	ctf_dprintf("converting die: %s\n", cup->cu_name);
2817 	ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2818 	VERIFY(cup != NULL);
2819 
2820 	ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2821 	ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2822 	    ret);
2823 	if (ret != 0) {
2824 		return (ret);
2825 	}
2826 	if (ctf_update(cup->cu_ctfp) != 0) {
2827 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2828 		    "failed to update output ctf container"));
2829 	}
2830 
2831 	ret = ctf_dwarf_fixup_die(cup, B_FALSE);
2832 	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2833 	    ret);
2834 	if (ret != 0) {
2835 		return (ret);
2836 	}
2837 	if (ctf_update(cup->cu_ctfp) != 0) {
2838 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2839 		    "failed to update output ctf container"));
2840 	}
2841 
2842 	ret = ctf_dwarf_fixup_die(cup, B_TRUE);
2843 	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2844 	    ret);
2845 	if (ret != 0) {
2846 		return (ret);
2847 	}
2848 	if (ctf_update(cup->cu_ctfp) != 0) {
2849 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2850 		    "failed to update output ctf container"));
2851 	}
2852 
2853 
2854 	if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) {
2855 		return (ctf_dwarf_error(cup, NULL, ret,
2856 		    "failed to convert strong functions and variables"));
2857 	}
2858 
2859 	if (ctf_update(cup->cu_ctfp) != 0) {
2860 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2861 		    "failed to update output ctf container"));
2862 	}
2863 
2864 	if (cup->cu_doweaks == B_TRUE) {
2865 		if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2866 			return (ctf_dwarf_error(cup, NULL, ret,
2867 			    "failed to convert weak functions and variables"));
2868 		}
2869 
2870 		if (ctf_update(cup->cu_ctfp) != 0) {
2871 			return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2872 			    "failed to update output ctf container"));
2873 		}
2874 	}
2875 
2876 	ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2877 	ctf_dprintf("adding inputs for dedup\n");
2878 	if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2879 		return (ctf_dwarf_error(cup, NULL, ret,
2880 		    "failed to add inputs for merge"));
2881 	}
2882 
2883 	ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2884 	if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2885 		return (ctf_dwarf_error(cup, NULL, ret,
2886 		    "failed to deduplicate die"));
2887 	}
2888 	ctf_close(cup->cu_ctfp);
2889 	cup->cu_ctfp = dedup;
2890 	ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2891 
2892 	return (0);
2893 }
2894 
2895 /*
2896  * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2897  * say in the single node case, it's been saved and the entry here has been set
2898  * to NULL, which ctf_close happily ignores.
2899  */
2900 static void
2901 ctf_dwarf_free_die(ctf_cu_t *cup)
2902 {
2903 	ctf_dwfunc_t *cdf, *ndf;
2904 	ctf_dwvar_t *cdv, *ndv;
2905 	ctf_dwbitf_t *cdb, *ndb;
2906 	ctf_dwmap_t *map;
2907 	void *cookie;
2908 	Dwarf_Error derr;
2909 
2910 	ctf_dprintf("Beginning to free die: %p\n", cup);
2911 	cup->cu_elf = NULL;
2912 	ctf_dprintf("Trying to free name: %p\n", cup->cu_name);
2913 	if (cup->cu_name != NULL)
2914 		ctf_free(cup->cu_name, strlen(cup->cu_name) + 1);
2915 	ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh);
2916 	if (cup->cu_cmh != NULL) {
2917 		ctf_merge_fini(cup->cu_cmh);
2918 		cup->cu_cmh = NULL;
2919 	}
2920 
2921 	ctf_dprintf("Trying to free functions\n");
2922 	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) {
2923 		ndf = ctf_list_next(cdf);
2924 		ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
2925 		if (cdf->cdf_fip.ctc_argc != 0) {
2926 			ctf_free(cdf->cdf_argv,
2927 			    sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
2928 		}
2929 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2930 	}
2931 
2932 	ctf_dprintf("Trying to free variables\n");
2933 	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) {
2934 		ndv = ctf_list_next(cdv);
2935 		ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
2936 		ctf_free(cdv, sizeof (ctf_dwvar_t));
2937 	}
2938 
2939 	ctf_dprintf("Trying to free bitfields\n");
2940 	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) {
2941 		ndb = ctf_list_next(cdb);
2942 		ctf_free(cdb, sizeof (ctf_dwbitf_t));
2943 	}
2944 
2945 	ctf_dprintf("Trying to clean up dwarf_t: %p\n", cup->cu_dwarf);
2946 	if (cup->cu_dwarf != NULL)
2947 		(void) dwarf_finish(cup->cu_dwarf, &derr);
2948 	cup->cu_dwarf = NULL;
2949 	ctf_close(cup->cu_ctfp);
2950 
2951 	cookie = NULL;
2952 	while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) {
2953 		ctf_free(map, sizeof (ctf_dwmap_t));
2954 	}
2955 	avl_destroy(&cup->cu_map);
2956 	cup->cu_errbuf = NULL;
2957 }
2958 
2959 static void
2960 ctf_dwarf_free_dies(ctf_cu_t *cdies, int ndies)
2961 {
2962 	int i;
2963 
2964 	ctf_dprintf("Beginning to free dies\n");
2965 	for (i = 0; i < ndies; i++) {
2966 		ctf_dwarf_free_die(&cdies[i]);
2967 	}
2968 
2969 	ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2970 }
2971 
2972 static int
2973 ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
2974     char *errbuf, size_t errlen)
2975 {
2976 	int ret;
2977 	Dwarf_Half vers;
2978 	Dwarf_Unsigned nexthdr;
2979 
2980 	while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
2981 	    &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
2982 		if (ret != DW_DLV_OK) {
2983 			(void) snprintf(errbuf, errlen,
2984 			    "file does not contain valid DWARF data: %s\n",
2985 			    dwarf_errmsg(*derr));
2986 			return (ECTF_CONVBKERR);
2987 		}
2988 
2989 		if (vers != DWARF_VERSION_TWO) {
2990 			(void) snprintf(errbuf, errlen,
2991 			    "unsupported DWARF version: %d\n", vers);
2992 			return (ECTF_CONVBKERR);
2993 		}
2994 		*ndies = *ndies + 1;
2995 	}
2996 
2997 	return (0);
2998 }
2999 
3000 static int
3001 ctf_dwarf_init_die(int fd, Elf *elf, ctf_cu_t *cup, int ndie, char *errbuf,
3002     size_t errlen)
3003 {
3004 	int ret;
3005 	Dwarf_Unsigned hdrlen, abboff, nexthdr;
3006 	Dwarf_Half addrsz;
3007 	Dwarf_Unsigned offset = 0;
3008 	Dwarf_Error derr;
3009 
3010 	while ((ret = dwarf_next_cu_header(cup->cu_dwarf, &hdrlen, NULL,
3011 	    &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
3012 		char *name;
3013 		Dwarf_Die cu, child;
3014 
3015 		/* Based on the counting above, we should be good to go */
3016 		VERIFY(ret == DW_DLV_OK);
3017 		if (ndie > 0) {
3018 			ndie--;
3019 			offset = nexthdr;
3020 			continue;
3021 		}
3022 
3023 		/*
3024 		 * Compilers are apparently inconsistent. Some emit no DWARF for
3025 		 * empty files and others emit empty compilation unit.
3026 		 */
3027 		cup->cu_voidtid = CTF_ERR;
3028 		cup->cu_longtid = CTF_ERR;
3029 		cup->cu_elf = elf;
3030 		cup->cu_maxoff = nexthdr - 1;
3031 		cup->cu_ctfp = ctf_fdcreate(fd, &ret);
3032 		if (cup->cu_ctfp == NULL)
3033 			return (ret);
3034 
3035 		avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
3036 		    offsetof(ctf_dwmap_t, cdm_avl));
3037 		cup->cu_errbuf = errbuf;
3038 		cup->cu_errlen = errlen;
3039 		bzero(&cup->cu_vars, sizeof (ctf_list_t));
3040 		bzero(&cup->cu_funcs, sizeof (ctf_list_t));
3041 		bzero(&cup->cu_bitfields, sizeof (ctf_list_t));
3042 
3043 		if ((ret = ctf_dwarf_die_elfenc(elf, cup, errbuf,
3044 		    errlen)) != 0)
3045 			return (ret);
3046 
3047 		if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0)
3048 			return (ret);
3049 
3050 		if (cu == NULL) {
3051 			(void) snprintf(errbuf, errlen,
3052 			    "file does not contain DWARF data");
3053 			return (ECTF_CONVNODEBUG);
3054 		}
3055 
3056 		if ((ret = ctf_dwarf_child(cup, cu, &child)) != 0)
3057 			return (ret);
3058 
3059 		if (child == NULL) {
3060 			(void) snprintf(errbuf, errlen,
3061 			    "file does not contain DWARF data");
3062 			return (ECTF_CONVNODEBUG);
3063 		}
3064 
3065 		cup->cu_cuoff = offset;
3066 		cup->cu_cu = child;
3067 
3068 		if ((cup->cu_cmh = ctf_merge_init(fd, &ret)) == NULL)
3069 			return (ret);
3070 
3071 		if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) {
3072 			size_t len = strlen(name) + 1;
3073 			char *b = basename(name);
3074 			cup->cu_name = strdup(b);
3075 			ctf_free(name, len);
3076 		}
3077 		break;
3078 	}
3079 
3080 	return (0);
3081 }
3082 
3083 /*
3084  * This is our only recourse to identify a C source file that is missing debug
3085  * info: it will be mentioned as an STT_FILE, but not have a compile unit entry.
3086  * (A traditional ctfmerge works on individual files, so can identify missing
3087  * DWARF more directly, via ctf_has_c_source() on the .o file.)
3088  *
3089  * As we operate on basenames, this can of course miss some cases, but it's
3090  * better than not checking at all.
3091  *
3092  * We explicitly whitelist some CRT components.  Failing that, there's always
3093  * the -m option.
3094  */
3095 static boolean_t
3096 c_source_has_debug(const char *file, ctf_cu_t *cus, size_t nr_cus)
3097 {
3098 	const char *basename = strrchr(file, '/');
3099 
3100 	if (basename == NULL)
3101 		basename = file;
3102 	else
3103 		basename++;
3104 
3105 	if (strcmp(basename, "common-crt.c") == 0 ||
3106 	    strcmp(basename, "gmon.c") == 0 ||
3107 	    strcmp(basename, "dlink_init.c") == 0 ||
3108 	    strcmp(basename, "dlink_common.c") == 0 ||
3109 	    strncmp(basename, "crt", strlen("crt")) == 0 ||
3110 	    strncmp(basename, "values-", strlen("values-")) == 0)
3111 		return (B_TRUE);
3112 
3113 	for (size_t i = 0; i < nr_cus; i++) {
3114 		if (strcmp(basename, cus[i].cu_name) == 0)
3115 			return (B_TRUE);
3116 	}
3117 
3118 	return (B_FALSE);
3119 }
3120 
3121 static int
3122 ctf_dwarf_check_missing(ctf_cu_t *cus, size_t nr_cus, Elf *elf,
3123     char *errmsg, size_t errlen)
3124 {
3125 	Elf_Scn *scn, *strscn;
3126 	Elf_Data *data, *strdata;
3127 	GElf_Shdr shdr;
3128 	ulong_t i;
3129 
3130 	scn = NULL;
3131 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
3132 		if (gelf_getshdr(scn, &shdr) == NULL) {
3133 			(void) snprintf(errmsg, errlen,
3134 			    "failed to get section header: %s\n",
3135 			    elf_errmsg(elf_errno()));
3136 			return (EINVAL);
3137 		}
3138 
3139 		if (shdr.sh_type == SHT_SYMTAB)
3140 			break;
3141 	}
3142 
3143 	if (scn == NULL)
3144 		return (0);
3145 
3146 	if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) {
3147 		(void) snprintf(errmsg, errlen,
3148 		    "failed to get str section: %s\n",
3149 		    elf_errmsg(elf_errno()));
3150 		return (EINVAL);
3151 	}
3152 
3153 	if ((data = elf_getdata(scn, NULL)) == NULL) {
3154 		(void) snprintf(errmsg, errlen, "failed to read section: %s\n",
3155 		    elf_errmsg(elf_errno()));
3156 		return (EINVAL);
3157 	}
3158 
3159 	if ((strdata = elf_getdata(strscn, NULL)) == NULL) {
3160 		(void) snprintf(errmsg, errlen,
3161 		    "failed to read string table: %s\n",
3162 		    elf_errmsg(elf_errno()));
3163 		return (EINVAL);
3164 	}
3165 
3166 	for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
3167 		GElf_Sym sym;
3168 		const char *file;
3169 		size_t len;
3170 
3171 		if (gelf_getsym(data, i, &sym) == NULL) {
3172 			(void) snprintf(errmsg, errlen,
3173 			    "failed to read sym %lu: %s\n",
3174 			    i, elf_errmsg(elf_errno()));
3175 			return (EINVAL);
3176 		}
3177 
3178 		if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
3179 			continue;
3180 
3181 		file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
3182 		len = strlen(file);
3183 		if (len < 2 || strncmp(".c", &file[len - 2], 2) != 0)
3184 			continue;
3185 
3186 		if (!c_source_has_debug(file, cus, nr_cus)) {
3187 			(void) snprintf(errmsg, errlen,
3188 			    "file %s is missing debug info\n", file);
3189 			return (ECTF_CONVNODEBUG);
3190 		}
3191 	}
3192 
3193 	return (0);
3194 }
3195 
3196 int
3197 ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, uint_t flags,
3198     ctf_file_t **fpp, char *errbuf, size_t errlen)
3199 {
3200 	int err, ret, ndies, i;
3201 	Dwarf_Debug dw;
3202 	Dwarf_Error derr;
3203 	ctf_cu_t *cdies = NULL, *cup;
3204 	workq_t *wqp = NULL;
3205 
3206 	*fpp = NULL;
3207 
3208 	ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
3209 	if (ret != DW_DLV_OK) {
3210 		if (ret == DW_DLV_NO_ENTRY ||
3211 		    dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
3212 			(void) snprintf(errbuf, errlen,
3213 			    "file does not contain DWARF data\n");
3214 			return (ECTF_CONVNODEBUG);
3215 		}
3216 
3217 		(void) snprintf(errbuf, errlen,
3218 		    "dwarf_elf_init() failed: %s\n", dwarf_errmsg(derr));
3219 		return (ECTF_CONVBKERR);
3220 	}
3221 
3222 	/*
3223 	 * Iterate over all of the compilation units and create a ctf_cu_t for
3224 	 * each of them.  This is used to determine if we have zero, one, or
3225 	 * multiple dies to convert. If we have zero, that's an error. If
3226 	 * there's only one die, that's the simple case.  No merge needed and
3227 	 * only a single Dwarf_Debug as well.
3228 	 */
3229 	ndies = 0;
3230 	err = ctf_dwarf_count_dies(dw, &derr, &ndies, errbuf, errlen);
3231 
3232 	ctf_dprintf("found %d DWARF CUs\n", ndies);
3233 
3234 	if (ndies == 0) {
3235 		(void) snprintf(errbuf, errlen,
3236 		    "file does not contain DWARF data\n");
3237 		return (ECTF_CONVNODEBUG);
3238 	}
3239 
3240 	(void) dwarf_finish(dw, &derr);
3241 	cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies);
3242 	if (cdies == NULL) {
3243 		return (ENOMEM);
3244 	}
3245 
3246 	bzero(cdies, sizeof (ctf_cu_t) * ndies);
3247 
3248 	for (i = 0; i < ndies; i++) {
3249 		cup = &cdies[i];
3250 		ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
3251 		    &cup->cu_dwarf, &derr);
3252 		if (ret != 0) {
3253 			ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3254 			(void) snprintf(errbuf, errlen,
3255 			    "failed to initialize DWARF: %s\n",
3256 			    dwarf_errmsg(derr));
3257 			return (ECTF_CONVBKERR);
3258 		}
3259 
3260 		err = ctf_dwarf_init_die(fd, elf, cup, i, errbuf, errlen);
3261 		if (err != 0)
3262 			goto out;
3263 
3264 		cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
3265 	}
3266 
3267 	if (!(flags & CTF_ALLOW_MISSING_DEBUG) &&
3268 	    (err = ctf_dwarf_check_missing(cdies, ndies,
3269 	    elf, errbuf, errlen)) != 0)
3270 		goto out;
3271 
3272 	/*
3273 	 * If we only have one compilation unit, there's no reason to use
3274 	 * multiple threads, even if the user requested them. After all, they
3275 	 * just gave us an upper bound.
3276 	 */
3277 	if (ndies == 1)
3278 		nthrs = 1;
3279 
3280 	if (workq_init(&wqp, nthrs) == -1) {
3281 		err = errno;
3282 		goto out;
3283 	}
3284 
3285 	for (i = 0; i < ndies; i++) {
3286 		cup = &cdies[i];
3287 		ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
3288 		    cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
3289 		if (workq_add(wqp, cup) == -1) {
3290 			err = errno;
3291 			goto out;
3292 		}
3293 	}
3294 
3295 	ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, &err);
3296 	if (ret == WORKQ_ERROR) {
3297 		err = errno;
3298 		goto out;
3299 	} else if (ret == WORKQ_UERROR) {
3300 		ctf_dprintf("internal convert failed: %s\n",
3301 		    ctf_errmsg(err));
3302 		goto out;
3303 	}
3304 
3305 	ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
3306 	if (ndies != 1) {
3307 		ctf_merge_t *cmp;
3308 
3309 		cmp = ctf_merge_init(fd, &err);
3310 		if (cmp == NULL)
3311 			goto out;
3312 
3313 		ctf_dprintf("setting threads\n");
3314 		if ((err = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
3315 			ctf_merge_fini(cmp);
3316 			goto out;
3317 		}
3318 
3319 		for (i = 0; i < ndies; i++) {
3320 			cup = &cdies[i];
3321 			if ((err = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
3322 				ctf_merge_fini(cmp);
3323 				goto out;
3324 			}
3325 		}
3326 
3327 		ctf_dprintf("performing merge\n");
3328 		err = ctf_merge_merge(cmp, fpp);
3329 		if (err != 0) {
3330 			ctf_dprintf("failed merge!\n");
3331 			*fpp = NULL;
3332 			ctf_merge_fini(cmp);
3333 			goto out;
3334 		}
3335 		ctf_merge_fini(cmp);
3336 		err = 0;
3337 		ctf_dprintf("successfully converted!\n");
3338 	} else {
3339 		err = 0;
3340 		*fpp = cdies->cu_ctfp;
3341 		cdies->cu_ctfp = NULL;
3342 		ctf_dprintf("successfully converted!\n");
3343 	}
3344 
3345 out:
3346 	workq_fini(wqp);
3347 	ctf_dwarf_free_dies(cdies, ndies);
3348 	return (err);
3349 }
3350