xref: /illumos-gate/usr/src/lib/libctf/common/ctf_dwarf.c (revision 3df9f0641f28754051d5e82c6457527cf4af1258)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2012 Jason King.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 /*
31  * Copyright 2020 Joyent, Inc.
32  */
33 
34 /*
35  * CTF DWARF conversion theory.
36  *
37  * DWARF data contains a series of compilation units. Each compilation unit
38  * generally refers to an object file or what once was, in the case of linked
39  * binaries and shared objects. Each compilation unit has a series of what DWARF
40  * calls a DIE (Debugging Information Entry). The set of entries that we care
41  * about have type information stored in a series of attributes. Each DIE also
42  * has a tag that identifies the kind of attributes that it has.
43  *
44  * A given DIE may itself have children. For example, a DIE that represents a
45  * structure has children which represent members. Whenever we encounter a DIE
46  * that has children or other values or types associated with it, we recursively
47  * process those children first so that way we can then refer to the generated
48  * CTF type id while processing its parent. This reduces the amount of unknowns
49  * and fixups that we need. It also ensures that we don't accidentally add types
50  * that an overzealous compiler might add to the DWARF data but aren't used by
51  * anything in the system.
52  *
53  * Once we do a conversion, we store a mapping in an AVL tree that goes from the
54  * DWARF's die offset, which is relative to the given compilation unit, to a
55  * ctf_id_t.
56  *
57  * Unfortunately, some compilers actually will emit duplicate entries for a
58  * given type that look similar, but aren't quite. To that end, we go through
59  * and do a variant on a merge once we're done processing a single compilation
60  * unit which deduplicates all of the types that are in the unit.
61  *
62  * Finally, if we encounter an object that has multiple compilation units, then
63  * we'll convert all of the compilation units separately and then do a merge, so
64  * that way we can result in one single ctf_file_t that represents everything
65  * for the object.
66  *
67  * Conversion Steps
68  * ----------------
69  *
70  * Because a given object we've been given to convert may have multiple
71  * compilation units, we break the work into two halves. The first half
72  * processes each compilation unit (potentially in parallel) and then the second
73  * half optionally merges all of the dies in the first half. First, we'll cover
74  * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers
75  * the work done in ctf_dwarf_convert_one().
76  *
77  * An individual ctf_cu_t, which represents a compilation unit, is converted to
78  * CTF in a series of multiple passes.
79  *
80  * Pass 1: During the first pass we walk all of the top-level dies and if we
81  * find a function, variable, struct, union, enum or typedef, we recursively
82  * transform all of its types. We don't recurse or process everything, because
83  * we don't want to add some of the types that compilers may add which are
84  * effectively unused.
85  *
86  * During pass 1, if we encounter any structures or unions we mark them for
87  * fixing up later. This is necessary because we may not be able to determine
88  * the full size of a structure at the beginning of time. This will happen if
89  * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
90  * this possibility we defer adding members to structures or even converting
91  * them during pass 1 and save that for pass 2. Adding all of the base
92  * structures without any of their members helps deal with any circular
93  * dependencies that we might encounter.
94  *
95  * Pass 2: This pass is used to do the first half of fixing up structures and
96  * unions. Rather than walk the entire type space again, we actually walk the
97  * list of structures and unions that we marked for later fixing up. Here, we
98  * iterate over every structure and add members to the underlying ctf_file_t,
99  * but not to the structs themselves. One might wonder why we don't, and the
100  * main reason is that libctf requires a ctf_update() be done before adding the
101  * members to structures or unions.
102  *
103  * Pass 3: This pass is used to do the second half of fixing up structures and
104  * unions. During this part we always go through and add members to structures
105  * and unions that we added to the container in the previous pass. In addition,
106  * we set the structure and union's actual size, which may have additional
107  * padding added by the compiler; it isn't simply the last offset. DWARF always
108  * guarantees an attribute exists for this. Importantly no ctf_id_t's change
109  * during pass 2.
110  *
111  * Pass 4: The next phase is to add CTF entries for all of the symbols and
112  * variables that are present in this die. During pass 1 we added entries to a
113  * map for each variable and function. During this pass, we iterate over the
114  * symbol table and when we encounter a symbol that we have in our lists of
115  * translated information which matches, we then add it to the ctf_file_t.
116  *
117  * Pass 5: Here we go and look for any weak symbols and functions and see if
118  * they match anything that we recognize. If so, then we add type information
119  * for them at this point based on the matching type.
120  *
121  * Pass 6: This pass is actually a variant on a merge. The traditional merge
122  * process expects there to be no duplicate types. As such, at the end of
123  * conversion, we do a dedup on all of the types in the system. The
124  * deduplication process is described in lib/libctf/common/ctf_merge.c.
125  *
126  * Once pass 6 is done, we've finished processing the individual compilation
127  * unit.
128  *
129  * The following steps reflect the general process of doing a conversion.
130  *
131  * 1) Walk the dwarf section and determine the number of compilation units
132  * 2) Create a ctf_cu_t for each compilation unit
133  * 3) Add all ctf_cu_t's to a workq
134  * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
135  *    consists of several steps, which were already enumerated.
136  * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics
137  *    of the merge are discussed in lib/libctf/common/ctf_merge.c.
138  * 6) Free everything up and return a ctf_file_t to the user. If we only had a
139  *    single compilation unit, then we give that to the user. Otherwise, we
140  *    return the merged ctf_file_t.
141  *
142  * Threading
143  * ---------
144  *
145  * The process has been designed to be amenable to threading. Each compilation
146  * unit has its own type stream, therefore the logical place to divide and
147  * conquer is at the compilation unit. Each ctf_cu_t has been built to be able
148  * to be processed independently of the others. It has its own libdwarf handle,
149  * as a given libdwarf handle may only be used by a single thread at a time.
150  * This allows the various ctf_cu_t's to be processed in parallel by different
151  * threads.
152  *
153  * All of the ctf_cu_t's are loaded into a workq which allows for a number of
154  * threads to be specified and used as a thread pool to process all of the
155  * queued work. We set the number of threads to use in the workq equal to the
156  * number of threads that the user has specified.
157  *
158  * After all of the compilation units have been drained, we use the same number
159  * of threads when performing a merge of multiple compilation units, if they
160  * exist.
161  *
162  * While all of these different parts do support and allow for multiple threads,
163  * it's important that when only a single thread is specified, that it be the
164  * calling thread. This allows the conversion routines to be used in a context
165  * that doesn't allow additional threads, such as rtld.
166  *
167  * Common DWARF Mechanics and Notes
168  * --------------------------------
169  *
170  * At this time, we really only support DWARFv2, though support for DWARFv4 is
171  * mostly there. There is no intent to support DWARFv3.
172  *
173  * Generally types for something are stored in the DW_AT_type attribute. For
174  * example, a function's return type will be stored in the local DW_AT_type
175  * attribute while the arguments will be in child DIEs. There are also various
176  * times when we don't have any DW_AT_type. In that case, the lack of a type
177  * implies, at least for C, that its C type is void. Because DWARF doesn't emit
178  * one, we have a synthetic void type that we create and manipulate instead and
179  * pass it off to consumers on an as-needed basis. If nothing has a void type,
180  * it will not be emitted.
181  *
182  * Architecture Specific Parts
183  * ---------------------------
184  *
185  * The CTF tooling encodes various information about the various architectures
186  * in the system. Importantly, the tool assumes that every architecture has a
187  * data model where long and pointer are the same size. This is currently the
188  * case, as the two data models illumos supports are ILP32 and LP64.
189  *
190  * In addition, we encode the mapping of various floating point sizes to various
191  * types for each architecture. If a new architecture is being added, it should
192  * be added to the list. The general design of the ctf conversion tools is to be
193  * architecture independent. eg. any of the tools here should be able to convert
194  * any architecture's DWARF into ctf; however, this has not been rigorously
195  * tested and more importantly, the ctf routines don't currently write out the
196  * data in an endian-aware form, they only use that of the currently running
197  * library.
198  */
199 
200 #include <libctf_impl.h>
201 #include <sys/avl.h>
202 #include <sys/debug.h>
203 #include <gelf.h>
204 #include <libdwarf.h>
205 #include <dwarf.h>
206 #include <libgen.h>
207 #include <workq.h>
208 #include <errno.h>
209 
210 #define	DWARF_VERSION_TWO	2
211 #define	DWARF_VARARGS_NAME	"..."
212 
/*
 * Dwarf may refer recursively to other types that we've already processed. To
 * see if we've already converted them, we look them up in an AVL tree that's
 * sorted by the DWARF id.
 *
 * A ctf_dwmap_t is a single entry in that tree. Entries are created by
 * ctf_dwmap_add() and ordered by ctf_dwmap_comp(), which compares cdm_off.
 */
typedef struct ctf_dwmap {
	avl_node_t	cdm_avl;	/* AVL tree node linkage */
	Dwarf_Off	cdm_off;	/* die offset; the sort key */
	Dwarf_Die	cdm_die;	/* die this entry was created for */
	ctf_id_t	cdm_id;		/* CTF type id the die maps to */
	boolean_t	cdm_fix;	/* caller-supplied 'fix' flag */
} ctf_dwmap_t;
225 
/*
 * Record of a single variable. Per the theory statement at the top of this
 * file, entries for variables are gathered during pass 1 and matched against
 * the symbol table in pass 4.
 */
typedef struct ctf_dwvar {
	ctf_list_t	cdv_list;	/* list linkage */
	char		*cdv_name;	/* variable name */
	ctf_id_t	cdv_type;	/* CTF type id of the variable */
	boolean_t	cdv_global;	/* presumably: globally visible? */
} ctf_dwvar_t;
232 
/*
 * Record of a single function. Per the theory statement at the top of this
 * file, entries for functions are gathered during pass 1 and matched against
 * the symbol table in pass 4.
 */
typedef struct ctf_dwfunc {
	ctf_list_t	cdf_list;	/* list linkage */
	char		*cdf_name;	/* function name */
	ctf_funcinfo_t	cdf_fip;	/* CTF function info */
	ctf_id_t	*cdf_argv;	/* presumably: argument type ids */
	boolean_t	cdf_global;	/* presumably: globally visible? */
} ctf_dwfunc_t;
240 
/*
 * Record of a bit field type; these are kept on the cu_bitfields list of a
 * ctf_cu_t. NOTE(review): the field descriptions below are inferred from the
 * member names; confirm against the code that populates this structure.
 */
typedef struct ctf_dwbitf {
	ctf_list_t	cdb_list;	/* list linkage */
	ctf_id_t	cdb_base;	/* presumably: underlying base type */
	uint_t		cdb_nbits;	/* presumably: width in bits */
	ctf_id_t	cdb_id;		/* presumably: id of the bitfield type */
} ctf_dwbitf_t;
247 
/*
 * The ctf_cu_t represents a single top-level DWARF die unit, i.e. one
 * compilation unit. While generally, the typical object file has only a single
 * die, if we're asked to convert something that's been linked from multiple
 * sources, multiple dies will exist.
 *
 * cu_voidtid and cu_longtid are filled in on demand by ctf_dwarf_void() and
 * ctf_dwarf_long(); both of those treat a value of CTF_ERR as meaning that the
 * type has not been created yet.
 */
typedef struct ctf_die {
	Elf		*cu_elf;	/* shared libelf handle */
	char		*cu_name;	/* basename of the DIE */
	ctf_merge_t	*cu_cmh;	/* merge handle */
	ctf_list_t	cu_vars;	/* List of variables */
	ctf_list_t	cu_funcs;	/* List of functions */
	ctf_list_t	cu_bitfields;	/* Bit field members */
	Dwarf_Debug	cu_dwarf;	/* libdwarf handle */
	Dwarf_Die	cu_cu;		/* libdwarf compilation unit */
	Dwarf_Off	cu_cuoff;	/* cu's offset */
	Dwarf_Off	cu_maxoff;	/* maximum offset */
	ctf_file_t	*cu_ctfp;	/* output CTF file */
	avl_tree_t	cu_map;		/* map die offsets to CTF types */
	char		*cu_errbuf;	/* error message buffer */
	size_t		cu_errlen;	/* error message buffer length */
	size_t		cu_ptrsz;	/* object's pointer size */
	boolean_t	cu_bigend;	/* is it big endian */
	boolean_t	cu_doweaks;	/* should we convert weak symbols? */
	uint_t		cu_mach;	/* machine type */
	ctf_id_t	cu_voidtid;	/* id of the synthetic 'void' type */
	ctf_id_t	cu_longtid;	/* id for a 'long' */
} ctf_cu_t;
275 
276 static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
277 static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
278 static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);
279 
280 static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
281     boolean_t);
282 static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
283     ctf_id_t *);
284 
285 /*
286  * This is a generic way to set a CTF Conversion backend error depending on what
287  * we were doing. Unless it was one of a specific set of errors that don't
288  * indicate a programming / translation bug, eg. ENOMEM, then we transform it
289  * into a CTF backend error and fill in the error buffer.
290  */
291 static int
292 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
293 {
294 	va_list ap;
295 	int ret;
296 	size_t off = 0;
297 	ssize_t rem = cup->cu_errlen;
298 	if (cfp != NULL)
299 		err = ctf_errno(cfp);
300 
301 	if (err == ENOMEM)
302 		return (err);
303 
304 	ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
305 	if (ret < 0)
306 		goto err;
307 	off += ret;
308 	rem = MAX(rem - ret, 0);
309 
310 	va_start(ap, fmt);
311 	ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap);
312 	va_end(ap);
313 	if (ret < 0)
314 		goto err;
315 
316 	off += ret;
317 	rem = MAX(rem - ret, 0);
318 	if (fmt[strlen(fmt) - 1] != '\n') {
319 		(void) snprintf(cup->cu_errbuf + off, rem,
320 		    ": %s\n", ctf_errmsg(err));
321 	}
322 	va_end(ap);
323 	return (ECTF_CONVBKERR);
324 
325 err:
326 	cup->cu_errbuf[0] = '\0';
327 	return (ECTF_CONVBKERR);
328 }
329 
330 /*
331  * DWARF often opts to put no explicit type to describe a void type. eg. if we
332  * have a reference type whose DW_AT_type member doesn't exist, then we should
333  * instead assume it points to void. Because this isn't represented, we
334  * instead cause it to come into existence.
335  */
336 static ctf_id_t
337 ctf_dwarf_void(ctf_cu_t *cup)
338 {
339 	if (cup->cu_voidtid == CTF_ERR) {
340 		ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
341 		cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT,
342 		    "void", &enc);
343 		if (cup->cu_voidtid == CTF_ERR) {
344 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
345 			    "failed to create void type: %s\n",
346 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
347 		}
348 	}
349 
350 	return (cup->cu_voidtid);
351 }
352 
353 /*
354  * There are many different forms that an array index may take. However, we just
355  * always force it to be of a type long no matter what. Therefore we use this to
356  * have a single instance of long across everything.
357  */
358 static ctf_id_t
359 ctf_dwarf_long(ctf_cu_t *cup)
360 {
361 	if (cup->cu_longtid == CTF_ERR) {
362 		ctf_encoding_t enc;
363 
364 		enc.cte_format = CTF_INT_SIGNED;
365 		enc.cte_offset = 0;
366 		/* All illumos systems are LP */
367 		enc.cte_bits = cup->cu_ptrsz * 8;
368 		cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
369 		    "long", &enc);
370 		if (cup->cu_longtid == CTF_ERR) {
371 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
372 			    "failed to create long type: %s\n",
373 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
374 		}
375 
376 	}
377 
378 	return (cup->cu_longtid);
379 }
380 
381 static int
382 ctf_dwmap_comp(const void *a, const void *b)
383 {
384 	const ctf_dwmap_t *ca = a;
385 	const ctf_dwmap_t *cb = b;
386 
387 	if (ca->cdm_off > cb->cdm_off)
388 		return (1);
389 	if (ca->cdm_off < cb->cdm_off)
390 		return (-1);
391 	return (0);
392 }
393 
394 static int
395 ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix)
396 {
397 	int ret;
398 	avl_index_t index;
399 	ctf_dwmap_t *dwmap;
400 	Dwarf_Off off;
401 
402 	VERIFY(id > 0 && id < CTF_MAX_TYPE);
403 
404 	if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0)
405 		return (ret);
406 
407 	if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
408 		return (ENOMEM);
409 
410 	dwmap->cdm_die = die;
411 	dwmap->cdm_off = off;
412 	dwmap->cdm_id = id;
413 	dwmap->cdm_fix = fix;
414 
415 	ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id);
416 	VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL);
417 	avl_insert(&cup->cu_map, dwmap, index);
418 	return (0);
419 }
420 
421 static int
422 ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
423     Dwarf_Attribute *attrp)
424 {
425 	int ret;
426 	Dwarf_Error derr;
427 
428 	if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
429 		return (0);
430 	if (ret == DW_DLV_NO_ENTRY) {
431 		*attrp = NULL;
432 		return (ENOENT);
433 	}
434 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
435 	    "failed to get attribute for type: %s\n",
436 	    dwarf_errmsg(derr));
437 	return (ECTF_CONVBKERR);
438 }
439 
440 static int
441 ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
442 {
443 	int ret;
444 	Dwarf_Attribute attr;
445 	Dwarf_Error derr;
446 
447 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
448 		return (ret);
449 
450 	if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
451 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
452 		return (0);
453 	}
454 
455 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
456 	    "failed to get unsigned attribute for type: %s\n",
457 	    dwarf_errmsg(derr));
458 	return (ECTF_CONVBKERR);
459 }
460 
461 static int
462 ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
463     Dwarf_Die *diep)
464 {
465 	int ret;
466 	Dwarf_Off off;
467 	Dwarf_Error derr;
468 
469 	if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0)
470 		return (ret);
471 
472 	off += cup->cu_cuoff;
473 	if ((ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr)) !=
474 	    DW_DLV_OK) {
475 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
476 		    "failed to get die from offset %" DW_PR_DUu ": %s\n",
477 		    off, dwarf_errmsg(derr));
478 		return (ECTF_CONVBKERR);
479 	}
480 
481 	return (0);
482 }
483 
484 static int
485 ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
486     Dwarf_Signed *valp)
487 {
488 	int ret;
489 	Dwarf_Attribute attr;
490 	Dwarf_Error derr;
491 
492 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
493 		return (ret);
494 
495 	if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
496 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
497 		return (0);
498 	}
499 
500 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
501 	    "failed to get unsigned attribute for type: %s\n",
502 	    dwarf_errmsg(derr));
503 	return (ECTF_CONVBKERR);
504 }
505 
506 static int
507 ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
508     Dwarf_Unsigned *valp)
509 {
510 	int ret;
511 	Dwarf_Attribute attr;
512 	Dwarf_Error derr;
513 
514 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
515 		return (ret);
516 
517 	if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
518 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
519 		return (0);
520 	}
521 
522 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
523 	    "failed to get unsigned attribute for type: %s\n",
524 	    dwarf_errmsg(derr));
525 	return (ECTF_CONVBKERR);
526 }
527 
528 static int
529 ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
530     Dwarf_Bool *val)
531 {
532 	int ret;
533 	Dwarf_Attribute attr;
534 	Dwarf_Error derr;
535 
536 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
537 		return (ret);
538 
539 	if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
540 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
541 		return (0);
542 	}
543 
544 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
545 	    "failed to get boolean attribute for type: %s\n",
546 	    dwarf_errmsg(derr));
547 
548 	return (ECTF_CONVBKERR);
549 }
550 
551 static int
552 ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp)
553 {
554 	int ret;
555 	char *s;
556 	Dwarf_Attribute attr;
557 	Dwarf_Error derr;
558 
559 	*strp = NULL;
560 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
561 		return (ret);
562 
563 	if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
564 		if ((*strp = ctf_strdup(s)) == NULL)
565 			ret = ENOMEM;
566 		else
567 			ret = 0;
568 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
569 		return (ret);
570 	}
571 
572 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
573 	    "failed to get string attribute for type: %s\n",
574 	    dwarf_errmsg(derr));
575 	return (ECTF_CONVBKERR);
576 }
577 
578 static int
579 ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp)
580 {
581 	int ret;
582 	Dwarf_Error derr;
583 	Dwarf_Attribute attr;
584 	Dwarf_Locdesc *loc;
585 	Dwarf_Signed locnum;
586 
587 	if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location,
588 	    &attr)) != 0)
589 		return (ret);
590 
591 	if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
592 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
593 		    "failed to obtain location list for member offset: %s",
594 		    dwarf_errmsg(derr));
595 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
596 		return (ECTF_CONVBKERR);
597 	}
598 	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
599 
600 	if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
601 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
602 		    "failed to parse location structure for member");
603 		dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
604 		dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
605 		return (ECTF_CONVBKERR);
606 	}
607 
608 	*valp = loc->ld_s->lr_number;
609 
610 	dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
611 	dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
612 	return (0);
613 }
614 
615 
616 static int
617 ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp)
618 {
619 	Dwarf_Error derr;
620 
621 	if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
622 		return (0);
623 
624 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
625 	    "failed to get die offset: %s\n",
626 	    dwarf_errmsg(derr));
627 	return (ECTF_CONVBKERR);
628 }
629 
630 /* simpler variant for debugging output */
631 static Dwarf_Off
632 ctf_die_offset(Dwarf_Die die)
633 {
634 	Dwarf_Off off = -1;
635 	Dwarf_Error derr;
636 
637 	(void) dwarf_dieoffset(die, &off, &derr);
638 	return (off);
639 }
640 
641 static int
642 ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp)
643 {
644 	Dwarf_Error derr;
645 
646 	if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
647 		return (0);
648 
649 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
650 	    "failed to get tag type: %s\n",
651 	    dwarf_errmsg(derr));
652 	return (ECTF_CONVBKERR);
653 }
654 
655 static int
656 ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp)
657 {
658 	Dwarf_Error derr;
659 	int ret;
660 
661 	*sibp = NULL;
662 	ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr);
663 	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
664 		return (0);
665 
666 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
667 	    "failed to sibling from die: %s\n",
668 	    dwarf_errmsg(derr));
669 	return (ECTF_CONVBKERR);
670 }
671 
672 static int
673 ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp)
674 {
675 	Dwarf_Error derr;
676 	int ret;
677 
678 	*childp = NULL;
679 	ret = dwarf_child(base, childp, &derr);
680 	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
681 		return (0);
682 
683 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
684 	    "failed to child from die: %s\n",
685 	    dwarf_errmsg(derr));
686 	return (ECTF_CONVBKERR);
687 }
688 
689 /*
690  * Compilers disagree on what to do to determine if something has global
691  * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
692  * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
693  * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
694  */
695 static int
696 ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp)
697 {
698 	int ret;
699 	Dwarf_Signed vis;
700 	Dwarf_Bool ext;
701 
702 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) {
703 		*igp = vis == DW_VIS_exported;
704 		return (0);
705 	} else if (ret != ENOENT) {
706 		return (ret);
707 	}
708 
709 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) {
710 		if (ret == ENOENT) {
711 			*igp = B_FALSE;
712 			return (0);
713 		}
714 		return (ret);
715 	}
716 	*igp = ext != 0 ? B_TRUE : B_FALSE;
717 	return (0);
718 }
719 
720 static int
721 ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen)
722 {
723 	GElf_Ehdr ehdr;
724 
725 	if (gelf_getehdr(elf, &ehdr) == NULL) {
726 		(void) snprintf(errbuf, errlen,
727 		    "failed to get ELF header: %s\n",
728 		    elf_errmsg(elf_errno()));
729 		return (ECTF_CONVBKERR);
730 	}
731 
732 	cup->cu_mach = ehdr.e_machine;
733 
734 	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
735 		cup->cu_ptrsz = 4;
736 		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0);
737 	} else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
738 		cup->cu_ptrsz = 8;
739 		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0);
740 	} else {
741 		(void) snprintf(errbuf, errlen,
742 		    "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
743 		return (ECTF_CONVBKERR);
744 	}
745 
746 	if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
747 		cup->cu_bigend = B_FALSE;
748 	} else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
749 		cup->cu_bigend = B_TRUE;
750 	} else {
751 		(void) snprintf(errbuf, errlen,
752 		    "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]);
753 		return (ECTF_CONVBKERR);
754 	}
755 
756 	return (0);
757 }
758 
/*
 * One row of a floating point mapping. cdfe_size is a size in bytes and
 * cdfe_enc holds the CTF floating point encodings to use for that size, as
 * indexed by ctf_dwarf_float_base(): [0] for plain floating point types, [1]
 * for complex types and [2] for imaginary types. A cdfe_size of zero marks the
 * end of a list of rows.
 */
typedef struct ctf_dwarf_fpent {
	size_t	cdfe_size;
	uint_t	cdfe_enc[3];
} ctf_dwarf_fpent_t;
763 
/*
 * The floating point mappings for a single ELF machine type. cdf_mach is
 * compared against a compilation unit's cu_mach and cdf_ents is the list of
 * sizes that the machine supports, terminated by a row whose size is zero.
 */
typedef struct ctf_dwarf_fpmap {
	uint_t			cdf_mach;
	ctf_dwarf_fpent_t	cdf_ents[4];
} ctf_dwarf_fpmap_t;
768 
/*
 * Floating point mappings for every supported machine type, searched linearly
 * by ctf_dwarf_float_base(). The final entry, whose machine type is EM_NONE,
 * terminates the table; new architectures must be added ahead of it.
 */
static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
	{ EM_SPARC, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_SPARC32PLUS, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_SPARCV9, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	/* The only entry here whose long double row is not 16 bytes. */
	{ EM_386, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_X86_64, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_NONE }
};
802 
803 static int
804 ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc)
805 {
806 	const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
807 	const ctf_dwarf_fpent_t *ent;
808 	uint_t col = 0, mult = 1;
809 
810 	for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
811 		if (map->cdf_mach == cup->cu_mach)
812 			break;
813 	}
814 
815 	if (map->cdf_mach == EM_NONE) {
816 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
817 		    "Unsupported machine type: %d\n", cup->cu_mach);
818 		return (ENOTSUP);
819 	}
820 
821 	if (type == DW_ATE_complex_float) {
822 		mult = 2;
823 		col = 1;
824 	} else if (type == DW_ATE_imaginary_float ||
825 	    type == DW_ATE_SUN_imaginary_float) {
826 		col = 2;
827 	}
828 
829 	ent = &map->cdf_ents[0];
830 	for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
831 		if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
832 			enc->cte_format = ent->cdfe_enc[col];
833 			return (0);
834 		}
835 	}
836 
837 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
838 	    "failed to find valid fp mapping for encoding %d, size %d bits\n",
839 	    type, enc->cte_bits);
840 	return (EINVAL);
841 }
842 
843 static int
844 ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp,
845     ctf_encoding_t *enc)
846 {
847 	int ret;
848 	Dwarf_Signed type;
849 
850 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0)
851 		return (ret);
852 
853 	switch (type) {
854 	case DW_ATE_unsigned:
855 	case DW_ATE_address:
856 		*kindp = CTF_K_INTEGER;
857 		enc->cte_format = 0;
858 		break;
859 	case DW_ATE_unsigned_char:
860 		*kindp = CTF_K_INTEGER;
861 		enc->cte_format = CTF_INT_CHAR;
862 		break;
863 	case DW_ATE_signed:
864 		*kindp = CTF_K_INTEGER;
865 		enc->cte_format = CTF_INT_SIGNED;
866 		break;
867 	case DW_ATE_signed_char:
868 		*kindp = CTF_K_INTEGER;
869 		enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
870 		break;
871 	case DW_ATE_boolean:
872 		*kindp = CTF_K_INTEGER;
873 		enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
874 		break;
875 	case DW_ATE_float:
876 	case DW_ATE_complex_float:
877 	case DW_ATE_imaginary_float:
878 	case DW_ATE_SUN_imaginary_float:
879 	case DW_ATE_SUN_interval_float:
880 		*kindp = CTF_K_FLOAT;
881 		if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0)
882 			return (ret);
883 		break;
884 	default:
885 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
886 		    "encountered unknown DWARF encoding: %d", type);
887 		return (ECTF_CONVBKERR);
888 	}
889 
890 	return (0);
891 }
892 
893 /*
894  * Different compilers (at least GCC and Studio) use different names for types.
895  * This parses the types and attempts to unify them. If this fails, we just fall
896  * back to using the DWARF itself.
897  */
898 static int
899 ctf_dwarf_parse_base(const char *name, int *kindp, ctf_encoding_t *enc,
900     char **newnamep)
901 {
902 	char buf[256];
903 	char *base, *c, *last;
904 	int nlong = 0, nshort = 0, nchar = 0, nint = 0;
905 	int sign = 1;
906 
907 	if (strlen(name) + 1 > sizeof (buf))
908 		return (EINVAL);
909 
910 	(void) strlcpy(buf, name, sizeof (buf));
911 	for (c = strtok_r(buf, " ", &last); c != NULL;
912 	    c = strtok_r(NULL, " ", &last)) {
913 		if (strcmp(c, "signed") == 0) {
914 			sign = 1;
915 		} else if (strcmp(c, "unsigned") == 0) {
916 			sign = 0;
917 		} else if (strcmp(c, "long") == 0) {
918 			nlong++;
919 		} else if (strcmp(c, "char") == 0) {
920 			nchar++;
921 		} else if (strcmp(c, "short") == 0) {
922 			nshort++;
923 		} else if (strcmp(c, "int") == 0) {
924 			nint++;
925 		} else {
926 			/*
927 			 * If we don't recognize any of the tokens, we'll tell
928 			 * the caller to fall back to the dwarf-provided
929 			 * encoding information.
930 			 */
931 			return (EINVAL);
932 		}
933 	}
934 
935 	if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
936 		return (EINVAL);
937 
938 	if (nchar > 0) {
939 		if (nlong > 0 || nshort > 0 || nint > 0)
940 			return (EINVAL);
941 		base = "char";
942 	} else if (nshort > 0) {
943 		if (nlong > 0)
944 			return (EINVAL);
945 		base = "short";
946 	} else if (nlong > 0) {
947 		base = "long";
948 	} else {
949 		base = "int";
950 	}
951 
952 	if (nchar > 0)
953 		enc->cte_format = CTF_INT_CHAR;
954 	else
955 		enc->cte_format = 0;
956 
957 	if (sign > 0)
958 		enc->cte_format |= CTF_INT_SIGNED;
959 
960 	(void) snprintf(buf, sizeof (buf), "%s%s%s",
961 	    (sign ? "" : "unsigned "),
962 	    (nlong > 1 ? "long " : ""),
963 	    base);
964 
965 	*newnamep = ctf_strdup(buf);
966 	if (*newnamep == NULL)
967 		return (ENOMEM);
968 	*kindp = CTF_K_INTEGER;
969 	return (0);
970 }
971 
972 static int
973 ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot,
974     Dwarf_Off off)
975 {
976 	int ret;
977 	char *name, *nname;
978 	Dwarf_Unsigned sz;
979 	int kind;
980 	ctf_encoding_t enc;
981 	ctf_id_t id;
982 
983 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0)
984 		return (ret);
985 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) {
986 		goto out;
987 	}
988 	ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
989 	    off, sz);
990 
991 	bzero(&enc, sizeof (ctf_encoding_t));
992 	enc.cte_bits = sz * 8;
993 	if ((ret = ctf_dwarf_parse_base(name, &kind, &enc, &nname)) == 0) {
994 		ctf_free(name, strlen(name) + 1);
995 		name = nname;
996 	} else {
997 		if (ret != EINVAL)
998 			return (ret);
999 		ctf_dprintf("falling back to dwarf for base type %s\n", name);
1000 		if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0)
1001 			return (ret);
1002 	}
1003 
1004 	id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind);
1005 	if (id == CTF_ERR) {
1006 		ret = ctf_errno(cup->cu_ctfp);
1007 	} else {
1008 		*idp = id;
1009 		ret = ctf_dwmap_add(cup, id, die, B_FALSE);
1010 	}
1011 out:
1012 	ctf_free(name, strlen(name) + 1);
1013 	return (ret);
1014 }
1015 
1016 /*
1017  * Getting a member's offset is a surprisingly intricate dance. It works as
1018  * follows:
1019  *
1020  * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
1021  * have a DW_AT_data_member_location. We won't have both. Thus we check first
1022  * for DW_AT_data_bit_offset, and if it exists, we're set.
1023  *
1024  * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then
1025  * we have to grab the data location and use the following dance:
1026  *
1027  * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
1028  * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
1029  * When it's been omitted, we then have to say that the size is that of the
1030  * underlying type, which forces that to be after a ctf_update(). Here, we have
1031  * to do different things based on whether or not we're using big endian or
1032  * little endian to obtain the proper offset.
1033  */
1034 static int
1035 ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid,
1036     ulong_t *offp)
1037 {
1038 	int ret;
1039 	Dwarf_Unsigned loc, bitsz, bytesz;
1040 	Dwarf_Signed bitoff;
1041 	size_t off;
1042 	ssize_t tsz;
1043 
1044 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset,
1045 	    &loc)) == 0) {
1046 		*offp = loc;
1047 		return (0);
1048 	} else if (ret != ENOENT) {
1049 		return (ret);
1050 	}
1051 
1052 	if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0)
1053 		return (ret);
1054 	off = loc * 8;
1055 
1056 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset,
1057 	    &bitoff)) != 0) {
1058 		if (ret != ENOENT)
1059 			return (ret);
1060 		*offp = off;
1061 		return (0);
1062 	}
1063 
1064 	/* At this point we have to have DW_AT_bit_size */
1065 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0)
1066 		return (ret);
1067 
1068 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size,
1069 	    &bytesz)) != 0) {
1070 		if (ret != ENOENT)
1071 			return (ret);
1072 		if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) {
1073 			int e = ctf_errno(cup->cu_ctfp);
1074 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1075 			    "failed to get type size: %s", ctf_errmsg(e));
1076 			return (ECTF_CONVBKERR);
1077 		}
1078 	} else {
1079 		tsz = bytesz;
1080 	}
1081 	tsz *= 8;
1082 	if (cup->cu_bigend == B_TRUE) {
1083 		*offp = off + bitoff;
1084 	} else {
1085 		*offp = off + tsz - bitoff - bitsz;
1086 	}
1087 
1088 	return (0);
1089 }
1090 
1091 /*
1092  * We need to determine if the member in question is a bitfield. If it is, then
1093  * we need to go through and create a new type that's based on the actual base
1094  * type, but has a different size. We also rename the type as a result to help
1095  * deal with future collisions.
1096  *
1097  * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
1098  * bit size member and it does not equal the byte size member, then we need to
1099  * create a bitfield type based on this.
1100  *
1101  * Note: When we support DWARFv4, there may be a chance that we need to also
1102  * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
1103  */
1104 static int
1105 ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp)
1106 {
1107 	int ret;
1108 	Dwarf_Unsigned bitsz;
1109 	ctf_encoding_t e;
1110 	ctf_dwbitf_t *cdb;
1111 	ctf_dtdef_t *dtd;
1112 	ctf_id_t base = *idp;
1113 	int kind;
1114 
1115 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) {
1116 		if (ret == ENOENT)
1117 			return (0);
1118 		return (ret);
1119 	}
1120 
1121 	ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
1122 	/*
1123 	 * Given that we now have a bitsize, time to go do something about it.
1124 	 * We're going to create a new type based on the current one, but first
1125 	 * we need to find the base type. This means we need to traverse any
1126 	 * typedef's, consts, and volatiles until we get to what should be
1127 	 * something of type integer or enumeration.
1128 	 */
1129 	VERIFY(bitsz < UINT32_MAX);
1130 	dtd = ctf_dtd_lookup(cup->cu_ctfp, base);
1131 	VERIFY(dtd != NULL);
1132 	kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1133 	while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
1134 	    kind == CTF_K_VOLATILE) {
1135 		dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type);
1136 		VERIFY(dtd != NULL);
1137 		kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1138 	}
1139 	ctf_dprintf("got kind %d\n", kind);
1140 	VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
1141 
1142 	/*
1143 	 * As surprising as it may be, it is strictly possible to create a
1144 	 * bitfield that is based on an enum. Of course, the C standard leaves
1145 	 * enums sizing as an ABI concern more or less. To that effect, today on
1146 	 * all illumos platforms the size of an enum is generally that of an
1147 	 * int as our supported data models and ABIs all agree on that. So what
1148 	 * we'll do is fake up a CTF encoding here to use. In this case, we'll
1149 	 * treat it as an unsigned value of whatever size the underlying enum
1150 	 * currently has (which is in the ctt_size member of its dynamic type
1151 	 * data).
1152 	 */
1153 	if (kind == CTF_K_INTEGER) {
1154 		e = dtd->dtd_u.dtu_enc;
1155 	} else {
1156 		bzero(&e, sizeof (ctf_encoding_t));
1157 		e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
1158 	}
1159 
1160 	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL;
1161 	    cdb = ctf_list_next(cdb)) {
1162 		if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
1163 			break;
1164 	}
1165 
1166 	/*
1167 	 * Create a new type if none exists. We name all types in a way that is
1168 	 * guaranteed not to conflict with the corresponding C type. We do this
1169 	 * by using the ':' operator.
1170 	 */
1171 	if (cdb == NULL) {
1172 		size_t namesz;
1173 		char *name;
1174 
1175 		e.cte_bits = bitsz;
1176 		namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
1177 		    (uint32_t)bitsz);
1178 		name = ctf_alloc(namesz + 1);
1179 		if (name == NULL)
1180 			return (ENOMEM);
1181 		cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
1182 		if (cdb == NULL) {
1183 			ctf_free(name, namesz + 1);
1184 			return (ENOMEM);
1185 		}
1186 		(void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
1187 		    (uint32_t)bitsz);
1188 
1189 		cdb->cdb_base = base;
1190 		cdb->cdb_nbits = bitsz;
1191 		cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
1192 		    name, &e);
1193 		if (cdb->cdb_id == CTF_ERR) {
1194 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1195 			    "failed to get add bitfield type %s: %s", name,
1196 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1197 			ctf_free(name, namesz + 1);
1198 			ctf_free(cdb, sizeof (ctf_dwbitf_t));
1199 			return (ECTF_CONVBKERR);
1200 		}
1201 		ctf_free(name, namesz + 1);
1202 		ctf_list_append(&cup->cu_bitfields, cdb);
1203 	}
1204 
1205 	*idp = cdb->cdb_id;
1206 
1207 	return (0);
1208 }
1209 
1210 static int
1211 ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add)
1212 {
1213 	int ret, kind;
1214 	Dwarf_Die child, memb;
1215 	Dwarf_Unsigned size;
1216 
1217 	kind = ctf_type_kind(cup->cu_ctfp, base);
1218 	VERIFY(kind != CTF_ERR);
1219 	VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
1220 
1221 	/*
1222 	 * Members are in children. However, gcc also allows empty ones.
1223 	 */
1224 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1225 		return (ret);
1226 	if (child == NULL)
1227 		return (0);
1228 
1229 	memb = child;
1230 	while (memb != NULL) {
1231 		Dwarf_Die sib, tdie;
1232 		Dwarf_Half tag;
1233 		ctf_id_t mid;
1234 		char *mname;
1235 		ulong_t memboff = 0;
1236 
1237 		if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0)
1238 			return (ret);
1239 
1240 		if (tag != DW_TAG_member)
1241 			goto next;
1242 
1243 		if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0)
1244 			return (ret);
1245 
1246 		if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid,
1247 		    CTF_ADD_NONROOT)) != 0)
1248 			return (ret);
1249 		ctf_dprintf("Got back type id: %d\n", mid);
1250 
1251 		/*
1252 		 * If we're not adding a member, just go ahead and return.
1253 		 */
1254 		if (add == B_FALSE) {
1255 			if ((ret = ctf_dwarf_member_bitfield(cup, memb,
1256 			    &mid)) != 0)
1257 				return (ret);
1258 			goto next;
1259 		}
1260 
1261 		if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name,
1262 		    &mname)) != 0 && ret != ENOENT)
1263 			return (ret);
1264 		if (ret == ENOENT)
1265 			mname = NULL;
1266 
1267 		if (kind == CTF_K_UNION) {
1268 			memboff = 0;
1269 		} else if ((ret = ctf_dwarf_member_offset(cup, memb, mid,
1270 		    &memboff)) != 0) {
1271 			if (mname != NULL)
1272 				ctf_free(mname, strlen(mname) + 1);
1273 			return (ret);
1274 		}
1275 
1276 		if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0)
1277 			return (ret);
1278 
1279 		ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff);
1280 		if (ret == CTF_ERR) {
1281 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1282 			    "failed to add member %s: %s",
1283 			    mname, ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1284 			if (mname != NULL)
1285 				ctf_free(mname, strlen(mname) + 1);
1286 			return (ECTF_CONVBKERR);
1287 		}
1288 
1289 		if (mname != NULL)
1290 			ctf_free(mname, strlen(mname) + 1);
1291 
1292 next:
1293 		if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0)
1294 			return (ret);
1295 		memb = sib;
1296 	}
1297 
1298 	/*
1299 	 * If we're not adding members, then we don't know the final size of the
1300 	 * structure, so end here.
1301 	 */
1302 	if (add == B_FALSE)
1303 		return (0);
1304 
1305 	/* Finally set the size of the structure to the actual byte size */
1306 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0)
1307 		return (ret);
1308 	if ((ctf_set_size(cup->cu_ctfp, base, size)) == CTF_ERR) {
1309 		int e = ctf_errno(cup->cu_ctfp);
1310 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1311 		    "failed to set type size for %d to 0x%x: %s", base,
1312 		    (uint32_t)size, ctf_errmsg(e));
1313 		return (ECTF_CONVBKERR);
1314 	}
1315 
1316 	return (0);
1317 }
1318 
1319 static int
1320 ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1321     int kind, int isroot)
1322 {
1323 	int ret;
1324 	char *name;
1325 	ctf_id_t base;
1326 	Dwarf_Die child;
1327 	Dwarf_Bool decl;
1328 
1329 	/*
1330 	 * Deal with the terribly annoying case of anonymous structs and unions.
1331 	 * If they don't have a name, set the name to the empty string.
1332 	 */
1333 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1334 	    ret != ENOENT)
1335 		return (ret);
1336 	if (ret == ENOENT)
1337 		name = NULL;
1338 
1339 	/*
1340 	 * We need to check if we just have a declaration here. If we do, then
1341 	 * instead of creating an actual structure or union, we're just going to
1342 	 * go ahead and create a forward. During a dedup or merge, the forward
1343 	 * will be replaced with the real thing.
1344 	 */
1345 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration,
1346 	    &decl)) != 0) {
1347 		if (ret != ENOENT)
1348 			return (ret);
1349 		decl = 0;
1350 	}
1351 
1352 	if (decl != 0) {
1353 		base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind);
1354 	} else if (kind == CTF_K_STRUCT) {
1355 		base = ctf_add_struct(cup->cu_ctfp, isroot, name);
1356 	} else {
1357 		base = ctf_add_union(cup->cu_ctfp, isroot, name);
1358 	}
1359 	ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
1360 	if (name != NULL)
1361 		ctf_free(name, strlen(name) + 1);
1362 	if (base == CTF_ERR)
1363 		return (ctf_errno(cup->cu_ctfp));
1364 	*idp = base;
1365 
1366 	/*
1367 	 * If it's just a declaration, we're not going to mark it for fix up or
1368 	 * do anything else.
1369 	 */
1370 	if (decl == B_TRUE)
1371 		return (ctf_dwmap_add(cup, base, die, B_FALSE));
1372 	if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0)
1373 		return (ret);
1374 
1375 	/*
1376 	 * The children of a structure or union are generally members. However,
1377 	 * some compilers actually insert structs and unions there and not as a
1378 	 * top-level die. Therefore, to make sure we honor our pass 1 contract
1379 	 * of having all the base types, but not members, we need to walk this
1380 	 * for instances of a DW_TAG_union_type.
1381 	 */
1382 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1383 		return (ret);
1384 
1385 	while (child != NULL) {
1386 		Dwarf_Half tag;
1387 		Dwarf_Die sib;
1388 
1389 		if ((ret = ctf_dwarf_tag(cup, child, &tag)) != 0)
1390 			return (ret);
1391 
1392 		switch (tag) {
1393 		case DW_TAG_union_type:
1394 		case DW_TAG_structure_type:
1395 			ret = ctf_dwarf_convert_type(cup, child, NULL,
1396 			    CTF_ADD_NONROOT);
1397 			if (ret != 0) {
1398 				return (ret);
1399 			}
1400 			break;
1401 		default:
1402 			break;
1403 		}
1404 
1405 		if ((ret = ctf_dwarf_sib(cup, child, &sib)) != 0)
1406 			return (ret);
1407 		child = sib;
1408 	}
1409 
1410 	return (0);
1411 }
1412 
1413 static int
1414 ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp,
1415     ctf_id_t base, int isroot)
1416 {
1417 	int ret;
1418 	Dwarf_Die sib;
1419 	Dwarf_Unsigned val;
1420 	Dwarf_Signed sval;
1421 	ctf_arinfo_t ar;
1422 
1423 	ctf_dprintf("creating array range\n");
1424 
1425 	if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0)
1426 		return (ret);
1427 	if (sib != NULL) {
1428 		ctf_id_t id;
1429 		if ((ret = ctf_dwarf_create_array_range(cup, sib, &id,
1430 		    base, CTF_ADD_NONROOT)) != 0)
1431 			return (ret);
1432 		ar.ctr_contents = id;
1433 	} else {
1434 		ar.ctr_contents = base;
1435 	}
1436 
1437 	if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR)
1438 		return (ctf_errno(cup->cu_ctfp));
1439 
1440 	/*
1441 	 * Array bounds can be signed or unsigned, but there are several kinds
1442 	 * of signless forms (data1, data2, etc) that take their sign from the
1443 	 * routine that is trying to interpret them.  That is, data1 can be
1444 	 * either signed or unsigned, depending on whether you use the signed or
1445 	 * unsigned accessor function.  GCC will use the signless forms to store
1446 	 * unsigned values which have their high bit set, so we need to try to
1447 	 * read them first as unsigned to get positive values.  We could also
1448 	 * try signed first, falling back to unsigned if we got a negative
1449 	 * value.
1450 	 */
1451 	if ((ret = ctf_dwarf_unsigned(cup, range, DW_AT_upper_bound,
1452 	    &val)) == 0) {
1453 		ar.ctr_nelems = val + 1;
1454 	} else if (ret != ENOENT) {
1455 		return (ret);
1456 	} else if ((ret = ctf_dwarf_signed(cup, range, DW_AT_upper_bound,
1457 	    &sval)) == 0) {
1458 		ar.ctr_nelems = sval + 1;
1459 	} else if (ret != ENOENT) {
1460 		return (ret);
1461 	} else {
1462 		ar.ctr_nelems = 0;
1463 	}
1464 
1465 	if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR)
1466 		return (ctf_errno(cup->cu_ctfp));
1467 
1468 	return (0);
1469 }
1470 
1471 /*
1472  * Try and create an array type. First, the kind of the array is specified in
1473  * the DW_AT_type entry. Next, the number of entries is stored in a more
1474  * complicated form, we should have a child that has the DW_TAG_subrange type.
1475  */
1476 static int
1477 ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1478 {
1479 	int ret;
1480 	Dwarf_Die tdie, rdie;
1481 	ctf_id_t tid;
1482 	Dwarf_Half rtag;
1483 
1484 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1485 		return (ret);
1486 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1487 	    CTF_ADD_NONROOT)) != 0)
1488 		return (ret);
1489 
1490 	if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0)
1491 		return (ret);
1492 	if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0)
1493 		return (ret);
1494 	if (rtag != DW_TAG_subrange_type) {
1495 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1496 		    "encountered array without DW_TAG_subrange_type child\n");
1497 		return (ECTF_CONVBKERR);
1498 	}
1499 
1500 	/*
1501 	 * The compiler may opt to describe a multi-dimensional array as one
1502 	 * giant array or it may opt to instead encode it as a series of
1503 	 * subranges. If it's the latter, then for each subrange we introduce a
1504 	 * type. We can always use the base type.
1505 	 */
1506 	if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid,
1507 	    isroot)) != 0)
1508 		return (ret);
1509 	ctf_dprintf("Got back id %d\n", *idp);
1510 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1511 }
1512 
1513 /*
1514  * Given "const int const_array3[11]", GCC7 at least will create a DIE tree of
1515  * DW_TAG_const_type:DW_TAG_array_type:DW_Tag_const_type:<member_type>.
1516  *
1517  * Given C's syntax, this renders out as "const const int const_array3[11]".  To
1518  * get closer to round-tripping (and make the unit tests work), we'll peek for
1519  * this case, and avoid adding the extraneous qualifier if we see that the
1520  * underlying array referent already has the same qualifier.
1521  *
1522  * This is unfortunately less trivial than it could be: this issue applies to
1523  * qualifier sets like "const volatile", as well as multi-dimensional arrays, so
1524  * we need to descend down those.
1525  *
1526  * Returns CTF_ERR on error, or a boolean value otherwise.
1527  */
1528 static int
1529 needed_array_qualifier(ctf_cu_t *cup, int kind, ctf_id_t ref_id)
1530 {
1531 	const ctf_type_t *t;
1532 	ctf_arinfo_t arinfo;
1533 	int akind;
1534 
1535 	if (kind != CTF_K_CONST && kind != CTF_K_VOLATILE &&
1536 	    kind != CTF_K_RESTRICT)
1537 		return (1);
1538 
1539 	if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, ref_id)) == NULL)
1540 		return (CTF_ERR);
1541 
1542 	if (LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info) != CTF_K_ARRAY)
1543 		return (1);
1544 
1545 	if (ctf_dyn_array_info(cup->cu_ctfp, ref_id, &arinfo) != 0)
1546 		return (CTF_ERR);
1547 
1548 	ctf_id_t id = arinfo.ctr_contents;
1549 
1550 	for (;;) {
1551 		if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, id)) == NULL)
1552 			return (CTF_ERR);
1553 
1554 		akind = LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info);
1555 
1556 		if (akind == kind)
1557 			break;
1558 
1559 		if (akind == CTF_K_ARRAY) {
1560 			if (ctf_dyn_array_info(cup->cu_ctfp,
1561 			    id, &arinfo) != 0)
1562 				return (CTF_ERR);
1563 			id = arinfo.ctr_contents;
1564 			continue;
1565 		}
1566 
1567 		if (akind != CTF_K_CONST && akind != CTF_K_VOLATILE &&
1568 		    akind != CTF_K_RESTRICT)
1569 			break;
1570 
1571 		id = t->ctt_type;
1572 	}
1573 
1574 	if (kind == akind) {
1575 		ctf_dprintf("ignoring extraneous %s qualifier for array %d\n",
1576 		    ctf_kind_name(cup->cu_ctfp, kind), ref_id);
1577 	}
1578 
1579 	return (kind != akind);
1580 }
1581 
1582 static int
1583 ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1584     int kind, int isroot)
1585 {
1586 	int ret;
1587 	ctf_id_t id;
1588 	Dwarf_Die tdie;
1589 	char *name;
1590 	size_t namelen;
1591 
1592 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1593 	    ret != ENOENT)
1594 		return (ret);
1595 	if (ret == ENOENT) {
1596 		name = NULL;
1597 		namelen = 0;
1598 	} else {
1599 		namelen = strlen(name);
1600 	}
1601 
1602 	ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
1603 
1604 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
1605 		if (ret != ENOENT) {
1606 			ctf_free(name, namelen);
1607 			return (ret);
1608 		}
1609 		if ((id = ctf_dwarf_void(cup)) == CTF_ERR) {
1610 			ctf_free(name, namelen);
1611 			return (ctf_errno(cup->cu_ctfp));
1612 		}
1613 	} else {
1614 		if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
1615 		    CTF_ADD_NONROOT)) != 0) {
1616 			ctf_free(name, namelen);
1617 			return (ret);
1618 		}
1619 	}
1620 
1621 	if ((ret = needed_array_qualifier(cup, kind, id)) <= 0) {
1622 		if (ret != 0) {
1623 			ret = (ctf_errno(cup->cu_ctfp));
1624 		} else {
1625 			*idp = id;
1626 		}
1627 
1628 		ctf_free(name, namelen);
1629 		return (ret);
1630 	}
1631 
1632 	if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) ==
1633 	    CTF_ERR) {
1634 		ctf_free(name, namelen);
1635 		return (ctf_errno(cup->cu_ctfp));
1636 	}
1637 
1638 	ctf_free(name, namelen);
1639 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1640 }
1641 
1642 /*
1643  * Get the size of the type of a particular die. Note that this is a simple
1644  * version that doesn't attempt to traverse further than expecting a single
1645  * sized type reference (so no qualifiers etc.). Nor does it attempt to do as
1646  * much as ctf_type_size() - which we cannot use here as that doesn't look up
1647  * dynamic types, and we don't yet want to do a ctf_update().
1648  */
1649 static int
1650 ctf_dwarf_get_type_size(ctf_cu_t *cup, Dwarf_Die die, size_t *sizep)
1651 {
1652 	const ctf_type_t *t;
1653 	Dwarf_Die tdie;
1654 	ctf_id_t tid;
1655 	int ret;
1656 
1657 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1658 		return (ret);
1659 
1660 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1661 	    CTF_ADD_NONROOT)) != 0)
1662 		return (ret);
1663 
1664 	if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, tid)) == NULL)
1665 		return (ENOENT);
1666 
1667 	*sizep = ctf_get_ctt_size(cup->cu_ctfp, t, NULL, NULL);
1668 	return (0);
1669 }
1670 
1671 static int
1672 ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1673 {
1674 	size_t size = 0;
1675 	Dwarf_Die child;
1676 	ctf_id_t id;
1677 	char *name;
1678 	int ret;
1679 
1680 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1681 	    ret != ENOENT)
1682 		return (ret);
1683 	if (ret == ENOENT)
1684 		name = NULL;
1685 
1686 	(void) ctf_dwarf_get_type_size(cup, die, &size);
1687 
1688 	id = ctf_add_enum(cup->cu_ctfp, isroot, name, size);
1689 	ctf_dprintf("added enum %s (%d)\n", name, id);
1690 	if (name != NULL)
1691 		ctf_free(name, strlen(name) + 1);
1692 	if (id == CTF_ERR)
1693 		return (ctf_errno(cup->cu_ctfp));
1694 	*idp = id;
1695 	if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0)
1696 		return (ret);
1697 
1698 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) {
1699 		if (ret == ENOENT)
1700 			ret = 0;
1701 		return (ret);
1702 	}
1703 
1704 	while (child != NULL) {
1705 		Dwarf_Half tag;
1706 		Dwarf_Signed sval;
1707 		Dwarf_Unsigned uval;
1708 		Dwarf_Die arg = child;
1709 		int eval;
1710 
1711 		if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0)
1712 			return (ret);
1713 
1714 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1715 			return (ret);
1716 
1717 		if (tag != DW_TAG_enumerator) {
1718 			if ((ret = ctf_dwarf_convert_type(cup, arg, NULL,
1719 			    CTF_ADD_NONROOT)) != 0)
1720 				return (ret);
1721 			continue;
1722 		}
1723 
1724 		/*
1725 		 * DWARF v4 section 5.7 tells us we'll always have names.
1726 		 */
1727 		if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0)
1728 			return (ret);
1729 
1730 		/*
1731 		 * We have to be careful here: newer GCCs generate DWARF where
1732 		 * an unsigned value will happily pass ctf_dwarf_signed().
1733 		 * Since negative values will fail ctf_dwarf_unsigned(), we try
1734 		 * that first to make sure we get the right value.
1735 		 */
1736 		if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value,
1737 		    &uval)) == 0) {
1738 			eval = (int)uval;
1739 		} else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value,
1740 		    &sval)) == 0) {
1741 			eval = sval;
1742 		}
1743 
1744 		if (ret != 0) {
1745 			if (ret != ENOENT)
1746 				return (ret);
1747 
1748 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1749 			    "encountered enumeration without constant value\n");
1750 			return (ECTF_CONVBKERR);
1751 		}
1752 
1753 		ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval);
1754 		if (ret == CTF_ERR) {
1755 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1756 			    "failed to add enumarator %s (%d) to %d\n",
1757 			    name, eval, id);
1758 			ctf_free(name, strlen(name) + 1);
1759 			return (ctf_errno(cup->cu_ctfp));
1760 		}
1761 		ctf_free(name, strlen(name) + 1);
1762 	}
1763 
1764 	return (0);
1765 }
1766 
1767 /*
1768  * For a function pointer, walk over and process all of its children, unless we
1769  * encounter one that's just a declaration. In which case, we error on it.
1770  */
1771 static int
1772 ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1773 {
1774 	int ret;
1775 	Dwarf_Bool b;
1776 	ctf_funcinfo_t fi;
1777 	Dwarf_Die retdie;
1778 	ctf_id_t *argv = NULL;
1779 
1780 	bzero(&fi, sizeof (ctf_funcinfo_t));
1781 
1782 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
1783 		if (ret != ENOENT)
1784 			return (ret);
1785 	} else {
1786 		if (b != 0)
1787 			return (EPROTOTYPE);
1788 	}
1789 
1790 	/*
1791 	 * Return type is in DW_AT_type, if none, it returns void.
1792 	 */
1793 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) {
1794 		if (ret != ENOENT)
1795 			return (ret);
1796 		if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR)
1797 			return (ctf_errno(cup->cu_ctfp));
1798 	} else {
1799 		if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return,
1800 		    CTF_ADD_NONROOT)) != 0)
1801 			return (ret);
1802 	}
1803 
1804 	if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) {
1805 		return (ret);
1806 	}
1807 
1808 	if (fi.ctc_argc != 0) {
1809 		argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
1810 		if (argv == NULL)
1811 			return (ENOMEM);
1812 
1813 		if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) {
1814 			ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1815 			return (ret);
1816 		}
1817 	}
1818 
1819 	if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) ==
1820 	    CTF_ERR) {
1821 		ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1822 		return (ctf_errno(cup->cu_ctfp));
1823 	}
1824 
1825 	ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1826 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1827 }
1828 
1829 static int
1830 ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1831     int isroot)
1832 {
1833 	int ret;
1834 	Dwarf_Off offset;
1835 	Dwarf_Half tag;
1836 	ctf_dwmap_t lookup, *map;
1837 	ctf_id_t id;
1838 
1839 	if (idp == NULL)
1840 		idp = &id;
1841 
1842 	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
1843 		return (ret);
1844 
1845 	if (offset > cup->cu_maxoff) {
1846 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1847 		    "die offset %llu beyond maximum for header %llu\n",
1848 		    offset, cup->cu_maxoff);
1849 		return (ECTF_CONVBKERR);
1850 	}
1851 
1852 	/*
1853 	 * If we've already added an entry for this offset, then we're done.
1854 	 */
1855 	lookup.cdm_off = offset;
1856 	if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) {
1857 		*idp = map->cdm_id;
1858 		return (0);
1859 	}
1860 
1861 	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
1862 		return (ret);
1863 
1864 	ret = ENOTSUP;
1865 	switch (tag) {
1866 	case DW_TAG_base_type:
1867 		ctf_dprintf("base\n");
1868 		ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset);
1869 		break;
1870 	case DW_TAG_array_type:
1871 		ctf_dprintf("array\n");
1872 		ret = ctf_dwarf_create_array(cup, die, idp, isroot);
1873 		break;
1874 	case DW_TAG_enumeration_type:
1875 		ctf_dprintf("enum\n");
1876 		ret = ctf_dwarf_create_enum(cup, die, idp, isroot);
1877 		break;
1878 	case DW_TAG_pointer_type:
1879 		ctf_dprintf("pointer\n");
1880 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER,
1881 		    isroot);
1882 		break;
1883 	case DW_TAG_structure_type:
1884 		ctf_dprintf("struct\n");
1885 		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT,
1886 		    isroot);
1887 		break;
1888 	case DW_TAG_subroutine_type:
1889 		ctf_dprintf("fptr\n");
1890 		ret = ctf_dwarf_create_fptr(cup, die, idp, isroot);
1891 		break;
1892 	case DW_TAG_typedef:
1893 		ctf_dprintf("typedef\n");
1894 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF,
1895 		    isroot);
1896 		break;
1897 	case DW_TAG_union_type:
1898 		ctf_dprintf("union\n");
1899 		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION,
1900 		    isroot);
1901 		break;
1902 	case DW_TAG_const_type:
1903 		ctf_dprintf("const\n");
1904 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST,
1905 		    isroot);
1906 		break;
1907 	case DW_TAG_volatile_type:
1908 		ctf_dprintf("volatile\n");
1909 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE,
1910 		    isroot);
1911 		break;
1912 	case DW_TAG_restrict_type:
1913 		ctf_dprintf("restrict\n");
1914 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT,
1915 		    isroot);
1916 		break;
1917 	default:
1918 		ctf_dprintf("ignoring tag type %x\n", tag);
1919 		*idp = CTF_ERR;
1920 		ret = 0;
1921 		break;
1922 	}
1923 	ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
1924 	    ret);
1925 
1926 	return (ret);
1927 }
1928 
1929 static int
1930 ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die)
1931 {
1932 	int ret;
1933 	Dwarf_Die child;
1934 
1935 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1936 		return (ret);
1937 
1938 	if (child == NULL)
1939 		return (0);
1940 
1941 	return (ctf_dwarf_convert_die(cup, die));
1942 }
1943 
1944 static int
1945 ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
1946     boolean_t fptr)
1947 {
1948 	int ret;
1949 	Dwarf_Die child, sib, arg;
1950 
1951 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1952 		return (ret);
1953 
1954 	arg = child;
1955 	while (arg != NULL) {
1956 		Dwarf_Half tag;
1957 
1958 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1959 			return (ret);
1960 
1961 		/*
1962 		 * We have to check for a varargs type declaration. This will
1963 		 * happen in one of two ways. If we have a function pointer
1964 		 * type, then it'll be done with a tag of type
1965 		 * DW_TAG_unspecified_parameters. However, it only means we have
1966 		 * a variable number of arguments, if we have more than one
1967 		 * argument found so far. Otherwise, when we have a function
1968 		 * type, it instead uses a formal parameter whose name is '...'
1969 		 * to indicate a variable arguments member.
1970 		 *
1971 		 * Also, if we have a function pointer, then we have to expect
1972 		 * that we might not get a name at all.
1973 		 */
1974 		if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
1975 			char *name;
1976 			if ((ret = ctf_dwarf_string(cup, die, DW_AT_name,
1977 			    &name)) != 0)
1978 				return (ret);
1979 			if (strcmp(name, DWARF_VARARGS_NAME) == 0)
1980 				fip->ctc_flags |= CTF_FUNC_VARARG;
1981 			else
1982 				fip->ctc_argc++;
1983 			ctf_free(name, strlen(name) + 1);
1984 		} else if (tag == DW_TAG_formal_parameter) {
1985 			fip->ctc_argc++;
1986 		} else if (tag == DW_TAG_unspecified_parameters &&
1987 		    fip->ctc_argc > 0) {
1988 			fip->ctc_flags |= CTF_FUNC_VARARG;
1989 		}
1990 		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
1991 			return (ret);
1992 		arg = sib;
1993 	}
1994 
1995 	return (0);
1996 }
1997 
1998 static int
1999 ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2000     ctf_id_t *argv)
2001 {
2002 	int ret;
2003 	int i = 0;
2004 	Dwarf_Die child, sib, arg;
2005 
2006 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2007 		return (ret);
2008 
2009 	arg = child;
2010 	while (arg != NULL) {
2011 		Dwarf_Half tag;
2012 
2013 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2014 			return (ret);
2015 		if (tag == DW_TAG_formal_parameter) {
2016 			Dwarf_Die tdie;
2017 
2018 			if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type,
2019 			    &tdie)) != 0)
2020 				return (ret);
2021 
2022 			if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i],
2023 			    CTF_ADD_ROOT)) != 0)
2024 				return (ret);
2025 			i++;
2026 
2027 			/*
2028 			 * Once we hit argc entries, we're done. This ensures we
2029 			 * don't accidentally hit a varargs which should be the
2030 			 * last entry.
2031 			 */
2032 			if (i == fip->ctc_argc)
2033 				break;
2034 		}
2035 
2036 		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2037 			return (ret);
2038 		arg = sib;
2039 	}
2040 
2041 	return (0);
2042 }
2043 
2044 static int
2045 ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die)
2046 {
2047 	ctf_dwfunc_t *cdf;
2048 	Dwarf_Die tdie;
2049 	Dwarf_Bool b;
2050 	char *name;
2051 	int ret;
2052 
2053 	/*
2054 	 * Functions that don't have a name are generally functions that have
2055 	 * been inlined and thus most information about them has been lost. If
2056 	 * we can't get a name, then instead of returning ENOENT, we silently
2057 	 * swallow the error.
2058 	 */
2059 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) {
2060 		if (ret == ENOENT)
2061 			return (0);
2062 		return (ret);
2063 	}
2064 
2065 	ctf_dprintf("beginning work on function %s (die %llx)\n",
2066 	    name, ctf_die_offset(die));
2067 
2068 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
2069 		if (ret != ENOENT)
2070 			return (ret);
2071 	} else if (b != 0) {
2072 		/*
2073 		 * GCC7 at least creates empty DW_AT_declarations for functions
2074 		 * defined in headers.  As they lack details on the function
2075 		 * prototype, we need to ignore them.  If we later actually
2076 		 * see the relevant function's definition, we will see another
2077 		 * DW_TAG_subprogram that is more complete.
2078 		 */
2079 		ctf_dprintf("ignoring declaration of function %s (die %llx)\n",
2080 		    name, ctf_die_offset(die));
2081 		return (0);
2082 	}
2083 
2084 	if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
2085 		ctf_free(name, strlen(name) + 1);
2086 		return (ENOMEM);
2087 	}
2088 	bzero(cdf, sizeof (ctf_dwfunc_t));
2089 	cdf->cdf_name = name;
2090 
2091 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) {
2092 		if ((ret = ctf_dwarf_convert_type(cup, tdie,
2093 		    &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
2094 			ctf_free(name, strlen(name) + 1);
2095 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2096 			return (ret);
2097 		}
2098 	} else if (ret != ENOENT) {
2099 		ctf_free(name, strlen(name) + 1);
2100 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2101 		return (ret);
2102 	} else {
2103 		if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) ==
2104 		    CTF_ERR) {
2105 			ctf_free(name, strlen(name) + 1);
2106 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2107 			return (ctf_errno(cup->cu_ctfp));
2108 		}
2109 	}
2110 
2111 	/*
2112 	 * A function has a number of children, some of which may not be ones we
2113 	 * care about. Children that we care about have a type of
2114 	 * DW_TAG_formal_parameter. We're going to do two passes, the first to
2115 	 * count the arguments, the second to process them. Afterwards, we
2116 	 * should be good to go ahead and add this function.
2117 	 *
2118 	 * Note, we already got the return type by going in and grabbing it out
2119 	 * of the DW_AT_type.
2120 	 */
2121 	if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip,
2122 	    B_FALSE)) != 0) {
2123 		ctf_free(name, strlen(name) + 1);
2124 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2125 		return (ret);
2126 	}
2127 
2128 	ctf_dprintf("beginning to convert function arguments %s\n", name);
2129 	if (cdf->cdf_fip.ctc_argc != 0) {
2130 		uint_t argc = cdf->cdf_fip.ctc_argc;
2131 		cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
2132 		if (cdf->cdf_argv == NULL) {
2133 			ctf_free(name, strlen(name) + 1);
2134 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2135 			return (ENOMEM);
2136 		}
2137 		if ((ret = ctf_dwarf_convert_fargs(cup, die,
2138 		    &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
2139 			ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
2140 			ctf_free(name, strlen(name) + 1);
2141 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2142 			return (ret);
2143 		}
2144 	} else {
2145 		cdf->cdf_argv = NULL;
2146 	}
2147 
2148 	if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) {
2149 		ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
2150 		    cdf->cdf_fip.ctc_argc);
2151 		ctf_free(name, strlen(name) + 1);
2152 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2153 		return (ret);
2154 	}
2155 
2156 	ctf_list_append(&cup->cu_funcs, cdf);
2157 	return (ret);
2158 }
2159 
2160 /*
2161  * Convert variables, but only if they're not prototypes and have names.
2162  */
2163 static int
2164 ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die)
2165 {
2166 	int ret;
2167 	char *name;
2168 	Dwarf_Bool b;
2169 	Dwarf_Die tdie;
2170 	ctf_id_t id;
2171 	ctf_dwvar_t *cdv;
2172 
2173 	/* Skip "Non-Defining Declarations" */
2174 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) {
2175 		if (b != 0)
2176 			return (0);
2177 	} else if (ret != ENOENT) {
2178 		return (ret);
2179 	}
2180 
2181 	/*
2182 	 * If we find a DIE of "Declarations Completing Non-Defining
2183 	 * Declarations", we will use the referenced type's DIE.  This isn't
2184 	 * quite correct, e.g. DW_AT_decl_line will be the forward declaration
2185 	 * not this site.  It's sufficient for what we need, however: in
2186 	 * particular, we should find DW_AT_external as needed there.
2187 	 */
2188 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification,
2189 	    &tdie)) == 0) {
2190 		Dwarf_Off offset;
2191 		if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0)
2192 			return (ret);
2193 		ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n",
2194 		    ctf_die_offset(die), ctf_die_offset(tdie));
2195 		die = tdie;
2196 	} else if (ret != ENOENT) {
2197 		return (ret);
2198 	}
2199 
2200 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
2201 	    ret != ENOENT)
2202 		return (ret);
2203 	if (ret == ENOENT)
2204 		return (0);
2205 
2206 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
2207 		ctf_free(name, strlen(name) + 1);
2208 		return (ret);
2209 	}
2210 
2211 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
2212 	    CTF_ADD_ROOT)) != 0)
2213 		return (ret);
2214 
2215 	if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
2216 		ctf_free(name, strlen(name) + 1);
2217 		return (ENOMEM);
2218 	}
2219 
2220 	cdv->cdv_name = name;
2221 	cdv->cdv_type = id;
2222 
2223 	if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) {
2224 		ctf_free(cdv, sizeof (ctf_dwvar_t));
2225 		ctf_free(name, strlen(name) + 1);
2226 		return (ret);
2227 	}
2228 
2229 	ctf_list_append(&cup->cu_vars, cdv);
2230 	return (0);
2231 }
2232 
2233 /*
2234  * Walk through our set of top-level types and process them.
2235  */
2236 static int
2237 ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die)
2238 {
2239 	int ret;
2240 	Dwarf_Off offset;
2241 	Dwarf_Half tag;
2242 
2243 	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
2244 		return (ret);
2245 
2246 	if (offset > cup->cu_maxoff) {
2247 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2248 		    "die offset %llu beyond maximum for header %llu\n",
2249 		    offset, cup->cu_maxoff);
2250 		return (ECTF_CONVBKERR);
2251 	}
2252 
2253 	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2254 		return (ret);
2255 
2256 	ret = 0;
2257 	switch (tag) {
2258 	case DW_TAG_subprogram:
2259 		ctf_dprintf("top level func\n");
2260 		ret = ctf_dwarf_convert_function(cup, die);
2261 		break;
2262 	case DW_TAG_variable:
2263 		ctf_dprintf("top level var\n");
2264 		ret = ctf_dwarf_convert_variable(cup, die);
2265 		break;
2266 	case DW_TAG_lexical_block:
2267 		ctf_dprintf("top level block\n");
2268 		ret = ctf_dwarf_walk_lexical(cup, die);
2269 		break;
2270 	case DW_TAG_enumeration_type:
2271 	case DW_TAG_structure_type:
2272 	case DW_TAG_typedef:
2273 	case DW_TAG_union_type:
2274 		ctf_dprintf("top level type\n");
2275 		ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE);
2276 		break;
2277 	default:
2278 		break;
2279 	}
2280 
2281 	return (ret);
2282 }
2283 
2284 
2285 /*
2286  * We're given a node. At this node we need to convert it and then proceed to
2287  * convert any siblings that are associaed with this die.
2288  */
2289 static int
2290 ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die)
2291 {
2292 	while (die != NULL) {
2293 		int ret;
2294 		Dwarf_Die sib;
2295 
2296 		if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0)
2297 			return (ret);
2298 
2299 		if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0)
2300 			return (ret);
2301 		die = sib;
2302 	}
2303 	return (0);
2304 }
2305 
2306 static int
2307 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2308 {
2309 	ctf_dwmap_t *map;
2310 
2311 	for (map = avl_first(&cup->cu_map); map != NULL;
2312 	    map = AVL_NEXT(&cup->cu_map, map)) {
2313 		int ret;
2314 		if (map->cdm_fix == B_FALSE)
2315 			continue;
2316 		if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2317 		    addpass)) != 0)
2318 			return (ret);
2319 	}
2320 
2321 	return (0);
2322 }
2323 
2324 /*
2325  * The DWARF information about a symbol and the information in the symbol table
2326  * may not be the same due to symbol reduction that is performed by ld due to a
2327  * mapfile or other such directive. We process weak symbols at a later time.
2328  *
2329  * The following are the rules that we employ:
2330  *
2331  * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2332  * with the same name.
2333  *
2334  * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2335  * with the same name and the same file. This case may happen due to mapfile
2336  * reduction.
2337  *
2338  * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2339  * with the same name and the same file.
2340  *
2341  * 4. A DWARF function that has the same name as the symbol table entry, but the
2342  * files do not match. This is considered a 'fuzzy' match. This may also happen
2343  * due to a mapfile reduction. Fuzzy matching is only used when we know that the
2344  * file in question refers to the primary object. This is because when a symbol
2345  * is reduced in a mapfile, it's always going to be tagged as a local value in
2346  * the generated output and it is considered as to belong to the primary file
2347  * which is the first STT_FILE symbol we see.
2348  */
2349 static boolean_t
2350 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2351     uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2352     boolean_t dwarf_global, boolean_t *is_fuzzy)
2353 {
2354 	*is_fuzzy = B_FALSE;
2355 
2356 	if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2357 		return (B_FALSE);
2358 	}
2359 
2360 	if (strcmp(symtab_name, dwarf_name) != 0) {
2361 		return (B_FALSE);
2362 	}
2363 
2364 	if (symtab_bind == STB_GLOBAL) {
2365 		return (dwarf_global);
2366 	}
2367 
2368 	if (strcmp(symtab_file, dwarf_file) == 0) {
2369 		return (B_TRUE);
2370 	}
2371 
2372 	if (dwarf_global) {
2373 		*is_fuzzy = B_TRUE;
2374 		return (B_TRUE);
2375 	}
2376 
2377 	return (B_FALSE);
2378 }
2379 
2380 static ctf_dwfunc_t *
2381 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2382     uint_t bind, boolean_t primary)
2383 {
2384 	ctf_dwfunc_t *cdf, *fuzzy = NULL;
2385 
2386 	if (bind == STB_WEAK)
2387 		return (NULL);
2388 
2389 	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2390 		return (NULL);
2391 
2392 	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2393 	    cdf = ctf_list_next(cdf)) {
2394 		boolean_t is_fuzzy = B_FALSE;
2395 
2396 		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2397 		    cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2398 			if (is_fuzzy) {
2399 				if (primary) {
2400 					fuzzy = cdf;
2401 				}
2402 				continue;
2403 			} else {
2404 				return (cdf);
2405 			}
2406 		}
2407 	}
2408 
2409 	return (fuzzy);
2410 }
2411 
2412 static ctf_dwvar_t *
2413 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2414     uint_t bind, boolean_t primary)
2415 {
2416 	ctf_dwvar_t *cdv, *fuzzy = NULL;
2417 
2418 	if (bind == STB_WEAK)
2419 		return (NULL);
2420 
2421 	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2422 		return (NULL);
2423 
2424 	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2425 	    cdv = ctf_list_next(cdv)) {
2426 		boolean_t is_fuzzy = B_FALSE;
2427 
2428 		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2429 		    cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2430 			if (is_fuzzy) {
2431 				if (primary) {
2432 					fuzzy = cdv;
2433 				}
2434 			} else {
2435 				return (cdv);
2436 			}
2437 		}
2438 	}
2439 
2440 	return (fuzzy);
2441 }
2442 
2443 static int
2444 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2445     const char *file, const char *name, boolean_t primary, void *arg)
2446 {
2447 	int ret;
2448 	uint_t bind, type;
2449 	ctf_cu_t *cup = arg;
2450 
2451 	bind = GELF_ST_BIND(symp->st_info);
2452 	type = GELF_ST_TYPE(symp->st_info);
2453 
2454 	/*
2455 	 * Come back to weak symbols in another pass
2456 	 */
2457 	if (bind == STB_WEAK)
2458 		return (0);
2459 
2460 	if (type == STT_OBJECT) {
2461 		ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2462 		    bind, primary);
2463 		if (cdv == NULL)
2464 			return (0);
2465 		ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2466 		ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2467 	} else {
2468 		ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2469 		    bind, primary);
2470 		if (cdf == NULL)
2471 			return (0);
2472 		ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2473 		    cdf->cdf_argv);
2474 		ctf_dprintf("added function %s\n", name);
2475 	}
2476 
2477 	if (ret == CTF_ERR) {
2478 		return (ctf_errno(cup->cu_ctfp));
2479 	}
2480 
2481 	return (0);
2482 }
2483 
2484 static int
2485 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2486 {
2487 	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2488 }
2489 
2490 /*
2491  * If we have a weak symbol, attempt to find the strong symbol it will resolve
2492  * to.  Note: the code where this actually happens is in sym_process() in
2493  * cmd/sgs/libld/common/syms.c
2494  *
2495  * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
2496  * a candidate, it must:
2497  *
2498  * - have the same type (function, object)
2499  * - have the same value (address)
2500  * - have the same size
2501  * - not be another weak symbol
2502  * - belong to the same section (checked via section index)
2503  *
2504  * To perform this check, we first iterate over the symbol table. For each weak
2505  * symbol that we encounter, we then do a second walk over the symbol table,
2506  * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2507  * either a local or global symbol. If we find a global symbol then we go with
2508  * it and stop searching for additional matches.
2509  *
2510  * If instead, we find a local symbol, things are more complicated. The first
2511  * thing we do is to try and see if we have file information about both symbols
2512  * (STT_FILE). If they both have file information and it matches, then we treat
2513  * that as a good match and stop searching for additional matches.
2514  *
2515  * Otherwise, this means we have a non-matching file and a local symbol. We
2516  * treat this as a candidate and if we find a better match (one of the two cases
2517  * above), use that instead. There are two different ways this can happen.
2518  * Either this is a completely different symbol, or it's a once-global symbol
 * that was scoped to local via a mapfile.  In the latter case, curfile is
2520  * likely inaccurate since the linker does not preserve the needed curfile in
2521  * the order of the symbol table (see the comments about locally scoped symbols
2522  * in libld's update_osym()).  As we can't tell this case from the former one,
2523  * we use this symbol iff no other matching symbol is found.
2524  *
2525  * What we really need here is a SUNW section containing weak<->strong mappings
2526  * that we can consume.
2527  */
typedef struct ctf_dwarf_weak_arg {
	/* The weak symbol that we are trying to find a strong match for */
	const Elf64_Sym *cweak_symp;
	/* File name that accompanied the weak symbol; may be NULL */
	const char *cweak_file;
	/* Set once a local symbol from a non-matching file has matched */
	boolean_t cweak_candidate;
	/* Symbol index of the most recent match or candidate */
	ulong_t cweak_idx;
} ctf_dwarf_weak_arg_t;
2534 
2535 static int
2536 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2537     const char *name, boolean_t primary, void *arg)
2538 {
2539 	ctf_dwarf_weak_arg_t *cweak = arg;
2540 
2541 	const Elf64_Sym *wsymp = cweak->cweak_symp;
2542 
2543 	ctf_dprintf("comparing weak to %s\n", name);
2544 
2545 	if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2546 		return (0);
2547 	}
2548 
2549 	if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2550 		return (0);
2551 	}
2552 
2553 	if (wsymp->st_value != symp->st_value) {
2554 		return (0);
2555 	}
2556 
2557 	if (wsymp->st_size != symp->st_size) {
2558 		return (0);
2559 	}
2560 
2561 	if (wsymp->st_shndx != symp->st_shndx) {
2562 		return (0);
2563 	}
2564 
2565 	/*
2566 	 * Check if it's a weak candidate.
2567 	 */
2568 	if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
2569 	    (file == NULL || cweak->cweak_file == NULL ||
2570 	    strcmp(file, cweak->cweak_file) != 0)) {
2571 		cweak->cweak_candidate = B_TRUE;
2572 		cweak->cweak_idx = idx;
2573 		return (0);
2574 	}
2575 
2576 	/*
2577 	 * Found a match, break.
2578 	 */
2579 	cweak->cweak_idx = idx;
2580 	return (1);
2581 }
2582 
2583 static int
2584 ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2585 {
2586 	ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx);
2587 
2588 	/*
2589 	 * If we matched something that for some reason didn't have type data,
2590 	 * we don't consider that a fatal error and silently swallow it.
2591 	 */
2592 	if (id == CTF_ERR) {
2593 		if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT)
2594 			return (0);
2595 		else
2596 			return (ctf_errno(cup->cu_ctfp));
2597 	}
2598 
2599 	if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR)
2600 		return (ctf_errno(cup->cu_ctfp));
2601 
2602 	return (0);
2603 }
2604 
2605 static int
2606 ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2607 {
2608 	int ret;
2609 	ctf_funcinfo_t fip;
2610 	ctf_id_t *args = NULL;
2611 
2612 	if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) {
2613 		if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT)
2614 			return (0);
2615 		else
2616 			return (ctf_errno(cup->cu_ctfp));
2617 	}
2618 
2619 	if (fip.ctc_argc != 0) {
2620 		args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
2621 		if (args == NULL)
2622 			return (ENOMEM);
2623 
2624 		if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2625 		    CTF_ERR) {
2626 			ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2627 			return (ctf_errno(cup->cu_ctfp));
2628 		}
2629 	}
2630 
2631 	ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2632 	if (args != NULL)
2633 		ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2634 	if (ret == CTF_ERR)
2635 		return (ctf_errno(cup->cu_ctfp));
2636 
2637 	return (0);
2638 }
2639 
2640 static int
2641 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2642     const char *name, boolean_t primary, void *arg)
2643 {
2644 	int ret, type;
2645 	ctf_dwarf_weak_arg_t cweak;
2646 	ctf_cu_t *cup = arg;
2647 
2648 	/*
2649 	 * We only care about weak symbols.
2650 	 */
2651 	if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2652 		return (0);
2653 
2654 	type = GELF_ST_TYPE(symp->st_info);
2655 	ASSERT(type == STT_OBJECT || type == STT_FUNC);
2656 
2657 	/*
2658 	 * For each weak symbol we encounter, we need to do a second iteration
2659 	 * to try and find a match. We should probably think about other
2660 	 * techniques to try and save us time in the future.
2661 	 */
2662 	cweak.cweak_symp = symp;
2663 	cweak.cweak_file = file;
2664 	cweak.cweak_candidate = B_FALSE;
2665 	cweak.cweak_idx = 0;
2666 
2667 	ctf_dprintf("Trying to find weak equiv for %s\n", name);
2668 
2669 	ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2670 	VERIFY(ret == 0 || ret == 1);
2671 
2672 	/*
2673 	 * Nothing was ever found, we're not going to add anything for this
2674 	 * entry.
2675 	 */
2676 	if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2677 		ctf_dprintf("found no weak match for %s\n", name);
2678 		return (0);
2679 	}
2680 
2681 	/*
2682 	 * Now, finally go and add the type based on the match.
2683 	 */
2684 	ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2685 	if (type == STT_OBJECT) {
2686 		ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2687 	} else {
2688 		ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2689 	}
2690 
2691 	return (ret);
2692 }
2693 
2694 static int
2695 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2696 {
2697 	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2698 }
2699 
2700 /* ARGSUSED */
2701 static int
2702 ctf_dwarf_convert_one(void *arg, void *unused)
2703 {
2704 	int ret;
2705 	ctf_file_t *dedup;
2706 	ctf_cu_t *cup = arg;
2707 
2708 	ctf_dprintf("converting die: %s\n", cup->cu_name);
2709 	ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2710 	VERIFY(cup != NULL);
2711 
2712 	ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2713 	ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2714 	    ret);
2715 	if (ret != 0) {
2716 		return (ret);
2717 	}
2718 	if (ctf_update(cup->cu_ctfp) != 0) {
2719 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2720 		    "failed to update output ctf container"));
2721 	}
2722 
2723 	ret = ctf_dwarf_fixup_die(cup, B_FALSE);
2724 	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2725 	    ret);
2726 	if (ret != 0) {
2727 		return (ret);
2728 	}
2729 	if (ctf_update(cup->cu_ctfp) != 0) {
2730 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2731 		    "failed to update output ctf container"));
2732 	}
2733 
2734 	ret = ctf_dwarf_fixup_die(cup, B_TRUE);
2735 	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2736 	    ret);
2737 	if (ret != 0) {
2738 		return (ret);
2739 	}
2740 	if (ctf_update(cup->cu_ctfp) != 0) {
2741 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2742 		    "failed to update output ctf container"));
2743 	}
2744 
2745 
2746 	if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) {
2747 		return (ctf_dwarf_error(cup, NULL, ret,
2748 		    "failed to convert strong functions and variables"));
2749 	}
2750 
2751 	if (ctf_update(cup->cu_ctfp) != 0) {
2752 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2753 		    "failed to update output ctf container"));
2754 	}
2755 
2756 	if (cup->cu_doweaks == B_TRUE) {
2757 		if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2758 			return (ctf_dwarf_error(cup, NULL, ret,
2759 			    "failed to convert weak functions and variables"));
2760 		}
2761 
2762 		if (ctf_update(cup->cu_ctfp) != 0) {
2763 			return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2764 			    "failed to update output ctf container"));
2765 		}
2766 	}
2767 
2768 	ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2769 	ctf_dprintf("adding inputs for dedup\n");
2770 	if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2771 		return (ctf_dwarf_error(cup, NULL, ret,
2772 		    "failed to add inputs for merge"));
2773 	}
2774 
2775 	ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2776 	if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2777 		return (ctf_dwarf_error(cup, NULL, ret,
2778 		    "failed to deduplicate die"));
2779 	}
2780 	ctf_close(cup->cu_ctfp);
2781 	cup->cu_ctfp = dedup;
2782 	ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2783 
2784 	return (0);
2785 }
2786 
2787 /*
2788  * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2789  * say in the single node case, it's been saved and the entry here has been set
2790  * to NULL, which ctf_close happily ignores.
2791  */
2792 static void
2793 ctf_dwarf_free_die(ctf_cu_t *cup)
2794 {
2795 	ctf_dwfunc_t *cdf, *ndf;
2796 	ctf_dwvar_t *cdv, *ndv;
2797 	ctf_dwbitf_t *cdb, *ndb;
2798 	ctf_dwmap_t *map;
2799 	void *cookie;
2800 	Dwarf_Error derr;
2801 
2802 	ctf_dprintf("Beginning to free die: %p\n", cup);
2803 	cup->cu_elf = NULL;
2804 	ctf_dprintf("Trying to free name: %p\n", cup->cu_name);
2805 	if (cup->cu_name != NULL)
2806 		ctf_free(cup->cu_name, strlen(cup->cu_name) + 1);
2807 	ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh);
2808 	if (cup->cu_cmh != NULL) {
2809 		ctf_merge_fini(cup->cu_cmh);
2810 		cup->cu_cmh = NULL;
2811 	}
2812 
2813 	ctf_dprintf("Trying to free functions\n");
2814 	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) {
2815 		ndf = ctf_list_next(cdf);
2816 		ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
2817 		if (cdf->cdf_fip.ctc_argc != 0) {
2818 			ctf_free(cdf->cdf_argv,
2819 			    sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
2820 		}
2821 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2822 	}
2823 
2824 	ctf_dprintf("Trying to free variables\n");
2825 	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) {
2826 		ndv = ctf_list_next(cdv);
2827 		ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
2828 		ctf_free(cdv, sizeof (ctf_dwvar_t));
2829 	}
2830 
2831 	ctf_dprintf("Trying to free bitfields\n");
2832 	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) {
2833 		ndb = ctf_list_next(cdb);
2834 		ctf_free(cdb, sizeof (ctf_dwbitf_t));
2835 	}
2836 
2837 	ctf_dprintf("Trying to clean up dwarf_t: %p\n", cup->cu_dwarf);
2838 	if (cup->cu_dwarf != NULL)
2839 		(void) dwarf_finish(cup->cu_dwarf, &derr);
2840 	cup->cu_dwarf = NULL;
2841 	ctf_close(cup->cu_ctfp);
2842 
2843 	cookie = NULL;
2844 	while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) {
2845 		ctf_free(map, sizeof (ctf_dwmap_t));
2846 	}
2847 	avl_destroy(&cup->cu_map);
2848 	cup->cu_errbuf = NULL;
2849 }
2850 
2851 static void
2852 ctf_dwarf_free_dies(ctf_cu_t *cdies, int ndies)
2853 {
2854 	int i;
2855 
2856 	ctf_dprintf("Beginning to free dies\n");
2857 	for (i = 0; i < ndies; i++) {
2858 		ctf_dwarf_free_die(&cdies[i]);
2859 	}
2860 
2861 	ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2862 }
2863 
2864 static int
2865 ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
2866     char *errbuf, size_t errlen)
2867 {
2868 	int ret;
2869 	Dwarf_Half vers;
2870 	Dwarf_Unsigned nexthdr;
2871 
2872 	while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
2873 	    &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
2874 		if (ret != DW_DLV_OK) {
2875 			(void) snprintf(errbuf, errlen,
2876 			    "file does not contain valid DWARF data: %s\n",
2877 			    dwarf_errmsg(*derr));
2878 			return (ECTF_CONVBKERR);
2879 		}
2880 
2881 		if (vers != DWARF_VERSION_TWO) {
2882 			(void) snprintf(errbuf, errlen,
2883 			    "unsupported DWARF version: %d\n", vers);
2884 			return (ECTF_CONVBKERR);
2885 		}
2886 		*ndies = *ndies + 1;
2887 	}
2888 
2889 	return (0);
2890 }
2891 
2892 static int
2893 ctf_dwarf_init_die(int fd, Elf *elf, ctf_cu_t *cup, int ndie, char *errbuf,
2894     size_t errlen)
2895 {
2896 	int ret;
2897 	Dwarf_Unsigned hdrlen, abboff, nexthdr;
2898 	Dwarf_Half addrsz;
2899 	Dwarf_Unsigned offset = 0;
2900 	Dwarf_Error derr;
2901 
2902 	while ((ret = dwarf_next_cu_header(cup->cu_dwarf, &hdrlen, NULL,
2903 	    &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
2904 		char *name;
2905 		Dwarf_Die cu, child;
2906 
2907 		/* Based on the counting above, we should be good to go */
2908 		VERIFY(ret == DW_DLV_OK);
2909 		if (ndie > 0) {
2910 			ndie--;
2911 			offset = nexthdr;
2912 			continue;
2913 		}
2914 
2915 		/*
2916 		 * Compilers are apparently inconsistent. Some emit no DWARF for
2917 		 * empty files and others emit empty compilation unit.
2918 		 */
2919 		cup->cu_voidtid = CTF_ERR;
2920 		cup->cu_longtid = CTF_ERR;
2921 		cup->cu_elf = elf;
2922 		cup->cu_maxoff = nexthdr - 1;
2923 		cup->cu_ctfp = ctf_fdcreate(fd, &ret);
2924 		if (cup->cu_ctfp == NULL)
2925 			return (ret);
2926 
2927 		avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
2928 		    offsetof(ctf_dwmap_t, cdm_avl));
2929 		cup->cu_errbuf = errbuf;
2930 		cup->cu_errlen = errlen;
2931 		bzero(&cup->cu_vars, sizeof (ctf_list_t));
2932 		bzero(&cup->cu_funcs, sizeof (ctf_list_t));
2933 		bzero(&cup->cu_bitfields, sizeof (ctf_list_t));
2934 
2935 		if ((ret = ctf_dwarf_die_elfenc(elf, cup, errbuf,
2936 		    errlen)) != 0)
2937 			return (ret);
2938 
2939 		if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0)
2940 			return (ret);
2941 
2942 		if (cu == NULL) {
2943 			(void) snprintf(errbuf, errlen,
2944 			    "file does not contain DWARF data");
2945 			return (ECTF_CONVNODEBUG);
2946 		}
2947 
2948 		if ((ret = ctf_dwarf_child(cup, cu, &child)) != 0)
2949 			return (ret);
2950 
2951 		if (child == NULL) {
2952 			(void) snprintf(errbuf, errlen,
2953 			    "file does not contain DWARF data");
2954 			return (ECTF_CONVNODEBUG);
2955 		}
2956 
2957 		cup->cu_cuoff = offset;
2958 		cup->cu_cu = child;
2959 
2960 		if ((cup->cu_cmh = ctf_merge_init(fd, &ret)) == NULL)
2961 			return (ret);
2962 
2963 		if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) {
2964 			size_t len = strlen(name) + 1;
2965 			char *b = basename(name);
2966 			cup->cu_name = strdup(b);
2967 			ctf_free(name, len);
2968 		}
2969 		break;
2970 	}
2971 
2972 	return (0);
2973 }
2974 
2975 /*
2976  * This is our only recourse to identify a C source file that is missing debug
2977  * info: it will be mentioned as an STT_FILE, but not have a compile unit entry.
2978  * (A traditional ctfmerge works on individual files, so can identify missing
2979  * DWARF more directly, via ctf_has_c_source() on the .o file.)
2980  *
2981  * As we operate on basenames, this can of course miss some cases, but it's
2982  * better than not checking at all.
2983  *
2984  * We explicitly whitelist some CRT components.  Failing that, there's always
2985  * the -m option.
2986  */
2987 static boolean_t
2988 c_source_has_debug(const char *file, ctf_cu_t *cus, size_t nr_cus)
2989 {
2990 	const char *basename = strrchr(file, '/');
2991 
2992 	if (basename == NULL)
2993 		basename = file;
2994 	else
2995 		basename++;
2996 
2997 	if (strcmp(basename, "common-crt.c") == 0 ||
2998 	    strcmp(basename, "gmon.c") == 0 ||
2999 	    strcmp(basename, "dlink_init.c") == 0 ||
3000 	    strcmp(basename, "dlink_common.c") == 0 ||
3001 	    strncmp(basename, "crt", strlen("crt")) == 0 ||
3002 	    strncmp(basename, "values-", strlen("values-")) == 0)
3003 		return (B_TRUE);
3004 
3005 	for (size_t i = 0; i < nr_cus; i++) {
3006 		if (strcmp(basename, cus[i].cu_name) == 0)
3007 			return (B_TRUE);
3008 	}
3009 
3010 	return (B_FALSE);
3011 }
3012 
3013 static int
3014 ctf_dwarf_check_missing(ctf_cu_t *cus, size_t nr_cus, Elf *elf,
3015     char *errmsg, size_t errlen)
3016 {
3017 	Elf_Scn *scn, *strscn;
3018 	Elf_Data *data, *strdata;
3019 	GElf_Shdr shdr;
3020 	ulong_t i;
3021 
3022 	scn = NULL;
3023 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
3024 		if (gelf_getshdr(scn, &shdr) == NULL) {
3025 			(void) snprintf(errmsg, errlen,
3026 			    "failed to get section header: %s\n",
3027 			    elf_errmsg(elf_errno()));
3028 			return (EINVAL);
3029 		}
3030 
3031 		if (shdr.sh_type == SHT_SYMTAB)
3032 			break;
3033 	}
3034 
3035 	if (scn == NULL)
3036 		return (0);
3037 
3038 	if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) {
3039 		(void) snprintf(errmsg, errlen,
3040 		    "failed to get str section: %s\n",
3041 		    elf_errmsg(elf_errno()));
3042 		return (EINVAL);
3043 	}
3044 
3045 	if ((data = elf_getdata(scn, NULL)) == NULL) {
3046 		(void) snprintf(errmsg, errlen, "failed to read section: %s\n",
3047 		    elf_errmsg(elf_errno()));
3048 		return (EINVAL);
3049 	}
3050 
3051 	if ((strdata = elf_getdata(strscn, NULL)) == NULL) {
3052 		(void) snprintf(errmsg, errlen,
3053 		    "failed to read string table: %s\n",
3054 		    elf_errmsg(elf_errno()));
3055 		return (EINVAL);
3056 	}
3057 
3058 	for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
3059 		GElf_Sym sym;
3060 		const char *file;
3061 		size_t len;
3062 
3063 		if (gelf_getsym(data, i, &sym) == NULL) {
3064 			(void) snprintf(errmsg, errlen,
3065 			    "failed to read sym %lu: %s\n",
3066 			    i, elf_errmsg(elf_errno()));
3067 			return (EINVAL);
3068 		}
3069 
3070 		if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
3071 			continue;
3072 
3073 		file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
3074 		len = strlen(file);
3075 		if (len < 2 || strncmp(".c", &file[len - 2], 2) != 0)
3076 			continue;
3077 
3078 		if (!c_source_has_debug(file, cus, nr_cus)) {
3079 			(void) snprintf(errmsg, errlen,
3080 			    "file %s is missing debug info\n", file);
3081 			return (ECTF_CONVNODEBUG);
3082 		}
3083 	}
3084 
3085 	return (0);
3086 }
3087 
3088 int
3089 ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, uint_t flags,
3090     ctf_file_t **fpp, char *errbuf, size_t errlen)
3091 {
3092 	int err, ret, ndies, i;
3093 	Dwarf_Debug dw;
3094 	Dwarf_Error derr;
3095 	ctf_cu_t *cdies = NULL, *cup;
3096 	workq_t *wqp = NULL;
3097 
3098 	*fpp = NULL;
3099 
3100 	ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
3101 	if (ret != DW_DLV_OK) {
3102 		if (ret == DW_DLV_NO_ENTRY ||
3103 		    dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
3104 			(void) snprintf(errbuf, errlen,
3105 			    "file does not contain DWARF data\n");
3106 			return (ECTF_CONVNODEBUG);
3107 		}
3108 
3109 		(void) snprintf(errbuf, errlen,
3110 		    "dwarf_elf_init() failed: %s\n", dwarf_errmsg(derr));
3111 		return (ECTF_CONVBKERR);
3112 	}
3113 
3114 	/*
3115 	 * Iterate over all of the compilation units and create a ctf_cu_t for
3116 	 * each of them.  This is used to determine if we have zero, one, or
3117 	 * multiple dies to convert. If we have zero, that's an error. If
3118 	 * there's only one die, that's the simple case.  No merge needed and
3119 	 * only a single Dwarf_Debug as well.
3120 	 */
3121 	ndies = 0;
3122 	err = ctf_dwarf_count_dies(dw, &derr, &ndies, errbuf, errlen);
3123 
3124 	ctf_dprintf("found %d DWARF CUs\n", ndies);
3125 
3126 	if (ndies == 0) {
3127 		(void) snprintf(errbuf, errlen,
3128 		    "file does not contain DWARF data\n");
3129 		return (ECTF_CONVNODEBUG);
3130 	}
3131 
3132 	(void) dwarf_finish(dw, &derr);
3133 	cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies);
3134 	if (cdies == NULL) {
3135 		return (ENOMEM);
3136 	}
3137 
3138 	bzero(cdies, sizeof (ctf_cu_t) * ndies);
3139 
3140 	for (i = 0; i < ndies; i++) {
3141 		cup = &cdies[i];
3142 		ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
3143 		    &cup->cu_dwarf, &derr);
3144 		if (ret != 0) {
3145 			ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3146 			(void) snprintf(errbuf, errlen,
3147 			    "failed to initialize DWARF: %s\n",
3148 			    dwarf_errmsg(derr));
3149 			return (ECTF_CONVBKERR);
3150 		}
3151 
3152 		err = ctf_dwarf_init_die(fd, elf, cup, i, errbuf, errlen);
3153 		if (err != 0)
3154 			goto out;
3155 
3156 		cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
3157 	}
3158 
3159 	if (!(flags & CTF_ALLOW_MISSING_DEBUG) &&
3160 	    (err = ctf_dwarf_check_missing(cdies, ndies,
3161 	    elf, errbuf, errlen)) != 0)
3162 		goto out;
3163 
3164 	/*
3165 	 * If we only have one compilation unit, there's no reason to use
3166 	 * multiple threads, even if the user requested them. After all, they
3167 	 * just gave us an upper bound.
3168 	 */
3169 	if (ndies == 1)
3170 		nthrs = 1;
3171 
3172 	if (workq_init(&wqp, nthrs) == -1) {
3173 		err = errno;
3174 		goto out;
3175 	}
3176 
3177 	for (i = 0; i < ndies; i++) {
3178 		cup = &cdies[i];
3179 		ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
3180 		    cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
3181 		if (workq_add(wqp, cup) == -1) {
3182 			err = errno;
3183 			goto out;
3184 		}
3185 	}
3186 
3187 	ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, &err);
3188 	if (ret == WORKQ_ERROR) {
3189 		err = errno;
3190 		goto out;
3191 	} else if (ret == WORKQ_UERROR) {
3192 		ctf_dprintf("internal convert failed: %s\n",
3193 		    ctf_errmsg(err));
3194 		goto out;
3195 	}
3196 
3197 	ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
3198 	if (ndies != 1) {
3199 		ctf_merge_t *cmp;
3200 
3201 		cmp = ctf_merge_init(fd, &err);
3202 		if (cmp == NULL)
3203 			goto out;
3204 
3205 		ctf_dprintf("setting threads\n");
3206 		if ((err = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
3207 			ctf_merge_fini(cmp);
3208 			goto out;
3209 		}
3210 
3211 		for (i = 0; i < ndies; i++) {
3212 			cup = &cdies[i];
3213 			if ((err = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
3214 				ctf_merge_fini(cmp);
3215 				goto out;
3216 			}
3217 		}
3218 
3219 		ctf_dprintf("performing merge\n");
3220 		err = ctf_merge_merge(cmp, fpp);
3221 		if (err != 0) {
3222 			ctf_dprintf("failed merge!\n");
3223 			*fpp = NULL;
3224 			ctf_merge_fini(cmp);
3225 			goto out;
3226 		}
3227 		ctf_merge_fini(cmp);
3228 		err = 0;
3229 		ctf_dprintf("successfully converted!\n");
3230 	} else {
3231 		err = 0;
3232 		*fpp = cdies->cu_ctfp;
3233 		cdies->cu_ctfp = NULL;
3234 		ctf_dprintf("successfully converted!\n");
3235 	}
3236 
3237 out:
3238 	workq_fini(wqp);
3239 	ctf_dwarf_free_dies(cdies, ndies);
3240 	return (err);
3241 }
3242