1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright 2012 Jason King.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30/*
31 * Copyright 2020 Joyent, Inc.
32 * Copyright 2020 Robert Mustacchi
33 */
34
35/*
36 * CTF DWARF conversion theory.
37 *
38 * DWARF data contains a series of compilation units. Each compilation unit
39 * generally refers to an object file or what once was, in the case of linked
40 * binaries and shared objects. Each compilation unit has a series of what DWARF
41 * calls a DIE (Debugging Information Entry). The set of entries that we care
42 * about have type information stored in a series of attributes. Each DIE also
43 * has a tag that identifies the kind of attributes that it has.
44 *
45 * A given DIE may itself have children. For example, a DIE that represents a
46 * structure has children which represent members. Whenever we encounter a DIE
47 * that has children or other values or types associated with it, we recursively
48 * process those children first so that way we can then refer to the generated
49 * CTF type id while processing its parent. This reduces the amount of unknowns
50 * and fixups that we need. It also ensures that we don't accidentally add types
51 * that an overzealous compiler might add to the DWARF data but aren't used by
52 * anything in the system.
53 *
54 * Once we do a conversion, we store a mapping in an AVL tree that goes from the
55 * DWARF's die offset, which is relative to the given compilation unit, to a
56 * ctf_id_t.
57 *
58 * Unfortunately, some compilers actually will emit duplicate entries for a
59 * given type that look similar, but aren't quite. To that end, we go through
60 * and do a variant on a merge once we're done processing a single compilation
61 * unit which deduplicates all of the types that are in the unit.
62 *
63 * Finally, if we encounter an object that has multiple compilation units, then
64 * we'll convert all of the compilation units separately and then do a merge, so
65 * that way we can result in one single ctf_file_t that represents everything
66 * for the object.
67 *
68 * Conversion Steps
69 * ----------------
70 *
71 * Because a given object we've been given to convert may have multiple
72 * compilation units, we break the work into two halves. The first half
73 * processes each compilation unit (potentially in parallel) and then the second
74 * half optionally merges all of the dies in the first half. First, we'll cover
75 * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers
76 * the work done in ctf_dwarf_convert_one().
77 *
78 * An individual ctf_cu_t, which represents a compilation unit, is converted to
79 * CTF in a series of multiple passes.
80 *
81 * Pass 1: During the first pass we walk all of the top-level dies and if we
82 * find a function, variable, struct, union, enum or typedef, we recursively
83 * transform all of its types. We don't recurse or process everything, because
84 * we don't want to add some of the types that compilers may add which are
85 * effectively unused.
86 *
87 * During pass 1, if we encounter any structures or unions we mark them for
88 * fixing up later. This is necessary because we may not be able to determine
89 * the full size of a structure at the beginning of time. This will happen if
90 * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
91 * this possibility we defer adding members to structures or even converting
92 * them during pass 1 and save that for pass 2. Adding all of the base
93 * structures without any of their members helps deal with any circular
94 * dependencies that we might encounter.
95 *
96 * Pass 2: This pass is used to do the first half of fixing up structures and
97 * unions. Rather than walk the entire type space again, we actually walk the
98 * list of structures and unions that we marked for later fixing up. Here, we
99 * iterate over every structure and add members to the underlying ctf_file_t,
100 * but not to the structs themselves. One might wonder why we don't, and the
101 * main reason is that libctf requires a ctf_update() be done before adding the
102 * members to structures or unions.
103 *
104 * Pass 3: This pass is used to do the second half of fixing up structures and
105 * unions. During this part we always go through and add members to structures
106 * and unions that we added to the container in the previous pass. In addition,
107 * we set the structure and union's actual size, which may have additional
 * padding added by the compiler; it isn't simply the last offset. DWARF always
109 * guarantees an attribute exists for this. Importantly no ctf_id_t's change
110 * during pass 2.
111 *
112 * Pass 4: The next phase is to add CTF entries for all of the symbols and
113 * variables that are present in this die. During pass 1 we added entries to a
114 * map for each variable and function. During this pass, we iterate over the
115 * symbol table and when we encounter a symbol that we have in our lists of
116 * translated information which matches, we then add it to the ctf_file_t.
117 *
118 * Pass 5: Here we go and look for any weak symbols and functions and see if
119 * they match anything that we recognize. If so, then we add type information
120 * for them at this point based on the matching type.
121 *
122 * Pass 6: This pass is actually a variant on a merge. The traditional merge
123 * process expects there to be no duplicate types. As such, at the end of
124 * conversion, we do a dedup on all of the types in the system. The
125 * deduplication process is described in lib/libctf/common/ctf_merge.c.
126 *
127 * Once pass 6 is done, we've finished processing the individual compilation
128 * unit.
129 *
130 * The following steps reflect the general process of doing a conversion.
131 *
132 * 1) Walk the dwarf section and determine the number of compilation units
133 * 2) Create a ctf_cu_t for each compilation unit
134 * 3) Add all ctf_cu_t's to a workq
135 * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
136 *    is comprised of several steps, which were already enumerated.
137 * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics
138 *    of the merge are discussed in lib/libctf/common/ctf_merge.c.
139 * 6) Free everything up and return a ctf_file_t to the user. If we only had a
140 *    single compilation unit, then we give that to the user. Otherwise, we
141 *    return the merged ctf_file_t.
142 *
143 * Threading
144 * ---------
145 *
146 * The process has been designed to be amenable to threading. Each compilation
147 * unit has its own type stream, therefore the logical place to divide and
148 * conquer is at the compilation unit. Each ctf_cu_t has been built to be able
149 * to be processed independently of the others. It has its own libdwarf handle,
150 * as a given libdwarf handle may only be used by a single thread at a time.
151 * This allows the various ctf_cu_t's to be processed in parallel by different
152 * threads.
153 *
154 * All of the ctf_cu_t's are loaded into a workq which allows for a number of
155 * threads to be specified and used as a thread pool to process all of the
156 * queued work. We set the number of threads to use in the workq equal to the
157 * number of threads that the user has specified.
158 *
159 * After all of the compilation units have been drained, we use the same number
160 * of threads when performing a merge of multiple compilation units, if they
161 * exist.
162 *
163 * While all of these different parts do support and allow for multiple threads,
164 * it's important that when only a single thread is specified, that it be the
165 * calling thread. This allows the conversion routines to be used in a context
166 * that doesn't allow additional threads, such as rtld.
167 *
168 * Common DWARF Mechanics and Notes
169 * --------------------------------
170 *
171 * At this time, we really only support DWARFv2, though support for DWARFv4 is
172 * mostly there. There is no intent to support DWARFv3.
173 *
174 * Generally types for something are stored in the DW_AT_type attribute. For
175 * example, a function's return type will be stored in the local DW_AT_type
176 * attribute while the arguments will be in child DIEs. There are also various
177 * times when we don't have any DW_AT_type. In that case, the lack of a type
178 * implies, at least for C, that its C type is void. Because DWARF doesn't emit
179 * one, we have a synthetic void type that we create and manipulate instead and
180 * pass it off to consumers on an as-needed basis. If nothing has a void type,
181 * it will not be emitted.
182 *
183 * Architecture Specific Parts
184 * ---------------------------
185 *
186 * The CTF tooling encodes various information about the various architectures
187 * in the system. Importantly, the tool assumes that every architecture has a
188 * data model where long and pointer are the same size. This is currently the
189 * case, as the two data models illumos supports are ILP32 and LP64.
190 *
191 * In addition, we encode the mapping of various floating point sizes to various
192 * types for each architecture. If a new architecture is being added, it should
 * be added to the list. The general design of the ctf conversion tools is to be
 * architecture independent, e.g. any of the tools here should be able to
 * convert any architecture's DWARF into ctf. However, this has not been
 * rigorously tested and, more importantly, the ctf routines don't currently
 * write out the data in an endian-aware form; they only use that of the
 * currently running library.
199 */
200
201#include <libctf_impl.h>
202#include <sys/avl.h>
203#include <sys/debug.h>
204#include <gelf.h>
205#include <libdwarf.h>
206#include <dwarf.h>
207#include <libgen.h>
208#include <workq.h>
209#include <errno.h>
210
/* DWARF format version numbers (DWARFv2 and DWARFv4). */
#define	DWARF_VERSION_TWO	2
#define	DWARF_VERSION_FOUR	4
/*
 * NOTE(review): presumably the name used for a variadic ("...") argument;
 * confirm against the users of this macro.
 */
#define	DWARF_VARARGS_NAME	"..."
214
/*
 * Dwarf may refer recursively to other types that we've already processed. To
 * see if we've already converted them, we look them up in an AVL tree that's
 * sorted by the DWARF id.
 *
 * Entries are created by ctf_dwmap_add() and ordered by ctf_dwmap_comp().
 */
typedef struct ctf_dwmap {
	avl_node_t	cdm_avl;	/* AVL linkage (inserted into cu_map) */
	Dwarf_Off	cdm_off;	/* die offset; the sort key */
	Dwarf_Die	cdm_die;	/* die this entry was created for */
	ctf_id_t	cdm_id;		/* CTF type id recorded for the die */
	boolean_t	cdm_fix;	/* 'fix' flag given to ctf_dwmap_add() */
} ctf_dwmap_t;
227
/*
 * Describes a single variable. NOTE(review): presumably these are linked onto
 * a ctf_cu_t's cu_vars list while walking the DWARF data -- confirm at the
 * point of insertion.
 */
typedef struct ctf_dwvar {
	ctf_list_t	cdv_list;	/* list linkage */
	char		*cdv_name;	/* variable name */
	ctf_id_t	cdv_type;	/* CTF type id of the variable */
	boolean_t	cdv_global;	/* is the variable globally visible? */
} ctf_dwvar_t;
234
/*
 * Describes a single function. NOTE(review): presumably these are linked onto
 * a ctf_cu_t's cu_funcs list while walking the DWARF data -- confirm at the
 * point of insertion.
 */
typedef struct ctf_dwfunc {
	ctf_list_t	cdf_list;	/* list linkage */
	char		*cdf_name;	/* function name */
	ctf_funcinfo_t	cdf_fip;	/* CTF function information */
	ctf_id_t	*cdf_argv;	/* argument type ids (TODO confirm) */
	boolean_t	cdf_global;	/* is the function globally visible? */
} ctf_dwfunc_t;
242
/*
 * Describes a bit field type. NOTE(review): presumably kept on a ctf_cu_t's
 * cu_bitfields list so that an existing (base type, width) pair can be found
 * again -- confirm against the code that uses the list.
 */
typedef struct ctf_dwbitf {
	ctf_list_t	cdb_list;	/* list linkage */
	ctf_id_t	cdb_base;	/* base type id (TODO confirm) */
	uint_t		cdb_nbits;	/* width in bits */
	ctf_id_t	cdb_id;		/* type id of the bit field itself */
} ctf_dwbitf_t;
249
/*
 * The ctf_cu_t represents a single top-level DWARF die unit. While generally,
 * the typical object file has only a single die, if we're asked to convert
 * something that's been linked from multiple sources, multiple dies will exist.
 *
 * Note that the structure tag is 'ctf_die' while the typedef is 'ctf_cu_t'.
 */
typedef struct ctf_die {
	Elf		*cu_elf;	/* shared libelf handle */
	char		*cu_name;	/* basename of the DIE */
	ctf_merge_t	*cu_cmh;	/* merge handle */
	ctf_list_t	cu_vars;	/* List of variables */
	ctf_list_t	cu_funcs;	/* List of functions */
	ctf_list_t	cu_bitfields;	/* Bit field members */
	Dwarf_Debug	cu_dwarf;	/* libdwarf handle */
	Dwarf_Die	cu_cu;		/* libdwarf compilation unit */
	Dwarf_Off	cu_cuoff;	/* cu's offset */
	Dwarf_Off	cu_maxoff;	/* maximum offset */
	Dwarf_Half	cu_vers;	/* Dwarf Version */
	Dwarf_Half	cu_addrsz;	/* Dwarf Address Size */
	ctf_file_t	*cu_ctfp;	/* output CTF file */
	avl_tree_t	cu_map;		/* map die offsets to CTF types */
	char		*cu_errbuf;	/* error message buffer */
	size_t		cu_errlen;	/* error message buffer length */
	size_t		cu_ptrsz;	/* object's pointer size */
	boolean_t	cu_bigend;	/* is it big endian */
	boolean_t	cu_doweaks;	/* should we convert weak symbols? */
	uint_t		cu_mach;	/* machine type */
	ctf_id_t	cu_voidtid;	/* id for the synthetic 'void' type */
	ctf_id_t	cu_longtid;	/* id for a 'long' */
} ctf_cu_t;
279
/*
 * Forward declarations. ctf_dwarf_offset(), for example, is called by
 * ctf_dwmap_add() before its definition appears.
 */
static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);

static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    boolean_t);
static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    ctf_id_t *);
288
289/*
290 * This is a generic way to set a CTF Conversion backend error depending on what
291 * we were doing. Unless it was one of a specific set of errors that don't
292 * indicate a programming / translation bug, eg. ENOMEM, then we transform it
293 * into a CTF backend error and fill in the error buffer.
294 */
295static int
296ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
297{
298	va_list ap;
299	int ret;
300	size_t off = 0;
301	ssize_t rem = cup->cu_errlen;
302	if (cfp != NULL)
303		err = ctf_errno(cfp);
304
305	if (err == ENOMEM)
306		return (err);
307
308	ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
309	if (ret < 0)
310		goto err;
311	off += ret;
312	rem = MAX(rem - ret, 0);
313
314	va_start(ap, fmt);
315	ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap);
316	va_end(ap);
317	if (ret < 0)
318		goto err;
319
320	off += ret;
321	rem = MAX(rem - ret, 0);
322	if (fmt[strlen(fmt) - 1] != '\n') {
323		(void) snprintf(cup->cu_errbuf + off, rem,
324		    ": %s\n", ctf_errmsg(err));
325	}
326	va_end(ap);
327	return (ECTF_CONVBKERR);
328
329err:
330	cup->cu_errbuf[0] = '\0';
331	return (ECTF_CONVBKERR);
332}
333
334/*
335 * DWARF often opts to put no explicit type to describe a void type. eg. if we
336 * have a reference type whose DW_AT_type member doesn't exist, then we should
337 * instead assume it points to void. Because this isn't represented, we
338 * instead cause it to come into existence.
339 */
340static ctf_id_t
341ctf_dwarf_void(ctf_cu_t *cup)
342{
343	if (cup->cu_voidtid == CTF_ERR) {
344		ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
345		cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT,
346		    "void", &enc);
347		if (cup->cu_voidtid == CTF_ERR) {
348			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
349			    "failed to create void type: %s\n",
350			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
351		}
352	}
353
354	return (cup->cu_voidtid);
355}
356
357/*
358 * There are many different forms that an array index may take. However, we just
359 * always force it to be of a type long no matter what. Therefore we use this to
360 * have a single instance of long across everything.
361 */
362static ctf_id_t
363ctf_dwarf_long(ctf_cu_t *cup)
364{
365	if (cup->cu_longtid == CTF_ERR) {
366		ctf_encoding_t enc;
367
368		enc.cte_format = CTF_INT_SIGNED;
369		enc.cte_offset = 0;
370		/* All illumos systems are LP */
371		enc.cte_bits = cup->cu_ptrsz * 8;
372		cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
373		    "long", &enc);
374		if (cup->cu_longtid == CTF_ERR) {
375			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
376			    "failed to create long type: %s\n",
377			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
378		}
379
380	}
381
382	return (cup->cu_longtid);
383}
384
385static int
386ctf_dwmap_comp(const void *a, const void *b)
387{
388	const ctf_dwmap_t *ca = a;
389	const ctf_dwmap_t *cb = b;
390
391	if (ca->cdm_off > cb->cdm_off)
392		return (1);
393	if (ca->cdm_off < cb->cdm_off)
394		return (-1);
395	return (0);
396}
397
398static int
399ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix)
400{
401	int ret;
402	avl_index_t index;
403	ctf_dwmap_t *dwmap;
404	Dwarf_Off off;
405
406	VERIFY(id > 0 && id < CTF_MAX_TYPE);
407
408	if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0)
409		return (ret);
410
411	if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
412		return (ENOMEM);
413
414	dwmap->cdm_die = die;
415	dwmap->cdm_off = off;
416	dwmap->cdm_id = id;
417	dwmap->cdm_fix = fix;
418
419	ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id);
420	VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL);
421	avl_insert(&cup->cu_map, dwmap, index);
422	return (0);
423}
424
425static int
426ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
427    Dwarf_Attribute *attrp)
428{
429	int ret;
430	Dwarf_Error derr;
431
432	if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
433		return (0);
434	if (ret == DW_DLV_NO_ENTRY) {
435		*attrp = NULL;
436		return (ENOENT);
437	}
438	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
439	    "failed to get attribute for type: %s\n",
440	    dwarf_errmsg(derr));
441	return (ECTF_CONVBKERR);
442}
443
444static int
445ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
446{
447	int ret;
448	Dwarf_Attribute attr;
449	Dwarf_Error derr;
450
451	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
452		return (ret);
453
454	if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
455		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
456		return (0);
457	}
458
459	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
460	    "failed to get unsigned attribute for type: %s\n",
461	    dwarf_errmsg(derr));
462	return (ECTF_CONVBKERR);
463}
464
465static int
466ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
467    Dwarf_Die *diep)
468{
469	int ret;
470	Dwarf_Off off;
471	Dwarf_Error derr;
472
473	if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0)
474		return (ret);
475
476	off += cup->cu_cuoff;
477	if ((ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr)) !=
478	    DW_DLV_OK) {
479		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
480		    "failed to get die from offset %" DW_PR_DUu ": %s\n",
481		    off, dwarf_errmsg(derr));
482		return (ECTF_CONVBKERR);
483	}
484
485	return (0);
486}
487
488static int
489ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
490    Dwarf_Signed *valp)
491{
492	int ret;
493	Dwarf_Attribute attr;
494	Dwarf_Error derr;
495
496	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
497		return (ret);
498
499	if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
500		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
501		return (0);
502	}
503
504	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
505	    "failed to get unsigned attribute for type: %s\n",
506	    dwarf_errmsg(derr));
507	return (ECTF_CONVBKERR);
508}
509
510static int
511ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
512    Dwarf_Unsigned *valp)
513{
514	int ret;
515	Dwarf_Attribute attr;
516	Dwarf_Error derr;
517
518	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
519		return (ret);
520
521	if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
522		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
523		return (0);
524	}
525
526	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
527	    "failed to get unsigned attribute for type: %s\n",
528	    dwarf_errmsg(derr));
529	return (ECTF_CONVBKERR);
530}
531
532static int
533ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
534    Dwarf_Bool *val)
535{
536	int ret;
537	Dwarf_Attribute attr;
538	Dwarf_Error derr;
539
540	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
541		return (ret);
542
543	if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
544		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
545		return (0);
546	}
547
548	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
549	    "failed to get boolean attribute for type: %s\n",
550	    dwarf_errmsg(derr));
551
552	return (ECTF_CONVBKERR);
553}
554
555static int
556ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp)
557{
558	int ret;
559	char *s;
560	Dwarf_Attribute attr;
561	Dwarf_Error derr;
562
563	*strp = NULL;
564	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
565		return (ret);
566
567	if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
568		if ((*strp = ctf_strdup(s)) == NULL)
569			ret = ENOMEM;
570		else
571			ret = 0;
572		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
573		return (ret);
574	}
575
576	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
577	    "failed to get string attribute for type: %s\n",
578	    dwarf_errmsg(derr));
579	return (ECTF_CONVBKERR);
580}
581
582/*
583 * The encoding of a DW_AT_data_member_location has changed between different
584 * revisions of the specification. It may be a general udata form or it may be
585 * location data information. In DWARF 2, it is only the latter. In later
586 * revisions of the spec, it may be either. To determine the form, we ask the
587 * class, which will be of type CONSTANT.
588 */
589static int
590ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp)
591{
592	int ret;
593	Dwarf_Error derr;
594	Dwarf_Attribute attr;
595	Dwarf_Locdesc *loc;
596	Dwarf_Signed locnum;
597	Dwarf_Half form;
598	enum Dwarf_Form_Class class;
599
600	if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location,
601	    &attr)) != 0)
602		return (ret);
603
604	if (dwarf_whatform(attr, &form, &derr) != DW_DLV_OK) {
605		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
606		    "failed to get dwarf attribute for for member location: %s",
607		    dwarf_errmsg(derr));
608		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
609		return (ECTF_CONVBKERR);
610	}
611
612	class = dwarf_get_form_class(cup->cu_vers, DW_AT_data_member_location,
613	    cup->cu_addrsz, form);
614	if (class == DW_FORM_CLASS_CONSTANT) {
615		Dwarf_Signed sign;
616
617		/*
618		 * We have a constant. We need to try to get both this as signed
619		 * and unsigned data, as unfortunately, DWARF doesn't define the
620		 * sign. Which is a joy. We try unsigned first. If neither
621		 * match, fall through to the normal path.
622		 */
623		if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
624			dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
625			return (0);
626		}
627
628		if (dwarf_formsdata(attr, &sign, &derr) == DW_DLV_OK) {
629			dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
630			if (sign < 0) {
631				(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
632				    "encountered negative member data "
633				    "location: %d", sign);
634			}
635			*valp = (Dwarf_Unsigned)sign;
636			return (0);
637		}
638	}
639
640	if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
641		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
642		    "failed to obtain location list for member offset: %s",
643		    dwarf_errmsg(derr));
644		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
645		return (ECTF_CONVBKERR);
646	}
647	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
648
649	if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
650		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
651		    "failed to parse location structure for member");
652		dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
653		dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
654		return (ECTF_CONVBKERR);
655	}
656
657	*valp = loc->ld_s->lr_number;
658
659	dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
660	dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
661	return (0);
662}
663
664
665static int
666ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp)
667{
668	Dwarf_Error derr;
669
670	if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
671		return (0);
672
673	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
674	    "failed to get die offset: %s\n",
675	    dwarf_errmsg(derr));
676	return (ECTF_CONVBKERR);
677}
678
679/* simpler variant for debugging output */
680static Dwarf_Off
681ctf_die_offset(Dwarf_Die die)
682{
683	Dwarf_Off off = -1;
684	Dwarf_Error derr;
685
686	(void) dwarf_dieoffset(die, &off, &derr);
687	return (off);
688}
689
690static int
691ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp)
692{
693	Dwarf_Error derr;
694
695	if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
696		return (0);
697
698	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
699	    "failed to get tag type: %s\n",
700	    dwarf_errmsg(derr));
701	return (ECTF_CONVBKERR);
702}
703
704static int
705ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp)
706{
707	Dwarf_Error derr;
708	int ret;
709
710	*sibp = NULL;
711	ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr);
712	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
713		return (0);
714
715	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
716	    "failed to sibling from die: %s\n",
717	    dwarf_errmsg(derr));
718	return (ECTF_CONVBKERR);
719}
720
721static int
722ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp)
723{
724	Dwarf_Error derr;
725	int ret;
726
727	*childp = NULL;
728	ret = dwarf_child(base, childp, &derr);
729	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
730		return (0);
731
732	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
733	    "failed to child from die: %s\n",
734	    dwarf_errmsg(derr));
735	return (ECTF_CONVBKERR);
736}
737
738/*
739 * Compilers disagree on what to do to determine if something has global
740 * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
741 * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
742 * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
743 */
744static int
745ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp)
746{
747	int ret;
748	Dwarf_Signed vis;
749	Dwarf_Bool ext;
750
751	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) {
752		*igp = vis == DW_VIS_exported;
753		return (0);
754	} else if (ret != ENOENT) {
755		return (ret);
756	}
757
758	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) {
759		if (ret == ENOENT) {
760			*igp = B_FALSE;
761			return (0);
762		}
763		return (ret);
764	}
765	*igp = ext != 0 ? B_TRUE : B_FALSE;
766	return (0);
767}
768
769static int
770ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen)
771{
772	GElf_Ehdr ehdr;
773
774	if (gelf_getehdr(elf, &ehdr) == NULL) {
775		(void) snprintf(errbuf, errlen,
776		    "failed to get ELF header: %s\n",
777		    elf_errmsg(elf_errno()));
778		return (ECTF_CONVBKERR);
779	}
780
781	cup->cu_mach = ehdr.e_machine;
782
783	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
784		cup->cu_ptrsz = 4;
785		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0);
786	} else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
787		cup->cu_ptrsz = 8;
788		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0);
789	} else {
790		(void) snprintf(errbuf, errlen,
791		    "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
792		return (ECTF_CONVBKERR);
793	}
794
795	if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
796		cup->cu_bigend = B_FALSE;
797	} else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
798		cup->cu_bigend = B_TRUE;
799	} else {
800		(void) snprintf(errbuf, errlen,
801		    "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]);
802		return (ECTF_CONVBKERR);
803	}
804
805	return (0);
806}
807
/*
 * A single floating point size and the CTF encodings that go with it. The
 * encodings are indexed the way ctf_dwarf_float_base() selects its column:
 * 0 is the real type, 1 the complex type, and 2 the imaginary type.
 */
typedef struct ctf_dwarf_fpent {
	size_t	cdfe_size;	/* size in bytes of the real type */
	uint_t	cdfe_enc[3];	/* CTF_FP_* for real, complex, imaginary */
} ctf_dwarf_fpent_t;
812
/*
 * The floating point sizes for a single machine type. The list of entries is
 * terminated by one whose cdfe_size is zero.
 */
typedef struct ctf_dwarf_fpmap {
	uint_t			cdf_mach;	/* ELF machine (EM_*) */
	ctf_dwarf_fpent_t	cdf_ents[4];	/* sizes; zero-size terminated */
} ctf_dwarf_fpmap_t;
817
/*
 * The floating point mappings for each machine type that we know about. The
 * table is terminated by an EM_NONE entry. The machines listed differ only in
 * the size of a long double: 12 bytes for EM_386 and 16 bytes for the rest.
 */
static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
	{ EM_SPARC, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_SPARC32PLUS, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_SPARCV9, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_386, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_X86_64, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_NONE }
};
851
852/*
853 * We want to normalize the type names that are used between compilers in the
854 * case of complex. gcc prefixes things with types like 'long complex' where as
855 * clang only calls them 'complex' in the dwarf even if in the C they are long
856 * complex or similar.
857 */
858static int
859ctf_dwarf_fixup_complex(ctf_cu_t *cup, ctf_encoding_t *enc, char **namep)
860{
861	const char *name;
862	*namep = NULL;
863
864	switch (enc->cte_format) {
865	case CTF_FP_CPLX:
866		name = "complex float";
867		break;
868	case CTF_FP_DCPLX:
869		name = "complex double";
870		break;
871	case CTF_FP_LDCPLX:
872		name = "complex long double";
873		break;
874	default:
875		return (0);
876	}
877
878	*namep = ctf_strdup(name);
879	if (*namep == NULL) {
880		return (ENOMEM);
881	}
882
883	return (0);
884}
885
886static int
887ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc)
888{
889	const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
890	const ctf_dwarf_fpent_t *ent;
891	uint_t col = 0, mult = 1;
892
893	for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
894		if (map->cdf_mach == cup->cu_mach)
895			break;
896	}
897
898	if (map->cdf_mach == EM_NONE) {
899		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
900		    "Unsupported machine type: %d\n", cup->cu_mach);
901		return (ENOTSUP);
902	}
903
904	if (type == DW_ATE_complex_float) {
905		mult = 2;
906		col = 1;
907	} else if (type == DW_ATE_imaginary_float ||
908	    type == DW_ATE_SUN_imaginary_float) {
909		col = 2;
910	}
911
912	ent = &map->cdf_ents[0];
913	for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
914		if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
915			enc->cte_format = ent->cdfe_enc[col];
916			return (0);
917		}
918	}
919
920	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
921	    "failed to find valid fp mapping for encoding %d, size %d bits\n",
922	    type, enc->cte_bits);
923	return (EINVAL);
924}
925
926static int
927ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp,
928    ctf_encoding_t *enc)
929{
930	int ret;
931	Dwarf_Signed type;
932
933	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0)
934		return (ret);
935
936	switch (type) {
937	case DW_ATE_unsigned:
938	case DW_ATE_address:
939		*kindp = CTF_K_INTEGER;
940		enc->cte_format = 0;
941		break;
942	case DW_ATE_unsigned_char:
943		*kindp = CTF_K_INTEGER;
944		enc->cte_format = CTF_INT_CHAR;
945		break;
946	case DW_ATE_signed:
947		*kindp = CTF_K_INTEGER;
948		enc->cte_format = CTF_INT_SIGNED;
949		break;
950	case DW_ATE_signed_char:
951		*kindp = CTF_K_INTEGER;
952		enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
953		break;
954	case DW_ATE_boolean:
955		*kindp = CTF_K_INTEGER;
956		enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
957		break;
958	case DW_ATE_float:
959	case DW_ATE_complex_float:
960	case DW_ATE_imaginary_float:
961	case DW_ATE_SUN_imaginary_float:
962	case DW_ATE_SUN_interval_float:
963		*kindp = CTF_K_FLOAT;
964		if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0)
965			return (ret);
966		break;
967	default:
968		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
969		    "encountered unknown DWARF encoding: %d", type);
970		return (ECTF_CONVBKERR);
971	}
972
973	return (0);
974}
975
976/*
977 * Different compilers (at least GCC and Studio) use different names for types.
978 * This parses the types and attempts to unify them. If this fails, we just fall
979 * back to using the DWARF itself.
980 */
981static int
982ctf_dwarf_parse_int(const char *name, int *kindp, ctf_encoding_t *enc,
983    char **newnamep)
984{
985	char buf[256];
986	char *base, *c, *last;
987	int nlong = 0, nshort = 0, nchar = 0, nint = 0;
988	int sign = 1;
989
990	if (strlen(name) + 1 > sizeof (buf))
991		return (EINVAL);
992
993	(void) strlcpy(buf, name, sizeof (buf));
994	for (c = strtok_r(buf, " ", &last); c != NULL;
995	    c = strtok_r(NULL, " ", &last)) {
996		if (strcmp(c, "signed") == 0) {
997			sign = 1;
998		} else if (strcmp(c, "unsigned") == 0) {
999			sign = 0;
1000		} else if (strcmp(c, "long") == 0) {
1001			nlong++;
1002		} else if (strcmp(c, "char") == 0) {
1003			nchar++;
1004		} else if (strcmp(c, "short") == 0) {
1005			nshort++;
1006		} else if (strcmp(c, "int") == 0) {
1007			nint++;
1008		} else {
1009			/*
1010			 * If we don't recognize any of the tokens, we'll tell
1011			 * the caller to fall back to the dwarf-provided
1012			 * encoding information.
1013			 */
1014			return (EINVAL);
1015		}
1016	}
1017
1018	if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
1019		return (EINVAL);
1020
1021	if (nchar > 0) {
1022		if (nlong > 0 || nshort > 0 || nint > 0)
1023			return (EINVAL);
1024		base = "char";
1025	} else if (nshort > 0) {
1026		if (nlong > 0)
1027			return (EINVAL);
1028		base = "short";
1029	} else if (nlong > 0) {
1030		base = "long";
1031	} else {
1032		base = "int";
1033	}
1034
1035	if (nchar > 0)
1036		enc->cte_format = CTF_INT_CHAR;
1037	else
1038		enc->cte_format = 0;
1039
1040	if (sign > 0)
1041		enc->cte_format |= CTF_INT_SIGNED;
1042
1043	(void) snprintf(buf, sizeof (buf), "%s%s%s",
1044	    (sign ? "" : "unsigned "),
1045	    (nlong > 1 ? "long " : ""),
1046	    base);
1047
1048	*newnamep = ctf_strdup(buf);
1049	if (*newnamep == NULL)
1050		return (ENOMEM);
1051	*kindp = CTF_K_INTEGER;
1052	return (0);
1053}
1054
1055static int
1056ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot,
1057    Dwarf_Off off)
1058{
1059	int ret;
1060	char *name, *nname = NULL;
1061	Dwarf_Unsigned sz;
1062	int kind;
1063	ctf_encoding_t enc;
1064	ctf_id_t id;
1065
1066	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0)
1067		return (ret);
1068	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) {
1069		goto out;
1070	}
1071	ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
1072	    off, sz);
1073
1074	bzero(&enc, sizeof (ctf_encoding_t));
1075	enc.cte_bits = sz * 8;
1076	if ((ret = ctf_dwarf_parse_int(name, &kind, &enc, &nname)) == 0) {
1077		ctf_free(name, strlen(name) + 1);
1078		name = nname;
1079	} else {
1080		if (ret != EINVAL) {
1081			goto out;
1082		}
1083		ctf_dprintf("falling back to dwarf for base type %s\n", name);
1084		if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0) {
1085			goto out;
1086		}
1087
1088		if (kind == CTF_K_FLOAT && (ret = ctf_dwarf_fixup_complex(cup,
1089		    &enc, &nname)) != 0) {
1090			goto out;
1091		} else if (nname != NULL) {
1092			ctf_free(name, strlen(name) + 1);
1093			name = nname;
1094		}
1095	}
1096
1097	id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind);
1098	if (id == CTF_ERR) {
1099		ret = ctf_errno(cup->cu_ctfp);
1100	} else {
1101		*idp = id;
1102		ret = ctf_dwmap_add(cup, id, die, B_FALSE);
1103	}
1104out:
1105	ctf_free(name, strlen(name) + 1);
1106	return (ret);
1107}
1108
1109/*
1110 * Getting a member's offset is a surprisingly intricate dance. It works as
1111 * follows:
1112 *
1113 * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
1114 * have a DW_AT_data_member_location. We won't have both. Thus we check first
1115 * for DW_AT_data_bit_offset, and if it exists, we're set.
1116 *
1117 * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then
1118 * we have to grab the data location and use the following dance:
1119 *
1120 * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
1121 * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
1122 * When it's been omitted, we then have to say that the size is that of the
1123 * underlying type, which forces that to be after a ctf_update(). Here, we have
1124 * to do different things based on whether or not we're using big endian or
1125 * little endian to obtain the proper offset.
1126 */
1127static int
1128ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid,
1129    ulong_t *offp)
1130{
1131	int ret;
1132	Dwarf_Unsigned loc, bitsz, bytesz;
1133	Dwarf_Signed bitoff;
1134	size_t off;
1135	ssize_t tsz;
1136
1137	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset,
1138	    &loc)) == 0) {
1139		*offp = loc;
1140		return (0);
1141	} else if (ret != ENOENT) {
1142		return (ret);
1143	}
1144
1145	if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0)
1146		return (ret);
1147	off = loc * 8;
1148
1149	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset,
1150	    &bitoff)) != 0) {
1151		if (ret != ENOENT)
1152			return (ret);
1153		*offp = off;
1154		return (0);
1155	}
1156
1157	/* At this point we have to have DW_AT_bit_size */
1158	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0)
1159		return (ret);
1160
1161	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size,
1162	    &bytesz)) != 0) {
1163		if (ret != ENOENT)
1164			return (ret);
1165		if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) {
1166			int e = ctf_errno(cup->cu_ctfp);
1167			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1168			    "failed to get type size: %s", ctf_errmsg(e));
1169			return (ECTF_CONVBKERR);
1170		}
1171	} else {
1172		tsz = bytesz;
1173	}
1174	tsz *= 8;
1175	if (cup->cu_bigend == B_TRUE) {
1176		*offp = off + bitoff;
1177	} else {
1178		*offp = off + tsz - bitoff - bitsz;
1179	}
1180
1181	return (0);
1182}
1183
1184/*
1185 * We need to determine if the member in question is a bitfield. If it is, then
1186 * we need to go through and create a new type that's based on the actual base
1187 * type, but has a different size. We also rename the type as a result to help
1188 * deal with future collisions.
1189 *
1190 * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
1191 * bit size member and it does not equal the byte size member, then we need to
1192 * create a bitfield type based on this.
1193 *
1194 * Note: When we support DWARFv4, there may be a chance that we need to also
1195 * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
1196 */
1197static int
1198ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp)
1199{
1200	int ret;
1201	Dwarf_Unsigned bitsz;
1202	ctf_encoding_t e;
1203	ctf_dwbitf_t *cdb;
1204	ctf_dtdef_t *dtd;
1205	ctf_id_t base = *idp;
1206	int kind;
1207
1208	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) {
1209		if (ret == ENOENT)
1210			return (0);
1211		return (ret);
1212	}
1213
1214	ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
1215	/*
1216	 * Given that we now have a bitsize, time to go do something about it.
1217	 * We're going to create a new type based on the current one, but first
1218	 * we need to find the base type. This means we need to traverse any
1219	 * typedef's, consts, and volatiles until we get to what should be
1220	 * something of type integer or enumeration.
1221	 */
1222	VERIFY(bitsz < UINT32_MAX);
1223	dtd = ctf_dtd_lookup(cup->cu_ctfp, base);
1224	VERIFY(dtd != NULL);
1225	kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1226	while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
1227	    kind == CTF_K_VOLATILE) {
1228		dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type);
1229		VERIFY(dtd != NULL);
1230		kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1231	}
1232	ctf_dprintf("got kind %d\n", kind);
1233	VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
1234
1235	/*
1236	 * As surprising as it may be, it is strictly possible to create a
1237	 * bitfield that is based on an enum. Of course, the C standard leaves
1238	 * enums sizing as an ABI concern more or less. To that effect, today on
1239	 * all illumos platforms the size of an enum is generally that of an
1240	 * int as our supported data models and ABIs all agree on that. So what
1241	 * we'll do is fake up a CTF encoding here to use. In this case, we'll
1242	 * treat it as an unsigned value of whatever size the underlying enum
1243	 * currently has (which is in the ctt_size member of its dynamic type
1244	 * data).
1245	 */
1246	if (kind == CTF_K_INTEGER) {
1247		e = dtd->dtd_u.dtu_enc;
1248	} else {
1249		bzero(&e, sizeof (ctf_encoding_t));
1250		e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
1251	}
1252
1253	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL;
1254	    cdb = ctf_list_next(cdb)) {
1255		if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
1256			break;
1257	}
1258
1259	/*
1260	 * Create a new type if none exists. We name all types in a way that is
1261	 * guaranteed not to conflict with the corresponding C type. We do this
1262	 * by using the ':' operator.
1263	 */
1264	if (cdb == NULL) {
1265		size_t namesz;
1266		char *name;
1267
1268		e.cte_bits = bitsz;
1269		namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
1270		    (uint32_t)bitsz);
1271		name = ctf_alloc(namesz + 1);
1272		if (name == NULL)
1273			return (ENOMEM);
1274		cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
1275		if (cdb == NULL) {
1276			ctf_free(name, namesz + 1);
1277			return (ENOMEM);
1278		}
1279		(void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
1280		    (uint32_t)bitsz);
1281
1282		cdb->cdb_base = base;
1283		cdb->cdb_nbits = bitsz;
1284		cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
1285		    name, &e);
1286		if (cdb->cdb_id == CTF_ERR) {
1287			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1288			    "failed to get add bitfield type %s: %s", name,
1289			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1290			ctf_free(name, namesz + 1);
1291			ctf_free(cdb, sizeof (ctf_dwbitf_t));
1292			return (ECTF_CONVBKERR);
1293		}
1294		ctf_free(name, namesz + 1);
1295		ctf_list_append(&cup->cu_bitfields, cdb);
1296	}
1297
1298	*idp = cdb->cdb_id;
1299
1300	return (0);
1301}
1302
1303static int
1304ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add)
1305{
1306	int ret, kind;
1307	Dwarf_Die child, memb;
1308	Dwarf_Unsigned size;
1309
1310	kind = ctf_type_kind(cup->cu_ctfp, base);
1311	VERIFY(kind != CTF_ERR);
1312	VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
1313
1314	/*
1315	 * Members are in children. However, gcc also allows empty ones.
1316	 */
1317	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1318		return (ret);
1319	if (child == NULL)
1320		return (0);
1321
1322	memb = child;
1323	while (memb != NULL) {
1324		Dwarf_Die sib, tdie;
1325		Dwarf_Half tag;
1326		ctf_id_t mid;
1327		char *mname;
1328		ulong_t memboff = 0;
1329
1330		if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0)
1331			return (ret);
1332
1333		if (tag != DW_TAG_member)
1334			goto next;
1335
1336		if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0)
1337			return (ret);
1338
1339		if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid,
1340		    CTF_ADD_NONROOT)) != 0)
1341			return (ret);
1342		ctf_dprintf("Got back type id: %d\n", mid);
1343
1344		/*
1345		 * If we're not adding a member, just go ahead and return.
1346		 */
1347		if (add == B_FALSE) {
1348			if ((ret = ctf_dwarf_member_bitfield(cup, memb,
1349			    &mid)) != 0)
1350				return (ret);
1351			goto next;
1352		}
1353
1354		if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name,
1355		    &mname)) != 0 && ret != ENOENT)
1356			return (ret);
1357		if (ret == ENOENT)
1358			mname = NULL;
1359
1360		if (kind == CTF_K_UNION) {
1361			memboff = 0;
1362		} else if ((ret = ctf_dwarf_member_offset(cup, memb, mid,
1363		    &memboff)) != 0) {
1364			if (mname != NULL)
1365				ctf_free(mname, strlen(mname) + 1);
1366			return (ret);
1367		}
1368
1369		if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0)
1370			return (ret);
1371
1372		ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff);
1373		if (ret == CTF_ERR) {
1374			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1375			    "failed to add member %s: %s",
1376			    mname, ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1377			if (mname != NULL)
1378				ctf_free(mname, strlen(mname) + 1);
1379			return (ECTF_CONVBKERR);
1380		}
1381
1382		if (mname != NULL)
1383			ctf_free(mname, strlen(mname) + 1);
1384
1385next:
1386		if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0)
1387			return (ret);
1388		memb = sib;
1389	}
1390
1391	/*
1392	 * If we're not adding members, then we don't know the final size of the
1393	 * structure, so end here.
1394	 */
1395	if (add == B_FALSE)
1396		return (0);
1397
1398	/* Finally set the size of the structure to the actual byte size */
1399	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0)
1400		return (ret);
1401	if ((ctf_set_size(cup->cu_ctfp, base, size)) == CTF_ERR) {
1402		int e = ctf_errno(cup->cu_ctfp);
1403		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1404		    "failed to set type size for %d to 0x%x: %s", base,
1405		    (uint32_t)size, ctf_errmsg(e));
1406		return (ECTF_CONVBKERR);
1407	}
1408
1409	return (0);
1410}
1411
1412static int
1413ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1414    int kind, int isroot)
1415{
1416	int ret;
1417	char *name;
1418	ctf_id_t base;
1419	Dwarf_Die child;
1420	Dwarf_Bool decl;
1421
1422	/*
1423	 * Deal with the terribly annoying case of anonymous structs and unions.
1424	 * If they don't have a name, set the name to the empty string.
1425	 */
1426	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1427	    ret != ENOENT)
1428		return (ret);
1429	if (ret == ENOENT)
1430		name = NULL;
1431
1432	/*
1433	 * We need to check if we just have a declaration here. If we do, then
1434	 * instead of creating an actual structure or union, we're just going to
1435	 * go ahead and create a forward. During a dedup or merge, the forward
1436	 * will be replaced with the real thing.
1437	 */
1438	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration,
1439	    &decl)) != 0) {
1440		if (ret != ENOENT)
1441			return (ret);
1442		decl = 0;
1443	}
1444
1445	if (decl != 0) {
1446		base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind);
1447	} else if (kind == CTF_K_STRUCT) {
1448		base = ctf_add_struct(cup->cu_ctfp, isroot, name);
1449	} else {
1450		base = ctf_add_union(cup->cu_ctfp, isroot, name);
1451	}
1452	ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
1453	if (name != NULL)
1454		ctf_free(name, strlen(name) + 1);
1455	if (base == CTF_ERR)
1456		return (ctf_errno(cup->cu_ctfp));
1457	*idp = base;
1458
1459	/*
1460	 * If it's just a declaration, we're not going to mark it for fix up or
1461	 * do anything else.
1462	 */
1463	if (decl == B_TRUE)
1464		return (ctf_dwmap_add(cup, base, die, B_FALSE));
1465	if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0)
1466		return (ret);
1467
1468	/*
1469	 * The children of a structure or union are generally members. However,
1470	 * some compilers actually insert structs and unions there and not as a
1471	 * top-level die. Therefore, to make sure we honor our pass 1 contract
1472	 * of having all the base types, but not members, we need to walk this
1473	 * for instances of a DW_TAG_union_type.
1474	 */
1475	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1476		return (ret);
1477
1478	while (child != NULL) {
1479		Dwarf_Half tag;
1480		Dwarf_Die sib;
1481
1482		if ((ret = ctf_dwarf_tag(cup, child, &tag)) != 0)
1483			return (ret);
1484
1485		switch (tag) {
1486		case DW_TAG_union_type:
1487		case DW_TAG_structure_type:
1488			ret = ctf_dwarf_convert_type(cup, child, NULL,
1489			    CTF_ADD_NONROOT);
1490			if (ret != 0) {
1491				return (ret);
1492			}
1493			break;
1494		default:
1495			break;
1496		}
1497
1498		if ((ret = ctf_dwarf_sib(cup, child, &sib)) != 0)
1499			return (ret);
1500		child = sib;
1501	}
1502
1503	return (0);
1504}
1505
1506static int
1507ctf_dwarf_array_upper_bound(ctf_cu_t *cup, Dwarf_Die range, ctf_arinfo_t *ar)
1508{
1509	Dwarf_Attribute attr;
1510	Dwarf_Unsigned uval;
1511	Dwarf_Signed sval;
1512	Dwarf_Half form;
1513	Dwarf_Error derr;
1514	const char *formstr = NULL;
1515	uint_t adj = 0;
1516	int ret = 0;
1517
1518	ctf_dprintf("setting array upper bound\n");
1519
1520	ar->ctr_nelems = 0;
1521
1522	/*
1523	 * Different compilers use different attributes to indicate the size of
1524	 * an array. GCC has traditionally used DW_AT_upper_bound, while Clang
1525	 * uses DW_AT_count. They have slightly different semantics. DW_AT_count
1526	 * indicates the total number of elements that are present, while
1527	 * DW_AT_upper_bound indicates the last index, hence we need to add one
1528	 * to that index to get the count.
1529	 *
1530	 * We first search for DW_AT_count and then for DW_AT_upper_bound. If we
1531	 * find neither, then we treat the lack of this as a zero element array.
1532	 * Our value is initialized assuming we find a DW_AT_count value.
1533	 */
1534	ret = ctf_dwarf_attribute(cup, range, DW_AT_count, &attr);
1535	if (ret != 0 && ret != ENOENT) {
1536		return (ret);
1537	} else if (ret == ENOENT) {
1538		ret = ctf_dwarf_attribute(cup, range, DW_AT_upper_bound, &attr);
1539		if (ret != 0 && ret != ENOENT) {
1540			return (ret);
1541		} else if (ret == ENOENT) {
1542			return (0);
1543		} else {
1544			adj = 1;
1545		}
1546	}
1547
1548	if (dwarf_whatform(attr, &form, &derr) != DW_DLV_OK) {
1549		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1550		    "failed to get DW_AT_upper_bound attribute form: %s\n",
1551		    dwarf_errmsg(derr));
1552		ret = ECTF_CONVBKERR;
1553		goto done;
1554	}
1555
1556	/*
1557	 * Compilers can indicate array bounds using signed or unsigned values.
1558	 * Additionally, some compilers may also store the array bounds
1559	 * using as DW_FORM_data{1,2,4,8} (which DWARF treats as raw data and
1560	 * expects the caller to understand how to interpret the value).
1561	 *
1562	 * GCC 4.4.4 appears to always use unsigned values to encode the
1563	 * array size (using '(unsigned)-1' to represent a zero-length or
1564	 * unknown length array). Later versions of GCC use a signed value of
1565	 * -1 for zero/unknown length arrays, and unsigned values to encode
1566	 * known array sizes.
1567	 *
1568	 * Both dwarf_formsdata() and dwarf_formudata() will retrieve values
1569	 * as their respective signed/unsigned forms, but both will also
1570	 * retreive DW_FORM_data{1,2,4,8} values and treat them as signed or
1571	 * unsigned integers (i.e. dwarf_formsdata() treats DW_FORM_dataXX
1572	 * as signed integers and dwarf_formudata() treats DW_FORM_dataXX as
1573	 * unsigned integers). Both will return an error if the form is not
1574	 * their respective signed/unsigned form, or DW_FORM_dataXX.
1575	 *
1576	 * To obtain the upper bound, we use the appropriate
1577	 * dwarf_form[su]data() function based on the form of DW_AT_upper_bound.
1578	 * Additionally, we let dwarf_formudata() handle the DW_FORM_dataXX
1579	 * forms (via the default option in the switch). If the value is in an
1580	 * unexpected form (i.e. not DW_FORM_udata or DW_FORM_dataXX),
1581	 * dwarf_formudata() will return failure (i.e. not DW_DLV_OK) and set
1582	 * derr with the specific error value.
1583	 */
1584	switch (form) {
1585	case DW_FORM_sdata:
1586		if (dwarf_formsdata(attr, &sval, &derr) == DW_DLV_OK) {
1587			ar->ctr_nelems = sval + adj;
1588			goto done;
1589		}
1590		break;
1591	case DW_FORM_udata:
1592	default:
1593		if (dwarf_formudata(attr, &uval, &derr) == DW_DLV_OK) {
1594			ar->ctr_nelems = uval + adj;
1595			goto done;
1596		}
1597		break;
1598	}
1599
1600	if (dwarf_get_FORM_name(form, &formstr) != DW_DLV_OK)
1601		formstr = "unknown DWARF form";
1602
1603	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1604	    "failed to get %s (%hu) value for DW_AT_upper_bound: %s\n",
1605	    formstr, form, dwarf_errmsg(derr));
1606	ret = ECTF_CONVBKERR;
1607
1608done:
1609	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
1610	return (ret);
1611}
1612
1613static int
1614ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp,
1615    ctf_id_t base, int isroot)
1616{
1617	int ret;
1618	Dwarf_Die sib;
1619	ctf_arinfo_t ar;
1620
1621	ctf_dprintf("creating array range\n");
1622
1623	if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0)
1624		return (ret);
1625	if (sib != NULL) {
1626		ctf_id_t id;
1627		if ((ret = ctf_dwarf_create_array_range(cup, sib, &id,
1628		    base, CTF_ADD_NONROOT)) != 0)
1629			return (ret);
1630		ar.ctr_contents = id;
1631	} else {
1632		ar.ctr_contents = base;
1633	}
1634
1635	if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR)
1636		return (ctf_errno(cup->cu_ctfp));
1637
1638	if ((ret = ctf_dwarf_array_upper_bound(cup, range, &ar)) != 0)
1639		return (ret);
1640
1641	if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR)
1642		return (ctf_errno(cup->cu_ctfp));
1643
1644	return (0);
1645}
1646
1647/*
1648 * Try and create an array type. First, the kind of the array is specified in
1649 * the DW_AT_type entry. Next, the number of entries is stored in a more
1650 * complicated form, we should have a child that has the DW_TAG_subrange type.
1651 */
1652static int
1653ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1654{
1655	int ret;
1656	Dwarf_Die tdie, rdie;
1657	ctf_id_t tid;
1658	Dwarf_Half rtag;
1659
1660	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1661		return (ret);
1662	if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1663	    CTF_ADD_NONROOT)) != 0)
1664		return (ret);
1665
1666	if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0)
1667		return (ret);
1668	if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0)
1669		return (ret);
1670	if (rtag != DW_TAG_subrange_type) {
1671		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1672		    "encountered array without DW_TAG_subrange_type child\n");
1673		return (ECTF_CONVBKERR);
1674	}
1675
1676	/*
1677	 * The compiler may opt to describe a multi-dimensional array as one
1678	 * giant array or it may opt to instead encode it as a series of
1679	 * subranges. If it's the latter, then for each subrange we introduce a
1680	 * type. We can always use the base type.
1681	 */
1682	if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid,
1683	    isroot)) != 0)
1684		return (ret);
1685	ctf_dprintf("Got back id %d\n", *idp);
1686	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1687}
1688
1689/*
1690 * Given "const int const_array3[11]", GCC7 at least will create a DIE tree of
1691 * DW_TAG_const_type:DW_TAG_array_type:DW_Tag_const_type:<member_type>.
1692 *
1693 * Given C's syntax, this renders out as "const const int const_array3[11]".  To
1694 * get closer to round-tripping (and make the unit tests work), we'll peek for
1695 * this case, and avoid adding the extraneous qualifier if we see that the
1696 * underlying array referent already has the same qualifier.
1697 *
1698 * This is unfortunately less trivial than it could be: this issue applies to
1699 * qualifier sets like "const volatile", as well as multi-dimensional arrays, so
1700 * we need to descend down those.
1701 *
1702 * Returns CTF_ERR on error, or a boolean value otherwise.
1703 */
1704static int
1705needed_array_qualifier(ctf_cu_t *cup, int kind, ctf_id_t ref_id)
1706{
1707	const ctf_type_t *t;
1708	ctf_arinfo_t arinfo;
1709	int akind;
1710
1711	if (kind != CTF_K_CONST && kind != CTF_K_VOLATILE &&
1712	    kind != CTF_K_RESTRICT)
1713		return (1);
1714
1715	if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, ref_id)) == NULL)
1716		return (CTF_ERR);
1717
1718	if (LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info) != CTF_K_ARRAY)
1719		return (1);
1720
1721	if (ctf_dyn_array_info(cup->cu_ctfp, ref_id, &arinfo) != 0)
1722		return (CTF_ERR);
1723
1724	ctf_id_t id = arinfo.ctr_contents;
1725
1726	for (;;) {
1727		if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, id)) == NULL)
1728			return (CTF_ERR);
1729
1730		akind = LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info);
1731
1732		if (akind == kind)
1733			break;
1734
1735		if (akind == CTF_K_ARRAY) {
1736			if (ctf_dyn_array_info(cup->cu_ctfp,
1737			    id, &arinfo) != 0)
1738				return (CTF_ERR);
1739			id = arinfo.ctr_contents;
1740			continue;
1741		}
1742
1743		if (akind != CTF_K_CONST && akind != CTF_K_VOLATILE &&
1744		    akind != CTF_K_RESTRICT)
1745			break;
1746
1747		id = t->ctt_type;
1748	}
1749
1750	if (kind == akind) {
1751		ctf_dprintf("ignoring extraneous %s qualifier for array %d\n",
1752		    ctf_kind_name(cup->cu_ctfp, kind), ref_id);
1753	}
1754
1755	return (kind != akind);
1756}
1757
1758static int
1759ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1760    int kind, int isroot)
1761{
1762	int ret;
1763	ctf_id_t id;
1764	Dwarf_Die tdie;
1765	char *name;
1766	size_t namelen;
1767
1768	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1769	    ret != ENOENT)
1770		return (ret);
1771	if (ret == ENOENT) {
1772		name = NULL;
1773		namelen = 0;
1774	} else {
1775		namelen = strlen(name);
1776	}
1777
1778	ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
1779
1780	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
1781		if (ret != ENOENT) {
1782			ctf_free(name, namelen);
1783			return (ret);
1784		}
1785		if ((id = ctf_dwarf_void(cup)) == CTF_ERR) {
1786			ctf_free(name, namelen);
1787			return (ctf_errno(cup->cu_ctfp));
1788		}
1789	} else {
1790		if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
1791		    CTF_ADD_NONROOT)) != 0) {
1792			ctf_free(name, namelen);
1793			return (ret);
1794		}
1795	}
1796
1797	if ((ret = needed_array_qualifier(cup, kind, id)) <= 0) {
1798		if (ret != 0) {
1799			ret = (ctf_errno(cup->cu_ctfp));
1800		} else {
1801			*idp = id;
1802		}
1803
1804		ctf_free(name, namelen);
1805		return (ret);
1806	}
1807
1808	if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) ==
1809	    CTF_ERR) {
1810		ctf_free(name, namelen);
1811		return (ctf_errno(cup->cu_ctfp));
1812	}
1813
1814	ctf_free(name, namelen);
1815	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1816}
1817
1818static int
1819ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1820{
1821	size_t size = 0;
1822	Dwarf_Die child;
1823	Dwarf_Unsigned dw;
1824	ctf_id_t id;
1825	char *name;
1826	int ret;
1827
1828	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1829	    ret != ENOENT)
1830		return (ret);
1831	if (ret == ENOENT)
1832		name = NULL;
1833
1834	/*
1835	 * Enumerations may have a size associated with them, particularly if
1836	 * they're packed. Note, a Dwarf_Unsigned is larger than a size_t on an
1837	 * ILP32 system.
1838	 */
1839	if (ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &dw) == 0 &&
1840	    dw < SIZE_MAX) {
1841		size = (size_t)dw;
1842	}
1843
1844	id = ctf_add_enum(cup->cu_ctfp, isroot, name, size);
1845	ctf_dprintf("added enum %s (%d)\n", name, id);
1846	if (name != NULL)
1847		ctf_free(name, strlen(name) + 1);
1848	if (id == CTF_ERR)
1849		return (ctf_errno(cup->cu_ctfp));
1850	*idp = id;
1851	if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0)
1852		return (ret);
1853
1854	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) {
1855		if (ret == ENOENT)
1856			ret = 0;
1857		return (ret);
1858	}
1859
1860	while (child != NULL) {
1861		Dwarf_Half tag;
1862		Dwarf_Signed sval;
1863		Dwarf_Unsigned uval;
1864		Dwarf_Die arg = child;
1865		int eval;
1866
1867		if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0)
1868			return (ret);
1869
1870		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1871			return (ret);
1872
1873		if (tag != DW_TAG_enumerator) {
1874			if ((ret = ctf_dwarf_convert_type(cup, arg, NULL,
1875			    CTF_ADD_NONROOT)) != 0)
1876				return (ret);
1877			continue;
1878		}
1879
1880		/*
1881		 * DWARF v4 section 5.7 tells us we'll always have names.
1882		 */
1883		if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0)
1884			return (ret);
1885
1886		/*
1887		 * We have to be careful here: newer GCCs generate DWARF where
1888		 * an unsigned value will happily pass ctf_dwarf_signed().
1889		 * Since negative values will fail ctf_dwarf_unsigned(), we try
1890		 * that first to make sure we get the right value.
1891		 */
1892		if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value,
1893		    &uval)) == 0) {
1894			eval = (int)uval;
1895		} else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value,
1896		    &sval)) == 0) {
1897			eval = sval;
1898		}
1899
1900		if (ret != 0) {
1901			if (ret != ENOENT)
1902				return (ret);
1903
1904			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1905			    "encountered enumeration without constant value\n");
1906			return (ECTF_CONVBKERR);
1907		}
1908
1909		ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval);
1910		if (ret == CTF_ERR) {
1911			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1912			    "failed to add enumarator %s (%d) to %d\n",
1913			    name, eval, id);
1914			ctf_free(name, strlen(name) + 1);
1915			return (ctf_errno(cup->cu_ctfp));
1916		}
1917		ctf_free(name, strlen(name) + 1);
1918	}
1919
1920	return (0);
1921}
1922
1923/*
1924 * For a function pointer, walk over and process all of its children, unless we
1925 * encounter one that's just a declaration. In which case, we error on it.
1926 */
1927static int
1928ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1929{
1930	int ret;
1931	Dwarf_Bool b;
1932	ctf_funcinfo_t fi;
1933	Dwarf_Die retdie;
1934	ctf_id_t *argv = NULL;
1935
1936	bzero(&fi, sizeof (ctf_funcinfo_t));
1937
1938	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
1939		if (ret != ENOENT)
1940			return (ret);
1941	} else {
1942		if (b != 0)
1943			return (EPROTOTYPE);
1944	}
1945
1946	/*
1947	 * Return type is in DW_AT_type, if none, it returns void.
1948	 */
1949	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) {
1950		if (ret != ENOENT)
1951			return (ret);
1952		if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR)
1953			return (ctf_errno(cup->cu_ctfp));
1954	} else {
1955		if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return,
1956		    CTF_ADD_NONROOT)) != 0)
1957			return (ret);
1958	}
1959
1960	if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) {
1961		return (ret);
1962	}
1963
1964	if (fi.ctc_argc != 0) {
1965		argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
1966		if (argv == NULL)
1967			return (ENOMEM);
1968
1969		if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) {
1970			ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1971			return (ret);
1972		}
1973	}
1974
1975	if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) ==
1976	    CTF_ERR) {
1977		ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1978		return (ctf_errno(cup->cu_ctfp));
1979	}
1980
1981	ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1982	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1983}
1984
1985static int
1986ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1987    int isroot)
1988{
1989	int ret;
1990	Dwarf_Off offset;
1991	Dwarf_Half tag;
1992	ctf_dwmap_t lookup, *map;
1993	ctf_id_t id;
1994
1995	if (idp == NULL)
1996		idp = &id;
1997
1998	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
1999		return (ret);
2000
2001	if (offset > cup->cu_maxoff) {
2002		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2003		    "die offset %llu beyond maximum for header %llu\n",
2004		    offset, cup->cu_maxoff);
2005		return (ECTF_CONVBKERR);
2006	}
2007
2008	/*
2009	 * If we've already added an entry for this offset, then we're done.
2010	 */
2011	lookup.cdm_off = offset;
2012	if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) {
2013		*idp = map->cdm_id;
2014		return (0);
2015	}
2016
2017	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2018		return (ret);
2019
2020	ret = ENOTSUP;
2021	switch (tag) {
2022	case DW_TAG_base_type:
2023		ctf_dprintf("base\n");
2024		ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset);
2025		break;
2026	case DW_TAG_array_type:
2027		ctf_dprintf("array\n");
2028		ret = ctf_dwarf_create_array(cup, die, idp, isroot);
2029		break;
2030	case DW_TAG_enumeration_type:
2031		ctf_dprintf("enum\n");
2032		ret = ctf_dwarf_create_enum(cup, die, idp, isroot);
2033		break;
2034	case DW_TAG_pointer_type:
2035		ctf_dprintf("pointer\n");
2036		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER,
2037		    isroot);
2038		break;
2039	case DW_TAG_structure_type:
2040		ctf_dprintf("struct\n");
2041		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT,
2042		    isroot);
2043		break;
2044	case DW_TAG_subroutine_type:
2045		ctf_dprintf("fptr\n");
2046		ret = ctf_dwarf_create_fptr(cup, die, idp, isroot);
2047		break;
2048	case DW_TAG_typedef:
2049		ctf_dprintf("typedef\n");
2050		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF,
2051		    isroot);
2052		break;
2053	case DW_TAG_union_type:
2054		ctf_dprintf("union\n");
2055		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION,
2056		    isroot);
2057		break;
2058	case DW_TAG_const_type:
2059		ctf_dprintf("const\n");
2060		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST,
2061		    isroot);
2062		break;
2063	case DW_TAG_volatile_type:
2064		ctf_dprintf("volatile\n");
2065		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE,
2066		    isroot);
2067		break;
2068	case DW_TAG_restrict_type:
2069		ctf_dprintf("restrict\n");
2070		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT,
2071		    isroot);
2072		break;
2073	default:
2074		ctf_dprintf("ignoring tag type %x\n", tag);
2075		*idp = CTF_ERR;
2076		ret = 0;
2077		break;
2078	}
2079	ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
2080	    ret);
2081
2082	return (ret);
2083}
2084
2085static int
2086ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die)
2087{
2088	int ret;
2089	Dwarf_Die child;
2090
2091	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2092		return (ret);
2093
2094	if (child == NULL)
2095		return (0);
2096
2097	return (ctf_dwarf_convert_die(cup, die));
2098}
2099
2100static int
2101ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2102    boolean_t fptr)
2103{
2104	int ret;
2105	Dwarf_Die child, sib, arg;
2106
2107	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2108		return (ret);
2109
2110	arg = child;
2111	while (arg != NULL) {
2112		Dwarf_Half tag;
2113
2114		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2115			return (ret);
2116
2117		/*
2118		 * We have to check for a varargs type declaration. This will
2119		 * happen in one of two ways. If we have a function pointer
2120		 * type, then it'll be done with a tag of type
2121		 * DW_TAG_unspecified_parameters. However, it only means we have
2122		 * a variable number of arguments, if we have more than one
2123		 * argument found so far. Otherwise, when we have a function
2124		 * type, it instead uses a formal parameter whose name is '...'
2125		 * to indicate a variable arguments member.
2126		 *
2127		 * Also, if we have a function pointer, then we have to expect
2128		 * that we might not get a name at all.
2129		 */
2130		if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
2131			char *name;
2132			if ((ret = ctf_dwarf_string(cup, die, DW_AT_name,
2133			    &name)) != 0)
2134				return (ret);
2135			if (strcmp(name, DWARF_VARARGS_NAME) == 0)
2136				fip->ctc_flags |= CTF_FUNC_VARARG;
2137			else
2138				fip->ctc_argc++;
2139			ctf_free(name, strlen(name) + 1);
2140		} else if (tag == DW_TAG_formal_parameter) {
2141			fip->ctc_argc++;
2142		} else if (tag == DW_TAG_unspecified_parameters &&
2143		    fip->ctc_argc > 0) {
2144			fip->ctc_flags |= CTF_FUNC_VARARG;
2145		}
2146		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2147			return (ret);
2148		arg = sib;
2149	}
2150
2151	return (0);
2152}
2153
2154static int
2155ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2156    ctf_id_t *argv)
2157{
2158	int ret;
2159	int i = 0;
2160	Dwarf_Die child, sib, arg;
2161
2162	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2163		return (ret);
2164
2165	arg = child;
2166	while (arg != NULL) {
2167		Dwarf_Half tag;
2168
2169		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2170			return (ret);
2171		if (tag == DW_TAG_formal_parameter) {
2172			Dwarf_Die tdie;
2173
2174			if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type,
2175			    &tdie)) != 0)
2176				return (ret);
2177
2178			if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i],
2179			    CTF_ADD_ROOT)) != 0)
2180				return (ret);
2181			i++;
2182
2183			/*
2184			 * Once we hit argc entries, we're done. This ensures we
2185			 * don't accidentally hit a varargs which should be the
2186			 * last entry.
2187			 */
2188			if (i == fip->ctc_argc)
2189				break;
2190		}
2191
2192		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2193			return (ret);
2194		arg = sib;
2195	}
2196
2197	return (0);
2198}
2199
2200static int
2201ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die)
2202{
2203	ctf_dwfunc_t *cdf;
2204	Dwarf_Die tdie;
2205	Dwarf_Bool b;
2206	char *name;
2207	int ret;
2208
2209	/*
2210	 * Functions that don't have a name are generally functions that have
2211	 * been inlined and thus most information about them has been lost. If
2212	 * we can't get a name, then instead of returning ENOENT, we silently
2213	 * swallow the error.
2214	 */
2215	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) {
2216		if (ret == ENOENT)
2217			return (0);
2218		return (ret);
2219	}
2220
2221	ctf_dprintf("beginning work on function %s (die %llx)\n",
2222	    name, ctf_die_offset(die));
2223
2224	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
2225		if (ret != ENOENT)
2226			return (ret);
2227	} else if (b != 0) {
2228		/*
2229		 * GCC7 at least creates empty DW_AT_declarations for functions
2230		 * defined in headers.  As they lack details on the function
2231		 * prototype, we need to ignore them.  If we later actually
2232		 * see the relevant function's definition, we will see another
2233		 * DW_TAG_subprogram that is more complete.
2234		 */
2235		ctf_dprintf("ignoring declaration of function %s (die %llx)\n",
2236		    name, ctf_die_offset(die));
2237		return (0);
2238	}
2239
2240	if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
2241		ctf_free(name, strlen(name) + 1);
2242		return (ENOMEM);
2243	}
2244	bzero(cdf, sizeof (ctf_dwfunc_t));
2245	cdf->cdf_name = name;
2246
2247	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) {
2248		if ((ret = ctf_dwarf_convert_type(cup, tdie,
2249		    &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
2250			ctf_free(name, strlen(name) + 1);
2251			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2252			return (ret);
2253		}
2254	} else if (ret != ENOENT) {
2255		ctf_free(name, strlen(name) + 1);
2256		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2257		return (ret);
2258	} else {
2259		if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) ==
2260		    CTF_ERR) {
2261			ctf_free(name, strlen(name) + 1);
2262			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2263			return (ctf_errno(cup->cu_ctfp));
2264		}
2265	}
2266
2267	/*
2268	 * A function has a number of children, some of which may not be ones we
2269	 * care about. Children that we care about have a type of
2270	 * DW_TAG_formal_parameter. We're going to do two passes, the first to
2271	 * count the arguments, the second to process them. Afterwards, we
2272	 * should be good to go ahead and add this function.
2273	 *
2274	 * Note, we already got the return type by going in and grabbing it out
2275	 * of the DW_AT_type.
2276	 */
2277	if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip,
2278	    B_FALSE)) != 0) {
2279		ctf_free(name, strlen(name) + 1);
2280		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2281		return (ret);
2282	}
2283
2284	ctf_dprintf("beginning to convert function arguments %s\n", name);
2285	if (cdf->cdf_fip.ctc_argc != 0) {
2286		uint_t argc = cdf->cdf_fip.ctc_argc;
2287		cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
2288		if (cdf->cdf_argv == NULL) {
2289			ctf_free(name, strlen(name) + 1);
2290			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2291			return (ENOMEM);
2292		}
2293		if ((ret = ctf_dwarf_convert_fargs(cup, die,
2294		    &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
2295			ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
2296			ctf_free(name, strlen(name) + 1);
2297			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2298			return (ret);
2299		}
2300	} else {
2301		cdf->cdf_argv = NULL;
2302	}
2303
2304	if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) {
2305		ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
2306		    cdf->cdf_fip.ctc_argc);
2307		ctf_free(name, strlen(name) + 1);
2308		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2309		return (ret);
2310	}
2311
2312	ctf_list_append(&cup->cu_funcs, cdf);
2313	return (ret);
2314}
2315
2316/*
2317 * Convert variables, but only if they're not prototypes and have names.
2318 */
2319static int
2320ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die)
2321{
2322	int ret;
2323	char *name;
2324	Dwarf_Bool b;
2325	Dwarf_Die tdie;
2326	ctf_id_t id;
2327	ctf_dwvar_t *cdv;
2328
2329	/* Skip "Non-Defining Declarations" */
2330	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) {
2331		if (b != 0)
2332			return (0);
2333	} else if (ret != ENOENT) {
2334		return (ret);
2335	}
2336
2337	/*
2338	 * If we find a DIE of "Declarations Completing Non-Defining
2339	 * Declarations", we will use the referenced type's DIE.  This isn't
2340	 * quite correct, e.g. DW_AT_decl_line will be the forward declaration
2341	 * not this site.  It's sufficient for what we need, however: in
2342	 * particular, we should find DW_AT_external as needed there.
2343	 */
2344	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification,
2345	    &tdie)) == 0) {
2346		Dwarf_Off offset;
2347		if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0)
2348			return (ret);
2349		ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n",
2350		    ctf_die_offset(die), ctf_die_offset(tdie));
2351		die = tdie;
2352	} else if (ret != ENOENT) {
2353		return (ret);
2354	}
2355
2356	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
2357	    ret != ENOENT)
2358		return (ret);
2359	if (ret == ENOENT)
2360		return (0);
2361
2362	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
2363		ctf_free(name, strlen(name) + 1);
2364		return (ret);
2365	}
2366
2367	if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
2368	    CTF_ADD_ROOT)) != 0)
2369		return (ret);
2370
2371	if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
2372		ctf_free(name, strlen(name) + 1);
2373		return (ENOMEM);
2374	}
2375
2376	cdv->cdv_name = name;
2377	cdv->cdv_type = id;
2378
2379	if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) {
2380		ctf_free(cdv, sizeof (ctf_dwvar_t));
2381		ctf_free(name, strlen(name) + 1);
2382		return (ret);
2383	}
2384
2385	ctf_list_append(&cup->cu_vars, cdv);
2386	return (0);
2387}
2388
2389/*
2390 * Walk through our set of top-level types and process them.
2391 */
2392static int
2393ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die)
2394{
2395	int ret;
2396	Dwarf_Off offset;
2397	Dwarf_Half tag;
2398
2399	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
2400		return (ret);
2401
2402	if (offset > cup->cu_maxoff) {
2403		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2404		    "die offset %llu beyond maximum for header %llu\n",
2405		    offset, cup->cu_maxoff);
2406		return (ECTF_CONVBKERR);
2407	}
2408
2409	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2410		return (ret);
2411
2412	ret = 0;
2413	switch (tag) {
2414	case DW_TAG_subprogram:
2415		ctf_dprintf("top level func\n");
2416		ret = ctf_dwarf_convert_function(cup, die);
2417		break;
2418	case DW_TAG_variable:
2419		ctf_dprintf("top level var\n");
2420		ret = ctf_dwarf_convert_variable(cup, die);
2421		break;
2422	case DW_TAG_lexical_block:
2423		ctf_dprintf("top level block\n");
2424		ret = ctf_dwarf_walk_lexical(cup, die);
2425		break;
2426	case DW_TAG_enumeration_type:
2427	case DW_TAG_structure_type:
2428	case DW_TAG_typedef:
2429	case DW_TAG_union_type:
2430		ctf_dprintf("top level type\n");
2431		ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE);
2432		break;
2433	default:
2434		break;
2435	}
2436
2437	return (ret);
2438}
2439
2440
2441/*
2442 * We're given a node. At this node we need to convert it and then proceed to
2443 * convert any siblings that are associaed with this die.
2444 */
2445static int
2446ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die)
2447{
2448	while (die != NULL) {
2449		int ret;
2450		Dwarf_Die sib;
2451
2452		if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0)
2453			return (ret);
2454
2455		if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0)
2456			return (ret);
2457		die = sib;
2458	}
2459	return (0);
2460}
2461
2462static int
2463ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2464{
2465	ctf_dwmap_t *map;
2466
2467	for (map = avl_first(&cup->cu_map); map != NULL;
2468	    map = AVL_NEXT(&cup->cu_map, map)) {
2469		int ret;
2470		if (map->cdm_fix == B_FALSE)
2471			continue;
2472		if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2473		    addpass)) != 0)
2474			return (ret);
2475	}
2476
2477	return (0);
2478}
2479
2480/*
2481 * The DWARF information about a symbol and the information in the symbol table
2482 * may not be the same due to symbol reduction that is performed by ld due to a
2483 * mapfile or other such directive. We process weak symbols at a later time.
2484 *
2485 * The following are the rules that we employ:
2486 *
2487 * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2488 * with the same name.
2489 *
2490 * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2491 * with the same name and the same file. This case may happen due to mapfile
2492 * reduction.
2493 *
2494 * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2495 * with the same name and the same file.
2496 *
2497 * 4. A DWARF function that has the same name as the symbol table entry, but the
2498 * files do not match. This is considered a 'fuzzy' match. This may also happen
2499 * due to a mapfile reduction. Fuzzy matching is only used when we know that the
2500 * file in question refers to the primary object. This is because when a symbol
2501 * is reduced in a mapfile, it's always going to be tagged as a local value in
2502 * the generated output and it is considered as to belong to the primary file
2503 * which is the first STT_FILE symbol we see.
2504 */
2505static boolean_t
2506ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2507    uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2508    boolean_t dwarf_global, boolean_t *is_fuzzy)
2509{
2510	*is_fuzzy = B_FALSE;
2511
2512	if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2513		return (B_FALSE);
2514	}
2515
2516	if (strcmp(symtab_name, dwarf_name) != 0) {
2517		return (B_FALSE);
2518	}
2519
2520	if (symtab_bind == STB_GLOBAL) {
2521		return (dwarf_global);
2522	}
2523
2524	if (strcmp(symtab_file, dwarf_file) == 0) {
2525		return (B_TRUE);
2526	}
2527
2528	if (dwarf_global) {
2529		*is_fuzzy = B_TRUE;
2530		return (B_TRUE);
2531	}
2532
2533	return (B_FALSE);
2534}
2535
2536static ctf_dwfunc_t *
2537ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2538    uint_t bind, boolean_t primary)
2539{
2540	ctf_dwfunc_t *cdf, *fuzzy = NULL;
2541
2542	if (bind == STB_WEAK)
2543		return (NULL);
2544
2545	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2546		return (NULL);
2547
2548	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2549	    cdf = ctf_list_next(cdf)) {
2550		boolean_t is_fuzzy = B_FALSE;
2551
2552		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2553		    cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2554			if (is_fuzzy) {
2555				if (primary) {
2556					fuzzy = cdf;
2557				}
2558				continue;
2559			} else {
2560				return (cdf);
2561			}
2562		}
2563	}
2564
2565	return (fuzzy);
2566}
2567
2568static ctf_dwvar_t *
2569ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2570    uint_t bind, boolean_t primary)
2571{
2572	ctf_dwvar_t *cdv, *fuzzy = NULL;
2573
2574	if (bind == STB_WEAK)
2575		return (NULL);
2576
2577	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2578		return (NULL);
2579
2580	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2581	    cdv = ctf_list_next(cdv)) {
2582		boolean_t is_fuzzy = B_FALSE;
2583
2584		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2585		    cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2586			if (is_fuzzy) {
2587				if (primary) {
2588					fuzzy = cdv;
2589				}
2590			} else {
2591				return (cdv);
2592			}
2593		}
2594	}
2595
2596	return (fuzzy);
2597}
2598
2599static int
2600ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2601    const char *file, const char *name, boolean_t primary, void *arg)
2602{
2603	int ret;
2604	uint_t bind, type;
2605	ctf_cu_t *cup = arg;
2606
2607	bind = GELF_ST_BIND(symp->st_info);
2608	type = GELF_ST_TYPE(symp->st_info);
2609
2610	/*
2611	 * Come back to weak symbols in another pass
2612	 */
2613	if (bind == STB_WEAK)
2614		return (0);
2615
2616	if (type == STT_OBJECT) {
2617		ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2618		    bind, primary);
2619		if (cdv == NULL)
2620			return (0);
2621		ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2622		ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2623	} else {
2624		ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2625		    bind, primary);
2626		if (cdf == NULL)
2627			return (0);
2628		ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2629		    cdf->cdf_argv);
2630		ctf_dprintf("added function %s\n", name);
2631	}
2632
2633	if (ret == CTF_ERR) {
2634		return (ctf_errno(cup->cu_ctfp));
2635	}
2636
2637	return (0);
2638}
2639
2640static int
2641ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2642{
2643	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2644}
2645
2646/*
2647 * If we have a weak symbol, attempt to find the strong symbol it will resolve
2648 * to.  Note: the code where this actually happens is in sym_process() in
2649 * cmd/sgs/libld/common/syms.c
2650 *
2651 * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
2652 * a candidate, it must:
2653 *
2654 * - have the same type (function, object)
2655 * - have the same value (address)
2656 * - have the same size
2657 * - not be another weak symbol
2658 * - belong to the same section (checked via section index)
2659 *
2660 * To perform this check, we first iterate over the symbol table. For each weak
2661 * symbol that we encounter, we then do a second walk over the symbol table,
2662 * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2663 * either a local or global symbol. If we find a global symbol then we go with
2664 * it and stop searching for additional matches.
2665 *
2666 * If instead, we find a local symbol, things are more complicated. The first
2667 * thing we do is to try and see if we have file information about both symbols
2668 * (STT_FILE). If they both have file information and it matches, then we treat
2669 * that as a good match and stop searching for additional matches.
2670 *
2671 * Otherwise, this means we have a non-matching file and a local symbol. We
2672 * treat this as a candidate and if we find a better match (one of the two cases
2673 * above), use that instead. There are two different ways this can happen.
2674 * Either this is a completely different symbol, or it's a once-global symbol
 * that was scoped to local via a mapfile.  In the latter case, curfile is
2676 * likely inaccurate since the linker does not preserve the needed curfile in
2677 * the order of the symbol table (see the comments about locally scoped symbols
2678 * in libld's update_osym()).  As we can't tell this case from the former one,
2679 * we use this symbol iff no other matching symbol is found.
2680 *
2681 * What we really need here is a SUNW section containing weak<->strong mappings
2682 * that we can consume.
2683 */
/*
 * State that is shared between ctf_dwarf_conv_weaks_cb(), which fills it in
 * for a single weak symbol, and ctf_dwarf_conv_check_weak(), which is run over
 * the symbol table to look for that symbol's match.
 */
typedef struct ctf_dwarf_weak_arg {
	/* The weak symbol that we are trying to find a match for */
	const Elf64_Sym *cweak_symp;
	/* File that the weak symbol was reported with; may be NULL */
	const char *cweak_file;
	/* Set when a local symbol whose file doesn't match has been seen */
	boolean_t cweak_candidate;
	/* Symbol table index of the most recent match or candidate */
	ulong_t cweak_idx;
} ctf_dwarf_weak_arg_t;
2690
2691static int
2692ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2693    const char *name, boolean_t primary, void *arg)
2694{
2695	ctf_dwarf_weak_arg_t *cweak = arg;
2696
2697	const Elf64_Sym *wsymp = cweak->cweak_symp;
2698
2699	ctf_dprintf("comparing weak to %s\n", name);
2700
2701	if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2702		return (0);
2703	}
2704
2705	if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2706		return (0);
2707	}
2708
2709	if (wsymp->st_value != symp->st_value) {
2710		return (0);
2711	}
2712
2713	if (wsymp->st_size != symp->st_size) {
2714		return (0);
2715	}
2716
2717	if (wsymp->st_shndx != symp->st_shndx) {
2718		return (0);
2719	}
2720
2721	/*
2722	 * Check if it's a weak candidate.
2723	 */
2724	if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
2725	    (file == NULL || cweak->cweak_file == NULL ||
2726	    strcmp(file, cweak->cweak_file) != 0)) {
2727		cweak->cweak_candidate = B_TRUE;
2728		cweak->cweak_idx = idx;
2729		return (0);
2730	}
2731
2732	/*
2733	 * Found a match, break.
2734	 */
2735	cweak->cweak_idx = idx;
2736	return (1);
2737}
2738
2739static int
2740ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2741{
2742	ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx);
2743
2744	/*
2745	 * If we matched something that for some reason didn't have type data,
2746	 * we don't consider that a fatal error and silently swallow it.
2747	 */
2748	if (id == CTF_ERR) {
2749		if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT)
2750			return (0);
2751		else
2752			return (ctf_errno(cup->cu_ctfp));
2753	}
2754
2755	if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR)
2756		return (ctf_errno(cup->cu_ctfp));
2757
2758	return (0);
2759}
2760
2761static int
2762ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2763{
2764	int ret;
2765	ctf_funcinfo_t fip;
2766	ctf_id_t *args = NULL;
2767
2768	if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) {
2769		if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT)
2770			return (0);
2771		else
2772			return (ctf_errno(cup->cu_ctfp));
2773	}
2774
2775	if (fip.ctc_argc != 0) {
2776		args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
2777		if (args == NULL)
2778			return (ENOMEM);
2779
2780		if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2781		    CTF_ERR) {
2782			ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2783			return (ctf_errno(cup->cu_ctfp));
2784		}
2785	}
2786
2787	ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2788	if (args != NULL)
2789		ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2790	if (ret == CTF_ERR)
2791		return (ctf_errno(cup->cu_ctfp));
2792
2793	return (0);
2794}
2795
2796static int
2797ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2798    const char *name, boolean_t primary, void *arg)
2799{
2800	int ret, type;
2801	ctf_dwarf_weak_arg_t cweak;
2802	ctf_cu_t *cup = arg;
2803
2804	/*
2805	 * We only care about weak symbols.
2806	 */
2807	if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2808		return (0);
2809
2810	type = GELF_ST_TYPE(symp->st_info);
2811	ASSERT(type == STT_OBJECT || type == STT_FUNC);
2812
2813	/*
2814	 * For each weak symbol we encounter, we need to do a second iteration
2815	 * to try and find a match. We should probably think about other
2816	 * techniques to try and save us time in the future.
2817	 */
2818	cweak.cweak_symp = symp;
2819	cweak.cweak_file = file;
2820	cweak.cweak_candidate = B_FALSE;
2821	cweak.cweak_idx = 0;
2822
2823	ctf_dprintf("Trying to find weak equiv for %s\n", name);
2824
2825	ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2826	VERIFY(ret == 0 || ret == 1);
2827
2828	/*
2829	 * Nothing was ever found, we're not going to add anything for this
2830	 * entry.
2831	 */
2832	if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2833		ctf_dprintf("found no weak match for %s\n", name);
2834		return (0);
2835	}
2836
2837	/*
2838	 * Now, finally go and add the type based on the match.
2839	 */
2840	ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2841	if (type == STT_OBJECT) {
2842		ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2843	} else {
2844		ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2845	}
2846
2847	return (ret);
2848}
2849
2850static int
2851ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2852{
2853	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2854}
2855
2856/* ARGSUSED */
2857static int
2858ctf_dwarf_convert_one(void *arg, void *unused)
2859{
2860	int ret;
2861	ctf_file_t *dedup;
2862	ctf_cu_t *cup = arg;
2863
2864	ctf_dprintf("converting die: %s\n", cup->cu_name);
2865	ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2866	VERIFY(cup != NULL);
2867
2868	ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2869	ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2870	    ret);
2871	if (ret != 0) {
2872		return (ret);
2873	}
2874	if (ctf_update(cup->cu_ctfp) != 0) {
2875		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2876		    "failed to update output ctf container"));
2877	}
2878
2879	ret = ctf_dwarf_fixup_die(cup, B_FALSE);
2880	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2881	    ret);
2882	if (ret != 0) {
2883		return (ret);
2884	}
2885	if (ctf_update(cup->cu_ctfp) != 0) {
2886		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2887		    "failed to update output ctf container"));
2888	}
2889
2890	ret = ctf_dwarf_fixup_die(cup, B_TRUE);
2891	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2892	    ret);
2893	if (ret != 0) {
2894		return (ret);
2895	}
2896	if (ctf_update(cup->cu_ctfp) != 0) {
2897		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2898		    "failed to update output ctf container"));
2899	}
2900
2901
2902	if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) {
2903		return (ctf_dwarf_error(cup, NULL, ret,
2904		    "failed to convert strong functions and variables"));
2905	}
2906
2907	if (ctf_update(cup->cu_ctfp) != 0) {
2908		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2909		    "failed to update output ctf container"));
2910	}
2911
2912	if (cup->cu_doweaks == B_TRUE) {
2913		if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2914			return (ctf_dwarf_error(cup, NULL, ret,
2915			    "failed to convert weak functions and variables"));
2916		}
2917
2918		if (ctf_update(cup->cu_ctfp) != 0) {
2919			return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2920			    "failed to update output ctf container"));
2921		}
2922	}
2923
2924	ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2925	ctf_dprintf("adding inputs for dedup\n");
2926	if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2927		return (ctf_dwarf_error(cup, NULL, ret,
2928		    "failed to add inputs for merge"));
2929	}
2930
2931	ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2932	if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2933		return (ctf_dwarf_error(cup, NULL, ret,
2934		    "failed to deduplicate die"));
2935	}
2936	ctf_close(cup->cu_ctfp);
2937	cup->cu_ctfp = dedup;
2938	ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2939
2940	return (0);
2941}
2942
2943/*
2944 * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2945 * say in the single node case, it's been saved and the entry here has been set
2946 * to NULL, which ctf_close happily ignores.
2947 */
2948static void
2949ctf_dwarf_free_die(ctf_cu_t *cup)
2950{
2951	ctf_dwfunc_t *cdf, *ndf;
2952	ctf_dwvar_t *cdv, *ndv;
2953	ctf_dwbitf_t *cdb, *ndb;
2954	ctf_dwmap_t *map;
2955	void *cookie;
2956	Dwarf_Error derr;
2957
2958	ctf_dprintf("Beginning to free die: %p\n", cup);
2959	cup->cu_elf = NULL;
2960	ctf_dprintf("Trying to free name: %p\n", cup->cu_name);
2961	if (cup->cu_name != NULL)
2962		ctf_free(cup->cu_name, strlen(cup->cu_name) + 1);
2963	ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh);
2964	if (cup->cu_cmh != NULL) {
2965		ctf_merge_fini(cup->cu_cmh);
2966		cup->cu_cmh = NULL;
2967	}
2968
2969	ctf_dprintf("Trying to free functions\n");
2970	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) {
2971		ndf = ctf_list_next(cdf);
2972		ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
2973		if (cdf->cdf_fip.ctc_argc != 0) {
2974			ctf_free(cdf->cdf_argv,
2975			    sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
2976		}
2977		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2978	}
2979
2980	ctf_dprintf("Trying to free variables\n");
2981	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) {
2982		ndv = ctf_list_next(cdv);
2983		ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
2984		ctf_free(cdv, sizeof (ctf_dwvar_t));
2985	}
2986
2987	ctf_dprintf("Trying to free bitfields\n");
2988	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) {
2989		ndb = ctf_list_next(cdb);
2990		ctf_free(cdb, sizeof (ctf_dwbitf_t));
2991	}
2992
2993	ctf_dprintf("Trying to clean up dwarf_t: %p\n", cup->cu_dwarf);
2994	if (cup->cu_dwarf != NULL)
2995		(void) dwarf_finish(cup->cu_dwarf, &derr);
2996	cup->cu_dwarf = NULL;
2997	ctf_close(cup->cu_ctfp);
2998
2999	cookie = NULL;
3000	while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) {
3001		ctf_free(map, sizeof (ctf_dwmap_t));
3002	}
3003	avl_destroy(&cup->cu_map);
3004	cup->cu_errbuf = NULL;
3005}
3006
3007static void
3008ctf_dwarf_free_dies(ctf_cu_t *cdies, int ndies)
3009{
3010	int i;
3011
3012	ctf_dprintf("Beginning to free dies\n");
3013	for (i = 0; i < ndies; i++) {
3014		ctf_dwarf_free_die(&cdies[i]);
3015	}
3016
3017	ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3018}
3019
3020static int
3021ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
3022    char *errbuf, size_t errlen)
3023{
3024	int ret;
3025	Dwarf_Half vers;
3026	Dwarf_Unsigned nexthdr;
3027
3028	while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
3029	    &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
3030		if (ret != DW_DLV_OK) {
3031			(void) snprintf(errbuf, errlen,
3032			    "file does not contain valid DWARF data: %s\n",
3033			    dwarf_errmsg(*derr));
3034			return (ECTF_CONVBKERR);
3035		}
3036
3037		switch (vers) {
3038		case DWARF_VERSION_TWO:
3039		case DWARF_VERSION_FOUR:
3040			break;
3041		default:
3042			(void) snprintf(errbuf, errlen,
3043			    "unsupported DWARF version: %d\n", vers);
3044			return (ECTF_CONVBKERR);
3045		}
3046		*ndies = *ndies + 1;
3047	}
3048
3049	return (0);
3050}
3051
3052static int
3053ctf_dwarf_init_die(int fd, Elf *elf, ctf_cu_t *cup, int ndie, char *errbuf,
3054    size_t errlen)
3055{
3056	int ret;
3057	Dwarf_Unsigned hdrlen, abboff, nexthdr;
3058	Dwarf_Half addrsz, vers;
3059	Dwarf_Unsigned offset = 0;
3060	Dwarf_Error derr;
3061
3062	while ((ret = dwarf_next_cu_header(cup->cu_dwarf, &hdrlen, &vers,
3063	    &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
3064		char *name;
3065		Dwarf_Die cu, child;
3066
3067		/* Based on the counting above, we should be good to go */
3068		VERIFY(ret == DW_DLV_OK);
3069		if (ndie > 0) {
3070			ndie--;
3071			offset = nexthdr;
3072			continue;
3073		}
3074
3075		/*
3076		 * Compilers are apparently inconsistent. Some emit no DWARF for
3077		 * empty files and others emit empty compilation unit.
3078		 */
3079		cup->cu_voidtid = CTF_ERR;
3080		cup->cu_longtid = CTF_ERR;
3081		cup->cu_elf = elf;
3082		cup->cu_maxoff = nexthdr - 1;
3083		cup->cu_vers = vers;
3084		cup->cu_addrsz = addrsz;
3085		cup->cu_ctfp = ctf_fdcreate(fd, &ret);
3086		if (cup->cu_ctfp == NULL)
3087			return (ret);
3088
3089		avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
3090		    offsetof(ctf_dwmap_t, cdm_avl));
3091		cup->cu_errbuf = errbuf;
3092		cup->cu_errlen = errlen;
3093		bzero(&cup->cu_vars, sizeof (ctf_list_t));
3094		bzero(&cup->cu_funcs, sizeof (ctf_list_t));
3095		bzero(&cup->cu_bitfields, sizeof (ctf_list_t));
3096
3097		if ((ret = ctf_dwarf_die_elfenc(elf, cup, errbuf,
3098		    errlen)) != 0)
3099			return (ret);
3100
3101		if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0)
3102			return (ret);
3103
3104		if (cu == NULL) {
3105			(void) snprintf(errbuf, errlen,
3106			    "file does not contain DWARF data");
3107			return (ECTF_CONVNODEBUG);
3108		}
3109
3110		if ((ret = ctf_dwarf_child(cup, cu, &child)) != 0)
3111			return (ret);
3112
3113		if (child == NULL) {
3114			(void) snprintf(errbuf, errlen,
3115			    "file does not contain DWARF data");
3116			return (ECTF_CONVNODEBUG);
3117		}
3118
3119		cup->cu_cuoff = offset;
3120		cup->cu_cu = child;
3121
3122		if ((cup->cu_cmh = ctf_merge_init(fd, &ret)) == NULL)
3123			return (ret);
3124
3125		if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) {
3126			size_t len = strlen(name) + 1;
3127			char *b = basename(name);
3128			cup->cu_name = strdup(b);
3129			ctf_free(name, len);
3130		}
3131		break;
3132	}
3133
3134	return (0);
3135}
3136
3137/*
3138 * This is our only recourse to identify a C source file that is missing debug
3139 * info: it will be mentioned as an STT_FILE, but not have a compile unit entry.
3140 * (A traditional ctfmerge works on individual files, so can identify missing
3141 * DWARF more directly, via ctf_has_c_source() on the .o file.)
3142 *
3143 * As we operate on basenames, this can of course miss some cases, but it's
3144 * better than not checking at all.
3145 *
3146 * We explicitly whitelist some CRT components.  Failing that, there's always
3147 * the -m option.
3148 */
3149static boolean_t
3150c_source_has_debug(const char *file, ctf_cu_t *cus, size_t nr_cus)
3151{
3152	const char *basename = strrchr(file, '/');
3153
3154	if (basename == NULL)
3155		basename = file;
3156	else
3157		basename++;
3158
3159	if (strcmp(basename, "common-crt.c") == 0 ||
3160	    strcmp(basename, "gmon.c") == 0 ||
3161	    strcmp(basename, "dlink_init.c") == 0 ||
3162	    strcmp(basename, "dlink_common.c") == 0 ||
3163	    strncmp(basename, "crt", strlen("crt")) == 0 ||
3164	    strncmp(basename, "values-", strlen("values-")) == 0)
3165		return (B_TRUE);
3166
3167	for (size_t i = 0; i < nr_cus; i++) {
3168		if (strcmp(basename, cus[i].cu_name) == 0)
3169			return (B_TRUE);
3170	}
3171
3172	return (B_FALSE);
3173}
3174
3175static int
3176ctf_dwarf_check_missing(ctf_cu_t *cus, size_t nr_cus, Elf *elf,
3177    char *errmsg, size_t errlen)
3178{
3179	Elf_Scn *scn, *strscn;
3180	Elf_Data *data, *strdata;
3181	GElf_Shdr shdr;
3182	ulong_t i;
3183
3184	scn = NULL;
3185	while ((scn = elf_nextscn(elf, scn)) != NULL) {
3186		if (gelf_getshdr(scn, &shdr) == NULL) {
3187			(void) snprintf(errmsg, errlen,
3188			    "failed to get section header: %s\n",
3189			    elf_errmsg(elf_errno()));
3190			return (EINVAL);
3191		}
3192
3193		if (shdr.sh_type == SHT_SYMTAB)
3194			break;
3195	}
3196
3197	if (scn == NULL)
3198		return (0);
3199
3200	if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) {
3201		(void) snprintf(errmsg, errlen,
3202		    "failed to get str section: %s\n",
3203		    elf_errmsg(elf_errno()));
3204		return (EINVAL);
3205	}
3206
3207	if ((data = elf_getdata(scn, NULL)) == NULL) {
3208		(void) snprintf(errmsg, errlen, "failed to read section: %s\n",
3209		    elf_errmsg(elf_errno()));
3210		return (EINVAL);
3211	}
3212
3213	if ((strdata = elf_getdata(strscn, NULL)) == NULL) {
3214		(void) snprintf(errmsg, errlen,
3215		    "failed to read string table: %s\n",
3216		    elf_errmsg(elf_errno()));
3217		return (EINVAL);
3218	}
3219
3220	for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
3221		GElf_Sym sym;
3222		const char *file;
3223		size_t len;
3224
3225		if (gelf_getsym(data, i, &sym) == NULL) {
3226			(void) snprintf(errmsg, errlen,
3227			    "failed to read sym %lu: %s\n",
3228			    i, elf_errmsg(elf_errno()));
3229			return (EINVAL);
3230		}
3231
3232		if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
3233			continue;
3234
3235		file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
3236		len = strlen(file);
3237		if (len < 2 || strncmp(".c", &file[len - 2], 2) != 0)
3238			continue;
3239
3240		if (!c_source_has_debug(file, cus, nr_cus)) {
3241			(void) snprintf(errmsg, errlen,
3242			    "file %s is missing debug info\n", file);
3243			return (ECTF_CONVNODEBUG);
3244		}
3245	}
3246
3247	return (0);
3248}
3249
3250int
3251ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, uint_t flags,
3252    ctf_file_t **fpp, char *errbuf, size_t errlen)
3253{
3254	int err, ret, ndies, i;
3255	Dwarf_Debug dw;
3256	Dwarf_Error derr;
3257	ctf_cu_t *cdies = NULL, *cup;
3258	workq_t *wqp = NULL;
3259
3260	*fpp = NULL;
3261
3262	ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
3263	if (ret != DW_DLV_OK) {
3264		if (ret == DW_DLV_NO_ENTRY ||
3265		    dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
3266			(void) snprintf(errbuf, errlen,
3267			    "file does not contain DWARF data\n");
3268			return (ECTF_CONVNODEBUG);
3269		}
3270
3271		(void) snprintf(errbuf, errlen,
3272		    "dwarf_elf_init() failed: %s\n", dwarf_errmsg(derr));
3273		return (ECTF_CONVBKERR);
3274	}
3275
3276	/*
3277	 * Iterate over all of the compilation units and create a ctf_cu_t for
3278	 * each of them.  This is used to determine if we have zero, one, or
3279	 * multiple dies to convert. If we have zero, that's an error. If
3280	 * there's only one die, that's the simple case.  No merge needed and
3281	 * only a single Dwarf_Debug as well.
3282	 */
3283	ndies = 0;
3284	err = ctf_dwarf_count_dies(dw, &derr, &ndies, errbuf, errlen);
3285
3286	ctf_dprintf("found %d DWARF CUs\n", ndies);
3287
3288	if (ndies == 0) {
3289		(void) snprintf(errbuf, errlen,
3290		    "file does not contain DWARF data\n");
3291		return (ECTF_CONVNODEBUG);
3292	}
3293
3294	(void) dwarf_finish(dw, &derr);
3295	cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies);
3296	if (cdies == NULL) {
3297		return (ENOMEM);
3298	}
3299
3300	bzero(cdies, sizeof (ctf_cu_t) * ndies);
3301
3302	for (i = 0; i < ndies; i++) {
3303		cup = &cdies[i];
3304		ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
3305		    &cup->cu_dwarf, &derr);
3306		if (ret != 0) {
3307			ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3308			(void) snprintf(errbuf, errlen,
3309			    "failed to initialize DWARF: %s\n",
3310			    dwarf_errmsg(derr));
3311			return (ECTF_CONVBKERR);
3312		}
3313
3314		err = ctf_dwarf_init_die(fd, elf, cup, i, errbuf, errlen);
3315		if (err != 0)
3316			goto out;
3317
3318		cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
3319	}
3320
3321	if (!(flags & CTF_ALLOW_MISSING_DEBUG) &&
3322	    (err = ctf_dwarf_check_missing(cdies, ndies,
3323	    elf, errbuf, errlen)) != 0)
3324		goto out;
3325
3326	/*
3327	 * If we only have one compilation unit, there's no reason to use
3328	 * multiple threads, even if the user requested them. After all, they
3329	 * just gave us an upper bound.
3330	 */
3331	if (ndies == 1)
3332		nthrs = 1;
3333
3334	if (workq_init(&wqp, nthrs) == -1) {
3335		err = errno;
3336		goto out;
3337	}
3338
3339	for (i = 0; i < ndies; i++) {
3340		cup = &cdies[i];
3341		ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
3342		    cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
3343		if (workq_add(wqp, cup) == -1) {
3344			err = errno;
3345			goto out;
3346		}
3347	}
3348
3349	ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, &err);
3350	if (ret == WORKQ_ERROR) {
3351		err = errno;
3352		goto out;
3353	} else if (ret == WORKQ_UERROR) {
3354		ctf_dprintf("internal convert failed: %s\n",
3355		    ctf_errmsg(err));
3356		goto out;
3357	}
3358
3359	ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
3360	if (ndies != 1) {
3361		ctf_merge_t *cmp;
3362
3363		cmp = ctf_merge_init(fd, &err);
3364		if (cmp == NULL)
3365			goto out;
3366
3367		ctf_dprintf("setting threads\n");
3368		if ((err = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
3369			ctf_merge_fini(cmp);
3370			goto out;
3371		}
3372
3373		for (i = 0; i < ndies; i++) {
3374			cup = &cdies[i];
3375			if ((err = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
3376				ctf_merge_fini(cmp);
3377				goto out;
3378			}
3379		}
3380
3381		ctf_dprintf("performing merge\n");
3382		err = ctf_merge_merge(cmp, fpp);
3383		if (err != 0) {
3384			ctf_dprintf("failed merge!\n");
3385			*fpp = NULL;
3386			ctf_merge_fini(cmp);
3387			goto out;
3388		}
3389		ctf_merge_fini(cmp);
3390		err = 0;
3391		ctf_dprintf("successfully converted!\n");
3392	} else {
3393		err = 0;
3394		*fpp = cdies->cu_ctfp;
3395		cdies->cu_ctfp = NULL;
3396		ctf_dprintf("successfully converted!\n");
3397	}
3398
3399out:
3400	workq_fini(wqp);
3401	ctf_dwarf_free_dies(cdies, ndies);
3402	return (err);
3403}
3404