1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2020 Joyent, Inc.
25  */
26 
27 #include <sys/fm/protocol.h>
28 #include <assert.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <alloca.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <sys/param.h>
37 #include <sys/pci.h>
38 #include <sys/pcie.h>
39 #include <libdevinfo.h>
40 #include <libnvpair.h>
41 #include <fm/topo_mod.h>
42 #include <fm/topo_hc.h>
43 #include <sys/ddi_ufm.h>
44 #include <sys/stat.h>
45 #include <sys/types.h>
46 
47 #include <hostbridge.h>
48 #include <pcibus.h>
49 #include <did.h>
50 #include <did_props.h>
51 #include <util.h>
52 #include <topo_nic.h>
53 #include <topo_usb.h>
54 
55 extern txprop_t Bus_common_props[];
56 extern txprop_t Dev_common_props[];
57 extern txprop_t Fn_common_props[];
58 extern int Bus_propcnt;
59 extern int Dev_propcnt;
60 extern int Fn_propcnt;
61 
62 extern int platform_pci_label(topo_mod_t *mod, tnode_t *, nvlist_t *,
63     nvlist_t **);
64 extern int platform_pci_fru(topo_mod_t *mod, tnode_t *, nvlist_t *,
65     nvlist_t **);
66 static void pci_release(topo_mod_t *, tnode_t *);
67 static int pci_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
68     topo_instance_t, void *, void *);
69 static int pci_label(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
70     nvlist_t **);
71 static int pci_fru(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
72     nvlist_t **);
73 
74 static const topo_modops_t Pci_ops =
75 	{ pci_enum, pci_release };
76 static const topo_modinfo_t Pci_info =
77 	{ PCI_BUS, FM_FMRI_SCHEME_HC, PCI_ENUMR_VERS, &Pci_ops };
78 
79 static const topo_method_t Pci_methods[] = {
80 	{ TOPO_METH_LABEL, TOPO_METH_LABEL_DESC,
81 	    TOPO_METH_LABEL_VERSION, TOPO_STABILITY_INTERNAL, pci_label },
82 	{ TOPO_METH_FRU_COMPUTE, TOPO_METH_FRU_COMPUTE_DESC,
83 	    TOPO_METH_FRU_COMPUTE_VERSION, TOPO_STABILITY_INTERNAL, pci_fru },
84 	{ NULL }
85 };
86 
87 int
88 _topo_init(topo_mod_t *modhdl, topo_version_t version)
89 {
90 	/*
91 	 * Turn on module debugging output
92 	 */
93 	if (getenv("TOPOPCIDBG") != NULL)
94 		topo_mod_setdebug(modhdl);
95 	topo_mod_dprintf(modhdl, "initializing pcibus builtin\n");
96 
97 	if (version != PCI_ENUMR_VERS)
98 		return (topo_mod_seterrno(modhdl, EMOD_VER_NEW));
99 
100 	if (topo_mod_register(modhdl, &Pci_info, TOPO_VERSION) != 0) {
101 		topo_mod_dprintf(modhdl, "failed to register module");
102 		return (-1);
103 	}
104 	topo_mod_dprintf(modhdl, "PCI Enumr initd\n");
105 
106 	return (0);
107 }
108 
109 void
110 _topo_fini(topo_mod_t *modhdl)
111 {
112 	topo_mod_unregister(modhdl);
113 }
114 
115 static int
116 pci_label(topo_mod_t *mp, tnode_t *node, topo_version_t version,
117     nvlist_t *in, nvlist_t **out)
118 {
119 	if (version > TOPO_METH_LABEL_VERSION)
120 		return (topo_mod_seterrno(mp, EMOD_VER_NEW));
121 	return (platform_pci_label(mp, node, in, out));
122 }
123 static int
124 pci_fru(topo_mod_t *mp, tnode_t *node, topo_version_t version,
125     nvlist_t *in, nvlist_t **out)
126 {
127 	if (version > TOPO_METH_FRU_COMPUTE_VERSION)
128 		return (topo_mod_seterrno(mp, EMOD_VER_NEW));
129 	return (platform_pci_fru(mp, node, in, out));
130 }
131 static tnode_t *
132 pci_tnode_create(topo_mod_t *mod, tnode_t *parent,
133     const char *name, topo_instance_t i, void *priv)
134 {
135 	tnode_t *ntn;
136 
137 	if ((ntn = tnode_create(mod, parent, name, i, priv)) == NULL)
138 		return (NULL);
139 	if (topo_method_register(mod, ntn, Pci_methods) < 0) {
140 		topo_mod_dprintf(mod, "topo_method_register failed: %s\n",
141 		    topo_strerror(topo_mod_errno(mod)));
142 		topo_node_unbind(ntn);
143 		return (NULL);
144 	}
145 	return (ntn);
146 }
147 
148 /*ARGSUSED*/
149 static int
150 hostbridge_asdevice(topo_mod_t *mod, tnode_t *bus)
151 {
152 	di_node_t di;
153 	tnode_t *dev32;
154 
155 	di = topo_node_getspecific(bus);
156 	assert(di != DI_NODE_NIL);
157 
158 	if ((dev32 = pcidev_declare(mod, bus, di, 32)) == NULL)
159 		return (-1);
160 	if (pcifn_declare(mod, dev32, di, 0) == NULL) {
161 		topo_node_unbind(dev32);
162 		return (-1);
163 	}
164 	return (0);
165 }
166 
167 static int
168 pciexfn_add_ufm(topo_mod_t *mod, tnode_t *parent, tnode_t *node)
169 {
170 	char *devpath = NULL;
171 	ufm_ioc_getcaps_t ugc = { 0 };
172 	ufm_ioc_bufsz_t ufbz = { 0 };
173 	ufm_ioc_report_t ufmr = { 0 };
174 	nvlist_t *ufminfo = NULL, **images;
175 	uint_t nimages;
176 	int err, fd, ret = -1;
177 	tnode_t *create;
178 
179 	if (topo_prop_get_string(node, TOPO_PGROUP_IO, TOPO_IO_DEV, &devpath,
180 	    &err) != 0) {
181 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
182 	}
183 	if (strlen(devpath) >= MAXPATHLEN) {
184 		topo_mod_dprintf(mod, "devpath is too long: %s", devpath);
185 		topo_mod_strfree(mod, devpath);
186 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
187 	}
188 
189 	if ((fd = open(DDI_UFM_DEV, O_RDONLY)) < 0) {
190 		topo_mod_dprintf(mod, "%s: failed to open %s", __func__,
191 		    DDI_UFM_DEV);
192 		topo_mod_strfree(mod, devpath);
193 		return (0);
194 	}
195 	/*
196 	 * Make an ioctl to probe if the driver for this function is
197 	 * UFM-capable.  If the ioctl fails or if it doesn't advertise the
198 	 * DDI_UFM_CAP_REPORT capability, we bail out.
199 	 */
200 	ugc.ufmg_version = DDI_UFM_CURRENT_VERSION;
201 	(void) strlcpy(ugc.ufmg_devpath, devpath, MAXPATHLEN);
202 	if (ioctl(fd, UFM_IOC_GETCAPS, &ugc) < 0) {
203 		topo_mod_dprintf(mod, "UFM_IOC_GETCAPS failed: %s",
204 		    strerror(errno));
205 		(void) close(fd);
206 		topo_mod_strfree(mod, devpath);
207 		return (0);
208 	}
209 	if ((ugc.ufmg_caps & DDI_UFM_CAP_REPORT) == 0) {
210 		topo_mod_dprintf(mod, "driver doesn't advertise "
211 		    "DDI_UFM_CAP_REPORT");
212 		(void) close(fd);
213 		topo_mod_strfree(mod, devpath);
214 		return (0);
215 	}
216 
217 	/*
218 	 * If we made it this far, then the driver is indeed UFM-capable and
219 	 * is capable of reporting its firmware information.  First step is to
220 	 * make an ioctl to query the size of the report data so that we can
221 	 * allocate a buffer large enough to hold it.
222 	 */
223 	ufbz.ufbz_version = DDI_UFM_CURRENT_VERSION;
224 	(void) strlcpy(ufbz.ufbz_devpath, devpath, MAXPATHLEN);
225 	if (ioctl(fd, UFM_IOC_REPORTSZ, &ufbz) < 0) {
226 		topo_mod_dprintf(mod, "UFM_IOC_REPORTSZ failed: %s\n",
227 		    strerror(errno));
228 		(void) close(fd);
229 		topo_mod_strfree(mod, devpath);
230 		return (0);
231 	}
232 
233 	ufmr.ufmr_version = DDI_UFM_CURRENT_VERSION;
234 	if ((ufmr.ufmr_buf = topo_mod_alloc(mod, ufbz.ufbz_size)) == NULL) {
235 		topo_mod_dprintf(mod, "failed to alloc %u bytes\n",
236 		    ufbz.ufbz_size);
237 		(void) close(fd);
238 		topo_mod_strfree(mod, devpath);
239 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
240 	}
241 	ufmr.ufmr_bufsz = ufbz.ufbz_size;
242 	(void) strlcpy(ufmr.ufmr_devpath, devpath, MAXPATHLEN);
243 	topo_mod_strfree(mod, devpath);
244 
245 	/*
246 	 * Now, make the ioctl to retrieve the actual report data.  The data
247 	 * is stored as a packed nvlist.
248 	 */
249 	if (ioctl(fd, UFM_IOC_REPORT, &ufmr) < 0) {
250 		topo_mod_dprintf(mod, "UFM_IOC_REPORT failed: %s\n",
251 		    strerror(errno));
252 		topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
253 		(void) close(fd);
254 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
255 	}
256 	(void) close(fd);
257 
258 	if (nvlist_unpack(ufmr.ufmr_buf, ufmr.ufmr_bufsz, &ufminfo,
259 	    NV_ENCODE_NATIVE) != 0) {
260 		topo_mod_dprintf(mod, "failed to unpack nvlist\n");
261 		topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
262 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
263 	}
264 	topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
265 
266 	if (nvlist_lookup_nvlist_array(ufminfo, DDI_UFM_NV_IMAGES, &images,
267 	    &nimages) != 0) {
268 		topo_mod_dprintf(mod, "failed to lookup %s nvpair",
269 		    DDI_UFM_NV_IMAGES);
270 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
271 		goto err;
272 	}
273 
274 	/*
275 	 * There's nothing for us to do if there are no images.
276 	 */
277 	if (nimages == 0) {
278 		ret = 0;
279 		goto err;
280 	}
281 
282 	/*
283 	 * In general, almost all UFMs are device-wide. That is, in a
284 	 * multi-function device, there is still a single global firmware image.
285 	 * At this time, we default to putting the UFM data always on the device
286 	 * node. However, if someone creates a UFM on something that's not the
287 	 * first function, we'll create a UFM under that function for now. If we
288 	 * add support for hardware that has per-function UFMs, then we should
289 	 * update the UFM API to convey that scope.
290 	 */
291 	if (topo_node_instance(node) != 0) {
292 		create = node;
293 	} else {
294 		create = parent;
295 	}
296 
297 	if (topo_node_range_create(mod, create, UFM, 0, (nimages - 1)) != 0) {
298 		topo_mod_dprintf(mod, "failed to create %s range", UFM);
299 		/* errno set */
300 		goto err;
301 	}
302 	for (uint_t i = 0; i < nimages; i++) {
303 		tnode_t *ufmnode = NULL;
304 		char *descr;
305 		uint_t nslots;
306 		nvlist_t **slots;
307 
308 		if (nvlist_lookup_string(images[i], DDI_UFM_NV_IMAGE_DESC,
309 		    &descr) != 0 ||
310 		    nvlist_lookup_nvlist_array(images[i],
311 		    DDI_UFM_NV_IMAGE_SLOTS, &slots, &nslots) != 0) {
312 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
313 			goto err;
314 		}
315 
316 		if ((ufmnode = topo_mod_create_ufm(mod, create, descr, NULL)) ==
317 		    NULL) {
318 			topo_mod_dprintf(mod, "failed to create ufm nodes for "
319 			    "%s", descr);
320 			/* errno set */
321 			goto err;
322 		}
323 		for (uint_t s = 0; s < nslots; s++) {
324 			topo_ufm_slot_info_t slotinfo = { 0 };
325 			uint32_t slotattrs;
326 
327 			if (nvlist_lookup_string(slots[s],
328 			    DDI_UFM_NV_SLOT_VERSION,
329 			    (char **)&slotinfo.usi_version) != 0 ||
330 			    nvlist_lookup_uint32(slots[s],
331 			    DDI_UFM_NV_SLOT_ATTR, &slotattrs) != 0) {
332 				topo_node_unbind(ufmnode);
333 				topo_mod_dprintf(mod, "malformed slot nvlist");
334 				(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
335 				goto err;
336 			}
337 			(void) nvlist_lookup_nvlist(slots[s],
338 			    DDI_UFM_NV_SLOT_MISC, &slotinfo.usi_extra);
339 
340 			if (slotattrs & DDI_UFM_ATTR_READABLE &&
341 			    slotattrs & DDI_UFM_ATTR_WRITEABLE)
342 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_RW;
343 			else if (slotattrs & DDI_UFM_ATTR_READABLE)
344 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_RO;
345 			else if (slotattrs & DDI_UFM_ATTR_WRITEABLE)
346 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_WO;
347 			else
348 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_NONE;
349 
350 			if (slotattrs & DDI_UFM_ATTR_ACTIVE)
351 				slotinfo.usi_active = B_TRUE;
352 
353 			if (topo_node_range_create(mod, ufmnode, SLOT, 0,
354 			    (nslots - 1)) < 0) {
355 				topo_mod_dprintf(mod, "failed to create %s "
356 				    "range", SLOT);
357 				/* errno set */
358 				goto err;
359 			}
360 			if (topo_mod_create_ufm_slot(mod, ufmnode,
361 			    &slotinfo) == NULL) {
362 				topo_node_unbind(ufmnode);
363 				topo_mod_dprintf(mod, "failed to create ufm "
364 				    "slot %d for %s", s, descr);
365 				/* errno set */
366 				goto err;
367 			}
368 		}
369 	}
370 	ret = 0;
371 err:
372 	nvlist_free(ufminfo);
373 	return (ret);
374 }
375 
376 tnode_t *
377 pciexfn_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
378     topo_instance_t i)
379 {
380 	did_t *pd;
381 	tnode_t *ntn, *ptn;
382 	di_node_t pdn;
383 	uint_t class, subclass;
384 	char *devtyp, *pdevtyp;
385 	int pcie_devtyp, pexcap;
386 	boolean_t dev_is_pcie, pdev_is_pcie;
387 
388 	/* We need the parent's dev info node for some of the info */
389 	ptn = find_predecessor(parent, PCIEX_FUNCTION);
390 	/* If this is the first child under root, get root's ptn */
391 	if (ptn == NULL)
392 		ptn = find_predecessor(parent, PCIEX_ROOT);
393 	if (ptn == NULL)
394 		return (NULL);
395 	pdn = topo_node_getspecific(ptn);
396 
397 	/* Get the required info to populate the excap */
398 	(void) pci_classcode_get(mod, dn, &class, &subclass);
399 	devtyp = pci_devtype_get(mod, dn);
400 	pdevtyp = pci_devtype_get(mod, pdn);
401 	pexcap = pciex_cap_get(mod, pdn);
402 
403 	dev_is_pcie = devtyp && (strcmp(devtyp, "pciex") == 0);
404 	pdev_is_pcie = pdevtyp && (strcmp(pdevtyp, "pciex") == 0);
405 
406 	/*
407 	 * Populate the excap with correct PCIe device type.
408 	 *
409 	 * Device	Parent		Device		Parent	Device
410 	 * excap	device-type	device-type	excap	Class Code
411 	 * -------------------------------------------------------------------
412 	 * PCI(default)	pci		N/A		N/A	!= bridge
413 	 * PCIe		pciex		N/A		N/A	!= bridge
414 	 * Root Port	Defined in hostbridge
415 	 * Switch Up	pciex		pciex		!= up	= bridge
416 	 * Switch Down	pciex		pciex		= up	= bridge
417 	 * PCIe-PCI	pciex		pci		N/A	= bridge
418 	 * PCI-PCIe	pci		pciex		N/A	= bridge
419 	 */
420 	pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCI_DEV;
421 	if (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
422 		if (pdev_is_pcie) {
423 			if (dev_is_pcie) {
424 				if (pexcap != PCIE_PCIECAP_DEV_TYPE_UP)
425 					pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_UP;
426 				else
427 					pcie_devtyp =
428 					    PCIE_PCIECAP_DEV_TYPE_DOWN;
429 			} else {
430 				pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCIE2PCI;
431 			}
432 		} else {
433 			if (dev_is_pcie)
434 				pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCI2PCIE;
435 		}
436 	} else {
437 		if (pdev_is_pcie)
438 			pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCIE_DEV;
439 	}
440 
441 	if ((pd = did_find(mod, dn)) == NULL)
442 		return (NULL);
443 	did_excap_set(pd, pcie_devtyp);
444 
445 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_FUNCTION, i, dn))
446 	    == NULL)
447 		return (NULL);
448 	if (did_props_set(ntn, pd, Fn_common_props, Fn_propcnt) < 0) {
449 		topo_node_unbind(ntn);
450 		return (NULL);
451 	}
452 
453 	/*
454 	 * Check if the driver associated with this function exports firmware
455 	 * information via the DDI UFM subsystem and, if so, create the
456 	 * corresponding ufm topo nodes.
457 	 */
458 	if (pciexfn_add_ufm(mod, parent, ntn) != 0) {
459 		topo_node_unbind(ntn);
460 		return (NULL);
461 	}
462 
463 	/*
464 	 * We may find pci-express buses or plain-pci buses beneath a function
465 	 */
466 	if (child_range_add(mod, ntn, PCIEX_BUS, 0, MAX_HB_BUSES) < 0) {
467 		topo_node_unbind(ntn);
468 		return (NULL);
469 	}
470 	if (child_range_add(mod, ntn, PCI_BUS, 0, MAX_HB_BUSES) < 0) {
471 		topo_node_range_destroy(ntn, PCIEX_BUS);
472 		topo_node_unbind(ntn);
473 		return (NULL);
474 	}
475 	return (ntn);
476 }
477 
478 tnode_t *
479 pciexdev_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
480     topo_instance_t i)
481 {
482 	did_t *pd;
483 	tnode_t *ntn;
484 
485 	if ((pd = did_find(mod, dn)) == NULL)
486 		return (NULL);
487 	did_settnode(pd, parent);
488 
489 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_DEVICE, i, dn)) == NULL)
490 		return (NULL);
491 	if (did_props_set(ntn, pd, Dev_common_props, Dev_propcnt) < 0) {
492 		topo_node_unbind(ntn);
493 		return (NULL);
494 	}
495 
496 	/*
497 	 * We can expect to find pci-express functions beneath the device
498 	 */
499 	if (child_range_add(mod,
500 	    ntn, PCIEX_FUNCTION, 0, MAX_PCIDEV_FNS) < 0) {
501 		topo_node_unbind(ntn);
502 		return (NULL);
503 	}
504 	return (ntn);
505 }
506 
507 tnode_t *
508 pciexbus_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
509     topo_instance_t i)
510 {
511 	did_t *pd;
512 	tnode_t *ntn;
513 
514 	if ((pd = did_find(mod, dn)) == NULL)
515 		return (NULL);
516 	did_settnode(pd, parent);
517 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_BUS, i, dn)) == NULL)
518 		return (NULL);
519 	if (did_props_set(ntn, pd, Bus_common_props, Bus_propcnt) < 0) {
520 		topo_node_unbind(ntn);
521 		return (NULL);
522 	}
523 	/*
524 	 * We can expect to find pci-express devices beneath the bus
525 	 */
526 	if (child_range_add(mod,
527 	    ntn, PCIEX_DEVICE, 0, MAX_PCIBUS_DEVS) < 0) {
528 		topo_node_unbind(ntn);
529 		return (NULL);
530 	}
531 	return (ntn);
532 }
533 
534 tnode_t *
535 pcifn_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
536     topo_instance_t i)
537 {
538 	did_t *pd;
539 	tnode_t *ntn;
540 
541 	if ((pd = did_find(mod, dn)) == NULL)
542 		return (NULL);
543 	did_excap_set(pd, PCIE_PCIECAP_DEV_TYPE_PCI_DEV);
544 
545 	if ((ntn = pci_tnode_create(mod, parent, PCI_FUNCTION, i, dn)) == NULL)
546 		return (NULL);
547 	if (did_props_set(ntn, pd, Fn_common_props, Fn_propcnt) < 0) {
548 		topo_node_unbind(ntn);
549 		return (NULL);
550 	}
551 	/*
552 	 * We may find pci buses beneath a function
553 	 */
554 	if (child_range_add(mod, ntn, PCI_BUS, 0, MAX_HB_BUSES) < 0) {
555 		topo_node_unbind(ntn);
556 		return (NULL);
557 	}
558 	return (ntn);
559 }
560 
561 tnode_t *
562 pcidev_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
563     topo_instance_t i)
564 {
565 	did_t *pd;
566 	tnode_t *ntn;
567 
568 	if ((pd = did_find(mod, dn)) == NULL)
569 		return (NULL);
570 	/* remember parent tnode */
571 	did_settnode(pd, parent);
572 
573 	if ((ntn = pci_tnode_create(mod, parent, PCI_DEVICE, i, dn)) == NULL)
574 		return (NULL);
575 	if (did_props_set(ntn, pd, Dev_common_props, Dev_propcnt) < 0) {
576 		topo_node_unbind(ntn);
577 		return (NULL);
578 	}
579 
580 	/*
581 	 * We can expect to find pci functions beneath the device
582 	 */
583 	if (child_range_add(mod, ntn, PCI_FUNCTION, 0, MAX_PCIDEV_FNS) < 0) {
584 		topo_node_unbind(ntn);
585 		return (NULL);
586 	}
587 	return (ntn);
588 }
589 
590 tnode_t *
591 pcibus_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
592     topo_instance_t i)
593 {
594 	did_t *pd;
595 	tnode_t *ntn;
596 	int hbchild = 0;
597 
598 	if ((pd = did_find(mod, dn)) == NULL)
599 		return (NULL);
600 	did_settnode(pd, parent);
601 	if ((ntn = pci_tnode_create(mod, parent, PCI_BUS, i, dn)) == NULL)
602 		return (NULL);
603 	/*
604 	 * If our devinfo node is lacking certain information of its
605 	 * own, and our parent topology node is a hostbridge, we may
606 	 * need/want to inherit information available in the
607 	 * hostbridge node's private data.
608 	 */
609 	if (strcmp(topo_node_name(parent), HOSTBRIDGE) == 0)
610 		hbchild = 1;
611 	if (did_props_set(ntn, pd, Bus_common_props, Bus_propcnt) < 0) {
612 		topo_node_unbind(ntn);
613 		return (NULL);
614 	}
615 	/*
616 	 * We can expect to find pci devices beneath the bus
617 	 */
618 	if (child_range_add(mod, ntn, PCI_DEVICE, 0, MAX_PCIBUS_DEVS) < 0) {
619 		topo_node_unbind(ntn);
620 		return (NULL);
621 	}
622 	/*
623 	 * On each bus child of the hostbridge, we represent the
624 	 * hostbridge as a device outside the range of legal device
625 	 * numbers.
626 	 */
627 	if (hbchild == 1) {
628 		if (hostbridge_asdevice(mod, ntn) < 0) {
629 			topo_node_range_destroy(ntn, PCI_DEVICE);
630 			topo_node_unbind(ntn);
631 			return (NULL);
632 		}
633 	}
634 	return (ntn);
635 }
636 
637 static int
638 pci_bridge_declare(topo_mod_t *mod, tnode_t *fn, di_node_t din, int board,
639     int bridge, int rc, int depth)
640 {
641 	int err;
642 	char *devtyp;
643 
644 	devtyp = pci_devtype_get(mod, din);
645 	/* Check if the children are PCI or PCIe */
646 	if (devtyp && (strcmp(devtyp, "pciex") == 0))
647 		err = pci_children_instantiate(mod, fn, din, board, bridge,
648 		    rc, TRUST_BDF, depth + 1);
649 	else
650 		err = pci_children_instantiate(mod, fn, din, board, bridge,
651 		    rc - TO_PCI, TRUST_BDF, depth + 1);
652 	return (err);
653 }
654 
655 static void
656 declare_dev_and_fn(topo_mod_t *mod, tnode_t *bus, tnode_t **dev, di_node_t din,
657     int board, int bridge, int rc, int devno, int fnno, int depth)
658 {
659 	int dcnt = 0, rcnt, err;
660 	char *propstr, *label = NULL, *pdev = NULL;
661 	tnode_t *fn;
662 	uint_t class, subclass;
663 	uint_t vid, did;
664 	uint_t pdev_sz;
665 	did_t *dp = NULL;
666 
667 	if (*dev == NULL) {
668 		if (rc >= 0)
669 			*dev = pciexdev_declare(mod, bus, din, devno);
670 		else
671 			*dev = pcidev_declare(mod, bus, din, devno);
672 		if (*dev == NULL)
673 			return;
674 		++dcnt;
675 	}
676 	if (rc >= 0)
677 		fn = pciexfn_declare(mod, *dev, din, fnno);
678 	else
679 		fn = pcifn_declare(mod, *dev, din, fnno);
680 
681 	if (fn == NULL) {
682 		if (dcnt) {
683 			topo_node_unbind(*dev);
684 			*dev = NULL;
685 		}
686 		return;
687 	}
688 
689 	if (pci_classcode_get(mod, din, &class, &subclass) < 0) {
690 		topo_node_unbind(fn);
691 		if (dcnt)
692 			topo_node_unbind(*dev);
693 		return;
694 	}
695 
696 	/*
697 	 * This function may be a bridge.  If not, check for a possible
698 	 * topology map file and kick off its enumeration of lower-level
699 	 * devices.
700 	 */
701 	if (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
702 		(void) pci_bridge_declare(mod, fn, din, board, bridge, rc,
703 		    depth);
704 	}
705 
706 	/*
707 	 * Check for a Neptune-based NIC. This could either be a Neptune
708 	 * adapter card or an Neptune ASIC on a board (e.g. motherboard)
709 	 *
710 	 * For Netpune adapter cards, use xfp-hc-topology.xml to expand
711 	 * topology to include the XFP optical module, which is a FRU on
712 	 * the Neptune based 10giga fiber NICs.
713 	 *
714 	 * For Neptune ASICs, use the XAUI enumerator to expand topology.
715 	 * The 10giga ports are externalized by a XAUI cards, which
716 	 * are FRUs. The XAUI enumerator in turn instantiates the XFP
717 	 * optical module FRUs.
718 	 */
719 	else if (class == PCI_CLASS_NET &&
720 	    di_uintprop_get(mod, din, DI_VENDIDPROP, &vid) >= 0 &&
721 	    di_uintprop_get(mod, din, DI_DEVIDPROP, &did) >= 0 &&
722 	    vid == SUN_VENDOR_ID && did == NEPTUNE_DEVICE_ID) {
723 		/*
724 		 * Is this an adapter card? Check the bus's physlot
725 		 */
726 		dp = did_find(mod, topo_node_getspecific(bus));
727 		if (did_physlot(dp) >= 0) {
728 			topo_mod_dprintf(mod, "Found Neptune slot\n");
729 			(void) topo_mod_enummap(mod, fn,
730 			    "xfp", FM_FMRI_SCHEME_HC);
731 		} else {
732 			topo_mod_dprintf(mod, "Found Neptune ASIC\n");
733 			if (topo_mod_load(mod, XAUI, TOPO_VERSION) == NULL) {
734 				topo_mod_dprintf(mod, "pcibus enum "
735 				    "could not load xaui enum\n");
736 				(void) topo_mod_seterrno(mod,
737 				    EMOD_PARTIAL_ENUM);
738 				return;
739 			} else {
740 				if (topo_node_range_create(mod, fn,
741 				    XAUI, 0, 1) < 0) {
742 					topo_mod_dprintf(mod,
743 					    "child_range_add for "
744 					    "XAUI failed: %s\n",
745 					    topo_strerror(
746 					    topo_mod_errno(mod)));
747 					return;
748 				}
749 				(void) topo_mod_enumerate(mod, fn,
750 				    XAUI, XAUI, fnno, fnno, fn);
751 			}
752 		}
753 	} else if (class == PCI_CLASS_NET) {
754 		/*
755 		 * Ask the nic module if there are any nodes that need to be
756 		 * enumerated under this device. This might include things like
757 		 * transceivers or some day, LEDs.
758 		 */
759 		if (topo_mod_load(mod, NIC, NIC_VERSION) == NULL) {
760 			topo_mod_dprintf(mod, "pcibus enum could not load "
761 			    "nic enum\n");
762 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
763 			return;
764 		}
765 
766 		(void) topo_mod_enumerate(mod, fn, NIC, NIC, 0, 0, din);
767 	} else if (class == PCI_CLASS_SERIALBUS && subclass == PCI_SERIAL_USB) {
768 		/*
769 		 * If we encounter a USB controller, make sure to enumerate all
770 		 * of its USB ports.
771 		 */
772 		if (topo_mod_load(mod, USB, USB_VERSION) == NULL) {
773 			topo_mod_dprintf(mod, "pcibus enum could not load "
774 			    "usb enum\n");
775 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
776 			return;
777 		}
778 
779 		(void) topo_mod_enumerate(mod, fn, USB, USB_PCI, 0, 0, din);
780 	} else if (class == PCI_CLASS_MASS) {
781 		di_node_t cn;
782 		int niports = 0;
783 		extern void pci_iports_instantiate(topo_mod_t *, tnode_t *,
784 		    di_node_t, int);
785 		extern void pci_receptacle_instantiate(topo_mod_t *, tnode_t *,
786 		    di_node_t);
787 
788 		for (cn = di_child_node(din); cn != DI_NODE_NIL;
789 		    cn = di_sibling_node(cn)) {
790 			if (strcmp(di_node_name(cn), IPORT) == 0)
791 				niports++;
792 		}
793 		if (niports > 0)
794 			pci_iports_instantiate(mod, fn, din, niports);
795 
796 		if ((rcnt = di_prop_lookup_strings(DDI_DEV_T_ANY, din,
797 		    DI_RECEPTACLE_PHYMASK, &propstr)) > 0) {
798 			if (topo_node_range_create(mod, fn, RECEPTACLE, 0,
799 			    rcnt) >= 0)
800 				pci_receptacle_instantiate(mod, fn, din);
801 		}
802 	}
803 
804 	/*
805 	 * If this is an NVMe device and if the FRU label indicates it's not an
806 	 * onboard device then invoke the disk enumerator to enumerate the NVMe
807 	 * controller and associated namespaces.
808 	 *
809 	 * We skip NVMe devices that appear to be onboard as those are likely
810 	 * M.2 or U.2 devices and so should be enumerated via a
811 	 * platform-specific XML map so that they can be associated with the
812 	 * correct physical bay/slot.  This code is intended to pick up NVMe
813 	 * devices that are part of PCIe add-in cards.
814 	 */
815 	if (topo_node_label(fn, &label, &err) != 0) {
816 		topo_mod_dprintf(mod, "%s: failed to lookup FRU label on %s=%d",
817 		    __func__, topo_node_name(fn), topo_node_instance(fn));
818 		goto out;
819 	}
820 
821 	if (class == PCI_CLASS_MASS && subclass == PCI_MASS_NVME &&
822 	    strcmp(label, "MB") != 0) {
823 		char *driver = di_driver_name(din);
824 		char *slash;
825 		topo_pgroup_info_t pgi;
826 
827 		if (topo_prop_get_string(fn, TOPO_PGROUP_IO, TOPO_IO_DEV,
828 		    &pdev, &err) != 0) {
829 			topo_mod_dprintf(mod, "%s: failed to lookup %s on "
830 			    "%s=%d", __func__, TOPO_IO_DEV, topo_node_name(fn),
831 			    topo_node_instance(fn));
832 			goto out;
833 		}
834 
835 		/*
836 		 * Add the binding properties that are required by the disk
837 		 * enumerator to discover the accociated NVMe controller.
838 		 */
839 		pdev_sz = strlen(pdev) + 1;
840 		if ((slash = strrchr(pdev, '/')) == NULL) {
841 			topo_mod_dprintf(mod, "%s: malformed dev path\n",
842 			    __func__);
843 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
844 			goto out;
845 		}
846 		*slash = '\0';
847 
848 		pgi.tpi_name = TOPO_PGROUP_BINDING;
849 		pgi.tpi_namestab = TOPO_STABILITY_PRIVATE;
850 		pgi.tpi_datastab = TOPO_STABILITY_PRIVATE;
851 		pgi.tpi_version = TOPO_VERSION;
852 		if (topo_pgroup_create(fn, &pgi, &err) != 0 ||
853 		    topo_prop_set_string(fn, TOPO_PGROUP_BINDING,
854 		    TOPO_BINDING_DRIVER, TOPO_PROP_IMMUTABLE, driver,
855 		    &err) != 0 ||
856 		    topo_prop_set_string(fn, TOPO_PGROUP_BINDING,
857 		    TOPO_BINDING_PARENT_DEV, TOPO_PROP_IMMUTABLE, pdev,
858 		    &err) != 0) {
859 			topo_mod_dprintf(mod, "%s: failed to set binding "
860 			    "props", __func__);
861 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
862 			goto out;
863 		}
864 
865 		/*
866 		 * Load and invoke the disk enumerator module.
867 		 */
868 		if (topo_mod_load(mod, DISK, TOPO_VERSION) == NULL) {
869 			topo_mod_dprintf(mod, "pcibus enum could not load "
870 			    "disk enum\n");
871 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
872 			goto out;
873 		}
874 		(void) topo_mod_enumerate(mod, fn, DISK, NVME, 0, 0, NULL);
875 	}
876 out:
877 	if (pdev != NULL) {
878 		topo_mod_free(mod, pdev, pdev_sz);
879 	}
880 	topo_mod_strfree(mod, label);
881 }
882 
883 int
884 pci_children_instantiate(topo_mod_t *mod, tnode_t *parent, di_node_t pn,
885     int board, int bridge, int rc, int bover, int depth)
886 {
887 	did_t *pps[MAX_PCIBUS_DEVS][MAX_PCIDEV_FNS];
888 	did_t *bp = NULL;
889 	did_t *np;
890 	di_node_t sib;
891 	di_node_t din;
892 	tnode_t *bn = NULL;
893 	tnode_t *dn = NULL;
894 	int pb = -1;
895 	int b, d, f;
896 
897 	for (d = 0; d < MAX_PCIBUS_DEVS; d++)
898 		for (f = 0; f < MAX_PCIDEV_FNS; f++)
899 			pps[d][f] = NULL;
900 
901 	/* start at the parent's first sibling */
902 	sib = di_child_node(pn);
903 	while (sib != DI_NODE_NIL) {
904 		np = did_create(mod, sib, board, bridge, rc, bover);
905 		if (np == NULL)
906 			return (-1);
907 		did_BDF(np, &b, &d, &f);
908 		pps[d][f] = np;
909 		if (bp == NULL)
910 			bp = np;
911 		if (pb < 0)
912 			pb = ((bover == TRUST_BDF) ? b : bover);
913 		sib = di_sibling_node(sib);
914 	}
915 	if (pb < 0 && bover < 0)
916 		return (0);
917 	if (rc >= 0)
918 		bn = pciexbus_declare(mod, parent, pn, ((pb < 0) ? bover : pb));
919 	else
920 		bn = pcibus_declare(mod, parent, pn, ((pb < 0) ? bover : pb));
921 	if (bn == NULL)
922 		return (-1);
923 	if (pb < 0)
924 		return (0);
925 
926 	for (d = 0; d < MAX_PCIBUS_DEVS; d++) {
927 		for (f = 0; f < MAX_PCIDEV_FNS; f++) {
928 			if (pps[d][f] == NULL)
929 				continue;
930 			din = did_dinode(pps[d][f]);
931 
932 			/*
933 			 * Try to enumerate as many devices and functions as
934 			 * possible.  If we fail to declare a device, break
935 			 * out of the function loop.
936 			 */
937 			declare_dev_and_fn(mod, bn,
938 			    &dn, din, board, bridge, rc, d, f, depth);
939 			did_rele(pps[d][f]);
940 
941 			if (dn == NULL)
942 				break;
943 		}
944 		dn = NULL;
945 	}
946 	return (0);
947 }
948 
949 static int
950 pciexbus_enum(topo_mod_t *mp, tnode_t *ptn, char *pnm, topo_instance_t min,
951     topo_instance_t max)
952 {
953 	di_node_t pdn;
954 	int rc, hb;
955 	tnode_t *hbtn;
956 	int retval;
957 
958 	/*
959 	 * PCI-Express; parent node's private data is a simple di_node_t
960 	 * and we have to construct our own did hash and did_t.
961 	 */
962 	rc = topo_node_instance(ptn);
963 	if ((hbtn = topo_node_parent(ptn)) != NULL)
964 		hb = topo_node_instance(hbtn);
965 	else
966 		hb = rc;
967 
968 	if ((pdn = topo_node_getspecific(ptn)) == DI_NODE_NIL) {
969 		topo_mod_dprintf(mp,
970 		    "Parent %s node missing private data.\n"
971 		    "Unable to proceed with %s enumeration.\n", pnm, PCIEX_BUS);
972 		return (0);
973 	}
974 	if (did_hash_init(mp) != 0)
975 		return (-1);
976 	if ((did_create(mp, pdn, 0, hb, rc, TRUST_BDF)) == NULL)
977 		return (-1);	/* errno already set */
978 
979 	retval = pci_children_instantiate(mp, ptn, pdn, 0, hb, rc,
980 	    (min == max) ? min : TRUST_BDF, 0);
981 	did_hash_fini(mp);
982 
983 	return (retval);
984 }
985 
986 static int
987 pcibus_enum(topo_mod_t *mp, tnode_t *ptn, char *pnm, topo_instance_t min,
988     topo_instance_t max, void *data)
989 {
990 	did_t *didp, *hbdid = (did_t *)data;
991 	int retval;
992 
993 	/*
994 	 * XXTOPO: we should not be sharing private node data with another
995 	 * module. PCI Bus; Parent node's private data is a did_t.  We'll
996 	 * use the did hash established by the parent.
997 	 */
998 	did_setspecific(mp, data);
999 
1000 	/*
1001 	 * If we're looking for a specific bus-instance, find the right
1002 	 * did_t in the chain, otherwise, there should be only one did_t.
1003 	 */
1004 	if (min == max) {
1005 		int b;
1006 		didp = hbdid;
1007 		while (didp != NULL) {
1008 			did_BDF(didp, &b, NULL, NULL);
1009 			if (b == min)
1010 				break;
1011 			didp = did_link_get(didp);
1012 		}
1013 		if (didp == NULL) {
1014 			topo_mod_dprintf(mp,
1015 			    "Parent %s node missing private data related\n"
1016 			    "to %s instance %d.\n", pnm, PCI_BUS, min);
1017 			topo_mod_setspecific(mp, NULL);
1018 			return (0);
1019 		}
1020 	} else {
1021 		assert(did_link_get(hbdid) == NULL);
1022 		didp = hbdid;
1023 	}
1024 	retval = pci_children_instantiate(mp, ptn, did_dinode(didp),
1025 	    did_board(didp), did_bridge(didp), did_rc(didp),
1026 	    (min == max) ? min : TRUST_BDF, 0);
1027 
1028 	topo_mod_setspecific(mp, NULL);
1029 
1030 	return (retval);
1031 }
1032 
1033 /*ARGSUSED*/
1034 static int
1035 pci_enum(topo_mod_t *mod, tnode_t *ptn, const char *name,
1036     topo_instance_t min, topo_instance_t max, void *notused, void *data)
1037 {
1038 	int retval;
1039 	char *pname;
1040 
1041 	topo_mod_dprintf(mod, "Enumerating pci!\n");
1042 
1043 	if (strcmp(name, PCI_BUS) != 0 && strcmp(name, PCIEX_BUS) != 0) {
1044 		topo_mod_dprintf(mod,
1045 		    "Currently only know how to enumerate %s or %s.\n",
1046 		    PCI_BUS, PCIEX_BUS);
1047 		return (0);
1048 	}
1049 	pname = topo_node_name(ptn);
1050 	if (strcmp(pname, HOSTBRIDGE) != 0 && strcmp(pname, PCIEX_ROOT) != 0) {
1051 		topo_mod_dprintf(mod,
1052 		    "Currently can only enumerate a %s or %s directly\n",
1053 		    PCI_BUS, PCIEX_BUS);
1054 		topo_mod_dprintf(mod,
1055 		    "descended from a %s or %s node.\n",
1056 		    HOSTBRIDGE, PCIEX_ROOT);
1057 		return (0);
1058 	}
1059 
1060 	if (strcmp(name, PCI_BUS) == 0) {
1061 		retval = pcibus_enum(mod, ptn, pname, min, max, data);
1062 	} else if (strcmp(name, PCIEX_BUS) == 0) {
1063 		retval = pciexbus_enum(mod, ptn, pname, min, max);
1064 	} else {
1065 		topo_mod_dprintf(mod,
1066 		    "Currently only know how to enumerate %s or %s not %s.\n",
1067 		    PCI_BUS, PCIEX_BUS, name);
1068 		return (0);
1069 	}
1070 
1071 	return (retval);
1072 }
1073 
1074 /*ARGSUSED*/
1075 static void
1076 pci_release(topo_mod_t *mp, tnode_t *node)
1077 {
1078 	topo_method_unregister_all(mp, node);
1079 
1080 	/*
1081 	 * node private data (did_t) for this node is destroyed in
1082 	 * did_hash_destroy()
1083 	 */
1084 
1085 	topo_node_unbind(node);
1086 }
1087