1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Joyent, Inc.
14  */
15 
16 /*
17  * This file drives topo node enumeration of NVMe controllers.  A single "nvme"
18  * node is enumerated for each NVMe controller.   Child "disk" nodes are then
19  * enumerated for each configured NVMe namespace.
20  *
21  * nvme nodes are expected to be enumerated under either a "bay" node (for U.2
22  * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC
23  * devices).
24  *
25  * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven
26  * by the pcibus topo module.
27  *
28  * In order to allow for associating a given NVMe controller with a physical
29  * location, enumeration of U.2 and M.2 devices should be driven by a
30  * platform-specific topo map which statically sets the following two
31  * properties on the parent "bay" or "slot" node:
32  *
33  * propgroup        property        description
34  * ---------        --------        ------------
35  * binding          driver          "nvme"
36  * binding          parent-device   devpath of parent PCIe device
37  *
38  * for example:
39  *
40  * <propgroup name="binding" version="1" name-stability="Private"
41  *   data-stability="Private">
42  *     <propval name="driver" type="string" value="nvme"/>
43  *     <propval name="parent-device" type="string"
44  *       value="/pci@0,0/pci8086,6f09@3,1"/>
45  * </propgroup>
46  * <dependents grouping="children">
47  *     <range name="nvme" min="0" max="0">
48  *         <enum-method name="disk" version="1"/>
49  *     </range>
50  * </dependents>
51  */
52 #include <stdlib.h>
53 #include <sys/types.h>
54 #include <sys/stat.h>
55 #include <fcntl.h>
56 #include <unistd.h>
57 #include <string.h>
58 #include <strings.h>
59 
60 #include <sys/fm/protocol.h>
61 #include <fm/topo_hc.h>
62 #include <fm/topo_mod.h>
63 
64 #include <sys/dkio.h>
65 #include <sys/scsi/generic/inquiry.h>
66 
67 #include <sys/nvme.h>
68 #include "disk.h"
69 #include "disk_drivers.h"
70 
71 typedef struct nvme_enum_info {
72 	topo_mod_t		*nei_mod;
73 	di_node_t		nei_dinode;
74 	nvme_identify_ctrl_t	*nei_idctl;
75 	nvme_version_t		nei_vers;
76 	tnode_t			*nei_parent;
77 	tnode_t			*nei_nvme;
78 	nvlist_t		*nei_nvme_fmri;
79 	const char		*nei_nvme_path;
80 	int			nei_fd;
81 } nvme_enum_info_t;
82 
83 typedef struct devlink_arg {
84 	topo_mod_t		*dla_mod;
85 	char			*dla_logical_disk;
86 	uint_t			dla_strsz;
87 } devlink_arg_t;
88 
89 static int
devlink_cb(di_devlink_t dl,void * arg)90 devlink_cb(di_devlink_t dl, void *arg)
91 {
92 	devlink_arg_t *dlarg = (devlink_arg_t *)arg;
93 	topo_mod_t *mod = dlarg->dla_mod;
94 	const char *devpath;
95 	char *slice, *ctds;
96 
97 	if ((devpath = di_devlink_path(dl)) == NULL ||
98 	    (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) ==
99 	    NULL) {
100 		return (DI_WALK_TERMINATE);
101 	}
102 
103 	/*
104 	 * We need to keep track of the original string size before we
105 	 * truncate it with a NUL, so that we can free the right number of
106 	 * bytes when we're done, otherwise libumem will complain.
107 	 */
108 	dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1;
109 
110 	/* trim the slice off the public name */
111 	if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) &&
112 	    ((slice = strchr(ctds, 's')) != NULL))
113 		*slice = '\0';
114 
115 	return (DI_WALK_TERMINATE);
116 }
117 
118 static char *
get_logical_disk(topo_mod_t * mod,const char * devpath,uint_t * bufsz)119 get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz)
120 {
121 	di_devlink_handle_t devhdl;
122 	devlink_arg_t dlarg = { 0 };
123 	char *minorpath = NULL;
124 
125 	if (asprintf(&minorpath, "%s:a", devpath) < 0) {
126 		return (NULL);
127 	}
128 
129 	if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) {
130 		topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__);
131 		free(minorpath);
132 		return (NULL);
133 	}
134 
135 	dlarg.dla_mod = mod;
136 
137 	(void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK,
138 	    &dlarg, devlink_cb);
139 
140 	(void) di_devlink_fini(&devhdl);
141 	free(minorpath);
142 
143 	*bufsz = dlarg.dla_strsz;
144 	return (dlarg.dla_logical_disk);
145 }
146 
147 static int
make_disk_node(nvme_enum_info_t * nvme_info,di_node_t dinode,topo_instance_t inst)148 make_disk_node(nvme_enum_info_t *nvme_info, di_node_t dinode,
149     topo_instance_t inst)
150 {
151 	topo_mod_t *mod = nvme_info->nei_mod;
152 	nvlist_t *auth = NULL, *fmri = NULL;
153 	tnode_t *disk;
154 	char *rev = NULL, *model = NULL, *serial = NULL, *path;
155 	char *logical_disk = NULL, *devid, *manuf, *ctd = NULL;
156 	char *cap_bytes_str = NULL, full_path[MAXPATHLEN + 1];
157 	char *pname = topo_node_name(nvme_info->nei_parent);
158 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
159 	const char **ppaths = NULL;
160 	struct dk_minfo minfo;
161 	uint64_t cap_bytes;
162 	uint_t bufsz;
163 	int fd = -1, err, ret = -1, r;
164 
165 	if ((path = di_devfs_path(dinode)) == NULL) {
166 		topo_mod_dprintf(mod, "%s: failed to get dev path", __func__);
167 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
168 		return (ret);
169 	}
170 
171 	topo_mod_dprintf(mod, "%s: found nvme namespace: %s", __func__, path);
172 
173 	/*
174 	 * Issue the DKIOCGMEDIAINFO ioctl to get the capacity
175 	 */
176 	(void) snprintf(full_path, MAXPATHLEN, "/devices%s%s", path,
177 	    PHYS_EXTN);
178 	if ((fd = open(full_path, O_RDWR)) < 0 ||
179 	    ioctl(fd, DKIOCGMEDIAINFO, &minfo) < 0) {
180 		topo_mod_dprintf(mod, "failed to get blkdev capacity (%s)",
181 		    strerror(errno));
182 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
183 		goto error;
184 	}
185 
186 	cap_bytes = minfo.dki_lbsize * minfo.dki_capacity;
187 
188 	if (asprintf(&cap_bytes_str, "%" PRIu64, cap_bytes) < 0) {
189 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
190 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
191 		goto error;
192 	}
193 
194 	/*
195 	 * Gather the FRU identity information from the devinfo properties
196 	 */
197 	if (di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, DEVID_PROP_NAME,
198 	    &devid) == -1 ||
199 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_VENDOR_ID,
200 	    &manuf) == -1 ||
201 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_PRODUCT_ID,
202 	    &model) == -1 ||
203 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_REVISION_ID,
204 	    &rev) == -1 ||
205 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_SERIAL_NO,
206 	    &serial) == -1) {
207 		topo_mod_dprintf(mod, "%s: failed to lookup devinfo props on "
208 		    "%s", __func__, path);
209 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
210 		goto error;
211 	}
212 
213 	model = topo_mod_clean_str(mod, model);
214 	rev = topo_mod_clean_str(mod, rev);
215 	serial = topo_mod_clean_str(mod, serial);
216 
217 	/*
218 	 * Lookup the /dev/dsk/c#t#d# disk device name from the blkdev path
219 	 */
220 	if ((logical_disk = get_logical_disk(mod, path, &bufsz)) == NULL) {
221 		topo_mod_dprintf(mod, "failed to find logical disk");
222 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
223 		goto error;
224 	}
225 
226 	/*
227 	 * If we were able to look up the logical disk path for this namespace
228 	 * then set ctd to be that pathname, minus the "/dev/dsk/" portion.
229 	 */
230 	if ((ctd = strrchr(logical_disk, '/')) !=  NULL) {
231 		ctd = ctd + 1;
232 	} else {
233 		topo_mod_dprintf(mod, "malformed logical disk path: %s",
234 		    logical_disk);
235 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
236 		goto error;
237 	}
238 
239 	/*
240 	 * Build the FMRI and then bind the disk node to the parent nvme node.
241 	 */
242 	auth = topo_mod_auth(mod, nvme_info->nei_nvme);
243 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_nvme, FM_HC_SCHEME_VERSION,
244 	    DISK, inst, NULL, auth, model, rev, serial);
245 
246 	if (fmri == NULL) {
247 		/* errno set */
248 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%u/%s=0/%s=%u",
249 		    __func__, pname, pinst, NVME, DISK, inst);
250 		goto error;
251 	}
252 	if ((disk = topo_node_bind(mod, nvme_info->nei_nvme, DISK, inst,
253 	    fmri)) == NULL) {
254 		/* errno set */
255 		topo_mod_dprintf(mod, "%s: bind failed for %s=%u/%s=0/%s=%u",
256 		    __func__, pname, pinst, NVME, DISK, inst);
257 		goto error;
258 	}
259 
260 	/* Create authority and system propgroups */
261 	topo_pgroup_hcset(disk, auth);
262 
263 	/*
264 	 * As the "disk" in this case is simply a logical construct
265 	 * representing an NVMe namespace, we inherit the FRU from the parent
266 	 * node.
267 	 */
268 	if (topo_node_fru_set(disk, NULL, 0, &err) != 0) {
269 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
270 		    topo_strerror(err));
271 		(void) topo_mod_seterrno(mod, err);
272 		goto error;
273 	}
274 
275 	if ((ppaths = topo_mod_zalloc(mod, sizeof (char *))) == NULL) {
276 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
277 		goto error;
278 	}
279 	ppaths[0] = path;
280 
281 	/*
282 	 * Create the "storage" and "io" property groups and then fill them
283 	 * with the standard set of properties for "disk" nodes.
284 	 */
285 	if (topo_pgroup_create(disk, &io_pgroup, &err) != 0 ||
286 	    topo_pgroup_create(disk, &storage_pgroup, &err) != 0) {
287 		topo_mod_dprintf(mod, "%s: failed to create propgroups: %s",
288 		    __func__, topo_strerror(err));
289 		(void) topo_mod_seterrno(mod, err);
290 		goto error;
291 	}
292 
293 	r = topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH,
294 	    TOPO_PROP_IMMUTABLE, path, &err);
295 
296 	r += topo_prop_set_string_array(disk, TOPO_PGROUP_IO,
297 	    TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err);
298 
299 	r += topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEVID,
300 	    TOPO_PROP_IMMUTABLE, devid, &err);
301 
302 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
303 	    TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, manuf, &err);
304 
305 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
306 	    TOPO_STORAGE_CAPACITY, TOPO_PROP_IMMUTABLE, cap_bytes_str,
307 	    &err);
308 
309 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
310 	    TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err);
311 
312 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
313 	    TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err);
314 
315 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
316 	    TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err);
317 
318 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
319 	    TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, ctd, &err);
320 
321 	if (r != 0) {
322 		topo_mod_dprintf(mod, "%s: failed to create properties: %s",
323 		    __func__, topo_strerror(err));
324 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
325 		goto error;
326 	}
327 
328 	ret = 0;
329 
330 error:
331 	free(cap_bytes_str);
332 	if (fd > 0)
333 		(void) close(fd);
334 	if (ppaths != NULL)
335 		topo_mod_free(mod, ppaths, sizeof (char *));
336 	di_devfs_path_free(path);
337 	nvlist_free(auth);
338 	nvlist_free(fmri);
339 	topo_mod_strfree(mod, rev);
340 	topo_mod_strfree(mod, model);
341 	topo_mod_strfree(mod, serial);
342 	topo_mod_free(mod, logical_disk, bufsz);
343 	return (ret);
344 }
345 
346 static const topo_pgroup_info_t nvme_pgroup = {
347 	TOPO_PGROUP_NVME,
348 	TOPO_STABILITY_PRIVATE,
349 	TOPO_STABILITY_PRIVATE,
350 	1
351 };
352 
353 
354 static int
make_nvme_node(nvme_enum_info_t * nvme_info)355 make_nvme_node(nvme_enum_info_t *nvme_info)
356 {
357 	topo_mod_t *mod = nvme_info->nei_mod;
358 	nvlist_t *auth = NULL, *fmri = NULL, *fru;
359 	tnode_t *nvme;
360 	char raw_rev[NVME_FWVER_SZ + 1], raw_model[NVME_MODEL_SZ + 1];
361 	char raw_serial[NVME_SERIAL_SZ + 1];
362 	char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL;
363 	char *pname = topo_node_name(nvme_info->nei_parent);
364 	char *label = NULL;
365 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
366 	int err = 0, ret = -1;
367 	di_node_t cn;
368 	uint_t i;
369 
370 	/*
371 	 * The raw strings returned by the IDENTIFY CONTROLLER command are
372 	 * not NUL-terminated, so we fix that up.
373 	 */
374 	(void) strncpy(raw_rev, nvme_info->nei_idctl->id_fwrev, NVME_FWVER_SZ);
375 	raw_rev[NVME_FWVER_SZ] = '\0';
376 	(void) strncpy(raw_model, nvme_info->nei_idctl->id_model,
377 	    NVME_MODEL_SZ);
378 	raw_model[NVME_MODEL_SZ] = '\0';
379 	(void) strncpy(raw_serial, nvme_info->nei_idctl->id_serial,
380 	    NVME_SERIAL_SZ);
381 	raw_serial[NVME_SERIAL_SZ] = '\0';
382 
383 	/*
384 	 * Next we pass the strings through a function that sanitizes them of
385 	 * any characters that can't be used in an FMRI string.
386 	 */
387 	rev = topo_mod_clean_str(mod, raw_rev);
388 	model = topo_mod_clean_str(mod, raw_model);
389 	serial = topo_mod_clean_str(mod, raw_serial);
390 
391 	auth = topo_mod_auth(mod, nvme_info->nei_parent);
392 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION,
393 	    NVME, 0, NULL, auth, model, rev, serial);
394 
395 	if (fmri == NULL) {
396 		/* errno set */
397 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%u/%s=0",
398 		    __func__, pname, pinst, NVME);
399 		goto error;
400 	}
401 
402 	/*
403 	 * If our parent is a pciexfn node, then we need to create a nvme range
404 	 * underneath it to hold the nvme heirarchy.  For other cases, where
405 	 * enumeration is being driven by a topo map file, this range will have
406 	 * already been statically defined in the XML.
407 	 */
408 	if (strcmp(pname, PCIEX_FUNCTION) == 0) {
409 		if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0,
410 		    0) < 0) {
411 			/* errno set */
412 			topo_mod_dprintf(mod, "%s: error creating %s range",
413 			    __func__, NVME);
414 			goto error;
415 		}
416 	}
417 
418 	/*
419 	 * Create a new topo node to represent the NVMe controller and bind it
420 	 * to the parent node.
421 	 */
422 	if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0,
423 	    fmri)) == NULL) {
424 		/* errno set */
425 		topo_mod_dprintf(mod, "%s: bind failed for %s=%u/%s=0",
426 		    __func__, pname, pinst, NVME);
427 		goto error;
428 	}
429 	nvme_info->nei_nvme = nvme;
430 	nvme_info->nei_nvme_fmri = fmri;
431 
432 	/*
433 	 * If our parent node is a "pciexfn" node then this is a NVMe device on
434 	 * a PCIe AIC, so we inherit our parent's FRU.  Otherwise, we set the
435 	 * FRU to ourself.
436 	 */
437 	if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0)
438 		fru = NULL;
439 	else
440 		fru = fmri;
441 
442 	if (topo_node_fru_set(nvme, fru, 0, &err) != 0) {
443 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
444 		    topo_strerror(err));
445 		(void) topo_mod_seterrno(mod, err);
446 		goto error;
447 	}
448 
449 	/*
450 	 * Clone the label from our parent node.  We can't inherit the property
451 	 * because the label prop is mutable on bay nodes and only immutable
452 	 * properties can be inherited.
453 	 */
454 	if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 &&
455 	    err != ETOPO_PROP_NOENT) ||
456 	    topo_node_label_set(nvme, label, &err) != 0) {
457 		topo_mod_dprintf(mod, "%s: failed to set label: %s",
458 		    __func__, topo_strerror(err));
459 		(void) topo_mod_seterrno(mod, err);
460 		goto error;
461 	}
462 
463 	if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) {
464 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
465 		    __func__, TOPO_PGROUP_NVME, topo_strerror(err));
466 		(void) topo_mod_seterrno(mod, err);
467 		goto error;
468 	}
469 
470 	if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major,
471 	    nvme_info->nei_vers.v_minor) < 0) {
472 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
473 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
474 		goto error;
475 	}
476 	if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER,
477 	    TOPO_PROP_IMMUTABLE, vers, &err) != 0) {
478 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
479 		    __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER);
480 		(void) topo_mod_seterrno(mod, err);
481 		goto error;
482 	}
483 
484 	if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) {
485 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
486 		    __func__, TOPO_PGROUP_IO, topo_strerror(err));
487 		(void) topo_mod_seterrno(mod, err);
488 		goto error;
489 	}
490 	if (topo_prop_set_string(nvme, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH,
491 	    TOPO_PROP_IMMUTABLE, nvme_info->nei_nvme_path, &err) != 0) {
492 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
493 		    __func__, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH);
494 		(void) topo_mod_seterrno(mod, err);
495 		goto error;
496 	}
497 
498 	/*
499 	 * Create a child disk node for each namespace.
500 	 */
501 	if (topo_node_range_create(mod, nvme, DISK, 0,
502 	    (nvme_info->nei_idctl->id_nn - 1)) < 0) {
503 		/* errno set */
504 		topo_mod_dprintf(mod, "%s: error creating %s range", __func__,
505 		    DISK);
506 		goto error;
507 	}
508 
509 	for (i = 0, cn = di_child_node(nvme_info->nei_dinode);
510 	    cn != DI_NODE_NIL;
511 	    i++, cn = di_sibling_node(cn)) {
512 
513 		if (make_disk_node(nvme_info, cn, i) != 0) {
514 			char *path = di_devfs_path(cn);
515 			/*
516 			 * We note the failure, but attempt to forge ahead and
517 			 * enumerate any other namespaces.
518 			 */
519 			topo_mod_dprintf(mod, "%s: make_disk_node() failed "
520 			    "for %s\n", __func__,
521 			    path ? path : "unknown path");
522 			di_devfs_path_free(path);
523 		}
524 	}
525 	ret = 0;
526 
527 error:
528 	free(vers);
529 	nvlist_free(auth);
530 	nvlist_free(fmri);
531 	topo_mod_strfree(mod, rev);
532 	topo_mod_strfree(mod, model);
533 	topo_mod_strfree(mod, serial);
534 	topo_mod_strfree(mod, label);
535 	return (ret);
536 }
537 
538 struct diwalk_arg {
539 	topo_mod_t	*diwk_mod;
540 	tnode_t		*diwk_parent;
541 };
542 
543 /*
544  * This function gathers identity information from the NVMe controller and
545  * stores it in a struct.  This struct is passed to make_nvme_node(), which
546  * does the actual topo node creation.
547  */
548 static int
discover_nvme_ctl(di_node_t node,di_minor_t minor,void * arg)549 discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg)
550 {
551 	struct diwalk_arg *wkarg = arg;
552 	topo_mod_t *mod = wkarg->diwk_mod;
553 	char *path = NULL, *devctl = NULL;
554 	nvme_ioctl_t nioc = { 0 };
555 	nvme_identify_ctrl_t *idctl = NULL;
556 	nvme_enum_info_t nvme_info = { 0 };
557 	int fd = -1, ret = DI_WALK_TERMINATE;
558 
559 	if ((path = di_devfs_minor_path(minor)) == NULL) {
560 		topo_mod_dprintf(mod, "failed to get minor path");
561 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
562 		return (ret);
563 	}
564 
565 	topo_mod_dprintf(mod, "%s=%u: found nvme controller: %s",
566 	    topo_node_name(wkarg->diwk_parent),
567 	    topo_node_instance(wkarg->diwk_parent), path);
568 
569 	if (asprintf(&devctl, "/devices%s", path) < 0) {
570 		topo_mod_dprintf(mod, "failed to alloc string");
571 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
572 		goto error;
573 	}
574 
575 	if ((fd = open(devctl, O_RDWR)) < 0) {
576 		topo_mod_dprintf(mod, "failed to open %s", devctl);
577 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
578 		goto error;
579 	}
580 	if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) {
581 		topo_mod_dprintf(mod, "zalloc failed");
582 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
583 		goto error;
584 	}
585 	nioc.n_len = NVME_IDENTIFY_BUFSIZE;
586 	nioc.n_buf = (uintptr_t)idctl;
587 
588 	if (ioctl(fd, NVME_IOC_IDENTIFY_CTRL, &nioc) != 0) {
589 		topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY_CTRL ioctl "
590 		    "failed: %s", strerror(errno));
591 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
592 		goto error;
593 	}
594 
595 	nioc.n_len = sizeof (nvme_version_t);
596 	nioc.n_buf = (uintptr_t)&nvme_info.nei_vers;
597 
598 	if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) {
599 		topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s",
600 		    strerror(errno));
601 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
602 		goto error;
603 	}
604 
605 	nvme_info.nei_mod = mod;
606 	nvme_info.nei_nvme_path = path;
607 	nvme_info.nei_dinode = node;
608 	nvme_info.nei_idctl = idctl;
609 	nvme_info.nei_parent = wkarg->diwk_parent;
610 	nvme_info.nei_fd = fd;
611 
612 	if (make_nvme_node(&nvme_info) != 0) {
613 		/* errno set */
614 		goto error;
615 	}
616 
617 	ret = DI_WALK_CONTINUE;
618 
619 error:
620 	if (fd > 0)
621 		(void) close(fd);
622 	di_devfs_path_free(path);
623 	free(devctl);
624 	if (idctl != NULL)
625 		topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE);
626 	return (ret);
627 }
628 
629 int
disk_nvme_enum_disk(topo_mod_t * mod,tnode_t * pnode)630 disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode)
631 {
632 	char *parent = NULL;
633 	int err;
634 	di_node_t devtree;
635 	di_node_t dnode;
636 	struct diwalk_arg wkarg = { 0 };
637 	int ret = -1;
638 
639 	/*
640 	 * Lookup a property containing the devfs path of the parent PCIe
641 	 * device of the NVMe device we're attempting to enumerate.  This
642 	 * property is hard-coded in per-platform topo XML maps that are
643 	 * delivered with the OS.  This hard-coded path allows topo to map a
644 	 * given NVMe controller to a physical location (bay or slot) on the
645 	 * platform, when generating the topo snapshot.
646 	 */
647 	if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING,
648 	    TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) {
649 		topo_mod_dprintf(mod, "parent node was missing nvme binding "
650 		    "properties\n");
651 		(void) topo_mod_seterrno(mod, err);
652 		goto out;
653 	}
654 	if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) {
655 		topo_mod_dprintf(mod, "failed to get devinfo snapshot");
656 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
657 		goto out;
658 	}
659 
660 	/*
661 	 * Walk the devinfo tree looking NVMe devices. For each NVMe device,
662 	 * check if the devfs path of the parent matches the one specified in
663 	 * TOPO_BINDING_PARENT_DEV.
664 	 */
665 	wkarg.diwk_mod = mod;
666 	wkarg.diwk_parent = pnode;
667 	dnode = di_drv_first_node(NVME_DRV, devtree);
668 	while (dnode != DI_NODE_NIL) {
669 		char *path;
670 
671 		if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) {
672 			topo_mod_dprintf(mod, "failed to get dev path");
673 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
674 			goto out;
675 		}
676 		if (strcmp(parent, path) == 0) {
677 			if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0,
678 			    &wkarg, discover_nvme_ctl) < 0) {
679 				di_devfs_path_free(path);
680 				goto out;
681 			}
682 		}
683 		di_devfs_path_free(path);
684 		dnode = di_drv_next_node(dnode);
685 	}
686 	ret = 0;
687 
688 out:
689 	topo_mod_strfree(mod, parent);
690 	return (ret);
691 }
692