xref: /illumos-gate/usr/src/uts/common/os/devid_cache.c (revision 5f61829a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2018 by Delphix. All rights reserved.
24  * Copyright 2023 Oxide Computer Company
25  */
26 
27 #include <sys/note.h>
28 #include <sys/t_lock.h>
29 #include <sys/cmn_err.h>
30 #include <sys/instance.h>
31 #include <sys/conf.h>
32 #include <sys/stat.h>
33 #include <sys/ddi.h>
34 #include <sys/hwconf.h>
35 #include <sys/sunddi.h>
36 #include <sys/sunndi.h>
37 #include <sys/sunmdi.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/ndi_impldefs.h>
40 #include <sys/kobj.h>
41 #include <sys/devcache.h>
42 #include <sys/devid_cache.h>
43 #include <sys/sysmacros.h>
44 
45 /*
46  * Discovery refers to the heroic effort made to discover a device which
47  * cannot be accessed at the physical path where it once resided.  Discovery
48  * involves walking the entire device tree attaching all possible disk
49  * instances, to search for the device referenced by a devid.  Obviously,
50  * full device discovery is something to be avoided where possible.
51  * Note that simply invoking devfsadm(8) is equivalent to running full
52  * discovery at the devid cache level.
53  *
54  * Reasons why a disk may not be accessible:
55  *	disk powered off
56  *	disk removed or cable disconnected
57  *	disk or adapter broken
58  *
59  * Note that discovery is not needed and cannot succeed in any of these
60  * cases.
61  *
62  * When discovery may succeed:
63  *	Discovery will result in success when a device has been moved
64  *	to a different address.  Note that it's recommended that
65  *	devfsadm(8) be invoked (no arguments required) whenever a system's
66  *	h/w configuration has been updated.  Alternatively, a
67  *	reconfiguration boot can be used to accomplish the same result.
68  *
69  * Note that discovery is not necessary to be able to correct an access
70  * failure for a device which was powered off.  Assuming the cache has an
71  * entry for such a device, simply powering it on should permit the system
72  * to access it.  If problems persist after powering it on, invoke
73  * devfsadm(8).
74  *
75  * Discovery prior to mounting root is only of interest when booting
 * from a filesystem which accesses devices by device id, which
 * not all do.
78  *
79  * Tunables
80  *
81  * devid_discovery_boot (default 1)
82  *	Number of times discovery will be attempted prior to mounting root.
83  *	Must be done at least once to recover from corrupted or missing
84  *	devid cache backing store.  Probably there's no reason to ever
85  *	set this to greater than one as a missing device will remain
86  *	unavailable no matter how often the system searches for it.
87  *
88  * devid_discovery_postboot (default 1)
89  *	Number of times discovery will be attempted after mounting root.
90  *	This must be performed at least once to discover any devices
91  *	needed after root is mounted which may have been powered
92  *	off and moved before booting.
93  *	Setting this to a larger positive number will introduce
94  *	some inconsistency in system operation.  Searching for a device
95  *	will take an indeterminate amount of time, sometimes slower,
96  *	sometimes faster.  In addition, the system will sometimes
97  *	discover a newly powered on device, sometimes it won't.
98  *	Use of this option is not therefore recommended.
99  *
100  * devid_discovery_postboot_always (default 0)
101  *	Set to 1, the system will always attempt full discovery.
102  *
103  * devid_discovery_secs (default 0)
104  *	Set to a positive value, the system will attempt full discovery
105  *	but with a minimum delay between attempts.  A device search
106  *	within the period of time specified will result in failure.
107  *
108  * devid_cache_read_disable (default 0)
109  *	Set to 1 to disable reading /etc/devices/devid_cache.
110  *	Devid cache will continue to operate normally but
111  *	at least one discovery attempt will be required.
112  *
113  * devid_cache_write_disable (default 0)
114  *	Set to 1 to disable updates to /etc/devices/devid_cache.
115  *	Any updates to the devid cache will not be preserved across a reboot.
116  *
117  * devid_report_error (default 0)
118  *	Set to 1 to enable some error messages related to devid
119  *	cache failures.
120  *
121  * The devid is packed in the cache file as a byte array.  For
122  * portability, this could be done in the encoded string format.
123  */
124 
125 
/* Remaining discovery attempts permitted before I/O is initialized. */
int devid_discovery_boot = 1;
/* Remaining discovery attempts permitted once I/O is initialized. */
int devid_discovery_postboot = 1;
/* Non-zero: discovery is always permitted once I/O is initialized. */
int devid_discovery_postboot_always = 0;
/* If positive, minimum number of seconds between later discoveries. */
int devid_discovery_secs = 0;

/* Non-zero: devid_cache_read() does not read the cache file. */
int devid_cache_read_disable = 0;
/* Non-zero: cache changes are not marked dirty for write-back. */
int devid_cache_write_disable = 0;

/* Set to 1 to enable some devid cache error messages (see above). */
int devid_report_error = 0;
135 
136 
/*
 * State to manage discovery of devices providing a devid
 */
/* Set to 1 while a thread is running discovery in e_ddi_devid_discovery(). */
static int		devid_discovery_busy = 0;
/* Held while the discovery state and discovery tunables are examined. */
static kmutex_t		devid_discovery_mutex;
/* Waited on while discovery is busy; broadcast when discovery completes. */
static kcondvar_t	devid_discovery_cv;
/* lbolt recorded after a discovery, only when devid_discovery_secs > 0. */
static clock_t		devid_last_discovery = 0;


#ifdef	DEBUG
/*
 * Debug switches, present only in DEBUG builds.
 * NOTE(review): presumably tested by the DEVID_LOG_*, DEVID_DEBUG* and
 * NVP_DEVID_DEBUG_* macros, which are defined outside this file - confirm.
 */
int nvp_devid_debug = 0;
int devid_debug = 0;
int devid_log_registers = 0;
int devid_log_finds = 0;
int devid_log_lookups = 0;
int devid_log_discovery = 0;
int devid_log_matches = 0;
int devid_log_paths = 0;
int devid_log_failures = 0;
int devid_log_hold = 0;
int devid_log_unregisters = 0;
int devid_log_removes = 0;
int devid_register_debug = 0;
int devid_log_stale = 0;
int devid_log_detaches = 0;
#endif	/* DEBUG */
163 
/*
 * devid cache file registration for cache reads and updates.
 * The table is handed to nvf_register_file() by devid_cache_init().
 */
static nvf_ops_t devid_cache_ops = {
	"/etc/devices/devid_cache",		/* path to cache */
	devid_cache_unpack_nvlist,		/* read: nvlist to nvp */
	devid_cache_pack_list,			/* write: nvp to nvlist */
	devid_list_free,			/* free data list */
	NULL					/* write complete callback */
};

/*
 * handle to registered devid cache handlers; assigned once by
 * devid_cache_init() and used to reach the cache list and its lock
 */
nvf_handle_t	dcfd_handle;
179 
180 
181 /*
182  * Initialize devid cache file management
183  */
184 void
devid_cache_init(void)185 devid_cache_init(void)
186 {
187 	dcfd_handle = nvf_register_file(&devid_cache_ops);
188 	ASSERT(dcfd_handle);
189 
190 	list_create(nvf_list(dcfd_handle), sizeof (nvp_devid_t),
191 	    offsetof(nvp_devid_t, nvp_link));
192 
193 	mutex_init(&devid_discovery_mutex, NULL, MUTEX_DEFAULT, NULL);
194 	cv_init(&devid_discovery_cv, NULL, CV_DRIVER, NULL);
195 }
196 
197 /*
198  * Read and initialize the devid cache from the persistent store
199  */
200 void
devid_cache_read(void)201 devid_cache_read(void)
202 {
203 	if (!devid_cache_read_disable) {
204 		rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
205 		ASSERT(list_head(nvf_list(dcfd_handle)) == NULL);
206 		(void) nvf_read_file(dcfd_handle);
207 		rw_exit(nvf_lock(dcfd_handle));
208 	}
209 }
210 
211 static void
devid_nvp_free(nvp_devid_t * dp)212 devid_nvp_free(nvp_devid_t *dp)
213 {
214 	if (dp->nvp_devpath)
215 		kmem_free(dp->nvp_devpath, strlen(dp->nvp_devpath)+1);
216 	if (dp->nvp_devid)
217 		kmem_free(dp->nvp_devid, ddi_devid_sizeof(dp->nvp_devid));
218 
219 	kmem_free(dp, sizeof (nvp_devid_t));
220 }
221 
222 static void
devid_list_free(nvf_handle_t fd)223 devid_list_free(nvf_handle_t fd)
224 {
225 	list_t		*listp;
226 	nvp_devid_t	*np;
227 
228 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
229 
230 	listp = nvf_list(fd);
231 	while (np = list_head(listp)) {
232 		list_remove(listp, np);
233 		devid_nvp_free(np);
234 	}
235 }
236 
237 /*
238  * Free an nvp element in a list
239  */
240 static void
devid_nvp_unlink_and_free(nvf_handle_t fd,nvp_devid_t * np)241 devid_nvp_unlink_and_free(nvf_handle_t fd, nvp_devid_t *np)
242 {
243 	list_remove(nvf_list(fd), np);
244 	devid_nvp_free(np);
245 }
246 
247 /*
248  * Unpack a device path/nvlist pair to the list of devid cache elements.
249  * Used to parse the nvlist format when reading
250  * /etc/devices/devid_cache
251  */
252 static int
devid_cache_unpack_nvlist(nvf_handle_t fd,nvlist_t * nvl,char * name)253 devid_cache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
254 {
255 	nvp_devid_t *np;
256 	ddi_devid_t devidp;
257 	int rval;
258 	uint_t n;
259 
260 	NVP_DEVID_DEBUG_PATH((name));
261 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
262 
263 	/*
264 	 * check path for a devid
265 	 */
266 	rval = nvlist_lookup_byte_array(nvl,
267 	    DP_DEVID_ID, (uchar_t **)&devidp, &n);
268 	if (rval == 0) {
269 		if (ddi_devid_valid(devidp) == DDI_SUCCESS) {
270 			ASSERT(n == ddi_devid_sizeof(devidp));
271 			np = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
272 			np->nvp_devpath = i_ddi_strdup(name, KM_SLEEP);
273 			np->nvp_devid = kmem_alloc(n, KM_SLEEP);
274 			(void) bcopy(devidp, np->nvp_devid, n);
275 			list_insert_tail(nvf_list(fd), np);
276 			NVP_DEVID_DEBUG_DEVID((np->nvp_devid));
277 		} else {
278 			DEVIDERR((CE_CONT,
279 			    "%s: invalid devid\n", name));
280 		}
281 	} else {
282 		DEVIDERR((CE_CONT,
283 		    "%s: devid not available\n", name));
284 	}
285 
286 	return (0);
287 }
288 
289 /*
290  * Pack the list of devid cache elements into a single nvlist
291  * Used when writing the nvlist file.
292  */
293 static int
devid_cache_pack_list(nvf_handle_t fd,nvlist_t ** ret_nvl)294 devid_cache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
295 {
296 	nvlist_t	*nvl, *sub_nvl;
297 	nvp_devid_t	*np;
298 	int		rval;
299 	list_t		*listp;
300 
301 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
302 
303 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
304 	if (rval != 0) {
305 		nvf_error("%s: nvlist alloc error %d\n",
306 		    nvf_cache_name(fd), rval);
307 		return (DDI_FAILURE);
308 	}
309 
310 	listp = nvf_list(fd);
311 	for (np = list_head(listp); np; np = list_next(listp, np)) {
312 		if (np->nvp_devid == NULL)
313 			continue;
314 		NVP_DEVID_DEBUG_PATH(np->nvp_devpath);
315 		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
316 		if (rval != 0) {
317 			nvf_error("%s: nvlist alloc error %d\n",
318 			    nvf_cache_name(fd), rval);
319 			sub_nvl = NULL;
320 			goto err;
321 		}
322 
323 		rval = nvlist_add_byte_array(sub_nvl, DP_DEVID_ID,
324 		    (uchar_t *)np->nvp_devid,
325 		    ddi_devid_sizeof(np->nvp_devid));
326 		if (rval == 0) {
327 			NVP_DEVID_DEBUG_DEVID(np->nvp_devid);
328 		} else {
329 			nvf_error(
330 			    "%s: nvlist add error %d (devid)\n",
331 			    nvf_cache_name(fd), rval);
332 			goto err;
333 		}
334 
335 		rval = nvlist_add_nvlist(nvl, np->nvp_devpath, sub_nvl);
336 		if (rval != 0) {
337 			nvf_error("%s: nvlist add error %d (sublist)\n",
338 			    nvf_cache_name(fd), rval);
339 			goto err;
340 		}
341 		nvlist_free(sub_nvl);
342 	}
343 
344 	*ret_nvl = nvl;
345 	return (DDI_SUCCESS);
346 
347 err:
348 	nvlist_free(sub_nvl);
349 	nvlist_free(nvl);
350 	*ret_nvl = NULL;
351 	return (DDI_FAILURE);
352 }
353 
354 static int
e_devid_do_discovery(void)355 e_devid_do_discovery(void)
356 {
357 	ASSERT(mutex_owned(&devid_discovery_mutex));
358 
359 	if (i_ddi_io_initialized() == 0) {
360 		if (devid_discovery_boot > 0) {
361 			devid_discovery_boot--;
362 			return (1);
363 		}
364 	} else {
365 		if (devid_discovery_postboot_always > 0)
366 			return (1);
367 		if (devid_discovery_postboot > 0) {
368 			devid_discovery_postboot--;
369 			return (1);
370 		}
371 		if (devid_discovery_secs > 0) {
372 			if ((ddi_get_lbolt() - devid_last_discovery) >
373 			    drv_usectohz(devid_discovery_secs * MICROSEC)) {
374 				return (1);
375 			}
376 		}
377 	}
378 
379 	DEVID_LOG_DISC((CE_CONT, "devid_discovery: no discovery\n"));
380 	return (0);
381 }
382 
383 static void
e_ddi_devid_hold_by_major(major_t major)384 e_ddi_devid_hold_by_major(major_t major)
385 {
386 	DEVID_LOG_DISC((CE_CONT,
387 	    "devid_discovery: ddi_hold_installed_driver %d\n", major));
388 
389 	if (ddi_hold_installed_driver(major) == NULL)
390 		return;
391 
392 	ddi_rele_driver(major);
393 }
394 
395 /* legacy support - see below */
396 static char *e_ddi_devid_hold_driver_list[] = { "sd", "ssd" };
397 
398 #define	N_DRIVERS_TO_HOLD	\
399 	(sizeof (e_ddi_devid_hold_driver_list) / sizeof (char *))
400 
401 static void
e_ddi_devid_hold_installed_driver(ddi_devid_t devid)402 e_ddi_devid_hold_installed_driver(ddi_devid_t devid)
403 {
404 	impl_devid_t	*id = (impl_devid_t *)devid;
405 	major_t		major, hint_major;
406 	char		hint[DEVID_HINT_SIZE + 1];
407 	struct devnames	*dnp;
408 	char		**drvp;
409 	int		i;
410 
411 	/* Count non-null bytes */
412 	for (i = 0; i < DEVID_HINT_SIZE; i++)
413 		if (id->did_driver[i] == '\0')
414 			break;
415 
416 	/* Make a copy of the driver hint */
417 	bcopy(id->did_driver, hint, i);
418 	hint[i] = '\0';
419 
420 	/* search for the devid using the hint driver */
421 	hint_major = ddi_name_to_major(hint);
422 	if (hint_major != DDI_MAJOR_T_NONE) {
423 		e_ddi_devid_hold_by_major(hint_major);
424 	}
425 
426 	/*
427 	 * search for the devid with each driver declaring
428 	 * itself as a devid registrant.
429 	 */
430 	for (major = 0; major < devcnt; major++) {
431 		if (major == hint_major)
432 			continue;
433 		dnp = &devnamesp[major];
434 		if (dnp->dn_flags & DN_DEVID_REGISTRANT) {
435 			e_ddi_devid_hold_by_major(major);
436 		}
437 	}
438 
439 	/*
440 	 * Legacy support: may be removed once an upgrade mechanism
441 	 * for driver conf files is available.
442 	 */
443 	drvp = e_ddi_devid_hold_driver_list;
444 	for (i = 0; i < N_DRIVERS_TO_HOLD; i++, drvp++) {
445 		major = ddi_name_to_major(*drvp);
446 		if (major != DDI_MAJOR_T_NONE && major != hint_major) {
447 			e_ddi_devid_hold_by_major(major);
448 		}
449 	}
450 }
451 
452 /*
453  * Return success if discovery was attempted, to indicate
454  * that the desired device may now be available.
455  */
456 int
e_ddi_devid_discovery(ddi_devid_t devid)457 e_ddi_devid_discovery(ddi_devid_t devid)
458 {
459 	int flags;
460 	int rval = DDI_SUCCESS;
461 
462 	mutex_enter(&devid_discovery_mutex);
463 
464 	if (devid_discovery_busy) {
465 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: busy\n"));
466 		while (devid_discovery_busy) {
467 			cv_wait(&devid_discovery_cv, &devid_discovery_mutex);
468 		}
469 	} else if (e_devid_do_discovery()) {
470 		devid_discovery_busy = 1;
471 		mutex_exit(&devid_discovery_mutex);
472 
473 		if (i_ddi_io_initialized() == 0) {
474 			e_ddi_devid_hold_installed_driver(devid);
475 		} else {
476 			DEVID_LOG_DISC((CE_CONT,
477 			    "devid_discovery: ndi_devi_config\n"));
478 			flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
479 			if (i_ddi_io_initialized())
480 				flags |= NDI_DRV_CONF_REPROBE;
481 			(void) ndi_devi_config(ddi_root_node(), flags);
482 		}
483 
484 		mutex_enter(&devid_discovery_mutex);
485 		devid_discovery_busy = 0;
486 		cv_broadcast(&devid_discovery_cv);
487 		if (devid_discovery_secs > 0)
488 			devid_last_discovery = ddi_get_lbolt();
489 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: done\n"));
490 	} else {
491 		rval = DDI_FAILURE;
492 		DEVID_LOG_DISC((CE_CONT, "no devid discovery\n"));
493 	}
494 
495 	mutex_exit(&devid_discovery_mutex);
496 
497 	return (rval);
498 }
499 
500 /*
501  * As part of registering a devid for a device,
502  * update the devid cache with this device/devid pair
503  * or note that this combination has registered.
504  *
505  * If a devpath is provided it will be used as the path to register the
506  * devid against, otherwise we use ddi_pathname(dip).  In both cases
507  * we duplicate the path string so that it can be cached/freed indepdently
508  * of the original owner.
509  */
510 static int
e_devid_cache_register_cmn(dev_info_t * dip,ddi_devid_t devid,char * devpath)511 e_devid_cache_register_cmn(dev_info_t *dip, ddi_devid_t devid, char *devpath)
512 {
513 	nvp_devid_t *np;
514 	nvp_devid_t *new_nvp;
515 	ddi_devid_t new_devid;
516 	int new_devid_size;
517 	char *path, *fullpath;
518 	ddi_devid_t free_devid = NULL;
519 	int pathlen;
520 	list_t *listp;
521 	int is_dirty = 0;
522 
523 
524 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
525 
526 	if (devpath) {
527 		pathlen = strlen(devpath) + 1;
528 		path = kmem_alloc(pathlen, KM_SLEEP);
529 		bcopy(devpath, path, pathlen);
530 	} else {
531 		/*
532 		 * We are willing to accept DS_BOUND nodes if we can form a full
533 		 * ddi_pathname (i.e. the node is part way to becomming
534 		 * DS_INITIALIZED and devi_addr/ddi_get_name_addr are non-NULL).
535 		 */
536 		if (ddi_get_name_addr(dip) == NULL)
537 			return (DDI_FAILURE);
538 
539 		fullpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
540 		(void) ddi_pathname(dip, fullpath);
541 		pathlen = strlen(fullpath) + 1;
542 		path = kmem_alloc(pathlen, KM_SLEEP);
543 		bcopy(fullpath, path, pathlen);
544 		kmem_free(fullpath, MAXPATHLEN);
545 	}
546 
547 	DEVID_LOG_REG(("register", devid, path));
548 
549 	new_nvp = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
550 	new_devid_size = ddi_devid_sizeof(devid);
551 	new_devid = kmem_alloc(new_devid_size, KM_SLEEP);
552 	(void) bcopy(devid, new_devid, new_devid_size);
553 
554 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
555 
556 	listp = nvf_list(dcfd_handle);
557 	for (np = list_head(listp); np; np = list_next(listp, np)) {
558 		if (strcmp(path, np->nvp_devpath) == 0) {
559 			DEVID_DEBUG2((CE_CONT,
560 			    "register: %s path match\n", path));
561 			if (np->nvp_devid == NULL) {
562 replace:			np->nvp_devid = new_devid;
563 				np->nvp_flags |=
564 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
565 				np->nvp_dip = dip;
566 				if (!devid_cache_write_disable) {
567 					nvf_mark_dirty(dcfd_handle);
568 					is_dirty = 1;
569 				}
570 				rw_exit(nvf_lock(dcfd_handle));
571 				kmem_free(new_nvp, sizeof (nvp_devid_t));
572 				kmem_free(path, pathlen);
573 				goto exit;
574 			}
575 			if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
576 				/* replace invalid devid */
577 				free_devid = np->nvp_devid;
578 				goto replace;
579 			}
580 			/*
581 			 * We're registering an already-cached path
582 			 * Does the device's devid match the cache?
583 			 */
584 			if (ddi_devid_compare(devid, np->nvp_devid) != 0) {
585 				DEVID_DEBUG((CE_CONT, "devid register: "
586 				    "devid %s does not match\n", path));
587 				/*
588 				 * We do not expect devids to change, log it.
589 				 */
590 				char *devid_stored =
591 				    ddi_devid_str_encode(np->nvp_devid, NULL);
592 				char *devid_new =
593 				    ddi_devid_str_encode(devid, NULL);
594 
595 				cmn_err(CE_CONT, "devid register: devid for "
596 				    "%s does not match. stored: %s, new: %s.",
597 				    path, devid_stored, devid_new);
598 
599 				ddi_devid_str_free(devid_stored);
600 				ddi_devid_str_free(devid_new);
601 
602 				/*
603 				 * Replace cached devid for this path
604 				 * with newly registered devid.  A devid
605 				 * may map to multiple paths but one path
606 				 * should only map to one devid.
607 				 */
608 				devid_nvp_unlink_and_free(dcfd_handle, np);
609 				np = NULL;
610 				break;
611 			} else {
612 				DEVID_DEBUG2((CE_CONT,
613 				    "devid register: %s devid match\n", path));
614 				np->nvp_flags |=
615 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
616 				np->nvp_dip = dip;
617 				rw_exit(nvf_lock(dcfd_handle));
618 				kmem_free(new_nvp, sizeof (nvp_devid_t));
619 				kmem_free(path, pathlen);
620 				kmem_free(new_devid, new_devid_size);
621 				return (DDI_SUCCESS);
622 			}
623 		}
624 	}
625 
626 	/*
627 	 * Add newly registered devid to the cache
628 	 */
629 	ASSERT(np == NULL);
630 
631 	new_nvp->nvp_devpath = path;
632 	new_nvp->nvp_flags = NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
633 	new_nvp->nvp_dip = dip;
634 	new_nvp->nvp_devid = new_devid;
635 
636 	if (!devid_cache_write_disable) {
637 		is_dirty = 1;
638 		nvf_mark_dirty(dcfd_handle);
639 	}
640 	list_insert_tail(nvf_list(dcfd_handle), new_nvp);
641 
642 	rw_exit(nvf_lock(dcfd_handle));
643 
644 exit:
645 	if (free_devid)
646 		kmem_free(free_devid, ddi_devid_sizeof(free_devid));
647 
648 	if (is_dirty)
649 		nvf_wake_daemon();
650 
651 	return (DDI_SUCCESS);
652 }
653 
654 int
e_devid_cache_register(dev_info_t * dip,ddi_devid_t devid)655 e_devid_cache_register(dev_info_t *dip, ddi_devid_t devid)
656 {
657 	return (e_devid_cache_register_cmn(dip, devid, NULL));
658 }
659 
660 /*
661  * Unregister a device's devid; the devinfo may hit on multiple entries
662  * arising from both pHCI and vHCI paths.
663  * Called as an instance detachs.
664  * Invalidate the devid's devinfo reference.
665  * Devid-path remains in the cache.
666  */
667 
668 void
e_devid_cache_unregister(dev_info_t * dip)669 e_devid_cache_unregister(dev_info_t *dip)
670 {
671 	nvp_devid_t *np;
672 	list_t *listp;
673 
674 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
675 
676 	listp = nvf_list(dcfd_handle);
677 	for (np = list_head(listp); np; np = list_next(listp, np)) {
678 		if (np->nvp_devid == NULL)
679 			continue;
680 		if ((np->nvp_flags & NVP_DEVID_DIP) && np->nvp_dip == dip) {
681 			DEVID_LOG_UNREG((CE_CONT,
682 			    "unregister: %s\n", np->nvp_devpath));
683 			np->nvp_flags &= ~NVP_DEVID_DIP;
684 			np->nvp_dip = NULL;
685 		}
686 	}
687 
688 	rw_exit(nvf_lock(dcfd_handle));
689 }
690 
691 int
e_devid_cache_pathinfo(mdi_pathinfo_t * pip,ddi_devid_t devid)692 e_devid_cache_pathinfo(mdi_pathinfo_t *pip, ddi_devid_t devid)
693 {
694 	char *path = mdi_pi_pathname(pip);
695 
696 	return (e_devid_cache_register_cmn(mdi_pi_get_client(pip), devid,
697 	    path));
698 }
699 
700 /*
701  * Purge devid cache of stale devids
702  */
703 void
devid_cache_cleanup(void)704 devid_cache_cleanup(void)
705 {
706 	nvp_devid_t *np, *next;
707 	list_t *listp;
708 	int is_dirty = 0;
709 
710 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
711 
712 	listp = nvf_list(dcfd_handle);
713 	for (np = list_head(listp); np; np = next) {
714 		next = list_next(listp, np);
715 		if (np->nvp_devid == NULL)
716 			continue;
717 		if ((np->nvp_flags & NVP_DEVID_REGISTERED) == 0) {
718 			DEVID_LOG_REMOVE((CE_CONT,
719 			    "cleanup: %s\n", np->nvp_devpath));
720 			if (!devid_cache_write_disable) {
721 				nvf_mark_dirty(dcfd_handle);
722 				is_dirty = 0;
723 			}
724 			devid_nvp_unlink_and_free(dcfd_handle, np);
725 		}
726 	}
727 
728 	rw_exit(nvf_lock(dcfd_handle));
729 
730 	if (is_dirty)
731 		nvf_wake_daemon();
732 }
733 
734 
735 /*
736  * Build a list of dev_t's for a device/devid
737  *
738  * The effect of this function is cumulative, adding dev_t's
739  * for the device to the list of all dev_t's for a given
740  * devid.
741  */
742 static void
e_devid_minor_to_devlist(dev_info_t * dip,const char * minor_name,int ndevts_alloced,int * devtcntp,dev_t * devtsp)743 e_devid_minor_to_devlist(
744 	dev_info_t	*dip,
745 	const char	*minor_name,
746 	int		ndevts_alloced,
747 	int		*devtcntp,
748 	dev_t		*devtsp)
749 {
750 	struct ddi_minor_data	*dmdp;
751 	int			minor_all = 0;
752 	int			ndevts = *devtcntp;
753 
754 	ASSERT(i_ddi_devi_attached(dip));
755 
756 	/* are we looking for a set of minor nodes? */
757 	if ((minor_name == DEVID_MINOR_NAME_ALL) ||
758 	    (minor_name == DEVID_MINOR_NAME_ALL_CHR) ||
759 	    (minor_name == DEVID_MINOR_NAME_ALL_BLK))
760 		minor_all = 1;
761 
762 	/* Find matching minor names */
763 	ndi_devi_enter(dip);
764 	for (dmdp = DEVI(dip)->devi_minor; dmdp; dmdp = dmdp->next) {
765 
766 		/* Skip non-minors, and non matching minor names */
767 		if ((dmdp->type != DDM_MINOR) || ((minor_all == 0) &&
768 		    strcmp(dmdp->ddm_name, minor_name)))
769 			continue;
770 
771 		/* filter out minor_all mismatches */
772 		if (minor_all &&
773 		    (((minor_name == DEVID_MINOR_NAME_ALL_CHR) &&
774 		    (dmdp->ddm_spec_type != S_IFCHR)) ||
775 		    ((minor_name == DEVID_MINOR_NAME_ALL_BLK) &&
776 		    (dmdp->ddm_spec_type != S_IFBLK))))
777 			continue;
778 
779 		if (ndevts < ndevts_alloced)
780 			devtsp[ndevts] = dmdp->ddm_dev;
781 		ndevts++;
782 	}
783 	ndi_devi_exit(dip);
784 
785 	*devtcntp = ndevts;
786 }
787 
788 /*
789  * Search for cached entries matching a devid
790  * Return two lists:
791  *	a list of dev_info nodes, for those devices in the attached state
792  *	a list of pathnames whose instances registered the given devid
793  * If the lists passed in are not sufficient to return the matching
794  * references, return the size of lists required.
795  * The dev_info nodes are returned with a hold that the caller must release.
796  */
797 static int
e_devid_cache_devi_path_lists(ddi_devid_t devid,int retmax,int * retndevis,dev_info_t ** retdevis,int * retnpaths,char ** retpaths)798 e_devid_cache_devi_path_lists(ddi_devid_t devid, int retmax,
799     int *retndevis, dev_info_t **retdevis, int *retnpaths, char **retpaths)
800 {
801 	nvp_devid_t *np;
802 	int ndevis, npaths;
803 	dev_info_t *dip, *pdip;
804 	int maxdevis = 0;
805 	int maxpaths = 0;
806 	list_t *listp;
807 
808 	ndevis = 0;
809 	npaths = 0;
810 	listp = nvf_list(dcfd_handle);
811 	for (np = list_head(listp); np; np = list_next(listp, np)) {
812 		if (np->nvp_devid == NULL)
813 			continue;
814 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
815 			DEVIDERR((CE_CONT,
816 			    "find: invalid devid %s\n",
817 			    np->nvp_devpath));
818 			continue;
819 		}
820 		if (ddi_devid_compare(devid, np->nvp_devid) == 0) {
821 			DEVID_DEBUG2((CE_CONT,
822 			    "find: devid match: %s 0x%x\n",
823 			    np->nvp_devpath, np->nvp_flags));
824 			DEVID_LOG_MATCH(("find", devid, np->nvp_devpath));
825 			DEVID_LOG_PATHS((CE_CONT, "%s\n", np->nvp_devpath));
826 
827 			/*
828 			 * Check if we have a cached devinfo reference for this
829 			 * devid.  Place a hold on it to prevent detach
830 			 * Otherwise, use the path instead.
831 			 * Note: returns with a hold on each dev_info
832 			 * node in the list.
833 			 */
834 			dip = NULL;
835 			if (np->nvp_flags & NVP_DEVID_DIP) {
836 				pdip = ddi_get_parent(np->nvp_dip);
837 				if (ndi_devi_tryenter(pdip)) {
838 					dip = np->nvp_dip;
839 					ndi_hold_devi(dip);
840 					ndi_devi_exit(pdip);
841 					ASSERT(!DEVI_IS_ATTACHING(dip));
842 					ASSERT(!DEVI_IS_DETACHING(dip));
843 				} else {
844 					DEVID_LOG_DETACH((CE_CONT,
845 					    "may be detaching: %s\n",
846 					    np->nvp_devpath));
847 				}
848 			}
849 
850 			if (dip) {
851 				if (ndevis < retmax) {
852 					retdevis[ndevis++] = dip;
853 				} else {
854 					ndi_rele_devi(dip);
855 				}
856 				maxdevis++;
857 			} else {
858 				if (npaths < retmax)
859 					retpaths[npaths++] = np->nvp_devpath;
860 				maxpaths++;
861 			}
862 		}
863 	}
864 
865 	*retndevis = ndevis;
866 	*retnpaths = npaths;
867 	return (maxdevis > maxpaths ? maxdevis : maxpaths);
868 }
869 
870 
871 /*
872  * Search the devid cache, returning dev_t list for all
873  * device paths mapping to the device identified by the
874  * given devid.
875  *
876  * Primary interface used by ddi_lyr_devid_to_devlist()
877  */
878 int
e_devid_cache_to_devt_list(ddi_devid_t devid,const char * minor_name,int * retndevts,dev_t ** retdevts)879 e_devid_cache_to_devt_list(ddi_devid_t devid, const char *minor_name,
880     int *retndevts, dev_t **retdevts)
881 {
882 	char		*path, **paths;
883 	int		i, j, n;
884 	dev_t		*devts, *udevts;
885 	dev_t		tdevt;
886 	int		ndevts, undevts, ndevts_alloced;
887 	dev_info_t	*devi, **devis;
888 	int		ndevis, npaths, nalloced;
889 	ddi_devid_t	match_devid;
890 
891 	DEVID_LOG_FIND(("find", devid, NULL));
892 
893 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
894 	if (ddi_devid_valid(devid) != DDI_SUCCESS) {
895 		DEVID_LOG_ERR(("invalid devid", devid, NULL));
896 		return (DDI_FAILURE);
897 	}
898 
899 	nalloced = 128;
900 
901 	for (;;) {
902 		paths = kmem_zalloc(nalloced * sizeof (char *), KM_SLEEP);
903 		devis = kmem_zalloc(nalloced * sizeof (dev_info_t *), KM_SLEEP);
904 
905 		rw_enter(nvf_lock(dcfd_handle), RW_READER);
906 		n = e_devid_cache_devi_path_lists(devid, nalloced,
907 		    &ndevis, devis, &npaths, paths);
908 		if (n <= nalloced)
909 			break;
910 		rw_exit(nvf_lock(dcfd_handle));
911 		for (i = 0; i < ndevis; i++)
912 			ndi_rele_devi(devis[i]);
913 		kmem_free(paths, nalloced * sizeof (char *));
914 		kmem_free(devis, nalloced * sizeof (dev_info_t *));
915 		nalloced = n + 128;
916 	}
917 
918 	for (i = 0; i < npaths; i++) {
919 		path = i_ddi_strdup(paths[i], KM_SLEEP);
920 		paths[i] = path;
921 	}
922 	rw_exit(nvf_lock(dcfd_handle));
923 
924 	if (ndevis == 0 && npaths == 0) {
925 		DEVID_LOG_ERR(("no devid found", devid, NULL));
926 		kmem_free(paths, nalloced * sizeof (char *));
927 		kmem_free(devis, nalloced * sizeof (dev_info_t *));
928 		return (DDI_FAILURE);
929 	}
930 
931 	ndevts_alloced = 128;
932 restart:
933 	ndevts = 0;
934 	devts = kmem_alloc(ndevts_alloced * sizeof (dev_t), KM_SLEEP);
935 	for (i = 0; i < ndevis; i++) {
936 		ASSERT(!DEVI_IS_ATTACHING(devis[i]));
937 		ASSERT(!DEVI_IS_DETACHING(devis[i]));
938 		e_devid_minor_to_devlist(devis[i], minor_name,
939 		    ndevts_alloced, &ndevts, devts);
940 		if (ndevts > ndevts_alloced) {
941 			kmem_free(devts, ndevts_alloced * sizeof (dev_t));
942 			ndevts_alloced += 128;
943 			goto restart;
944 		}
945 	}
946 	for (i = 0; i < npaths; i++) {
947 		DEVID_LOG_LOOKUP((CE_CONT, "lookup %s\n", paths[i]));
948 		devi = e_ddi_hold_devi_by_path(paths[i], 0);
949 		if (devi == NULL) {
950 			DEVID_LOG_STALE(("stale device reference",
951 			    devid, paths[i]));
952 			continue;
953 		}
954 		/*
955 		 * Verify the newly attached device registered a matching devid
956 		 */
957 		if (i_ddi_devi_get_devid(DDI_DEV_T_ANY, devi,
958 		    &match_devid) != DDI_SUCCESS) {
959 			DEVIDERR((CE_CONT,
960 			    "%s: no devid registered on attach\n",
961 			    paths[i]));
962 			ddi_release_devi(devi);
963 			continue;
964 		}
965 
966 		if (ddi_devid_compare(devid, match_devid) != 0) {
967 			DEVID_LOG_STALE(("new devid registered",
968 			    devid, paths[i]));
969 			ddi_release_devi(devi);
970 			ddi_devid_free(match_devid);
971 			continue;
972 		}
973 		ddi_devid_free(match_devid);
974 
975 		e_devid_minor_to_devlist(devi, minor_name,
976 		    ndevts_alloced, &ndevts, devts);
977 		ddi_release_devi(devi);
978 		if (ndevts > ndevts_alloced) {
979 			kmem_free(devts,
980 			    ndevts_alloced * sizeof (dev_t));
981 			ndevts_alloced += 128;
982 			goto restart;
983 		}
984 	}
985 
986 	/* drop hold from e_devid_cache_devi_path_lists */
987 	for (i = 0; i < ndevis; i++) {
988 		ndi_rele_devi(devis[i]);
989 	}
990 	for (i = 0; i < npaths; i++) {
991 		kmem_free(paths[i], strlen(paths[i]) + 1);
992 	}
993 	kmem_free(paths, nalloced * sizeof (char *));
994 	kmem_free(devis, nalloced * sizeof (dev_info_t *));
995 
996 	if (ndevts == 0) {
997 		DEVID_LOG_ERR(("no devid found", devid, NULL));
998 		kmem_free(devts, ndevts_alloced * sizeof (dev_t));
999 		return (DDI_FAILURE);
1000 	}
1001 
1002 	/*
1003 	 * Build the final list of sorted dev_t's with duplicates collapsed so
1004 	 * returned results are consistent. This prevents implementation
1005 	 * artifacts from causing unnecessary changes in SVM namespace.
1006 	 */
1007 	/* bubble sort */
1008 	for (i = 0; i < (ndevts - 1); i++) {
1009 		for (j = 0; j < ((ndevts - 1) - i); j++) {
1010 			if (devts[j + 1] < devts[j]) {
1011 				tdevt = devts[j];
1012 				devts[j] = devts[j + 1];
1013 				devts[j + 1] = tdevt;
1014 			}
1015 		}
1016 	}
1017 
1018 	/* determine number of unique values */
1019 	for (undevts = ndevts, i = 1; i < ndevts; i++) {
1020 		if (devts[i - 1] == devts[i])
1021 			undevts--;
1022 	}
1023 
1024 	/* allocate unique */
1025 	udevts = kmem_alloc(undevts * sizeof (dev_t), KM_SLEEP);
1026 
1027 	/* copy unique */
1028 	udevts[0] = devts[0];
1029 	for (i = 1, j = 1; i < ndevts; i++) {
1030 		if (devts[i - 1] != devts[i])
1031 			udevts[j++] = devts[i];
1032 	}
1033 	ASSERT(j == undevts);
1034 
1035 	kmem_free(devts, ndevts_alloced * sizeof (dev_t));
1036 
1037 	*retndevts = undevts;
1038 	*retdevts = udevts;
1039 
1040 	return (DDI_SUCCESS);
1041 }
1042 
/*
 * Free a dev_t list previously handed back to a caller by the devid
 * cache lookup code in this file.
 *
 *   ndevts	number of dev_t entries in the list
 *   devt_list	the list to free
 *
 * kmem_free() must be passed the same size that was given to
 * kmem_alloc().  The list is allocated as 'ndevts * sizeof (dev_t)',
 * i.e. an array of dev_t values rather than an array of pointers, so the
 * element size used here must be sizeof (dev_t) and not sizeof (dev_t *).
 */
void
e_devid_cache_free_devt_list(int ndevts, dev_t *devt_list)
{
	kmem_free(devt_list, ndevts * sizeof (dev_t));
}
1048 
1049 /*
1050  * If given a full path and NULL ua, search for a cache entry
1051  * whose path matches the full path.  On a cache hit duplicate the
1052  * devid of the matched entry into the given devid (caller
1053  * must free);  nodenamebuf is not touched for this usage.
1054  *
1055  * Given a path and a non-NULL unit address, search the cache for any entry
1056  * matching "<path>/%@<unit-address>" where '%' is a wildcard meaning
1057  * any node name.  The path should not end a '/'.  On a cache hit
1058  * duplicate the devid as before (caller must free) and copy into
1059  * the caller-provided nodenamebuf (if not NULL) the nodename of the
1060  * matched entry.
1061  *
1062  * We must not make use of nvp_dip since that may be NULL for cached
1063  * entries that are not present in the current tree.
1064  */
1065 int
e_devid_cache_path_to_devid(char * path,char * ua,char * nodenamebuf,ddi_devid_t * devidp)1066 e_devid_cache_path_to_devid(char *path, char *ua,
1067     char *nodenamebuf, ddi_devid_t *devidp)
1068 {
1069 	size_t pathlen, ualen;
1070 	int rv = DDI_FAILURE;
1071 	nvp_devid_t *np;
1072 	list_t *listp;
1073 	char *cand;
1074 
1075 	if (path == NULL || *path == '\0' || (ua && *ua == '\0') ||
1076 	    devidp == NULL)
1077 		return (DDI_FAILURE);
1078 
1079 	*devidp = NULL;
1080 
1081 	pathlen = 0;
1082 	ualen = 0;
1083 	if (ua) {
1084 		pathlen = strlen(path);
1085 		ualen = strlen(ua);
1086 	}
1087 
1088 	rw_enter(nvf_lock(dcfd_handle), RW_READER);
1089 
1090 	listp = nvf_list(dcfd_handle);
1091 	for (np = list_head(listp); np; np = list_next(listp, np)) {
1092 		size_t nodelen, candlen, n;
1093 		ddi_devid_t devid_dup;
1094 		char *uasep, *node;
1095 
1096 		if (np->nvp_devid == NULL)
1097 			continue;
1098 
1099 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
1100 			DEVIDERR((CE_CONT,
1101 			    "pathsearch: invalid devid %s\n",
1102 			    np->nvp_devpath));
1103 			continue;
1104 		}
1105 
1106 		cand = np->nvp_devpath;		/* candidate path */
1107 
1108 		/* If a full pathname was provided the compare is easy */
1109 		if (ua == NULL) {
1110 			if (strcmp(cand, path) == 0)
1111 				goto match;
1112 			else
1113 				continue;
1114 		}
1115 
1116 		/*
1117 		 * The compare for initial path plus ua and unknown nodename
1118 		 * is trickier.
1119 		 *
1120 		 * Does the initial path component match 'path'?
1121 		 */
1122 		if (strncmp(path, cand, pathlen) != 0)
1123 			continue;
1124 
1125 		candlen = strlen(cand);
1126 
1127 		/*
1128 		 * The next character must be a '/' and there must be no
1129 		 * further '/' thereafter.  Begin by checking that the
1130 		 * candidate is long enough to include at mininum a
1131 		 * "/<nodename>@<ua>" after the initial portion already
1132 		 * matched assuming a nodename length of 1.
1133 		 */
1134 		if (candlen < pathlen + 1 + 1 + 1 + ualen ||
1135 		    cand[pathlen] != '/' ||
1136 		    strchr(cand + pathlen + 1, '/') != NULL)
1137 			continue;
1138 
1139 		node = cand + pathlen + 1;	/* <node>@<ua> string */
1140 
1141 		/*
1142 		 * Find the '@' before the unit address.  Check for
1143 		 * unit address match.
1144 		 */
1145 		if ((uasep = strchr(node, '@')) == NULL)
1146 			continue;
1147 
1148 		/*
1149 		 * Check we still have enough length and that ua matches
1150 		 */
1151 		nodelen = (uintptr_t)uasep - (uintptr_t)node;
1152 		if (candlen < pathlen + 1 + nodelen + 1 + ualen ||
1153 		    strncmp(ua, uasep + 1, ualen) != 0)
1154 			continue;
1155 match:
1156 		n = ddi_devid_sizeof(np->nvp_devid);
1157 		devid_dup = kmem_alloc(n, KM_SLEEP);	/* caller must free */
1158 		(void) bcopy(np->nvp_devid, devid_dup, n);
1159 		*devidp = devid_dup;
1160 
1161 		if (ua && nodenamebuf) {
1162 			(void) strncpy(nodenamebuf, node, nodelen);
1163 			nodenamebuf[nodelen] = '\0';
1164 		}
1165 
1166 		rv = DDI_SUCCESS;
1167 		break;
1168 	}
1169 
1170 	rw_exit(nvf_lock(dcfd_handle));
1171 
1172 	return (rv);
1173 }
1174 
#ifdef	DEBUG
/*
 * Debug-only logging helper.  Emits one CE_CONT line consisting of the
 * message label, the device path when one is supplied, and the devid in
 * its string-encoded form.
 *
 * Note that 'fmt' is printed through "%s" as a plain label; it is not
 * interpreted as a format string.
 */
static void
devid_log(char *fmt, ddi_devid_t devid, char *path)
{
	char *encoded;

	encoded = ddi_devid_str_encode(devid, NULL);

	if (path == NULL) {
		cmn_err(CE_CONT, "%s: %s\n", fmt, encoded);
	} else {
		cmn_err(CE_CONT, "%s: %s %s\n", fmt, path, encoded);
	}

	/* the encoded string is ours to release */
	ddi_devid_str_free(encoded);
}
#endif	/* DEBUG */
1188