xref: /illumos-gate/usr/src/uts/common/os/devcfg.c (revision 3ce53722)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
25  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2016 by Delphix. All rights reserved.
27  * Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
28  */
29 
30 #include <sys/note.h>
31 #include <sys/t_lock.h>
32 #include <sys/cmn_err.h>
33 #include <sys/instance.h>
34 #include <sys/conf.h>
35 #include <sys/stat.h>
36 #include <sys/ddi.h>
37 #include <sys/hwconf.h>
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/ddi_impldefs.h>
41 #include <sys/ndi_impldefs.h>
42 #include <sys/modctl.h>
43 #include <sys/contract/device_impl.h>
44 #include <sys/dacf.h>
45 #include <sys/promif.h>
46 #include <sys/pci.h>
47 #include <sys/cpuvar.h>
48 #include <sys/pathname.h>
49 #include <sys/taskq.h>
50 #include <sys/sysevent.h>
51 #include <sys/sunmdi.h>
52 #include <sys/stream.h>
53 #include <sys/strsubr.h>
54 #include <sys/fs/snode.h>
55 #include <sys/fs/dv_node.h>
56 #include <sys/reboot.h>
57 #include <sys/sysmacros.h>
58 #include <sys/systm.h>
59 #include <sys/fs/sdev_impl.h>
60 #include <sys/sunldi.h>
61 #include <sys/sunldi_impl.h>
62 #include <sys/bootprops.h>
63 #include <sys/varargs.h>
64 #include <sys/modhash.h>
65 #include <sys/instance.h>
66 #include <sys/sysevent/eventdefs.h>
67 
68 #if defined(__amd64) && !defined(__xpv)
69 #include <sys/iommulib.h>
70 #endif
71 
/*
 * DDI debug flags.  DEBUG kernels default to DDI_AUDIT, which causes
 * i_ddi_node_cache_init() to set up the devinfo audit log; non-DEBUG
 * kernels default to no debug flags.
 */
#ifdef DEBUG
int ddidebug = DDI_AUDIT;
#else
int ddidebug = 0;
#endif

/*
 * Operation codes for multi-threaded (un)configuration.
 * NOTE(review): presumably the values stored in mt_config_handle.mtc_op -
 * confirm against mt_config_init() and its callers.
 */
#define	MT_CONFIG_OP	0
#define	MT_UNCONFIG_OP	1
80 
/*
 * Multi-threaded configuration
 *
 * Handle describing one multi-threaded configure/unconfigure operation.
 * NOTE(review): mtc_lock/mtc_cv presumably protect and signal changes to
 * mtc_thr_count and mtc_error - confirm against the mt_config_*() routines.
 */
struct mt_config_handle {
	kmutex_t mtc_lock;
	kcondvar_t mtc_cv;
	int mtc_thr_count;
	dev_info_t *mtc_pdip;	/* parent dip for mt_config_children */
	dev_info_t **mtc_fdip;	/* "a" dip where unconfigure failed */
	major_t mtc_parmajor;	/* parent major for mt_config_driver */
	major_t mtc_major;
	int mtc_flags;
	int mtc_op;		/* config or unconfig */
	int mtc_error;		/* operation error */
	struct brevq_node **mtc_brevqp;	/* outstanding branch events queue */
#ifdef DEBUG
	/* timing fields, present in DEBUG kernels only */
	int total_time;
	timestruc_t start_time;
#endif /* DEBUG */
};
99 
/*
 * One entry of the nodeid -> dip map.  i_ddi_add_devimap() fills in an entry
 * for each persistent node that is linked into the tree, and
 * i_ddi_remove_devimap() clears it again.
 */
struct devi_nodeid {
	pnode_t nodeid;			/* ddi_get_nodeid() of dip */
	dev_info_t *dip;		/* node this entry describes */
	struct devi_nodeid *next;	/* next entry on active or free list */
};
105 
/*
 * Container for the nodeid -> dip map.  Entries are allocated up front by
 * i_ddi_alloc_node() for every persistent node and parked on dno_free;
 * i_ddi_add_devimap() moves one entry from dno_free to dno_head and
 * i_ddi_remove_devimap() moves it back.
 */
struct devi_nodeid_list {
	kmutex_t dno_lock;		/* Protects other fields */
	struct devi_nodeid *dno_head;	/* list of devi nodeid elements */
	struct devi_nodeid *dno_free;	/* Free list */
	uint_t dno_list_length;		/* number of dips in list */
};
112 
/*
 * used to keep track of branch remove events to be generated
 *
 * NOTE(review): the sibling/child links suggest the queue mirrors the shape
 * of the device sub-tree being removed - confirm against the brevq routines.
 */
struct brevq_node {
	char *brn_deviname;		/* device name for this entry */
	struct brevq_node *brn_sibling;	/* next entry at the same level */
	struct brevq_node *brn_child;	/* first entry one level down */
};
119 
/*
 * The nodeid -> dip map for persistent nodes; all access goes through
 * devimap and is serialized by devimap->dno_lock.
 */
static struct devi_nodeid_list devi_nodeid_list;
static struct devi_nodeid_list *devimap = &devi_nodeid_list;

/*
 * Well known nodes which are attached first at boot time.
 */
dev_info_t *top_devinfo;		/* root of device tree */
dev_info_t *options_dip;
dev_info_t *pseudo_dip;
dev_info_t *clone_dip;
dev_info_t *scsi_vhci_dip;		/* MPXIO dip */
major_t clone_major;

/*
 * A non-global zone's /dev is derived from the device tree.
 * This generation number serves to indicate when a zone's
 * /dev may need to be updated.
 */
volatile ulong_t devtree_gen;		/* generation number */

/* block all future dev_info state changes */
hrtime_t volatile devinfo_freeze = 0;

/* number of dev_info attaches/detaches currently in progress */
static ulong_t devinfo_attach_detach = 0;

extern int	sys_shutdown;
/*
 * Held by link_node()/unlink_node() while direct children of the root
 * node are linked into or unlinked from the tree.
 */
extern kmutex_t global_vhci_lock;

/* bitset of DS_SYSAVAIL & DS_RECONFIG - no races, no lock */
static int devname_state = 0;

/*
 * The devinfo snapshot cache and related variables.
 * The only field in the di_cache structure that needs initialization
 * is the mutex (cache_lock). However, since this is an adaptive mutex
 * (MUTEX_DEFAULT) - it is automatically initialized by being allocated
 * in zeroed memory (static storage class). Therefore no explicit
 * initialization of the di_cache structure is needed.
 *
 * NOTE(review): despite the statement above, the definition below carries
 * a {1} initializer, which sets the first member of struct di_cache.  The
 * structure is declared outside this file - confirm which member that is
 * and whether this comment is stale.
 */
struct di_cache	di_cache = {1};
int		di_cache_debug = 0;

/* For ddvis, which needs pseudo children under PCI */
int pci_allow_pseudo_children = 0;

/*
 * Allow path-oriented alias driver binding on driver.conf enumerated nodes.
 * Consulted by init_node() when deciding whether a node must be rebound.
 */
int driver_conf_allow_path_alias = 1;

/*
 * The following switch is for service people, in case a
 * 3rd party driver depends on identify(9e) being called.
 */
int identify_9e = 0;

/*
 * Flag allowing the behaviour of preventing attach for retired persistent
 * nodes to be disabled.
 */
int retire_prevents_attach = 1;

int mtc_off;					/* turn off mt config */

int quiesce_debug = 0;

boolean_t ddi_aliases_present = B_FALSE;
ddi_alias_t ddi_aliases;
uint_t tsd_ddi_redirect;

#define	DDI_ALIAS_HASH_SIZE	(2700)

static kmem_cache_t *ddi_node_cache;		/* devinfo node cache */
static devinfo_log_header_t *devinfo_audit_log;	/* devinfo log */
static int devinfo_log_size;			/* size in pages */

boolean_t ddi_err_panic = B_FALSE;
196 
197 static int lookup_compatible(dev_info_t *, uint_t);
198 static char *encode_composite_string(char **, uint_t, size_t *, uint_t);
199 static void link_to_driver_list(dev_info_t *);
200 static void unlink_from_driver_list(dev_info_t *);
201 static void add_to_dn_list(struct devnames *, dev_info_t *);
202 static void remove_from_dn_list(struct devnames *, dev_info_t *);
203 static dev_info_t *find_duplicate_child();
204 static void add_global_props(dev_info_t *);
205 static void remove_global_props(dev_info_t *);
206 static int uninit_node(dev_info_t *);
207 static void da_log_init(void);
208 static void da_log_enter(dev_info_t *);
209 static int walk_devs(dev_info_t *, int (*f)(dev_info_t *, void *), void *, int);
210 static int reset_nexus_flags(dev_info_t *, void *);
211 static void ddi_optimize_dtree(dev_info_t *);
212 static int is_leaf_node(dev_info_t *);
213 static struct mt_config_handle *mt_config_init(dev_info_t *, dev_info_t **,
214     int, major_t, int, struct brevq_node **);
215 static void mt_config_children(struct mt_config_handle *);
216 static void mt_config_driver(struct mt_config_handle *);
217 static int mt_config_fini(struct mt_config_handle *);
218 static int devi_unconfig_common(dev_info_t *, dev_info_t **, int, major_t,
219     struct brevq_node **);
220 static int
221 ndi_devi_config_obp_args(dev_info_t *parent, char *devnm,
222     dev_info_t **childp, int flags);
223 static void i_link_vhci_node(dev_info_t *);
224 static void ndi_devi_exit_and_wait(dev_info_t *dip,
225     int circular, clock_t end_time);
226 static int ndi_devi_unbind_driver(dev_info_t *dip);
227 
228 static int i_ddi_check_retire(dev_info_t *dip);
229 
230 static void quiesce_one_device(dev_info_t *, void *);
231 
232 dev_info_t *ddi_alias_redirect(char *alias);
233 char *ddi_curr_redirect(char *currpath);
234 
235 
236 /*
237  * dev_info cache and node management
238  */
239 
240 /* initialize dev_info node cache */
241 void
i_ddi_node_cache_init()242 i_ddi_node_cache_init()
243 {
244 	ASSERT(ddi_node_cache == NULL);
245 	ddi_node_cache = kmem_cache_create("dev_info_node_cache",
246 	    sizeof (struct dev_info), 0, NULL, NULL, NULL, NULL, NULL, 0);
247 
248 	if (ddidebug & DDI_AUDIT)
249 		da_log_init();
250 }
251 
252 
253 /*
254  * Allocating a dev_info node, callable from interrupt context with KM_NOSLEEP
255  * The allocated node has a reference count of 0.
256  */
257 dev_info_t *
i_ddi_alloc_node(dev_info_t * pdip,const char * node_name,pnode_t nodeid,int instance,ddi_prop_t * sys_prop,int flag)258 i_ddi_alloc_node(dev_info_t *pdip, const char *node_name, pnode_t nodeid,
259     int instance, ddi_prop_t *sys_prop, int flag)
260 {
261 	struct dev_info *devi;
262 	struct devi_nodeid *elem;
263 	static char failed[] = "i_ddi_alloc_node: out of memory";
264 
265 	ASSERT(node_name != NULL);
266 
267 	if ((devi = kmem_cache_alloc(ddi_node_cache, flag)) == NULL) {
268 		cmn_err(CE_NOTE, failed);
269 		return (NULL);
270 	}
271 
272 	bzero(devi, sizeof (struct dev_info));
273 
274 	if (devinfo_audit_log) {
275 		devi->devi_audit = kmem_zalloc(sizeof (devinfo_audit_t), flag);
276 		if (devi->devi_audit == NULL)
277 			goto fail;
278 	}
279 
280 	if ((devi->devi_node_name = i_ddi_strdup(node_name, flag)) == NULL)
281 		goto fail;
282 
283 	/* default binding name is node name */
284 	devi->devi_binding_name = devi->devi_node_name;
285 	devi->devi_major = DDI_MAJOR_T_NONE;	/* unbound by default */
286 
287 	/*
288 	 * Make a copy of system property
289 	 */
290 	if (sys_prop &&
291 	    (devi->devi_sys_prop_ptr = i_ddi_prop_list_dup(sys_prop, flag))
292 	    == NULL)
293 		goto fail;
294 
295 	/*
296 	 * Assign devi_nodeid, devi_node_class, devi_node_attributes
297 	 * according to the following algorithm:
298 	 *
299 	 * nodeid arg			node class		node attributes
300 	 *
301 	 * DEVI_PSEUDO_NODEID		DDI_NC_PSEUDO		A
302 	 * DEVI_SID_NODEID		DDI_NC_PSEUDO		A,P
303 	 * DEVI_SID_HIDDEN_NODEID	DDI_NC_PSEUDO		A,P,H
304 	 * DEVI_SID_HP_NODEID		DDI_NC_PSEUDO		A,P,h
305 	 * DEVI_SID_HP_HIDDEN_NODEID	DDI_NC_PSEUDO		A,P,H,h
306 	 * other			DDI_NC_PROM		P
307 	 *
308 	 * Where A = DDI_AUTO_ASSIGNED_NODEID (auto-assign a nodeid)
309 	 * and	 P = DDI_PERSISTENT
310 	 * and	 H = DDI_HIDDEN_NODE
311 	 * and	 h = DDI_HOTPLUG_NODE
312 	 *
313 	 * auto-assigned nodeids are also auto-freed.
314 	 */
315 	devi->devi_node_attributes = 0;
316 	elem = NULL;
317 	switch (nodeid) {
318 	case DEVI_SID_HIDDEN_NODEID:
319 		devi->devi_node_attributes |= DDI_HIDDEN_NODE;
320 		goto sid;
321 
322 	case DEVI_SID_HP_NODEID:
323 		devi->devi_node_attributes |= DDI_HOTPLUG_NODE;
324 		goto sid;
325 
326 	case DEVI_SID_HP_HIDDEN_NODEID:
327 		devi->devi_node_attributes |= DDI_HIDDEN_NODE;
328 		devi->devi_node_attributes |= DDI_HOTPLUG_NODE;
329 		goto sid;
330 
331 	case DEVI_SID_NODEID:
332 sid:		devi->devi_node_attributes |= DDI_PERSISTENT;
333 		if ((elem = kmem_zalloc(sizeof (*elem), flag)) == NULL)
334 			goto fail;
335 		/*FALLTHROUGH*/
336 
337 	case DEVI_PSEUDO_NODEID:
338 		devi->devi_node_attributes |= DDI_AUTO_ASSIGNED_NODEID;
339 		devi->devi_node_class = DDI_NC_PSEUDO;
340 		if (impl_ddi_alloc_nodeid(&devi->devi_nodeid)) {
341 			panic("i_ddi_alloc_node: out of nodeids");
342 			/*NOTREACHED*/
343 		}
344 		break;
345 
346 	default:
347 		if ((elem = kmem_zalloc(sizeof (*elem), flag)) == NULL)
348 			goto fail;
349 
350 		/*
351 		 * the nodetype is 'prom', try to 'take' the nodeid now.
352 		 * This requires memory allocation, so check for failure.
353 		 */
354 		if (impl_ddi_take_nodeid(nodeid, flag) != 0) {
355 			kmem_free(elem, sizeof (*elem));
356 			goto fail;
357 		}
358 
359 		devi->devi_nodeid = nodeid;
360 		devi->devi_node_class = DDI_NC_PROM;
361 		devi->devi_node_attributes = DDI_PERSISTENT;
362 		break;
363 	}
364 
365 	if (ndi_dev_is_persistent_node((dev_info_t *)devi)) {
366 		mutex_enter(&devimap->dno_lock);
367 		elem->next = devimap->dno_free;
368 		devimap->dno_free = elem;
369 		mutex_exit(&devimap->dno_lock);
370 	}
371 
372 	/*
373 	 * Instance is normally initialized to -1. In a few special
374 	 * cases, the caller may specify an instance (e.g. CPU nodes).
375 	 */
376 	devi->devi_instance = instance;
377 
378 	/*
379 	 * set parent and bus_ctl parent
380 	 */
381 	devi->devi_parent = DEVI(pdip);
382 	devi->devi_bus_ctl = DEVI(pdip);
383 
384 	NDI_CONFIG_DEBUG((CE_CONT,
385 	    "i_ddi_alloc_node: name=%s id=%d\n", node_name, devi->devi_nodeid));
386 
387 	cv_init(&(devi->devi_cv), NULL, CV_DEFAULT, NULL);
388 	mutex_init(&(devi->devi_lock), NULL, MUTEX_DEFAULT, NULL);
389 	mutex_init(&(devi->devi_pm_lock), NULL, MUTEX_DEFAULT, NULL);
390 	mutex_init(&(devi->devi_pm_busy_lock), NULL, MUTEX_DEFAULT, NULL);
391 
392 	RIO_TRACE((CE_NOTE, "i_ddi_alloc_node: Initing contract fields: "
393 	    "dip=%p, name=%s", (void *)devi, node_name));
394 
395 	mutex_init(&(devi->devi_ct_lock), NULL, MUTEX_DEFAULT, NULL);
396 	cv_init(&(devi->devi_ct_cv), NULL, CV_DEFAULT, NULL);
397 	devi->devi_ct_count = -1;	/* counter not in use if -1 */
398 	list_create(&(devi->devi_ct), sizeof (cont_device_t),
399 	    offsetof(cont_device_t, cond_next));
400 	list_create(&devi->devi_unbind_cbs, sizeof (ddi_unbind_callback_t),
401 	    offsetof(ddi_unbind_callback_t, ddiub_next));
402 	mutex_init(&devi->devi_unbind_lock, NULL, MUTEX_DEFAULT, NULL);
403 
404 	i_ddi_set_node_state((dev_info_t *)devi, DS_PROTO);
405 	da_log_enter((dev_info_t *)devi);
406 	return ((dev_info_t *)devi);
407 
408 fail:
409 	if (devi->devi_sys_prop_ptr)
410 		i_ddi_prop_list_delete(devi->devi_sys_prop_ptr);
411 	if (devi->devi_node_name)
412 		kmem_free(devi->devi_node_name, strlen(node_name) + 1);
413 	if (devi->devi_audit)
414 		kmem_free(devi->devi_audit, sizeof (devinfo_audit_t));
415 	kmem_cache_free(ddi_node_cache, devi);
416 	cmn_err(CE_NOTE, failed);
417 	return (NULL);
418 }
419 
420 /*
421  * free a dev_info structure.
422  * NB. Not callable from interrupt since impl_ddi_free_nodeid may block.
423  */
424 void
i_ddi_free_node(dev_info_t * dip)425 i_ddi_free_node(dev_info_t *dip)
426 {
427 	struct dev_info *devi = DEVI(dip);
428 	struct devi_nodeid *elem;
429 
430 	ASSERT(devi->devi_ref == 0);
431 	ASSERT(devi->devi_addr == NULL);
432 	ASSERT(devi->devi_node_state == DS_PROTO);
433 	ASSERT(devi->devi_child == NULL);
434 	ASSERT(devi->devi_hp_hdlp == NULL);
435 
436 	/* free devi_addr_buf allocated by ddi_set_name_addr() */
437 	if (devi->devi_addr_buf)
438 		kmem_free(devi->devi_addr_buf, 2 * MAXNAMELEN);
439 
440 	if (i_ndi_dev_is_auto_assigned_node(dip))
441 		impl_ddi_free_nodeid(DEVI(dip)->devi_nodeid);
442 
443 	if (ndi_dev_is_persistent_node(dip)) {
444 		mutex_enter(&devimap->dno_lock);
445 		ASSERT(devimap->dno_free);
446 		elem = devimap->dno_free;
447 		devimap->dno_free = elem->next;
448 		mutex_exit(&devimap->dno_lock);
449 		kmem_free(elem, sizeof (*elem));
450 	}
451 
452 	if (DEVI(dip)->devi_compat_names)
453 		kmem_free(DEVI(dip)->devi_compat_names,
454 		    DEVI(dip)->devi_compat_length);
455 	if (DEVI(dip)->devi_rebinding_name)
456 		kmem_free(DEVI(dip)->devi_rebinding_name,
457 		    strlen(DEVI(dip)->devi_rebinding_name) + 1);
458 
459 	ddi_prop_remove_all(dip);	/* remove driver properties */
460 	if (devi->devi_sys_prop_ptr)
461 		i_ddi_prop_list_delete(devi->devi_sys_prop_ptr);
462 	if (devi->devi_hw_prop_ptr)
463 		i_ddi_prop_list_delete(devi->devi_hw_prop_ptr);
464 
465 	if (DEVI(dip)->devi_devid_str)
466 		ddi_devid_str_free(DEVI(dip)->devi_devid_str);
467 
468 	i_ddi_set_node_state(dip, DS_INVAL);
469 	da_log_enter(dip);
470 	if (devi->devi_audit) {
471 		kmem_free(devi->devi_audit, sizeof (devinfo_audit_t));
472 	}
473 	if (devi->devi_device_class)
474 		kmem_free(devi->devi_device_class,
475 		    strlen(devi->devi_device_class) + 1);
476 	cv_destroy(&(devi->devi_cv));
477 	mutex_destroy(&(devi->devi_lock));
478 	mutex_destroy(&(devi->devi_pm_lock));
479 	mutex_destroy(&(devi->devi_pm_busy_lock));
480 
481 	RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroying contract fields: "
482 	    "dip=%p", (void *)dip));
483 	contract_device_remove_dip(dip);
484 	ASSERT(devi->devi_ct_count == -1);
485 	ASSERT(list_is_empty(&(devi->devi_ct)));
486 	cv_destroy(&(devi->devi_ct_cv));
487 	list_destroy(&(devi->devi_ct));
488 	/* free this last since contract_device_remove_dip() uses it */
489 	mutex_destroy(&(devi->devi_ct_lock));
490 	RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroyed all contract fields: "
491 	    "dip=%p, name=%s", (void *)dip, devi->devi_node_name));
492 
493 	kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1);
494 
495 	/* free event data */
496 	if (devi->devi_ev_path)
497 		kmem_free(devi->devi_ev_path, MAXPATHLEN);
498 
499 	mutex_destroy(&devi->devi_unbind_lock);
500 	list_destroy(&devi->devi_unbind_cbs);
501 
502 	kmem_cache_free(ddi_node_cache, devi);
503 }
504 
505 
506 /*
507  * Node state transitions
508  */
509 
510 /*
511  * Change the node name
512  */
513 int
ndi_devi_set_nodename(dev_info_t * dip,char * name,int flags)514 ndi_devi_set_nodename(dev_info_t *dip, char *name, int flags)
515 {
516 	_NOTE(ARGUNUSED(flags))
517 	char *nname, *oname;
518 
519 	ASSERT(dip && name);
520 
521 	oname = DEVI(dip)->devi_node_name;
522 	if (strcmp(oname, name) == 0)
523 		return (DDI_SUCCESS);
524 
525 	/*
526 	 * pcicfg_fix_ethernet requires a name change after node
527 	 * is linked into the tree. When pcicfg is fixed, we
528 	 * should only allow name change in DS_PROTO state.
529 	 */
530 	if (i_ddi_node_state(dip) >= DS_BOUND) {
531 		/*
532 		 * Don't allow name change once node is bound
533 		 */
534 		cmn_err(CE_NOTE,
535 		    "ndi_devi_set_nodename: node already bound dip = %p,"
536 		    " %s -> %s", (void *)dip, ddi_node_name(dip), name);
537 		return (NDI_FAILURE);
538 	}
539 
540 	nname = i_ddi_strdup(name, KM_SLEEP);
541 	DEVI(dip)->devi_node_name = nname;
542 	i_ddi_set_binding_name(dip, nname);
543 	kmem_free(oname, strlen(oname) + 1);
544 
545 	da_log_enter(dip);
546 	return (NDI_SUCCESS);
547 }
548 
549 void
i_ddi_add_devimap(dev_info_t * dip)550 i_ddi_add_devimap(dev_info_t *dip)
551 {
552 	struct devi_nodeid *elem;
553 
554 	ASSERT(dip);
555 
556 	if (!ndi_dev_is_persistent_node(dip))
557 		return;
558 
559 	ASSERT(ddi_get_parent(dip) == NULL || (DEVI_VHCI_NODE(dip)) ||
560 	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));
561 
562 	mutex_enter(&devimap->dno_lock);
563 
564 	ASSERT(devimap->dno_free);
565 
566 	elem = devimap->dno_free;
567 	devimap->dno_free = elem->next;
568 
569 	elem->nodeid = ddi_get_nodeid(dip);
570 	elem->dip = dip;
571 	elem->next = devimap->dno_head;
572 	devimap->dno_head = elem;
573 
574 	devimap->dno_list_length++;
575 
576 	mutex_exit(&devimap->dno_lock);
577 }
578 
579 static int
i_ddi_remove_devimap(dev_info_t * dip)580 i_ddi_remove_devimap(dev_info_t *dip)
581 {
582 	struct devi_nodeid *prev, *elem;
583 	static const char *fcn = "i_ddi_remove_devimap";
584 
585 	ASSERT(dip);
586 
587 	if (!ndi_dev_is_persistent_node(dip))
588 		return (DDI_SUCCESS);
589 
590 	mutex_enter(&devimap->dno_lock);
591 
592 	/*
593 	 * The following check is done with dno_lock held
594 	 * to prevent race between dip removal and
595 	 * e_ddi_prom_node_to_dip()
596 	 */
597 	if (e_ddi_devi_holdcnt(dip)) {
598 		mutex_exit(&devimap->dno_lock);
599 		return (DDI_FAILURE);
600 	}
601 
602 	ASSERT(devimap->dno_head);
603 	ASSERT(devimap->dno_list_length > 0);
604 
605 	prev = NULL;
606 	for (elem = devimap->dno_head; elem; elem = elem->next) {
607 		if (elem->dip == dip) {
608 			ASSERT(elem->nodeid == ddi_get_nodeid(dip));
609 			break;
610 		}
611 		prev = elem;
612 	}
613 
614 	if (elem && prev)
615 		prev->next = elem->next;
616 	else if (elem)
617 		devimap->dno_head = elem->next;
618 	else
619 		panic("%s: devinfo node(%p) not found",
620 		    fcn, (void *)dip);
621 
622 	devimap->dno_list_length--;
623 
624 	elem->nodeid = 0;
625 	elem->dip = NULL;
626 
627 	elem->next = devimap->dno_free;
628 	devimap->dno_free = elem;
629 
630 	mutex_exit(&devimap->dno_lock);
631 
632 	return (DDI_SUCCESS);
633 }
634 
635 /*
636  * Link this node into the devinfo tree and add to orphan list
637  * Not callable from interrupt context
638  */
639 static void
link_node(dev_info_t * dip)640 link_node(dev_info_t *dip)
641 {
642 	struct dev_info *devi = DEVI(dip);
643 	struct dev_info *parent = devi->devi_parent;
644 	dev_info_t **dipp;
645 
646 	ASSERT(parent);	/* never called for root node */
647 
648 	NDI_CONFIG_DEBUG((CE_CONT, "link_node: parent = %s child = %s\n",
649 	    parent->devi_node_name, devi->devi_node_name));
650 
651 	/*
652 	 * Hold the global_vhci_lock before linking any direct
653 	 * children of rootnex driver. This special lock protects
654 	 * linking and unlinking for rootnext direct children.
655 	 */
656 	if ((dev_info_t *)parent == ddi_root_node())
657 		mutex_enter(&global_vhci_lock);
658 
659 	/*
660 	 * attach the node to end of the list unless the node is already there
661 	 */
662 	dipp = (dev_info_t **)(&DEVI(parent)->devi_child);
663 	while (*dipp && (*dipp != dip)) {
664 		dipp = (dev_info_t **)(&DEVI(*dipp)->devi_sibling);
665 	}
666 	ASSERT(*dipp == NULL);	/* node is not linked */
667 
668 	/*
669 	 * Now that we are in the tree, update the devi-nodeid map.
670 	 */
671 	i_ddi_add_devimap(dip);
672 
673 	/*
674 	 * This is a temporary workaround for Bug 4618861.
675 	 * We keep the scsi_vhci nexus node on the left side of the devinfo
676 	 * tree (under the root nexus driver), so that virtual nodes under
677 	 * scsi_vhci will be SUSPENDed first and RESUMEd last.	This ensures
678 	 * that the pHCI nodes are active during times when their clients
679 	 * may be depending on them.  This workaround embodies the knowledge
680 	 * that system PM and CPR both traverse the tree left-to-right during
681 	 * SUSPEND and right-to-left during RESUME.
682 	 * Extending the workaround to IB Nexus/VHCI
683 	 * driver also.
684 	 */
685 	if (strcmp(devi->devi_binding_name, "scsi_vhci") == 0) {
686 		/* Add scsi_vhci to beginning of list */
687 		ASSERT((dev_info_t *)parent == top_devinfo);
688 		/* scsi_vhci under rootnex */
689 		devi->devi_sibling = parent->devi_child;
690 		parent->devi_child = devi;
691 	} else if (strcmp(devi->devi_binding_name, "ib") == 0) {
692 		i_link_vhci_node(dip);
693 	} else {
694 		/* Add to end of list */
695 		*dipp = dip;
696 		DEVI(dip)->devi_sibling = NULL;
697 	}
698 
699 	/*
700 	 * Release the global_vhci_lock before linking any direct
701 	 * children of rootnex driver.
702 	 */
703 	if ((dev_info_t *)parent == ddi_root_node())
704 		mutex_exit(&global_vhci_lock);
705 
706 	/* persistent nodes go on orphan list */
707 	if (ndi_dev_is_persistent_node(dip))
708 		add_to_dn_list(&orphanlist, dip);
709 }
710 
711 /*
712  * Unlink this node from the devinfo tree
713  */
714 static int
unlink_node(dev_info_t * dip)715 unlink_node(dev_info_t *dip)
716 {
717 	struct dev_info *devi = DEVI(dip);
718 	struct dev_info *parent = devi->devi_parent;
719 	dev_info_t **dipp;
720 	ddi_hp_cn_handle_t *hdlp;
721 
722 	ASSERT(parent != NULL);
723 	ASSERT(devi->devi_node_state == DS_LINKED);
724 
725 	NDI_CONFIG_DEBUG((CE_CONT, "unlink_node: name = %s\n",
726 	    ddi_node_name(dip)));
727 
728 	/* check references */
729 	if (devi->devi_ref || i_ddi_remove_devimap(dip) != DDI_SUCCESS)
730 		return (DDI_FAILURE);
731 
732 	/*
733 	 * Hold the global_vhci_lock before linking any direct
734 	 * children of rootnex driver.
735 	 */
736 	if ((dev_info_t *)parent == ddi_root_node())
737 		mutex_enter(&global_vhci_lock);
738 
739 	dipp = (dev_info_t **)(&DEVI(parent)->devi_child);
740 	while (*dipp && (*dipp != dip)) {
741 		dipp = (dev_info_t **)(&DEVI(*dipp)->devi_sibling);
742 	}
743 	if (*dipp) {
744 		*dipp = (dev_info_t *)(devi->devi_sibling);
745 		devi->devi_sibling = NULL;
746 	} else {
747 		NDI_CONFIG_DEBUG((CE_NOTE, "unlink_node: %s not linked",
748 		    devi->devi_node_name));
749 	}
750 
751 	/*
752 	 * Release the global_vhci_lock before linking any direct
753 	 * children of rootnex driver.
754 	 */
755 	if ((dev_info_t *)parent == ddi_root_node())
756 		mutex_exit(&global_vhci_lock);
757 
758 	/* Remove node from orphan list */
759 	if (ndi_dev_is_persistent_node(dip)) {
760 		remove_from_dn_list(&orphanlist, dip);
761 	}
762 
763 	/* Update parent's hotplug handle list */
764 	for (hdlp = DEVI(parent)->devi_hp_hdlp; hdlp; hdlp = hdlp->next) {
765 		if (hdlp->cn_info.cn_child == dip)
766 			hdlp->cn_info.cn_child = NULL;
767 	}
768 	return (DDI_SUCCESS);
769 }
770 
771 /*
772  * Bind this devinfo node to a driver. If compat is NON-NULL, try that first.
773  * Else, use the node-name.
774  *
775  * NOTE: IEEE1275 specifies that nodename should be tried before compatible.
776  *	Solaris implementation binds nodename after compatible.
777  *
778  * If we find a binding,
779  * - set the binding name to the string,
780  * - set major number to driver major
781  *
782  * If we don't find a binding,
783  * - return failure
784  */
785 static int
bind_node(dev_info_t * dip)786 bind_node(dev_info_t *dip)
787 {
788 	char *p = NULL;
789 	major_t major = DDI_MAJOR_T_NONE;
790 	struct dev_info *devi = DEVI(dip);
791 	dev_info_t *parent = ddi_get_parent(dip);
792 
793 	ASSERT(devi->devi_node_state == DS_LINKED);
794 
795 	NDI_CONFIG_DEBUG((CE_CONT, "bind_node: 0x%p(name = %s)\n",
796 	    (void *)dip, ddi_node_name(dip)));
797 
798 	mutex_enter(&DEVI(dip)->devi_lock);
799 	if (DEVI(dip)->devi_flags & DEVI_NO_BIND) {
800 		mutex_exit(&DEVI(dip)->devi_lock);
801 		return (DDI_FAILURE);
802 	}
803 	mutex_exit(&DEVI(dip)->devi_lock);
804 
805 	/* find the driver with most specific binding using compatible */
806 	major = ddi_compatible_driver_major(dip, &p);
807 	if (major == DDI_MAJOR_T_NONE)
808 		return (DDI_FAILURE);
809 
810 	devi->devi_major = major;
811 	if (p != NULL) {
812 		i_ddi_set_binding_name(dip, p);
813 		NDI_CONFIG_DEBUG((CE_CONT, "bind_node: %s bound to %s\n",
814 		    devi->devi_node_name, p));
815 	}
816 
817 	/* Link node to per-driver list */
818 	link_to_driver_list(dip);
819 
820 	/*
821 	 * reset parent flag so that nexus will merge .conf props
822 	 */
823 	if (ndi_dev_is_persistent_node(dip)) {
824 		mutex_enter(&DEVI(parent)->devi_lock);
825 		DEVI(parent)->devi_flags &=
826 		    ~(DEVI_ATTACHED_CHILDREN|DEVI_MADE_CHILDREN);
827 		mutex_exit(&DEVI(parent)->devi_lock);
828 	}
829 	return (DDI_SUCCESS);
830 }
831 
832 /*
833  * Unbind this devinfo node
834  * Called before the node is destroyed or driver is removed from system
835  */
836 static int
unbind_node(dev_info_t * dip)837 unbind_node(dev_info_t *dip)
838 {
839 	ddi_unbind_callback_t *cb;
840 	ASSERT(DEVI(dip)->devi_node_state == DS_BOUND);
841 	ASSERT(DEVI(dip)->devi_major != DDI_MAJOR_T_NONE);
842 
843 	/* check references */
844 	if (DEVI(dip)->devi_ref)
845 		return (DDI_FAILURE);
846 
847 	NDI_CONFIG_DEBUG((CE_CONT, "unbind_node: 0x%p(name = %s)\n",
848 	    (void *)dip, ddi_node_name(dip)));
849 
850 	unlink_from_driver_list(dip);
851 
852 	DEVI(dip)->devi_major = DDI_MAJOR_T_NONE;
853 	DEVI(dip)->devi_binding_name = DEVI(dip)->devi_node_name;
854 
855 	while ((cb = list_remove_head(&DEVI(dip)->devi_unbind_cbs)) != NULL) {
856 		cb->ddiub_cb(cb->ddiub_arg, dip);
857 	}
858 
859 	return (DDI_SUCCESS);
860 }
861 
862 /*
863  * Initialize a node: calls the parent nexus' bus_ctl ops to do the operation.
864  * Must hold parent and per-driver list while calling this function.
865  * A successful init_node() returns with an active ndi_hold_devi() hold on
866  * the parent.
867  */
868 static int
init_node(dev_info_t * dip)869 init_node(dev_info_t *dip)
870 {
871 	int error;
872 	dev_info_t *pdip = ddi_get_parent(dip);
873 	int (*f)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *, void *);
874 	char *path;
875 	major_t	major;
876 	ddi_devid_t devid = NULL;
877 
878 	ASSERT(i_ddi_node_state(dip) == DS_BOUND);
879 
880 	/* should be DS_READY except for pcmcia ... */
881 	ASSERT(i_ddi_node_state(pdip) >= DS_PROBED);
882 
883 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
884 	(void) ddi_pathname(dip, path);
885 	NDI_CONFIG_DEBUG((CE_CONT, "init_node: entry: path %s 0x%p\n",
886 	    path, (void *)dip));
887 
888 	/*
889 	 * The parent must have a bus_ctl operation.
890 	 */
891 	if ((DEVI(pdip)->devi_ops->devo_bus_ops == NULL) ||
892 	    (f = DEVI(pdip)->devi_ops->devo_bus_ops->bus_ctl) == NULL) {
893 		error = DDI_FAILURE;
894 		goto out;
895 	}
896 
897 	add_global_props(dip);
898 
899 	/*
900 	 * Invoke the parent's bus_ctl operation with the DDI_CTLOPS_INITCHILD
901 	 * command to transform the child to canonical form 1. If there
902 	 * is an error, ddi_remove_child should be called, to clean up.
903 	 */
904 	error = (*f)(pdip, pdip, DDI_CTLOPS_INITCHILD, dip, NULL);
905 	if (error != DDI_SUCCESS) {
906 		NDI_CONFIG_DEBUG((CE_CONT, "init_node: %s 0x%p failed\n",
907 		    path, (void *)dip));
908 		remove_global_props(dip);
909 
910 		/*
911 		 * If a nexus INITCHILD implementation calls ddi_devid_regster()
912 		 * prior to setting devi_addr, the devid is not recorded in
913 		 * the devid cache (i.e. DEVI_CACHED_DEVID is not set).
914 		 * With mpxio, while the vhci client path may be missing
915 		 * from the cache, phci pathinfo paths may have already be
916 		 * added to the cache, against the client dip, by use of
917 		 * e_devid_cache_pathinfo().  Because of this, when INITCHILD
918 		 * of the client fails, we need to purge the client dip from
919 		 * the cache even if DEVI_CACHED_DEVID is not set - if only
920 		 * devi_devid_str is set.
921 		 */
922 		mutex_enter(&DEVI(dip)->devi_lock);
923 		if ((DEVI(dip)->devi_flags & DEVI_CACHED_DEVID) ||
924 		    DEVI(dip)->devi_devid_str) {
925 			DEVI(dip)->devi_flags &= ~DEVI_CACHED_DEVID;
926 			mutex_exit(&DEVI(dip)->devi_lock);
927 			ddi_devid_unregister(dip);
928 		} else
929 			mutex_exit(&DEVI(dip)->devi_lock);
930 
931 		/* in case nexus driver didn't clear this field */
932 		ddi_set_name_addr(dip, NULL);
933 		error = DDI_FAILURE;
934 		goto out;
935 	}
936 
937 	ndi_hold_devi(pdip);			/* initial hold of parent */
938 
939 	/* recompute path after initchild for @addr information */
940 	(void) ddi_pathname(dip, path);
941 
942 	/* Check for duplicate nodes */
943 	if (find_duplicate_child(pdip, dip) != NULL) {
944 		/*
945 		 * uninit_node() the duplicate - a successful uninit_node()
946 		 * will release inital hold of parent using ndi_rele_devi().
947 		 */
948 		if ((error = uninit_node(dip)) != DDI_SUCCESS) {
949 			ndi_rele_devi(pdip);	/* release initial hold */
950 			cmn_err(CE_WARN, "init_node: uninit of duplicate "
951 			    "node %s failed", path);
952 		}
953 		NDI_CONFIG_DEBUG((CE_CONT, "init_node: duplicate uninit "
954 		    "%s 0x%p%s\n", path, (void *)dip,
955 		    (error == DDI_SUCCESS) ? "" : " failed"));
956 		error = DDI_FAILURE;
957 		goto out;
958 	}
959 
960 	/*
961 	 * If a devid was registered for a DS_BOUND node then the devid_cache
962 	 * may not have captured the path. Detect this situation and ensure that
963 	 * the path enters the cache now that devi_addr is established.
964 	 */
965 	if (!(DEVI(dip)->devi_flags & DEVI_CACHED_DEVID) &&
966 	    (ddi_devid_get(dip, &devid) == DDI_SUCCESS)) {
967 		if (e_devid_cache_register(dip, devid) == DDI_SUCCESS) {
968 			mutex_enter(&DEVI(dip)->devi_lock);
969 			DEVI(dip)->devi_flags |= DEVI_CACHED_DEVID;
970 			mutex_exit(&DEVI(dip)->devi_lock);
971 		}
972 
973 		ddi_devid_free(devid);
974 	}
975 
976 	/*
977 	 * Check to see if we have a path-oriented driver alias that overrides
978 	 * the current driver binding. If so, we need to rebind. This check
979 	 * needs to be delayed until after a successful DDI_CTLOPS_INITCHILD,
980 	 * so the unit-address is established on the last component of the path.
981 	 *
982 	 * NOTE: Allowing a path-oriented alias to change the driver binding
983 	 * of a driver.conf node results in non-intuitive property behavior.
984 	 * We provide a tunable (driver_conf_allow_path_alias) to control
985 	 * this behavior. See uninit_node() for more details.
986 	 *
987 	 * NOTE: If you are adding a path-oriented alias for the boot device,
988 	 * and there is mismatch between OBP and the kernel in regard to
989 	 * generic name use, like "disk" .vs. "ssd", then you will need
990 	 * to add a path-oriented alias for both paths.
991 	 */
992 	major = ddi_name_to_major(path);
993 	if (driver_active(major) && (major != DEVI(dip)->devi_major) &&
994 	    (ndi_dev_is_persistent_node(dip) || driver_conf_allow_path_alias)) {
995 
996 		/* Mark node for rebind processing. */
997 		mutex_enter(&DEVI(dip)->devi_lock);
998 		DEVI(dip)->devi_flags |= DEVI_REBIND;
999 		mutex_exit(&DEVI(dip)->devi_lock);
1000 
1001 		/*
1002 		 * Add an extra hold on the parent to prevent it from ever
1003 		 * having a zero devi_ref during the child rebind process.
1004 		 * This is necessary to ensure that the parent will never
1005 		 * detach(9E) during the rebind.
1006 		 */
1007 		ndi_hold_devi(pdip);		/* extra hold of parent */
1008 
1009 		/*
1010 		 * uninit_node() current binding - a successful uninit_node()
1011 		 * will release extra hold of parent using ndi_rele_devi().
1012 		 */
1013 		if ((error = uninit_node(dip)) != DDI_SUCCESS) {
1014 			ndi_rele_devi(pdip);	/* release extra hold */
1015 			ndi_rele_devi(pdip);	/* release initial hold */
1016 			cmn_err(CE_WARN, "init_node: uninit for rebind "
1017 			    "of node %s failed", path);
1018 			goto out;
1019 		}
1020 
1021 		/* Unbind: demote the node back to DS_LINKED.  */
1022 		if ((error = ndi_devi_unbind_driver(dip)) != DDI_SUCCESS) {
1023 			ndi_rele_devi(pdip);	/* release initial hold */
1024 			cmn_err(CE_WARN, "init_node: unbind for rebind "
1025 			    "of node %s failed", path);
1026 			goto out;
1027 		}
1028 
1029 		/* establish rebinding name */
1030 		if (DEVI(dip)->devi_rebinding_name == NULL)
1031 			DEVI(dip)->devi_rebinding_name =
1032 			    i_ddi_strdup(path, KM_SLEEP);
1033 
1034 		/*
1035 		 * Now that we are demoted and marked for rebind, repromote.
1036 		 * We need to do this in steps, instead of just calling
1037 		 * ddi_initchild, so that we can redo the merge operation
1038 		 * after we are rebound to the path-bound driver.
1039 		 *
1040 		 * Start by rebinding node to the path-bound driver.
1041 		 */
1042 		if ((error = ndi_devi_bind_driver(dip, 0)) != DDI_SUCCESS) {
1043 			ndi_rele_devi(pdip);	/* release initial hold */
1044 			cmn_err(CE_WARN, "init_node: rebind "
1045 			    "of node %s failed", path);
1046 			goto out;
1047 		}
1048 
1049 		/*
1050 		 * If the node is not a driver.conf node then merge
1051 		 * driver.conf properties from new path-bound driver.conf.
1052 		 */
1053 		if (ndi_dev_is_persistent_node(dip))
1054 			(void) i_ndi_make_spec_children(pdip, 0);
1055 
1056 		/*
1057 		 * Now that we have taken care of merge, repromote back
1058 		 * to DS_INITIALIZED.
1059 		 */
1060 		error = ddi_initchild(pdip, dip);
1061 		NDI_CONFIG_DEBUG((CE_CONT, "init_node: rebind "
1062 		    "%s 0x%p\n", path, (void *)dip));
1063 
1064 		/*
1065 		 * Release our initial hold. If ddi_initchild() was
1066 		 * successful then it will return with the active hold.
1067 		 */
1068 		ndi_rele_devi(pdip);
1069 		goto out;
1070 	}
1071 
1072 	/*
1073 	 * Apply multi-parent/deep-nexus optimization to the new node
1074 	 */
1075 	DEVI(dip)->devi_instance = e_ddi_assign_instance(dip);
1076 	ddi_optimize_dtree(dip);
1077 	error = DDI_SUCCESS;		/* return with active hold */
1078 
1079 out:	if (error != DDI_SUCCESS) {
1080 		/* On failure ensure that DEVI_REBIND is cleared */
1081 		mutex_enter(&DEVI(dip)->devi_lock);
1082 		DEVI(dip)->devi_flags &= ~DEVI_REBIND;
1083 		mutex_exit(&DEVI(dip)->devi_lock);
1084 	}
1085 	kmem_free(path, MAXPATHLEN);
1086 	return (error);
1087 }
1088 
1089 /*
1090  * Uninitialize node
1091  * The per-driver list must be held busy during the call.
1092  * A successful uninit_node() releases the init_node() hold on
1093  * the parent by calling ndi_rele_devi().
1094  */
1095 static int
uninit_node(dev_info_t * dip)1096 uninit_node(dev_info_t *dip)
1097 {
1098 	int node_state_entry;
1099 	dev_info_t *pdip;
1100 	struct dev_ops *ops;
1101 	int (*f)();
1102 	int error;
1103 	char *addr;
1104 
1105 	/*
1106 	 * Don't check for references here or else a ref-counted
1107 	 * dip cannot be downgraded by the framework.
1108 	 */
1109 	node_state_entry = i_ddi_node_state(dip);
1110 	ASSERT((node_state_entry == DS_BOUND) ||
1111 	    (node_state_entry == DS_INITIALIZED));
1112 	pdip = ddi_get_parent(dip);
1113 	ASSERT(pdip);
1114 
1115 	NDI_CONFIG_DEBUG((CE_CONT, "uninit_node: 0x%p(%s%d)\n",
1116 	    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1117 
1118 	if (((ops = ddi_get_driver(pdip)) == NULL) ||
1119 	    (ops->devo_bus_ops == NULL) ||
1120 	    ((f = ops->devo_bus_ops->bus_ctl) == NULL)) {
1121 		return (DDI_FAILURE);
1122 	}
1123 
1124 	/*
1125 	 * save the @addr prior to DDI_CTLOPS_UNINITCHILD for use in
1126 	 * freeing the instance if it succeeds.
1127 	 */
1128 	if (node_state_entry == DS_INITIALIZED) {
1129 		addr = ddi_get_name_addr(dip);
1130 		if (addr)
1131 			addr = i_ddi_strdup(addr, KM_SLEEP);
1132 	} else {
1133 		addr = NULL;
1134 	}
1135 
1136 	error = (*f)(pdip, pdip, DDI_CTLOPS_UNINITCHILD, dip, (void *)NULL);
1137 	if (error == DDI_SUCCESS) {
1138 		/* ensure that devids are unregistered */
1139 		mutex_enter(&DEVI(dip)->devi_lock);
1140 		if ((DEVI(dip)->devi_flags & DEVI_CACHED_DEVID)) {
1141 			DEVI(dip)->devi_flags &= ~DEVI_CACHED_DEVID;
1142 			mutex_exit(&DEVI(dip)->devi_lock);
1143 			ddi_devid_unregister(dip);
1144 		} else
1145 			mutex_exit(&DEVI(dip)->devi_lock);
1146 
1147 		/* if uninitchild forgot to set devi_addr to NULL do it now */
1148 		ddi_set_name_addr(dip, NULL);
1149 
1150 		/*
1151 		 * Free instance number. This is a no-op if instance has
1152 		 * been kept by probe_node().  Avoid free when we are called
1153 		 * from init_node (DS_BOUND) because the instance has not yet
1154 		 * been assigned.
1155 		 */
1156 		if (node_state_entry == DS_INITIALIZED) {
1157 			e_ddi_free_instance(dip, addr);
1158 			DEVI(dip)->devi_instance = -1;
1159 		}
1160 
1161 		/* release the init_node hold */
1162 		ndi_rele_devi(pdip);
1163 
1164 		remove_global_props(dip);
1165 
1166 		/*
1167 		 * NOTE: The decision on whether to allow a path-oriented
1168 		 * rebind of a driver.conf enumerated node is made by
1169 		 * init_node() based on driver_conf_allow_path_alias. The
1170 		 * rebind code below prevents deletion of system properties
1171 		 * on driver.conf nodes.
1172 		 *
1173 		 * When driver_conf_allow_path_alias is set, property behavior
1174 		 * on rebound driver.conf file is non-intuitive. For a
1175 		 * driver.conf node, the unit-address properties come from
1176 		 * the driver.conf file as system properties. Removing system
1177 		 * properties from a driver.conf node makes the node
1178 		 * useless (we get node without unit-address properties) - so
1179 		 * we leave system properties in place. The result is a node
1180 		 * where system properties come from the node being rebound,
1181 		 * and global properties come from the driver.conf file
1182 		 * of the driver we are rebinding to.  If we could determine
1183 		 * that the path-oriented alias driver.conf file defined a
1184 		 * node at the same unit address, it would be best to use
1185 		 * that node and avoid the non-intuitive property behavior.
1186 		 * Unfortunately, the current "merge" code does not support
1187 		 * this, so we live with the non-intuitive property behavior.
1188 		 */
1189 		if (!((ndi_dev_is_persistent_node(dip) == 0) &&
1190 		    (DEVI(dip)->devi_flags & DEVI_REBIND)))
1191 			e_ddi_prop_remove_all(dip);
1192 	} else {
1193 		NDI_CONFIG_DEBUG((CE_CONT, "uninit_node failed: 0x%p(%s%d)\n",
1194 		    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1195 	}
1196 
1197 	if (addr)
1198 		kmem_free(addr, strlen(addr) + 1);
1199 	return (error);
1200 }
1201 
1202 /*
1203  * Invoke driver's probe entry point to probe for existence of hardware.
1204  * Keep instance permanent for successful probe and leaf nodes.
1205  *
1206  * Per-driver list must be held busy while calling this function.
1207  */
1208 static int
probe_node(dev_info_t * dip)1209 probe_node(dev_info_t *dip)
1210 {
1211 	int rv;
1212 
1213 	ASSERT(i_ddi_node_state(dip) == DS_INITIALIZED);
1214 
1215 	NDI_CONFIG_DEBUG((CE_CONT, "probe_node: 0x%p(%s%d)\n",
1216 	    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1217 
1218 	/* temporarily hold the driver while we probe */
1219 	DEVI(dip)->devi_ops = ndi_hold_driver(dip);
1220 	if (DEVI(dip)->devi_ops == NULL) {
1221 		NDI_CONFIG_DEBUG((CE_CONT,
1222 		    "probe_node: 0x%p(%s%d) cannot load driver\n",
1223 		    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1224 		return (DDI_FAILURE);
1225 	}
1226 
1227 	if (identify_9e != 0)
1228 		(void) devi_identify(dip);
1229 
1230 	rv = devi_probe(dip);
1231 
1232 	/* release the driver now that probe is complete */
1233 	ndi_rele_driver(dip);
1234 	DEVI(dip)->devi_ops = NULL;
1235 
1236 	switch (rv) {
1237 	case DDI_PROBE_SUCCESS:			/* found */
1238 	case DDI_PROBE_DONTCARE:		/* ddi_dev_is_sid */
1239 		e_ddi_keep_instance(dip);	/* persist instance */
1240 		rv = DDI_SUCCESS;
1241 		break;
1242 
1243 	case DDI_PROBE_PARTIAL:			/* maybe later */
1244 	case DDI_PROBE_FAILURE:			/* not found */
1245 		NDI_CONFIG_DEBUG((CE_CONT,
1246 		    "probe_node: 0x%p(%s%d) no hardware found%s\n",
1247 		    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip),
1248 		    (rv == DDI_PROBE_PARTIAL) ? " yet" : ""));
1249 		rv = DDI_FAILURE;
1250 		break;
1251 
1252 	default:
1253 #ifdef	DEBUG
1254 		cmn_err(CE_WARN, "probe_node: %s%d: illegal probe(9E) value",
1255 		    ddi_driver_name(dip), ddi_get_instance(dip));
1256 #endif	/* DEBUG */
1257 		rv = DDI_FAILURE;
1258 		break;
1259 	}
1260 	return (rv);
1261 }
1262 
1263 /*
1264  * Unprobe a node. Simply reset the node state.
1265  * Per-driver list must be held busy while calling this function.
1266  */
1267 static int
unprobe_node(dev_info_t * dip)1268 unprobe_node(dev_info_t *dip)
1269 {
1270 	ASSERT(i_ddi_node_state(dip) == DS_PROBED);
1271 
1272 	/*
1273 	 * Don't check for references here or else a ref-counted
1274 	 * dip cannot be downgraded by the framework.
1275 	 */
1276 
1277 	NDI_CONFIG_DEBUG((CE_CONT, "unprobe_node: 0x%p(name = %s)\n",
1278 	    (void *)dip, ddi_node_name(dip)));
1279 	return (DDI_SUCCESS);
1280 }
1281 
1282 /*
1283  * Attach devinfo node.
1284  * Per-driver list must be held busy.
1285  */
1286 static int
attach_node(dev_info_t * dip)1287 attach_node(dev_info_t *dip)
1288 {
1289 	int rv;
1290 
1291 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1292 	ASSERT(i_ddi_node_state(dip) == DS_PROBED);
1293 
1294 	NDI_CONFIG_DEBUG((CE_CONT, "attach_node: 0x%p(%s%d)\n",
1295 	    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1296 
1297 	/*
1298 	 * Tell mpxio framework that a node is about to online.
1299 	 */
1300 	if ((rv = mdi_devi_online(dip, 0)) != NDI_SUCCESS) {
1301 		return (DDI_FAILURE);
1302 	}
1303 
1304 	/* no recursive attachment */
1305 	ASSERT(DEVI(dip)->devi_ops == NULL);
1306 
1307 	/*
1308 	 * Hold driver the node is bound to.
1309 	 */
1310 	DEVI(dip)->devi_ops = ndi_hold_driver(dip);
1311 	if (DEVI(dip)->devi_ops == NULL) {
1312 		/*
1313 		 * We were able to load driver for probing, so we should
1314 		 * not get here unless something really bad happened.
1315 		 */
1316 		cmn_err(CE_WARN, "attach_node: no driver for major %d",
1317 		    DEVI(dip)->devi_major);
1318 		return (DDI_FAILURE);
1319 	}
1320 
1321 	if (NEXUS_DRV(DEVI(dip)->devi_ops))
1322 		DEVI(dip)->devi_taskq = ddi_taskq_create(dip,
1323 		    "nexus_enum_tq", 1,
1324 		    TASKQ_DEFAULTPRI, 0);
1325 
1326 	mutex_enter(&(DEVI(dip)->devi_lock));
1327 	DEVI_SET_ATTACHING(dip);
1328 	DEVI_SET_NEED_RESET(dip);
1329 	mutex_exit(&(DEVI(dip)->devi_lock));
1330 
1331 	rv = devi_attach(dip, DDI_ATTACH);
1332 
1333 	mutex_enter(&(DEVI(dip)->devi_lock));
1334 	DEVI_CLR_ATTACHING(dip);
1335 
1336 	if (rv != DDI_SUCCESS) {
1337 		DEVI_CLR_NEED_RESET(dip);
1338 		mutex_exit(&DEVI(dip)->devi_lock);
1339 
1340 		/*
1341 		 * Cleanup dacf reservations
1342 		 */
1343 		mutex_enter(&dacf_lock);
1344 		dacf_clr_rsrvs(dip, DACF_OPID_POSTATTACH);
1345 		dacf_clr_rsrvs(dip, DACF_OPID_PREDETACH);
1346 		mutex_exit(&dacf_lock);
1347 		if (DEVI(dip)->devi_taskq)
1348 			ddi_taskq_destroy(DEVI(dip)->devi_taskq);
1349 		ddi_remove_minor_node(dip, NULL);
1350 
1351 		/* release the driver if attach failed */
1352 		ndi_rele_driver(dip);
1353 		DEVI(dip)->devi_ops = NULL;
1354 		NDI_CONFIG_DEBUG((CE_CONT, "attach_node: 0x%p(%s%d) failed\n",
1355 		    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1356 		return (DDI_FAILURE);
1357 	} else
1358 		mutex_exit(&DEVI(dip)->devi_lock);
1359 
1360 	/* successful attach, return with driver held */
1361 
1362 	return (DDI_SUCCESS);
1363 }
1364 
1365 /*
1366  * Detach devinfo node.
1367  * Per-driver list must be held busy.
1368  */
1369 static int
detach_node(dev_info_t * dip,uint_t flag)1370 detach_node(dev_info_t *dip, uint_t flag)
1371 {
1372 	struct devnames	*dnp;
1373 	int		rv;
1374 
1375 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1376 	ASSERT(i_ddi_node_state(dip) == DS_ATTACHED);
1377 
1378 	/* check references */
1379 	if (DEVI(dip)->devi_ref)
1380 		return (DDI_FAILURE);
1381 
1382 	NDI_CONFIG_DEBUG((CE_CONT, "detach_node: 0x%p(%s%d)\n",
1383 	    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1384 
1385 	/*
1386 	 * NOTE: If we are processing a pHCI node then the calling code
1387 	 * must detect this and ndi_devi_enter() in (vHCI, parent(pHCI))
1388 	 * order unless pHCI and vHCI are siblings.  Code paths leading
1389 	 * here that must ensure this ordering include:
1390 	 * unconfig_immediate_children(), devi_unconfig_one(),
1391 	 * ndi_devi_unconfig_one(), ndi_devi_offline().
1392 	 */
1393 	ASSERT(!MDI_PHCI(dip) ||
1394 	    (ddi_get_parent(mdi_devi_get_vdip(dip)) == ddi_get_parent(dip)) ||
1395 	    DEVI_BUSY_OWNED(mdi_devi_get_vdip(dip)));
1396 
1397 	/* Offline the device node with the mpxio framework. */
1398 	if (mdi_devi_offline(dip, flag) != NDI_SUCCESS) {
1399 		return (DDI_FAILURE);
1400 	}
1401 
1402 	/* drain the taskq */
1403 	if (DEVI(dip)->devi_taskq)
1404 		ddi_taskq_wait(DEVI(dip)->devi_taskq);
1405 
1406 	rv = devi_detach(dip, DDI_DETACH);
1407 
1408 	if (rv != DDI_SUCCESS) {
1409 		NDI_CONFIG_DEBUG((CE_CONT,
1410 		    "detach_node: 0x%p(%s%d) failed\n",
1411 		    (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1412 		return (DDI_FAILURE);
1413 	}
1414 
1415 	mutex_enter(&(DEVI(dip)->devi_lock));
1416 	DEVI_CLR_NEED_RESET(dip);
1417 	mutex_exit(&(DEVI(dip)->devi_lock));
1418 
1419 #if defined(__amd64) && !defined(__xpv)
1420 	/*
1421 	 * Close any iommulib mediated linkage to an IOMMU
1422 	 */
1423 	if (IOMMU_USED(dip))
1424 		iommulib_nex_close(dip);
1425 #endif
1426 
1427 	/* destroy the taskq */
1428 	if (DEVI(dip)->devi_taskq) {
1429 		ddi_taskq_destroy(DEVI(dip)->devi_taskq);
1430 		DEVI(dip)->devi_taskq = NULL;
1431 	}
1432 
1433 	/* Cleanup dacf reservations */
1434 	mutex_enter(&dacf_lock);
1435 	dacf_clr_rsrvs(dip, DACF_OPID_POSTATTACH);
1436 	dacf_clr_rsrvs(dip, DACF_OPID_PREDETACH);
1437 	mutex_exit(&dacf_lock);
1438 
1439 	/* remove any additional flavors that were added */
1440 	if (DEVI(dip)->devi_flavorv_n > 1 && DEVI(dip)->devi_flavorv != NULL) {
1441 		kmem_free(DEVI(dip)->devi_flavorv,
1442 		    (DEVI(dip)->devi_flavorv_n - 1) * sizeof (void *));
1443 		DEVI(dip)->devi_flavorv = NULL;
1444 	}
1445 
1446 	/* Remove properties and minor nodes in case driver forgots */
1447 	ddi_remove_minor_node(dip, NULL);
1448 	ddi_prop_remove_all(dip);
1449 
1450 	/* a detached node can't have attached or .conf children */
1451 	mutex_enter(&DEVI(dip)->devi_lock);
1452 	DEVI(dip)->devi_flags &= ~(DEVI_MADE_CHILDREN|DEVI_ATTACHED_CHILDREN);
1453 	mutex_exit(&DEVI(dip)->devi_lock);
1454 
1455 	/*
1456 	 * If the instance has successfully detached in detach_driver() context,
1457 	 * clear DN_DRIVER_HELD for correct ddi_hold_installed_driver()
1458 	 * behavior. Consumers like qassociate() depend on this (via clnopen()).
1459 	 */
1460 	if (flag & NDI_DETACH_DRIVER) {
1461 		dnp = &(devnamesp[DEVI(dip)->devi_major]);
1462 		LOCK_DEV_OPS(&dnp->dn_lock);
1463 		dnp->dn_flags &= ~DN_DRIVER_HELD;
1464 		UNLOCK_DEV_OPS(&dnp->dn_lock);
1465 	}
1466 
1467 	/* successful detach, release the driver */
1468 	ndi_rele_driver(dip);
1469 	DEVI(dip)->devi_ops = NULL;
1470 	return (DDI_SUCCESS);
1471 }
1472 
1473 /*
1474  * Run dacf post_attach routines
1475  */
1476 static int
postattach_node(dev_info_t * dip)1477 postattach_node(dev_info_t *dip)
1478 {
1479 	int rval;
1480 
1481 	/*
1482 	 * For hotplug busses like USB, it's possible that devices
1483 	 * are removed but dip is still around. We don't want to
1484 	 * run dacf routines as part of detach failure recovery.
1485 	 *
1486 	 * Pretend success until we figure out how to prevent
1487 	 * access to such devinfo nodes.
1488 	 */
1489 	if (DEVI_IS_DEVICE_REMOVED(dip))
1490 		return (DDI_SUCCESS);
1491 
1492 	/*
1493 	 * if dacf_postattach failed, report it to the framework
1494 	 * so that it can be retried later at the open time.
1495 	 */
1496 	mutex_enter(&dacf_lock);
1497 	rval = dacfc_postattach(dip);
1498 	mutex_exit(&dacf_lock);
1499 
1500 	/*
1501 	 * Plumbing during postattach may fail because of the
1502 	 * underlying device is not ready. This will fail ndi_devi_config()
1503 	 * in dv_filldir().
1504 	 */
1505 	if (rval != DACF_SUCCESS) {
1506 		NDI_CONFIG_DEBUG((CE_CONT, "postattach_node: %s%d (%p) "
1507 		    "postattach failed\n", ddi_driver_name(dip),
1508 		    ddi_get_instance(dip), (void *)dip));
1509 		return (DDI_FAILURE);
1510 	}
1511 
1512 	return (DDI_SUCCESS);
1513 }
1514 
1515 /*
1516  * Run dacf pre-detach routines
1517  */
1518 static int
predetach_node(dev_info_t * dip,uint_t flag)1519 predetach_node(dev_info_t *dip, uint_t flag)
1520 {
1521 	int ret;
1522 
1523 	/*
1524 	 * Don't auto-detach if DDI_FORCEATTACH or DDI_NO_AUTODETACH
1525 	 * properties are set.
1526 	 */
1527 	if (flag & NDI_AUTODETACH) {
1528 		struct devnames *dnp;
1529 		int pflag = DDI_PROP_NOTPROM | DDI_PROP_DONTPASS;
1530 
1531 		if ((ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1532 		    pflag, DDI_FORCEATTACH, 0) == 1) ||
1533 		    (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1534 		    pflag, DDI_NO_AUTODETACH, 0) == 1))
1535 			return (DDI_FAILURE);
1536 
1537 		/* check for driver global version of DDI_NO_AUTODETACH */
1538 		dnp = &devnamesp[DEVI(dip)->devi_major];
1539 		LOCK_DEV_OPS(&dnp->dn_lock);
1540 		if (dnp->dn_flags & DN_NO_AUTODETACH) {
1541 			UNLOCK_DEV_OPS(&dnp->dn_lock);
1542 			return (DDI_FAILURE);
1543 		}
1544 		UNLOCK_DEV_OPS(&dnp->dn_lock);
1545 	}
1546 
1547 	mutex_enter(&dacf_lock);
1548 	ret = dacfc_predetach(dip);
1549 	mutex_exit(&dacf_lock);
1550 
1551 	return (ret);
1552 }
1553 
1554 /*
1555  * Wrapper for making multiple state transitions
1556  */
1557 
1558 /*
1559  * i_ndi_config_node: upgrade dev_info node into a specified state.
1560  * It is a bit tricky because the locking protocol changes before and
1561  * after a node is bound to a driver. All locks are held external to
1562  * this function.
1563  */
1564 int
i_ndi_config_node(dev_info_t * dip,ddi_node_state_t state,uint_t flag)1565 i_ndi_config_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag)
1566 {
1567 	_NOTE(ARGUNUSED(flag))
1568 	int rv = DDI_SUCCESS;
1569 
1570 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1571 
1572 	while ((i_ddi_node_state(dip) < state) && (rv == DDI_SUCCESS)) {
1573 
1574 		/* don't allow any more changes to the device tree */
1575 		if (devinfo_freeze) {
1576 			rv = DDI_FAILURE;
1577 			break;
1578 		}
1579 
1580 		switch (i_ddi_node_state(dip)) {
1581 		case DS_PROTO:
1582 			/*
1583 			 * only caller can reference this node, no external
1584 			 * locking needed.
1585 			 */
1586 			link_node(dip);
1587 			translate_devid((dev_info_t *)dip);
1588 			i_ddi_set_node_state(dip, DS_LINKED);
1589 			break;
1590 		case DS_LINKED:
1591 			/*
1592 			 * Three code path may attempt to bind a node:
1593 			 * - boot code
1594 			 * - add_drv
1595 			 * - hotplug thread
1596 			 * Boot code is single threaded, add_drv synchronize
1597 			 * on a userland lock, and hotplug synchronize on
1598 			 * hotplug_lk. There could be a race between add_drv
1599 			 * and hotplug thread. We'll live with this until the
1600 			 * conversion to top-down loading.
1601 			 */
1602 			if ((rv = bind_node(dip)) == DDI_SUCCESS)
1603 				i_ddi_set_node_state(dip, DS_BOUND);
1604 
1605 			break;
1606 		case DS_BOUND:
1607 			/*
1608 			 * The following transitions synchronizes on the
1609 			 * per-driver busy changing flag, since we already
1610 			 * have a driver.
1611 			 */
1612 			if ((rv = init_node(dip)) == DDI_SUCCESS)
1613 				i_ddi_set_node_state(dip, DS_INITIALIZED);
1614 			break;
1615 		case DS_INITIALIZED:
1616 			if ((rv = probe_node(dip)) == DDI_SUCCESS)
1617 				i_ddi_set_node_state(dip, DS_PROBED);
1618 			break;
1619 		case DS_PROBED:
1620 			/*
1621 			 * If node is retired and persistent, then prevent
1622 			 * attach. We can't do this for non-persistent nodes
1623 			 * as we would lose evidence that the node existed.
1624 			 */
1625 			if (i_ddi_check_retire(dip) == 1 &&
1626 			    ndi_dev_is_persistent_node(dip) &&
1627 			    retire_prevents_attach == 1) {
1628 				rv = DDI_FAILURE;
1629 				break;
1630 			}
1631 			atomic_inc_ulong(&devinfo_attach_detach);
1632 			if ((rv = attach_node(dip)) == DDI_SUCCESS)
1633 				i_ddi_set_node_state(dip, DS_ATTACHED);
1634 			atomic_dec_ulong(&devinfo_attach_detach);
1635 			break;
1636 		case DS_ATTACHED:
1637 			if ((rv = postattach_node(dip)) == DDI_SUCCESS)
1638 				i_ddi_set_node_state(dip, DS_READY);
1639 			break;
1640 		case DS_READY:
1641 			break;
1642 		default:
1643 			/* should never reach here */
1644 			ASSERT("unknown devinfo state");
1645 		}
1646 	}
1647 
1648 	if (ddidebug & DDI_AUDIT)
1649 		da_log_enter(dip);
1650 	return (rv);
1651 }
1652 
1653 /*
1654  * i_ndi_unconfig_node: downgrade dev_info node into a specified state.
1655  */
1656 int
i_ndi_unconfig_node(dev_info_t * dip,ddi_node_state_t state,uint_t flag)1657 i_ndi_unconfig_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag)
1658 {
1659 	int	rv = DDI_SUCCESS;
1660 
1661 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1662 
1663 	while ((i_ddi_node_state(dip) > state) && (rv == DDI_SUCCESS)) {
1664 
1665 		/* don't allow any more changes to the device tree */
1666 		if (devinfo_freeze) {
1667 			rv = DDI_FAILURE;
1668 			break;
1669 		}
1670 
1671 		switch (i_ddi_node_state(dip)) {
1672 		case DS_PROTO:
1673 			break;
1674 		case DS_LINKED:
1675 			/*
1676 			 * Persistent nodes are only removed by hotplug code
1677 			 * .conf nodes synchronizes on per-driver list.
1678 			 */
1679 			if ((rv = unlink_node(dip)) == DDI_SUCCESS)
1680 				i_ddi_set_node_state(dip, DS_PROTO);
1681 			break;
1682 		case DS_BOUND:
1683 			/*
1684 			 * The following transitions synchronizes on the
1685 			 * per-driver busy changing flag, since we already
1686 			 * have a driver.
1687 			 */
1688 			if ((rv = unbind_node(dip)) == DDI_SUCCESS)
1689 				i_ddi_set_node_state(dip, DS_LINKED);
1690 			break;
1691 		case DS_INITIALIZED:
1692 			if ((rv = uninit_node(dip)) == DDI_SUCCESS)
1693 				i_ddi_set_node_state(dip, DS_BOUND);
1694 			break;
1695 		case DS_PROBED:
1696 			if ((rv = unprobe_node(dip)) == DDI_SUCCESS)
1697 				i_ddi_set_node_state(dip, DS_INITIALIZED);
1698 			break;
1699 		case DS_ATTACHED:
1700 			atomic_inc_ulong(&devinfo_attach_detach);
1701 
1702 			mutex_enter(&(DEVI(dip)->devi_lock));
1703 			DEVI_SET_DETACHING(dip);
1704 			mutex_exit(&(DEVI(dip)->devi_lock));
1705 
1706 			membar_enter();	/* ensure visibility for hold_devi */
1707 
1708 			if ((rv = detach_node(dip, flag)) == DDI_SUCCESS)
1709 				i_ddi_set_node_state(dip, DS_PROBED);
1710 
1711 			mutex_enter(&(DEVI(dip)->devi_lock));
1712 			DEVI_CLR_DETACHING(dip);
1713 			mutex_exit(&(DEVI(dip)->devi_lock));
1714 
1715 			atomic_dec_ulong(&devinfo_attach_detach);
1716 			break;
1717 		case DS_READY:
1718 			if ((rv = predetach_node(dip, flag)) == DDI_SUCCESS)
1719 				i_ddi_set_node_state(dip, DS_ATTACHED);
1720 			break;
1721 		default:
1722 			ASSERT("unknown devinfo state");
1723 		}
1724 	}
1725 	da_log_enter(dip);
1726 	return (rv);
1727 }
1728 
1729 /*
1730  * ddi_initchild: transform node to DS_INITIALIZED state
1731  */
1732 int
ddi_initchild(dev_info_t * parent,dev_info_t * proto)1733 ddi_initchild(dev_info_t *parent, dev_info_t *proto)
1734 {
1735 	int ret, circ;
1736 
1737 	ndi_devi_enter(parent, &circ);
1738 	ret = i_ndi_config_node(proto, DS_INITIALIZED, 0);
1739 	ndi_devi_exit(parent, circ);
1740 
1741 	return (ret);
1742 }
1743 
1744 /*
1745  * ddi_uninitchild: transform node down to DS_BOUND state
1746  */
1747 int
ddi_uninitchild(dev_info_t * dip)1748 ddi_uninitchild(dev_info_t *dip)
1749 {
1750 	int ret, circ;
1751 	dev_info_t *parent = ddi_get_parent(dip);
1752 	ASSERT(parent);
1753 
1754 	ndi_devi_enter(parent, &circ);
1755 	ret = i_ndi_unconfig_node(dip, DS_BOUND, 0);
1756 	ndi_devi_exit(parent, circ);
1757 
1758 	return (ret);
1759 }
1760 
1761 /*
1762  * i_ddi_attachchild: transform node to DS_READY/i_ddi_devi_attached() state
1763  */
1764 static int
i_ddi_attachchild(dev_info_t * dip)1765 i_ddi_attachchild(dev_info_t *dip)
1766 {
1767 	dev_info_t	*parent = ddi_get_parent(dip);
1768 	int		ret;
1769 
1770 	ASSERT(parent && DEVI_BUSY_OWNED(parent));
1771 
1772 	if ((i_ddi_node_state(dip) < DS_BOUND) || DEVI_IS_DEVICE_OFFLINE(dip))
1773 		return (DDI_FAILURE);
1774 
1775 	ret = i_ndi_config_node(dip, DS_READY, 0);
1776 	if (ret == NDI_SUCCESS) {
1777 		ret = DDI_SUCCESS;
1778 	} else {
1779 		/*
1780 		 * Take it down to DS_INITIALIZED so pm_pre_probe is run
1781 		 * on the next attach
1782 		 */
1783 		(void) i_ndi_unconfig_node(dip, DS_INITIALIZED, 0);
1784 		ret = DDI_FAILURE;
1785 	}
1786 
1787 	return (ret);
1788 }
1789 
1790 /*
1791  * i_ddi_detachchild: transform node down to DS_PROBED state
1792  *	If it fails, put it back to DS_READY state.
1793  * NOTE: A node that fails detach may be at DS_ATTACHED instead
1794  * of DS_READY for a small amount of time - this is the source of
1795  * transient DS_READY->DS_ATTACHED->DS_READY state changes.
1796  */
1797 static int
i_ddi_detachchild(dev_info_t * dip,uint_t flags)1798 i_ddi_detachchild(dev_info_t *dip, uint_t flags)
1799 {
1800 	dev_info_t	*parent = ddi_get_parent(dip);
1801 	int		ret;
1802 
1803 	ASSERT(parent && DEVI_BUSY_OWNED(parent));
1804 
1805 	ret = i_ndi_unconfig_node(dip, DS_PROBED, flags);
1806 	if (ret != DDI_SUCCESS)
1807 		(void) i_ndi_config_node(dip, DS_READY, 0);
1808 	else
1809 		/* allow pm_pre_probe to reestablish pm state */
1810 		(void) i_ndi_unconfig_node(dip, DS_INITIALIZED, 0);
1811 	return (ret);
1812 }
1813 
1814 /*
1815  * Add a child and bind to driver
1816  */
1817 dev_info_t *
ddi_add_child(dev_info_t * pdip,char * name,uint_t nodeid,uint_t unit)1818 ddi_add_child(dev_info_t *pdip, char *name, uint_t nodeid, uint_t unit)
1819 {
1820 	int circ;
1821 	dev_info_t *dip;
1822 
1823 	/* allocate a new node */
1824 	dip = i_ddi_alloc_node(pdip, name, nodeid, (int)unit, NULL, KM_SLEEP);
1825 
1826 	ndi_devi_enter(pdip, &circ);
1827 	(void) i_ndi_config_node(dip, DS_BOUND, 0);
1828 	ndi_devi_exit(pdip, circ);
1829 	return (dip);
1830 }
1831 
1832 /*
1833  * ddi_remove_child: remove the dip. The parent must be attached and held
1834  */
1835 int
ddi_remove_child(dev_info_t * dip,int dummy)1836 ddi_remove_child(dev_info_t *dip, int dummy)
1837 {
1838 	_NOTE(ARGUNUSED(dummy))
1839 	int circ, ret;
1840 	dev_info_t *parent = ddi_get_parent(dip);
1841 	ASSERT(parent);
1842 
1843 	ndi_devi_enter(parent, &circ);
1844 
1845 	/*
1846 	 * If we still have children, for example SID nodes marked
1847 	 * as persistent but not attached, attempt to remove them.
1848 	 */
1849 	if (DEVI(dip)->devi_child) {
1850 		ret = ndi_devi_unconfig(dip, NDI_DEVI_REMOVE);
1851 		if (ret != NDI_SUCCESS) {
1852 			ndi_devi_exit(parent, circ);
1853 			return (DDI_FAILURE);
1854 		}
1855 		ASSERT(DEVI(dip)->devi_child == NULL);
1856 	}
1857 
1858 	ret = i_ndi_unconfig_node(dip, DS_PROTO, 0);
1859 	ndi_devi_exit(parent, circ);
1860 
1861 	if (ret != DDI_SUCCESS)
1862 		return (ret);
1863 
1864 	ASSERT(i_ddi_node_state(dip) == DS_PROTO);
1865 	i_ddi_free_node(dip);
1866 	return (DDI_SUCCESS);
1867 }
1868 
1869 /*
1870  * NDI wrappers for ref counting, node allocation, and transitions
1871  */
1872 
1873 /*
1874  * Hold/release the devinfo node itself.
1875  * Caller is assumed to prevent the devi from detaching during this call
1876  */
1877 void
ndi_hold_devi(dev_info_t * dip)1878 ndi_hold_devi(dev_info_t *dip)
1879 {
1880 	mutex_enter(&DEVI(dip)->devi_lock);
1881 	ASSERT(DEVI(dip)->devi_ref >= 0);
1882 	DEVI(dip)->devi_ref++;
1883 	membar_enter();			/* make sure stores are flushed */
1884 	mutex_exit(&DEVI(dip)->devi_lock);
1885 }
1886 
1887 void
ndi_rele_devi(dev_info_t * dip)1888 ndi_rele_devi(dev_info_t *dip)
1889 {
1890 	ASSERT(DEVI(dip)->devi_ref > 0);
1891 
1892 	mutex_enter(&DEVI(dip)->devi_lock);
1893 	DEVI(dip)->devi_ref--;
1894 	membar_enter();			/* make sure stores are flushed */
1895 	mutex_exit(&DEVI(dip)->devi_lock);
1896 }
1897 
1898 int
e_ddi_devi_holdcnt(dev_info_t * dip)1899 e_ddi_devi_holdcnt(dev_info_t *dip)
1900 {
1901 	return (DEVI(dip)->devi_ref);
1902 }
1903 
1904 /*
1905  * Hold/release the driver the devinfo node is bound to.
1906  */
1907 struct dev_ops *
ndi_hold_driver(dev_info_t * dip)1908 ndi_hold_driver(dev_info_t *dip)
1909 {
1910 	if (i_ddi_node_state(dip) < DS_BOUND)
1911 		return (NULL);
1912 
1913 	ASSERT(DEVI(dip)->devi_major != -1);
1914 	return (mod_hold_dev_by_major(DEVI(dip)->devi_major));
1915 }
1916 
1917 void
ndi_rele_driver(dev_info_t * dip)1918 ndi_rele_driver(dev_info_t *dip)
1919 {
1920 	ASSERT(i_ddi_node_state(dip) >= DS_BOUND);
1921 	mod_rele_dev_by_major(DEVI(dip)->devi_major);
1922 }
1923 
1924 /*
1925  * Single thread entry into devinfo node for modifying its children (devinfo,
1926  * pathinfo, and minor). To verify in ASSERTS use DEVI_BUSY_OWNED macro.
1927  */
1928 void
ndi_devi_enter(dev_info_t * dip,int * circular)1929 ndi_devi_enter(dev_info_t *dip, int *circular)
1930 {
1931 	struct dev_info *devi = DEVI(dip);
1932 	ASSERT(dip != NULL);
1933 
1934 	/* for vHCI, enforce (vHCI, pHCI) ndi_deve_enter() order */
1935 	ASSERT(!MDI_VHCI(dip) || (mdi_devi_pdip_entered(dip) == 0) ||
1936 	    DEVI_BUSY_OWNED(dip));
1937 
1938 	mutex_enter(&devi->devi_lock);
1939 	if (devi->devi_busy_thread == curthread) {
1940 		devi->devi_circular++;
1941 	} else {
1942 		while (DEVI_BUSY_CHANGING(devi) && !panicstr)
1943 			cv_wait(&(devi->devi_cv), &(devi->devi_lock));
1944 		if (panicstr) {
1945 			mutex_exit(&devi->devi_lock);
1946 			return;
1947 		}
1948 		devi->devi_flags |= DEVI_BUSY;
1949 		devi->devi_busy_thread = curthread;
1950 	}
1951 	*circular = devi->devi_circular;
1952 	mutex_exit(&devi->devi_lock);
1953 }
1954 
1955 /*
1956  * Release ndi_devi_enter or successful ndi_devi_tryenter.
1957  */
1958 void
ndi_devi_exit(dev_info_t * dip,int circular)1959 ndi_devi_exit(dev_info_t *dip, int circular)
1960 {
1961 	struct dev_info	*devi = DEVI(dip);
1962 	struct dev_info	*vdevi;
1963 	ASSERT(dip != NULL);
1964 
1965 	if (panicstr)
1966 		return;
1967 
1968 	mutex_enter(&(devi->devi_lock));
1969 	if (circular != 0) {
1970 		devi->devi_circular--;
1971 	} else {
1972 		devi->devi_flags &= ~DEVI_BUSY;
1973 		ASSERT(devi->devi_busy_thread == curthread);
1974 		devi->devi_busy_thread = NULL;
1975 		cv_broadcast(&(devi->devi_cv));
1976 	}
1977 	mutex_exit(&(devi->devi_lock));
1978 
1979 	/*
1980 	 * For pHCI exit we issue a broadcast to vHCI for ndi_devi_config_one()
1981 	 * doing cv_wait on vHCI.
1982 	 */
1983 	if (MDI_PHCI(dip)) {
1984 		vdevi = DEVI(mdi_devi_get_vdip(dip));
1985 		if (vdevi) {
1986 			mutex_enter(&(vdevi->devi_lock));
1987 			if (vdevi->devi_flags & DEVI_PHCI_SIGNALS_VHCI) {
1988 				vdevi->devi_flags &= ~DEVI_PHCI_SIGNALS_VHCI;
1989 				cv_broadcast(&(vdevi->devi_cv));
1990 			}
1991 			mutex_exit(&(vdevi->devi_lock));
1992 		}
1993 	}
1994 }
1995 
1996 /*
1997  * Release ndi_devi_enter and wait for possibility of new children, avoiding
1998  * possibility of missing broadcast before getting to cv_timedwait().
1999  */
2000 static void
ndi_devi_exit_and_wait(dev_info_t * dip,int circular,clock_t end_time)2001 ndi_devi_exit_and_wait(dev_info_t *dip, int circular, clock_t end_time)
2002 {
2003 	struct dev_info	*devi = DEVI(dip);
2004 	ASSERT(dip != NULL);
2005 
2006 	if (panicstr)
2007 		return;
2008 
2009 	/*
2010 	 * We are called to wait for of a new child, and new child can
2011 	 * only be added if circular is zero.
2012 	 */
2013 	ASSERT(circular == 0);
2014 
2015 	/* like ndi_devi_exit with circular of zero */
2016 	mutex_enter(&(devi->devi_lock));
2017 	devi->devi_flags &= ~DEVI_BUSY;
2018 	ASSERT(devi->devi_busy_thread == curthread);
2019 	devi->devi_busy_thread = NULL;
2020 	cv_broadcast(&(devi->devi_cv));
2021 
2022 	/* now wait for new children while still holding devi_lock */
2023 	(void) cv_timedwait(&devi->devi_cv, &(devi->devi_lock), end_time);
2024 	mutex_exit(&(devi->devi_lock));
2025 }
2026 
2027 /*
2028  * Attempt to single thread entry into devinfo node for modifying its children.
2029  */
2030 int
ndi_devi_tryenter(dev_info_t * dip,int * circular)2031 ndi_devi_tryenter(dev_info_t *dip, int *circular)
2032 {
2033 	int rval = 1;		   /* assume we enter */
2034 	struct dev_info *devi = DEVI(dip);
2035 	ASSERT(dip != NULL);
2036 
2037 	mutex_enter(&devi->devi_lock);
2038 	if (devi->devi_busy_thread == (void *)curthread) {
2039 		devi->devi_circular++;
2040 	} else {
2041 		if (!DEVI_BUSY_CHANGING(devi)) {
2042 			devi->devi_flags |= DEVI_BUSY;
2043 			devi->devi_busy_thread = (void *)curthread;
2044 		} else {
2045 			rval = 0;	/* devi is busy */
2046 		}
2047 	}
2048 	*circular = devi->devi_circular;
2049 	mutex_exit(&devi->devi_lock);
2050 	return (rval);
2051 }
2052 
2053 /*
2054  * Allocate and initialize a new dev_info structure.
2055  *
2056  * This routine may be called at interrupt time by a nexus in
2057  * response to a hotplug event, therefore memory allocations are
2058  * not allowed to sleep.
2059  */
2060 int
ndi_devi_alloc(dev_info_t * parent,const char * node_name,pnode_t nodeid,dev_info_t ** ret_dip)2061 ndi_devi_alloc(dev_info_t *parent, const char *node_name, pnode_t nodeid,
2062     dev_info_t **ret_dip)
2063 {
2064 	ASSERT(node_name != NULL);
2065 	ASSERT(ret_dip != NULL);
2066 
2067 	*ret_dip = i_ddi_alloc_node(parent, node_name, nodeid, -1, NULL,
2068 	    KM_NOSLEEP);
2069 	if (*ret_dip == NULL) {
2070 		return (NDI_NOMEM);
2071 	}
2072 
2073 	return (NDI_SUCCESS);
2074 }
2075 
2076 /*
2077  * Allocate and initialize a new dev_info structure
2078  * This routine may sleep and should not be called at interrupt time
2079  */
2080 void
ndi_devi_alloc_sleep(dev_info_t * parent,const char * node_name,pnode_t nodeid,dev_info_t ** ret_dip)2081 ndi_devi_alloc_sleep(dev_info_t *parent, const char *node_name, pnode_t nodeid,
2082     dev_info_t **ret_dip)
2083 {
2084 	ASSERT(node_name != NULL);
2085 	ASSERT(ret_dip != NULL);
2086 
2087 	*ret_dip = i_ddi_alloc_node(parent, node_name, nodeid, -1, NULL,
2088 	    KM_SLEEP);
2089 	ASSERT(*ret_dip);
2090 }
2091 
2092 /*
2093  * Remove an initialized (but not yet attached) dev_info
2094  * node from it's parent.
2095  */
2096 int
ndi_devi_free(dev_info_t * dip)2097 ndi_devi_free(dev_info_t *dip)
2098 {
2099 	ASSERT(dip != NULL);
2100 
2101 	if (i_ddi_node_state(dip) >= DS_INITIALIZED)
2102 		return (DDI_FAILURE);
2103 
2104 	NDI_CONFIG_DEBUG((CE_CONT, "ndi_devi_free: %s%d (%p)\n",
2105 	    ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip));
2106 
2107 	(void) ddi_remove_child(dip, 0);
2108 
2109 	return (NDI_SUCCESS);
2110 }
2111 
2112 /*
2113  * ndi_devi_bind_driver() binds a driver to a given device. If it fails
2114  * to bind the driver, it returns an appropriate error back. Some drivers
2115  * may want to know if the actually failed to bind.
2116  */
2117 int
ndi_devi_bind_driver(dev_info_t * dip,uint_t flags)2118 ndi_devi_bind_driver(dev_info_t *dip, uint_t flags)
2119 {
2120 	int ret = NDI_FAILURE;
2121 	int circ;
2122 	dev_info_t *pdip = ddi_get_parent(dip);
2123 	ASSERT(pdip);
2124 
2125 	NDI_CONFIG_DEBUG((CE_CONT,
2126 	    "ndi_devi_bind_driver: %s%d (%p) flags: %x\n",
2127 	    ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
2128 
2129 	ndi_devi_enter(pdip, &circ);
2130 	if (i_ndi_config_node(dip, DS_BOUND, flags) == DDI_SUCCESS)
2131 		ret = NDI_SUCCESS;
2132 	ndi_devi_exit(pdip, circ);
2133 
2134 	return (ret);
2135 }
2136 
2137 /*
2138  * ndi_devi_unbind_driver: unbind the dip
2139  */
2140 static int
ndi_devi_unbind_driver(dev_info_t * dip)2141 ndi_devi_unbind_driver(dev_info_t *dip)
2142 {
2143 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
2144 
2145 	return (i_ndi_unconfig_node(dip, DS_LINKED, 0));
2146 }
2147 
2148 /*
2149  * Misc. help routines called by framework only
2150  */
2151 
2152 /*
2153  * Get the state of node
2154  */
2155 ddi_node_state_t
i_ddi_node_state(dev_info_t * dip)2156 i_ddi_node_state(dev_info_t *dip)
2157 {
2158 	return (DEVI(dip)->devi_node_state);
2159 }
2160 
2161 /*
2162  * Set the state of node
2163  */
2164 void
i_ddi_set_node_state(dev_info_t * dip,ddi_node_state_t state)2165 i_ddi_set_node_state(dev_info_t *dip, ddi_node_state_t state)
2166 {
2167 	DEVI(dip)->devi_node_state = state;
2168 	membar_enter();			/* make sure stores are flushed */
2169 }
2170 
2171 /*
2172  * Determine if node is attached. The implementation accommodates transient
2173  * DS_READY->DS_ATTACHED->DS_READY state changes.  Outside this file, this
2174  * function should be instead of i_ddi_node_state() DS_ATTACHED/DS_READY
2175  * state checks.
2176  */
2177 int
i_ddi_devi_attached(dev_info_t * dip)2178 i_ddi_devi_attached(dev_info_t *dip)
2179 {
2180 	return (DEVI(dip)->devi_node_state >= DS_ATTACHED);
2181 }
2182 
2183 /*
2184  * Common function for finding a node in a sibling list given name and addr.
2185  *
2186  * By default, name is matched with devi_node_name. The following
2187  * alternative match strategies are supported:
2188  *
2189  *	FIND_NODE_BY_NODENAME: Match on node name - typical use.
2190  *
2191  *	FIND_NODE_BY_DRIVER: A match on driver name bound to node is conducted.
2192  *		This support is used for support of OBP generic names and
2193  *		for the conversion from driver names to generic names. When
2194  *		more consistency in the generic name environment is achieved
2195  *		(and not needed for upgrade) this support can be removed.
2196  *
2197  *	FIND_NODE_BY_ADDR: Match on just the addr.
2198  *		This support is only used/needed during boot to match
2199  *		a node bound via a path-based driver alias.
2200  *
2201  * If a child is not named (dev_addr == NULL), there are three
2202  * possible actions:
2203  *
2204  *	(1) skip it
2205  *	(2) FIND_ADDR_BY_INIT: bring child to DS_INITIALIZED state
2206  *	(3) FIND_ADDR_BY_CALLBACK: use a caller-supplied callback function
2207  */
2208 #define	FIND_NODE_BY_NODENAME	0x01
2209 #define	FIND_NODE_BY_DRIVER	0x02
2210 #define	FIND_NODE_BY_ADDR	0x04
2211 #define	FIND_ADDR_BY_INIT	0x10
2212 #define	FIND_ADDR_BY_CALLBACK	0x20
2213 
2214 static dev_info_t *
find_sibling(dev_info_t * head,char * cname,char * caddr,uint_t flag,int (* callback)(dev_info_t *,char *,int))2215 find_sibling(dev_info_t *head, char *cname, char *caddr, uint_t flag,
2216     int (*callback)(dev_info_t *, char *, int))
2217 {
2218 	dev_info_t	*dip;
2219 	char		*addr, *buf;
2220 	major_t		major;
2221 	uint_t		by;
2222 
2223 	/* only one way to find a node */
2224 	by = flag &
2225 	    (FIND_NODE_BY_DRIVER | FIND_NODE_BY_NODENAME | FIND_NODE_BY_ADDR);
2226 	ASSERT(by && BIT_ONLYONESET(by));
2227 
2228 	/* only one way to name a node */
2229 	ASSERT(((flag & FIND_ADDR_BY_INIT) == 0) ||
2230 	    ((flag & FIND_ADDR_BY_CALLBACK) == 0));
2231 
2232 	if (by == FIND_NODE_BY_DRIVER) {
2233 		major = ddi_name_to_major(cname);
2234 		if (major == DDI_MAJOR_T_NONE)
2235 			return (NULL);
2236 	}
2237 
2238 	buf = NULL;
2239 	/* preallocate buffer of naming node by callback */
2240 	if (flag & FIND_ADDR_BY_CALLBACK)
2241 		buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2242 
2243 	/*
2244 	 * Walk the child list to find a match
2245 	 */
2246 	if (head == NULL)
2247 		return (NULL);
2248 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(head)));
2249 	for (dip = head; dip; dip = ddi_get_next_sibling(dip)) {
2250 		if (by == FIND_NODE_BY_NODENAME) {
2251 			/* match node name */
2252 			if (strcmp(cname, DEVI(dip)->devi_node_name) != 0)
2253 				continue;
2254 		} else if (by == FIND_NODE_BY_DRIVER) {
2255 			/* match driver major */
2256 			if (DEVI(dip)->devi_major != major)
2257 				continue;
2258 		}
2259 
2260 		if ((addr = DEVI(dip)->devi_addr) == NULL) {
2261 			/* name the child based on the flag */
2262 			if (flag & FIND_ADDR_BY_INIT) {
2263 				if (ddi_initchild(ddi_get_parent(dip), dip)
2264 				    != DDI_SUCCESS)
2265 					continue;
2266 				addr = DEVI(dip)->devi_addr;
2267 			} else if (flag & FIND_ADDR_BY_CALLBACK) {
2268 				if ((callback == NULL) || (callback(
2269 				    dip, buf, MAXNAMELEN) != DDI_SUCCESS))
2270 					continue;
2271 				addr = buf;
2272 			} else {
2273 				continue;	/* skip */
2274 			}
2275 		}
2276 
2277 		/* match addr */
2278 		ASSERT(addr != NULL);
2279 		if (strcmp(caddr, addr) == 0)
2280 			break;	/* node found */
2281 
2282 	}
2283 	if (flag & FIND_ADDR_BY_CALLBACK)
2284 		kmem_free(buf, MAXNAMELEN);
2285 	return (dip);
2286 }
2287 
2288 /*
2289  * Find child of pdip with name: cname@caddr
2290  * Called by init_node() to look for duplicate nodes
2291  */
2292 static dev_info_t *
find_duplicate_child(dev_info_t * pdip,dev_info_t * dip)2293 find_duplicate_child(dev_info_t *pdip, dev_info_t *dip)
2294 {
2295 	dev_info_t *dup;
2296 	char *cname = DEVI(dip)->devi_node_name;
2297 	char *caddr = DEVI(dip)->devi_addr;
2298 
2299 	/* search nodes before dip */
2300 	dup = find_sibling(ddi_get_child(pdip), cname, caddr,
2301 	    FIND_NODE_BY_NODENAME, NULL);
2302 	if (dup != dip)
2303 		return (dup);
2304 
2305 	/*
2306 	 * search nodes after dip; normally this is not needed,
2307 	 */
2308 	return (find_sibling(ddi_get_next_sibling(dip), cname, caddr,
2309 	    FIND_NODE_BY_NODENAME, NULL));
2310 }
2311 
2312 /*
2313  * Find a child of a given name and address, using a callback to name
2314  * unnamed children. cname is the binding name.
2315  */
2316 dev_info_t *
ndi_devi_findchild_by_callback(dev_info_t * pdip,char * dname,char * ua,int (* make_ua)(dev_info_t *,char *,int))2317 ndi_devi_findchild_by_callback(dev_info_t *pdip, char *dname, char *ua,
2318     int (*make_ua)(dev_info_t *, char *, int))
2319 {
2320 	int	by = FIND_ADDR_BY_CALLBACK;
2321 
2322 	ASSERT(DEVI_BUSY_OWNED(pdip));
2323 	by |= dname ? FIND_NODE_BY_DRIVER : FIND_NODE_BY_ADDR;
2324 	return (find_sibling(ddi_get_child(pdip), dname, ua, by, make_ua));
2325 }
2326 
2327 /*
2328  * Find a child of a given name and address, invoking initchild to name
2329  * unnamed children. cname is the node name.
2330  */
2331 static dev_info_t *
find_child_by_name(dev_info_t * pdip,char * cname,char * caddr)2332 find_child_by_name(dev_info_t *pdip, char *cname, char *caddr)
2333 {
2334 	dev_info_t	*dip;
2335 
2336 	/* attempt search without changing state of preceding siblings */
2337 	dip = find_sibling(ddi_get_child(pdip), cname, caddr,
2338 	    FIND_NODE_BY_NODENAME, NULL);
2339 	if (dip)
2340 		return (dip);
2341 
2342 	return (find_sibling(ddi_get_child(pdip), cname, caddr,
2343 	    FIND_NODE_BY_NODENAME|FIND_ADDR_BY_INIT, NULL));
2344 }
2345 
2346 /*
2347  * Find a child of a given name and address, invoking initchild to name
2348  * unnamed children. cname is the node name.
2349  */
2350 static dev_info_t *
find_child_by_driver(dev_info_t * pdip,char * cname,char * caddr)2351 find_child_by_driver(dev_info_t *pdip, char *cname, char *caddr)
2352 {
2353 	dev_info_t	*dip;
2354 
2355 	/* attempt search without changing state of preceding siblings */
2356 	dip = find_sibling(ddi_get_child(pdip), cname, caddr,
2357 	    FIND_NODE_BY_DRIVER, NULL);
2358 	if (dip)
2359 		return (dip);
2360 
2361 	return (find_sibling(ddi_get_child(pdip), cname, caddr,
2362 	    FIND_NODE_BY_DRIVER|FIND_ADDR_BY_INIT, NULL));
2363 }
2364 
2365 /*
2366  * Find a child of a given address, invoking initchild to name
2367  * unnamed children. cname is the node name.
2368  *
2369  * NOTE: This function is only used during boot. One would hope that
2370  * unique sibling unit-addresses on hardware branches of the tree would
2371  * be a requirement to avoid two drivers trying to control the same
2372  * piece of hardware. Unfortunately there are some cases where this
2373  * situation exists (/ssm@0,0/pci@1c,700000 /ssm@0,0/sghsc@1c,700000).
2374  * Until unit-address uniqueness of siblings is guaranteed, use of this
2375  * interface for purposes other than boot should be avoided.
2376  */
2377 static dev_info_t *
find_child_by_addr(dev_info_t * pdip,char * caddr)2378 find_child_by_addr(dev_info_t *pdip, char *caddr)
2379 {
2380 	dev_info_t	*dip;
2381 
2382 	/* return NULL if called without a unit-address */
2383 	if ((caddr == NULL) || (*caddr == '\0'))
2384 		return (NULL);
2385 
2386 	/* attempt search without changing state of preceding siblings */
2387 	dip = find_sibling(ddi_get_child(pdip), NULL, caddr,
2388 	    FIND_NODE_BY_ADDR, NULL);
2389 	if (dip)
2390 		return (dip);
2391 
2392 	return (find_sibling(ddi_get_child(pdip), NULL, caddr,
2393 	    FIND_NODE_BY_ADDR|FIND_ADDR_BY_INIT, NULL));
2394 }
2395 
2396 /*
2397  * Deleting a property list. Take care, since some property structures
2398  * may not be fully built.
2399  */
2400 void
i_ddi_prop_list_delete(ddi_prop_t * prop)2401 i_ddi_prop_list_delete(ddi_prop_t *prop)
2402 {
2403 	while (prop) {
2404 		ddi_prop_t *next = prop->prop_next;
2405 		if (prop->prop_name)
2406 			kmem_free(prop->prop_name, strlen(prop->prop_name) + 1);
2407 		if ((prop->prop_len != 0) && prop->prop_val)
2408 			kmem_free(prop->prop_val, prop->prop_len);
2409 		kmem_free(prop, sizeof (struct ddi_prop));
2410 		prop = next;
2411 	}
2412 }
2413 
2414 /*
2415  * Duplicate property list
2416  */
2417 ddi_prop_t *
i_ddi_prop_list_dup(ddi_prop_t * prop,uint_t flag)2418 i_ddi_prop_list_dup(ddi_prop_t *prop, uint_t flag)
2419 {
2420 	ddi_prop_t *result, *prev, *copy;
2421 
2422 	if (prop == NULL)
2423 		return (NULL);
2424 
2425 	result = prev = NULL;
2426 	for (; prop != NULL; prop = prop->prop_next) {
2427 		ASSERT(prop->prop_name != NULL);
2428 		copy = kmem_zalloc(sizeof (struct ddi_prop), flag);
2429 		if (copy == NULL)
2430 			goto fail;
2431 
2432 		copy->prop_dev = prop->prop_dev;
2433 		copy->prop_flags = prop->prop_flags;
2434 		copy->prop_name = i_ddi_strdup(prop->prop_name, flag);
2435 		if (copy->prop_name == NULL)
2436 			goto fail;
2437 
2438 		if ((copy->prop_len = prop->prop_len) != 0) {
2439 			copy->prop_val = kmem_zalloc(prop->prop_len, flag);
2440 			if (copy->prop_val == NULL)
2441 				goto fail;
2442 
2443 			bcopy(prop->prop_val, copy->prop_val, prop->prop_len);
2444 		}
2445 
2446 		if (prev == NULL)
2447 			result = prev = copy;
2448 		else
2449 			prev->prop_next = copy;
2450 		prev = copy;
2451 	}
2452 	return (result);
2453 
2454 fail:
2455 	i_ddi_prop_list_delete(result);
2456 	return (NULL);
2457 }
2458 
2459 /*
2460  * Create a reference property list, currently used only for
2461  * driver global properties. Created with ref count of 1.
2462  */
2463 ddi_prop_list_t *
i_ddi_prop_list_create(ddi_prop_t * props)2464 i_ddi_prop_list_create(ddi_prop_t *props)
2465 {
2466 	ddi_prop_list_t *list = kmem_alloc(sizeof (*list), KM_SLEEP);
2467 	list->prop_list = props;
2468 	list->prop_ref = 1;
2469 	return (list);
2470 }
2471 
2472 /*
2473  * Increment/decrement reference count. The reference is
2474  * protected by dn_lock. The only interfaces modifying
2475  * dn_global_prop_ptr is in impl_make[free]_parlist().
2476  */
2477 void
i_ddi_prop_list_hold(ddi_prop_list_t * prop_list,struct devnames * dnp)2478 i_ddi_prop_list_hold(ddi_prop_list_t *prop_list, struct devnames *dnp)
2479 {
2480 	ASSERT(prop_list->prop_ref >= 0);
2481 	ASSERT(mutex_owned(&dnp->dn_lock));
2482 	prop_list->prop_ref++;
2483 }
2484 
2485 void
i_ddi_prop_list_rele(ddi_prop_list_t * prop_list,struct devnames * dnp)2486 i_ddi_prop_list_rele(ddi_prop_list_t *prop_list, struct devnames *dnp)
2487 {
2488 	ASSERT(prop_list->prop_ref > 0);
2489 	ASSERT(mutex_owned(&dnp->dn_lock));
2490 	prop_list->prop_ref--;
2491 
2492 	if (prop_list->prop_ref == 0) {
2493 		i_ddi_prop_list_delete(prop_list->prop_list);
2494 		kmem_free(prop_list, sizeof (*prop_list));
2495 	}
2496 }
2497 
2498 /*
2499  * Free table of classes by drivers
2500  */
2501 void
i_ddi_free_exported_classes(char ** classes,int n)2502 i_ddi_free_exported_classes(char **classes, int n)
2503 {
2504 	if ((n == 0) || (classes == NULL))
2505 		return;
2506 
2507 	kmem_free(classes, n * sizeof (char *));
2508 }
2509 
2510 /*
2511  * Get all classes exported by dip
2512  */
2513 int
i_ddi_get_exported_classes(dev_info_t * dip,char *** classes)2514 i_ddi_get_exported_classes(dev_info_t *dip, char ***classes)
2515 {
2516 	extern void lock_hw_class_list();
2517 	extern void unlock_hw_class_list();
2518 	extern int get_class(const char *, char **);
2519 
2520 	static char *rootclass = "root";
2521 	int n = 0, nclass = 0;
2522 	char **buf;
2523 
2524 	ASSERT(i_ddi_node_state(dip) >= DS_BOUND);
2525 
2526 	if (dip == ddi_root_node())	/* rootnode exports class "root" */
2527 		nclass = 1;
2528 	lock_hw_class_list();
2529 	nclass += get_class(ddi_driver_name(dip), NULL);
2530 	if (nclass == 0) {
2531 		unlock_hw_class_list();
2532 		return (0);		/* no class exported */
2533 	}
2534 
2535 	*classes = buf = kmem_alloc(nclass * sizeof (char *), KM_SLEEP);
2536 	if (dip == ddi_root_node()) {
2537 		*buf++ = rootclass;
2538 		n = 1;
2539 	}
2540 	n += get_class(ddi_driver_name(dip), buf);
2541 	unlock_hw_class_list();
2542 
2543 	ASSERT(n == nclass);	/* make sure buf wasn't overrun */
2544 	return (nclass);
2545 }
2546 
2547 /*
2548  * Helper functions, returns NULL if no memory.
2549  */
2550 char *
i_ddi_strdup(const char * str,uint_t flag)2551 i_ddi_strdup(const char *str, uint_t flag)
2552 {
2553 	char *copy;
2554 
2555 	if (str == NULL)
2556 		return (NULL);
2557 
2558 	copy = kmem_alloc(strlen(str) + 1, flag);
2559 	if (copy == NULL)
2560 		return (NULL);
2561 
2562 	(void) strcpy(copy, str);
2563 	return (copy);
2564 }
2565 
2566 /*
2567  * Load driver.conf file for major. Load all if major == -1.
2568  *
2569  * This is called
2570  * - early in boot after devnames array is initialized
2571  * - from vfs code when certain file systems are mounted
2572  * - from add_drv when a new driver is added
2573  */
2574 int
i_ddi_load_drvconf(major_t major)2575 i_ddi_load_drvconf(major_t major)
2576 {
2577 	extern int modrootloaded;
2578 
2579 	major_t low, high, m;
2580 
2581 	if (major == DDI_MAJOR_T_NONE) {
2582 		low = 0;
2583 		high = devcnt - 1;
2584 	} else {
2585 		if (major >= devcnt)
2586 			return (EINVAL);
2587 		low = high = major;
2588 	}
2589 
2590 	for (m = low; m <= high; m++) {
2591 		struct devnames *dnp = &devnamesp[m];
2592 		LOCK_DEV_OPS(&dnp->dn_lock);
2593 		dnp->dn_flags &= ~(DN_DRIVER_HELD|DN_DRIVER_INACTIVE);
2594 		(void) impl_make_parlist(m);
2595 		UNLOCK_DEV_OPS(&dnp->dn_lock);
2596 	}
2597 
2598 	if (modrootloaded) {
2599 		ddi_walk_devs(ddi_root_node(), reset_nexus_flags,
2600 		    (void *)(uintptr_t)major);
2601 	}
2602 
2603 	/* build dn_list from old entries in path_to_inst */
2604 	e_ddi_unorphan_instance_nos();
2605 	return (0);
2606 }
2607 
2608 /*
2609  * Unload a specific driver.conf.
2610  * Don't support unload all because it doesn't make any sense
2611  */
2612 int
i_ddi_unload_drvconf(major_t major)2613 i_ddi_unload_drvconf(major_t major)
2614 {
2615 	int error;
2616 	struct devnames *dnp;
2617 
2618 	if (major >= devcnt)
2619 		return (EINVAL);
2620 
2621 	/*
2622 	 * Take the per-driver lock while unloading driver.conf
2623 	 */
2624 	dnp = &devnamesp[major];
2625 	LOCK_DEV_OPS(&dnp->dn_lock);
2626 	error = impl_free_parlist(major);
2627 	UNLOCK_DEV_OPS(&dnp->dn_lock);
2628 	return (error);
2629 }
2630 
2631 /*
2632  * Merge a .conf node. This is called by nexus drivers to augment
2633  * hw node with properties specified in driver.conf file. This function
2634  * takes a callback routine to name nexus children.
2635  * The parent node must be held busy.
2636  *
2637  * It returns DDI_SUCCESS if the node is merged and DDI_FAILURE otherwise.
2638  */
2639 int
ndi_merge_node(dev_info_t * dip,int (* make_ua)(dev_info_t *,char *,int))2640 ndi_merge_node(dev_info_t *dip, int (*make_ua)(dev_info_t *, char *, int))
2641 {
2642 	dev_info_t *hwdip;
2643 
2644 	ASSERT(ndi_dev_is_persistent_node(dip) == 0);
2645 	ASSERT(ddi_get_name_addr(dip) != NULL);
2646 
2647 	hwdip = ndi_devi_findchild_by_callback(ddi_get_parent(dip),
2648 	    ddi_binding_name(dip), ddi_get_name_addr(dip), make_ua);
2649 
2650 	/*
2651 	 * Look for the hardware node that is the target of the merge;
2652 	 * return failure if not found.
2653 	 */
2654 	if ((hwdip == NULL) || (hwdip == dip)) {
2655 		char *buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2656 		NDI_CONFIG_DEBUG((CE_WARN, "No HW node to merge conf node %s",
2657 		    ddi_deviname(dip, buf)));
2658 		kmem_free(buf, MAXNAMELEN);
2659 		return (DDI_FAILURE);
2660 	}
2661 
2662 	/*
2663 	 * Make sure the hardware node is uninitialized and has no property.
2664 	 * This may not be the case if new .conf files are load after some
2665 	 * hardware nodes have already been initialized and attached.
2666 	 *
2667 	 * N.B. We return success here because the node was *intended*
2668 	 *	to be a merge node because there is a hw node with the name.
2669 	 */
2670 	mutex_enter(&DEVI(hwdip)->devi_lock);
2671 	if (ndi_dev_is_persistent_node(hwdip) == 0) {
2672 		char *buf;
2673 		mutex_exit(&DEVI(hwdip)->devi_lock);
2674 
2675 		buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2676 		NDI_CONFIG_DEBUG((CE_NOTE, "Duplicate .conf node %s",
2677 		    ddi_deviname(dip, buf)));
2678 		kmem_free(buf, MAXNAMELEN);
2679 		return (DDI_SUCCESS);
2680 	}
2681 
2682 	/*
2683 	 * If it is possible that the hardware has already been touched
2684 	 * then don't merge.
2685 	 */
2686 	if (i_ddi_node_state(hwdip) >= DS_INITIALIZED ||
2687 	    (DEVI(hwdip)->devi_sys_prop_ptr != NULL) ||
2688 	    (DEVI(hwdip)->devi_drv_prop_ptr != NULL)) {
2689 		char *buf;
2690 		mutex_exit(&DEVI(hwdip)->devi_lock);
2691 
2692 		buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2693 		NDI_CONFIG_DEBUG((CE_NOTE,
2694 		    "!Cannot merge .conf node %s with hw node %p "
2695 		    "-- not in proper state",
2696 		    ddi_deviname(dip, buf), (void *)hwdip));
2697 		kmem_free(buf, MAXNAMELEN);
2698 		return (DDI_SUCCESS);
2699 	}
2700 
2701 	mutex_enter(&DEVI(dip)->devi_lock);
2702 	DEVI(hwdip)->devi_sys_prop_ptr = DEVI(dip)->devi_sys_prop_ptr;
2703 	DEVI(hwdip)->devi_drv_prop_ptr = DEVI(dip)->devi_drv_prop_ptr;
2704 	DEVI(dip)->devi_sys_prop_ptr = NULL;
2705 	DEVI(dip)->devi_drv_prop_ptr = NULL;
2706 	mutex_exit(&DEVI(dip)->devi_lock);
2707 	mutex_exit(&DEVI(hwdip)->devi_lock);
2708 
2709 	return (DDI_SUCCESS);
2710 }
2711 
2712 /*
2713  * Merge a "wildcard" .conf node. This is called by nexus drivers to
2714  * augment a set of hw node with properties specified in driver.conf file.
2715  * The parent node must be held busy.
2716  *
2717  * There is no failure mode, since the nexus may or may not have child
2718  * node bound the driver specified by the wildcard node.
2719  */
2720 void
ndi_merge_wildcard_node(dev_info_t * dip)2721 ndi_merge_wildcard_node(dev_info_t *dip)
2722 {
2723 	dev_info_t *hwdip;
2724 	dev_info_t *pdip = ddi_get_parent(dip);
2725 	major_t major = ddi_driver_major(dip);
2726 
2727 	/* never attempt to merge a hw node */
2728 	ASSERT(ndi_dev_is_persistent_node(dip) == 0);
2729 	/* must be bound to a driver major number */
2730 	ASSERT(major != DDI_MAJOR_T_NONE);
2731 
2732 	/*
2733 	 * Walk the child list to find all nodes bound to major
2734 	 * and copy properties.
2735 	 */
2736 	mutex_enter(&DEVI(dip)->devi_lock);
2737 	ASSERT(DEVI_BUSY_OWNED(pdip));
2738 	for (hwdip = ddi_get_child(pdip); hwdip;
2739 	    hwdip = ddi_get_next_sibling(hwdip)) {
2740 		/*
2741 		 * Skip nodes not bound to same driver
2742 		 */
2743 		if (ddi_driver_major(hwdip) != major)
2744 			continue;
2745 
2746 		/*
2747 		 * Skip .conf nodes
2748 		 */
2749 		if (ndi_dev_is_persistent_node(hwdip) == 0)
2750 			continue;
2751 
2752 		/*
2753 		 * Make sure the node is uninitialized and has no property.
2754 		 */
2755 		mutex_enter(&DEVI(hwdip)->devi_lock);
2756 		if (i_ddi_node_state(hwdip) >= DS_INITIALIZED ||
2757 		    (DEVI(hwdip)->devi_sys_prop_ptr != NULL) ||
2758 		    (DEVI(hwdip)->devi_drv_prop_ptr != NULL)) {
2759 			mutex_exit(&DEVI(hwdip)->devi_lock);
2760 			NDI_CONFIG_DEBUG((CE_NOTE, "HW node %p state not "
2761 			    "suitable for merging wildcard conf node %s",
2762 			    (void *)hwdip, ddi_node_name(dip)));
2763 			continue;
2764 		}
2765 
2766 		DEVI(hwdip)->devi_sys_prop_ptr =
2767 		    i_ddi_prop_list_dup(DEVI(dip)->devi_sys_prop_ptr, KM_SLEEP);
2768 		DEVI(hwdip)->devi_drv_prop_ptr =
2769 		    i_ddi_prop_list_dup(DEVI(dip)->devi_drv_prop_ptr, KM_SLEEP);
2770 		mutex_exit(&DEVI(hwdip)->devi_lock);
2771 	}
2772 	mutex_exit(&DEVI(dip)->devi_lock);
2773 }
2774 
2775 /*
2776  * Return the major number based on the compatible property. This interface
2777  * may be used in situations where we are trying to detect if a better driver
2778  * now exists for a device, so it must use the 'compatible' property.  If
2779  * a non-NULL formp is specified and the binding was based on compatible then
2780  * return the pointer to the form used in *formp.
2781  */
2782 major_t
ddi_compatible_driver_major(dev_info_t * dip,char ** formp)2783 ddi_compatible_driver_major(dev_info_t *dip, char **formp)
2784 {
2785 	struct dev_info *devi = DEVI(dip);
2786 	void		*compat;
2787 	size_t		len;
2788 	char		*p = NULL;
2789 	major_t		major = DDI_MAJOR_T_NONE;
2790 
2791 	if (formp)
2792 		*formp = NULL;
2793 
2794 	if (ddi_prop_exists(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS,
2795 	    "ddi-assigned")) {
2796 		major = ddi_name_to_major("nulldriver");
2797 		return (major);
2798 	}
2799 
2800 	/*
2801 	 * Highest precedence binding is a path-oriented alias. Since this
2802 	 * requires a 'path', this type of binding occurs via more obtuse
2803 	 * 'rebind'. The need for a path-oriented alias 'rebind' is detected
2804 	 * after a successful DDI_CTLOPS_INITCHILD to another driver: this is
2805 	 * is the first point at which the unit-address (or instance) of the
2806 	 * last component of the path is available (even though the path is
2807 	 * bound to the wrong driver at this point).
2808 	 */
2809 	if (devi->devi_flags & DEVI_REBIND) {
2810 		p = devi->devi_rebinding_name;
2811 		major = ddi_name_to_major(p);
2812 		if (driver_active(major)) {
2813 			if (formp)
2814 				*formp = p;
2815 			return (major);
2816 		}
2817 
2818 		/*
2819 		 * If for some reason devi_rebinding_name no longer resolves
2820 		 * to a proper driver then clear DEVI_REBIND.
2821 		 */
2822 		mutex_enter(&devi->devi_lock);
2823 		devi->devi_flags &= ~DEVI_REBIND;
2824 		mutex_exit(&devi->devi_lock);
2825 	}
2826 
2827 	/* look up compatible property */
2828 	(void) lookup_compatible(dip, KM_SLEEP);
2829 	compat = (void *)(devi->devi_compat_names);
2830 	len = devi->devi_compat_length;
2831 
2832 	/* find the highest precedence compatible form with a driver binding */
2833 	while ((p = prom_decode_composite_string(compat, len, p)) != NULL) {
2834 		major = ddi_name_to_major(p);
2835 		if (driver_active(major)) {
2836 			if (formp)
2837 				*formp = p;
2838 			return (major);
2839 		}
2840 	}
2841 
2842 	/*
2843 	 * none of the compatible forms have a driver binding, see if
2844 	 * the node name has a driver binding.
2845 	 */
2846 	major = ddi_name_to_major(ddi_node_name(dip));
2847 	if (driver_active(major))
2848 		return (major);
2849 
2850 	/* no driver */
2851 	return (DDI_MAJOR_T_NONE);
2852 }
2853 
2854 /*
2855  * Static help functions
2856  */
2857 
2858 /*
2859  * lookup the "compatible" property and cache it's contents in the
2860  * device node.
2861  */
2862 static int
lookup_compatible(dev_info_t * dip,uint_t flag)2863 lookup_compatible(dev_info_t *dip, uint_t flag)
2864 {
2865 	int rv;
2866 	int prop_flags;
2867 	uint_t ncompatstrs;
2868 	char **compatstrpp;
2869 	char *di_compat_strp;
2870 	size_t di_compat_strlen;
2871 
2872 	if (DEVI(dip)->devi_compat_names) {
2873 		return (DDI_SUCCESS);
2874 	}
2875 
2876 	prop_flags = DDI_PROP_TYPE_STRING | DDI_PROP_DONTPASS;
2877 
2878 	if (flag & KM_NOSLEEP) {
2879 		prop_flags |= DDI_PROP_DONTSLEEP;
2880 	}
2881 
2882 	if (ndi_dev_is_prom_node(dip) == 0) {
2883 		prop_flags |= DDI_PROP_NOTPROM;
2884 	}
2885 
2886 	rv = ddi_prop_lookup_common(DDI_DEV_T_ANY, dip, prop_flags,
2887 	    "compatible", &compatstrpp, &ncompatstrs,
2888 	    ddi_prop_fm_decode_strings);
2889 
2890 	if (rv == DDI_PROP_NOT_FOUND) {
2891 		return (DDI_SUCCESS);
2892 	}
2893 
2894 	if (rv != DDI_PROP_SUCCESS) {
2895 		return (DDI_FAILURE);
2896 	}
2897 
2898 	/*
2899 	 * encode the compatible property data in the dev_info node
2900 	 */
2901 	rv = DDI_SUCCESS;
2902 	if (ncompatstrs != 0) {
2903 		di_compat_strp = encode_composite_string(compatstrpp,
2904 		    ncompatstrs, &di_compat_strlen, flag);
2905 		if (di_compat_strp != NULL) {
2906 			DEVI(dip)->devi_compat_names = di_compat_strp;
2907 			DEVI(dip)->devi_compat_length = di_compat_strlen;
2908 		} else {
2909 			rv = DDI_FAILURE;
2910 		}
2911 	}
2912 	ddi_prop_free(compatstrpp);
2913 	return (rv);
2914 }
2915 
2916 /*
2917  * Create a composite string from a list of strings.
2918  *
2919  * A composite string consists of a single buffer containing one
2920  * or more NULL terminated strings.
2921  */
2922 static char *
encode_composite_string(char ** strings,uint_t nstrings,size_t * retsz,uint_t flag)2923 encode_composite_string(char **strings, uint_t nstrings, size_t *retsz,
2924     uint_t flag)
2925 {
2926 	uint_t index;
2927 	char  **strpp;
2928 	uint_t slen;
2929 	size_t cbuf_sz = 0;
2930 	char *cbuf_p;
2931 	char *cbuf_ip;
2932 
2933 	if (strings == NULL || nstrings == 0 || retsz == NULL) {
2934 		return (NULL);
2935 	}
2936 
2937 	for (index = 0, strpp = strings; index < nstrings; index++)
2938 		cbuf_sz += strlen(*(strpp++)) + 1;
2939 
2940 	if ((cbuf_p = kmem_alloc(cbuf_sz, flag)) == NULL) {
2941 		cmn_err(CE_NOTE,
2942 		    "?failed to allocate device node compatstr");
2943 		return (NULL);
2944 	}
2945 
2946 	cbuf_ip = cbuf_p;
2947 	for (index = 0, strpp = strings; index < nstrings; index++) {
2948 		slen = strlen(*strpp);
2949 		bcopy(*(strpp++), cbuf_ip, slen);
2950 		cbuf_ip += slen;
2951 		*(cbuf_ip++) = '\0';
2952 	}
2953 
2954 	*retsz = cbuf_sz;
2955 	return (cbuf_p);
2956 }
2957 
2958 static void
link_to_driver_list(dev_info_t * dip)2959 link_to_driver_list(dev_info_t *dip)
2960 {
2961 	major_t major = DEVI(dip)->devi_major;
2962 	struct devnames *dnp;
2963 
2964 	ASSERT(major != DDI_MAJOR_T_NONE);
2965 
2966 	/*
2967 	 * Remove from orphan list
2968 	 */
2969 	if (ndi_dev_is_persistent_node(dip)) {
2970 		dnp = &orphanlist;
2971 		remove_from_dn_list(dnp, dip);
2972 	}
2973 
2974 	/*
2975 	 * Add to per driver list
2976 	 */
2977 	dnp = &devnamesp[major];
2978 	add_to_dn_list(dnp, dip);
2979 }
2980 
2981 static void
unlink_from_driver_list(dev_info_t * dip)2982 unlink_from_driver_list(dev_info_t *dip)
2983 {
2984 	major_t major = DEVI(dip)->devi_major;
2985 	struct devnames *dnp;
2986 
2987 	ASSERT(major != DDI_MAJOR_T_NONE);
2988 
2989 	/*
2990 	 * Remove from per-driver list
2991 	 */
2992 	dnp = &devnamesp[major];
2993 	remove_from_dn_list(dnp, dip);
2994 
2995 	/*
2996 	 * Add to orphan list
2997 	 */
2998 	if (ndi_dev_is_persistent_node(dip)) {
2999 		dnp = &orphanlist;
3000 		add_to_dn_list(dnp, dip);
3001 	}
3002 }
3003 
3004 /*
3005  * scan the per-driver list looking for dev_info "dip"
3006  */
3007 static dev_info_t *
in_dn_list(struct devnames * dnp,dev_info_t * dip)3008 in_dn_list(struct devnames *dnp, dev_info_t *dip)
3009 {
3010 	struct dev_info *idevi;
3011 
3012 	if ((idevi = DEVI(dnp->dn_head)) == NULL)
3013 		return (NULL);
3014 
3015 	while (idevi) {
3016 		if (idevi == DEVI(dip))
3017 			return (dip);
3018 		idevi = idevi->devi_next;
3019 	}
3020 	return (NULL);
3021 }
3022 
3023 /*
3024  * insert devinfo node 'dip' into the per-driver instance list
3025  * headed by 'dnp'
3026  *
3027  * Nodes on the per-driver list are ordered: HW - SID - PSEUDO.  The order is
3028  * required for merging of .conf file data to work properly.
3029  */
3030 static void
add_to_ordered_dn_list(struct devnames * dnp,dev_info_t * dip)3031 add_to_ordered_dn_list(struct devnames *dnp, dev_info_t *dip)
3032 {
3033 	dev_info_t **dipp;
3034 
3035 	ASSERT(mutex_owned(&(dnp->dn_lock)));
3036 
3037 	dipp = &dnp->dn_head;
3038 	if (ndi_dev_is_prom_node(dip)) {
3039 		/*
3040 		 * Find the first non-prom node or end of list
3041 		 */
3042 		while (*dipp && (ndi_dev_is_prom_node(*dipp) != 0)) {
3043 			dipp = (dev_info_t **)&DEVI(*dipp)->devi_next;
3044 		}
3045 	} else if (ndi_dev_is_persistent_node(dip)) {
3046 		/*
3047 		 * Find the first non-persistent node
3048 		 */
3049 		while (*dipp && (ndi_dev_is_persistent_node(*dipp) != 0)) {
3050 			dipp = (dev_info_t **)&DEVI(*dipp)->devi_next;
3051 		}
3052 	} else {
3053 		/*
3054 		 * Find the end of the list
3055 		 */
3056 		while (*dipp) {
3057 			dipp = (dev_info_t **)&DEVI(*dipp)->devi_next;
3058 		}
3059 	}
3060 
3061 	DEVI(dip)->devi_next = DEVI(*dipp);
3062 	*dipp = dip;
3063 }
3064 
3065 /*
3066  * add a list of device nodes to the device node list in the
3067  * devnames structure
3068  */
3069 static void
add_to_dn_list(struct devnames * dnp,dev_info_t * dip)3070 add_to_dn_list(struct devnames *dnp, dev_info_t *dip)
3071 {
3072 	/*
3073 	 * Look to see if node already exists
3074 	 */
3075 	LOCK_DEV_OPS(&(dnp->dn_lock));
3076 	if (in_dn_list(dnp, dip)) {
3077 		cmn_err(CE_NOTE, "add_to_dn_list: node %s already in list",
3078 		    DEVI(dip)->devi_node_name);
3079 	} else {
3080 		add_to_ordered_dn_list(dnp, dip);
3081 	}
3082 	UNLOCK_DEV_OPS(&(dnp->dn_lock));
3083 }
3084 
3085 static void
remove_from_dn_list(struct devnames * dnp,dev_info_t * dip)3086 remove_from_dn_list(struct devnames *dnp, dev_info_t *dip)
3087 {
3088 	dev_info_t **plist;
3089 
3090 	LOCK_DEV_OPS(&(dnp->dn_lock));
3091 
3092 	plist = (dev_info_t **)&dnp->dn_head;
3093 	while (*plist && (*plist != dip)) {
3094 		plist = (dev_info_t **)&DEVI(*plist)->devi_next;
3095 	}
3096 
3097 	if (*plist != NULL) {
3098 		ASSERT(*plist == dip);
3099 		*plist = (dev_info_t *)(DEVI(dip)->devi_next);
3100 		DEVI(dip)->devi_next = NULL;
3101 	} else {
3102 		NDI_CONFIG_DEBUG((CE_NOTE,
3103 		    "remove_from_dn_list: node %s not found in list",
3104 		    DEVI(dip)->devi_node_name));
3105 	}
3106 
3107 	UNLOCK_DEV_OPS(&(dnp->dn_lock));
3108 }
3109 
3110 /*
3111  * Add and remove reference driver global property list
3112  */
3113 static void
add_global_props(dev_info_t * dip)3114 add_global_props(dev_info_t *dip)
3115 {
3116 	struct devnames *dnp;
3117 	ddi_prop_list_t *plist;
3118 
3119 	ASSERT(DEVI(dip)->devi_global_prop_list == NULL);
3120 	ASSERT(DEVI(dip)->devi_major != DDI_MAJOR_T_NONE);
3121 
3122 	dnp = &devnamesp[DEVI(dip)->devi_major];
3123 	LOCK_DEV_OPS(&dnp->dn_lock);
3124 	plist = dnp->dn_global_prop_ptr;
3125 	if (plist == NULL) {
3126 		UNLOCK_DEV_OPS(&dnp->dn_lock);
3127 		return;
3128 	}
3129 	i_ddi_prop_list_hold(plist, dnp);
3130 	UNLOCK_DEV_OPS(&dnp->dn_lock);
3131 
3132 	mutex_enter(&DEVI(dip)->devi_lock);
3133 	DEVI(dip)->devi_global_prop_list = plist;
3134 	mutex_exit(&DEVI(dip)->devi_lock);
3135 }
3136 
3137 static void
remove_global_props(dev_info_t * dip)3138 remove_global_props(dev_info_t *dip)
3139 {
3140 	ddi_prop_list_t *proplist;
3141 
3142 	mutex_enter(&DEVI(dip)->devi_lock);
3143 	proplist = DEVI(dip)->devi_global_prop_list;
3144 	DEVI(dip)->devi_global_prop_list = NULL;
3145 	mutex_exit(&DEVI(dip)->devi_lock);
3146 
3147 	if (proplist) {
3148 		major_t major;
3149 		struct devnames *dnp;
3150 
3151 		major = ddi_driver_major(dip);
3152 		ASSERT(major != DDI_MAJOR_T_NONE);
3153 		dnp = &devnamesp[major];
3154 		LOCK_DEV_OPS(&dnp->dn_lock);
3155 		i_ddi_prop_list_rele(proplist, dnp);
3156 		UNLOCK_DEV_OPS(&dnp->dn_lock);
3157 	}
3158 }
3159 
3160 #ifdef DEBUG
/*
 * Set this variable to 0 to disable the optimization, or to 2 (or
 * greater) to print a debug message for each optimized bus operation.
 */
3165 static int optimize_dtree = 1;
3166 
3167 static void
debug_dtree(dev_info_t * devi,struct dev_info * adevi,char * service)3168 debug_dtree(dev_info_t *devi, struct dev_info *adevi, char *service)
3169 {
3170 	char *adeviname, *buf;
3171 
3172 	/*
3173 	 * Don't print unless optimize dtree is set to 2+
3174 	 */
3175 	if (optimize_dtree <= 1)
3176 		return;
3177 
3178 	buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
3179 	adeviname = ddi_deviname((dev_info_t *)adevi, buf);
3180 	if (*adeviname == '\0')
3181 		adeviname = "root";
3182 
3183 	cmn_err(CE_CONT, "%s %s -> %s\n",
3184 	    ddi_deviname(devi, buf), service, adeviname);
3185 
3186 	kmem_free(buf, MAXNAMELEN);
3187 }
3188 #else /* DEBUG */
3189 #define	debug_dtree(a1, a2, a3)	 /* nothing */
3190 #endif	/* DEBUG */
3191 
3192 static void
ddi_optimize_dtree(dev_info_t * devi)3193 ddi_optimize_dtree(dev_info_t *devi)
3194 {
3195 	struct dev_info *pdevi;
3196 	struct bus_ops *b;
3197 
3198 	pdevi = DEVI(devi)->devi_parent;
3199 	ASSERT(pdevi);
3200 
3201 	/*
3202 	 * Set the unoptimized values
3203 	 */
3204 	DEVI(devi)->devi_bus_map_fault = pdevi;
3205 	DEVI(devi)->devi_bus_dma_allochdl = pdevi;
3206 	DEVI(devi)->devi_bus_dma_freehdl = pdevi;
3207 	DEVI(devi)->devi_bus_dma_bindhdl = pdevi;
3208 	DEVI(devi)->devi_bus_dma_bindfunc =
3209 	    pdevi->devi_ops->devo_bus_ops->bus_dma_bindhdl;
3210 	DEVI(devi)->devi_bus_dma_unbindhdl = pdevi;
3211 	DEVI(devi)->devi_bus_dma_unbindfunc =
3212 	    pdevi->devi_ops->devo_bus_ops->bus_dma_unbindhdl;
3213 	DEVI(devi)->devi_bus_dma_flush = pdevi;
3214 	DEVI(devi)->devi_bus_dma_win = pdevi;
3215 	DEVI(devi)->devi_bus_dma_ctl = pdevi;
3216 	DEVI(devi)->devi_bus_ctl = pdevi;
3217 
3218 #ifdef DEBUG
3219 	if (optimize_dtree == 0)
3220 		return;
3221 #endif /* DEBUG */
3222 
3223 	b = pdevi->devi_ops->devo_bus_ops;
3224 
3225 	if (i_ddi_map_fault == b->bus_map_fault) {
3226 		DEVI(devi)->devi_bus_map_fault = pdevi->devi_bus_map_fault;
3227 		debug_dtree(devi, DEVI(devi)->devi_bus_map_fault,
3228 		    "bus_map_fault");
3229 	}
3230 
3231 	if (ddi_dma_allochdl == b->bus_dma_allochdl) {
3232 		DEVI(devi)->devi_bus_dma_allochdl =
3233 		    pdevi->devi_bus_dma_allochdl;
3234 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_allochdl,
3235 		    "bus_dma_allochdl");
3236 	}
3237 
3238 	if (ddi_dma_freehdl == b->bus_dma_freehdl) {
3239 		DEVI(devi)->devi_bus_dma_freehdl = pdevi->devi_bus_dma_freehdl;
3240 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_freehdl,
3241 		    "bus_dma_freehdl");
3242 	}
3243 
3244 	if (ddi_dma_bindhdl == b->bus_dma_bindhdl) {
3245 		DEVI(devi)->devi_bus_dma_bindhdl = pdevi->devi_bus_dma_bindhdl;
3246 		DEVI(devi)->devi_bus_dma_bindfunc =
3247 		    pdevi->devi_bus_dma_bindhdl->devi_ops->
3248 		    devo_bus_ops->bus_dma_bindhdl;
3249 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_bindhdl,
3250 		    "bus_dma_bindhdl");
3251 	}
3252 
3253 	if (ddi_dma_unbindhdl == b->bus_dma_unbindhdl) {
3254 		DEVI(devi)->devi_bus_dma_unbindhdl =
3255 		    pdevi->devi_bus_dma_unbindhdl;
3256 		DEVI(devi)->devi_bus_dma_unbindfunc =
3257 		    pdevi->devi_bus_dma_unbindhdl->devi_ops->
3258 		    devo_bus_ops->bus_dma_unbindhdl;
3259 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_unbindhdl,
3260 		    "bus_dma_unbindhdl");
3261 	}
3262 
3263 	if (ddi_dma_flush == b->bus_dma_flush) {
3264 		DEVI(devi)->devi_bus_dma_flush = pdevi->devi_bus_dma_flush;
3265 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_flush,
3266 		    "bus_dma_flush");
3267 	}
3268 
3269 	if (ddi_dma_win == b->bus_dma_win) {
3270 		DEVI(devi)->devi_bus_dma_win = pdevi->devi_bus_dma_win;
3271 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_win,
3272 		    "bus_dma_win");
3273 	}
3274 
3275 	if (ddi_dma_mctl == b->bus_dma_ctl) {
3276 		DEVI(devi)->devi_bus_dma_ctl = pdevi->devi_bus_dma_ctl;
3277 		debug_dtree(devi, DEVI(devi)->devi_bus_dma_ctl, "bus_dma_ctl");
3278 	}
3279 
3280 	if (ddi_ctlops == b->bus_ctl) {
3281 		DEVI(devi)->devi_bus_ctl = pdevi->devi_bus_ctl;
3282 		debug_dtree(devi, DEVI(devi)->devi_bus_ctl, "bus_ctl");
3283 	}
3284 }
3285 
3286 #define	MIN_DEVINFO_LOG_SIZE	max_ncpus
3287 #define	MAX_DEVINFO_LOG_SIZE	max_ncpus * 10
3288 
3289 static void
da_log_init()3290 da_log_init()
3291 {
3292 	devinfo_log_header_t *dh;
3293 	int logsize = devinfo_log_size;
3294 
3295 	if (logsize == 0)
3296 		logsize = MIN_DEVINFO_LOG_SIZE;
3297 	else if (logsize > MAX_DEVINFO_LOG_SIZE)
3298 		logsize = MAX_DEVINFO_LOG_SIZE;
3299 
3300 	dh = kmem_alloc(logsize * PAGESIZE, KM_SLEEP);
3301 	mutex_init(&dh->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3302 	dh->dh_max = ((logsize * PAGESIZE) - sizeof (*dh)) /
3303 	    sizeof (devinfo_audit_t) + 1;
3304 	dh->dh_curr = -1;
3305 	dh->dh_hits = 0;
3306 
3307 	devinfo_audit_log = dh;
3308 }
3309 
3310 /*
3311  * Log the stack trace in per-devinfo audit structure and also enter
3312  * it into a system wide log for recording the time history.
3313  */
3314 static void
da_log_enter(dev_info_t * dip)3315 da_log_enter(dev_info_t *dip)
3316 {
3317 	devinfo_audit_t *da_log, *da = DEVI(dip)->devi_audit;
3318 	devinfo_log_header_t *dh = devinfo_audit_log;
3319 
3320 	if (devinfo_audit_log == NULL)
3321 		return;
3322 
3323 	ASSERT(da != NULL);
3324 
3325 	da->da_devinfo = dip;
3326 	da->da_timestamp = gethrtime();
3327 	da->da_thread = curthread;
3328 	da->da_node_state = DEVI(dip)->devi_node_state;
3329 	da->da_device_state = DEVI(dip)->devi_state;
3330 	da->da_depth = getpcstack(da->da_stack, DDI_STACK_DEPTH);
3331 
3332 	/*
3333 	 * Copy into common log and note the location for tracing history
3334 	 */
3335 	mutex_enter(&dh->dh_lock);
3336 	dh->dh_hits++;
3337 	dh->dh_curr++;
3338 	if (dh->dh_curr >= dh->dh_max)
3339 		dh->dh_curr -= dh->dh_max;
3340 	da_log = &dh->dh_entry[dh->dh_curr];
3341 	mutex_exit(&dh->dh_lock);
3342 
3343 	bcopy(da, da_log, sizeof (devinfo_audit_t));
3344 	da->da_lastlog = da_log;
3345 }
3346 
3347 static void
attach_drivers()3348 attach_drivers()
3349 {
3350 	int i;
3351 	for (i = 0; i < devcnt; i++) {
3352 		struct devnames *dnp = &devnamesp[i];
3353 		if ((dnp->dn_flags & DN_FORCE_ATTACH) &&
3354 		    (ddi_hold_installed_driver((major_t)i) != NULL))
3355 			ddi_rele_driver((major_t)i);
3356 	}
3357 }
3358 
3359 /*
3360  * Launch a thread to force attach drivers. This avoids penalty on boot time.
3361  */
3362 void
i_ddi_forceattach_drivers()3363 i_ddi_forceattach_drivers()
3364 {
3365 
3366 	/*
3367 	 * Attach IB VHCI driver before the force-attach thread attaches the
3368 	 * IB HCA driver. IB HCA driver will fail if IB Nexus has not yet
3369 	 * been attached.
3370 	 */
3371 	(void) ddi_hold_installed_driver(ddi_name_to_major("ib"));
3372 
3373 	(void) thread_create(NULL, 0, (void (*)())attach_drivers, NULL, 0, &p0,
3374 	    TS_RUN, minclsyspri);
3375 }
3376 
/*
 * This is a private DDI interface for optimizing boot performance.
 * I/O subsystem initialization is considered complete when devfsadm
 * is executed.
 *
 * Returns the value of sysevent_daemon_init.
 *
 * NOTE: The start of syseventd happens to be a convenient indicator
 *	of the completion of I/O initialization during boot.
 *	The implementation should be replaced by something more robust.
 */
int
i_ddi_io_initialized(void)
{
	extern int sysevent_daemon_init;

	return (sysevent_daemon_init);
}
3392 
3393 /*
3394  * May be used to determine system boot state
3395  * "Available" means the system is for the most part up
3396  * and initialized, with all system services either up or
3397  * capable of being started.  This state is set by devfsadm
3398  * during the boot process.  The /dev filesystem infers
3399  * from this when implicit reconfig can be performed,
3400  * ie, devfsadm can be invoked.  Please avoid making
3401  * further use of this unless it's really necessary.
3402  */
3403 int
i_ddi_sysavail()3404 i_ddi_sysavail()
3405 {
3406 	return (devname_state & DS_SYSAVAIL);
3407 }
3408 
3409 /*
3410  * May be used to determine if boot is a reconfigure boot.
3411  */
3412 int
i_ddi_reconfig()3413 i_ddi_reconfig()
3414 {
3415 	return (devname_state & DS_RECONFIG);
3416 }
3417 
3418 /*
3419  * Note system services are up, inform /dev.
3420  */
3421 void
i_ddi_set_sysavail()3422 i_ddi_set_sysavail()
3423 {
3424 	if ((devname_state & DS_SYSAVAIL) == 0) {
3425 		devname_state |= DS_SYSAVAIL;
3426 		sdev_devstate_change();
3427 	}
3428 }
3429 
3430 /*
3431  * Note reconfiguration boot, inform /dev.
3432  */
3433 void
i_ddi_set_reconfig()3434 i_ddi_set_reconfig()
3435 {
3436 	if ((devname_state & DS_RECONFIG) == 0) {
3437 		devname_state |= DS_RECONFIG;
3438 		sdev_devstate_change();
3439 	}
3440 }
3441 
3442 
3443 /*
3444  * device tree walking
3445  */
3446 
/*
 * One element of a singly linked list of devinfo nodes, built with
 * append_node() and released with free_list().
 */
struct walk_elem {
	struct walk_elem *next;	/* following element; NULL at the tail */
	dev_info_t *dip;	/* devinfo node recorded by this element */
};
3451 
3452 static void
free_list(struct walk_elem * list)3453 free_list(struct walk_elem *list)
3454 {
3455 	while (list) {
3456 		struct walk_elem *next = list->next;
3457 		kmem_free(list, sizeof (*list));
3458 		list = next;
3459 	}
3460 }
3461 
3462 static void
append_node(struct walk_elem ** list,dev_info_t * dip)3463 append_node(struct walk_elem **list, dev_info_t *dip)
3464 {
3465 	struct walk_elem *tail;
3466 	struct walk_elem *elem = kmem_alloc(sizeof (*elem), KM_SLEEP);
3467 
3468 	elem->next = NULL;
3469 	elem->dip = dip;
3470 
3471 	if (*list == NULL) {
3472 		*list = elem;
3473 		return;
3474 	}
3475 
3476 	tail = *list;
3477 	while (tail->next)
3478 		tail = tail->next;
3479 
3480 	tail->next = elem;
3481 }
3482 
3483 /*
3484  * The implementation of ddi_walk_devs().
3485  */
3486 static int
walk_devs(dev_info_t * dip,int (* f)(dev_info_t *,void *),void * arg,int do_locking)3487 walk_devs(dev_info_t *dip, int (*f)(dev_info_t *, void *), void *arg,
3488     int do_locking)
3489 {
3490 	struct walk_elem *head = NULL;
3491 
3492 	/*
3493 	 * Do it in two passes. First pass invoke callback on each
3494 	 * dip on the sibling list. Second pass invoke callback on
3495 	 * children of each dip.
3496 	 */
3497 	while (dip) {
3498 		switch ((*f)(dip, arg)) {
3499 		case DDI_WALK_TERMINATE:
3500 			free_list(head);
3501 			return (DDI_WALK_TERMINATE);
3502 
3503 		case DDI_WALK_PRUNESIB:
3504 			/* ignore sibling by setting dip to NULL */
3505 			append_node(&head, dip);
3506 			dip = NULL;
3507 			break;
3508 
3509 		case DDI_WALK_PRUNECHILD:
3510 			/* don't worry about children */
3511 			dip = ddi_get_next_sibling(dip);
3512