xref: /illumos-gate/usr/src/uts/common/xen/os/xvdi.c (revision 7eea693d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Xen virtual device driver interfaces
29  */
30 
31 /*
32  * todo:
33  * + name space clean up:
34  *	xvdi_* - public xen interfaces, for use by all leaf drivers
35  *	xd_* - public xen data structures
36  *	i_xvdi_* - implementation private functions
37  *	xendev_* - xendev driver interfaces, both internal and in cb_ops/bus_ops
38  * + add mdb dcmds to dump ring status
39  * + implement xvdi_xxx to wrap xenbus_xxx read/write function
40  * + convert (xendev_ring_t *) into xvdi_ring_handle_t
41  */
42 #include <sys/conf.h>
43 #include <sys/param.h>
44 #include <sys/kmem.h>
45 #include <vm/seg_kmem.h>
46 #include <sys/debug.h>
47 #include <sys/modctl.h>
48 #include <sys/autoconf.h>
49 #include <sys/ddi_impldefs.h>
50 #include <sys/ddi_subrdefs.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunndi.h>
54 #include <sys/sunldi.h>
55 #include <sys/fs/dv_node.h>
56 #include <sys/avintr.h>
57 #include <sys/psm.h>
58 #include <sys/spl.h>
59 #include <sys/promif.h>
60 #include <sys/list.h>
61 #include <sys/bootconf.h>
62 #include <sys/bootsvcs.h>
63 #include <sys/bootinfo.h>
64 #include <sys/note.h>
65 #ifdef XPV_HVM_DRIVER
66 #include <sys/xpv_support.h>
67 #include <sys/hypervisor.h>
68 #include <public/grant_table.h>
69 #include <public/xen.h>
70 #include <public/io/xenbus.h>
71 #include <public/io/xs_wire.h>
72 #include <public/event_channel.h>
73 #include <public/io/xenbus.h>
74 #else /* XPV_HVM_DRIVER */
75 #include <sys/hypervisor.h>
76 #include <sys/xen_mmu.h>
77 #include <xen/sys/xenbus_impl.h>
78 #include <sys/evtchn_impl.h>
79 #endif /* XPV_HVM_DRIVER */
80 #include <sys/gnttab.h>
81 #include <xen/sys/xendev.h>
82 #include <vm/hat_i86.h>
83 #include <sys/scsi/generic/inquiry.h>
84 #include <util/sscanf.h>
85 #include <xen/public/io/xs_wire.h>
86 
87 
/*
 * Minimal ASCII character classification helpers.
 * The argument is evaluated more than once, so it must not have
 * side effects.
 */
#define	isdigit(ch)	('0' <= (ch) && (ch) <= '9')
#define	isxdigit(ch)	(isdigit(ch) || \
			('a' <= (ch) && (ch) <= 'f') || \
			('A' <= (ch) && (ch) <= 'F'))
91 
92 static void xvdi_ring_init_sring(xendev_ring_t *);
93 static void xvdi_ring_init_front_ring(xendev_ring_t *, size_t, size_t);
94 #ifndef XPV_HVM_DRIVER
95 static void xvdi_ring_init_back_ring(xendev_ring_t *, size_t, size_t);
96 #endif
97 static void xvdi_reinit_ring(dev_info_t *, grant_ref_t *, xendev_ring_t *);
98 
99 static int i_xvdi_add_watches(dev_info_t *);
100 static void i_xvdi_rem_watches(dev_info_t *);
101 
102 static int i_xvdi_add_watch_oestate(dev_info_t *);
103 static void i_xvdi_rem_watch_oestate(dev_info_t *);
104 static void i_xvdi_oestate_cb(struct xenbus_device *, XenbusState);
105 static void i_xvdi_oestate_handler(void *);
106 
107 static int i_xvdi_add_watch_hpstate(dev_info_t *);
108 static void i_xvdi_rem_watch_hpstate(dev_info_t *);
109 static void i_xvdi_hpstate_cb(struct xenbus_watch *, const char **,
110     unsigned int);
111 static void i_xvdi_hpstate_handler(void *);
112 
113 static int i_xvdi_add_watch_bepath(dev_info_t *);
114 static void i_xvdi_rem_watch_bepath(dev_info_t *);
115 static void i_xvdi_bepath_cb(struct xenbus_watch *, const char **,
116     unsigned in);
117 
118 static void xendev_offline_device(void *);
119 
120 static void i_xvdi_probe_path_cb(struct xenbus_watch *, const char **,
121     unsigned int);
122 static void i_xvdi_probe_path_handler(void *);
123 
124 typedef struct oestate_evt {
125 	dev_info_t *dip;
126 	XenbusState state;
127 } i_oestate_evt_t;
128 
129 typedef struct xd_cfg {
130 	xendev_devclass_t devclass;
131 	char *xsdev;
132 	char *xs_path_fe;
133 	char *xs_path_be;
134 	char *node_fe;
135 	char *node_be;
136 	char *device_type;
137 	int xd_ipl;
138 	int flags;
139 } i_xd_cfg_t;
140 
/*
 * Values for the 'flags' member of i_xd_cfg_t: the kinds of domain in
 * which a device class may be enumerated.
 */
#define	XD_DOM_ZERO	0x01	/* dom0 only. */
#define	XD_DOM_GUEST	0x02	/* Guest domains (i.e. non-dom0). */
#define	XD_DOM_IO	0x04	/* IO domains. */

/* Both dom0 and guest domains. */
#define	XD_DOM_ALL	(XD_DOM_ZERO | XD_DOM_GUEST)
146 
147 static i_xd_cfg_t xdci[] = {
148 	{ XEN_CONSOLE, NULL, NULL, NULL, "xencons", NULL,
149 	    "console", IPL_CONS, XD_DOM_ALL, },
150 
151 	{ XEN_VNET, "vif", "device/vif", "backend/vif", "xnf", "xnb",
152 	    "network", IPL_VIF, XD_DOM_ALL, },
153 
154 	{ XEN_VBLK, "vbd", "device/vbd", "backend/vbd", "xdf", "xdb",
155 	    "block", IPL_VBD, XD_DOM_ALL, },
156 
157 	{ XEN_BLKTAP, "tap", NULL, "backend/tap", NULL, "xpvtap",
158 	    "block", IPL_VBD, XD_DOM_ALL, },
159 
160 	{ XEN_XENBUS, NULL, NULL, NULL, "xenbus", NULL,
161 	    NULL, 0, XD_DOM_ALL, },
162 
163 	{ XEN_DOMCAPS, NULL, NULL, NULL, "domcaps", NULL,
164 	    NULL, 0, XD_DOM_ALL, },
165 
166 	{ XEN_BALLOON, NULL, NULL, NULL, "balloon", NULL,
167 	    NULL, 0, XD_DOM_ALL, },
168 
169 	{ XEN_EVTCHN, NULL, NULL, NULL, "evtchn", NULL,
170 	    NULL, 0, XD_DOM_ZERO, },
171 
172 	{ XEN_PRIVCMD, NULL, NULL, NULL, "privcmd", NULL,
173 	    NULL, 0, XD_DOM_ZERO, },
174 };
175 #define	NXDC	(sizeof (xdci) / sizeof (xdci[0]))
176 
177 static void i_xvdi_enum_fe(dev_info_t *, i_xd_cfg_t *);
178 static void i_xvdi_enum_be(dev_info_t *, i_xd_cfg_t *);
179 static void i_xvdi_enum_worker(dev_info_t *, i_xd_cfg_t *, char *);
180 
181 /*
182  * Xen device channel device access and DMA attributes
183  */
184 static ddi_device_acc_attr_t xendev_dc_accattr = {
185 	DDI_DEVICE_ATTR_V0, DDI_NEVERSWAP_ACC, DDI_STRICTORDER_ACC
186 };
187 
188 static ddi_dma_attr_t xendev_dc_dmaattr = {
189 	DMA_ATTR_V0,		/* version of this structure */
190 	0,			/* lowest usable address */
191 	0xffffffffffffffffULL,	/* highest usable address */
192 	0x7fffffff,		/* maximum DMAable byte count */
193 	MMU_PAGESIZE,		/* alignment in bytes */
194 	0x7ff,			/* bitmap of burst sizes */
195 	1,			/* minimum transfer */
196 	0xffffffffU,		/* maximum transfer */
197 	0xffffffffffffffffULL,	/* maximum segment length */
198 	1,			/* maximum number of segments */
199 	1,			/* granularity */
200 	0,			/* flags (reserved) */
201 };
202 
203 static dev_info_t *xendev_dip = NULL;
204 
/* Bits of i_xvdi_debug selecting which XVDI_DPRINTF() messages print. */
#define	XVDI_DBG_STATE	0x01
#define	XVDI_DBG_PROBE	0x02

#ifdef DEBUG
int i_xvdi_debug = 0;

/*
 * Print a debug message via prom_printf() if 'flag' is set in
 * i_xvdi_debug.  At least one argument must follow 'format'.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement, e.g. as the unbraced body of an if/else.
 */
#define	XVDI_DPRINTF(flag, format, ...)				\
	do {							\
		if (i_xvdi_debug & (flag))			\
			prom_printf((format), __VA_ARGS__);	\
	} while (0)
#else
#define	XVDI_DPRINTF(flag, format, ...)
#endif /* DEBUG */
219 
220 static i_xd_cfg_t *
221 i_xvdi_devclass2cfg(xendev_devclass_t devclass)
222 {
223 	i_xd_cfg_t *xdcp;
224 	int i;
225 
226 	for (i = 0, xdcp = xdci; i < NXDC; i++, xdcp++)
227 		if (xdcp->devclass == devclass)
228 			return (xdcp);
229 
230 	return (NULL);
231 }
232 
233 int
234 xvdi_init_dev(dev_info_t *dip)
235 {
236 	xendev_devclass_t devcls;
237 	int vdevnum;
238 	domid_t domid;
239 	struct xendev_ppd *pdp;
240 	i_xd_cfg_t *xdcp;
241 	boolean_t backend;
242 	char xsnamebuf[TYPICALMAXPATHLEN];
243 	char *xsname;
244 	void *prop_str;
245 	unsigned int prop_len;
246 	char unitaddr[8];
247 
248 	devcls = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
249 	    DDI_PROP_DONTPASS, "devclass", XEN_INVAL);
250 	vdevnum = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
251 	    DDI_PROP_DONTPASS, "vdev", VDEV_NOXS);
252 	domid = (domid_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
253 	    DDI_PROP_DONTPASS, "domain", DOMID_SELF);
254 
255 	backend = (domid != DOMID_SELF);
256 	xdcp = i_xvdi_devclass2cfg(devcls);
257 	if (xdcp->device_type != NULL)
258 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
259 		    "device_type", xdcp->device_type);
260 
261 	pdp = kmem_zalloc(sizeof (*pdp), KM_SLEEP);
262 	pdp->xd_domain = domid;
263 	pdp->xd_vdevnum = vdevnum;
264 	pdp->xd_devclass = devcls;
265 	pdp->xd_evtchn = INVALID_EVTCHN;
266 	mutex_init(&pdp->xd_evt_lk, NULL, MUTEX_DRIVER, NULL);
267 	mutex_init(&pdp->xd_ndi_lk, NULL, MUTEX_DRIVER, NULL);
268 	ddi_set_parent_data(dip, pdp);
269 
270 	/*
271 	 * devices that do not need to interact with xenstore
272 	 */
273 	if (vdevnum == VDEV_NOXS) {
274 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
275 		    "unit-address", "0");
276 		if (devcls == XEN_CONSOLE)
277 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
278 			    "pm-hardware-state", "needs-suspend-resume");
279 		return (DDI_SUCCESS);
280 	}
281 
282 	/*
283 	 * PV devices that need to probe xenstore
284 	 */
285 
286 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
287 	    "pm-hardware-state", "needs-suspend-resume");
288 
289 	xsname = xsnamebuf;
290 	if (!backend)
291 		(void) snprintf(xsnamebuf, sizeof (xsnamebuf),
292 		    "%s/%d", xdcp->xs_path_fe, vdevnum);
293 	else
294 		(void) snprintf(xsnamebuf, sizeof (xsnamebuf),
295 		    "%s/%d/%d", xdcp->xs_path_be, domid, vdevnum);
296 	if ((xenbus_read_driver_state(xsname) >= XenbusStateClosing)) {
297 		/* Don't try to init a dev that may be closing */
298 		mutex_destroy(&pdp->xd_ndi_lk);
299 		mutex_destroy(&pdp->xd_evt_lk);
300 		kmem_free(pdp, sizeof (*pdp));
301 		ddi_set_parent_data(dip, NULL);
302 		return (DDI_FAILURE);
303 	}
304 
305 	pdp->xd_xsdev.nodename = i_ddi_strdup(xsname, KM_SLEEP);
306 	pdp->xd_xsdev.devicetype = xdcp->xsdev;
307 	pdp->xd_xsdev.frontend = (backend ? 0 : 1);
308 	pdp->xd_xsdev.data = dip;
309 	pdp->xd_xsdev.otherend_id = (backend ? domid : -1);
310 	if (i_xvdi_add_watches(dip) != DDI_SUCCESS) {
311 		cmn_err(CE_WARN, "xvdi_init_dev: "
312 		    "cannot add watches for %s", xsname);
313 		xvdi_uninit_dev(dip);
314 		return (DDI_FAILURE);
315 	}
316 
317 	if (backend)
318 		return (DDI_SUCCESS);
319 
320 	/*
321 	 * The unit-address for frontend devices is the name of the
322 	 * of the xenstore node containing the device configuration
323 	 * and is contained in the 'vdev' property.
324 	 * VIF devices are named using an incrementing integer.
325 	 * VBD devices are either named using the 16-bit dev_t value
326 	 * for linux 'hd' and 'xvd' devices, or a simple integer value
327 	 * in the range 0..767.  768 is the base value of the linux
328 	 * dev_t namespace, the dev_t value for 'hda'.
329 	 */
330 	(void) snprintf(unitaddr, sizeof (unitaddr), "%d", vdevnum);
331 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, "unit-address",
332 	    unitaddr);
333 
334 	switch (devcls) {
335 	case XEN_VNET:
336 		if (xenbus_read(XBT_NULL, xsname, "mac", (void *)&prop_str,
337 		    &prop_len) != 0)
338 			break;
339 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, "mac",
340 		    prop_str);
341 		kmem_free(prop_str, prop_len);
342 		break;
343 	case XEN_VBLK:
344 		/*
345 		 * cache a copy of the otherend name
346 		 * for ease of observeability
347 		 */
348 		if (xenbus_read(XBT_NULL, pdp->xd_xsdev.otherend, "dev",
349 		    &prop_str, &prop_len) != 0)
350 			break;
351 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
352 		    "dev-address", prop_str);
353 		kmem_free(prop_str, prop_len);
354 		break;
355 	default:
356 		break;
357 	}
358 
359 	return (DDI_SUCCESS);
360 }
361 
362 void
363 xvdi_uninit_dev(dev_info_t *dip)
364 {
365 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
366 
367 	if (pdp != NULL) {
368 		/* Remove any registered callbacks. */
369 		xvdi_remove_event_handler(dip, NULL);
370 
371 		/* Remove any registered watches. */
372 		i_xvdi_rem_watches(dip);
373 
374 		/* tell other end to close */
375 		if (pdp->xd_xsdev.otherend_id != (domid_t)-1)
376 			(void) xvdi_switch_state(dip, XBT_NULL,
377 			    XenbusStateClosed);
378 
379 		if (pdp->xd_xsdev.nodename != NULL)
380 			kmem_free((char *)(pdp->xd_xsdev.nodename),
381 			    strlen(pdp->xd_xsdev.nodename) + 1);
382 
383 		ddi_set_parent_data(dip, NULL);
384 
385 		mutex_destroy(&pdp->xd_ndi_lk);
386 		mutex_destroy(&pdp->xd_evt_lk);
387 		kmem_free(pdp, sizeof (*pdp));
388 	}
389 }
390 
391 /*
392  * Bind the event channel for this device instance.
393  * Currently we only support one evtchn per device instance.
394  */
395 int
396 xvdi_bind_evtchn(dev_info_t *dip, evtchn_port_t evtchn)
397 {
398 	struct xendev_ppd *pdp;
399 	domid_t oeid;
400 	int r;
401 
402 	pdp = ddi_get_parent_data(dip);
403 	ASSERT(pdp != NULL);
404 	ASSERT(pdp->xd_evtchn == INVALID_EVTCHN);
405 
406 	mutex_enter(&pdp->xd_evt_lk);
407 	if (pdp->xd_devclass == XEN_CONSOLE) {
408 		if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
409 			pdp->xd_evtchn = xen_info->console.domU.evtchn;
410 		} else {
411 			pdp->xd_evtchn = INVALID_EVTCHN;
412 			mutex_exit(&pdp->xd_evt_lk);
413 			return (DDI_SUCCESS);
414 		}
415 	} else {
416 		oeid = pdp->xd_xsdev.otherend_id;
417 		if (oeid == (domid_t)-1) {
418 			mutex_exit(&pdp->xd_evt_lk);
419 			return (DDI_FAILURE);
420 		}
421 
422 		if ((r = xen_bind_interdomain(oeid, evtchn, &pdp->xd_evtchn))) {
423 			xvdi_dev_error(dip, r, "bind event channel");
424 			mutex_exit(&pdp->xd_evt_lk);
425 			return (DDI_FAILURE);
426 		}
427 	}
428 #ifndef XPV_HVM_DRIVER
429 	pdp->xd_ispec.intrspec_vec = ec_bind_evtchn_to_irq(pdp->xd_evtchn);
430 #endif
431 	mutex_exit(&pdp->xd_evt_lk);
432 
433 	return (DDI_SUCCESS);
434 }
435 
436 /*
437  * Allocate an event channel for this device instance.
438  * Currently we only support one evtchn per device instance.
439  */
440 int
441 xvdi_alloc_evtchn(dev_info_t *dip)
442 {
443 	struct xendev_ppd *pdp;
444 	domid_t oeid;
445 	int rv;
446 
447 	pdp = ddi_get_parent_data(dip);
448 	ASSERT(pdp != NULL);
449 	ASSERT(pdp->xd_evtchn == INVALID_EVTCHN);
450 
451 	mutex_enter(&pdp->xd_evt_lk);
452 	if (pdp->xd_devclass == XEN_CONSOLE) {
453 		if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
454 			pdp->xd_evtchn = xen_info->console.domU.evtchn;
455 		} else {
456 			pdp->xd_evtchn = INVALID_EVTCHN;
457 			mutex_exit(&pdp->xd_evt_lk);
458 			return (DDI_SUCCESS);
459 		}
460 	} else {
461 		oeid = pdp->xd_xsdev.otherend_id;
462 		if (oeid == (domid_t)-1) {
463 			mutex_exit(&pdp->xd_evt_lk);
464 			return (DDI_FAILURE);
465 		}
466 
467 		if ((rv = xen_alloc_unbound_evtchn(oeid, &pdp->xd_evtchn))) {
468 			xvdi_dev_error(dip, rv, "bind event channel");
469 			mutex_exit(&pdp->xd_evt_lk);
470 			return (DDI_FAILURE);
471 		}
472 	}
473 #ifndef XPV_HVM_DRIVER
474 	pdp->xd_ispec.intrspec_vec = ec_bind_evtchn_to_irq(pdp->xd_evtchn);
475 #endif
476 	mutex_exit(&pdp->xd_evt_lk);
477 
478 	return (DDI_SUCCESS);
479 }
480 
481 /*
482  * Unbind the event channel for this device instance.
483  * Currently we only support one evtchn per device instance.
484  */
485 void
486 xvdi_free_evtchn(dev_info_t *dip)
487 {
488 	struct xendev_ppd *pdp;
489 
490 	pdp = ddi_get_parent_data(dip);
491 	ASSERT(pdp != NULL);
492 
493 	mutex_enter(&pdp->xd_evt_lk);
494 	if (pdp->xd_evtchn != INVALID_EVTCHN) {
495 #ifndef XPV_HVM_DRIVER
496 		ec_unbind_irq(pdp->xd_ispec.intrspec_vec);
497 		pdp->xd_ispec.intrspec_vec = 0;
498 #endif
499 		pdp->xd_evtchn = INVALID_EVTCHN;
500 	}
501 	mutex_exit(&pdp->xd_evt_lk);
502 }
503 
504 #ifndef XPV_HVM_DRIVER
505 /*
506  * Map an inter-domain communication ring for a virtual device.
507  * This is used by backend drivers.
508  */
509 int
510 xvdi_map_ring(dev_info_t *dip, size_t nentry, size_t entrysize,
511     grant_ref_t gref, xendev_ring_t **ringpp)
512 {
513 	domid_t oeid;
514 	gnttab_map_grant_ref_t mapop;
515 	gnttab_unmap_grant_ref_t unmapop;
516 	caddr_t ringva;
517 	ddi_acc_hdl_t *ap;
518 	ddi_acc_impl_t *iap;
519 	xendev_ring_t *ring;
520 	int err;
521 	char errstr[] = "mapping in ring buffer";
522 
523 	ring = kmem_zalloc(sizeof (xendev_ring_t), KM_SLEEP);
524 	oeid = xvdi_get_oeid(dip);
525 
526 	/* alloc va in backend dom for ring buffer */
527 	ringva = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
528 	    0, 0, 0, 0, VM_SLEEP);
529 
530 	/* map in ring page */
531 	hat_prepare_mapping(kas.a_hat, ringva, NULL);
532 	mapop.host_addr = (uint64_t)(uintptr_t)ringva;
533 	mapop.flags = GNTMAP_host_map;
534 	mapop.ref = gref;
535 	mapop.dom = oeid;
536 	err = xen_map_gref(GNTTABOP_map_grant_ref, &mapop, 1, B_FALSE);
537 	if (err) {
538 		xvdi_fatal_error(dip, err, errstr);
539 		goto errout1;
540 	}
541 
542 	if (mapop.status != 0) {
543 		xvdi_fatal_error(dip, err, errstr);
544 		goto errout2;
545 	}
546 	ring->xr_vaddr = ringva;
547 	ring->xr_grant_hdl = mapop.handle;
548 	ring->xr_gref = gref;
549 
550 	/*
551 	 * init an acc handle and associate it w/ this ring
552 	 * this is only for backend drivers. we get the memory by calling
553 	 * vmem_xalloc(), instead of calling any ddi function, so we have
554 	 * to init an acc handle by ourselves
555 	 */
556 	ring->xr_acc_hdl = impl_acc_hdl_alloc(KM_SLEEP, NULL);
557 	ap = impl_acc_hdl_get(ring->xr_acc_hdl);
558 	ap->ah_vers = VERS_ACCHDL;
559 	ap->ah_dip = dip;
560 	ap->ah_xfermodes = DDI_DMA_CONSISTENT;
561 	ap->ah_acc = xendev_dc_accattr;
562 	iap = (ddi_acc_impl_t *)ap->ah_platform_private;
563 	iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
564 	impl_acc_hdl_init(ap);
565 	ap->ah_offset = 0;
566 	ap->ah_len = (off_t)PAGESIZE;
567 	ap->ah_addr = ring->xr_vaddr;
568 
569 	/* init backend ring */
570 	xvdi_ring_init_back_ring(ring, nentry, entrysize);
571 
572 	*ringpp = ring;
573 
574 	return (DDI_SUCCESS);
575 
576 errout2:
577 	/* unmap ring page */
578 	unmapop.host_addr = (uint64_t)(uintptr_t)ringva;
579 	unmapop.handle = ring->xr_grant_hdl;
580 	unmapop.dev_bus_addr = NULL;
581 	(void) HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmapop, 1);
582 	hat_release_mapping(kas.a_hat, ringva);
583 errout1:
584 	vmem_xfree(heap_arena, ringva, PAGESIZE);
585 	kmem_free(ring, sizeof (xendev_ring_t));
586 	return (DDI_FAILURE);
587 }
588 
589 /*
590  * Unmap a ring for a virtual device.
591  * This is used by backend drivers.
592  */
593 void
594 xvdi_unmap_ring(xendev_ring_t *ring)
595 {
596 	gnttab_unmap_grant_ref_t unmapop;
597 
598 	ASSERT((ring != NULL) && (ring->xr_vaddr != NULL));
599 
600 	impl_acc_hdl_free(ring->xr_acc_hdl);
601 	unmapop.host_addr = (uint64_t)(uintptr_t)ring->xr_vaddr;
602 	unmapop.handle = ring->xr_grant_hdl;
603 	unmapop.dev_bus_addr = NULL;
604 	(void) HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmapop, 1);
605 	hat_release_mapping(kas.a_hat, ring->xr_vaddr);
606 	vmem_xfree(heap_arena, ring->xr_vaddr, PAGESIZE);
607 	kmem_free(ring, sizeof (xendev_ring_t));
608 }
609 #endif /* XPV_HVM_DRIVER */
610 
611 /*
612  * Re-initialise an inter-domain communications ring for the backend domain.
613  * ring will be re-initialized after re-grant succeed
614  * ring will be freed if fails to re-grant access to backend domain
615  * so, don't keep useful data in the ring
616  * used only in frontend driver
617  */
618 static void
619 xvdi_reinit_ring(dev_info_t *dip, grant_ref_t *gref, xendev_ring_t *ringp)
620 {
621 	paddr_t rpaddr;
622 	maddr_t rmaddr;
623 
624 	ASSERT((ringp != NULL) && (ringp->xr_paddr != 0));
625 	rpaddr = ringp->xr_paddr;
626 
627 	rmaddr = DOMAIN_IS_INITDOMAIN(xen_info) ? rpaddr : pa_to_ma(rpaddr);
628 	gnttab_grant_foreign_access_ref(ringp->xr_gref, xvdi_get_oeid(dip),
629 	    rmaddr >> PAGESHIFT, 0);
630 	*gref = ringp->xr_gref;
631 
632 	/* init frontend ring */
633 	xvdi_ring_init_sring(ringp);
634 	xvdi_ring_init_front_ring(ringp, ringp->xr_sring.fr.nr_ents,
635 	    ringp->xr_entry_size);
636 }
637 
638 /*
639  * allocate Xen inter-domain communications ring for Xen virtual devices
640  * used only in frontend driver
641  * if *ringpp is not NULL, we'll simply re-init it
642  */
643 int
644 xvdi_alloc_ring(dev_info_t *dip, size_t nentry, size_t entrysize,
645     grant_ref_t *gref, xendev_ring_t **ringpp)
646 {
647 	size_t len;
648 	xendev_ring_t *ring;
649 	ddi_dma_cookie_t dma_cookie;
650 	uint_t ncookies;
651 	grant_ref_t ring_gref;
652 	domid_t oeid;
653 	maddr_t rmaddr;
654 
655 	if (*ringpp) {
656 		xvdi_reinit_ring(dip, gref, *ringpp);
657 		return (DDI_SUCCESS);
658 	}
659 
660 	*ringpp = ring = kmem_zalloc(sizeof (xendev_ring_t), KM_SLEEP);
661 	oeid = xvdi_get_oeid(dip);
662 
663 	/*
664 	 * Allocate page for this ring buffer
665 	 */
666 	if (ddi_dma_alloc_handle(dip, &xendev_dc_dmaattr, DDI_DMA_SLEEP,
667 	    0, &ring->xr_dma_hdl) != DDI_SUCCESS)
668 		goto err;
669 
670 	if (ddi_dma_mem_alloc(ring->xr_dma_hdl, PAGESIZE,
671 	    &xendev_dc_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
672 	    &ring->xr_vaddr, &len, &ring->xr_acc_hdl) != DDI_SUCCESS) {
673 		ddi_dma_free_handle(&ring->xr_dma_hdl);
674 		goto err;
675 	}
676 
677 	if (ddi_dma_addr_bind_handle(ring->xr_dma_hdl, NULL,
678 	    ring->xr_vaddr, len, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
679 	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) {
680 		ddi_dma_mem_free(&ring->xr_acc_hdl);
681 		ring->xr_vaddr = NULL;
682 		ddi_dma_free_handle(&ring->xr_dma_hdl);
683 		goto err;
684 	}
685 	ASSERT(ncookies == 1);
686 	ring->xr_paddr = dma_cookie.dmac_laddress;
687 	rmaddr = DOMAIN_IS_INITDOMAIN(xen_info) ? ring->xr_paddr :
688 	    pa_to_ma(ring->xr_paddr);
689 
690 	if ((ring_gref = gnttab_grant_foreign_access(oeid,
691 	    rmaddr >> PAGESHIFT, 0)) == (grant_ref_t)-1) {
692 		(void) ddi_dma_unbind_handle(ring->xr_dma_hdl);
693 		ddi_dma_mem_free(&ring->xr_acc_hdl);
694 		ring->xr_vaddr = NULL;
695 		ddi_dma_free_handle(&ring->xr_dma_hdl);
696 		goto err;
697 	}
698 	*gref = ring->xr_gref = ring_gref;
699 
700 	/* init frontend ring */
701 	xvdi_ring_init_sring(ring);
702 	xvdi_ring_init_front_ring(ring, nentry, entrysize);
703 
704 	return (DDI_SUCCESS);
705 
706 err:
707 	kmem_free(ring, sizeof (xendev_ring_t));
708 	return (DDI_FAILURE);
709 }
710 
711 /*
712  * Release ring buffers allocated for Xen devices
713  * used for frontend driver
714  */
715 void
716 xvdi_free_ring(xendev_ring_t *ring)
717 {
718 	ASSERT((ring != NULL) && (ring->xr_vaddr != NULL));
719 
720 	(void) gnttab_end_foreign_access_ref(ring->xr_gref, 0);
721 	(void) ddi_dma_unbind_handle(ring->xr_dma_hdl);
722 	ddi_dma_mem_free(&ring->xr_acc_hdl);
723 	ddi_dma_free_handle(&ring->xr_dma_hdl);
724 	kmem_free(ring, sizeof (xendev_ring_t));
725 }
726 
727 dev_info_t *
728 xvdi_create_dev(dev_info_t *parent, xendev_devclass_t devclass,
729     domid_t dom, int vdev)
730 {
731 	dev_info_t *dip;
732 	boolean_t backend;
733 	i_xd_cfg_t *xdcp;
734 	char xsnamebuf[TYPICALMAXPATHLEN];
735 	char *type, *node = NULL, *xsname = NULL;
736 	unsigned int tlen;
737 	int ret;
738 
739 	ASSERT(DEVI_BUSY_OWNED(parent));
740 
741 	backend = (dom != DOMID_SELF);
742 	xdcp = i_xvdi_devclass2cfg(devclass);
743 	ASSERT(xdcp != NULL);
744 
745 	if (vdev != VDEV_NOXS) {
746 		if (!backend) {
747 			(void) snprintf(xsnamebuf, sizeof (xsnamebuf),
748 			    "%s/%d", xdcp->xs_path_fe, vdev);
749 			xsname = xsnamebuf;
750 			node = xdcp->node_fe;
751 		} else {
752 			(void) snprintf(xsnamebuf, sizeof (xsnamebuf),
753 			    "%s/%d/%d", xdcp->xs_path_be, dom, vdev);
754 			xsname = xsnamebuf;
755 			node = xdcp->node_be;
756 		}
757 	} else {
758 		node = xdcp->node_fe;
759 	}
760 
761 	/* Must have a driver to use. */
762 	if (node == NULL)
763 		return (NULL);
764 
765 	/*
766 	 * We need to check the state of this device before we go
767 	 * further, otherwise we'll end up with a dead loop if
768 	 * anything goes wrong.
769 	 */
770 	if ((xsname != NULL) &&
771 	    (xenbus_read_driver_state(xsname) >= XenbusStateClosing))
772 		return (NULL);
773 
774 	ndi_devi_alloc_sleep(parent, node, DEVI_SID_NODEID, &dip);
775 
776 	/*
777 	 * Driver binding uses the compatible property _before_ the
778 	 * node name, so we set the node name to the 'model' of the
779 	 * device (i.e. 'xnb' or 'xdb') and, if 'type' is present,
780 	 * encode both the model and the type in a compatible property
781 	 * (i.e. 'xnb,netfront' or 'xnb,SUNW_mac').  This allows a
782 	 * driver binding based on the <model,type> pair _before_ a
783 	 * binding based on the node name.
784 	 */
785 	if ((xsname != NULL) &&
786 	    (xenbus_read(XBT_NULL, xsname, "type", (void *)&type, &tlen)
787 	    == 0)) {
788 		size_t clen;
789 		char *c[1];
790 
791 		clen = strlen(node) + strlen(type) + 2;
792 		c[0] = kmem_alloc(clen, KM_SLEEP);
793 		(void) snprintf(c[0], clen, "%s,%s", node, type);
794 
795 		(void) ndi_prop_update_string_array(DDI_DEV_T_NONE,
796 		    dip, "compatible", (char **)c, 1);
797 
798 		kmem_free(c[0], clen);
799 		kmem_free(type, tlen);
800 	}
801 
802 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip, "devclass", devclass);
803 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip, "domain", dom);
804 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip, "vdev", vdev);
805 
806 	if (i_ddi_devi_attached(parent))
807 		ret = ndi_devi_online(dip, 0);
808 	else
809 		ret = ndi_devi_bind_driver(dip, 0);
810 	if (ret != NDI_SUCCESS)
811 		(void) ndi_devi_offline(dip, NDI_DEVI_REMOVE);
812 
813 	return (dip);
814 }
815 
816 /*
817  * xendev_enum_class()
818  */
819 void
820 xendev_enum_class(dev_info_t *parent, xendev_devclass_t devclass)
821 {
822 	boolean_t dom0 = DOMAIN_IS_INITDOMAIN(xen_info);
823 	boolean_t domU = !dom0;
824 	i_xd_cfg_t *xdcp;
825 
826 	xdcp = i_xvdi_devclass2cfg(devclass);
827 	ASSERT(xdcp != NULL);
828 
829 	if (dom0 && !(xdcp->flags & XD_DOM_ZERO))
830 		return;
831 
832 	if (domU && !(xdcp->flags & XD_DOM_GUEST))
833 		return;
834 
835 	if (xdcp->xsdev == NULL) {
836 		int circ;
837 
838 		/*
839 		 * Don't need to probe this kind of device from the
840 		 * store, just create one if it doesn't exist.
841 		 */
842 
843 		ndi_devi_enter(parent, &circ);
844 		if (xvdi_find_dev(parent, devclass, DOMID_SELF, VDEV_NOXS)
845 		    == NULL)
846 			(void) xvdi_create_dev(parent, devclass,
847 			    DOMID_SELF, VDEV_NOXS);
848 		ndi_devi_exit(parent, circ);
849 	} else {
850 		/*
851 		 * Probe this kind of device from the store, both
852 		 * frontend and backend.
853 		 */
854 		if (xdcp->node_fe != NULL) {
855 			i_xvdi_enum_fe(parent, xdcp);
856 		}
857 		if (xdcp->node_be != NULL) {
858 			i_xvdi_enum_be(parent, xdcp);
859 		}
860 	}
861 }
862 
863 /*
864  * xendev_enum_all()
865  */
866 void
867 xendev_enum_all(dev_info_t *parent, boolean_t store_unavailable)
868 {
869 	int i;
870 	i_xd_cfg_t *xdcp;
871 	boolean_t dom0 = DOMAIN_IS_INITDOMAIN(xen_info);
872 
873 	for (i = 0, xdcp = xdci; i < NXDC; i++, xdcp++) {
874 		/*
875 		 * Dom0 relies on watchpoints to create non-soft
876 		 * devices - don't attempt to iterate over the store.
877 		 */
878 		if (dom0 && (xdcp->xsdev != NULL))
879 			continue;
880 
881 		/*
882 		 * If the store is not yet available, don't attempt to
883 		 * iterate.
884 		 */
885 		if (store_unavailable && (xdcp->xsdev != NULL))
886 			continue;
887 
888 		xendev_enum_class(parent, xdcp->devclass);
889 	}
890 }
891 
892 xendev_devclass_t
893 xendev_nodename_to_devclass(char *nodename)
894 {
895 	int i;
896 	i_xd_cfg_t *xdcp;
897 
898 	/*
899 	 * This relies on the convention that variants of a base
900 	 * driver share the same prefix and that there are no drivers
901 	 * which share a common prefix with the name of any other base
902 	 * drivers.
903 	 *
904 	 * So for a base driver 'xnb' (which is the name listed in
905 	 * xdci) the variants all begin with the string 'xnb' (in fact
906 	 * they are 'xnbe', 'xnbo' and 'xnbu') and there are no other
907 	 * base drivers which have the prefix 'xnb'.
908 	 */
909 	ASSERT(nodename != NULL);
910 	for (i = 0, xdcp = xdci; i < NXDC; i++, xdcp++) {
911 		if (((xdcp->node_fe != NULL) &&
912 		    (strncmp(nodename, xdcp->node_fe,
913 		    strlen(xdcp->node_fe)) == 0)) ||
914 		    ((xdcp->node_be != NULL) &&
915 		    (strncmp(nodename, xdcp->node_be,
916 		    strlen(xdcp->node_be)) == 0)))
917 
918 			return (xdcp->devclass);
919 	}
920 	return (XEN_INVAL);
921 }
922 
923 int
924 xendev_devclass_ipl(xendev_devclass_t devclass)
925 {
926 	i_xd_cfg_t *xdcp;
927 
928 	xdcp = i_xvdi_devclass2cfg(devclass);
929 	ASSERT(xdcp != NULL);
930 
931 	return (xdcp->xd_ipl);
932 }
933 
934 /*
935  * Determine if a devinfo instance exists of a particular device
936  * class, domain and xenstore virtual device number.
937  */
938 dev_info_t *
939 xvdi_find_dev(dev_info_t *parent, xendev_devclass_t devclass,
940     domid_t dom, int vdev)
941 {
942 	dev_info_t *dip;
943 
944 	ASSERT(DEVI_BUSY_OWNED(parent));
945 
946 	switch (devclass) {
947 	case XEN_CONSOLE:
948 	case XEN_XENBUS:
949 	case XEN_DOMCAPS:
950 	case XEN_BALLOON:
951 	case XEN_EVTCHN:
952 	case XEN_PRIVCMD:
953 		/* Console and soft devices have no vdev. */
954 		vdev = VDEV_NOXS;
955 		break;
956 	default:
957 		break;
958 	}
959 
960 	for (dip = ddi_get_child(parent); dip != NULL;
961 	    dip = ddi_get_next_sibling(dip)) {
962 		int *vdevnump, *domidp, *devclsp, vdevnum;
963 		uint_t ndomid, nvdevnum, ndevcls;
964 		xendev_devclass_t devcls;
965 		domid_t domid;
966 		struct xendev_ppd *pdp = ddi_get_parent_data(dip);
967 
968 		if (pdp == NULL) {
969 			if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip,
970 			    DDI_PROP_DONTPASS, "domain", &domidp, &ndomid) !=
971 			    DDI_PROP_SUCCESS)
972 				continue;
973 			ASSERT(ndomid == 1);
974 			domid = (domid_t)*domidp;
975 			ddi_prop_free(domidp);
976 
977 			if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip,
978 			    DDI_PROP_DONTPASS, "vdev", &vdevnump, &nvdevnum) !=
979 			    DDI_PROP_SUCCESS)
980 				continue;
981 			ASSERT(nvdevnum == 1);
982 			vdevnum = *vdevnump;
983 			ddi_prop_free(vdevnump);
984 
985 			if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip,
986 			    DDI_PROP_DONTPASS, "devclass", &devclsp,
987 			    &ndevcls) != DDI_PROP_SUCCESS)
988 				continue;
989 			ASSERT(ndevcls == 1);
990 			devcls = (xendev_devclass_t)*devclsp;
991 			ddi_prop_free(devclsp);
992 		} else {
993 			domid = pdp->xd_domain;
994 			vdevnum = pdp->xd_vdevnum;
995 			devcls = pdp->xd_devclass;
996 		}
997 
998 		if ((domid == dom) && (vdevnum == vdev) && (devcls == devclass))
999 			return (dip);
1000 	}
1001 	return (NULL);
1002 }
1003 
1004 int
1005 xvdi_get_evtchn(dev_info_t *xdip)
1006 {
1007 	struct xendev_ppd *pdp = ddi_get_parent_data(xdip);
1008 
1009 	ASSERT(pdp != NULL);
1010 	return (pdp->xd_evtchn);
1011 }
1012 
1013 int
1014 xvdi_get_vdevnum(dev_info_t *xdip)
1015 {
1016 	struct xendev_ppd *pdp = ddi_get_parent_data(xdip);
1017 
1018 	ASSERT(pdp != NULL);
1019 	return (pdp->xd_vdevnum);
1020 }
1021 
1022 char *
1023 xvdi_get_xsname(dev_info_t *xdip)
1024 {
1025 	struct xendev_ppd *pdp = ddi_get_parent_data(xdip);
1026 
1027 	ASSERT(pdp != NULL);
1028 	return ((char *)(pdp->xd_xsdev.nodename));
1029 }
1030 
1031 char *
1032 xvdi_get_oename(dev_info_t *xdip)
1033 {
1034 	struct xendev_ppd *pdp = ddi_get_parent_data(xdip);
1035 
1036 	ASSERT(pdp != NULL);
1037 	if (pdp->xd_devclass == XEN_CONSOLE)
1038 		return (NULL);
1039 	return ((char *)(pdp->xd_xsdev.otherend));
1040 }
1041 
1042 struct xenbus_device *
1043 xvdi_get_xsd(dev_info_t *xdip)
1044 {
1045 	struct xendev_ppd *pdp = ddi_get_parent_data(xdip);
1046 
1047 	ASSERT(pdp != NULL);
1048 	return (&pdp->xd_xsdev);
1049 }
1050 
1051 domid_t
1052 xvdi_get_oeid(dev_info_t *xdip)
1053 {
1054 	struct xendev_ppd *pdp = ddi_get_parent_data(xdip);
1055 
1056 	ASSERT(pdp != NULL);
1057 	if (pdp->xd_devclass == XEN_CONSOLE)
1058 		return ((domid_t)-1);
1059 	return ((domid_t)(pdp->xd_xsdev.otherend_id));
1060 }
1061 
1062 void
1063 xvdi_dev_error(dev_info_t *dip, int errno, char *errstr)
1064 {
1065 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1066 
1067 	ASSERT(pdp != NULL);
1068 	xenbus_dev_error(&pdp->xd_xsdev, errno, errstr);
1069 }
1070 
1071 void
1072 xvdi_fatal_error(dev_info_t *dip, int errno, char *errstr)
1073 {
1074 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1075 
1076 	ASSERT(pdp != NULL);
1077 	xenbus_dev_fatal(&pdp->xd_xsdev, errno, errstr);
1078 }
1079 
1080 static void
1081 i_xvdi_oestate_handler(void *arg)
1082 {
1083 	i_oestate_evt_t *evt = (i_oestate_evt_t *)arg;
1084 	dev_info_t *dip = evt->dip;
1085 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1086 	XenbusState oestate = pdp->xd_xsdev.otherend_state;
1087 	XenbusState curr_oestate = evt->state;
1088 	ddi_eventcookie_t evc;
1089 
1090 	/* evt is alloc'ed in i_xvdi_oestate_cb */
1091 	kmem_free(evt, sizeof (i_oestate_evt_t));
1092 
1093 	/*
1094 	 * If the oestate we're handling is not the latest one,
1095 	 * it does not make any sense to continue handling it.
1096 	 */
1097 	if (curr_oestate != oestate)
1098 		return;
1099 
1100 	mutex_enter(&pdp->xd_ndi_lk);
1101 
1102 	if (pdp->xd_oe_ehid != NULL) {
1103 		/* send notification to driver */
1104 		if (ddi_get_eventcookie(dip, XS_OE_STATE,
1105 		    &evc) == DDI_SUCCESS) {
1106 			mutex_exit(&pdp->xd_ndi_lk);
1107 			(void) ndi_post_event(dip, dip, evc, &oestate);
1108 			mutex_enter(&pdp->xd_ndi_lk);
1109 		}
1110 	} else {
1111 		/*
1112 		 * take default action, if driver hasn't registered its
1113 		 * event handler yet
1114 		 */
1115 		if (oestate == XenbusStateClosing) {
1116 			(void) xvdi_switch_state(dip, XBT_NULL,
1117 			    XenbusStateClosed);
1118 		} else if (oestate == XenbusStateClosed) {
1119 			(void) xvdi_switch_state(dip, XBT_NULL,
1120 			    XenbusStateClosed);
1121 			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
1122 		}
1123 	}
1124 
1125 	mutex_exit(&pdp->xd_ndi_lk);
1126 
1127 	/*
1128 	 * We'll try to remove the devinfo node of this device if the
1129 	 * other end has closed.
1130 	 */
1131 	if (oestate == XenbusStateClosed)
1132 		(void) ddi_taskq_dispatch(DEVI(ddi_get_parent(dip))->devi_taskq,
1133 		    xendev_offline_device, dip, DDI_SLEEP);
1134 }
1135 
1136 static void
1137 i_xvdi_hpstate_handler(void *arg)
1138 {
1139 	dev_info_t *dip = (dev_info_t *)arg;
1140 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1141 	ddi_eventcookie_t evc;
1142 	char *hp_status;
1143 	unsigned int hpl;
1144 
1145 	mutex_enter(&pdp->xd_ndi_lk);
1146 	if ((ddi_get_eventcookie(dip, XS_HP_STATE, &evc) == DDI_SUCCESS) &&
1147 	    (xenbus_read(XBT_NULL, pdp->xd_hp_watch.node, "",
1148 	    (void *)&hp_status, &hpl) == 0)) {
1149 
1150 		xendev_hotplug_state_t new_state = Unrecognized;
1151 
1152 		if (strcmp(hp_status, "connected") == 0)
1153 			new_state = Connected;
1154 
1155 		mutex_exit(&pdp->xd_ndi_lk);
1156 
1157 		(void) ndi_post_event(dip, dip, evc, &new_state);
1158 		kmem_free(hp_status, hpl);
1159 		return;
1160 	}
1161 	mutex_exit(&pdp->xd_ndi_lk);
1162 }
1163 
1164 void
1165 xvdi_notify_oe(dev_info_t *dip)
1166 {
1167 	struct xendev_ppd *pdp;
1168 
1169 	pdp = ddi_get_parent_data(dip);
1170 	ASSERT(pdp->xd_evtchn != INVALID_EVTCHN);
1171 	ec_notify_via_evtchn(pdp->xd_evtchn);
1172 }
1173 
1174 static void
1175 i_xvdi_bepath_cb(struct xenbus_watch *w, const char **vec, unsigned int len)
1176 {
1177 	dev_info_t *dip = (dev_info_t *)w->dev;
1178 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1179 	char *be = NULL;
1180 	unsigned int bel;
1181 
1182 	ASSERT(len > XS_WATCH_PATH);
1183 	ASSERT(vec[XS_WATCH_PATH] != NULL);
1184 
1185 	/*
1186 	 * If the backend is not the same as that we already stored,
1187 	 * re-set our watch for its' state.
1188 	 */
1189 	if ((xenbus_read(XBT_NULL, "", vec[XS_WATCH_PATH], (void *)be, &bel)
1190 	    == 0) && (strcmp(be, pdp->xd_xsdev.otherend) != 0))
1191 		(void) i_xvdi_add_watch_oestate(dip);
1192 
1193 	if (be != NULL) {
1194 		ASSERT(bel > 0);
1195 		kmem_free(be, bel);
1196 	}
1197 }
1198 
1199 static int
1200 i_xvdi_add_watch_oestate(dev_info_t *dip)
1201 {
1202 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1203 
1204 	ASSERT(pdp != NULL);
1205 	ASSERT(pdp->xd_xsdev.nodename != NULL);
1206 	ASSERT(mutex_owned(&pdp->xd_ndi_lk));
1207 
1208 	/*
1209 	 * Create taskq for delivering other end state change event to
1210 	 * this device later.
1211 	 *
1212 	 * Set nthreads to 1 to make sure that events can be delivered
1213 	 * in order.
1214 	 *
1215 	 * Note: It is _not_ guaranteed that driver can see every
1216 	 * xenstore change under the path that it is watching. If two
1217 	 * changes happen consecutively in a very short amount of
1218 	 * time, it is likely that the driver will see only the last
1219 	 * one.
1220 	 */
1221 	if (pdp->xd_oe_taskq == NULL)
1222 		if ((pdp->xd_oe_taskq = ddi_taskq_create(dip,
1223 		    "xendev_oe_taskq", 1, TASKQ_DEFAULTPRI, 0)) == NULL)
1224 			return (DDI_FAILURE);
1225 
1226 	/*
1227 	 * Watch for changes to the XenbusState of otherend.
1228 	 */
1229 	pdp->xd_xsdev.otherend_state = XenbusStateUnknown;
1230 	pdp->xd_xsdev.otherend_changed = i_xvdi_oestate_cb;
1231 
1232 	if (talk_to_otherend(&pdp->xd_xsdev) != 0) {
1233 		i_xvdi_rem_watch_oestate(dip);
1234 		return (DDI_FAILURE);
1235 	}
1236 
1237 	return (DDI_SUCCESS);
1238 }
1239 
1240 static void
1241 i_xvdi_rem_watch_oestate(dev_info_t *dip)
1242 {
1243 	struct xendev_ppd *pdp;
1244 	struct xenbus_device *dev;
1245 
1246 	pdp = ddi_get_parent_data(dip);
1247 	ASSERT(pdp != NULL);
1248 	ASSERT(mutex_owned(&pdp->xd_ndi_lk));
1249 
1250 	dev = &pdp->xd_xsdev;
1251 
1252 	/* Unwatch for changes to XenbusState of otherend */
1253 	if (dev->otherend_watch.node != NULL) {
1254 		mutex_exit(&pdp->xd_ndi_lk);
1255 		unregister_xenbus_watch(&dev->otherend_watch);
1256 		mutex_enter(&pdp->xd_ndi_lk);
1257 	}
1258 
1259 	/* make sure no event handler is running */
1260 	if (pdp->xd_oe_taskq != NULL) {
1261 		mutex_exit(&pdp->xd_ndi_lk);
1262 		ddi_taskq_destroy(pdp->xd_oe_taskq);
1263 		mutex_enter(&pdp->xd_ndi_lk);
1264 		pdp->xd_oe_taskq = NULL;
1265 	}
1266 
1267 	/* clean up */
1268 	dev->otherend_state = XenbusStateUnknown;
1269 	dev->otherend_id = (domid_t)-1;
1270 	if (dev->otherend_watch.node != NULL)
1271 		kmem_free((void *)dev->otherend_watch.node,
1272 		    strlen(dev->otherend_watch.node) + 1);
1273 	dev->otherend_watch.node = NULL;
1274 	if (dev->otherend != NULL)
1275 		kmem_free((void *)dev->otherend, strlen(dev->otherend) + 1);
1276 	dev->otherend = NULL;
1277 }
1278 
1279 static int
1280 i_xvdi_add_watch_hpstate(dev_info_t *dip)
1281 {
1282 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1283 
1284 	ASSERT(pdp != NULL);
1285 	ASSERT(pdp->xd_xsdev.frontend == 0);
1286 	ASSERT(mutex_owned(&pdp->xd_ndi_lk));
1287 
1288 	/*
1289 	 * Create taskq for delivering hotplug status change event to
1290 	 * this device later.
1291 	 *
1292 	 * Set nthreads to 1 to make sure that events can be delivered
1293 	 * in order.
1294 	 *
1295 	 * Note: It is _not_ guaranteed that driver can see every
1296 	 * hotplug status change under the path that it is
1297 	 * watching. If two changes happen consecutively in a very
1298 	 * short amount of time, it is likely that the driver only
1299 	 * sees the last one.
1300 	 */
1301 	if (pdp->xd_hp_taskq == NULL)
1302 		if ((pdp->xd_hp_taskq = ddi_taskq_create(dip,
1303 		    "xendev_hp_taskq", 1, TASKQ_DEFAULTPRI, 0)) == NULL)
1304 			return (DDI_FAILURE);
1305 
1306 	if (pdp->xd_hp_watch.node == NULL) {
1307 		size_t len;
1308 		char *path;
1309 
1310 		ASSERT(pdp->xd_xsdev.nodename != NULL);
1311 
1312 		len = strlen(pdp->xd_xsdev.nodename) +
1313 		    strlen("/hotplug-status") + 1;
1314 		path = kmem_alloc(len, KM_SLEEP);
1315 		(void) snprintf(path, len, "%s/hotplug-status",
1316 		    pdp->xd_xsdev.nodename);
1317 
1318 		pdp->xd_hp_watch.node = path;
1319 		pdp->xd_hp_watch.callback = i_xvdi_hpstate_cb;
1320 		pdp->xd_hp_watch.dev = (struct xenbus_device *)dip; /* yuck! */
1321 		if (register_xenbus_watch(&pdp->xd_hp_watch) != 0) {
1322 			i_xvdi_rem_watch_hpstate(dip);
1323 			return (DDI_FAILURE);
1324 		}
1325 	}
1326 
1327 	return (DDI_SUCCESS);
1328 }
1329 
1330 static void
1331 i_xvdi_rem_watch_hpstate(dev_info_t *dip)
1332 {
1333 	struct xendev_ppd *pdp;
1334 	pdp = ddi_get_parent_data(dip);
1335 
1336 	ASSERT(pdp != NULL);
1337 	ASSERT(pdp->xd_xsdev.frontend == 0);
1338 	ASSERT(mutex_owned(&pdp->xd_ndi_lk));
1339 
1340 	/* Unwatch for changes to "hotplug-status" node for backend device. */
1341 	if (pdp->xd_hp_watch.node != NULL) {
1342 		mutex_exit(&pdp->xd_ndi_lk);
1343 		unregister_xenbus_watch(&pdp->xd_hp_watch);
1344 		mutex_enter(&pdp->xd_ndi_lk);
1345 	}
1346 
1347 	/* Make sure no event handler is running. */
1348 	if (pdp->xd_hp_taskq != NULL) {
1349 		mutex_exit(&pdp->xd_ndi_lk);
1350 		ddi_taskq_destroy(pdp->xd_hp_taskq);
1351 		mutex_enter(&pdp->xd_ndi_lk);
1352 		pdp->xd_hp_taskq = NULL;
1353 	}
1354 
1355 	/* Clean up. */
1356 	if (pdp->xd_hp_watch.node != NULL) {
1357 		kmem_free((void *)pdp->xd_hp_watch.node,
1358 		    strlen(pdp->xd_hp_watch.node) + 1);
1359 		pdp->xd_hp_watch.node = NULL;
1360 	}
1361 }
1362 
1363 static int
1364 i_xvdi_add_watches(dev_info_t *dip)
1365 {
1366 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1367 
1368 	ASSERT(pdp != NULL);
1369 
1370 	mutex_enter(&pdp->xd_ndi_lk);
1371 
1372 	if (i_xvdi_add_watch_oestate(dip) != DDI_SUCCESS) {
1373 		mutex_exit(&pdp->xd_ndi_lk);
1374 		return (DDI_FAILURE);
1375 	}
1376 
1377 	if (pdp->xd_xsdev.frontend == 1) {
1378 		/*
1379 		 * Frontend devices must watch for the backend path
1380 		 * changing.
1381 		 */
1382 		if (i_xvdi_add_watch_bepath(dip) != DDI_SUCCESS)
1383 			goto unwatch_and_fail;
1384 	} else {
1385 		/*
1386 		 * Backend devices must watch for hotplug events.
1387 		 */
1388 		if (i_xvdi_add_watch_hpstate(dip) != DDI_SUCCESS)
1389 			goto unwatch_and_fail;
1390 	}
1391 
1392 	mutex_exit(&pdp->xd_ndi_lk);
1393 
1394 	return (DDI_SUCCESS);
1395 
1396 unwatch_and_fail:
1397 	i_xvdi_rem_watch_oestate(dip);
1398 	mutex_exit(&pdp->xd_ndi_lk);
1399 
1400 	return (DDI_FAILURE);
1401 }
1402 
1403 static void
1404 i_xvdi_rem_watches(dev_info_t *dip)
1405 {
1406 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1407 
1408 	ASSERT(pdp != NULL);
1409 
1410 	mutex_enter(&pdp->xd_ndi_lk);
1411 
1412 	i_xvdi_rem_watch_oestate(dip);
1413 
1414 	if (pdp->xd_xsdev.frontend == 1)
1415 		i_xvdi_rem_watch_bepath(dip);
1416 	else
1417 		i_xvdi_rem_watch_hpstate(dip);
1418 
1419 	mutex_exit(&pdp->xd_ndi_lk);
1420 }
1421 
1422 static int
1423 i_xvdi_add_watch_bepath(dev_info_t *dip)
1424 {
1425 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1426 
1427 	ASSERT(pdp != NULL);
1428 	ASSERT(pdp->xd_xsdev.frontend == 1);
1429 
1430 	/*
1431 	 * Frontend devices need to watch for the backend path changing.
1432 	 */
1433 	if (pdp->xd_bepath_watch.node == NULL) {
1434 		size_t len;
1435 		char *path;
1436 
1437 		ASSERT(pdp->xd_xsdev.nodename != NULL);
1438 
1439 		len = strlen(pdp->xd_xsdev.nodename) + strlen("/backend") + 1;
1440 		path = kmem_alloc(len, KM_SLEEP);
1441 		(void) snprintf(path, len, "%s/backend",
1442 		    pdp->xd_xsdev.nodename);
1443 
1444 		pdp->xd_bepath_watch.node = path;
1445 		pdp->xd_bepath_watch.callback = i_xvdi_bepath_cb;
1446 		pdp->xd_bepath_watch.dev = (struct xenbus_device *)dip;
1447 		if (register_xenbus_watch(&pdp->xd_bepath_watch) != 0) {
1448 			kmem_free(path, len);
1449 			pdp->xd_bepath_watch.node = NULL;
1450 			return (DDI_FAILURE);
1451 		}
1452 	}
1453 
1454 	return (DDI_SUCCESS);
1455 }
1456 
1457 static void
1458 i_xvdi_rem_watch_bepath(dev_info_t *dip)
1459 {
1460 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1461 
1462 	ASSERT(pdp != NULL);
1463 	ASSERT(pdp->xd_xsdev.frontend == 1);
1464 	ASSERT(mutex_owned(&pdp->xd_ndi_lk));
1465 
1466 	if (pdp->xd_bepath_watch.node != NULL) {
1467 		mutex_exit(&pdp->xd_ndi_lk);
1468 		unregister_xenbus_watch(&pdp->xd_bepath_watch);
1469 		mutex_enter(&pdp->xd_ndi_lk);
1470 
1471 		kmem_free((void *)(pdp->xd_bepath_watch.node),
1472 		    strlen(pdp->xd_bepath_watch.node) + 1);
1473 		pdp->xd_bepath_watch.node = NULL;
1474 	}
1475 }
1476 
1477 int
1478 xvdi_switch_state(dev_info_t *dip, xenbus_transaction_t xbt,
1479     XenbusState newState)
1480 {
1481 	int rv;
1482 	struct xendev_ppd *pdp;
1483 
1484 	pdp = ddi_get_parent_data(dip);
1485 	ASSERT(pdp != NULL);
1486 
1487 	XVDI_DPRINTF(XVDI_DBG_STATE,
1488 	    "xvdi_switch_state: %s@%s's xenbus state moves to %d\n",
1489 	    ddi_binding_name(dip) == NULL ? "null" : ddi_binding_name(dip),
1490 	    ddi_get_name_addr(dip) == NULL ? "null" : ddi_get_name_addr(dip),
1491 	    newState);
1492 
1493 	rv = xenbus_switch_state(&pdp->xd_xsdev, xbt, newState);
1494 	if (rv > 0)
1495 		cmn_err(CE_WARN, "xvdi_switch_state: change state failed");
1496 
1497 	return (rv);
1498 }
1499 
1500 /*
1501  * Notify hotplug script running in userland
1502  */
1503 int
1504 xvdi_post_event(dev_info_t *dip, xendev_hotplug_cmd_t hpc)
1505 {
1506 	struct xendev_ppd *pdp;
1507 	nvlist_t *attr_list = NULL;
1508 	i_xd_cfg_t *xdcp;
1509 	sysevent_id_t eid;
1510 	int err;
1511 	char devname[256]; /* XXPV dme: ? */
1512 
1513 	pdp = ddi_get_parent_data(dip);
1514 	ASSERT(pdp != NULL);
1515 
1516 	xdcp = i_xvdi_devclass2cfg(pdp->xd_devclass);
1517 	ASSERT(xdcp != NULL);
1518 
1519 	(void) snprintf(devname, sizeof (devname) - 1, "%s%d",
1520 	    ddi_driver_name(dip),  ddi_get_instance(dip));
1521 
1522 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_NOSLEEP);
1523 	if (err != DDI_SUCCESS)
1524 		goto failure;
1525 
1526 	err = nvlist_add_int32(attr_list, "domain", pdp->xd_domain);
1527 	if (err != DDI_SUCCESS)
1528 		goto failure;
1529 	err = nvlist_add_int32(attr_list, "vdev", pdp->xd_vdevnum);
1530 	if (err != DDI_SUCCESS)
1531 		goto failure;
1532 	err = nvlist_add_string(attr_list, "devclass", xdcp->xsdev);
1533 	if (err != DDI_SUCCESS)
1534 		goto failure;
1535 	err = nvlist_add_string(attr_list, "device", devname);
1536 	if (err != DDI_SUCCESS)
1537 		goto failure;
1538 	err = nvlist_add_string(attr_list, "fob",
1539 	    ((pdp->xd_xsdev.frontend == 1) ? "frontend" : "backend"));
1540 	if (err != DDI_SUCCESS)
1541 		goto failure;
1542 
1543 	switch (hpc) {
1544 	case XEN_HP_ADD:
1545 		err = ddi_log_sysevent(dip, DDI_VENDOR_SUNW, "EC_xendev",
1546 		    "add", attr_list, &eid, DDI_NOSLEEP);
1547 		break;
1548 	case XEN_HP_REMOVE:
1549 		err = ddi_log_sysevent(dip, DDI_VENDOR_SUNW, "EC_xendev",
1550 		    "remove", attr_list, &eid, DDI_NOSLEEP);
1551 		break;
1552 	default:
1553 		err = DDI_FAILURE;
1554 		goto failure;
1555 	}
1556 
1557 failure:
1558 	if (attr_list != NULL)
1559 		nvlist_free(attr_list);
1560 
1561 	return (err);
1562 }
1563 
1564 /* ARGSUSED */
1565 static void
1566 i_xvdi_probe_path_cb(struct xenbus_watch *w, const char **vec,
1567     unsigned int len)
1568 {
1569 	char *path;
1570 
1571 	if (xendev_dip == NULL)
1572 		xendev_dip = ddi_find_devinfo("xpvd", -1, 0);
1573 
1574 	path = i_ddi_strdup((char *)vec[XS_WATCH_PATH], KM_SLEEP);
1575 
1576 	(void) ddi_taskq_dispatch(DEVI(xendev_dip)->devi_taskq,
1577 	    i_xvdi_probe_path_handler, (void *)path, DDI_SLEEP);
1578 }
1579 
1580 static void
1581 i_xvdi_watch_device(char *path)
1582 {
1583 	struct xenbus_watch *w;
1584 
1585 	ASSERT(path != NULL);
1586 
1587 	w = kmem_zalloc(sizeof (*w), KM_SLEEP);
1588 	w->node = path;
1589 	w->callback = &i_xvdi_probe_path_cb;
1590 	w->dev = NULL;
1591 
1592 	if (register_xenbus_watch(w) != 0) {
1593 		cmn_err(CE_WARN, "i_xvdi_watch_device: "
1594 		    "cannot set watch on %s", path);
1595 		kmem_free(w, sizeof (*w));
1596 		return;
1597 	}
1598 }
1599 
1600 void
1601 xvdi_watch_devices(int newstate)
1602 {
1603 	int devclass;
1604 
1605 	/*
1606 	 * Watch for devices being created in the store.
1607 	 */
1608 	if (newstate == XENSTORE_DOWN)
1609 		return;
1610 	for (devclass = 0; devclass < NXDC; devclass++) {
1611 		if (xdci[devclass].xs_path_fe != NULL)
1612 			i_xvdi_watch_device(xdci[devclass].xs_path_fe);
1613 		if (xdci[devclass].xs_path_be != NULL)
1614 			i_xvdi_watch_device(xdci[devclass].xs_path_be);
1615 	}
1616 }
1617 
1618 /*
1619  * Iterate over the store looking for backend devices to create.
1620  */
1621 static void
1622 i_xvdi_enum_be(dev_info_t *parent, i_xd_cfg_t *xdcp)
1623 {
1624 	char **domains;
1625 	unsigned int ndomains;
1626 	int ldomains, i;
1627 
1628 	if ((domains = xenbus_directory(XBT_NULL, xdcp->xs_path_be, "",
1629 	    &ndomains)) == NULL)
1630 		return;
1631 
1632 	for (i = 0, ldomains = 0; i < ndomains; i++) {
1633 		ldomains += strlen(domains[i]) + 1 + sizeof (char *);
1634 
1635 		i_xvdi_enum_worker(parent, xdcp, domains[i]);
1636 	}
1637 	kmem_free(domains, ldomains);
1638 }
1639 
1640 /*
1641  * Iterate over the store looking for frontend devices to create.
1642  */
1643 static void
1644 i_xvdi_enum_fe(dev_info_t *parent, i_xd_cfg_t *xdcp)
1645 {
1646 	i_xvdi_enum_worker(parent, xdcp, NULL);
1647 }
1648 
1649 static void
1650 i_xvdi_enum_worker(dev_info_t *parent, i_xd_cfg_t *xdcp,
1651     char *domain)
1652 {
1653 	char *path, *domain_path, *ep;
1654 	char **devices;
1655 	unsigned int ndevices;
1656 	int ldevices, j, circ;
1657 	domid_t dom;
1658 	long tmplong;
1659 
1660 	if (domain == NULL) {
1661 		dom = DOMID_SELF;
1662 		path = xdcp->xs_path_fe;
1663 		domain_path = "";
1664 	} else {
1665 		(void) ddi_strtol(domain, &ep, 0, &tmplong);
1666 		dom = tmplong;
1667 		path = xdcp->xs_path_be;
1668 		domain_path = domain;
1669 	}
1670 
1671 	if ((devices = xenbus_directory(XBT_NULL, path, domain_path,
1672 	    &ndevices)) == NULL)
1673 		return;
1674 
1675 	for (j = 0, ldevices = 0; j < ndevices; j++) {
1676 		int vdev;
1677 
1678 		ldevices += strlen(devices[j]) + 1 + sizeof (char *);
1679 		(void) ddi_strtol(devices[j], &ep, 0, &tmplong);
1680 		vdev = tmplong;
1681 
1682 		ndi_devi_enter(parent, &circ);
1683 
1684 		if (xvdi_find_dev(parent, xdcp->devclass, dom, vdev) == NULL)
1685 			(void) xvdi_create_dev(parent, xdcp->devclass,
1686 			    dom, vdev);
1687 
1688 		ndi_devi_exit(parent, circ);
1689 	}
1690 	kmem_free(devices, ldevices);
1691 }
1692 
1693 /*
1694  * Leaf drivers should call this in their detach() routine during suspend.
1695  */
1696 void
1697 xvdi_suspend(dev_info_t *dip)
1698 {
1699 	i_xvdi_rem_watches(dip);
1700 }
1701 
1702 /*
1703  * Leaf drivers should call this in their attach() routine during resume.
1704  */
1705 int
1706 xvdi_resume(dev_info_t *dip)
1707 {
1708 	return (i_xvdi_add_watches(dip));
1709 }
1710 
1711 /*
1712  * Add event handler for the leaf driver
1713  * to handle event triggered by the change in xenstore
1714  */
1715 int
1716 xvdi_add_event_handler(dev_info_t *dip, char *name,
1717     void (*evthandler)(dev_info_t *, ddi_eventcookie_t, void *, void *),
1718     void *arg)
1719 {
1720 	ddi_eventcookie_t ecv;
1721 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
1722 	ddi_callback_id_t *cbid;
1723 	boolean_t call_handler;
1724 	i_oestate_evt_t *evt = NULL;
1725 	XenbusState oestate;
1726 
1727 	ASSERT(pdp != NULL);
1728 
1729 	mutex_enter(&pdp->xd_ndi_lk);
1730 
1731 	if (strcmp(name, XS_OE_STATE) == 0) {
1732 		ASSERT(pdp->xd_xsdev.otherend != NULL);
1733 
1734 		cbid = &pdp->xd_oe_ehid;
1735 	} else if (strcmp(name, XS_HP_STATE) == 0) {
1736 		if (pdp->xd_xsdev.frontend == 1) {
1737 			mutex_exit(&pdp->xd_ndi_lk);
1738 			return (DDI_FAILURE);
1739 		}
1740 
1741 		ASSERT(pdp->xd_hp_watch.node != NULL);
1742 
1743 		cbid = &pdp->xd_hp_ehid;
1744 	} else {
1745 		/* Unsupported watch. */
1746 		mutex_exit(&pdp->xd_ndi_lk);
1747 		return (DDI_FAILURE);
1748 	}
1749 
1750 	/*
1751 	 * No event handler provided, take default action to handle
1752 	 * event.
1753 	 */
1754 	if (evthandler == NULL) {
1755 		mutex_exit(&pdp->xd_ndi_lk);
1756 		return (DDI_SUCCESS);
1757 	}
1758 
1759 	ASSERT(*cbid == NULL);
1760 
1761 	if (ddi_get_eventcookie(dip, name, &ecv) != DDI_SUCCESS) {
1762 		cmn_err(CE_WARN, "failed to find %s cookie for %s@%s",
1763 		    name, ddi_get_name(dip), ddi_get_name_addr(dip));
1764 		mutex_exit(&pdp->xd_ndi_lk);
1765 		return (DDI_FAILURE);
1766 	}
1767 	if (ddi_add_event_handler(dip, ecv, evthandler, arg, cbid)
1768 	    != DDI_SUCCESS) {
1769 		cmn_err(CE_WARN, "failed to add %s event handler for %s@%s",
1770 		    name, ddi_get_name(dip), ddi_get_name_addr(dip));
1771 		*cbid = NULL;
1772 		mutex_exit(&pdp->xd_ndi_lk);
1773 		return (DDI_FAILURE);
1774 	}
1775 
1776 	/*
1777 	 * if we're adding an oe state callback, and the ring has already
1778 	 * transitioned out of Unknown, call the handler after we release
1779 	 * the mutex.
1780 	 */
1781 	call_handler = B_FALSE;
1782 	if ((strcmp(name, XS_OE_STATE) == 0) &&
1783 	    (pdp->xd_xsdev.otherend_state != XenbusStateUnknown)) {
1784 		oestate = pdp->xd_xsdev.otherend_state;
1785 		call_handler = B_TRUE;
1786 	}
1787 
1788 	mutex_exit(&pdp->xd_ndi_lk);
1789 
1790 	if (call_handler) {
1791 		evt = kmem_alloc(sizeof (i_oestate_evt_t), KM_SLEEP);
1792 		evt->dip = dip;
1793 		evt->state = oestate;
1794 		(void) ddi_taskq_dispatch(pdp->xd_oe_taskq,
1795 		    i_xvdi_oestate_handler, (void *)evt, DDI_SLEEP);
1796 	}
1797 
1798 	return (DDI_SUCCESS);
1799 }
1800 
1801 /*
1802  * Remove event handler for the leaf driver and unwatch xenstore
1803  * so, driver will not be notified when xenstore entry changed later
1804  */
1805 void
1806 xvdi_remove_event_handler(dev_info_t *dip, char *name)
1807 {
1808 	struct xendev_ppd *pdp;
1809 	boolean_t rem_oe = B_FALSE, rem_hp = B_FALSE;
1810 	ddi_callback_id_t oeid = NULL, hpid = NULL;
1811 
1812 	pdp = ddi_get_parent_data(dip);
1813 	ASSERT(pdp != NULL);
1814 
1815 	if (name == NULL) {
1816 		rem_oe = B_TRUE;
1817 		rem_hp = B_TRUE;
1818 	} else if (strcmp(name, XS_OE_STATE) == 0) {
1819 		rem_oe = B_TRUE;
1820 	} else if (strcmp(name, XS_HP_STATE) == 0) {
1821 		rem_hp = B_TRUE;
1822 	} else {
1823 		cmn_err(CE_WARN, "event %s not supported, cannot remove", name);
1824 		return;
1825 	}
1826 
1827 	mutex_enter(&pdp->xd_ndi_lk);
1828 
1829 	if (rem_oe && (pdp->xd_oe_ehid != NULL)) {
1830 		oeid = pdp->xd_oe_ehid;
1831 		pdp->xd_oe_ehid = NULL;
1832 	}
1833 
1834 	if (rem_hp && (pdp->xd_hp_ehid != NULL)) {
1835 		hpid = pdp->xd_hp_ehid;
1836 		pdp->xd_hp_ehid = NULL;
1837 	}
1838 
1839 	mutex_exit(&pdp->xd_ndi_lk);
1840 
1841 	if (oeid != NULL)
1842 		(void) ddi_remove_event_handler(oeid);
1843 	if (hpid != NULL)
1844 		(void) ddi_remove_event_handler(hpid);
1845 }
1846 
1847 
1848 /*
1849  * common ring interfaces
1850  */
1851 
/*
 * Accessors for the private front/back ring state embedded in a
 * xendev_ring_t, and for locating a ring entry from an index.
 *
 * GET_RING_SIZE() takes the size from the front-ring view of the ring
 * it is given.  It must expand its own argument rather than a variable
 * that merely happens to be called 'ringp' at the point of use.
 *
 * GET_RING_ENTRY_FE/BE() wrap the index by masking with (size - 1);
 * this presumably relies on the ring size being a power of two.
 */
#define	FRONT_RING(_ringp)	(&(_ringp)->xr_sring.fr)
#define	BACK_RING(_ringp)	(&(_ringp)->xr_sring.br)
#define	GET_RING_SIZE(_ringp)	RING_SIZE(FRONT_RING(_ringp))
#define	GET_RING_ENTRY_FE(_ringp, _idx)		\
	(FRONT_RING(_ringp)->sring->ring +	\
	(_ringp)->xr_entry_size * ((_idx) & (GET_RING_SIZE(_ringp) - 1)))
#define	GET_RING_ENTRY_BE(_ringp, _idx)		\
	(BACK_RING(_ringp)->sring->ring +	\
	(_ringp)->xr_entry_size * ((_idx) & (GET_RING_SIZE(_ringp) - 1)))
1861 
1862 unsigned int
1863 xvdi_ring_avail_slots(xendev_ring_t *ringp)
1864 {
1865 	comif_ring_fe_t *frp;
1866 	comif_ring_be_t *brp;
1867 
1868 	if (ringp->xr_frontend) {
1869 		frp = FRONT_RING(ringp);
1870 		return (GET_RING_SIZE(ringp) -
1871 		    (frp->req_prod_pvt - frp->rsp_cons));
1872 	} else {
1873 		brp = BACK_RING(ringp);
1874 		return (GET_RING_SIZE(ringp) -
1875 		    (brp->rsp_prod_pvt - brp->req_cons));
1876 	}
1877 }
1878 
1879 int
1880 xvdi_ring_has_unconsumed_requests(xendev_ring_t *ringp)
1881 {
1882 	comif_ring_be_t *brp;
1883 
1884 	ASSERT(!ringp->xr_frontend);
1885 	brp = BACK_RING(ringp);
1886 	return ((brp->req_cons !=
1887 	    ddi_get32(ringp->xr_acc_hdl, &brp->sring->req_prod)) &&
1888 	    ((brp->req_cons - brp->rsp_prod_pvt) != RING_SIZE(brp)));
1889 }
1890 
1891 int
1892 xvdi_ring_has_incomp_request(xendev_ring_t *ringp)
1893 {
1894 	comif_ring_fe_t *frp;
1895 
1896 	ASSERT(ringp->xr_frontend);
1897 	frp = FRONT_RING(ringp);
1898 	return (frp->req_prod_pvt !=
1899 	    ddi_get32(ringp->xr_acc_hdl, &frp->sring->rsp_prod));
1900 }
1901 
1902 int
1903 xvdi_ring_has_unconsumed_responses(xendev_ring_t *ringp)
1904 {
1905 	comif_ring_fe_t *frp;
1906 
1907 	ASSERT(ringp->xr_frontend);
1908 	frp = FRONT_RING(ringp);
1909 	return (frp->rsp_cons !=
1910 	    ddi_get32(ringp->xr_acc_hdl, &frp->sring->rsp_prod));
1911 }
1912 
1913 /* NOTE: req_event will be increased as needed */
1914 void *
1915 xvdi_ring_get_request(xendev_ring_t *ringp)
1916 {
1917 	comif_ring_fe_t *frp;
1918 	comif_ring_be_t *brp;
1919 
1920 	if (ringp->xr_frontend) {
1921 		/* for frontend ring */
1922 		frp = FRONT_RING(ringp);
1923 		if (!RING_FULL(frp))
1924 			return (GET_RING_ENTRY_FE(ringp, frp->req_prod_pvt++));
1925 		else
1926 			return (NULL);
1927 	} else {
1928 		/* for backend ring */
1929 		brp = BACK_RING(ringp);
1930 		/* RING_FINAL_CHECK_FOR_REQUESTS() */
1931 		if (xvdi_ring_has_unconsumed_requests(ringp))
1932 			return (GET_RING_ENTRY_BE(ringp, brp->req_cons++));
1933 		else {
1934 			ddi_put32(ringp->xr_acc_hdl, &brp->sring->req_event,
1935 			    brp->req_cons + 1);
1936 			membar_enter();
1937 			if (xvdi_ring_has_unconsumed_requests(ringp))
1938 				return (GET_RING_ENTRY_BE(ringp,
1939 				    brp->req_cons++));
1940 			else
1941 				return (NULL);
1942 		}
1943 	}
1944 }
1945 
1946 int
1947 xvdi_ring_push_request(xendev_ring_t *ringp)
1948 {
1949 	RING_IDX old, new, reqevt;
1950 	comif_ring_fe_t *frp;
1951 
1952 	/* only frontend should be able to push request */
1953 	ASSERT(ringp->xr_frontend);
1954 
1955 	/* RING_PUSH_REQUEST_AND_CHECK_NOTIFY() */
1956 	frp = FRONT_RING(ringp);
1957 	old = ddi_get32(ringp->xr_acc_hdl, &frp->sring->req_prod);
1958 	new = frp->req_prod_pvt;
1959 	ddi_put32(ringp->xr_acc_hdl, &frp->sring->req_prod, new);
1960 	membar_enter();
1961 	reqevt = ddi_get32(ringp->xr_acc_hdl, &frp->sring->req_event);
1962 	return ((RING_IDX)(new - reqevt) < (RING_IDX)(new - old));
1963 }
1964 
1965 /* NOTE: rsp_event will be increased as needed */
1966 void *
1967 xvdi_ring_get_response(xendev_ring_t *ringp)
1968 {
1969 	comif_ring_fe_t *frp;
1970 	comif_ring_be_t *brp;
1971 
1972 	if (!ringp->xr_frontend) {
1973 		/* for backend ring */
1974 		brp = BACK_RING(ringp);
1975 		return (GET_RING_ENTRY_BE(ringp, brp->rsp_prod_pvt++));
1976 	} else {
1977 		/* for frontend ring */
1978 		frp = FRONT_RING(ringp);
1979 		/* RING_FINAL_CHECK_FOR_RESPONSES() */
1980 		if (xvdi_ring_has_unconsumed_responses(ringp))
1981 			return (GET_RING_ENTRY_FE(ringp, frp->rsp_cons++));
1982 		else {
1983 			ddi_put32(ringp->xr_acc_hdl, &frp->sring->rsp_event,
1984 			    frp->rsp_cons + 1);
1985 			membar_enter();
1986 			if (xvdi_ring_has_unconsumed_responses(ringp))
1987 				return (GET_RING_ENTRY_FE(ringp,
1988 				    frp->rsp_cons++));
1989 			else
1990 				return (NULL);
1991 		}
1992 	}
1993 }
1994 
1995 int
1996 xvdi_ring_push_response(xendev_ring_t *ringp)
1997 {
1998 	RING_IDX old, new, rspevt;
1999 	comif_ring_be_t *brp;
2000 
2001 	/* only backend should be able to push response */
2002 	ASSERT(!ringp->xr_frontend);
2003 
2004 	/* RING_PUSH_RESPONSE_AND_CHECK_NOTIFY() */
2005 	brp = BACK_RING(ringp);
2006 	old = ddi_get32(ringp->xr_acc_hdl, &brp->sring->rsp_prod);
2007 	new = brp->rsp_prod_pvt;
2008 	ddi_put32(ringp->xr_acc_hdl, &brp->sring->rsp_prod, new);
2009 	membar_enter();
2010 	rspevt = ddi_get32(ringp->xr_acc_hdl, &brp->sring->rsp_event);
2011 	return ((RING_IDX)(new - rspevt) < (RING_IDX)(new - old));
2012 }
2013 
2014 static void
2015 xvdi_ring_init_sring(xendev_ring_t *ringp)
2016 {
2017 	ddi_acc_handle_t acchdl;
2018 	comif_sring_t *xsrp;
2019 	int i;
2020 
2021 	xsrp = (comif_sring_t *)ringp->xr_vaddr;
2022 	acchdl = ringp->xr_acc_hdl;
2023 
2024 	/* shared ring initialization */
2025 	ddi_put32(acchdl, &xsrp->req_prod, 0);
2026 	ddi_put32(acchdl, &xsrp->rsp_prod, 0);
2027 	ddi_put32(acchdl, &xsrp->req_event, 1);
2028 	ddi_put32(acchdl, &xsrp->rsp_event, 1);
2029 	for (i = 0; i < sizeof (xsrp->pad); i++)
2030 		ddi_put8(acchdl, xsrp->pad + i, 0);
2031 }
2032 
2033 static void
2034 xvdi_ring_init_front_ring(xendev_ring_t *ringp, size_t nentry, size_t entrysize)
2035 {
2036 	comif_ring_fe_t *xfrp;
2037 
2038 	xfrp = &ringp->xr_sring.fr;
2039 	xfrp->req_prod_pvt = 0;
2040 	xfrp->rsp_cons = 0;
2041 	xfrp->nr_ents = nentry;
2042 	xfrp->sring = (comif_sring_t *)ringp->xr_vaddr;
2043 
2044 	ringp->xr_frontend = 1;
2045 	ringp->xr_entry_size = entrysize;
2046 }
2047 
2048 #ifndef XPV_HVM_DRIVER
2049 static void
2050 xvdi_ring_init_back_ring(xendev_ring_t *ringp, size_t nentry, size_t entrysize)
2051 {
2052 	comif_ring_be_t *xbrp;
2053 
2054 	xbrp = &ringp->xr_sring.br;
2055 	xbrp->rsp_prod_pvt = 0;
2056 	xbrp->req_cons = 0;
2057 	xbrp->nr_ents = nentry;
2058 	xbrp->sring = (comif_sring_t *)ringp->xr_vaddr;
2059 
2060 	ringp->xr_frontend = 0;
2061 	ringp->xr_entry_size = entrysize;
2062 }
2063 #endif /* XPV_HVM_DRIVER */
2064 
2065 static void
2066 xendev_offline_device(void *arg)
2067 {
2068 	dev_info_t *dip = (dev_info_t *)arg;
2069 	char devname[MAXNAMELEN] = {0};
2070 
2071 	/*
2072 	 * This is currently the only chance to delete a devinfo node, which
2073 	 * is _not_ always successful.
2074 	 */
2075 	(void) ddi_deviname(dip, devname);
2076 	(void) devfs_clean(ddi_get_parent(dip), devname + 1, DV_CLEAN_FORCE);
2077 	(void) ndi_devi_offline(dip, NDI_DEVI_REMOVE);
2078 }
2079 
2080 static void
2081 i_xvdi_oestate_cb(struct xenbus_device *dev, XenbusState oestate)
2082 {
2083 	dev_info_t *dip = (dev_info_t *)dev->data;
2084 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
2085 	i_oestate_evt_t *evt = NULL;
2086 	boolean_t call_handler;
2087 
2088 	XVDI_DPRINTF(XVDI_DBG_STATE,
2089 	    "i_xvdi_oestate_cb: %s@%s sees oestate change to %d\n",
2090 	    ddi_binding_name(dip) == NULL ? "null" : ddi_binding_name(dip),
2091 	    ddi_get_name_addr(dip) == NULL ? "null" : ddi_get_name_addr(dip),
2092 	    oestate);
2093 
2094 	/* only call the handler if our state has changed */
2095 	call_handler = B_FALSE;
2096 	mutex_enter(&pdp->xd_ndi_lk);
2097 	if (dev->otherend_state != oestate) {
2098 		dev->otherend_state = oestate;
2099 		call_handler = B_TRUE;
2100 	}
2101 	mutex_exit(&pdp->xd_ndi_lk);
2102 
2103 	if (call_handler) {
2104 		/*
2105 		 * Try to deliver the oestate change event to the dip
2106 		 */
2107 		evt = kmem_alloc(sizeof (i_oestate_evt_t), KM_SLEEP);
2108 		evt->dip = dip;
2109 		evt->state = oestate;
2110 		(void) ddi_taskq_dispatch(pdp->xd_oe_taskq,
2111 		    i_xvdi_oestate_handler, (void *)evt, DDI_SLEEP);
2112 	}
2113 }
2114 
2115 /*ARGSUSED*/
2116 static void
2117 i_xvdi_hpstate_cb(struct xenbus_watch *w, const char **vec,
2118     unsigned int len)
2119 {
2120 	dev_info_t *dip = (dev_info_t *)w->dev;
2121 	struct xendev_ppd *pdp = ddi_get_parent_data(dip);
2122 
2123 #ifdef DEBUG
2124 	char *hp_status = NULL;
2125 	unsigned int hpl = 0;
2126 
2127 	(void) xenbus_read(XBT_NULL, pdp->xd_hp_watch.node, "",
2128 	    (void *)&hp_status, &hpl);
2129 	XVDI_DPRINTF(XVDI_DBG_STATE,
2130 	    "i_xvdi_hpstate_cb: %s@%s sees hpstate change to %s\n",
2131 	    ddi_binding_name(dip) == NULL ?  "null" : ddi_binding_name(dip),
2132 	    ddi_get_name_addr(dip) == NULL ?  "null" : ddi_get_name_addr(dip),
2133 	    hp_status == NULL ? "null" : hp_status);
2134 	if (hp_status != NULL)
2135 		kmem_free(hp_status, hpl);
2136 #endif /* DEBUG */
2137 
2138 	(void) ddi_taskq_dispatch(pdp->xd_hp_taskq,
2139 	    i_xvdi_hpstate_handler, (void *)dip, DDI_SLEEP);
2140 }
2141 
2142 static void
2143 i_xvdi_probe_path_handler(void *arg)
2144 {
2145 	dev_info_t *parent;
2146 	char *path = arg, *p = NULL;
2147 	int i, vdev, circ;
2148 	i_xd_cfg_t *xdcp;
2149 	boolean_t frontend;
2150 	domid_t dom;
2151 
2152 	for (i = 0, xdcp = &xdci[0]; i < NXDC; i++, xdcp++) {
2153 
2154 		if ((xdcp->xs_path_fe != NULL) &&
2155 		    (strncmp(path, xdcp->xs_path_fe, strlen(xdcp->xs_path_fe))
2156 		    == 0)) {
2157 
2158 			frontend = B_TRUE;
2159 			p = path + strlen(xdcp->xs_path_fe);
2160 			break;
2161 		}
2162 
2163 		if ((xdcp->xs_path_be != NULL) &&
2164 		    (strncmp(path, xdcp->xs_path_be, strlen(xdcp->xs_path_be))
2165 		    == 0)) {
2166 
2167 			frontend = B_FALSE;
2168 			p = path + strlen(xdcp->xs_path_be);
2169 			break;
2170 		}
2171 
2172 	}
2173 
2174 	if (p == NULL) {
2175 		cmn_err(CE_WARN, "i_xvdi_probe_path_handler: "
2176 		    "unexpected path prefix in %s", path);
2177 		goto done;
2178 	}
2179 
2180 	if (frontend) {
2181 		dom = DOMID_SELF;
2182 		if (sscanf(p, "/%d/", &vdev) != 1) {
2183 			XVDI_DPRINTF(XVDI_DBG_PROBE,
2184 			    "i_xvdi_probe_path_handler: "
2185 			    "cannot parse frontend path %s",
2186 			    path);
2187 			goto done;
2188 		}
2189 	} else {
2190 		if (sscanf(p, "/%hu/%d/", &dom, &vdev) != 2) {
2191 			XVDI_DPRINTF(XVDI_DBG_PROBE,
2192 			    "i_xvdi_probe_path_handler: "
2193 			    "cannot parse backend path %s",
2194 			    path);
2195 			goto done;
2196 		}
2197 	}
2198 
2199 	/*
2200 	 * This is an oxymoron, so indicates a bogus configuration we
2201 	 * must check for.
2202 	 */
2203 	if (vdev == VDEV_NOXS) {
2204 		cmn_err(CE_WARN, "i_xvdi_probe_path_handler: "
2205 		    "invalid path %s", path);
2206 		goto done;
2207 	}
2208 
2209 	parent = xendev_dip;
2210 	ASSERT(parent != NULL);
2211 
2212 	ndi_devi_enter(parent, &circ);
2213 
2214 	if (xvdi_find_dev(parent, xdcp->devclass, dom, vdev) == NULL) {
2215 		XVDI_DPRINTF(XVDI_DBG_PROBE,
2216 		    "i_xvdi_probe_path_handler: create for %s", path);
2217 		(void) xvdi_create_dev(parent, xdcp->devclass, dom, vdev);
2218 	} else {
2219 		XVDI_DPRINTF(XVDI_DBG_PROBE,
2220 		    "i_xvdi_probe_path_handler: %s already exists", path);
2221 	}
2222 
2223 	ndi_devi_exit(parent, circ);
2224 
2225 done:
2226 	kmem_free(path, strlen(path) + 1);
2227 }
2228