xref: /illumos-gate/usr/src/uts/common/xen/io/xenbus_xs.c (revision ab4a9beb)
1843e1988Sjohnlev /*
2843e1988Sjohnlev  * CDDL HEADER START
3843e1988Sjohnlev  *
4843e1988Sjohnlev  * The contents of this file are subject to the terms of the
5843e1988Sjohnlev  * Common Development and Distribution License (the "License").
6843e1988Sjohnlev  * You may not use this file except in compliance with the License.
7843e1988Sjohnlev  *
8843e1988Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev  * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev  * See the License for the specific language governing permissions
11843e1988Sjohnlev  * and limitations under the License.
12843e1988Sjohnlev  *
13843e1988Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev  *
19843e1988Sjohnlev  * CDDL HEADER END
20843e1988Sjohnlev  */
21843e1988Sjohnlev 
22843e1988Sjohnlev /*
23843e1988Sjohnlev  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24843e1988Sjohnlev  * Use is subject to license terms.
25843e1988Sjohnlev  */
26843e1988Sjohnlev 
27843e1988Sjohnlev /*
28843e1988Sjohnlev  *
29843e1988Sjohnlev  * xenbus_xs.c
30843e1988Sjohnlev  *
31843e1988Sjohnlev  * This is the kernel equivalent of the "xs" library.  We don't need everything
32843e1988Sjohnlev  * and we use xenbus_comms for communication.
33843e1988Sjohnlev  *
34843e1988Sjohnlev  * Copyright (C) 2005 Rusty Russell, IBM Corporation
35843e1988Sjohnlev  *
36843e1988Sjohnlev  * This file may be distributed separately from the Linux kernel, or
37843e1988Sjohnlev  * incorporated into other software packages, subject to the following license:
38843e1988Sjohnlev  *
39843e1988Sjohnlev  * Permission is hereby granted, free of charge, to any person obtaining a copy
40843e1988Sjohnlev  * of this source file (the "Software"), to deal in the Software without
41843e1988Sjohnlev  * restriction, including without limitation the rights to use, copy, modify,
42843e1988Sjohnlev  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43843e1988Sjohnlev  * and to permit persons to whom the Software is furnished to do so, subject to
44843e1988Sjohnlev  * the following conditions:
45843e1988Sjohnlev  *
46843e1988Sjohnlev  * The above copyright notice and this permission notice shall be included in
47843e1988Sjohnlev  * all copies or substantial portions of the Software.
48843e1988Sjohnlev  *
49843e1988Sjohnlev  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50843e1988Sjohnlev  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51843e1988Sjohnlev  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52843e1988Sjohnlev  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53843e1988Sjohnlev  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54843e1988Sjohnlev  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55843e1988Sjohnlev  * IN THE SOFTWARE.
56843e1988Sjohnlev  */
57843e1988Sjohnlev 
58843e1988Sjohnlev /*
59843e1988Sjohnlev  * NOTE: To future maintainers of the Solaris version of this file:
60843e1988Sjohnlev  * I found the Linux version of this code to be very disgusting in
61843e1988Sjohnlev  * overloading pointers and error codes into void * return values.
62843e1988Sjohnlev  * The main difference you will find is that all such usage is changed
63843e1988Sjohnlev  * to pass pointers to void* to be filled in with return values and
64843e1988Sjohnlev  * the functions return error codes.
65843e1988Sjohnlev  */
66843e1988Sjohnlev 
67843e1988Sjohnlev #pragma ident	"%Z%%M%	%I%	%E% SMI"
68843e1988Sjohnlev 
69843e1988Sjohnlev #include <sys/errno.h>
70843e1988Sjohnlev #include <sys/types.h>
71843e1988Sjohnlev #include <sys/sysmacros.h>
72843e1988Sjohnlev #include <sys/uio.h>
73843e1988Sjohnlev #include <sys/mutex.h>
74843e1988Sjohnlev #include <sys/condvar.h>
75843e1988Sjohnlev #include <sys/rwlock.h>
76843e1988Sjohnlev #include <sys/disp.h>
77843e1988Sjohnlev #include <sys/ddi.h>
78843e1988Sjohnlev #include <sys/sunddi.h>
79843e1988Sjohnlev #include <sys/avintr.h>
80843e1988Sjohnlev #include <sys/cmn_err.h>
81843e1988Sjohnlev #include <util/sscanf.h>
82843e1988Sjohnlev #define	_XSD_ERRORS_DEFINED
83843e1988Sjohnlev #include <sys/hypervisor.h>
84843e1988Sjohnlev #include <sys/mach_mmu.h>
85*ab4a9bebSjohnlev #include <sys/taskq.h>
86*ab4a9bebSjohnlev #include <sys/sdt.h>
87843e1988Sjohnlev #include <xen/sys/xenbus_impl.h>
88843e1988Sjohnlev #include <xen/sys/xenbus_comms.h>
89843e1988Sjohnlev #include <xen/sys/xendev.h>
90843e1988Sjohnlev #include <xen/public/io/xs_wire.h>
91843e1988Sjohnlev 
/* Evaluates to non-zero when strcmp() reports that a and b are equal. */
#define	streq(a, b) (strcmp((a), (b)) == 0)

/* Evaluates to non-zero when the list has no head element. */
#define	list_empty(list) (list_head(list) == NULL)
95843e1988Sjohnlev 
/*
 * A message that has been received from xenstore and queued for a consumer.
 * The union holds either the payload of a reply or the decoded form of a
 * watch event.
 */
struct xs_stored_msg {
	/* NOTE(review): presumably the queue linkage for this message. */
	list_t list;

	/* Header of the message; hdr.req_id is matched by read_reply(). */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/* Freed in this file as hdr.len + 1 bytes. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			struct xenbus_watch *handle;
			char **vec;
			unsigned int vec_size;
		} watch;
	} un;
};
115843e1988Sjohnlev 
/*
 * State shared by every request/reply exchange with xenstore.  There is a
 * single instance, xs_state.
 */
static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	/* Held by read_reply() while it examines or edits reply_list. */
	kmutex_t reply_lock;
	/* read_reply() sleeps here while reply_list is empty. */
	kcondvar_t reply_cv;

	/* One request at a time. */
	kmutex_t request_mutex;

	/*
	 * Protect transactions against save/restore.  Taken as reader when a
	 * transaction starts and dropped when it ends.
	 */
	krwlock_t suspend_lock;
} xs_state;
128843e1988Sjohnlev 
/*
 * Request id given to the next outgoing message.  xs_talkv() and
 * xenbus_dev_request_and_reply() post-increment it while holding
 * xs_state.request_mutex.
 */
static int last_req_id;

/*
 * List of clients wanting a xenstore up notification, and a lock to protect it
 *
 * xenstore_up is set by xs_notify_xenstore_up() and cleared by
 * xs_notify_xenstore_down(); both dispatch do_notify_callbacks() on
 * xenbus_taskq.
 */
static boolean_t xenstore_up;
static list_t notify_list;
static kmutex_t notify_list_lock;
static taskq_t *xenbus_taskq;

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_cv for work to do (queued on watch_events list). When it
 * wakes up it acquires the xenwatch_mutex before reading the list and
 * carrying out work.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

/* Forward declaration: read_reply() calls this while polling for a reply. */
static int process_msg(void);
157843e1988Sjohnlev 
158843e1988Sjohnlev static int
159843e1988Sjohnlev get_error(const char *errorstring)
160843e1988Sjohnlev {
161843e1988Sjohnlev 	unsigned int i;
162843e1988Sjohnlev 
163843e1988Sjohnlev 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
164843e1988Sjohnlev 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
165843e1988Sjohnlev 			cmn_err(CE_WARN,
166843e1988Sjohnlev 			    "XENBUS xen store gave: unknown error %s",
167843e1988Sjohnlev 			    errorstring);
168843e1988Sjohnlev 			return (EINVAL);
169843e1988Sjohnlev 		}
170843e1988Sjohnlev 	}
171843e1988Sjohnlev 	return (xsd_errors[i].errnum);
172843e1988Sjohnlev }
173843e1988Sjohnlev 
174843e1988Sjohnlev /*
175843e1988Sjohnlev  * Read a synchronous reply from xenstore.  Since we can return early before
176843e1988Sjohnlev  * reading a relevant reply, we discard any messages not matching the request
177843e1988Sjohnlev  * ID.  Caller must free returned message on success.
178843e1988Sjohnlev  */
179843e1988Sjohnlev static int
180843e1988Sjohnlev read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
181843e1988Sjohnlev {
182843e1988Sjohnlev 	extern int do_polled_io;
183843e1988Sjohnlev 
184843e1988Sjohnlev 	mutex_enter(&xs_state.reply_lock);
185843e1988Sjohnlev 
186843e1988Sjohnlev 	for (;;) {
187843e1988Sjohnlev 		while (list_empty(&xs_state.reply_list)) {
188843e1988Sjohnlev 			if (interrupts_unleashed && !do_polled_io) {
189843e1988Sjohnlev 				if (cv_wait_sig(&xs_state.reply_cv,
190843e1988Sjohnlev 				    &xs_state.reply_lock) == 0) {
191843e1988Sjohnlev 					mutex_exit(&xs_state.reply_lock);
192843e1988Sjohnlev 					*reply = NULL;
193843e1988Sjohnlev 					return (EINTR);
194843e1988Sjohnlev 				}
195843e1988Sjohnlev 			} else { /* polled mode needed for early probes */
196843e1988Sjohnlev 				mutex_exit(&xs_state.reply_lock);
197843e1988Sjohnlev 				(void) HYPERVISOR_yield();
198843e1988Sjohnlev 				(void) process_msg();
199843e1988Sjohnlev 				mutex_enter(&xs_state.reply_lock);
200843e1988Sjohnlev 			}
201843e1988Sjohnlev 		}
202843e1988Sjohnlev 
203843e1988Sjohnlev 		*reply = list_head(&xs_state.reply_list);
204843e1988Sjohnlev 		list_remove(&xs_state.reply_list, *reply);
205843e1988Sjohnlev 
206843e1988Sjohnlev 		if ((*reply)->hdr.req_id == req_hdr->req_id)
207843e1988Sjohnlev 			break;
208843e1988Sjohnlev 	}
209843e1988Sjohnlev 
210843e1988Sjohnlev 	mutex_exit(&xs_state.reply_lock);
211843e1988Sjohnlev 	return (0);
212843e1988Sjohnlev }
213843e1988Sjohnlev 
214843e1988Sjohnlev /* Emergency write. */
215843e1988Sjohnlev void
216843e1988Sjohnlev xenbus_debug_write(const char *str, unsigned int count)
217843e1988Sjohnlev {
218843e1988Sjohnlev 	struct xsd_sockmsg msg = { 0 };
219843e1988Sjohnlev 
220843e1988Sjohnlev 	msg.type = XS_DEBUG;
221843e1988Sjohnlev 	msg.len = sizeof ("print") + count + 1;
222843e1988Sjohnlev 
223843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
224843e1988Sjohnlev 	(void) xb_write(&msg, sizeof (msg));
225843e1988Sjohnlev 	(void) xb_write("print", sizeof ("print"));
226843e1988Sjohnlev 	(void) xb_write(str, count);
227843e1988Sjohnlev 	(void) xb_write("", 1);
228843e1988Sjohnlev 	mutex_exit(&xs_state.request_mutex);
229843e1988Sjohnlev }
230843e1988Sjohnlev 
231843e1988Sjohnlev /*
232843e1988Sjohnlev  * This is pretty unpleasant.  First off, there's the horrible logic around
233843e1988Sjohnlev  * suspend_lock and transactions.  Also, we can be interrupted either before we
234843e1988Sjohnlev  * write a message, or before we receive a reply.  A client that wants to
235843e1988Sjohnlev  * survive this can't know which case happened.  Luckily all clients don't care
236843e1988Sjohnlev  * about signals currently, and the alternative (a hard wait on a userspace
237843e1988Sjohnlev  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
238843e1988Sjohnlev  */
239843e1988Sjohnlev int
240843e1988Sjohnlev xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
241843e1988Sjohnlev {
242843e1988Sjohnlev 	struct xsd_sockmsg req_msg = *msg;
243843e1988Sjohnlev 	struct xs_stored_msg *reply_msg = NULL;
244843e1988Sjohnlev 	int err;
245843e1988Sjohnlev 
246843e1988Sjohnlev 	if (req_msg.type == XS_TRANSACTION_START)
247843e1988Sjohnlev 		rw_enter(&xs_state.suspend_lock, RW_READER);
248843e1988Sjohnlev 
249843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
250843e1988Sjohnlev 
251843e1988Sjohnlev 	msg->req_id = last_req_id++;
252843e1988Sjohnlev 
253843e1988Sjohnlev 	err = xb_write(msg, sizeof (*msg) + msg->len);
254843e1988Sjohnlev 	if (err) {
255843e1988Sjohnlev 		if (req_msg.type == XS_TRANSACTION_START)
256843e1988Sjohnlev 			rw_exit(&xs_state.suspend_lock);
257843e1988Sjohnlev 		msg->type = XS_ERROR;
258843e1988Sjohnlev 		*reply = NULL;
259843e1988Sjohnlev 		goto out;
260843e1988Sjohnlev 	}
261843e1988Sjohnlev 
262843e1988Sjohnlev 	err = read_reply(msg, &reply_msg);
263843e1988Sjohnlev 
264843e1988Sjohnlev 	if (err) {
265843e1988Sjohnlev 		if (msg->type == XS_TRANSACTION_START)
266843e1988Sjohnlev 			rw_exit(&xs_state.suspend_lock);
267843e1988Sjohnlev 		*reply = NULL;
268843e1988Sjohnlev 		goto out;
269843e1988Sjohnlev 	}
270843e1988Sjohnlev 
271843e1988Sjohnlev 	*reply = reply_msg->un.reply.body;
272843e1988Sjohnlev 	*msg = reply_msg->hdr;
273843e1988Sjohnlev 
274843e1988Sjohnlev 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
275843e1988Sjohnlev 		rw_exit(&xs_state.suspend_lock);
276843e1988Sjohnlev 
277843e1988Sjohnlev out:
278843e1988Sjohnlev 	if (reply_msg != NULL)
279843e1988Sjohnlev 		kmem_free(reply_msg, sizeof (*reply_msg));
280843e1988Sjohnlev 
281843e1988Sjohnlev 	mutex_exit(&xs_state.request_mutex);
282843e1988Sjohnlev 	return (err);
283843e1988Sjohnlev }
284843e1988Sjohnlev 
285843e1988Sjohnlev /*
286843e1988Sjohnlev  * Send message to xs, return errcode, rval filled in with pointer
287843e1988Sjohnlev  * to kmem_alloc'ed reply.
288843e1988Sjohnlev  */
289843e1988Sjohnlev static int
290843e1988Sjohnlev xs_talkv(xenbus_transaction_t t,
291843e1988Sjohnlev 		    enum xsd_sockmsg_type type,
292843e1988Sjohnlev 		    const iovec_t *iovec,
293843e1988Sjohnlev 		    unsigned int num_vecs,
294843e1988Sjohnlev 		    void **rval,
295843e1988Sjohnlev 		    unsigned int *len)
296843e1988Sjohnlev {
297843e1988Sjohnlev 	struct xsd_sockmsg msg;
298843e1988Sjohnlev 	struct xs_stored_msg *reply_msg;
299843e1988Sjohnlev 	char *reply;
300843e1988Sjohnlev 	unsigned int i;
301843e1988Sjohnlev 	int err;
302843e1988Sjohnlev 
303843e1988Sjohnlev 	msg.tx_id = (uint32_t)(unsigned long)t;
304843e1988Sjohnlev 	msg.type = type;
305843e1988Sjohnlev 	msg.len = 0;
306843e1988Sjohnlev 	for (i = 0; i < num_vecs; i++)
307843e1988Sjohnlev 		msg.len += iovec[i].iov_len;
308843e1988Sjohnlev 
309843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
310843e1988Sjohnlev 
311843e1988Sjohnlev 	msg.req_id = last_req_id++;
312843e1988Sjohnlev 
313843e1988Sjohnlev 	err = xb_write(&msg, sizeof (msg));
314843e1988Sjohnlev 	if (err) {
315843e1988Sjohnlev 		mutex_exit(&xs_state.request_mutex);
316843e1988Sjohnlev 		return (err);
317843e1988Sjohnlev 	}
318843e1988Sjohnlev 
319843e1988Sjohnlev 	for (i = 0; i < num_vecs; i++) {
320843e1988Sjohnlev 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
321843e1988Sjohnlev 		if (err) {
322843e1988Sjohnlev 			mutex_exit(&xs_state.request_mutex);
323843e1988Sjohnlev 			return (err);
324843e1988Sjohnlev 		}
325843e1988Sjohnlev 	}
326843e1988Sjohnlev 
327843e1988Sjohnlev 	err = read_reply(&msg, &reply_msg);
328843e1988Sjohnlev 
329843e1988Sjohnlev 	mutex_exit(&xs_state.request_mutex);
330843e1988Sjohnlev 
331843e1988Sjohnlev 	if (err)
332843e1988Sjohnlev 		return (err);
333843e1988Sjohnlev 
334843e1988Sjohnlev 	reply = reply_msg->un.reply.body;
335843e1988Sjohnlev 
336843e1988Sjohnlev 	if (reply_msg->hdr.type == XS_ERROR) {
337843e1988Sjohnlev 		err = get_error(reply);
338843e1988Sjohnlev 		kmem_free(reply, reply_msg->hdr.len + 1);
339843e1988Sjohnlev 		goto out;
340843e1988Sjohnlev 	}
341843e1988Sjohnlev 
342843e1988Sjohnlev 	if (len != NULL)
343843e1988Sjohnlev 		*len = reply_msg->hdr.len + 1;
344843e1988Sjohnlev 
345843e1988Sjohnlev 	ASSERT(reply_msg->hdr.type == type);
346843e1988Sjohnlev 
347843e1988Sjohnlev 	if (rval != NULL)
348843e1988Sjohnlev 		*rval = reply;
349843e1988Sjohnlev 	else
350843e1988Sjohnlev 		kmem_free(reply, reply_msg->hdr.len + 1);
351843e1988Sjohnlev 
352843e1988Sjohnlev out:
353843e1988Sjohnlev 	kmem_free(reply_msg, sizeof (*reply_msg));
354843e1988Sjohnlev 	return (err);
355843e1988Sjohnlev }
356843e1988Sjohnlev 
357843e1988Sjohnlev /* Simplified version of xs_talkv: single message. */
358843e1988Sjohnlev static int
359843e1988Sjohnlev xs_single(xenbus_transaction_t t,
360843e1988Sjohnlev 			enum xsd_sockmsg_type type,
361843e1988Sjohnlev 			const char *string, void **ret,
362843e1988Sjohnlev 			unsigned int *len)
363843e1988Sjohnlev {
364843e1988Sjohnlev 	iovec_t iovec;
365843e1988Sjohnlev 
366843e1988Sjohnlev 	iovec.iov_base = (char *)string;
367843e1988Sjohnlev 	iovec.iov_len = strlen(string) + 1;
368843e1988Sjohnlev 	return (xs_talkv(t, type, &iovec, 1, ret, len));
369843e1988Sjohnlev }
370843e1988Sjohnlev 
/*
 * Count the NUL-terminated strings that start within the first 'len' bytes
 * of 'strings'.  A string is counted if its first byte lies inside the range,
 * even if it extends past it.
 */
static unsigned int
count_strings(const char *strings, unsigned int len)
{
	const char *end = strings + len;
	const char *cur = strings;
	unsigned int nstrings = 0;

	while (cur < end) {
		nstrings++;
		cur += strlen(cur) + 1;
	}

	return (nstrings);
}
382843e1988Sjohnlev 
383843e1988Sjohnlev /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
384843e1988Sjohnlev static char *
385843e1988Sjohnlev join(const char *dir, const char *name)
386843e1988Sjohnlev {
387843e1988Sjohnlev 	char *buffer;
388843e1988Sjohnlev 	size_t slashlen;
389843e1988Sjohnlev 
390843e1988Sjohnlev 	slashlen = streq(name, "") ? 0 : 1;
391843e1988Sjohnlev 	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
392843e1988Sjohnlev 	    KM_SLEEP);
393843e1988Sjohnlev 
394843e1988Sjohnlev 	(void) strcpy(buffer, dir);
395843e1988Sjohnlev 	if (slashlen != 0) {
396843e1988Sjohnlev 		(void) strcat(buffer, "/");
397843e1988Sjohnlev 		(void) strcat(buffer, name);
398843e1988Sjohnlev 	}
399843e1988Sjohnlev 	return (buffer);
400843e1988Sjohnlev }
401843e1988Sjohnlev 
402843e1988Sjohnlev static char **
403843e1988Sjohnlev split(char *strings, unsigned int len, unsigned int *num)
404843e1988Sjohnlev {
405843e1988Sjohnlev 	char *p, **ret;
406843e1988Sjohnlev 
407843e1988Sjohnlev 	/* Count the strings. */
408843e1988Sjohnlev 	if ((*num = count_strings(strings, len - 1)) == 0)
409843e1988Sjohnlev 		return (NULL);
410843e1988Sjohnlev 
411843e1988Sjohnlev 	/* Transfer to one big alloc for easy freeing. */
412843e1988Sjohnlev 	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
413843e1988Sjohnlev 	(void) memcpy(&ret[*num], strings, len - 1);
414843e1988Sjohnlev 	kmem_free(strings, len);
415843e1988Sjohnlev 
416843e1988Sjohnlev 	strings = (char *)&ret[*num];
417843e1988Sjohnlev 	for (p = strings, *num = 0; p < strings + (len - 1);
418843e1988Sjohnlev 	    p += strlen(p) + 1) {
419843e1988Sjohnlev 		ret[(*num)++] = p;
420843e1988Sjohnlev 	}
421843e1988Sjohnlev 
422843e1988Sjohnlev 	return (ret);
423843e1988Sjohnlev }
424843e1988Sjohnlev 
425843e1988Sjohnlev char **
426843e1988Sjohnlev xenbus_directory(xenbus_transaction_t t,
427843e1988Sjohnlev 			const char *dir, const char *node, unsigned int *num)
428843e1988Sjohnlev {
429843e1988Sjohnlev 	char *strings, *path;
430843e1988Sjohnlev 	unsigned int len;
431843e1988Sjohnlev 	int err;
432843e1988Sjohnlev 
433843e1988Sjohnlev 	path = join(dir, node);
434843e1988Sjohnlev 	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
435843e1988Sjohnlev 	kmem_free(path, strlen(path) + 1);
436843e1988Sjohnlev 	if (err != 0 || strings == NULL) {
437843e1988Sjohnlev 		/* sigh, we lose error code info here */
438843e1988Sjohnlev 		*num = 0;
439843e1988Sjohnlev 		return (NULL);
440843e1988Sjohnlev 	}
441843e1988Sjohnlev 
442843e1988Sjohnlev 	return (split(strings, len, num));
443843e1988Sjohnlev }
444843e1988Sjohnlev 
445843e1988Sjohnlev /* Check if a path exists. Return 1 if it does. */
446843e1988Sjohnlev int
447843e1988Sjohnlev xenbus_exists(xenbus_transaction_t t, const char *dir, const char *node)
448843e1988Sjohnlev {
449843e1988Sjohnlev 	char **d;
450843e1988Sjohnlev 	unsigned int dir_n;
451843e1988Sjohnlev 	int i, len;
452843e1988Sjohnlev 
453843e1988Sjohnlev 	d = xenbus_directory(t, dir, node, &dir_n);
454843e1988Sjohnlev 	if (d == NULL)
455843e1988Sjohnlev 		return (0);
456843e1988Sjohnlev 	for (i = 0, len = 0; i < dir_n; i++)
457843e1988Sjohnlev 		len += strlen(d[i]) + 1 + sizeof (char *);
458843e1988Sjohnlev 	kmem_free(d, len);
459843e1988Sjohnlev 	return (1);
460843e1988Sjohnlev }
461843e1988Sjohnlev 
462843e1988Sjohnlev /*
463843e1988Sjohnlev  * Get the value of a single file.
464843e1988Sjohnlev  * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
465843e1988Sjohnlev  * len indicates length in bytes.
466843e1988Sjohnlev  */
467843e1988Sjohnlev int
468843e1988Sjohnlev xenbus_read(xenbus_transaction_t t,
469843e1988Sjohnlev 	    const char *dir, const char *node, void **retp, unsigned int *len)
470843e1988Sjohnlev {
471843e1988Sjohnlev 	char *path;
472843e1988Sjohnlev 	int err;
473843e1988Sjohnlev 
474843e1988Sjohnlev 	path = join(dir, node);
475843e1988Sjohnlev 	err = xs_single(t, XS_READ, path, retp, len);
476843e1988Sjohnlev 	kmem_free(path, strlen(path) + 1);
477843e1988Sjohnlev 	return (err);
478843e1988Sjohnlev }
479843e1988Sjohnlev 
480843e1988Sjohnlev /*
481843e1988Sjohnlev  * Write the value of a single file.
482843e1988Sjohnlev  * Returns err on failure.
483843e1988Sjohnlev  */
484843e1988Sjohnlev int
485843e1988Sjohnlev xenbus_write(xenbus_transaction_t t,
486843e1988Sjohnlev 		const char *dir, const char *node, const char *string)
487843e1988Sjohnlev {
488843e1988Sjohnlev 	char *path;
489843e1988Sjohnlev 	iovec_t iovec[2];
490843e1988Sjohnlev 	int ret;
491843e1988Sjohnlev 
492843e1988Sjohnlev 	path = join(dir, node);
493843e1988Sjohnlev 
494843e1988Sjohnlev 	iovec[0].iov_base = (void *)path;
495843e1988Sjohnlev 	iovec[0].iov_len = strlen(path) + 1;
496843e1988Sjohnlev 	iovec[1].iov_base = (void *)string;
497843e1988Sjohnlev 	iovec[1].iov_len = strlen(string);
498843e1988Sjohnlev 
499843e1988Sjohnlev 	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
500843e1988Sjohnlev 	kmem_free(path, iovec[0].iov_len);
501843e1988Sjohnlev 	return (ret);
502843e1988Sjohnlev }
503843e1988Sjohnlev 
504843e1988Sjohnlev /* Create a new directory. */
505843e1988Sjohnlev int
506843e1988Sjohnlev xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
507843e1988Sjohnlev {
508843e1988Sjohnlev 	char *path;
509843e1988Sjohnlev 	int ret;
510843e1988Sjohnlev 
511843e1988Sjohnlev 	path = join(dir, node);
512843e1988Sjohnlev 	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
513843e1988Sjohnlev 	kmem_free(path, strlen(path) + 1);
514843e1988Sjohnlev 	return (ret);
515843e1988Sjohnlev }
516843e1988Sjohnlev 
517843e1988Sjohnlev /* Destroy a file or directory (directories must be empty). */
518843e1988Sjohnlev int
519843e1988Sjohnlev xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
520843e1988Sjohnlev {
521843e1988Sjohnlev 	char *path;
522843e1988Sjohnlev 	int ret;
523843e1988Sjohnlev 
524843e1988Sjohnlev 	path = join(dir, node);
525843e1988Sjohnlev 	ret = xs_single(t, XS_RM, path, NULL, NULL);
526843e1988Sjohnlev 	kmem_free(path, strlen(path) + 1);
527843e1988Sjohnlev 	return (ret);
528843e1988Sjohnlev }
529843e1988Sjohnlev 
530843e1988Sjohnlev /*
531843e1988Sjohnlev  * Start a transaction: changes by others will not be seen during this
532843e1988Sjohnlev  * transaction, and changes will not be visible to others until end.
533843e1988Sjohnlev  */
534843e1988Sjohnlev int
535843e1988Sjohnlev xenbus_transaction_start(xenbus_transaction_t *t)
536843e1988Sjohnlev {
537843e1988Sjohnlev 	void *id_str;
538843e1988Sjohnlev 	unsigned long id;
539843e1988Sjohnlev 	int err;
540843e1988Sjohnlev 	unsigned int len;
541843e1988Sjohnlev 
542843e1988Sjohnlev 	rw_enter(&xs_state.suspend_lock, RW_READER);
543843e1988Sjohnlev 
544843e1988Sjohnlev 	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
545843e1988Sjohnlev 	if (err) {
546843e1988Sjohnlev 		rw_exit(&xs_state.suspend_lock);
547843e1988Sjohnlev 		return (err);
548843e1988Sjohnlev 	}
549843e1988Sjohnlev 
550843e1988Sjohnlev 	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
551843e1988Sjohnlev 	*t = (xenbus_transaction_t)id;
552843e1988Sjohnlev 	kmem_free(id_str, len);
553843e1988Sjohnlev 
554843e1988Sjohnlev 	return (0);
555843e1988Sjohnlev }
556843e1988Sjohnlev 
557843e1988Sjohnlev /*
558843e1988Sjohnlev  * End a transaction.
559843e1988Sjohnlev  * If abandon is true, transaction is discarded instead of committed.
560843e1988Sjohnlev  */
561843e1988Sjohnlev int
562843e1988Sjohnlev xenbus_transaction_end(xenbus_transaction_t t, int abort)
563843e1988Sjohnlev {
564843e1988Sjohnlev 	char abortstr[2];
565843e1988Sjohnlev 	int err;
566843e1988Sjohnlev 
567843e1988Sjohnlev 	if (abort)
568843e1988Sjohnlev 		(void) strcpy(abortstr, "F");
569843e1988Sjohnlev 	else
570843e1988Sjohnlev 		(void) strcpy(abortstr, "T");
571843e1988Sjohnlev 
572843e1988Sjohnlev 	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
573843e1988Sjohnlev 
574843e1988Sjohnlev 	rw_exit(&xs_state.suspend_lock);
575843e1988Sjohnlev 
576843e1988Sjohnlev 	return (err);
577843e1988Sjohnlev }
578843e1988Sjohnlev 
579843e1988Sjohnlev /*
580843e1988Sjohnlev  * Single read and scanf: returns errno or 0.  This can only handle a single
581843e1988Sjohnlev  * conversion specifier.
582843e1988Sjohnlev  */
583843e1988Sjohnlev /* SCANFLIKE4 */
584843e1988Sjohnlev int
585843e1988Sjohnlev xenbus_scanf(xenbus_transaction_t t,
586843e1988Sjohnlev 		const char *dir, const char *node, const char *fmt, ...)
587843e1988Sjohnlev {
588843e1988Sjohnlev 	va_list ap;
589843e1988Sjohnlev 	int ret;
590843e1988Sjohnlev 	char *val;
591843e1988Sjohnlev 	unsigned int len;
592843e1988Sjohnlev 
593843e1988Sjohnlev 	ret = xenbus_read(t, dir, node, (void **)&val, &len);
594843e1988Sjohnlev 	if (ret)
595843e1988Sjohnlev 		return (ret);
596843e1988Sjohnlev 
597843e1988Sjohnlev 	va_start(ap, fmt);
598843e1988Sjohnlev 	if (vsscanf(val, fmt, ap) != 1)
599843e1988Sjohnlev 		ret = ERANGE;
600843e1988Sjohnlev 	va_end(ap);
601843e1988Sjohnlev 	kmem_free(val, len);
602843e1988Sjohnlev 	return (ret);
603843e1988Sjohnlev }
604843e1988Sjohnlev 
605843e1988Sjohnlev /* Single printf and write: returns errno or 0. */
606843e1988Sjohnlev /* PRINTFLIKE4 */
607843e1988Sjohnlev int
608843e1988Sjohnlev xenbus_printf(xenbus_transaction_t t,
609843e1988Sjohnlev 		const char *dir, const char *node, const char *fmt, ...)
610843e1988Sjohnlev {
611843e1988Sjohnlev 	va_list ap;
612843e1988Sjohnlev 	int ret;
613843e1988Sjohnlev #define	PRINTF_BUFFER_SIZE 4096
614843e1988Sjohnlev 	char *printf_buffer;
615843e1988Sjohnlev 
616843e1988Sjohnlev 	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);
617843e1988Sjohnlev 
618843e1988Sjohnlev 	va_start(ap, fmt);
619843e1988Sjohnlev 	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
620843e1988Sjohnlev 	va_end(ap);
621843e1988Sjohnlev 
622843e1988Sjohnlev 	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
623843e1988Sjohnlev 	ret = xenbus_write(t, dir, node, printf_buffer);
624843e1988Sjohnlev 
625843e1988Sjohnlev 	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);
626843e1988Sjohnlev 
627843e1988Sjohnlev 	return (ret);
628843e1988Sjohnlev }
629843e1988Sjohnlev 
630843e1988Sjohnlev 
631843e1988Sjohnlev /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
632843e1988Sjohnlev int
633843e1988Sjohnlev xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
634843e1988Sjohnlev {
635843e1988Sjohnlev 	va_list ap;
636843e1988Sjohnlev 	const char *name;
637843e1988Sjohnlev 	int ret = 0;
638843e1988Sjohnlev 	unsigned int len;
639843e1988Sjohnlev 
640843e1988Sjohnlev 	va_start(ap, dir);
641843e1988Sjohnlev 	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
642843e1988Sjohnlev 		const char *fmt = va_arg(ap, char *);
643843e1988Sjohnlev 		void *result = va_arg(ap, void *);
644843e1988Sjohnlev 		char *p;
645843e1988Sjohnlev 
646843e1988Sjohnlev 		ret = xenbus_read(t, dir, name, (void **)&p, &len);
647843e1988Sjohnlev 		if (ret)
648843e1988Sjohnlev 			break;
649843e1988Sjohnlev 		if (fmt) {
650843e1988Sjohnlev 			ASSERT(result != NULL);
651843e1988Sjohnlev 			if (sscanf(p, fmt, result) != 1)
652843e1988Sjohnlev 				ret = EINVAL;
653843e1988Sjohnlev 			kmem_free(p, len);
654843e1988Sjohnlev 		} else
655843e1988Sjohnlev 			*(char **)result = p;
656843e1988Sjohnlev 	}
657843e1988Sjohnlev 	va_end(ap);
658843e1988Sjohnlev 	return (ret);
659843e1988Sjohnlev }
660843e1988Sjohnlev 
661843e1988Sjohnlev static int
662843e1988Sjohnlev xs_watch(const char *path, const char *token)
663843e1988Sjohnlev {
664843e1988Sjohnlev 	iovec_t iov[2];
665843e1988Sjohnlev 
666843e1988Sjohnlev 	iov[0].iov_base = (void *)path;
667843e1988Sjohnlev 	iov[0].iov_len = strlen(path) + 1;
668843e1988Sjohnlev 	iov[1].iov_base = (void *)token;
669843e1988Sjohnlev 	iov[1].iov_len = strlen(token) + 1;
670843e1988Sjohnlev 
671843e1988Sjohnlev 	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
672843e1988Sjohnlev }
673843e1988Sjohnlev 
674843e1988Sjohnlev static int
675843e1988Sjohnlev xs_unwatch(const char *path, const char *token)
676843e1988Sjohnlev {
677843e1988Sjohnlev 	iovec_t iov[2];
678843e1988Sjohnlev 
679843e1988Sjohnlev 	iov[0].iov_base = (char *)path;
680843e1988Sjohnlev 	iov[0].iov_len = strlen(path) + 1;
681843e1988Sjohnlev 	iov[1].iov_base = (char *)token;
682843e1988Sjohnlev 	iov[1].iov_len = strlen(token) + 1;
683843e1988Sjohnlev 
684843e1988Sjohnlev 	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
685843e1988Sjohnlev }
686843e1988Sjohnlev 
687843e1988Sjohnlev static struct xenbus_watch *
688843e1988Sjohnlev find_watch(const char *token)
689843e1988Sjohnlev {
690843e1988Sjohnlev 	struct xenbus_watch *i, *cmp;
691843e1988Sjohnlev 
692843e1988Sjohnlev 	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);
693843e1988Sjohnlev 
694843e1988Sjohnlev 	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
695843e1988Sjohnlev 		if (i == cmp)
696843e1988Sjohnlev 			break;
697843e1988Sjohnlev 
698843e1988Sjohnlev 	return (i);
699843e1988Sjohnlev }
700843e1988Sjohnlev 
701843e1988Sjohnlev /* Register a xenstore state notify callback */
702843e1988Sjohnlev int
703843e1988Sjohnlev xs_register_xenbus_callback(void (*callback)(int))
704843e1988Sjohnlev {
705843e1988Sjohnlev 	struct xenbus_notify *xbn, *xnp;
706843e1988Sjohnlev 
707843e1988Sjohnlev 	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
708843e1988Sjohnlev 	xbn->notify_func = callback;
709843e1988Sjohnlev 	mutex_enter(&notify_list_lock);
710843e1988Sjohnlev 	/*
711843e1988Sjohnlev 	 * Make sure not already on the list
712843e1988Sjohnlev 	 */
713843e1988Sjohnlev 	xnp = list_head(&notify_list);
714843e1988Sjohnlev 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
715843e1988Sjohnlev 		if (xnp->notify_func == callback) {
716843e1988Sjohnlev 			kmem_free(xbn, sizeof (struct xenbus_notify));
717843e1988Sjohnlev 			mutex_exit(&notify_list_lock);
718843e1988Sjohnlev 			return (EEXIST);
719843e1988Sjohnlev 		}
720843e1988Sjohnlev 	}
721843e1988Sjohnlev 	xnp = xbn;
722843e1988Sjohnlev 	list_insert_tail(&notify_list, xbn);
723843e1988Sjohnlev done:
724843e1988Sjohnlev 	if (xenstore_up)
725843e1988Sjohnlev 		xnp->notify_func(XENSTORE_UP);
726843e1988Sjohnlev 	mutex_exit(&notify_list_lock);
727843e1988Sjohnlev 	return (0);
728843e1988Sjohnlev }
729843e1988Sjohnlev 
730843e1988Sjohnlev /*
731843e1988Sjohnlev  * Notify clients of xenstore state
732843e1988Sjohnlev  */
733843e1988Sjohnlev static void
734843e1988Sjohnlev do_notify_callbacks(void *arg)
735843e1988Sjohnlev {
736843e1988Sjohnlev 	struct xenbus_notify *xnp;
737843e1988Sjohnlev 
738843e1988Sjohnlev 	mutex_enter(&notify_list_lock);
739843e1988Sjohnlev 	xnp = list_head(&notify_list);
740843e1988Sjohnlev 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
741843e1988Sjohnlev 		xnp->notify_func((int)((uintptr_t)arg));
742843e1988Sjohnlev 	}
743843e1988Sjohnlev 	mutex_exit(&notify_list_lock);
744843e1988Sjohnlev }
745843e1988Sjohnlev 
746843e1988Sjohnlev void
747843e1988Sjohnlev xs_notify_xenstore_up(void)
748843e1988Sjohnlev {
749843e1988Sjohnlev 	xenstore_up = B_TRUE;
750843e1988Sjohnlev 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
751843e1988Sjohnlev 	    (void *)XENSTORE_UP, 0);
752843e1988Sjohnlev }
753843e1988Sjohnlev 
754843e1988Sjohnlev void
755843e1988Sjohnlev xs_notify_xenstore_down(void)
756843e1988Sjohnlev {
757843e1988Sjohnlev 	xenstore_up = B_FALSE;
758843e1988Sjohnlev 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
759843e1988Sjohnlev 	    (void *)XENSTORE_DOWN, 0);
760843e1988Sjohnlev }
761843e1988Sjohnlev 
762843e1988Sjohnlev /* Register callback to watch this node. */
763843e1988Sjohnlev int
764843e1988Sjohnlev register_xenbus_watch(struct xenbus_watch *watch)
765843e1988Sjohnlev {
766843e1988Sjohnlev 	/* Pointer in ascii is the token. */
767843e1988Sjohnlev 	char token[sizeof (watch) * 2 + 1];
768843e1988Sjohnlev 	int err;
769843e1988Sjohnlev 
770843e1988Sjohnlev 	ASSERT(xenstore_up);
771843e1988Sjohnlev 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
772843e1988Sjohnlev 
773843e1988Sjohnlev 	rw_enter(&xs_state.suspend_lock, RW_READER);
774843e1988Sjohnlev 
775843e1988Sjohnlev 	mutex_enter(&watches_lock);
776843e1988Sjohnlev 	/*
777843e1988Sjohnlev 	 * May be re-registering a watch if xenstore daemon was restarted
778843e1988Sjohnlev 	 */
779843e1988Sjohnlev 	if (find_watch(token) == NULL)
780843e1988Sjohnlev 		list_insert_tail(&watches, watch);
781843e1988Sjohnlev 	mutex_exit(&watches_lock);
782843e1988Sjohnlev 
783*ab4a9bebSjohnlev 	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
784*ab4a9bebSjohnlev 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
785*ab4a9bebSjohnlev 
786843e1988Sjohnlev 	err = xs_watch(watch->node, token);
787843e1988Sjohnlev 
788843e1988Sjohnlev 	/* Ignore errors due to multiple registration. */
789843e1988Sjohnlev 	if ((err != 0) && (err != EEXIST)) {
790843e1988Sjohnlev 		mutex_enter(&watches_lock);
791843e1988Sjohnlev 		list_remove(&watches, watch);
792843e1988Sjohnlev 		mutex_exit(&watches_lock);
793843e1988Sjohnlev 	}
794843e1988Sjohnlev 
795843e1988Sjohnlev 	rw_exit(&xs_state.suspend_lock);
796843e1988Sjohnlev 
797843e1988Sjohnlev 	return (err);
798843e1988Sjohnlev }
799843e1988Sjohnlev 
800843e1988Sjohnlev static void
801843e1988Sjohnlev free_stored_msg(struct xs_stored_msg *msg)
802843e1988Sjohnlev {
803843e1988Sjohnlev 	int i, len = 0;
804843e1988Sjohnlev 
805843e1988Sjohnlev 	for (i = 0; i < msg->un.watch.vec_size; i++)
806843e1988Sjohnlev 		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
807843e1988Sjohnlev 	kmem_free(msg->un.watch.vec, len);
808843e1988Sjohnlev 	kmem_free(msg, sizeof (*msg));
809843e1988Sjohnlev }
810843e1988Sjohnlev 
811843e1988Sjohnlev void
812843e1988Sjohnlev unregister_xenbus_watch(struct xenbus_watch *watch)
813843e1988Sjohnlev {
814843e1988Sjohnlev 	struct xs_stored_msg *msg;
815843e1988Sjohnlev 	char token[sizeof (watch) * 2 + 1];
816843e1988Sjohnlev 	int err;
817843e1988Sjohnlev 
818843e1988Sjohnlev 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
819843e1988Sjohnlev 
820843e1988Sjohnlev 	rw_enter(&xs_state.suspend_lock, RW_READER);
821843e1988Sjohnlev 
822843e1988Sjohnlev 	mutex_enter(&watches_lock);
823843e1988Sjohnlev 	ASSERT(find_watch(token));
824843e1988Sjohnlev 	list_remove(&watches, watch);
825843e1988Sjohnlev 	mutex_exit(&watches_lock);
826843e1988Sjohnlev 
827*ab4a9bebSjohnlev 	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
828*ab4a9bebSjohnlev 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
829*ab4a9bebSjohnlev 
830843e1988Sjohnlev 	err = xs_unwatch(watch->node, token);
831843e1988Sjohnlev 	if (err)
832843e1988Sjohnlev 		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
833843e1988Sjohnlev 		    watch->node, err);
834843e1988Sjohnlev 
835843e1988Sjohnlev 	rw_exit(&xs_state.suspend_lock);
836843e1988Sjohnlev 
837843e1988Sjohnlev 	/* Cancel pending watch events. */
838843e1988Sjohnlev 	mutex_enter(&watch_events_lock);
839843e1988Sjohnlev 	msg = list_head(&watch_events);
840843e1988Sjohnlev 
841843e1988Sjohnlev 	while (msg != NULL) {
842843e1988Sjohnlev 		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
843843e1988Sjohnlev 		if (msg->un.watch.handle == watch) {
844843e1988Sjohnlev 			list_remove(&watch_events, msg);
845843e1988Sjohnlev 			free_stored_msg(msg);
846843e1988Sjohnlev 		}
847843e1988Sjohnlev 		msg = tmp;
848843e1988Sjohnlev 	}
849843e1988Sjohnlev 
850843e1988Sjohnlev 	mutex_exit(&watch_events_lock);
851843e1988Sjohnlev 
852843e1988Sjohnlev 	/* Flush any currently-executing callback, unless we are it. :-) */
853843e1988Sjohnlev 	if (mutex_owner(&xenwatch_mutex) != curthread) {
854843e1988Sjohnlev 		mutex_enter(&xenwatch_mutex);
855843e1988Sjohnlev 		mutex_exit(&xenwatch_mutex);
856843e1988Sjohnlev 	}
857843e1988Sjohnlev }
858843e1988Sjohnlev 
859843e1988Sjohnlev void
860843e1988Sjohnlev xenbus_suspend(void)
861843e1988Sjohnlev {
862843e1988Sjohnlev 	rw_enter(&xs_state.suspend_lock, RW_WRITER);
863843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
864843e1988Sjohnlev 
865843e1988Sjohnlev 	xb_suspend();
866843e1988Sjohnlev }
867843e1988Sjohnlev 
868843e1988Sjohnlev void
869843e1988Sjohnlev xenbus_resume(void)
870843e1988Sjohnlev {
871843e1988Sjohnlev 	struct xenbus_watch *watch;
872843e1988Sjohnlev 	char token[sizeof (watch) * 2 + 1];
873843e1988Sjohnlev 
874843e1988Sjohnlev 	mutex_exit(&xs_state.request_mutex);
875843e1988Sjohnlev 
876843e1988Sjohnlev 	xb_init();
877843e1988Sjohnlev 	xb_setup_intr();
878843e1988Sjohnlev 
879843e1988Sjohnlev 	/* No need for watches_lock: the suspend_lock is sufficient. */
880843e1988Sjohnlev 	for (watch = list_head(&watches); watch != NULL;
881843e1988Sjohnlev 	    watch = list_next(&watches, watch)) {
882843e1988Sjohnlev 		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
883843e1988Sjohnlev 		(void) xs_watch(watch->node, token);
884843e1988Sjohnlev 	}
885843e1988Sjohnlev 
886843e1988Sjohnlev 	rw_exit(&xs_state.suspend_lock);
887843e1988Sjohnlev }
888843e1988Sjohnlev 
889843e1988Sjohnlev static void
890843e1988Sjohnlev xenwatch_thread(void)
891843e1988Sjohnlev {
892843e1988Sjohnlev 	struct xs_stored_msg *msg;
893*ab4a9bebSjohnlev 	struct xenbus_watch *watch;
894843e1988Sjohnlev 
895843e1988Sjohnlev 	for (;;) {
896843e1988Sjohnlev 		mutex_enter(&watch_events_lock);
897843e1988Sjohnlev 		while (list_empty(&watch_events))
898843e1988Sjohnlev 			cv_wait(&watch_events_cv, &watch_events_lock);
899843e1988Sjohnlev 		msg = list_head(&watch_events);
9001d03c31eSjohnlev 		ASSERT(msg != NULL);
9011d03c31eSjohnlev 		list_remove(&watch_events, msg);
902*ab4a9bebSjohnlev 		watch = msg->un.watch.handle;
903843e1988Sjohnlev 		mutex_exit(&watch_events_lock);
904843e1988Sjohnlev 
9051d03c31eSjohnlev 		mutex_enter(&xenwatch_mutex);
906*ab4a9bebSjohnlev 
907*ab4a9bebSjohnlev 		DTRACE_XPV4(xenbus__fire__watch,
908*ab4a9bebSjohnlev 		    const char *, watch->node,
909*ab4a9bebSjohnlev 		    uintptr_t, watch->callback,
910*ab4a9bebSjohnlev 		    struct xenbus_watch *, watch,
911*ab4a9bebSjohnlev 		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
912*ab4a9bebSjohnlev 
913*ab4a9bebSjohnlev 		watch->callback(watch, (const char **)msg->un.watch.vec,
914*ab4a9bebSjohnlev 		    msg->un.watch.vec_size);
915*ab4a9bebSjohnlev 
9161d03c31eSjohnlev 		free_stored_msg(msg);
917843e1988Sjohnlev 		mutex_exit(&xenwatch_mutex);
918843e1988Sjohnlev 	}
919843e1988Sjohnlev }
920843e1988Sjohnlev 
921843e1988Sjohnlev static int
922843e1988Sjohnlev process_msg(void)
923843e1988Sjohnlev {
924843e1988Sjohnlev 	struct xs_stored_msg *msg;
925843e1988Sjohnlev 	char *body;
926843e1988Sjohnlev 	int err, mlen;
927843e1988Sjohnlev 
928843e1988Sjohnlev 	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);
929843e1988Sjohnlev 
930843e1988Sjohnlev 	err = xb_read(&msg->hdr, sizeof (msg->hdr));
931843e1988Sjohnlev 	if (err) {
932843e1988Sjohnlev 		kmem_free(msg, sizeof (*msg));
933843e1988Sjohnlev 		return (err);
934843e1988Sjohnlev 	}
935843e1988Sjohnlev 
936843e1988Sjohnlev 	mlen = msg->hdr.len + 1;
937843e1988Sjohnlev 	body = kmem_alloc(mlen, KM_SLEEP);
938843e1988Sjohnlev 
939843e1988Sjohnlev 	err = xb_read(body, msg->hdr.len);
940843e1988Sjohnlev 	if (err) {
941843e1988Sjohnlev 		kmem_free(body, mlen);
942843e1988Sjohnlev 		kmem_free(msg, sizeof (*msg));
943843e1988Sjohnlev 		return (err);
944843e1988Sjohnlev 	}
945843e1988Sjohnlev 
946843e1988Sjohnlev 	body[mlen - 1] = '\0';
947843e1988Sjohnlev 
948843e1988Sjohnlev 	if (msg->hdr.type == XS_WATCH_EVENT) {
949*ab4a9bebSjohnlev 		const char *token;
950843e1988Sjohnlev 		msg->un.watch.vec = split(body, msg->hdr.len + 1,
951843e1988Sjohnlev 		    &msg->un.watch.vec_size);
952843e1988Sjohnlev 		if (msg->un.watch.vec == NULL) {
953843e1988Sjohnlev 			kmem_free(msg, sizeof (*msg));
954843e1988Sjohnlev 			return (EIO);
955843e1988Sjohnlev 		}
956843e1988Sjohnlev 
957843e1988Sjohnlev 		mutex_enter(&watches_lock);
958*ab4a9bebSjohnlev 		token = msg->un.watch.vec[XS_WATCH_TOKEN];
959*ab4a9bebSjohnlev 		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
960843e1988Sjohnlev 			mutex_enter(&watch_events_lock);
961*ab4a9bebSjohnlev 
962*ab4a9bebSjohnlev 			DTRACE_XPV4(xenbus__enqueue__watch,
963*ab4a9bebSjohnlev 			    const char *, msg->un.watch.handle->node,
964*ab4a9bebSjohnlev 			    uintptr_t, msg->un.watch.handle->callback,
965*ab4a9bebSjohnlev 			    struct xenbus_watch *, msg->un.watch.handle,
966*ab4a9bebSjohnlev 			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
967*ab4a9bebSjohnlev 
968843e1988Sjohnlev 			list_insert_tail(&watch_events, msg);
969843e1988Sjohnlev 			cv_broadcast(&watch_events_cv);
970843e1988Sjohnlev 			mutex_exit(&watch_events_lock);
971843e1988Sjohnlev 		} else {
972843e1988Sjohnlev 			free_stored_msg(msg);
973843e1988Sjohnlev 		}
974843e1988Sjohnlev 		mutex_exit(&watches_lock);
975843e1988Sjohnlev 	} else {
976843e1988Sjohnlev 		msg->un.reply.body = body;
977843e1988Sjohnlev 		mutex_enter(&xs_state.reply_lock);
978843e1988Sjohnlev 		list_insert_tail(&xs_state.reply_list, msg);
979843e1988Sjohnlev 		mutex_exit(&xs_state.reply_lock);
980843e1988Sjohnlev 		cv_signal(&xs_state.reply_cv);
981843e1988Sjohnlev 	}
982843e1988Sjohnlev 
983843e1988Sjohnlev 	return (0);
984843e1988Sjohnlev }
985843e1988Sjohnlev 
986843e1988Sjohnlev static void
987843e1988Sjohnlev xenbus_thread(void)
988843e1988Sjohnlev {
989843e1988Sjohnlev 	int err;
990843e1988Sjohnlev 
991843e1988Sjohnlev 	for (; interrupts_unleashed != 0; ) {
992843e1988Sjohnlev 		err = process_msg();
993843e1988Sjohnlev 		if (err)
994843e1988Sjohnlev 			cmn_err(CE_WARN, "XENBUS error %d while reading "
995843e1988Sjohnlev 			    "message", err);
996843e1988Sjohnlev 	}
997843e1988Sjohnlev }
998843e1988Sjohnlev 
999843e1988Sjohnlev /*
1000843e1988Sjohnlev  * When setting up xenbus, dom0 and domU have to take different paths, which
1001843e1988Sjohnlev  * makes this code a little confusing. For dom0:
1002843e1988Sjohnlev  *
1003843e1988Sjohnlev  * xs_early_init - mutex init only
1004843e1988Sjohnlev  * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
1005843e1988Sjohnlev  * event channel; start xenbus threads for responding to interrupts.
1006843e1988Sjohnlev  *
1007843e1988Sjohnlev  * And for domU:
1008843e1988Sjohnlev  *
1009843e1988Sjohnlev  * xs_early_init - mutex init; set up our xenstore page and event channel
1010843e1988Sjohnlev  * xs_domu_init - installation of IRQ handler; start xenbus threads.
1011843e1988Sjohnlev  *
1012843e1988Sjohnlev  * We need an early init on domU so we can use xenbus in polled mode to
1013843e1988Sjohnlev  * discover devices, VCPUs etc.
1014843e1988Sjohnlev  *
1015843e1988Sjohnlev  * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
1016843e1988Sjohnlev  * working state.
1017843e1988Sjohnlev  */
1018843e1988Sjohnlev 
1019843e1988Sjohnlev void
1020843e1988Sjohnlev xs_early_init(void)
1021843e1988Sjohnlev {
1022843e1988Sjohnlev 	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
1023843e1988Sjohnlev 	    offsetof(struct xs_stored_msg, list));
1024843e1988Sjohnlev 	list_create(&watch_events, sizeof (struct xs_stored_msg),
1025843e1988Sjohnlev 	    offsetof(struct xs_stored_msg, list));
1026843e1988Sjohnlev 	list_create(&watches, sizeof (struct xenbus_watch),
1027843e1988Sjohnlev 	    offsetof(struct xenbus_watch, list));
1028843e1988Sjohnlev 	list_create(&notify_list, sizeof (struct xenbus_notify),
1029843e1988Sjohnlev 	    offsetof(struct xenbus_notify, list));
1030843e1988Sjohnlev 	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
1031843e1988Sjohnlev 	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
1032843e1988Sjohnlev 	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
1033843e1988Sjohnlev 	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
1034843e1988Sjohnlev 	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
1035843e1988Sjohnlev 
1036843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1037843e1988Sjohnlev 		return;
1038843e1988Sjohnlev 
1039843e1988Sjohnlev 	xb_init();
1040843e1988Sjohnlev 	xenstore_up = B_TRUE;
1041843e1988Sjohnlev }
1042843e1988Sjohnlev 
1043843e1988Sjohnlev static void
1044843e1988Sjohnlev xs_thread_init(void)
1045843e1988Sjohnlev {
1046843e1988Sjohnlev 	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
1047843e1988Sjohnlev 	    TS_RUN, minclsyspri);
1048843e1988Sjohnlev 	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
1049843e1988Sjohnlev 	    TS_RUN, minclsyspri);
1050843e1988Sjohnlev 	xenbus_taskq = taskq_create("xenbus_taskq", 1,
1051843e1988Sjohnlev 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
1052843e1988Sjohnlev 	ASSERT(xenbus_taskq != NULL);
1053843e1988Sjohnlev }
1054843e1988Sjohnlev 
1055843e1988Sjohnlev void
1056843e1988Sjohnlev xs_domu_init(void)
1057843e1988Sjohnlev {
1058843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1059843e1988Sjohnlev 		return;
1060843e1988Sjohnlev 
1061843e1988Sjohnlev 	/*
1062843e1988Sjohnlev 	 * Add interrupt handler for xenbus now, must wait till after
1063843e1988Sjohnlev 	 * psm module is loaded.  All use of xenbus is in polled mode
1064843e1988Sjohnlev 	 * until xs_init is called since it is what kicks off the xs
1065843e1988Sjohnlev 	 * server threads.
1066843e1988Sjohnlev 	 */
1067843e1988Sjohnlev 	xs_thread_init();
1068843e1988Sjohnlev 	xb_setup_intr();
1069843e1988Sjohnlev }
1070843e1988Sjohnlev 
1071843e1988Sjohnlev 
1072843e1988Sjohnlev void
1073843e1988Sjohnlev xs_dom0_init(void)
1074843e1988Sjohnlev {
1075843e1988Sjohnlev 	static boolean_t initialized = B_FALSE;
1076843e1988Sjohnlev 
1077843e1988Sjohnlev 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1078843e1988Sjohnlev 
1079843e1988Sjohnlev 	/*
1080843e1988Sjohnlev 	 * The xenbus driver might be re-attaching.
1081843e1988Sjohnlev 	 */
1082843e1988Sjohnlev 	if (initialized)
1083843e1988Sjohnlev 		return;
1084843e1988Sjohnlev 
1085843e1988Sjohnlev 	xb_init();
1086843e1988Sjohnlev 	xs_thread_init();
1087843e1988Sjohnlev 	xb_setup_intr();
1088843e1988Sjohnlev 
1089843e1988Sjohnlev 	initialized = B_TRUE;
1090843e1988Sjohnlev }
1091