xref: /illumos-gate/usr/src/uts/common/xen/io/xenbus_xs.c (revision fc621ef0)
1843e1988Sjohnlev /*
2843e1988Sjohnlev  * CDDL HEADER START
3843e1988Sjohnlev  *
4843e1988Sjohnlev  * The contents of this file are subject to the terms of the
5843e1988Sjohnlev  * Common Development and Distribution License (the "License").
6843e1988Sjohnlev  * You may not use this file except in compliance with the License.
7843e1988Sjohnlev  *
8843e1988Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev  * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev  * See the License for the specific language governing permissions
11843e1988Sjohnlev  * and limitations under the License.
12843e1988Sjohnlev  *
13843e1988Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev  *
19843e1988Sjohnlev  * CDDL HEADER END
20843e1988Sjohnlev  */
21843e1988Sjohnlev 
22843e1988Sjohnlev /*
237f0b8309SEdward Pilatowicz  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24843e1988Sjohnlev  * Use is subject to license terms.
25843e1988Sjohnlev  */
26843e1988Sjohnlev 
27843e1988Sjohnlev /*
28843e1988Sjohnlev  *
29843e1988Sjohnlev  * xenbus_xs.c
30843e1988Sjohnlev  *
31843e1988Sjohnlev  * This is the kernel equivalent of the "xs" library.  We don't need everything
32843e1988Sjohnlev  * and we use xenbus_comms for communication.
33843e1988Sjohnlev  *
34843e1988Sjohnlev  * Copyright (C) 2005 Rusty Russell, IBM Corporation
35843e1988Sjohnlev  *
36843e1988Sjohnlev  * This file may be distributed separately from the Linux kernel, or
37843e1988Sjohnlev  * incorporated into other software packages, subject to the following license:
38843e1988Sjohnlev  *
39843e1988Sjohnlev  * Permission is hereby granted, free of charge, to any person obtaining a copy
40843e1988Sjohnlev  * of this source file (the "Software"), to deal in the Software without
41843e1988Sjohnlev  * restriction, including without limitation the rights to use, copy, modify,
42843e1988Sjohnlev  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43843e1988Sjohnlev  * and to permit persons to whom the Software is furnished to do so, subject to
44843e1988Sjohnlev  * the following conditions:
45843e1988Sjohnlev  *
46843e1988Sjohnlev  * The above copyright notice and this permission notice shall be included in
47843e1988Sjohnlev  * all copies or substantial portions of the Software.
48843e1988Sjohnlev  *
49843e1988Sjohnlev  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50843e1988Sjohnlev  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51843e1988Sjohnlev  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52843e1988Sjohnlev  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53843e1988Sjohnlev  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54843e1988Sjohnlev  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55843e1988Sjohnlev  * IN THE SOFTWARE.
56843e1988Sjohnlev  */
57843e1988Sjohnlev 
/*
 * NOTE: To future maintainers of the Solaris version of this file:
 * I found the Linux version of this code to be very disgusting in
 * overloading pointers and error codes into void * return values.
 * The main difference you will find is that all such usage is changed
 * to pass pointers to void * to be filled in with return values, and
 * the functions return error codes.
 */
66843e1988Sjohnlev 
67843e1988Sjohnlev #include <sys/errno.h>
68843e1988Sjohnlev #include <sys/types.h>
69843e1988Sjohnlev #include <sys/sysmacros.h>
70843e1988Sjohnlev #include <sys/uio.h>
71843e1988Sjohnlev #include <sys/mutex.h>
72843e1988Sjohnlev #include <sys/condvar.h>
73843e1988Sjohnlev #include <sys/rwlock.h>
74843e1988Sjohnlev #include <sys/disp.h>
75843e1988Sjohnlev #include <sys/ddi.h>
76843e1988Sjohnlev #include <sys/sunddi.h>
77843e1988Sjohnlev #include <sys/avintr.h>
78843e1988Sjohnlev #include <sys/cmn_err.h>
79551bc2a6Smrj #include <sys/mach_mmu.h>
80843e1988Sjohnlev #include <util/sscanf.h>
81843e1988Sjohnlev #define	_XSD_ERRORS_DEFINED
82551bc2a6Smrj #ifdef XPV_HVM_DRIVER
83551bc2a6Smrj #include <sys/xpv_support.h>
84551bc2a6Smrj #endif
85843e1988Sjohnlev #include <sys/hypervisor.h>
86ab4a9bebSjohnlev #include <sys/taskq.h>
87ab4a9bebSjohnlev #include <sys/sdt.h>
88843e1988Sjohnlev #include <xen/sys/xenbus_impl.h>
89843e1988Sjohnlev #include <xen/sys/xenbus_comms.h>
90843e1988Sjohnlev #include <xen/sys/xendev.h>
91843e1988Sjohnlev #include <xen/public/io/xs_wire.h>
92843e1988Sjohnlev 
/* True when the two NUL-terminated strings compare equal. */
#define	streq(a, b) (strcmp((a), (b)) == 0)

/* True when the given list_t has no head element. */
#define	list_empty(list) (list_head(list) == NULL)
96843e1988Sjohnlev 
97843e1988Sjohnlev struct xs_stored_msg {
98349b53ddSStuart Maybee 	list_node_t list;
99843e1988Sjohnlev 
100843e1988Sjohnlev 	struct xsd_sockmsg hdr;
101843e1988Sjohnlev 
102843e1988Sjohnlev 	union {
103843e1988Sjohnlev 		/* Queued replies. */
104843e1988Sjohnlev 		struct {
105843e1988Sjohnlev 			char *body;
106843e1988Sjohnlev 		} reply;
107843e1988Sjohnlev 
108843e1988Sjohnlev 		/* Queued watch events. */
109843e1988Sjohnlev 		struct {
110843e1988Sjohnlev 			struct xenbus_watch *handle;
111843e1988Sjohnlev 			char **vec;
112843e1988Sjohnlev 			unsigned int vec_size;
113843e1988Sjohnlev 		} watch;
114843e1988Sjohnlev 	} un;
115843e1988Sjohnlev };
116843e1988Sjohnlev 
117843e1988Sjohnlev static struct xs_handle {
118843e1988Sjohnlev 	/* A list of replies. Currently only one will ever be outstanding. */
119843e1988Sjohnlev 	list_t reply_list;
120843e1988Sjohnlev 	kmutex_t reply_lock;
121843e1988Sjohnlev 	kcondvar_t reply_cv;
122843e1988Sjohnlev 
123843e1988Sjohnlev 	/* One request at a time. */
124843e1988Sjohnlev 	kmutex_t request_mutex;
125843e1988Sjohnlev 
126843e1988Sjohnlev 	/* Protect transactions against save/restore. */
127843e1988Sjohnlev 	krwlock_t suspend_lock;
128843e1988Sjohnlev } xs_state;
129843e1988Sjohnlev 
130843e1988Sjohnlev static int last_req_id;
131843e1988Sjohnlev 
132843e1988Sjohnlev /*
133843e1988Sjohnlev  * List of clients wanting a xenstore up notification, and a lock to protect it
134843e1988Sjohnlev  */
135843e1988Sjohnlev static boolean_t xenstore_up;
136843e1988Sjohnlev static list_t notify_list;
137843e1988Sjohnlev static kmutex_t notify_list_lock;
138843e1988Sjohnlev static taskq_t *xenbus_taskq;
139843e1988Sjohnlev 
140843e1988Sjohnlev /* List of registered watches, and a lock to protect it. */
141843e1988Sjohnlev static list_t watches;
142843e1988Sjohnlev static kmutex_t watches_lock;
143843e1988Sjohnlev 
144843e1988Sjohnlev /* List of pending watch callback events, and a lock to protect it. */
145843e1988Sjohnlev static list_t watch_events;
146843e1988Sjohnlev static kmutex_t watch_events_lock;
147843e1988Sjohnlev 
148843e1988Sjohnlev /*
149843e1988Sjohnlev  * Details of the xenwatch callback kernel thread. The thread waits on the
150843e1988Sjohnlev  * watch_events_cv for work to do (queued on watch_events list). When it
151843e1988Sjohnlev  * wakes up it acquires the xenwatch_mutex before reading the list and
152843e1988Sjohnlev  * carrying out work.
153843e1988Sjohnlev  */
154843e1988Sjohnlev static kmutex_t xenwatch_mutex;
155843e1988Sjohnlev static kcondvar_t watch_events_cv;
156843e1988Sjohnlev 
157843e1988Sjohnlev static int process_msg(void);
158843e1988Sjohnlev 
159843e1988Sjohnlev static int
get_error(const char * errorstring)160843e1988Sjohnlev get_error(const char *errorstring)
161843e1988Sjohnlev {
162843e1988Sjohnlev 	unsigned int i;
163843e1988Sjohnlev 
164843e1988Sjohnlev 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
165843e1988Sjohnlev 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
166843e1988Sjohnlev 			cmn_err(CE_WARN,
167843e1988Sjohnlev 			    "XENBUS xen store gave: unknown error %s",
168843e1988Sjohnlev 			    errorstring);
169843e1988Sjohnlev 			return (EINVAL);
170843e1988Sjohnlev 		}
171843e1988Sjohnlev 	}
172843e1988Sjohnlev 	return (xsd_errors[i].errnum);
173843e1988Sjohnlev }
174843e1988Sjohnlev 
175843e1988Sjohnlev /*
176843e1988Sjohnlev  * Read a synchronous reply from xenstore.  Since we can return early before
177843e1988Sjohnlev  * reading a relevant reply, we discard any messages not matching the request
178843e1988Sjohnlev  * ID.  Caller must free returned message on success.
179843e1988Sjohnlev  */
180843e1988Sjohnlev static int
read_reply(struct xsd_sockmsg * req_hdr,struct xs_stored_msg ** reply)181843e1988Sjohnlev read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
182843e1988Sjohnlev {
183843e1988Sjohnlev 	extern int do_polled_io;
184843e1988Sjohnlev 
185843e1988Sjohnlev 	mutex_enter(&xs_state.reply_lock);
186843e1988Sjohnlev 
187843e1988Sjohnlev 	for (;;) {
188843e1988Sjohnlev 		while (list_empty(&xs_state.reply_list)) {
189843e1988Sjohnlev 			if (interrupts_unleashed && !do_polled_io) {
190843e1988Sjohnlev 				if (cv_wait_sig(&xs_state.reply_cv,
191843e1988Sjohnlev 				    &xs_state.reply_lock) == 0) {
192843e1988Sjohnlev 					mutex_exit(&xs_state.reply_lock);
193843e1988Sjohnlev 					*reply = NULL;
194843e1988Sjohnlev 					return (EINTR);
195843e1988Sjohnlev 				}
196843e1988Sjohnlev 			} else { /* polled mode needed for early probes */
197843e1988Sjohnlev 				mutex_exit(&xs_state.reply_lock);
198843e1988Sjohnlev 				(void) HYPERVISOR_yield();
199843e1988Sjohnlev 				(void) process_msg();
200843e1988Sjohnlev 				mutex_enter(&xs_state.reply_lock);
201843e1988Sjohnlev 			}
202843e1988Sjohnlev 		}
203843e1988Sjohnlev 
204843e1988Sjohnlev 		*reply = list_head(&xs_state.reply_list);
205843e1988Sjohnlev 		list_remove(&xs_state.reply_list, *reply);
206843e1988Sjohnlev 
207843e1988Sjohnlev 		if ((*reply)->hdr.req_id == req_hdr->req_id)
208843e1988Sjohnlev 			break;
209843e1988Sjohnlev 	}
210843e1988Sjohnlev 
211843e1988Sjohnlev 	mutex_exit(&xs_state.reply_lock);
212843e1988Sjohnlev 	return (0);
213843e1988Sjohnlev }
214843e1988Sjohnlev 
215843e1988Sjohnlev /* Emergency write. */
216843e1988Sjohnlev void
xenbus_debug_write(const char * str,unsigned int count)217843e1988Sjohnlev xenbus_debug_write(const char *str, unsigned int count)
218843e1988Sjohnlev {
219843e1988Sjohnlev 	struct xsd_sockmsg msg = { 0 };
220843e1988Sjohnlev 
221843e1988Sjohnlev 	msg.type = XS_DEBUG;
222843e1988Sjohnlev 	msg.len = sizeof ("print") + count + 1;
223843e1988Sjohnlev 
224843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
225843e1988Sjohnlev 	(void) xb_write(&msg, sizeof (msg));
226843e1988Sjohnlev 	(void) xb_write("print", sizeof ("print"));
227843e1988Sjohnlev 	(void) xb_write(str, count);
228843e1988Sjohnlev 	(void) xb_write("", 1);
229843e1988Sjohnlev 	mutex_exit(&xs_state.request_mutex);
230843e1988Sjohnlev }
231843e1988Sjohnlev 
232843e1988Sjohnlev /*
233843e1988Sjohnlev  * This is pretty unpleasant.  First off, there's the horrible logic around
234843e1988Sjohnlev  * suspend_lock and transactions.  Also, we can be interrupted either before we
235843e1988Sjohnlev  * write a message, or before we receive a reply.  A client that wants to
236843e1988Sjohnlev  * survive this can't know which case happened.  Luckily all clients don't care
237843e1988Sjohnlev  * about signals currently, and the alternative (a hard wait on a userspace
238843e1988Sjohnlev  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
239843e1988Sjohnlev  */
240843e1988Sjohnlev int
xenbus_dev_request_and_reply(struct xsd_sockmsg * msg,void ** reply)241843e1988Sjohnlev xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
242843e1988Sjohnlev {
243843e1988Sjohnlev 	struct xsd_sockmsg req_msg = *msg;
244843e1988Sjohnlev 	struct xs_stored_msg *reply_msg = NULL;
245843e1988Sjohnlev 	int err;
246843e1988Sjohnlev 
247843e1988Sjohnlev 	if (req_msg.type == XS_TRANSACTION_START)
248843e1988Sjohnlev 		rw_enter(&xs_state.suspend_lock, RW_READER);
249843e1988Sjohnlev 
250843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
251843e1988Sjohnlev 
252843e1988Sjohnlev 	msg->req_id = last_req_id++;
253843e1988Sjohnlev 
254843e1988Sjohnlev 	err = xb_write(msg, sizeof (*msg) + msg->len);
255843e1988Sjohnlev 	if (err) {
256843e1988Sjohnlev 		if (req_msg.type == XS_TRANSACTION_START)
257843e1988Sjohnlev 			rw_exit(&xs_state.suspend_lock);
258843e1988Sjohnlev 		msg->type = XS_ERROR;
259843e1988Sjohnlev 		*reply = NULL;
260843e1988Sjohnlev 		goto out;
261843e1988Sjohnlev 	}
262843e1988Sjohnlev 
263843e1988Sjohnlev 	err = read_reply(msg, &reply_msg);
264843e1988Sjohnlev 
265843e1988Sjohnlev 	if (err) {
266843e1988Sjohnlev 		if (msg->type == XS_TRANSACTION_START)
267843e1988Sjohnlev 			rw_exit(&xs_state.suspend_lock);
268843e1988Sjohnlev 		*reply = NULL;
269843e1988Sjohnlev 		goto out;
270843e1988Sjohnlev 	}
271843e1988Sjohnlev 
272843e1988Sjohnlev 	*reply = reply_msg->un.reply.body;
273843e1988Sjohnlev 	*msg = reply_msg->hdr;
274843e1988Sjohnlev 
275843e1988Sjohnlev 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
276843e1988Sjohnlev 		rw_exit(&xs_state.suspend_lock);
277843e1988Sjohnlev 
278843e1988Sjohnlev out:
279843e1988Sjohnlev 	if (reply_msg != NULL)
280843e1988Sjohnlev 		kmem_free(reply_msg, sizeof (*reply_msg));
281843e1988Sjohnlev 
282843e1988Sjohnlev 	mutex_exit(&xs_state.request_mutex);
283843e1988Sjohnlev 	return (err);
284843e1988Sjohnlev }
285843e1988Sjohnlev 
286843e1988Sjohnlev /*
287843e1988Sjohnlev  * Send message to xs, return errcode, rval filled in with pointer
288843e1988Sjohnlev  * to kmem_alloc'ed reply.
289843e1988Sjohnlev  */
290843e1988Sjohnlev static int
xs_talkv(xenbus_transaction_t t,enum xsd_sockmsg_type type,const iovec_t * iovec,unsigned int num_vecs,void ** rval,unsigned int * len)291843e1988Sjohnlev xs_talkv(xenbus_transaction_t t,
292843e1988Sjohnlev 		    enum xsd_sockmsg_type type,
293843e1988Sjohnlev 		    const iovec_t *iovec,
294843e1988Sjohnlev 		    unsigned int num_vecs,
295843e1988Sjohnlev 		    void **rval,
296843e1988Sjohnlev 		    unsigned int *len)
297843e1988Sjohnlev {
298843e1988Sjohnlev 	struct xsd_sockmsg msg;
299843e1988Sjohnlev 	struct xs_stored_msg *reply_msg;
300843e1988Sjohnlev 	char *reply;
301843e1988Sjohnlev 	unsigned int i;
302843e1988Sjohnlev 	int err;
303843e1988Sjohnlev 
304843e1988Sjohnlev 	msg.tx_id = (uint32_t)(unsigned long)t;
305843e1988Sjohnlev 	msg.type = type;
306843e1988Sjohnlev 	msg.len = 0;
307843e1988Sjohnlev 	for (i = 0; i < num_vecs; i++)
308843e1988Sjohnlev 		msg.len += iovec[i].iov_len;
309843e1988Sjohnlev 
310843e1988Sjohnlev 	mutex_enter(&xs_state.request_mutex);
311843e1988Sjohnlev 
312843e1988Sjohnlev 	msg.req_id = last_req_id++;
313843e1988Sjohnlev 
314843e1988Sjohnlev 	err = xb_write(&msg, sizeof (msg));
315843e1988Sjohnlev 	if (err) {
316843e1988Sjohnlev 		mutex_exit(&xs_state.request_mutex);
317843e1988Sjohnlev 		return (err);
318843e1988Sjohnlev 	}
319843e1988Sjohnlev 
320843e1988Sjohnlev 	for (i = 0; i < num_vecs; i++) {
321843e1988Sjohnlev 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
322843e1988Sjohnlev 		if (err) {
323843e1988Sjohnlev 			mutex_exit(&xs_state.request_mutex);
324843e1988Sjohnlev 			return (err);
325