xref: /illumos-gate/usr/src/uts/i86xpv/io/privcmd.c (revision 349b53dd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/xpv_user.h>
28 
29 #include <sys/types.h>
30 #include <sys/file.h>
31 #include <sys/errno.h>
32 #include <sys/open.h>
33 #include <sys/cred.h>
34 #include <sys/conf.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/vmsystm.h>
40 #include <sys/sdt.h>
41 #include <sys/hypervisor.h>
42 #include <sys/xen_errno.h>
43 #include <sys/policy.h>
44 
45 #include <vm/hat_i86.h>
46 #include <vm/hat_pte.h>
47 #include <vm/seg_mf.h>
48 
49 #include <xen/sys/privcmd.h>
50 #include <sys/privcmd_impl.h>
51 
52 static dev_info_t *privcmd_devi;
53 
54 /*ARGSUSED*/
55 static int
56 privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
57 {
58 	switch (cmd) {
59 	case DDI_INFO_DEVT2DEVINFO:
60 	case DDI_INFO_DEVT2INSTANCE:
61 		break;
62 	default:
63 		return (DDI_FAILURE);
64 	}
65 
66 	switch (getminor((dev_t)arg)) {
67 	case PRIVCMD_MINOR:
68 		break;
69 	default:
70 		return (DDI_FAILURE);
71 	}
72 
73 	if (cmd == DDI_INFO_DEVT2INSTANCE)
74 		*result = 0;
75 	else
76 		*result = privcmd_devi;
77 	return (DDI_SUCCESS);
78 }
79 
80 static int
81 privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
82 {
83 	if (cmd != DDI_ATTACH)
84 		return (DDI_FAILURE);
85 
86 	if (ddi_create_minor_node(devi, PRIVCMD_NODE,
87 	    S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
88 		return (DDI_FAILURE);
89 
90 	privcmd_devi = devi;
91 	ddi_report_dev(devi);
92 	return (DDI_SUCCESS);
93 }
94 
95 static int
96 privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
97 {
98 	if (cmd != DDI_DETACH)
99 		return (DDI_FAILURE);
100 	ddi_remove_minor_node(devi, NULL);
101 	privcmd_devi = NULL;
102 	return (DDI_SUCCESS);
103 }
104 
105 /*ARGSUSED1*/
106 static int
107 privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
108 {
109 	return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
110 }
111 
112 /*
113  * Map a contiguous set of machine frames in a foreign domain.
114  * Used in the following way:
115  *
116  *	privcmd_mmap_t p;
117  *	privcmd_mmap_entry_t e;
118  *
119  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
120  *	p.num = number of privcmd_mmap_entry_t's
121  *	p.dom = domid;
122  *	p.entry = &e;
123  *	e.va = addr;
124  *	e.mfn = mfn;
125  *	e.npages = btopr(size);
126  *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
127  */
128 /*ARGSUSED2*/
129 int
130 do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
131 {
132 	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
133 	privcmd_mmap_entry_t *umme;
134 	struct as *as = curproc->p_as;
135 	struct seg *seg;
136 	int i, error = 0;
137 
138 	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
139 		return (EFAULT);
140 
141 	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
142 	    privcmd_mmap_entry_t *, mmc->entry);
143 
144 	if (mmc->dom == DOMID_SELF) {
145 		error = ENOTSUP;	/* Too paranoid? */
146 		goto done;
147 	}
148 
149 	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
150 		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
151 		caddr_t addr;
152 
153 		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
154 			error = EFAULT;
155 			break;
156 		}
157 
158 		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
159 		    ulong_t, mme->npages);
160 
161 		if (mme->mfn == MFN_INVALID) {
162 			error = EINVAL;
163 			break;
164 		}
165 
166 		addr = (caddr_t)mme->va;
167 
168 		/*
169 		 * Find the segment we want to mess with, then add
170 		 * the mfn range to the segment.
171 		 */
172 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
173 		if ((seg = as_findseg(as, addr, 0)) == NULL ||
174 		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
175 			error = EINVAL;
176 		else
177 			error = segmf_add_mfns(seg, addr,
178 			    mme->mfn, mme->npages, mmc->dom);
179 		AS_LOCK_EXIT(as, &as->a_lock);
180 
181 		if (error != 0)
182 			break;
183 	}
184 
185 done:
186 	DTRACE_XPV1(mmap__end, int, error);
187 
188 	return (error);
189 }
190 
191 /*
192  * Set up the address range to map to an array of mfns in
193  * a foreign domain.  Used in the following way:
194  *
195  *	privcmd_mmap_batch_t p;
196  *
197  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
198  *	p.num = number of pages
199  *	p.dom = domid
200  *	p.addr = addr;
201  *	p.arr = array of mfns, indexed 0 .. p.num - 1
202  *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
203  */
204 /*ARGSUSED2*/
205 static int
206 do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
207 {
208 	privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
209 	struct as *as = curproc->p_as;
210 	struct seg *seg;
211 	int i, error = 0;
212 	caddr_t addr;
213 	ulong_t *ulp;
214 
215 	if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
216 		return (EFAULT);
217 
218 	DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
219 	    caddr_t, mmb->addr);
220 
221 	addr = (caddr_t)mmb->addr;
222 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
223 	if ((seg = as_findseg(as, addr, 0)) == NULL ||
224 	    addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
225 		error = EINVAL;
226 		goto done;
227 	}
228 
229 	for (i = 0, ulp = mmb->arr;
230 	    i < mmb->num; i++, addr += PAGESIZE, ulp++) {
231 		mfn_t mfn;
232 
233 		if (fulword(ulp, &mfn) != 0) {
234 			error = EFAULT;
235 			break;
236 		}
237 
238 		if (mfn == MFN_INVALID) {
239 			/*
240 			 * This mfn is invalid and should not be added to
241 			 * segmf, as we'd only cause an immediate EFAULT when
242 			 * we tried to fault it in.
243 			 */
244 			mfn |= XEN_DOMCTL_PFINFO_XTAB;
245 			continue;
246 		}
247 
248 		if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
249 			continue;
250 
251 		/*
252 		 * Tell the process that this MFN could not be mapped, so it
253 		 * won't later try to access it.
254 		 */
255 		mfn |= XEN_DOMCTL_PFINFO_XTAB;
256 		if (sulword(ulp, mfn) != 0) {
257 			error = EFAULT;
258 			break;
259 		}
260 	}
261 
262 done:
263 	AS_LOCK_EXIT(as, &as->a_lock);
264 
265 	DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
266 	    mmb->addr);
267 
268 	return (error);
269 }
270 
271 /*ARGSUSED*/
272 static int
273 privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
274 {
275 	if (secpolicy_xvm_control(cr))
276 		return (EPERM);
277 
278 	/*
279 	 * Everything is a -native- data type.
280 	 */
281 	if ((mode & FMODELS) != FNATIVE)
282 		return (EOVERFLOW);
283 
284 	switch (cmd) {
285 	case IOCTL_PRIVCMD_HYPERCALL:
286 		return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
287 	case IOCTL_PRIVCMD_MMAP:
288 		if (DOMAIN_IS_PRIVILEGED(xen_info))
289 			return (do_privcmd_mmap((void *)arg, mode, cr));
290 		break;
291 	case IOCTL_PRIVCMD_MMAPBATCH:
292 		if (DOMAIN_IS_PRIVILEGED(xen_info))
293 			return (do_privcmd_mmapbatch((void *)arg, mode, cr));
294 		break;
295 	default:
296 		break;
297 	}
298 	return (EINVAL);
299 }
300 
301 /*
302  * The real magic happens in the segmf segment driver.
303  */
304 /*ARGSUSED8*/
305 static int
306 privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
307     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
308 {
309 	struct segmf_crargs a;
310 	int error;
311 
312 	if (secpolicy_xvm_control(cr))
313 		return (EPERM);
314 
315 	as_rangelock(as);
316 	if ((flags & MAP_FIXED) == 0) {
317 		map_addr(addrp, len, (offset_t)off, 0, flags);
318 		if (*addrp == NULL) {
319 			error = ENOMEM;
320 			goto rangeunlock;
321 		}
322 	} else {
323 		/*
324 		 * User specified address
325 		 */
326 		(void) as_unmap(as, *addrp, len);
327 	}
328 
329 	/*
330 	 * The mapping *must* be MAP_SHARED at offset 0.
331 	 *
332 	 * (Foreign pages are treated like device memory; the
333 	 * ioctl interface allows the backing objects to be
334 	 * arbitrarily redefined to point at any machine frame.)
335 	 */
336 	if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
337 		error = EINVAL;
338 		goto rangeunlock;
339 	}
340 
341 	a.dev = dev;
342 	a.prot = (uchar_t)prot;
343 	a.maxprot = (uchar_t)maxprot;
344 	error = as_map(as, *addrp, len, segmf_create, &a);
345 
346 rangeunlock:
347 	as_rangeunlock(as);
348 	return (error);
349 }
350 
351 static struct cb_ops privcmd_cb_ops = {
352 	privcmd_open,
353 	nulldev,	/* close */
354 	nodev,		/* strategy */
355 	nodev,		/* print */
356 	nodev,		/* dump */
357 	nodev,		/* read */
358 	nodev,		/* write */
359 	privcmd_ioctl,
360 	nodev,		/* devmap */
361 	nodev,		/* mmap */
362 	privcmd_segmap,
363 	nochpoll,	/* poll */
364 	ddi_prop_op,
365 	NULL,
366 	D_64BIT | D_NEW | D_MP
367 };
368 
369 static struct dev_ops privcmd_dv_ops = {
370 	DEVO_REV,
371 	0,
372 	privcmd_getinfo,
373 	nulldev,		/* identify */
374 	nulldev,		/* probe */
375 	privcmd_attach,
376 	privcmd_detach,
377 	nodev,			/* reset */
378 	&privcmd_cb_ops,
379 	0,			/* struct bus_ops */
380 	NULL,			/* power */
381 	ddi_quiesce_not_needed,		/* quiesce */
382 };
383 
384 static struct modldrv modldrv = {
385 	&mod_driverops,
386 	"privcmd driver",
387 	&privcmd_dv_ops
388 };
389 
390 static struct modlinkage modl = {
391 	MODREV_1,
392 	&modldrv
393 };
394 
395 int
396 _init(void)
397 {
398 	return (mod_install(&modl));
399 }
400 
401 int
402 _fini(void)
403 {
404 	return (mod_remove(&modl));
405 }
406 
407 int
408 _info(struct modinfo *modinfo)
409 {
410 	return (mod_info(&modl, modinfo));
411 }
412