xref: /illumos-gate/usr/src/uts/i86pc/vm/seg_vmm.c (revision 04909c8c)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018 Joyent, Inc.
 */

/*
 * segvmm - Virtual-Machine-Memory segment
 *
 * The vmm segment driver was designed for mapping regions of kernel memory
 * allocated to an HVM instance into userspace for manipulation there.  It
 * draws direct lineage from the umap segment driver, but is meant for larger
 * mappings with fewer restrictions.
 *
 * seg*k*vmm, in contrast, has mappings into kas for every VMM.  We use its
 * mappings here only to find the relevant PFNs in segvmm_fault_in().
 */
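
/*
 * A consumer is expected to hand segvmm_create() to as_map(), along with a
 * segvmm_crargs_t describing the kernel region to expose.  A minimal sketch,
 * assuming a hypothetical consumer with address space `as`, user address
 * `uaddr`, a page-aligned segkvmm address `kaddr` for a region of `len`
 * bytes, and optional `hold`/`rele` reference hooks on cookie `vm`:
 *
 *	segvmm_crargs_t cra;
 *
 *	cra.kaddr = kaddr;
 *	cra.prot = PROT_USER | PROT_READ | PROT_WRITE;
 *	cra.cookie = vm;
 *	cra.hold = my_hold;
 *	cra.rele = my_rele;
 *	error = as_map(as, uaddr, len, segvmm_create, &cra);
 */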


#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/lgrp.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/hat_pte.h>
#include <vm/htable.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_vmm.h>


static int segvmm_dup(struct seg *, struct seg *);
static int segvmm_unmap(struct seg *, caddr_t, size_t);
static void segvmm_free(struct seg *);
static faultcode_t segvmm_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segvmm_faulta(struct seg *, caddr_t);
static int segvmm_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segvmm_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segvmm_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segvmm_incore(struct seg *, caddr_t, size_t, char *);
static int segvmm_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segvmm_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t segvmm_getoffset(struct seg *, caddr_t);
static int segvmm_gettype(struct seg *, caddr_t);
static int segvmm_getvp(struct seg *, caddr_t, struct vnode **);
static int segvmm_advise(struct seg *, caddr_t, size_t, uint_t);
static void segvmm_dump(struct seg *);
static int segvmm_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segvmm_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segvmm_getmemid(struct seg *, caddr_t, memid_t *);
static int segvmm_capable(struct seg *, segcapability_t);

static struct seg_ops segvmm_ops = {
	.dup		= segvmm_dup,
	.unmap		= segvmm_unmap,
	.free		= segvmm_free,
	.fault		= segvmm_fault,
	.faulta		= segvmm_faulta,
	.setprot	= segvmm_setprot,
	.checkprot	= segvmm_checkprot,
	.kluster	= NULL,
	.swapout	= NULL,
	.sync		= segvmm_sync,
	.incore		= segvmm_incore,
	.lockop		= segvmm_lockop,
	.getprot	= segvmm_getprot,
	.getoffset	= segvmm_getoffset,
	.gettype	= segvmm_gettype,
	.getvp		= segvmm_getvp,
	.advise		= segvmm_advise,
	.dump		= segvmm_dump,
	.pagelock	= segvmm_pagelock,
	.setpagesize	= segvmm_setpagesize,
	.getmemid	= segvmm_getmemid,
	.getpolicy	= NULL,
	.capable	= segvmm_capable,
	.inherit	= seg_inherit_notsup
};


/*
 * Create a kernel/user-mapped segment.  ->kaddr is the segkvmm mapping.
 */
int
segvmm_create(struct seg **segpp, void *argsp)
{
	struct seg *seg = *segpp;
	segvmm_crargs_t *cra = argsp;
	segvmm_data_t *data;

	/*
	 * Check several aspects of the mapping request to ensure validity:
	 * - kernel pages must reside entirely in kernel space
	 * - target protection must be user-accessible
	 * - kernel address must be page-aligned
	 */
	if ((uintptr_t)cra->kaddr <= _userlimit ||
	    ((uintptr_t)cra->kaddr + seg->s_size) < (uintptr_t)cra->kaddr ||
	    (cra->prot & PROT_USER) == 0 ||
	    ((uintptr_t)cra->kaddr & PAGEOFFSET) != 0) {
		return (EINVAL);
	}

	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	rw_init(&data->svmd_lock, NULL, RW_DEFAULT, NULL);
	data->svmd_kaddr = (uintptr_t)cra->kaddr;
	data->svmd_prot = cra->prot;
	data->svmd_cookie = cra->cookie;
	data->svmd_hold = cra->hold;
	data->svmd_rele = cra->rele;

	/* Since initial checks have passed, grab a reference on the cookie */
	if (data->svmd_hold != NULL) {
		data->svmd_hold(data->svmd_cookie);
	}

	seg->s_ops = &segvmm_ops;
	seg->s_data = data;
	return (0);
}

static int
segvmm_dup(struct seg *seg, struct seg *newseg)
{
	segvmm_data_t *svmd = seg->s_data;
	segvmm_data_t *newsvmd;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	newsvmd = kmem_zalloc(sizeof (segvmm_data_t), KM_SLEEP);
	rw_init(&newsvmd->svmd_lock, NULL, RW_DEFAULT, NULL);
	newsvmd->svmd_kaddr = svmd->svmd_kaddr;
	newsvmd->svmd_prot = svmd->svmd_prot;
	newsvmd->svmd_cookie = svmd->svmd_cookie;
	newsvmd->svmd_hold = svmd->svmd_hold;
	newsvmd->svmd_rele = svmd->svmd_rele;

	/* Grab another hold for the duplicate segment */
	if (svmd->svmd_hold != NULL) {
		newsvmd->svmd_hold(newsvmd->svmd_cookie);
	}

	newseg->s_ops = seg->s_ops;
	newseg->s_data = newsvmd;
	return (0);
}

static int
segvmm_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	segvmm_data_t *svmd = seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/* Only allow unmap of entire segment */
	if (addr != seg->s_base || len != seg->s_size) {
		return (EINVAL);
	}
	if (svmd->svmd_softlockcnt != 0) {
		return (EAGAIN);
	}

	/* Unconditionally unload the entire segment range. */
	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

	/* Release the hold this segment possessed */
	if (svmd->svmd_rele != NULL) {
		svmd->svmd_rele(svmd->svmd_cookie);
	}

	seg_free(seg);
	return (0);
}

static void
segvmm_free(struct seg *seg)
{
	segvmm_data_t *data = seg->s_data;

	ASSERT(data != NULL);

	rw_destroy(&data->svmd_lock);
	VERIFY(data->svmd_softlockcnt == 0);
	kmem_free(data, sizeof (*data));
	seg->s_data = NULL;
}

static int
segvmm_fault_in(struct hat *hat, struct seg *seg, uintptr_t va, size_t len)
{
	segvmm_data_t *svmd = seg->s_data;
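	/* koff translates a user va in this segment to its segkvmm alias. */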
	const uintptr_t koff = svmd->svmd_kaddr - (uintptr_t)seg->s_base;
	const uintptr_t end = va + len;
	const uintptr_t prot = svmd->svmd_prot;

	/* Stick to the simple non-large-page case for now */
	va &= PAGEMASK;

	do {
		htable_t *ht;
		uint_t entry, lvl;
		size_t psz;
		pfn_t pfn;
		const uintptr_t kaddr = va + koff;

		ASSERT(kaddr >= (uintptr_t)svmd->svmd_kaddr);
		ASSERT(kaddr < ((uintptr_t)svmd->svmd_kaddr + seg->s_size));

		ht = htable_getpage(kas.a_hat, kaddr, &entry);
		if (ht == NULL) {
			return (-1);
		}
		lvl = ht->ht_level;
		pfn = PTE2PFN(x86pte_get(ht, entry), lvl);
		htable_release(ht);
		if (pfn == PFN_INVALID) {
			return (-1);
		}

		/* For the time being, handling for large pages is absent. */
		psz = PAGESIZE;
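		/*
		 * The kas mapping may reside in a large page; step from its
		 * base PFN to the base page which contains kaddr.
		 */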
		pfn += mmu_btop(kaddr & LEVEL_OFFSET(lvl));

		hat_devload(hat, (caddr_t)va, psz, pfn, prot, HAT_LOAD);

		va = va + psz;
	} while (va < end);

	return (0);
}

/* ARGSUSED */
static faultcode_t
segvmm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw tw)
{
	segvmm_data_t *svmd = seg->s_data;
	int err = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (type == F_PROT) {
		/*
		 * Since protection on the segment is fixed, there is nothing
		 * to do but report an error for protection faults.
		 */
		return (FC_PROT);
	} else if (type == F_SOFTUNLOCK) {
		size_t plen = btop(len);

		rw_enter(&svmd->svmd_lock, RW_WRITER);
		VERIFY(svmd->svmd_softlockcnt >= plen);
		svmd->svmd_softlockcnt -= plen;
		rw_exit(&svmd->svmd_lock);
		return (0);
	}

	VERIFY(type == F_INVAL || type == F_SOFTLOCK);
	rw_enter(&svmd->svmd_lock, RW_WRITER);

	err = segvmm_fault_in(hat, seg, (uintptr_t)addr, len);
	if (type == F_SOFTLOCK && err == 0) {
		size_t nval = svmd->svmd_softlockcnt + btop(len);

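		/* Fail rather than letting the softlock count wrap around. */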
		if (svmd->svmd_softlockcnt >= nval) {
			rw_exit(&svmd->svmd_lock);
			return (FC_MAKE_ERR(EOVERFLOW));
		}
		svmd->svmd_softlockcnt = nval;
	}

	rw_exit(&svmd->svmd_lock);
	return (err);
}

/* ARGSUSED */
static faultcode_t
segvmm_faulta(struct seg *seg, caddr_t addr)
{
	/* Do nothing, since an async pagefault should not load translations. */
	return (0);
}

/* ARGSUSED */
static int
segvmm_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/* The seg_vmm driver does not yet allow protection to be changed. */
	return (EACCES);
}

/* ARGSUSED */
static int
segvmm_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	segvmm_data_t *svmd = seg->s_data;
	int error = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&svmd->svmd_lock, RW_READER);
	if ((svmd->svmd_prot & prot) != prot) {
		error = EACCES;
	}
	rw_exit(&svmd->svmd_lock);
	return (error);
}

/* ARGSUSED */
static int
segvmm_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	/* Always succeed, since there is no backing store to sync. */
	return (0);
}

/* ARGSUSED */
static size_t
segvmm_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t sz = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

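	/* Kernel-backed pages are always resident; report all in-core. */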
	len = (len + PAGEOFFSET) & PAGEMASK;
	while (len > 0) {
		*vec = 1;
		sz += PAGESIZE;
		vec++;
		len -= PAGESIZE;
	}
	return (sz);
}

/* ARGSUSED */
static int
segvmm_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
	/* Report success since kernel pages are always in memory. */
	return (0);
}

static int
segvmm_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	segvmm_data_t *svmd = seg->s_data;
	size_t pgno;
	uint_t prot;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&svmd->svmd_lock, RW_READER);
	prot = svmd->svmd_prot;
	rw_exit(&svmd->svmd_lock);

	/*
	 * Reporting protection is simple since it is not tracked per-page.
	 */
	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	while (pgno > 0) {
		protv[--pgno] = prot;
	}
	return (0);
}

/* ARGSUSED */
static u_offset_t
segvmm_getoffset(struct seg *seg, caddr_t addr)
{
	/*
	 * To avoid leaking information about the layout of the kernel address
	 * space, always report '0' as the offset.
	 */
	return (0);
}

/* ARGSUSED */
static int
segvmm_gettype(struct seg *seg, caddr_t addr)
{
	/*
	 * Since already-existing kernel pages are being mapped into userspace,
	 * always report the segment type as shared.
	 */
	return (MAP_SHARED);
}

/* ARGSUSED */
static int
segvmm_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	*vpp = NULL;
	return (0);
}

/* ARGSUSED */
static int
segvmm_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	if (behav == MADV_PURGE) {
		/* Purge does not make sense for this mapping */
		return (EINVAL);
	}
	/* Indicate success for everything else. */
	return (0);
}

/* ARGSUSED */
static void
segvmm_dump(struct seg *seg)
{
	/*
	 * Since this is a mapping to share kernel data with userspace, nothing
	 * additional should be dumped.
	 */
}

/* ARGSUSED */
static int
segvmm_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/* ARGSUSED */
static int
segvmm_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

static int
segvmm_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	segvmm_data_t *svmd = seg->s_data;

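	/* Identify the memory by kernel address plus segment offset. */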
	memidp->val[0] = (uintptr_t)svmd->svmd_kaddr;
	memidp->val[1] = (uintptr_t)(addr - seg->s_base);
	return (0);
}

/* ARGSUSED */
static int
segvmm_capable(struct seg *seg, segcapability_t capability)
{
	/* no special capabilities */
	return (0);
}