/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21843e1988Sjohnlev
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */
27843e1988Sjohnlev
/*
 * Machine frame segment driver.  This segment driver allows dom0 processes to
 * map pages of other domains or Xen (e.g. during save/restore).  ioctl()s on
 * the privcmd driver provide the MFN values backing each mapping, and we map
 * them into the process's address space at that time.  Demand-faulting is not
 * supported by this driver due to the requirements of some of the ioctl()s.
 */
35843e1988Sjohnlev
36843e1988Sjohnlev
37843e1988Sjohnlev #include <sys/types.h>
38843e1988Sjohnlev #include <sys/systm.h>
39843e1988Sjohnlev #include <sys/vmsystm.h>
40843e1988Sjohnlev #include <sys/mman.h>
41843e1988Sjohnlev #include <sys/errno.h>
42843e1988Sjohnlev #include <sys/kmem.h>
43843e1988Sjohnlev #include <sys/cmn_err.h>
44843e1988Sjohnlev #include <sys/vnode.h>
45843e1988Sjohnlev #include <sys/conf.h>
46843e1988Sjohnlev #include <sys/debug.h>
47843e1988Sjohnlev #include <sys/lgrp.h>
48843e1988Sjohnlev #include <sys/hypervisor.h>
49843e1988Sjohnlev
50843e1988Sjohnlev #include <vm/page.h>
51843e1988Sjohnlev #include <vm/hat.h>
52843e1988Sjohnlev #include <vm/as.h>
53843e1988Sjohnlev #include <vm/seg.h>
54843e1988Sjohnlev
55843e1988Sjohnlev #include <vm/hat_pte.h>
567eea693dSMark Johnson #include <vm/hat_i86.h>
57843e1988Sjohnlev #include <vm/seg_mf.h>
58843e1988Sjohnlev
59843e1988Sjohnlev #include <sys/fs/snode.h>
60843e1988Sjohnlev
/* Map a vnode to the s_commonvp of its snode. */
#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)
62843e1988Sjohnlev
637eea693dSMark Johnson typedef struct segmf_mfn_s {
647eea693dSMark Johnson mfn_t m_mfn;
657eea693dSMark Johnson } segmf_mfn_t;
667eea693dSMark Johnson
677eea693dSMark Johnson /* g_flags */
687eea693dSMark Johnson #define SEGMF_GFLAGS_WR 0x1
697eea693dSMark Johnson #define SEGMF_GFLAGS_MAPPED 0x2
707eea693dSMark Johnson typedef struct segmf_gref_s {
717eea693dSMark Johnson uint64_t g_ptep;
727eea693dSMark Johnson grant_ref_t g_gref;
737eea693dSMark Johnson uint32_t g_flags;
747eea693dSMark Johnson grant_handle_t g_handle;
757eea693dSMark Johnson } segmf_gref_t;
767eea693dSMark Johnson
777eea693dSMark Johnson typedef union segmf_mu_u {
787eea693dSMark Johnson segmf_mfn_t m;
797eea693dSMark Johnson segmf_gref_t g;
807eea693dSMark Johnson } segmf_mu_t;
817eea693dSMark Johnson
/* Discriminator describing what, if anything, backs a page of the segment. */
typedef enum {
	SEGMF_MAP_EMPTY = 0,	/* nothing recorded for this page */
	SEGMF_MAP_MFN,		/* page is backed by a foreign MFN */
	SEGMF_MAP_GREF		/* page is backed by a grant reference */
} segmf_map_type_t;
877eea693dSMark Johnson
887eea693dSMark Johnson typedef struct segmf_map_s {
897eea693dSMark Johnson segmf_map_type_t t_type;
907eea693dSMark Johnson segmf_mu_t u;
917eea693dSMark Johnson } segmf_map_t;
92843e1988Sjohnlev
93843e1988Sjohnlev struct segmf_data {
94843e1988Sjohnlev kmutex_t lock;
95843e1988Sjohnlev struct vnode *vp;
96843e1988Sjohnlev uchar_t prot;
97843e1988Sjohnlev uchar_t maxprot;
98843e1988Sjohnlev size_t softlockcnt;
99843e1988Sjohnlev domid_t domid;
1007eea693dSMark Johnson segmf_map_t *map;
101843e1988Sjohnlev };
102843e1988Sjohnlev
103843e1988Sjohnlev static struct seg_ops segmf_ops;
104843e1988Sjohnlev
1057eea693dSMark Johnson static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len);
1067eea693dSMark Johnson
107843e1988Sjohnlev static struct segmf_data *
segmf_data_zalloc(struct seg * seg)108843e1988Sjohnlev segmf_data_zalloc(struct seg *seg)
109843e1988Sjohnlev {
110843e1988Sjohnlev struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
111843e1988Sjohnlev
112843e1988Sjohnlev mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
113843e1988Sjohnlev seg->s_ops = &segmf_ops;
114843e1988Sjohnlev seg->s_data = data;
115843e1988Sjohnlev return (data);
116843e1988Sjohnlev }
117843e1988Sjohnlev
118843e1988Sjohnlev int
segmf_create(struct seg ** segpp,void * args)119*284ce987SPatrick Mooney segmf_create(struct seg **segpp, void *args)
120843e1988Sjohnlev {
121*284ce987SPatrick Mooney struct seg *seg = *segpp;
122843e1988Sjohnlev struct segmf_crargs *a = args;
123843e1988Sjohnlev struct segmf_data *data;
124843e1988Sjohnlev struct as *as = seg->s_as;
125843e1988Sjohnlev pgcnt_t i, npages = seg_pages(seg);
126843e1988Sjohnlev int error;
127843e1988Sjohnlev
128843e1988Sjohnlev hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
129843e1988Sjohnlev
130843e1988Sjohnlev data = segmf_data_zalloc(seg);
131843e1988Sjohnlev data->vp = specfind(a->dev, VCHR);
132843e1988Sjohnlev data->prot = a->prot;
133843e1988Sjohnlev data->maxprot = a->maxprot;
134843e1988Sjohnlev
1357eea693dSMark Johnson data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP);
1367eea693dSMark Johnson for (i = 0; i < npages; i++) {
1377eea693dSMark Johnson data->map[i].t_type = SEGMF_MAP_EMPTY;
1387eea693dSMark Johnson }
139843e1988Sjohnlev
140843e1988Sjohnlev error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
141da6c28aaSamw data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
142843e1988Sjohnlev
143843e1988Sjohnlev if (error != 0)
144843e1988Sjohnlev hat_unload(as->a_hat,
145843e1988Sjohnlev seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
146843e1988Sjohnlev return (error);
147843e1988Sjohnlev }
148843e1988Sjohnlev
149843e1988Sjohnlev /*
150843e1988Sjohnlev * Duplicate a seg and return new segment in newseg.
151843e1988Sjohnlev */
152843e1988Sjohnlev static int
segmf_dup(struct seg * seg,struct seg * newseg)153843e1988Sjohnlev segmf_dup(struct seg *seg, struct seg *newseg)
154843e1988Sjohnlev {
155843e1988Sjohnlev struct segmf_data *data = seg->s_data;
156843e1988Sjohnlev struct segmf_data *ndata;
157843e1988Sjohnlev pgcnt_t npages = seg_pages(newseg);
1587eea693dSMark Johnson size_t sz;
159843e1988Sjohnlev
160843e1988Sjohnlev ndata = segmf_data_zalloc(newseg);
161843e1988Sjohnlev
162843e1988Sjohnlev VN_HOLD(data->vp);
163843e1988Sjohnlev ndata->vp = data->vp;
164843e1988Sjohnlev ndata->prot = data->prot;
165843e1988Sjohnlev ndata->maxprot = data->maxprot;
166843e1988Sjohnlev ndata->domid = data->domid;
167843e1988Sjohnlev
1687eea693dSMark Johnson sz = npages * sizeof (segmf_map_t);
1697eea693dSMark Johnson ndata->map = kmem_alloc(sz, KM_SLEEP);
1707eea693dSMark Johnson bcopy(data->map, ndata->map, sz);
171843e1988Sjohnlev
172843e1988Sjohnlev return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
173843e1988Sjohnlev newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
174da6c28aaSamw MAP_SHARED, CRED(), NULL));
175843e1988Sjohnlev }
176843e1988Sjohnlev
177843e1988Sjohnlev /*
178843e1988Sjohnlev * We only support unmapping the whole segment, and we automatically unlock
179843e1988Sjohnlev * what we previously soft-locked.
180843e1988Sjohnlev */
181843e1988Sjohnlev static int
segmf_unmap(struct seg * seg,caddr_t addr,size_t len)182843e1988Sjohnlev segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
183843e1988Sjohnlev {
184843e1988Sjohnlev struct segmf_data *data = seg->s_data;
185843e1988Sjohnlev offset_t off;
186843e1988Sjohnlev
187843e1988Sjohnlev if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
188843e1988Sjohnlev (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
189843e1988Sjohnlev panic("segmf_unmap");
190843e1988Sjohnlev
191843e1988Sjohnlev if (addr != seg->s_base || len != seg->s_size)
192843e1988Sjohnlev return (ENOTSUP);
193843e1988Sjohnlev
194843e1988Sjohnlev hat_unload(seg->s_as->a_hat, addr, len,
195843e1988Sjohnlev HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
196843e1988Sjohnlev
197843e1988Sjohnlev off = (offset_t)seg_page(seg, addr);
198843e1988Sjohnlev
199843e1988Sjohnlev ASSERT(data->vp != NULL);
200843e1988Sjohnlev
201843e1988Sjohnlev (void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
202da6c28aaSamw data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
203843e1988Sjohnlev
204843e1988Sjohnlev seg_free(seg);
205843e1988Sjohnlev return (0);
206843e1988Sjohnlev }
207843e1988Sjohnlev
208843e1988Sjohnlev static void
segmf_free(struct seg * seg)209843e1988Sjohnlev segmf_free(struct seg *seg)
210843e1988Sjohnlev {
211843e1988Sjohnlev struct segmf_data *data = seg->s_data;
212843e1988Sjohnlev pgcnt_t npages = seg_pages(seg);
213843e1988Sjohnlev
2147eea693dSMark Johnson kmem_free(data->map, npages * sizeof (segmf_map_t));
215843e1988Sjohnlev VN_RELE(data->vp);
216843e1988Sjohnlev mutex_destroy(&data->lock);
217843e1988Sjohnlev kmem_free(data, sizeof (*data));
218843e1988Sjohnlev }
219843e1988Sjohnlev
220843e1988Sjohnlev static int segmf_faultpage_debug = 0;
221843e1988Sjohnlev /*ARGSUSED*/
222843e1988Sjohnlev static int
segmf_faultpage(struct hat * hat,struct seg * seg,caddr_t addr,enum fault_type type,uint_t prot)223843e1988Sjohnlev segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
224843e1988Sjohnlev enum fault_type type, uint_t prot)
225843e1988Sjohnlev {
226843e1988Sjohnlev struct segmf_data *data = seg->s_data;
227843e1988Sjohnlev uint_t hat_flags = HAT_LOAD_NOCONSIST;
228843e1988Sjohnlev mfn_t mfn;
229843e1988Sjohnlev x86pte_t pte;
2307eea693dSMark Johnson segmf_map_t *map;
2317eea693dSMark Johnson uint_t idx;
232843e1988Sjohnlev
233843e1988Sjohnlev
2347eea693dSMark Johnson idx = seg_page(seg, addr);
2357eea693dSMark Johnson map = &data->map[idx];
2367eea693dSMark Johnson ASSERT(map->t_type == SEGMF_MAP_MFN);
2377eea693dSMark Johnson
2387eea693dSMark Johnson mfn = map->u.m.m_mfn;
239843e1988Sjohnlev
240843e1988Sjohnlev if (type == F_SOFTLOCK) {
241843e1988Sjohnlev mutex_enter(&freemem_lock);
242843e1988Sjohnlev data->softlockcnt++;
243843e1988Sjohnlev mutex_exit(&freemem_lock);
244843e1988Sjohnlev hat_flags |= HAT_LOAD_LOCK;
245843e1988Sjohnlev } else
246843e1988Sjohnlev hat_flags |= HAT_LOAD;
247843e1988Sjohnlev
248843e1988Sjohnlev if (segmf_faultpage_debug > 0) {
249843e1988Sjohnlev uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
250843e1988Sjohnlev (void *)addr, data->domid, mfn, prot);
251843e1988Sjohnlev segmf_faultpage_debug--;
252843e1988Sjohnlev }
253843e1988Sjohnlev
254843e1988Sjohnlev /*
255843e1988Sjohnlev * Ask the HAT to load a throwaway mapping to page zero, then
256843e1988Sjohnlev * overwrite it with our foreign domain mapping. It gets removed
257843e1988Sjohnlev * later via hat_unload()
258843e1988Sjohnlev */
259843e1988Sjohnlev hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
260843e1988Sjohnlev PROT_READ | HAT_UNORDERED_OK, hat_flags);
261843e1988Sjohnlev
262843e1988Sjohnlev pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
263843e1988Sjohnlev if (prot & PROT_WRITE)
264843e1988Sjohnlev pte |= PT_WRITABLE;
265843e1988Sjohnlev
266843e1988Sjohnlev if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
267843e1988Sjohnlev UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
268843e1988Sjohnlev hat_flags = HAT_UNLOAD_UNMAP;
269843e1988Sjohnlev
270843e1988Sjohnlev if (type == F_SOFTLOCK) {
271843e1988Sjohnlev hat_flags |= HAT_UNLOAD_UNLOCK;
272843e1988Sjohnlev mutex_enter(&freemem_lock);
273843e1988Sjohnlev data->softlockcnt--;
274843e1988Sjohnlev mutex_exit(&freemem_lock);
275843e1988Sjohnlev }
276843e1988Sjohnlev
277843e1988Sjohnlev hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
278843e1988Sjohnlev return (FC_MAKE_ERR(EFAULT));
279843e1988Sjohnlev }
280843e1988Sjohnlev
281843e1988Sjohnlev return (0);
282843e1988Sjohnlev }
283843e1988Sjohnlev
284843e1988Sjohnlev static int
seg_rw_to_prot(enum seg_rw rw)285843e1988Sjohnlev seg_rw_to_prot(enum seg_rw rw)
286843e1988Sjohnlev {
287843e1988Sjohnlev switch (rw) {
288843e1988Sjohnlev case S_READ:
289843e1988Sjohnlev return (PROT_READ);
290843e1988Sjohnlev case S_WRITE:
291843e1988Sjohnlev return (PROT_WRITE);
292843e1988Sjohnlev case S_EXEC:
293843e1988Sjohnlev return (PROT_EXEC);
294843e1988Sjohnlev case S_OTHER:
295843e1988Sjohnlev default:
296843e1988Sjohnlev break;
297843e1988Sjohnlev }
298843e1988Sjohnlev return (PROT_READ | PROT_WRITE | PROT_EXEC);
299843e1988Sjohnlev }
300843e1988Sjohnlev
301843e1988Sjohnlev static void
segmf_softunlock(struct hat * hat,struct seg * seg,caddr_t addr,size_t len)302843e1988Sjohnlev segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
303843e1988Sjohnlev {
304843e1988Sjohnlev struct segmf_data *data = seg->s_data;
305843e1988Sjohnlev
306843e1988Sjohnlev hat_unlock(hat, addr, len);
307843e1988Sjohnlev
308843e1988Sjohnlev mutex_enter(&freemem_lock);
309843e1988Sjohnlev ASSERT(data->softlockcnt >= btopr(len));
310843e1988Sjohnlev data->softlockcnt -= btopr(len);
311843e1988Sjohnlev mutex_exit(&freemem_lock);
312843e1988Sjohnlev
313843e1988Sjohnlev if (data->softlockcnt == 0) {
314843e1988Sjohnlev struct as *as = seg->s_as;
315843e1988Sjohnlev
316843e1988Sjohnlev if (AS_ISUNMAPWAIT(as)) {
317843e1988Sjohnlev mutex_enter(&as->a_contents);
318843e1988Sjohnlev if (AS_ISUNMAPWAIT(as)) {
319843e1988Sjohnlev AS_CLRUNMAPWAIT(as);
320843e1988Sjohnlev cv_broadcast(&as->a_cv);
321843e1988Sjohnlev }
322843e1988Sjohnlev mutex_exit(&as->a_contents);
323843e1988Sjohnlev }
324843e1988Sjohnlev }
325843e1988Sjohnlev }
326843e1988Sjohnlev
327843e1988Sjohnlev static int
segmf_fault_range(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum fault_type type,enum seg_rw rw)328843e1988Sjohnlev segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
329843e1988Sjohnlev enum fault_type type, enum seg_rw rw)
330843e1988Sjohnlev {
331843e1988Sjohnlev struct segmf_data *data = seg->s_data;
332843e1988Sjohnlev int error = 0;
333843e1988Sjohnlev caddr_t a;
334843e1988Sjohnlev
335843e1988Sjohnlev if ((data->prot & seg_rw_to_prot(rw)) == 0)
336843e1988Sjohnlev return (FC_PROT);
337843e1988Sjohnlev
338843e1988Sjohnlev /* loop over the address range handling each fault */
339843e1988Sjohnlev
340843e1988Sjohnlev for (a = addr; a < addr + len; a += PAGESIZE) {
341843e1988Sjohnlev error = segmf_faultpage(hat, seg, a, type, data->prot);
342843e1988Sjohnlev if (error != 0)
343843e1988Sjohnlev break;
344843e1988Sjohnlev }
345843e1988Sjohnlev
346843e1988Sjohnlev if (error != 0 && type == F_SOFTLOCK) {
347843e1988Sjohnlev size_t done = (size_t)(a - addr);
348843e1988Sjohnlev
349843e1988Sjohnlev /*
350843e1988Sjohnlev * Undo what's been done so far.
351843e1988Sjohnlev */
352843e1988Sjohnlev if (done > 0)
353843e1988Sjohnlev segmf_softunlock(hat, seg, addr, done);
354843e1988Sjohnlev }
355843e1988Sjohnlev
356843e1988Sjohnlev return (error);
357843e1988Sjohnlev }
358843e1988Sjohnlev
359843e1988Sjohnlev /*
360843e1988Sjohnlev * We never demand-fault for seg_mf.
361843e1988Sjohnlev */
362843e1988Sjohnlev /*ARGSUSED*/
363843e1988Sjohnlev static int
segmf_fault(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum fault_type type,enum seg_rw rw)364843e1988Sjohnlev segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
365843e1988Sjohnlev enum fault_type type, enum seg_rw rw)
366843e1988Sjohnlev {
367843e1988Sjohnlev return (FC_MAKE_ERR(EFAULT));
368843e1988Sjohnlev }
369843e1988Sjohnlev
370843e1988Sjohnlev /*ARGSUSED*/
371843e1988Sjohnlev static int
segmf_faulta(struct seg * seg,caddr_t addr)372843e1988Sjohnlev segmf_faulta(struct seg *seg, caddr_t addr)
373843e1988Sjohnlev {
374843e1988Sjohnlev return (0);
375843e1988Sjohnlev }
376843e1988Sjohnlev
377843e1988Sjohnlev /*ARGSUSED*/
378843e1988Sjohnlev static int
segmf_setprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)379843e1988Sjohnlev segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
380843e1988Sjohnlev {
381843e1988Sjohnlev return (EINVAL);
382843e1988Sjohnlev }
383843e1988Sjohnlev
384843e1988Sjohnlev /*ARGSUSED*/
385843e1988Sjohnlev static int
segmf_checkprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)386843e1988Sjohnlev segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
387843e1988Sjohnlev {
388843e1988Sjohnlev return (EINVAL);
389843e1988Sjohnlev }
390843e1988Sjohnlev
391843e1988Sjohnlev /*ARGSUSED*/
392843e1988Sjohnlev static int
segmf_kluster(struct seg * seg,caddr_t addr,ssize_t delta)393843e1988Sjohnlev segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
394843e1988Sjohnlev {
395843e1988Sjohnlev return (-1);
396843e1988Sjohnlev }
397843e1988Sjohnlev
398843e1988Sjohnlev /*ARGSUSED*/
399843e1988Sjohnlev static int
segmf_sync(struct seg * seg,caddr_t addr,size_t len,int attr,uint_t flags)400843e1988Sjohnlev segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
401843e1988Sjohnlev {
402843e1988Sjohnlev return (0);
403843e1988Sjohnlev }
404843e1988Sjohnlev
405843e1988Sjohnlev /*
406843e1988Sjohnlev * XXPV Hmm. Should we say that mf mapping are "in core?"
407843e1988Sjohnlev */
408843e1988Sjohnlev
409843e1988Sjohnlev /*ARGSUSED*/
410843e1988Sjohnlev static size_t
segmf_incore(struct seg * seg,caddr_t addr,size_t len,char * vec)411843e1988Sjohnlev segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
412843e1988Sjohnlev {
413843e1988Sjohnlev size_t v;
414843e1988Sjohnlev
415843e1988Sjohnlev for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
416843e1988Sjohnlev len -= PAGESIZE, v += PAGESIZE)
417843e1988Sjohnlev *vec++ = 1;
418843e1988Sjohnlev return (v);
419843e1988Sjohnlev }
420843e1988Sjohnlev
421843e1988Sjohnlev /*ARGSUSED*/
422843e1988Sjohnlev static int
segmf_lockop(struct seg * seg,caddr_t addr,size_t len,int attr,int op,ulong_t * lockmap,size_t pos)423843e1988Sjohnlev segmf_lockop(struct seg *seg, caddr_t addr,
424843e1988Sjohnlev size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
425843e1988Sjohnlev {
426843e1988Sjohnlev return (0);
427843e1988Sjohnlev }
428843e1988Sjohnlev
429843e1988Sjohnlev static int
segmf_getprot(struct seg * seg,caddr_t addr,size_t len,uint_t * protv)430843e1988Sjohnlev segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
431843e1988Sjohnlev {
432843e1988Sjohnlev struct segmf_data *data = seg->s_data;
433843e1988Sjohnlev pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
434843e1988Sjohnlev
435843e1988Sjohnlev if (pgno != 0) {
436843e1988Sjohnlev do
437843e1988Sjohnlev protv[--pgno] = data->prot;
438843e1988Sjohnlev while (pgno != 0)
439843e1988Sjohnlev ;
440843e1988Sjohnlev }
441843e1988Sjohnlev return (0);
442843e1988Sjohnlev }
443843e1988Sjohnlev
444843e1988Sjohnlev static u_offset_t
segmf_getoffset(struct seg * seg,caddr_t addr)445843e1988Sjohnlev segmf_getoffset(struct seg *seg, caddr_t addr)
446843e1988Sjohnlev {
447843e1988Sjohnlev return (addr - seg->s_base);
448843e1988Sjohnlev }
449843e1988Sjohnlev
450843e1988Sjohnlev /*ARGSUSED*/
451843e1988Sjohnlev static int
segmf_gettype(struct seg * seg,caddr_t addr)452843e1988Sjohnlev segmf_gettype(struct seg *seg, caddr_t addr)
453843e1988Sjohnlev {
454843e1988Sjohnlev return (MAP_SHARED);
455843e1988Sjohnlev }
456843e1988Sjohnlev
457843e1988Sjohnlev /*ARGSUSED1*/
458843e1988Sjohnlev static int
segmf_getvp(struct seg * seg,caddr_t addr,struct vnode ** vpp)459843e1988Sjohnlev segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
460843e1988Sjohnlev {
461843e1988Sjohnlev struct segmf_data *data = seg->s_data;
462843e1988Sjohnlev
463843e1988Sjohnlev *vpp = VTOCVP(data->vp);
464843e1988Sjohnlev return (0);
465843e1988Sjohnlev }
466843e1988Sjohnlev
467843e1988Sjohnlev /*ARGSUSED*/
468843e1988Sjohnlev static int
segmf_advise(struct seg * seg,caddr_t addr,size_t len,uint_t behav)469843e1988Sjohnlev segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
470843e1988Sjohnlev {
471843e1988Sjohnlev return (0);
472843e1988Sjohnlev }
473843e1988Sjohnlev
/* Dump entry point: intentionally empty. */
/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{
}
478843e1988Sjohnlev
479843e1988Sjohnlev /*ARGSUSED*/
480843e1988Sjohnlev static int
segmf_pagelock(struct seg * seg,caddr_t addr,size_t len,struct page *** ppp,enum lock_type type,enum seg_rw rw)481843e1988Sjohnlev segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
482843e1988Sjohnlev struct page ***ppp, enum lock_type type, enum seg_rw rw)
483843e1988Sjohnlev {
484843e1988Sjohnlev return (ENOTSUP);
485843e1988Sjohnlev }
486843e1988Sjohnlev
487843e1988Sjohnlev /*ARGSUSED*/
488843e1988Sjohnlev static int
segmf_setpagesize(struct seg * seg,caddr_t addr,size_t len,uint_t szc)489843e1988Sjohnlev segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
490843e1988Sjohnlev {
491843e1988Sjohnlev return (ENOTSUP);
492843e1988Sjohnlev }
493843e1988Sjohnlev
494843e1988Sjohnlev static int
segmf_getmemid(struct seg * seg,caddr_t addr,memid_t * memid)495843e1988Sjohnlev segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
496843e1988Sjohnlev {
497843e1988Sjohnlev struct segmf_data *data = seg->s_data;
498843e1988Sjohnlev
499843e1988Sjohnlev memid->val[0] = (uintptr_t)VTOCVP(data->vp);
500843e1988Sjohnlev memid->val[1] = (uintptr_t)seg_page(seg, addr);
501843e1988Sjohnlev return (0);
502843e1988Sjohnlev }
503843e1988Sjohnlev
504843e1988Sjohnlev /*ARGSUSED*/
505843e1988Sjohnlev static lgrp_mem_policy_info_t *
segmf_getpolicy(struct seg * seg,caddr_t addr)506843e1988Sjohnlev segmf_getpolicy(struct seg *seg, caddr_t addr)
507843e1988Sjohnlev {
508843e1988Sjohnlev return (NULL);
509843e1988Sjohnlev }
510843e1988Sjohnlev
511843e1988Sjohnlev /*ARGSUSED*/
512843e1988Sjohnlev static int
segmf_capable(struct seg * seg,segcapability_t capability)513843e1988Sjohnlev segmf_capable(struct seg *seg, segcapability_t capability)
514843e1988Sjohnlev {
515843e1988Sjohnlev return (0);
516843e1988Sjohnlev }
517843e1988Sjohnlev
518843e1988Sjohnlev /*
519843e1988Sjohnlev * Add a set of contiguous foreign MFNs to the segment. soft-locking them. The
520843e1988Sjohnlev * pre-faulting is necessary due to live migration; in particular we must
521843e1988Sjohnlev * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
522843e1988Sjohnlev * later on a bad MFN. Whilst this isn't necessary for the other MMAP
523843e1988Sjohnlev * ioctl()s, we lock them too, as they should be transitory.
524843e1988Sjohnlev */
525843e1988Sjohnlev int
segmf_add_mfns(struct seg * seg,caddr_t addr,mfn_t mfn,pgcnt_t pgcnt,domid_t domid)526843e1988Sjohnlev segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
527843e1988Sjohnlev pgcnt_t pgcnt, domid_t domid)
528843e1988Sjohnlev {
529843e1988Sjohnlev struct segmf_data *data = seg->s_data;
5307eea693dSMark Johnson pgcnt_t base;
531843e1988Sjohnlev faultcode_t fc;
532843e1988Sjohnlev pgcnt_t i;
533843e1988Sjohnlev int error = 0;
534843e1988Sjohnlev
535843e1988Sjohnlev if (seg->s_ops != &segmf_ops)
536843e1988Sjohnlev return (EINVAL);
537843e1988Sjohnlev
538843e1988Sjohnlev /*
539843e1988Sjohnlev * Don't mess with dom0.
540843e1988Sjohnlev *
541843e1988Sjohnlev * Only allow the domid to be set once for the segment.
542843e1988Sjohnlev * After that attempts to add mappings to this segment for
543843e1988Sjohnlev * other domains explicitly fails.
544843e1988Sjohnlev */
545843e1988Sjohnlev
546843e1988Sjohnlev if (domid == 0 || domid == DOMID_SELF)
547843e1988Sjohnlev return (EACCES);
548843e1988Sjohnlev
549843e1988Sjohnlev mutex_enter(&data->lock);
550843e1988Sjohnlev
551843e1988Sjohnlev if (data->domid == 0)
552843e1988Sjohnlev data->domid = domid;
553843e1988Sjohnlev
554843e1988Sjohnlev if (data->domid != domid) {
555843e1988Sjohnlev error = EINVAL;
556843e1988Sjohnlev goto out;
557843e1988Sjohnlev }
558843e1988Sjohnlev
559843e1988Sjohnlev base = seg_page(seg, addr);
560843e1988Sjohnlev
5617eea693dSMark Johnson for (i = 0; i < pgcnt; i++) {
5627eea693dSMark Johnson data->map[base + i].t_type = SEGMF_MAP_MFN;
5637eea693dSMark Johnson data->map[base + i].u.m.m_mfn = mfn++;
5647eea693dSMark Johnson }
565843e1988Sjohnlev
566843e1988Sjohnlev fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
567843e1988Sjohnlev pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
568843e1988Sjohnlev
569843e1988Sjohnlev if (fc != 0) {
570843e1988Sjohnlev error = fc_decode(fc);
5717eea693dSMark Johnson for (i = 0; i < pgcnt; i++) {
5727eea693dSMark Johnson data->map[base + i].t_type = SEGMF_MAP_EMPTY;
5737eea693dSMark Johnson }
574843e1988Sjohnlev }
575843e1988Sjohnlev
576843e1988Sjohnlev out:
577843e1988Sjohnlev mutex_exit(&data->lock);
578843e1988Sjohnlev return (error);
579843e1988Sjohnlev }
580843e1988Sjohnlev
5817eea693dSMark Johnson int
segmf_add_grefs(struct seg * seg,caddr_t addr,uint_t flags,grant_ref_t * grefs,uint_t cnt,domid_t domid)5827eea693dSMark Johnson segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags,
5837eea693dSMark Johnson grant_ref_t *grefs, uint_t cnt, domid_t domid)
5847eea693dSMark Johnson {
5857eea693dSMark Johnson struct segmf_data *data;
5867eea693dSMark Johnson segmf_map_t *map;
5877eea693dSMark Johnson faultcode_t fc;
5887eea693dSMark Johnson uint_t idx;
5897eea693dSMark Johnson uint_t i;
5907eea693dSMark Johnson int e;
5917eea693dSMark Johnson
5927eea693dSMark Johnson if (seg->s_ops != &segmf_ops)
5937eea693dSMark Johnson return (EINVAL);
5947eea693dSMark Johnson
5957eea693dSMark Johnson /*
5967eea693dSMark Johnson * Don't mess with dom0.
5977eea693dSMark Johnson *
5987eea693dSMark Johnson * Only allow the domid to be set once for the segment.
5997eea693dSMark Johnson * After that attempts to add mappings to this segment for
6007eea693dSMark Johnson * other domains explicitly fails.
6017eea693dSMark Johnson */
6027eea693dSMark Johnson
6037eea693dSMark Johnson if (domid == 0 || domid == DOMID_SELF)
6047eea693dSMark Johnson return (EACCES);
6057eea693dSMark Johnson
6067eea693dSMark Johnson data = seg->s_data;
6077eea693dSMark Johnson idx = seg_page(seg, addr);
6087eea693dSMark Johnson map = &data->map[idx];
6097eea693dSMark Johnson e = 0;
6107eea693dSMark Johnson
6117eea693dSMark Johnson mutex_enter(&data->lock);
6127eea693dSMark Johnson
6137eea693dSMark Johnson if (data->domid == 0)
6147eea693dSMark Johnson data->domid = domid;
6157eea693dSMark Johnson
6167eea693dSMark Johnson if (data->domid != domid) {
6177eea693dSMark Johnson e = EINVAL;
6187eea693dSMark Johnson goto out;
6197eea693dSMark Johnson }
6207eea693dSMark Johnson
6217eea693dSMark Johnson /* store away the grefs passed in then fault in the pages */
6227eea693dSMark Johnson for (i = 0; i < cnt; i++) {
6237eea693dSMark Johnson map[i].t_type = SEGMF_MAP_GREF;
6247eea693dSMark Johnson map[i].u.g.g_gref = grefs[i];
6257eea693dSMark Johnson map[i].u.g.g_handle = 0;
6267eea693dSMark Johnson map[i].u.g.g_flags = 0;
6277eea693dSMark Johnson if (flags & SEGMF_GREF_WR) {
6287eea693dSMark Johnson map[i].u.g.g_flags |= SEGMF_GFLAGS_WR;
6297eea693dSMark Johnson }
6307eea693dSMark Johnson }
6317eea693dSMark Johnson fc = segmf_fault_gref_range(seg, addr, cnt);
6327eea693dSMark Johnson if (fc != 0) {
6337eea693dSMark Johnson e = fc_decode(fc);
6347eea693dSMark Johnson for (i = 0; i < cnt; i++) {
6357eea693dSMark Johnson data->map[i].t_type = SEGMF_MAP_EMPTY;
6367eea693dSMark Johnson }
6377eea693dSMark Johnson }
6387eea693dSMark Johnson
6397eea693dSMark Johnson out:
6407eea693dSMark Johnson mutex_exit(&data->lock);
6417eea693dSMark Johnson return (e);
6427eea693dSMark Johnson }
6437eea693dSMark Johnson
6447eea693dSMark Johnson int
segmf_release_grefs(struct seg * seg,caddr_t addr,uint_t cnt)6457eea693dSMark Johnson segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt)
6467eea693dSMark Johnson {
6477eea693dSMark Johnson gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS];
6487eea693dSMark Johnson struct segmf_data *data;
6497eea693dSMark Johnson segmf_map_t *map;
6507eea693dSMark Johnson uint_t idx;
6517eea693dSMark Johnson long e;
6527eea693dSMark Johnson int i;
6537eea693dSMark Johnson int n;
6547eea693dSMark Johnson
6557eea693dSMark Johnson
6567eea693dSMark Johnson if (cnt > SEGMF_MAX_GREFS) {
6577eea693dSMark Johnson return (-1);
6587eea693dSMark Johnson }
6597eea693dSMark Johnson
6607eea693dSMark Johnson idx = seg_page(seg, addr);
6617eea693dSMark Johnson data = seg->s_data;
6627eea693dSMark Johnson map = &data->map[idx];
6637eea693dSMark Johnson
6647eea693dSMark Johnson bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt);
6657eea693dSMark Johnson
6667eea693dSMark Johnson /*
6677eea693dSMark Johnson * for each entry which isn't empty and is currently mapped,
6687eea693dSMark Johnson * set it up for an unmap then mark them empty.
6697eea693dSMark Johnson */
6707eea693dSMark Johnson n = 0;
6717eea693dSMark Johnson for (i = 0; i < cnt; i++) {
6727eea693dSMark Johnson ASSERT(map[i].t_type != SEGMF_MAP_MFN);
6737eea693dSMark Johnson if ((map[i].t_type == SEGMF_MAP_GREF) &&
6747eea693dSMark Johnson (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) {
6757eea693dSMark Johnson mapop[n].handle = map[i].u.g.g_handle;
6767eea693dSMark Johnson mapop[n].host_addr = map[i].u.g.g_ptep;
6777eea693dSMark Johnson mapop[n].dev_bus_addr = 0;
6787eea693dSMark Johnson n++;
6797eea693dSMark Johnson }
6807eea693dSMark Johnson map[i].t_type = SEGMF_MAP_EMPTY;
6817eea693dSMark Johnson }
6827eea693dSMark Johnson
6837eea693dSMark Johnson /* if there's nothing to unmap, just return */
6847eea693dSMark Johnson if (n == 0) {
6857eea693dSMark Johnson return (0);
6867eea693dSMark Johnson }
6877eea693dSMark Johnson
6887eea693dSMark Johnson e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n);
6897eea693dSMark Johnson if (e != 0) {
6907eea693dSMark Johnson return (-1);
6917eea693dSMark Johnson }
6927eea693dSMark Johnson
6937eea693dSMark Johnson return (0);
6947eea693dSMark Johnson }
6957eea693dSMark Johnson
6967eea693dSMark Johnson
6977eea693dSMark Johnson void
segmf_add_gref_pte(struct seg * seg,caddr_t addr,uint64_t pte_ma)6987eea693dSMark Johnson segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma)
6997eea693dSMark Johnson {
7007eea693dSMark Johnson struct segmf_data *data;
7017eea693dSMark Johnson uint_t idx;
7027eea693dSMark Johnson
7037eea693dSMark Johnson idx = seg_page(seg, addr);
7047eea693dSMark Johnson data = seg->s_data;
7057eea693dSMark Johnson
7067eea693dSMark Johnson data->map[idx].u.g.g_ptep = pte_ma;
7077eea693dSMark Johnson }
7087eea693dSMark Johnson
7097eea693dSMark Johnson
7107eea693dSMark Johnson static int
segmf_fault_gref_range(struct seg * seg,caddr_t addr,size_t cnt)7117eea693dSMark Johnson segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt)
7127eea693dSMark Johnson {
7137eea693dSMark Johnson gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS];
7147eea693dSMark Johnson struct segmf_data *data;
7157eea693dSMark Johnson segmf_map_t *map;
7167eea693dSMark Johnson uint_t idx;
7177eea693dSMark Johnson int e;
7187eea693dSMark Johnson int i;
7197eea693dSMark Johnson
7207eea693dSMark Johnson
7217eea693dSMark Johnson if (cnt > SEGMF_MAX_GREFS) {
7227eea693dSMark Johnson return (-1);
7237eea693dSMark Johnson }
7247eea693dSMark Johnson
7257eea693dSMark Johnson data = seg->s_data;
7267eea693dSMark Johnson idx = seg_page(seg, addr);
7277eea693dSMark Johnson map = &data->map[idx];
7287eea693dSMark Johnson
7297eea693dSMark Johnson bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt);
7307eea693dSMark Johnson
7317eea693dSMark Johnson ASSERT(map->t_type == SEGMF_MAP_GREF);
7327eea693dSMark Johnson
7337eea693dSMark Johnson /*
7347eea693dSMark Johnson * map in each page passed in into the user apps AS. We do this by
7357eea693dSMark Johnson * passing the MA of the actual pte of the mapping to the hypervisor.
7367eea693dSMark Johnson */
7377eea693dSMark Johnson for (i = 0; i < cnt; i++) {
7387eea693dSMark Johnson mapop[i].host_addr = map[i].u.g.g_ptep;
7397eea693dSMark Johnson mapop[i].dom = data->domid;
7407eea693dSMark Johnson mapop[i].ref = map[i].u.g.g_gref;
7417eea693dSMark Johnson mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map |
7427eea693dSMark Johnson GNTMAP_contains_pte;
7437eea693dSMark Johnson if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) {
7447eea693dSMark Johnson mapop[i].flags |= GNTMAP_readonly;
7457eea693dSMark Johnson }
7467eea693dSMark Johnson }
7477eea693dSMark Johnson e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE);
7487eea693dSMark Johnson if ((e != 0) || (mapop[0].status != GNTST_okay)) {
7497eea693dSMark Johnson return (FC_MAKE_ERR(EFAULT));
7507eea693dSMark Johnson }
7517eea693dSMark Johnson
7527eea693dSMark Johnson /* save handle for segmf_release_grefs() and mark it as mapped */
7537eea693dSMark Johnson for (i = 0; i < cnt; i++) {
7547eea693dSMark Johnson ASSERT(mapop[i].status == GNTST_okay);
7557eea693dSMark Johnson map[i].u.g.g_handle = mapop[i].handle;
7567eea693dSMark Johnson map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED;
7577eea693dSMark Johnson }
7587eea693dSMark Johnson
7597eea693dSMark Johnson return (0);
7607eea693dSMark Johnson }
7617eea693dSMark Johnson
762843e1988Sjohnlev static struct seg_ops segmf_ops = {
763843e1988Sjohnlev segmf_dup,
764843e1988Sjohnlev segmf_unmap,
765843e1988Sjohnlev segmf_free,
766843e1988Sjohnlev segmf_fault,
767843e1988Sjohnlev segmf_faulta,
768843e1988Sjohnlev segmf_setprot,
769843e1988Sjohnlev segmf_checkprot,
770843e1988Sjohnlev (int (*)())segmf_kluster,
771843e1988Sjohnlev (size_t (*)(struct seg *))NULL, /* swapout */
772843e1988Sjohnlev segmf_sync,
773843e1988Sjohnlev segmf_incore,
774843e1988Sjohnlev segmf_lockop,
775843e1988Sjohnlev segmf_getprot,
776843e1988Sjohnlev segmf_getoffset,
777843e1988Sjohnlev segmf_gettype,
778843e1988Sjohnlev segmf_getvp,
779843e1988Sjohnlev segmf_advise,
780843e1988Sjohnlev segmf_dump,
781843e1988Sjohnlev segmf_pagelock,
782843e1988Sjohnlev segmf_setpagesize,
783843e1988Sjohnlev segmf_getmemid,
784843e1988Sjohnlev segmf_getpolicy,
7859d12795fSRobert Mustacchi segmf_capable,
7869d12795fSRobert Mustacchi seg_inherit_notsup
787843e1988Sjohnlev };
788