/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */
44bf21cd93STycho Nightingale 
45bf21cd93STycho Nightingale #include <sys/cdefs.h>
464c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
47bf21cd93STycho Nightingale 
48bf21cd93STycho Nightingale #include <sys/param.h>
49bf21cd93STycho Nightingale #include <sys/sysctl.h>
50bf21cd93STycho Nightingale #include <sys/ioctl.h>
51154972afSPatrick Mooney #ifdef	__FreeBSD__
52154972afSPatrick Mooney #include <sys/linker.h>
53154972afSPatrick Mooney #endif
54bf21cd93STycho Nightingale #include <sys/mman.h>
55154972afSPatrick Mooney #include <sys/module.h>
56bf21cd93STycho Nightingale #include <sys/_iovec.h>
57bf21cd93STycho Nightingale #include <sys/cpuset.h>
58bf21cd93STycho Nightingale 
594c87aefeSPatrick Mooney #include <x86/segments.h>
60bf21cd93STycho Nightingale #include <machine/specialreg.h>
61bf21cd93STycho Nightingale 
62bf21cd93STycho Nightingale #include <errno.h>
63bf21cd93STycho Nightingale #include <stdio.h>
64bf21cd93STycho Nightingale #include <stdlib.h>
65bf21cd93STycho Nightingale #include <assert.h>
66bf21cd93STycho Nightingale #include <string.h>
67bf21cd93STycho Nightingale #include <fcntl.h>
68bf21cd93STycho Nightingale #include <unistd.h>
69bf21cd93STycho Nightingale 
70bf21cd93STycho Nightingale #include <libutil.h>
71bf21cd93STycho Nightingale 
72bf21cd93STycho Nightingale #include <machine/vmm.h>
73bf21cd93STycho Nightingale #include <machine/vmm_dev.h>
74bf21cd93STycho Nightingale 
75bf21cd93STycho Nightingale #include "vmmapi.h"
76bf21cd93STycho Nightingale 
/* Byte counts for one mebibyte and one gibibyte (type unsigned long). */
#define	MB	(1UL << 20)
#define	GB	(1UL << 30)

#ifndef __FreeBSD__
/* These mmap(2) flags are shimmed to no-ops on non-FreeBSD hosts for now. */
#define	MAP_NOCORE		0
#define	MAP_ALIGNED_SUPER	0

/* No native guard mapping here: rely on PROT_NONE for guard purposes. */
#define	MAP_GUARD		(MAP_PRIVATE | MAP_ANON | MAP_NORESERVE)

#define	_Thread_local		__thread
#endif

/*
 * Size of each guard region placed immediately before and after the
 * virtual address range that maps guest physical memory.  For performance
 * reasons this must be a multiple of the superpage size.
 */
#define	VM_MMAP_GUARD_SIZE	(4 * MB)

/* Protection shorthands used for the host and guest mappings. */
#define	PROT_RW		(PROT_READ | PROT_WRITE)
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)
1004c87aefeSPatrick Mooney 
/*
 * Per-VM handle returned by vm_open().
 */
struct vmctx {
	/* descriptor for the /dev/vmm/<name> device; -1 when not open */
	int	fd;
	/* largest size the lowmem segment may take; the rest is highmem */
	uint32_t lowmem_limit;
	/* VM_MEM_F_* flags consulted when mapping guest memory */
	int	memflags;
	/* bytes of guest memory placed in the lowmem segment */
	size_t	lowmem;
	/* bytes of guest memory placed in the highmem segment */
	size_t	highmem;
	/*
	 * NOTE(review): presumably the host address corresponding to guest
	 * physical address 0; its assignment is not visible here - confirm.
	 */
	char	*baseaddr;
	/* VM name; vm_open() stores it directly after this structure */
	char	*name;
};
110bf21cd93STycho Nightingale 
111bf21cd93STycho Nightingale #ifdef	__FreeBSD__
/*
 * Create or destroy a VM by writing its name to the hw.vmm.create or
 * hw.vmm.destroy sysctl node.  The argument is evaluated twice.
 */
#define	CREATE(x)	\
	sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x)	\
	sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
114bf21cd93STycho Nightingale 
/*
 * Create the VM called 'name'.
 *
 * Returns whatever the CREATE() sysctl request returns.
 */
int
vm_create(const char *name)
{
	/*
	 * Try to load the vmm(4) module before creating a guest.  The
	 * result of the load attempt is deliberately not examined.
	 */
	if (modfind("vmm") < 0) {
		kldload("vmm");
	}

	return (CREATE(name));
}
123b57f5d3eSPatrick Mooney 
124b57f5d3eSPatrick Mooney void
vm_destroy(struct vmctx * vm)125b57f5d3eSPatrick Mooney vm_destroy(struct vmctx *vm)
126b57f5d3eSPatrick Mooney {
127b57f5d3eSPatrick Mooney 	assert(vm != NULL);
128b57f5d3eSPatrick Mooney 
129b57f5d3eSPatrick Mooney 	if (vm->fd >= 0)
130b57f5d3eSPatrick Mooney 		close(vm->fd);
131b57f5d3eSPatrick Mooney 	DESTROY(vm->name);
132b57f5d3eSPatrick Mooney 
133b57f5d3eSPatrick Mooney 	free(vm);
134b57f5d3eSPatrick Mooney }
135b57f5d3eSPatrick Mooney 
136b57f5d3eSPatrick Mooney #else
137bf21cd93STycho Nightingale static int
vm_do_ctl(int cmd,void * req)138b57f5d3eSPatrick Mooney vm_do_ctl(int cmd, void *req)
139bf21cd93STycho Nightingale {
1404c87aefeSPatrick Mooney 	int ctl_fd;
141bf21cd93STycho Nightingale 
1424c87aefeSPatrick Mooney 	ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
1434c87aefeSPatrick Mooney 	if (ctl_fd < 0) {
1444c87aefeSPatrick Mooney 		return (-1);
1454c87aefeSPatrick Mooney 	}
146bf21cd93STycho Nightingale 
147b57f5d3eSPatrick Mooney 	if (ioctl(ctl_fd, cmd, req) == -1) {
1484c87aefeSPatrick Mooney 		int err = errno;
149bf21cd93STycho Nightingale 
1504c87aefeSPatrick Mooney 		/* Do not lose ioctl errno through the close(2) */
1514c87aefeSPatrick Mooney 		(void) close(ctl_fd);
1524c87aefeSPatrick Mooney 		errno = err;
1534c87aefeSPatrick Mooney 		return (-1);
1544c87aefeSPatrick Mooney 	}
1554c87aefeSPatrick Mooney 	(void) close(ctl_fd);
1564c87aefeSPatrick Mooney 
1574c87aefeSPatrick Mooney 	return (0);
158bf21cd93STycho Nightingale }
159b57f5d3eSPatrick Mooney 
160b57f5d3eSPatrick Mooney int
vm_create(const char * name,uint64_t flags)161b57f5d3eSPatrick Mooney vm_create(const char *name, uint64_t flags)
162b57f5d3eSPatrick Mooney {
163b57f5d3eSPatrick Mooney 	struct vm_create_req req;
164b57f5d3eSPatrick Mooney 
165b57f5d3eSPatrick Mooney 	(void) strncpy(req.name, name, VM_MAX_NAMELEN);
166b57f5d3eSPatrick Mooney 	req.flags = flags;
167b57f5d3eSPatrick Mooney 
168b57f5d3eSPatrick Mooney 	return (vm_do_ctl(VMM_CREATE_VM, &req));
169b57f5d3eSPatrick Mooney }
170b57f5d3eSPatrick Mooney 
171b57f5d3eSPatrick Mooney void
vm_close(struct vmctx * vm)172b57f5d3eSPatrick Mooney vm_close(struct vmctx *vm)
173b57f5d3eSPatrick Mooney {
174b57f5d3eSPatrick Mooney 	assert(vm != NULL);
175b57f5d3eSPatrick Mooney 	assert(vm->fd >= 0);
176b57f5d3eSPatrick Mooney 
177b57f5d3eSPatrick Mooney 	(void) close(vm->fd);
178b57f5d3eSPatrick Mooney 
179b57f5d3eSPatrick Mooney 	free(vm);
180b57f5d3eSPatrick Mooney }
181b57f5d3eSPatrick Mooney 
182b57f5d3eSPatrick Mooney void
vm_destroy(struct vmctx * vm)183b57f5d3eSPatrick Mooney vm_destroy(struct vmctx *vm)
184b57f5d3eSPatrick Mooney {
185b57f5d3eSPatrick Mooney 	struct vm_destroy_req req;
186b57f5d3eSPatrick Mooney 
187b57f5d3eSPatrick Mooney 	assert(vm != NULL);
188b57f5d3eSPatrick Mooney 
189b57f5d3eSPatrick Mooney 	if (vm->fd >= 0) {
190b57f5d3eSPatrick Mooney 		(void) close(vm->fd);
191b57f5d3eSPatrick Mooney 		vm->fd = -1;
192b57f5d3eSPatrick Mooney 	}
193b57f5d3eSPatrick Mooney 
194b57f5d3eSPatrick Mooney 	(void) strncpy(req.name, vm->name, VM_MAX_NAMELEN);
195b57f5d3eSPatrick Mooney 	(void) vm_do_ctl(VMM_DESTROY_VM, &req);
196b57f5d3eSPatrick Mooney 
197b57f5d3eSPatrick Mooney 	free(vm);
198b57f5d3eSPatrick Mooney }
1994c87aefeSPatrick Mooney #endif
200bf21cd93STycho Nightingale 
/*
 * Open the device node "/dev/vmm/<name>" for reading and writing.
 *
 * Returns the open descriptor, or -1 with errno set when either the path
 * buffer cannot be allocated or open(2) fails.
 */
static int
vm_device_open(const char *name)
{
	static const char prefix[] = "/dev/vmm/";
	size_t len;
	char *vmfile;
	int fd, saved_errno;

	/* sizeof (prefix) already accounts for the terminating NUL */
	len = sizeof (prefix) + strlen(name);
	vmfile = malloc(len);
	if (vmfile == NULL)
		return (-1);
	(void) snprintf(vmfile, len, "%s%s", prefix, name);

	/* Open the device file */
	fd = open(vmfile, O_RDWR, 0);

	/* Callers inspect errno on failure; keep free(3) from disturbing it */
	saved_errno = errno;
	free(vmfile);
	errno = saved_errno;

	return (fd);
}
218bf21cd93STycho Nightingale 
219bf21cd93STycho Nightingale struct vmctx *
vm_open(const char * name)220bf21cd93STycho Nightingale vm_open(const char *name)
221bf21cd93STycho Nightingale {
222bf21cd93STycho Nightingale 	struct vmctx *vm;
223b0de25cbSAndy Fiddaman 	int saved_errno;
224bf21cd93STycho Nightingale 
225bf21cd93STycho Nightingale 	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
226bf21cd93STycho Nightingale 	assert(vm != NULL);
227bf21cd93STycho Nightingale 
228bf21cd93STycho Nightingale 	vm->fd = -1;
2294c87aefeSPatrick Mooney 	vm->memflags = 0;
230bf21cd93STycho Nightingale 	vm->lowmem_limit = 3 * GB;
231bf21cd93STycho Nightingale 	vm->name = (char *)(vm + 1);
232bf21cd93STycho Nightingale 	strcpy(vm->name, name);
233bf21cd93STycho Nightingale 
234bf21cd93STycho Nightingale 	if ((vm->fd = vm_device_open(vm->name)) < 0)
235bf21cd93STycho Nightingale 		goto err;
236bf21cd93STycho Nightingale 
237bf21cd93STycho Nightingale 	return (vm);
238bf21cd93STycho Nightingale err:
239b0de25cbSAndy Fiddaman 	saved_errno = errno;
2409c3024a3SHans Rosenfeld 	free(vm);
241b0de25cbSAndy Fiddaman 	errno = saved_errno;
242bf21cd93STycho Nightingale 	return (NULL);
243bf21cd93STycho Nightingale }
244bf21cd93STycho Nightingale 
245bf21cd93STycho Nightingale 
246bf21cd93STycho Nightingale int
vm_parse_memsize(const char * opt,size_t * ret_memsize)247b0de25cbSAndy Fiddaman vm_parse_memsize(const char *opt, size_t *ret_memsize)
248bf21cd93STycho Nightingale {
249bf21cd93STycho Nightingale 	char *endptr;
250bf21cd93STycho Nightingale 	size_t optval;
251bf21cd93STycho Nightingale 	int error;
252bf21cd93STycho Nightingale 
253b0de25cbSAndy Fiddaman 	optval = strtoul(opt, &endptr, 0);
254b0de25cbSAndy Fiddaman 	if (*opt != '\0' && *endptr == '\0') {
255bf21cd93STycho Nightingale 		/*
256bf21cd93STycho Nightingale 		 * For the sake of backward compatibility if the memory size
257bf21cd93STycho Nightingale 		 * specified on the command line is less than a megabyte then
258bf21cd93STycho Nightingale 		 * it is interpreted as being in units of MB.
259bf21cd93STycho Nightingale 		 */
260bf21cd93STycho Nightingale 		if (optval < MB)
261bf21cd93STycho Nightingale 			optval *= MB;
262bf21cd93STycho Nightingale 		*ret_memsize = optval;
263bf21cd93STycho Nightingale 		error = 0;
264bf21cd93STycho Nightingale 	} else
265b0de25cbSAndy Fiddaman 		error = expand_number(opt, ret_memsize);
266bf21cd93STycho Nightingale 
267bf21cd93STycho Nightingale 	return (error);
268bf21cd93STycho Nightingale }
269bf21cd93STycho Nightingale 
2704c87aefeSPatrick Mooney uint32_t
vm_get_lowmem_limit(struct vmctx * ctx)2714c87aefeSPatrick Mooney vm_get_lowmem_limit(struct vmctx *ctx)
272bf21cd93STycho Nightingale {
2734c87aefeSPatrick Mooney 
2744c87aefeSPatrick Mooney 	return (ctx->lowmem_limit);
275bf21cd93STycho Nightingale }
276bf21cd93STycho Nightingale 
2774c87aefeSPatrick Mooney void
vm_set_lowmem_limit(struct vmctx * ctx,uint32_t limit)2784c87aefeSPatrick Mooney vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
279bf21cd93STycho Nightingale {
2804c87aefeSPatrick Mooney 
2814c87aefeSPatrick Mooney 	ctx->lowmem_limit = limit;
2824c87aefeSPatrick Mooney }
2834c87aefeSPatrick Mooney 
2844c87aefeSPatrick Mooney void
vm_set_memflags(struct vmctx * ctx,int flags)2854c87aefeSPatrick Mooney vm_set_memflags(struct vmctx *ctx, int flags)
2864c87aefeSPatrick Mooney {
2874c87aefeSPatrick Mooney 
2884c87aefeSPatrick Mooney 	ctx->memflags = flags;
289bf21cd93STycho Nightingale }
290bf21cd93STycho Nightingale 
291bf21cd93STycho Nightingale int
vm_get_memflags(struct vmctx * ctx)2924c87aefeSPatrick Mooney vm_get_memflags(struct vmctx *ctx)
293bf21cd93STycho Nightingale {
2944c87aefeSPatrick Mooney 
2954c87aefeSPatrick Mooney 	return (ctx->memflags);
2964c87aefeSPatrick Mooney }
2974c87aefeSPatrick Mooney 
2984c87aefeSPatrick Mooney /*
2994c87aefeSPatrick Mooney  * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
3004c87aefeSPatrick Mooney  */
3014c87aefeSPatrick Mooney int
vm_mmap_memseg(struct vmctx * ctx,vm_paddr_t gpa,int segid,vm_ooffset_t off,size_t len,int prot)3024c87aefeSPatrick Mooney vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
3034c87aefeSPatrick Mooney     size_t len, int prot)
3044c87aefeSPatrick Mooney {
3054c87aefeSPatrick Mooney 	struct vm_memmap memmap;
3064c87aefeSPatrick Mooney 	int error, flags;
3074c87aefeSPatrick Mooney 
3084c87aefeSPatrick Mooney 	memmap.gpa = gpa;
3094c87aefeSPatrick Mooney 	memmap.segid = segid;
3104c87aefeSPatrick Mooney 	memmap.segoff = off;
3114c87aefeSPatrick Mooney 	memmap.len = len;
3124c87aefeSPatrick Mooney 	memmap.prot = prot;
3134c87aefeSPatrick Mooney 	memmap.flags = 0;
3144c87aefeSPatrick Mooney 
3154c87aefeSPatrick Mooney 	if (ctx->memflags & VM_MEM_F_WIRED)
3164c87aefeSPatrick Mooney 		memmap.flags |= VM_MEMMAP_F_WIRED;
3174c87aefeSPatrick Mooney 
3184c87aefeSPatrick Mooney 	/*
3194c87aefeSPatrick Mooney 	 * If this mapping already exists then don't create it again. This
3204c87aefeSPatrick Mooney 	 * is the common case for SYSMEM mappings created by bhyveload(8).
3214c87aefeSPatrick Mooney 	 */
3224c87aefeSPatrick Mooney 	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
3234c87aefeSPatrick Mooney 	if (error == 0 && gpa == memmap.gpa) {
3244c87aefeSPatrick Mooney 		if (segid != memmap.segid || off != memmap.segoff ||
3254c87aefeSPatrick Mooney 		    prot != memmap.prot || flags != memmap.flags) {
3264c87aefeSPatrick Mooney 			errno = EEXIST;
3274c87aefeSPatrick Mooney 			return (-1);
3284c87aefeSPatrick Mooney 		} else {
3294c87aefeSPatrick Mooney 			return (0);
3304c87aefeSPatrick Mooney 		}
3314c87aefeSPatrick Mooney 	}
3324c87aefeSPatrick Mooney 
3334c87aefeSPatrick Mooney 	error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
334bf21cd93STycho Nightingale 	return (error);
335bf21cd93STycho Nightingale }
336bf21cd93STycho Nightingale 
3372b948146SAndy Fiddaman int
vm_munmap_memseg(struct vmctx * ctx,vm_paddr_t gpa,size_t len)3382b948146SAndy Fiddaman vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
3392b948146SAndy Fiddaman {
3402b948146SAndy Fiddaman 	struct vm_munmap munmap;
3412b948146SAndy Fiddaman 	int error;
3422b948146SAndy Fiddaman 
3432b948146SAndy Fiddaman 	munmap.gpa = gpa;
3442b948146SAndy Fiddaman 	munmap.len = len;
3452b948146SAndy Fiddaman 
3462b948146SAndy Fiddaman 	error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap);
3472b948146SAndy Fiddaman 	return (error);
3482b948146SAndy Fiddaman }
3492b948146SAndy Fiddaman 
3504c87aefeSPatrick Mooney int
vm_mmap_getnext(struct vmctx * ctx,vm_paddr_t * gpa,int * segid,vm_ooffset_t * segoff,size_t * len,int * prot,int * flags)3514c87aefeSPatrick Mooney vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
3524c87aefeSPatrick Mooney     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
353bf21cd93STycho Nightingale {
3544c87aefeSPatrick Mooney 	struct vm_memmap memmap;
3554c87aefeSPatrick Mooney 	int error;
356bf21cd93STycho Nightingale 
3574c87aefeSPatrick Mooney 	bzero(&memmap, sizeof(struct vm_memmap));
3584c87aefeSPatrick Mooney 	memmap.gpa = *gpa;
3594c87aefeSPatrick Mooney 	error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
3604c87aefeSPatrick Mooney 	if (error == 0) {
3614c87aefeSPatrick Mooney 		*gpa = memmap.gpa;
3624c87aefeSPatrick Mooney 		*segid = memmap.segid;
3634c87aefeSPatrick Mooney 		*segoff = memmap.segoff;
3644c87aefeSPatrick Mooney 		*len = memmap.len;
3654c87aefeSPatrick Mooney 		*prot = memmap.prot;
3664c87aefeSPatrick Mooney 		*flags = memmap.flags;
3674c87aefeSPatrick Mooney 	}
3684c87aefeSPatrick Mooney 	return (error);
369bf21cd93STycho Nightingale }
370bf21cd93STycho Nightingale 
/*
 * Compare two memory segments described by their length and name.
 *
 * Returns 0 if the segments are identical and -1 otherwise.
 *
 * Only device memory segments are named, so either name may be NULL: two
 * unnamed segments match on length alone, while a named segment never
 * matches an unnamed one.
 */
static int
cmpseg(size_t len, const char *str, size_t len2, const char *str2)
{
	if (len != len2)
		return (-1);

	/* Both unnamed */
	if (str == NULL && str2 == NULL)
		return (0);

	/* Exactly one of them is named */
	if (str == NULL || str2 == NULL)
		return (-1);

	return (strcmp(str, str2) == 0 ? 0 : -1);
}
387bf21cd93STycho Nightingale 
388bf21cd93STycho Nightingale static int
vm_alloc_memseg(struct vmctx * ctx,int segid,size_t len,const char * name)3894c87aefeSPatrick Mooney vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
390bf21cd93STycho Nightingale {
3914c87aefeSPatrick Mooney 	struct vm_memseg memseg;
3924c87aefeSPatrick Mooney 	size_t n;
393bf21cd93STycho Nightingale 	int error;
394bf21cd93STycho Nightingale 
395bf21cd93STycho Nightingale 	/*
3964c87aefeSPatrick Mooney 	 * If the memory segment has already been created then just return.
3974c87aefeSPatrick Mooney 	 * This is the usual case for the SYSMEM segment created by userspace
3984c87aefeSPatrick Mooney 	 * loaders like bhyveload(8).
399bf21cd93STycho Nightingale 	 */
4004c87aefeSPatrick Mooney 	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
4014c87aefeSPatrick Mooney 	    sizeof(memseg.name));
4024c87aefeSPatrick Mooney 	if (error)
4034c87aefeSPatrick Mooney 		return (error);
4044c87aefeSPatrick Mooney 
4054c87aefeSPatrick Mooney 	if (memseg.len != 0) {
4064c87aefeSPatrick Mooney 		if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
4074c87aefeSPatrick Mooney 			errno = EINVAL;
4084c87aefeSPatrick Mooney 			return (-1);
4094c87aefeSPatrick Mooney 		} else {
4104c87aefeSPatrick Mooney 			return (0);
4114c87aefeSPatrick Mooney 		}
4124c87aefeSPatrick Mooney 	}
4134c87aefeSPatrick Mooney 
4144c87aefeSPatrick Mooney 	bzero(&memseg, sizeof(struct vm_memseg));
4154c87aefeSPatrick Mooney 	memseg.segid = segid;
4164c87aefeSPatrick Mooney 	memseg.len = len;
4174c87aefeSPatrick Mooney 	if (name != NULL) {
4184c87aefeSPatrick Mooney 		n = strlcpy(memseg.name, name, sizeof(memseg.name));
4194c87aefeSPatrick Mooney 		if (n >= sizeof(memseg.name)) {
4204c87aefeSPatrick Mooney 			errno = ENAMETOOLONG;
4214c87aefeSPatrick Mooney 			return (-1);
4224c87aefeSPatrick Mooney 		}
4234c87aefeSPatrick Mooney 	}
4244c87aefeSPatrick Mooney 
4254c87aefeSPatrick Mooney 	error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
4264c87aefeSPatrick Mooney 	return (error);
4274c87aefeSPatrick Mooney }
4284c87aefeSPatrick Mooney 
4294c87aefeSPatrick Mooney int
vm_get_memseg(struct vmctx * ctx,int segid,size_t * lenp,char * namebuf,size_t bufsize)4304c87aefeSPatrick Mooney vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
4314c87aefeSPatrick Mooney     size_t bufsize)
4324c87aefeSPatrick Mooney {
4334c87aefeSPatrick Mooney 	struct vm_memseg memseg;
4344c87aefeSPatrick Mooney 	size_t n;
4354c87aefeSPatrick Mooney 	int error;
4364c87aefeSPatrick Mooney 
4374c87aefeSPatrick Mooney 	memseg.segid = segid;
4384c87aefeSPatrick Mooney 	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
4394c87aefeSPatrick Mooney 	if (error == 0) {
4404c87aefeSPatrick Mooney 		*lenp = memseg.len;
4414c87aefeSPatrick Mooney 		n = strlcpy(namebuf, memseg.name, bufsize);
4424c87aefeSPatrick Mooney 		if (n >= bufsize) {
4434c87aefeSPatrick Mooney 			errno = ENAMETOOLONG;
4444c87aefeSPatrick Mooney 			error = -1;
4454c87aefeSPatrick Mooney 		}
446bf21cd93STycho Nightingale 	}
447bf21cd93STycho Nightingale 	return (error);
448bf21cd93STycho Nightingale }
449bf21cd93STycho Nightingale 
4504c87aefeSPatrick Mooney static int
4514c87aefeSPatrick Mooney #ifdef __FreeBSD__
setup_memory_segment(struct vmctx * ctx,vm_paddr_t gpa,size_t len,char * base)4524c87aefeSPatrick Mooney setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
4534c87aefeSPatrick Mooney #else
4544c87aefeSPatrick Mooney setup_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
4554c87aefeSPatrick Mooney     char *base)
4564c87aefeSPatrick Mooney #endif
4574c87aefeSPatrick Mooney {
4584c87aefeSPatrick Mooney 	char *ptr;
4594c87aefeSPatrick Mooney 	int error, flags;
4604c87aefeSPatrick Mooney 
4614c87aefeSPatrick Mooney 	/* Map 'len' bytes starting at 'gpa' in the guest address space */
4624c87aefeSPatrick Mooney #ifdef __FreeBSD__
4634c87aefeSPatrick Mooney 	error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
4644c87aefeSPatrick Mooney #else
4654c87aefeSPatrick Mooney 	/*
4664c87aefeSPatrick Mooney 	 * As we use two segments for lowmem/highmem the offset within the
4674c87aefeSPatrick Mooney 	 * segment is 0 on illumos.
4684c87aefeSPatrick Mooney 	 */
4694c87aefeSPatrick Mooney 	error = vm_mmap_memseg(ctx, gpa, segid, 0, len, PROT_ALL);
4704c87aefeSPatrick Mooney #endif
4714c87aefeSPatrick Mooney 	if (error)
4724c87aefeSPatrick Mooney 		return (error);
4734c87aefeSPatrick Mooney 
4744c87aefeSPatrick Mooney 	flags = MAP_SHARED | MAP_FIXED;
4754c87aefeSPatrick Mooney 	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
4764c87aefeSPatrick Mooney 		flags |= MAP_NOCORE;
4774c87aefeSPatrick Mooney 
4784c87aefeSPatrick Mooney 	/* mmap into the process address space on the host */
4794c87aefeSPatrick Mooney 	ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
4804c87aefeSPatrick Mooney 	if (ptr == MAP_FAILED)
4814c87aefeSPatrick Mooney 		return (-1);
4824c87aefeSPatrick Mooney 
4834c87aefeSPatrick Mooney 	return (0);
4844c87aefeSPatrick Mooney }
4854c87aefeSPatrick Mooney 
486bf21cd93STycho Nightingale int
vm_setup_memory(struct vmctx * ctx,size_t memsize,enum vm_mmap_style vms)487bf21cd93STycho Nightingale vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
488bf21cd93STycho Nightingale {
4894c87aefeSPatrick Mooney 	size_t objsize, len;
4904c87aefeSPatrick Mooney 	vm_paddr_t gpa;
4914c87aefeSPatrick Mooney 	char *baseaddr, *ptr;
492bf21cd93STycho Nightingale 	int error;
493bf21cd93STycho Nightingale 
4944c87aefeSPatrick Mooney 	assert(vms == VM_MMAP_ALL);
495bf21cd93STycho Nightingale 
496bf21cd93STycho Nightingale 	/*
497bf21cd93STycho Nightingale 	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
498bf21cd93STycho Nightingale 	 * create another 'highmem' segment above 4GB for the remainder.
499bf21cd93STycho Nightingale 	 */
500bf21cd93STycho Nightingale 	if (memsize > ctx->lowmem_limit) {
501bf21cd93STycho Nightingale 		ctx->lowmem = ctx->lowmem_limit;
5024c87aefeSPatrick Mooney 		ctx->highmem = memsize - ctx->lowmem_limit;
5034c87aefeSPatrick Mooney 		objsize = 4*GB + ctx->highmem;
504bf21cd93STycho Nightingale 	} else {
505bf21cd93STycho Nightingale 		ctx->lowmem = memsize;
506bf21cd93STycho Nightingale 		ctx->highmem = 0;
5074c87aefeSPatrick Mooney 		objsize = ctx->lowmem;
508bf21cd93STycho Nightingale 	}
509bf21cd93STycho Nightingale 
5104c87aefeSPatrick Mooney #ifdef __FreeBSD__
5114c87aefeSPatrick Mooney 	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
5124c87aefeSPatrick Mooney 	if (error)
5134c87aefeSPatrick Mooney 		return (error);
5144c87aefeSPatrick Mooney #endif
5154c87aefeSPatrick Mooney 
5164c87aefeSPatrick Mooney 	/*
5174c87aefeSPatrick Mooney 	 * Stake out a contiguous region covering the guest physical memory
5184c87aefeSPatrick Mooney 	 * and the adjoining guard regions.
5194c87aefeSPatrick Mooney 	 */
5204c87aefeSPatrick Mooney 	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
5214c87aefeSPatrick Mooney 	ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
5224c87aefeSPatrick Mooney 	if (ptr == MAP_FAILED)
5234c87aefeSPatrick Mooney 		return (-1);
5244c87aefeSPatrick Mooney 
5254c87aefeSPatrick Mooney 	baseaddr = ptr + VM_MMAP_GUARD_SIZE;
526bf21cd93STycho Nightingale 
527