1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2015 Pluribus Networks Inc. 41 * Copyright 2019 Joyent, Inc. 
42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/sysctl.h> 49 #include <sys/ioctl.h> 50 #ifdef __FreeBSD__ 51 #include <sys/linker.h> 52 #endif 53 #include <sys/mman.h> 54 #include <sys/module.h> 55 #include <sys/_iovec.h> 56 #include <sys/cpuset.h> 57 58 #include <x86/segments.h> 59 #include <machine/specialreg.h> 60 61 #include <errno.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <assert.h> 65 #include <string.h> 66 #include <fcntl.h> 67 #include <unistd.h> 68 69 #include <libutil.h> 70 71 #include <machine/vmm.h> 72 #include <machine/vmm_dev.h> 73 #ifndef __FreeBSD__ 74 #include <sys/vmm_impl.h> 75 #endif 76 77 #include "vmmapi.h" 78 79 #define MB (1024 * 1024UL) 80 #define GB (1024 * 1024 * 1024UL) 81 82 #ifndef __FreeBSD__ 83 /* shim to no-op for now */ 84 #define MAP_NOCORE 0 85 #define MAP_ALIGNED_SUPER 0 86 87 /* Rely on PROT_NONE for guard purposes */ 88 #define MAP_GUARD (MAP_PRIVATE | MAP_ANON | MAP_NORESERVE) 89 #endif 90 91 /* 92 * Size of the guard region before and after the virtual address space 93 * mapping the guest physical memory. This must be a multiple of the 94 * superpage size for performance reasons. 
95 */ 96 #define VM_MMAP_GUARD_SIZE (4 * MB) 97 98 #define PROT_RW (PROT_READ | PROT_WRITE) 99 #define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) 100 101 struct vmctx { 102 int fd; 103 uint32_t lowmem_limit; 104 int memflags; 105 size_t lowmem; 106 size_t highmem; 107 char *baseaddr; 108 char *name; 109 }; 110 111 #ifdef __FreeBSD__ 112 #define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) 113 #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) 114 #else 115 #define CREATE(x) vm_do_ctl(VMM_CREATE_VM, (x)) 116 #define DESTROY(x) vm_do_ctl(VMM_DESTROY_VM, (x)) 117 118 static int 119 vm_do_ctl(int cmd, const char *name) 120 { 121 int ctl_fd; 122 123 ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR); 124 if (ctl_fd < 0) { 125 return (-1); 126 } 127 128 if (ioctl(ctl_fd, cmd, name) == -1) { 129 int err = errno; 130 131 /* Do not lose ioctl errno through the close(2) */ 132 (void) close(ctl_fd); 133 errno = err; 134 return (-1); 135 } 136 (void) close(ctl_fd); 137 138 return (0); 139 } 140 #endif 141 142 static int 143 vm_device_open(const char *name) 144 { 145 int fd, len; 146 char *vmfile; 147 148 len = strlen("/dev/vmm/") + strlen(name) + 1; 149 vmfile = malloc(len); 150 assert(vmfile != NULL); 151 snprintf(vmfile, len, "/dev/vmm/%s", name); 152 153 /* Open the device file */ 154 fd = open(vmfile, O_RDWR, 0); 155 156 free(vmfile); 157 return (fd); 158 } 159 160 int 161 vm_create(const char *name) 162 { 163 #ifdef __FreeBSD__ 164 /* Try to load vmm(4) module before creating a guest. 
*/ 165 if (modfind("vmm") < 0) 166 kldload("vmm"); 167 #endif 168 return (CREATE((char *)name)); 169 } 170 171 struct vmctx * 172 vm_open(const char *name) 173 { 174 struct vmctx *vm; 175 176 vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); 177 assert(vm != NULL); 178 179 vm->fd = -1; 180 vm->memflags = 0; 181 vm->lowmem_limit = 3 * GB; 182 vm->name = (char *)(vm + 1); 183 strcpy(vm->name, name); 184 185 if ((vm->fd = vm_device_open(vm->name)) < 0) 186 goto err; 187 188 return (vm); 189 err: 190 #ifdef __FreeBSD__ 191 vm_destroy(vm); 192 #else 193 /* 194 * As libvmmapi is used by other programs to query and control bhyve 195 * VMs, destroying a VM just because the open failed isn't useful. We 196 * have to free what we have allocated, though. 197 */ 198 free(vm); 199 #endif 200 return (NULL); 201 } 202 203 #ifndef __FreeBSD__ 204 void 205 vm_close(struct vmctx *vm) 206 { 207 assert(vm != NULL); 208 assert(vm->fd >= 0); 209 210 (void) close(vm->fd); 211 212 free(vm); 213 } 214 #endif 215 216 void 217 vm_destroy(struct vmctx *vm) 218 { 219 assert(vm != NULL); 220 221 if (vm->fd >= 0) 222 close(vm->fd); 223 DESTROY(vm->name); 224 225 free(vm); 226 } 227 228 int 229 vm_parse_memsize(const char *optarg, size_t *ret_memsize) 230 { 231 char *endptr; 232 size_t optval; 233 int error; 234 235 optval = strtoul(optarg, &endptr, 0); 236 if (*optarg != '\0' && *endptr == '\0') { 237 /* 238 * For the sake of backward compatibility if the memory size 239 * specified on the command line is less than a megabyte then 240 * it is interpreted as being in units of MB. 
241 */ 242 if (optval < MB) 243 optval *= MB; 244 *ret_memsize = optval; 245 error = 0; 246 } else 247 error = expand_number(optarg, ret_memsize); 248 249 return (error); 250 } 251 252 uint32_t 253 vm_get_lowmem_limit(struct vmctx *ctx) 254 { 255 256 return (ctx->lowmem_limit); 257 } 258 259 void 260 vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit) 261 { 262 263 ctx->lowmem_limit = limit; 264 } 265 266 void 267 vm_set_memflags(struct vmctx *ctx, int flags) 268 { 269 270 ctx->memflags = flags; 271 } 272 273 int 274 vm_get_memflags(struct vmctx *ctx) 275 { 276 277 return (ctx->memflags); 278 } 279 280 /* 281 * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len). 282 */ 283 int 284 vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off, 285 size_t len, int prot) 286 { 287 struct vm_memmap memmap; 288 int error, flags; 289 290 memmap.gpa = gpa; 291 memmap.segid = segid; 292 memmap.segoff = off; 293 memmap.len = len; 294 memmap.prot = prot; 295 memmap.flags = 0; 296 297 if (ctx->memflags & VM_MEM_F_WIRED) 298 memmap.flags |= VM_MEMMAP_F_WIRED; 299 300 /* 301 * If this mapping already exists then don't create it again. This 302 * is the common case for SYSMEM mappings created by bhyveload(8). 
303 */ 304 error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags); 305 if (error == 0 && gpa == memmap.gpa) { 306 if (segid != memmap.segid || off != memmap.segoff || 307 prot != memmap.prot || flags != memmap.flags) { 308 errno = EEXIST; 309 return (-1); 310 } else { 311 return (0); 312 } 313 } 314 315 error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap); 316 return (error); 317 } 318 319 int 320 vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid, 321 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 322 { 323 struct vm_memmap memmap; 324 int error; 325 326 bzero(&memmap, sizeof(struct vm_memmap)); 327 memmap.gpa = *gpa; 328 error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap); 329 if (error == 0) { 330 *gpa = memmap.gpa; 331 *segid = memmap.segid; 332 *segoff = memmap.segoff; 333 *len = memmap.len; 334 *prot = memmap.prot; 335 *flags = memmap.flags; 336 } 337 return (error); 338 } 339 340 /* 341 * Return 0 if the segments are identical and non-zero otherwise. 342 * 343 * This is slightly complicated by the fact that only device memory segments 344 * are named. 345 */ 346 static int 347 cmpseg(size_t len, const char *str, size_t len2, const char *str2) 348 { 349 350 if (len == len2) { 351 if ((!str && !str2) || (str && str2 && !strcmp(str, str2))) 352 return (0); 353 } 354 return (-1); 355 } 356 357 static int 358 vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name) 359 { 360 struct vm_memseg memseg; 361 size_t n; 362 int error; 363 364 /* 365 * If the memory segment has already been created then just return. 366 * This is the usual case for the SYSMEM segment created by userspace 367 * loaders like bhyveload(8). 
368 */ 369 error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name, 370 sizeof(memseg.name)); 371 if (error) 372 return (error); 373 374 if (memseg.len != 0) { 375 if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) { 376 errno = EINVAL; 377 return (-1); 378 } else { 379 return (0); 380 } 381 } 382 383 bzero(&memseg, sizeof(struct vm_memseg)); 384 memseg.segid = segid; 385 memseg.len = len; 386 if (name != NULL) { 387 n = strlcpy(memseg.name, name, sizeof(memseg.name)); 388 if (n >= sizeof(memseg.name)) { 389 errno = ENAMETOOLONG; 390 return (-1); 391 } 392 } 393 394 error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg); 395 return (error); 396 } 397 398 int 399 vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf, 400 size_t bufsize) 401 { 402 struct vm_memseg memseg; 403 size_t n; 404 int error; 405 406 memseg.segid = segid; 407 error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg); 408 if (error == 0) { 409 *lenp = memseg.len; 410 n = strlcpy(namebuf, memseg.name, bufsize); 411 if (n >= bufsize) { 412 errno = ENAMETOOLONG; 413 error = -1; 414 } 415 } 416 return (error); 417 } 418 419 static int 420 #ifdef __FreeBSD__ 421 setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base) 422 #else 423 setup_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len, 424 char *base) 425 #endif 426 { 427 char *ptr; 428 int error, flags; 429 430 /* Map 'len' bytes starting at 'gpa' in the guest address space */ 431 #ifdef __FreeBSD__ 432 error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL); 433 #else 434 /* 435 * As we use two segments for lowmem/highmem the offset within the 436 * segment is 0 on illumos. 
437 */ 438 error = vm_mmap_memseg(ctx, gpa, segid, 0, len, PROT_ALL); 439 #endif 440 if (error) 441 return (error); 442 443 flags = MAP_SHARED | MAP_FIXED; 444 if ((ctx->memflags & VM_MEM_F_INCORE) == 0) 445 flags |= MAP_NOCORE; 446 447 /* mmap into the process address space on the host */ 448 ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa); 449 if (ptr == MAP_FAILED) 450 return (-1); 451 452 return (0); 453 } 454 455 int 456 vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) 457 { 458 size_t objsize, len; 459 vm_paddr_t gpa; 460 char *baseaddr, *ptr; 461 int error; 462 463 assert(vms == VM_MMAP_ALL); 464 465 /* 466 * If 'memsize' cannot fit entirely in the 'lowmem' segment then 467 * create another 'highmem' segment above 4GB for the remainder. 468 */ 469 if (memsize > ctx->lowmem_limit) { 470 ctx->lowmem = ctx->lowmem_limit; 471 ctx->highmem = memsize - ctx->lowmem_limit; 472 objsize = 4*GB + ctx->highmem; 473 } else { 474 ctx->lowmem = memsize; 475 ctx->highmem = 0; 476 objsize = ctx->lowmem; 477 } 478 479 #ifdef __FreeBSD__ 480 error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL); 481 if (error) 482 return (error); 483 #endif 484 485 /* 486 * Stake out a contiguous region covering the guest physical memory 487 * and the adjoining guard regions. 
488 */ 489 len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE; 490 ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0); 491 if (ptr == MAP_FAILED) 492 return (-1); 493 494 baseaddr = ptr + VM_MMAP_GUARD_SIZE; 495 496 #ifdef __FreeBSD__ 497 if (ctx->highmem > 0) { 498 gpa = 4*GB; 499 len = ctx->highmem; 500 error = setup_memory_segment(ctx, gpa, len, baseaddr); 501 if (error) 502 return (error); 503 } 504 505 if (ctx->lowmem > 0) { 506 gpa = 0; 507 len = ctx->lowmem; 508 error = setup_memory_segment(ctx, gpa, len, baseaddr); 509 if (error) 510 return (error); 511 } 512 #else 513 if (ctx->highmem > 0) { 514 error = vm_alloc_memseg(ctx, VM_HIGHMEM, ctx->highmem, NULL); 515 if (error) 516 return (error); 517 gpa = 4*GB; 518 len = ctx->highmem; 519 error = setup_memory_segment(ctx, VM_HIGHMEM, gpa, len, baseaddr); 520 if (error) 521 return (error); 522 } 523 524 if (ctx->lowmem > 0) { 525 error = vm_alloc_memseg(ctx, VM_LOWMEM, ctx->lowmem, NULL); 526 if (error) 527 return (error); 528 gpa = 0; 529 len = ctx->lowmem; 530 error = setup_memory_segment(ctx, VM_LOWMEM, gpa, len, baseaddr); 531 if (error) 532 return (error); 533 } 534 #endif 535 536 ctx->baseaddr = baseaddr; 537 538 return (0); 539 } 540 541 /* 542 * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in 543 * the lowmem or highmem regions. 544 * 545 * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region. 546 * The instruction emulation code depends on this behavior. 
547 */ 548 void * 549 vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) 550 { 551 552 if (ctx->lowmem > 0) { 553 if (gaddr < ctx->lowmem && len <= ctx->lowmem && 554 gaddr + len <= ctx->lowmem) 555 return (ctx->baseaddr + gaddr); 556 } 557 558 if (ctx->highmem > 0) { 559 if (gaddr >= 4*GB) { 560 if (gaddr < 4*GB + ctx->highmem && 561 len <= ctx->highmem && 562 gaddr + len <= 4*GB + ctx->highmem) 563 return (ctx->baseaddr + gaddr); 564 } 565 } 566 567 return (NULL); 568 } 569 570 size_t 571 vm_get_lowmem_size(struct vmctx *ctx) 572 { 573 574 return (ctx->lowmem); 575 } 576 577 size_t 578 vm_get_highmem_size(struct vmctx *ctx) 579 { 580 581 return (ctx->highmem); 582 } 583 584 #ifndef __FreeBSD__ 585 int 586 vm_get_devmem_offset(struct vmctx *ctx, int segid, off_t *mapoff) 587 { 588 struct vm_devmem_offset vdo; 589 int error; 590 591 vdo.segid = segid; 592 error = ioctl(ctx->fd, VM_DEVMEM_GETOFFSET, &vdo); 593 if (error == 0) 594 *mapoff = vdo.offset; 595 596 return (error); 597 } 598 #endif 599 600 void * 601 vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len) 602 { 603 #ifdef __FreeBSD__ 604 char pathname[MAXPATHLEN]; 605 #endif 606 size_t len2; 607 char *base, *ptr; 608 int fd, error, flags; 609 off_t mapoff; 610 611 fd = -1; 612 ptr = MAP_FAILED; 613 if (name == NULL || strlen(name) == 0) { 614 errno = EINVAL; 615 goto done; 616 } 617 618 error = vm_alloc_memseg(ctx, segid, len, name); 619 if (error) 620 goto done; 621 622 #ifdef __FreeBSD__ 623 strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname)); 624 strlcat(pathname, ctx->name, sizeof(pathname)); 625 strlcat(pathname, ".", sizeof(pathname)); 626 strlcat(pathname, name, sizeof(pathname)); 627 628 fd = open(pathname, O_RDWR); 629 if (fd < 0) 630 goto done; 631 #else 632 if (vm_get_devmem_offset(ctx, segid, &mapoff) != 0) 633 goto done; 634 #endif 635 636 /* 637 * Stake out a contiguous region covering the device memory and the 638 * adjoining guard regions. 
639 */ 640 len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE; 641 base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 642 0); 643 if (base == MAP_FAILED) 644 goto done; 645 646 flags = MAP_SHARED | MAP_FIXED; 647 if ((ctx->memflags & VM_MEM_F_INCORE) == 0) 648 flags |= MAP_NOCORE; 649 650 #ifdef __FreeBSD__ 651 /* mmap the devmem region in the host address space */ 652 ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0); 653 #else 654 /* mmap the devmem region in the host address space */ 655 ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, ctx->fd, 656 mapoff); 657 #endif 658 done: 659 if (fd >= 0) 660 close(fd); 661 return (ptr); 662 } 663 664 int 665 vm_set_desc(struct vmctx *ctx, int vcpu, int reg, 666 uint64_t base, uint32_t limit, uint32_t access) 667 { 668 int error; 669 struct vm_seg_desc vmsegdesc; 670 671 bzero(&vmsegdesc, sizeof(vmsegdesc)); 672 vmsegdesc.cpuid = vcpu; 673 vmsegdesc.regnum = reg; 674 vmsegdesc.desc.base = base; 675 vmsegdesc.desc.limit = limit; 676 vmsegdesc.desc.access = access; 677 678 error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); 679 return (error); 680 } 681 682 int 683 vm_get_desc(struct vmctx *ctx, int vcpu, int reg, 684 uint64_t *base, uint32_t *limit, uint32_t *access) 685 { 686 int error; 687 struct vm_seg_desc vmsegdesc; 688 689 bzero(&vmsegdesc, sizeof(vmsegdesc)); 690 vmsegdesc.cpuid = vcpu; 691 vmsegdesc.regnum = reg; 692 693 error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); 694 if (error == 0) { 695 *base = vmsegdesc.desc.base; 696 *limit = vmsegdesc.desc.limit; 697 *access = vmsegdesc.desc.access; 698 } 699 return (error); 700 } 701 702 int 703 vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc) 704 { 705 int error; 706 707 error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit, 708 &seg_desc->access); 709 return (error); 710 } 711 712 int 713 vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t 
val) 714 { 715 int error; 716 struct vm_register vmreg; 717 718 bzero(&vmreg, sizeof(vmreg)); 719 vmreg.cpuid = vcpu; 720 vmreg.regnum = reg; 721 vmreg.regval = val; 722 723 error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); 724 return (error); 725 } 726 727 int 728 vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) 729 { 730 int error; 731 struct vm_register vmreg; 732 733 bzero(&vmreg, sizeof(vmreg)); 734 vmreg.cpuid = vcpu; 735 vmreg.regnum = reg; 736 737 error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); 738 *ret_val = vmreg.regval; 739 return (error); 740 } 741 742 int 743 vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count, 744 const int *regnums, uint64_t *regvals) 745 { 746 int error; 747 struct vm_register_set vmregset; 748 749 bzero(&vmregset, sizeof(vmregset)); 750 vmregset.cpuid = vcpu; 751 vmregset.count = count; 752 vmregset.regnums = regnums; 753 vmregset.regvals = regvals; 754 755 error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset); 756 return (error); 757 } 758 759 int 760 vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count, 761 const int *regnums, uint64_t *regvals) 762 { 763 int error; 764 struct vm_register_set vmregset; 765 766 bzero(&vmregset, sizeof(vmregset)); 767 vmregset.cpuid = vcpu; 768 vmregset.count = count; 769 vmregset.regnums = regnums; 770 vmregset.regvals = regvals; 771 772 error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset); 773 return (error); 774 } 775 776 int 777 vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit) 778 { 779 int error; 780 struct vm_run vmrun; 781 782 bzero(&vmrun, sizeof(vmrun)); 783 vmrun.cpuid = vcpu; 784 785 error = ioctl(ctx->fd, VM_RUN, &vmrun); 786 bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); 787 return (error); 788 } 789 790 int 791 vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) 792 { 793 struct vm_suspend vmsuspend; 794 795 bzero(&vmsuspend, sizeof(vmsuspend)); 796 vmsuspend.how = how; 797 return (ioctl(ctx->fd, 
VM_SUSPEND, &vmsuspend)); 798 } 799 800 int 801 vm_reinit(struct vmctx *ctx) 802 { 803 804 return (ioctl(ctx->fd, VM_REINIT, 0)); 805 } 806 807 int 808 vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid, 809 uint32_t errcode, int restart_instruction) 810 { 811 struct vm_exception exc; 812 813 exc.cpuid = vcpu; 814 exc.vector = vector; 815 exc.error_code = errcode; 816 exc.error_code_valid = errcode_valid; 817 exc.restart_instruction = restart_instruction; 818 819 return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc)); 820 } 821 822 int 823 vm_apicid2vcpu(struct vmctx *ctx, int apicid) 824 { 825 /* 826 * The apic id associated with the 'vcpu' has the same numerical value 827 * as the 'vcpu' itself. 828 */ 829 return (apicid); 830 } 831 832 int 833 vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) 834 { 835 struct vm_lapic_irq vmirq; 836 837 bzero(&vmirq, sizeof(vmirq)); 838 vmirq.cpuid = vcpu; 839 vmirq.vector = vector; 840 841 return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); 842 } 843 844 int 845 vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector) 846 { 847 struct vm_lapic_irq vmirq; 848 849 bzero(&vmirq, sizeof(vmirq)); 850 vmirq.cpuid = vcpu; 851 vmirq.vector = vector; 852 853 return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq)); 854 } 855 856 int 857 vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg) 858 { 859 struct vm_lapic_msi vmmsi; 860 861 bzero(&vmmsi, sizeof(vmmsi)); 862 vmmsi.addr = addr; 863 vmmsi.msg = msg; 864 865 return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi)); 866 } 867 868 int 869 vm_ioapic_assert_irq(struct vmctx *ctx, int irq) 870 { 871 struct vm_ioapic_irq ioapic_irq; 872 873 bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); 874 ioapic_irq.irq = irq; 875 876 return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq)); 877 } 878 879 int 880 vm_ioapic_deassert_irq(struct vmctx *ctx, int irq) 881 { 882 struct vm_ioapic_irq ioapic_irq; 883 884 bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); 885 
ioapic_irq.irq = irq; 886 887 return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq)); 888 } 889 890 int 891 vm_ioapic_pulse_irq(struct vmctx *ctx, int irq) 892 { 893 struct vm_ioapic_irq ioapic_irq; 894 895 bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); 896 ioapic_irq.irq = irq; 897 898 return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq)); 899 } 900 901 int 902 vm_ioapic_pincount(struct vmctx *ctx, int *pincount) 903 { 904 905 return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount)); 906 } 907 908 int 909 vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa, 910 bool write, int size, uint64_t *value) 911 { 912 struct vm_readwrite_kernemu_device irp = { 913 .vcpuid = vcpu, 914 .access_width = fls(size) - 1, 915 .gpa = gpa, 916 .value = write ? *value : ~0ul, 917 }; 918 long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV); 919 int rc; 920 921 rc = ioctl(ctx->fd, cmd, &irp); 922 if (rc == 0 && !write) 923 *value = irp.value; 924 return (rc); 925 } 926 927 int 928 vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) 929 { 930 struct vm_isa_irq isa_irq; 931 932 bzero(&isa_irq, sizeof(struct vm_isa_irq)); 933 isa_irq.atpic_irq = atpic_irq; 934 isa_irq.ioapic_irq = ioapic_irq; 935 936 return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq)); 937 } 938 939 int 940 vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) 941 { 942 struct vm_isa_irq isa_irq; 943 944 bzero(&isa_irq, sizeof(struct vm_isa_irq)); 945 isa_irq.atpic_irq = atpic_irq; 946 isa_irq.ioapic_irq = ioapic_irq; 947 948 return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq)); 949 } 950 951 int 952 vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) 953 { 954 struct vm_isa_irq isa_irq; 955 956 bzero(&isa_irq, sizeof(struct vm_isa_irq)); 957 isa_irq.atpic_irq = atpic_irq; 958 isa_irq.ioapic_irq = ioapic_irq; 959 960 return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq)); 961 } 962 963 int 964 vm_isa_set_irq_trigger(struct vmctx *ctx, int 
atpic_irq, 965 enum vm_intr_trigger trigger) 966 { 967 struct vm_isa_irq_trigger isa_irq_trigger; 968 969 bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger)); 970 isa_irq_trigger.atpic_irq = atpic_irq; 971 isa_irq_trigger.trigger = trigger; 972 973 return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger)); 974 } 975 976 int 977 vm_inject_nmi(struct vmctx *ctx, int vcpu) 978 { 979 struct vm_nmi vmnmi; 980 981 bzero(&vmnmi, sizeof(vmnmi)); 982 vmnmi.cpuid = vcpu; 983 984 return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); 985 } 986 987 static const char *capstrmap[] = { 988 [VM_CAP_HALT_EXIT] = "hlt_exit", 989 [VM_CAP_MTRAP_EXIT] = "mtrap_exit", 990 [VM_CAP_PAUSE_EXIT] = "pause_exit", 991 [VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest", 992 [VM_CAP_ENABLE_INVPCID] = "enable_invpcid", 993 [VM_CAP_BPT_EXIT] = "bpt_exit", 994 }; 995 996 int 997 vm_capability_name2type(const char *capname) 998 { 999 int i; 1000 1001 for (i = 0; i < nitems(capstrmap); i++) { 1002 if (strcmp(capstrmap[i], capname) == 0) 1003 return (i); 1004 } 1005 1006 return (-1); 1007 } 1008 1009 const char * 1010 vm_capability_type2name(int type) 1011 { 1012 if (type >= 0 && type < nitems(capstrmap)) 1013 return (capstrmap[type]); 1014 1015 return (NULL); 1016 } 1017 1018 int 1019 vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, 1020 int *retval) 1021 { 1022 int error; 1023 struct vm_capability vmcap; 1024 1025 bzero(&vmcap, sizeof(vmcap)); 1026 vmcap.cpuid = vcpu; 1027 vmcap.captype = cap; 1028 1029 error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); 1030 *retval = vmcap.capval; 1031 return (error); 1032 } 1033 1034 int 1035 vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) 1036 { 1037 struct vm_capability vmcap; 1038 1039 bzero(&vmcap, sizeof(vmcap)); 1040 vmcap.cpuid = vcpu; 1041 vmcap.captype = cap; 1042 vmcap.capval = val; 1043 1044 return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); 1045 } 1046 1047 #ifdef __FreeBSD__ 1048 int 1049 
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 1050 { 1051 struct vm_pptdev pptdev; 1052 1053 bzero(&pptdev, sizeof(pptdev)); 1054 pptdev.bus = bus; 1055 pptdev.slot = slot; 1056 pptdev.func = func; 1057 1058 return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); 1059 } 1060 1061 int 1062 vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 1063 { 1064 struct vm_pptdev pptdev; 1065 1066 bzero(&pptdev, sizeof(pptdev)); 1067 pptdev.bus = bus; 1068 pptdev.slot = slot; 1069 pptdev.func = func; 1070 1071 return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); 1072 } 1073 1074 int 1075 vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, 1076 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 1077 { 1078 struct vm_pptdev_mmio pptmmio; 1079 1080 bzero(&pptmmio, sizeof(pptmmio)); 1081 pptmmio.bus = bus; 1082 pptmmio.slot = slot; 1083 pptmmio.func = func; 1084 pptmmio.gpa = gpa; 1085 pptmmio.len = len; 1086 pptmmio.hpa = hpa; 1087 1088 return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); 1089 } 1090 1091 int 1092 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 1093 uint64_t addr, uint64_t msg, int numvec) 1094 { 1095 struct vm_pptdev_msi pptmsi; 1096 1097 bzero(&pptmsi, sizeof(pptmsi)); 1098 pptmsi.vcpu = vcpu; 1099 pptmsi.bus = bus; 1100 pptmsi.slot = slot; 1101 pptmsi.func = func; 1102 pptmsi.msg = msg; 1103 pptmsi.addr = addr; 1104 pptmsi.numvec = numvec; 1105 1106 return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); 1107 } 1108 1109 int 1110 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 1111 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) 1112 { 1113 struct vm_pptdev_msix pptmsix; 1114 1115 bzero(&pptmsix, sizeof(pptmsix)); 1116 pptmsix.vcpu = vcpu; 1117 pptmsix.bus = bus; 1118 pptmsix.slot = slot; 1119 pptmsix.func = func; 1120 pptmsix.idx = idx; 1121 pptmsix.msg = msg; 1122 pptmsix.addr = addr; 1123 pptmsix.vector_control = vector_control; 1124 1125 return 
ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); 1126 } 1127 1128 int 1129 vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func, 1130 int *msi_limit, int *msix_limit) 1131 { 1132 struct vm_pptdev_limits pptlimits; 1133 int error; 1134 1135 bzero(&pptlimits, sizeof (pptlimits)); 1136 pptlimits.bus = bus; 1137 pptlimits.slot = slot; 1138 pptlimits.func = func; 1139 1140 error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits); 1141 1142 *msi_limit = pptlimits.msi_limit; 1143 *msix_limit = pptlimits.msix_limit; 1144 1145 return (error); 1146 } 1147 #else /* __FreeBSD__ */ 1148 int 1149 vm_assign_pptdev(struct vmctx *ctx, int pptfd) 1150 { 1151 struct vm_pptdev pptdev; 1152 1153 pptdev.pptfd = pptfd; 1154 return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); 1155 } 1156 1157 int 1158 vm_unassign_pptdev(struct vmctx *ctx, int pptfd) 1159 { 1160 struct vm_pptdev pptdev; 1161 1162 pptdev.pptfd = pptfd; 1163 return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); 1164 } 1165 1166 int 1167 vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len, 1168 vm_paddr_t hpa) 1169 { 1170 struct vm_pptdev_mmio pptmmio; 1171 1172 pptmmio.pptfd = pptfd; 1173 pptmmio.gpa = gpa; 1174 pptmmio.len = len; 1175 pptmmio.hpa = hpa; 1176 return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); 1177 } 1178 1179 int 1180 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr, 1181 uint64_t msg, int numvec) 1182 { 1183 struct vm_pptdev_msi pptmsi; 1184 1185 pptmsi.vcpu = vcpu; 1186 pptmsi.pptfd = pptfd; 1187 pptmsi.msg = msg; 1188 pptmsi.addr = addr; 1189 pptmsi.numvec = numvec; 1190 return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); 1191 } 1192 1193 int 1194 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, int idx, 1195 uint64_t addr, uint64_t msg, uint32_t vector_control) 1196 { 1197 struct vm_pptdev_msix pptmsix; 1198 1199 pptmsix.vcpu = vcpu; 1200 pptmsix.pptfd = pptfd; 1201 pptmsix.idx = idx; 1202 pptmsix.msg = msg; 1203 pptmsix.addr = addr; 1204 
pptmsix.vector_control = vector_control; 1205 return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); 1206 } 1207 1208 int 1209 vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit, 1210 int *msix_limit) 1211 { 1212 struct vm_pptdev_limits pptlimits; 1213 int error; 1214 1215 bzero(&pptlimits, sizeof (pptlimits)); 1216 pptlimits.pptfd = pptfd; 1217 error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits); 1218 1219 *msi_limit = pptlimits.msi_limit; 1220 *msix_limit = pptlimits.msix_limit; 1221 return (error); 1222 } 1223 #endif /* __FreeBSD__ */ 1224 1225 uint64_t * 1226 vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, 1227 int *ret_entries) 1228 { 1229 int error; 1230 1231 static struct vm_stats vmstats; 1232 1233 vmstats.cpuid = vcpu; 1234 1235 error = ioctl(ctx->fd, VM_STATS_IOC, &vmstats); 1236 if (error == 0) { 1237 if (ret_entries) 1238 *ret_entries = vmstats.num_entries; 1239 if (ret_tv) 1240 *ret_tv = vmstats.tv; 1241 return (vmstats.statbuf); 1242 } else 1243 return (NULL); 1244 } 1245 1246 const char * 1247 vm_get_stat_desc(struct vmctx *ctx, int index) 1248 { 1249 static struct vm_stat_desc statdesc; 1250 1251 statdesc.index = index; 1252 if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) 1253 return (statdesc.desc); 1254 else 1255 return (NULL); 1256 } 1257 1258 int 1259 vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) 1260 { 1261 int error; 1262 struct vm_x2apic x2apic; 1263 1264 bzero(&x2apic, sizeof(x2apic)); 1265 x2apic.cpuid = vcpu; 1266 1267 error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); 1268 *state = x2apic.state; 1269 return (error); 1270 } 1271 1272 int 1273 vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) 1274 { 1275 int error; 1276 struct vm_x2apic x2apic; 1277 1278 bzero(&x2apic, sizeof(x2apic)); 1279 x2apic.cpuid = vcpu; 1280 x2apic.state = state; 1281 1282 error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); 1283 1284 return (error); 1285 } 1286 1287 /* 1288 * 
 * From Intel Vol 3a:
 *	Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
 */
/*
 * Force 'vcpu' into the architectural power-on state: real mode at the
 * reset vector (CS base 0xffff0000, RIP 0xfff0), 16-bit data segments,
 * and zeroed general-purpose registers (RDX is set to 0xf00).
 *
 * Returns 0 on success, or the error of the first vm_set_register()/
 * vm_set_desc() call that fails.
 */
int
vcpu_reset(struct vmctx *vmctx, int vcpu)
{
	int error;
	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
	uint32_t desc_access, desc_limit;
	uint16_t sel;

	zero = 0;

	/* RFLAGS: only the reserved, always-one bit 1 set */
	rflags = 0x2;
	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
	if (error)
		goto done;

	rip = 0xfff0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
		goto done;

	/* CR0: paging and protection off; native FPU error reporting */
	cr0 = CR0_NE;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
		goto done;

	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
		goto done;

	cr4 = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
		goto done;

	/*
	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
	 */
	desc_base = 0xffff0000;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0xf000;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
		goto done;

	/*
	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
	 */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	/* Null selectors for all data segments */
	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
		goto done;

	/* General purpose registers */
	rdx = 0xf00;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
		goto done;

	/* GDTR, IDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	/* TR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0000008b;
	/*
	 * NOTE(review): desc_base/desc_limit are assigned just above but
	 * the call passes literal 0, 0, so TR's limit ends up 0 rather
	 * than 0xffff -- confirm against the intended reset state.
	 */
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
		goto done;

	/* LDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x00000082;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
	    desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	/* literal 0 rather than 'sel'; same value either way */
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
		goto done;

	/* XXX cr2, debug registers */

	error = 0;
done:
	return (error);
}

/*
 * Look up the page table entries translating guest physical address
 * 'gpa'.  On success '*num' holds the count of PTEs copied into
 * 'pte[]'; the caller must provide space for them.
 */
int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
	int error, i;
	struct vm_gpa_pte gpapte;

	bzero(&gpapte, sizeof(gpapte));
	gpapte.gpa = gpa;

	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

	if (error == 0) {
		*num = gpapte.ptenum;
		for (i = 0; i < gpapte.ptenum; i++)
			pte[i] = gpapte.pte[i];
	}

	return (error);
}

/*
 * Fetch the emulated HPET's capability word.  'capabilities' may be
 * NULL if the caller only wants to probe whether the ioctl works;
 * note the value is stored even when the ioctl fails (zeroed struct).
 */
int
vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
{
	int error;
	struct vm_hpet_cap cap;

	bzero(&cap, sizeof(struct vm_hpet_cap));
	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
	if (capabilities != NULL)
		*capabilities = cap.capabilities;
	return (error);
}

/*
 * Translate guest linear address 'gla' under the paging mode in
 * 'paging', checking 'prot' access rights.  On success the kernel's
 * reply is copied to '*fault' and '*gpa'.
 */
int
vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

/*
 * Same as vm_gla2gpa() but uses the VM_GLA2GPA_NOFAULT ioctl
 * (presumably avoids injecting a fault into the guest on a failed
 * translation -- confirm against the vmm implementation).
 */
int
vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

#ifndef min
#define	min(a,b)	(((a) < (b)) ? (a) : (b))
#endif

/*
 * Build an iovec describing the host mappings backing the guest linear
 * range ['gla', 'gla' + 'len').  Each (partial) page is translated with
 * vm_gla2gpa() and mapped with vm_map_gpa(), one iovec entry per page.
 *
 * All 'iovcnt' entries are zeroed up front, so unused trailing entries
 * are benign.  Returns 0 on success; a faulting translation returns 0
 * with '*fault' set, a missing gpa mapping returns EFAULT, and other
 * translation failures return their error.
 */
int
vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
    int *fault)
{
	void *va;
	uint64_t gpa;
	int error, i, n, off;

	for (i = 0; i < iovcnt; i++) {
		iov[i].iov_base = 0;
		iov[i].iov_len = 0;
	}

	while (len) {
		assert(iovcnt > 0);
		error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);

		/* clamp each chunk to the end of the current page */
		off = gpa & PAGE_MASK;
		n = min(len, PAGE_SIZE - off);

		va = vm_map_gpa(ctx, gpa, n);
		if (va == NULL)
			return (EFAULT);

		iov->iov_base = va;
		iov->iov_len = n;
		iov++;
		iovcnt--;

		gla += n;
		len -= n;
	}
	return (0);
}

/*
 * Release an iovec filled in by vm_copy_setup().  No per-copy state is
 * allocated by vm_copy_setup(), so this is a no-op.
 */
void
vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
{

	return;
}

/*
 * Copy 'len' bytes of guest memory (described by 'iov' from
 * vm_copy_setup()) into the host buffer 'vp'.  The iovec must cover at
 * least 'len' bytes.
 */
void
vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	dst = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		src = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		dst += n;
		len -= n;
	}
}

/*
 * Copy 'len' bytes from host buffer 'vp' into guest memory described
 * by 'iov' (from vm_copy_setup()).  The iovec must cover at least
 * 'len' bytes.
 */
void
vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
    size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	src = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		dst = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		src += n;
		len -= n;
	}
}

/*
 * Common helper for the vm_*_cpus() wrappers below: fetch the cpuset
 * selected by 'which' from the kernel.
 */
static int
vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
{
	struct vm_cpuset vm_cpuset;
	int error;

	bzero(&vm_cpuset, sizeof(struct vm_cpuset));
	vm_cpuset.which = which;
	vm_cpuset.cpusetsize = sizeof(cpuset_t);
	vm_cpuset.cpus = cpus;

	error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
	return (error);
}

/* Set of vcpus that have been activated. */
int
vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
}

/* Set of vcpus that are currently suspended. */
int
vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
}

/* Set of vcpus held for debugging. */
int
vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
}

/*
 * Mark 'vcpu' as active.
 */
int
vm_activate_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
	return (error);
}

/*
 * Suspend 'vcpu' (e.g. for debugger control); resume with
 * vm_resume_cpu().
 */
int
vm_suspend_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
	return (error);
}

/*
 * Resume a vcpu previously suspended with vm_suspend_cpu().
 */
int
vm_resume_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
	return (error);
}

/*
 * Fetch the pending exception/interrupt info for 'vcpu'.  '*info1'
 * and '*info2' are written only on success.
 */
int
vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu;
	error =
ioctl(ctx->fd, VM_GET_INTINFO, &vmii); 1690 if (error == 0) { 1691 *info1 = vmii.info1; 1692 *info2 = vmii.info2; 1693 } 1694 return (error); 1695 } 1696 1697 int 1698 vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) 1699 { 1700 struct vm_intinfo vmii; 1701 int error; 1702 1703 bzero(&vmii, sizeof(struct vm_intinfo)); 1704 vmii.vcpuid = vcpu; 1705 vmii.info1 = info1; 1706 error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); 1707 return (error); 1708 } 1709 1710 int 1711 vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value) 1712 { 1713 struct vm_rtc_data rtcdata; 1714 int error; 1715 1716 bzero(&rtcdata, sizeof(struct vm_rtc_data)); 1717 rtcdata.offset = offset; 1718 rtcdata.value = value; 1719 error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata); 1720 return (error); 1721 } 1722 1723 int 1724 vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval) 1725 { 1726 struct vm_rtc_data rtcdata; 1727 int error; 1728 1729 bzero(&rtcdata, sizeof(struct vm_rtc_data)); 1730 rtcdata.offset = offset; 1731 error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata); 1732 if (error == 0) 1733 *retval = rtcdata.value; 1734 return (error); 1735 } 1736 1737 int 1738 vm_rtc_settime(struct vmctx *ctx, time_t secs) 1739 { 1740 struct vm_rtc_time rtctime; 1741 int error; 1742 1743 bzero(&rtctime, sizeof(struct vm_rtc_time)); 1744 rtctime.secs = secs; 1745 error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime); 1746 return (error); 1747 } 1748 1749 int 1750 vm_rtc_gettime(struct vmctx *ctx, time_t *secs) 1751 { 1752 struct vm_rtc_time rtctime; 1753 int error; 1754 1755 bzero(&rtctime, sizeof(struct vm_rtc_time)); 1756 error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime); 1757 if (error == 0) 1758 *secs = rtctime.secs; 1759 return (error); 1760 } 1761 1762 int 1763 vm_restart_instruction(void *arg, int vcpu) 1764 { 1765 struct vmctx *ctx = arg; 1766 1767 return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu)); 1768 } 1769 1770 int 1771 vm_set_topology(struct vmctx *ctx, 1772 uint16_t sockets, uint16_t 
cores, uint16_t threads, uint16_t maxcpus) 1773 { 1774 struct vm_cpu_topology topology; 1775 1776 bzero(&topology, sizeof (struct vm_cpu_topology)); 1777 topology.sockets = sockets; 1778 topology.cores = cores; 1779 topology.threads = threads; 1780 topology.maxcpus = maxcpus; 1781 return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology)); 1782 } 1783 1784 int 1785 vm_get_topology(struct vmctx *ctx, 1786 uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) 1787 { 1788 struct vm_cpu_topology topology; 1789 int error; 1790 1791 bzero(&topology, sizeof (struct vm_cpu_topology)); 1792 error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology); 1793 if (error == 0) { 1794 *sockets = topology.sockets; 1795 *cores = topology.cores; 1796 *threads = topology.threads; 1797 *maxcpus = topology.maxcpus; 1798 } 1799 return (error); 1800 } 1801 1802 int 1803 vm_get_device_fd(struct vmctx *ctx) 1804 { 1805 1806 return (ctx->fd); 1807 } 1808 1809 #ifndef __FreeBSD__ 1810 int 1811 vm_wrlock_cycle(struct vmctx *ctx) 1812 { 1813 if (ioctl(ctx->fd, VM_WRLOCK_CYCLE, 0) != 0) { 1814 return (errno); 1815 } 1816 return (0); 1817 } 1818 #endif /* __FreeBSD__ */ 1819 1820 #ifdef __FreeBSD__ 1821 const cap_ioctl_t * 1822 vm_get_ioctls(size_t *len) 1823 { 1824 cap_ioctl_t *cmds; 1825 /* keep in sync with machine/vmm_dev.h */ 1826 static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT, 1827 VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG, 1828 VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER, 1829 VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR, 1830 VM_SET_REGISTER_SET, VM_GET_REGISTER_SET, 1831 VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV, 1832 VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ, 1833 VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ, 1834 VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ, 1835 VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER, 1836 VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV, 
1837 VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI, 1838 VM_PPTDEV_MSIX, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC, 1839 VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE, 1840 VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA, 1841 VM_GLA2GPA_NOFAULT, 1842 VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU, 1843 VM_SET_INTINFO, VM_GET_INTINFO, 1844 VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME, 1845 VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY }; 1846 1847 if (len == NULL) { 1848 cmds = malloc(sizeof(vm_ioctl_cmds)); 1849 if (cmds == NULL) 1850 return (NULL); 1851 bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds)); 1852 return (cmds); 1853 } 1854 1855 *len = nitems(vm_ioctl_cmds); 1856 return (NULL); 1857 } 1858 #endif /* __FreeBSD__ */ 1859