diff options
Diffstat (limited to 'lib/libvmmapi/vmmapi.c')
-rw-r--r-- | lib/libvmmapi/vmmapi.c | 945 |
1 files changed, 200 insertions, 745 deletions
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index e92b3199381f..0cf051567110 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,25 +24,19 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> +#include <sys/capsicum.h> #include <sys/sysctl.h> #include <sys/ioctl.h> -#include <sys/linker.h> #include <sys/mman.h> +#include <sys/linker.h> #include <sys/module.h> #include <sys/_iovec.h> #include <sys/cpuset.h> -#include <x86/segments.h> -#include <machine/specialreg.h> - +#include <capsicum_helpers.h> #include <errno.h> #include <stdbool.h> #include <stdio.h> @@ -57,13 +51,23 @@ __FBSDID("$FreeBSD$"); #include <vm/vm.h> #include <machine/vmm.h> #include <machine/vmm_dev.h> +#ifdef WITH_VMMAPI_SNAPSHOT #include <machine/vmm_snapshot.h> +#endif #include "vmmapi.h" +#include "internal.h" #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) +#ifdef __amd64__ +#define VM_LOWMEM_LIMIT (3 * GB) +#else +#define VM_LOWMEM_LIMIT 0 +#endif +#define VM_HIGHMEM_BASE (4 * GB) + /* * Size of the guard region before and after the virtual address space * mapping the guest physical memory. This must be a multiple of the @@ -74,16 +78,6 @@ __FBSDID("$FreeBSD$"); #define PROT_RW (PROT_READ | PROT_WRITE) #define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) -struct vmctx { - int fd; - uint32_t lowmem_limit; - int memflags; - size_t lowmem; - size_t highmem; - char *baseaddr; - char *name; -}; - #define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) @@ -125,9 +119,9 @@ vm_open(const char *name) vm->fd = -1; vm->memflags = 0; - vm->lowmem_limit = 3 * GB; vm->name = (char *)(vm + 1); strcpy(vm->name, name); + memset(vm->memsegs, 0, sizeof(vm->memsegs)); if ((vm->fd = vm_device_open(vm->name)) < 0) goto err; @@ -141,6 +135,15 @@ err: } void +vm_close(struct vmctx *vm) +{ + assert(vm != NULL); + + close(vm->fd); + free(vm); +} + +void vm_destroy(struct vmctx *vm) { assert(vm != NULL); @@ -152,6 +155,29 @@ vm_destroy(struct vmctx *vm) free(vm); } +struct vcpu * +vm_vcpu_open(struct vmctx *ctx, int vcpuid) +{ + struct vcpu *vcpu; + + vcpu = malloc(sizeof(*vcpu)); + vcpu->ctx = ctx; + vcpu->vcpuid = vcpuid; + return (vcpu); +} + +void +vm_vcpu_close(struct vcpu *vcpu) +{ + free(vcpu); +} + +int +vcpu_id(struct vcpu *vcpu) +{ + return (vcpu->vcpuid); +} + int vm_parse_memsize(const char *opt, size_t *ret_memsize) { @@ -177,17 +203,10 @@ vm_parse_memsize(const char *opt, size_t *ret_memsize) } uint32_t -vm_get_lowmem_limit(struct vmctx *ctx) -{ - - return (ctx->lowmem_limit); -} - -void -vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit) +vm_get_lowmem_limit(struct vmctx *ctx __unused) { - ctx->lowmem_limit = limit; + return (VM_LOWMEM_LIMIT); } void @@ -249,8 +268,8 @@ vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr, { *guest_baseaddr = ctx->baseaddr; - *lowmem_size = ctx->lowmem; - *highmem_size = ctx->highmem; + *lowmem_size = ctx->memsegs[VM_MEMSEG_LOW].size; + *highmem_size = ctx->memsegs[VM_MEMSEG_HIGH].size; return (0); } @@ -354,6 +373,7 @@ vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf, size_t n; int error; + bzero(&memseg, sizeof(memseg)); memseg.segid = segid; error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg); if (error == 0) { @@ -401,17 +421,17 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) assert(vms == VM_MMAP_ALL); /* - * If 'memsize' cannot fit entirely in the 'lowmem' segment then - * create another 'highmem' segment above 4GB for the remainder. + * If 'memsize' cannot fit entirely in the 'lowmem' segment then create + * another 'highmem' segment above VM_HIGHMEM_BASE for the remainder. */ - if (memsize > ctx->lowmem_limit) { - ctx->lowmem = ctx->lowmem_limit; - ctx->highmem = memsize - ctx->lowmem_limit; - objsize = 4*GB + ctx->highmem; + if (memsize > VM_LOWMEM_LIMIT) { + ctx->memsegs[VM_MEMSEG_LOW].size = VM_LOWMEM_LIMIT; + ctx->memsegs[VM_MEMSEG_HIGH].size = memsize - VM_LOWMEM_LIMIT; + objsize = VM_HIGHMEM_BASE + ctx->memsegs[VM_MEMSEG_HIGH].size; } else { - ctx->lowmem = memsize; - ctx->highmem = 0; - objsize = ctx->lowmem; + ctx->memsegs[VM_MEMSEG_LOW].size = memsize; + ctx->memsegs[VM_MEMSEG_HIGH].size = 0; + objsize = memsize; } error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL); @@ -428,17 +448,17 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) return (-1); baseaddr = ptr + VM_MMAP_GUARD_SIZE; - if (ctx->highmem > 0) { - gpa = 4*GB; - len = ctx->highmem; + if (ctx->memsegs[VM_MEMSEG_HIGH].size > 0) { + gpa = VM_HIGHMEM_BASE; + len = ctx->memsegs[VM_MEMSEG_HIGH].size; error = setup_memory_segment(ctx, gpa, len, baseaddr); if (error) return (error); } - if (ctx->lowmem > 0) { + if (ctx->memsegs[VM_MEMSEG_LOW].size > 0) { gpa = 0; - len = ctx->lowmem; + len = ctx->memsegs[VM_MEMSEG_LOW].size; error = setup_memory_segment(ctx, gpa, len, baseaddr); if (error) return (error); @@ -459,20 +479,19 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) void * vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) { + vm_size_t lowsize, highsize; - if (ctx->lowmem > 0) { - if (gaddr < ctx->lowmem && len <= ctx->lowmem && - gaddr + len <= ctx->lowmem) + lowsize = ctx->memsegs[VM_MEMSEG_LOW].size; + if (lowsize > 0) { + if (gaddr < lowsize && len <= lowsize && gaddr + len <= lowsize) return (ctx->baseaddr + gaddr); } - if (ctx->highmem > 0) { - if (gaddr >= 4*GB) { - if (gaddr < 4*GB + ctx->highmem && - len <= ctx->highmem && - gaddr + len <= 4*GB + ctx->highmem) - return (ctx->baseaddr + gaddr); - } + highsize = ctx->memsegs[VM_MEMSEG_HIGH].size; + if (highsize > 0 && gaddr >= VM_HIGHMEM_BASE) { + if (gaddr < VM_HIGHMEM_BASE + highsize && len <= highsize && + gaddr + len <= VM_HIGHMEM_BASE + highsize) + return (ctx->baseaddr + gaddr); } return (NULL); @@ -482,15 +501,19 @@ vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr) { vm_paddr_t offaddr; + vm_size_t lowsize, highsize; offaddr = (char *)addr - ctx->baseaddr; - if (ctx->lowmem > 0) - if (offaddr <= ctx->lowmem) + lowsize = ctx->memsegs[VM_MEMSEG_LOW].size; + if (lowsize > 0) + if (offaddr <= lowsize) return (offaddr); - if (ctx->highmem > 0) - if (offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem) + highsize = ctx->memsegs[VM_MEMSEG_HIGH].size; + if (highsize > 0) + if (offaddr >= VM_HIGHMEM_BASE && + offaddr < VM_HIGHMEM_BASE + highsize) return (offaddr); return ((vm_paddr_t)-1); @@ -507,14 +530,21 @@ size_t vm_get_lowmem_size(struct vmctx *ctx) { - return (ctx->lowmem); + return (ctx->memsegs[VM_MEMSEG_LOW].size); +} + +vm_paddr_t +vm_get_highmem_base(struct vmctx *ctx __unused) +{ + + return (VM_HIGHMEM_BASE); } size_t vm_get_highmem_size(struct vmctx *ctx) { - return (ctx->highmem); + return (ctx->memsegs[VM_MEMSEG_HIGH].size); } void * @@ -568,129 +598,81 @@ done: } int -vm_set_desc(struct vmctx *ctx, int vcpu, int reg, - uint64_t base, uint32_t limit, uint32_t access) -{ - int error; - struct vm_seg_desc vmsegdesc; - - bzero(&vmsegdesc, sizeof(vmsegdesc)); - vmsegdesc.cpuid = vcpu; - vmsegdesc.regnum = reg; - vmsegdesc.desc.base = base; - vmsegdesc.desc.limit = limit; - vmsegdesc.desc.access = access; - - error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); - return (error); -} - -int -vm_get_desc(struct vmctx *ctx, int vcpu, int reg, - uint64_t *base, uint32_t *limit, uint32_t *access) -{ - int error; - struct vm_seg_desc vmsegdesc; - - bzero(&vmsegdesc, sizeof(vmsegdesc)); - vmsegdesc.cpuid = vcpu; - vmsegdesc.regnum = reg; - - error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); - if (error == 0) { - *base = vmsegdesc.desc.base; - *limit = vmsegdesc.desc.limit; - *access = vmsegdesc.desc.access; - } - return (error); -} - -int -vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc) +vcpu_ioctl(struct vcpu *vcpu, u_long cmd, void *arg) { - int error; - - error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit, - &seg_desc->access); - return (error); + /* + * XXX: fragile, handle with care + * Assumes that the first field of the ioctl data + * is the vcpuid. + */ + *(int *)arg = vcpu->vcpuid; + return (ioctl(vcpu->ctx->fd, cmd, arg)); } int -vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) +vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); - vmreg.cpuid = vcpu; vmreg.regnum = reg; vmreg.regval = val; - error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); + error = vcpu_ioctl(vcpu, VM_SET_REGISTER, &vmreg); return (error); } int -vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) +vm_get_register(struct vcpu *vcpu, int reg, uint64_t *ret_val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); - vmreg.cpuid = vcpu; vmreg.regnum = reg; - error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); + error = vcpu_ioctl(vcpu, VM_GET_REGISTER, &vmreg); *ret_val = vmreg.regval; return (error); } int -vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count, +vm_set_register_set(struct vcpu *vcpu, unsigned int count, const int *regnums, uint64_t *regvals) { int error; struct vm_register_set vmregset; bzero(&vmregset, sizeof(vmregset)); - vmregset.cpuid = vcpu; vmregset.count = count; vmregset.regnums = regnums; vmregset.regvals = regvals; - error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset); + error = vcpu_ioctl(vcpu, VM_SET_REGISTER_SET, &vmregset); return (error); } int -vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count, +vm_get_register_set(struct vcpu *vcpu, unsigned int count, const int *regnums, uint64_t *regvals) { int error; struct vm_register_set vmregset; bzero(&vmregset, sizeof(vmregset)); - vmregset.cpuid = vcpu; vmregset.count = count; vmregset.regnums = regnums; vmregset.regvals = regvals; - error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset); + error = vcpu_ioctl(vcpu, VM_GET_REGISTER_SET, &vmregset); return (error); } int -vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit) +vm_run(struct vcpu *vcpu, struct vm_run *vmrun) { - int error; - struct vm_run vmrun; - - bzero(&vmrun, sizeof(vmrun)); - vmrun.cpuid = vcpu; - - error = ioctl(ctx->fd, VM_RUN, &vmrun); - bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); - return (error); + return (vcpu_ioctl(vcpu, VM_RUN, vmrun)); } int @@ -711,201 +693,13 @@ vm_reinit(struct vmctx *ctx) } int -vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid, - uint32_t errcode, int restart_instruction) -{ - struct vm_exception exc; - - exc.cpuid = vcpu; - exc.vector = vector; - exc.error_code = errcode; - exc.error_code_valid = errcode_valid; - exc.restart_instruction = restart_instruction; - - return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc)); -} - -int -vm_apicid2vcpu(struct vmctx *ctx __unused, int apicid) -{ - /* - * The apic id associated with the 'vcpu' has the same numerical value - * as the 'vcpu' itself. - */ - return (apicid); -} - -int -vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) -{ - struct vm_lapic_irq vmirq; - - bzero(&vmirq, sizeof(vmirq)); - vmirq.cpuid = vcpu; - vmirq.vector = vector; - - return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); -} - -int -vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector) -{ - struct vm_lapic_irq vmirq; - - bzero(&vmirq, sizeof(vmirq)); - vmirq.cpuid = vcpu; - vmirq.vector = vector; - - return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq)); -} - -int -vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg) -{ - struct vm_lapic_msi vmmsi; - - bzero(&vmmsi, sizeof(vmmsi)); - vmmsi.addr = addr; - vmmsi.msg = msg; - - return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi)); -} - -int -vm_ioapic_assert_irq(struct vmctx *ctx, int irq) -{ - struct vm_ioapic_irq ioapic_irq; - - bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); - ioapic_irq.irq = irq; - - return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq)); -} - -int -vm_ioapic_deassert_irq(struct vmctx *ctx, int irq) -{ - struct vm_ioapic_irq ioapic_irq; - - bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); - ioapic_irq.irq = irq; - - return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq)); -} - -int -vm_ioapic_pulse_irq(struct vmctx *ctx, int irq) -{ - struct vm_ioapic_irq ioapic_irq; - - bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); - ioapic_irq.irq = irq; - - return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq)); -} - -int -vm_ioapic_pincount(struct vmctx *ctx, int *pincount) -{ - - return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount)); -} - -int -vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa, - bool write, int size, uint64_t *value) -{ - struct vm_readwrite_kernemu_device irp = { - .vcpuid = vcpu, - .access_width = fls(size) - 1, - .gpa = gpa, - .value = write ? *value : ~0ul, - }; - long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV); - int rc; - - rc = ioctl(ctx->fd, cmd, &irp); - if (rc == 0 && !write) - *value = irp.value; - return (rc); -} - -int -vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) -{ - struct vm_isa_irq isa_irq; - - bzero(&isa_irq, sizeof(struct vm_isa_irq)); - isa_irq.atpic_irq = atpic_irq; - isa_irq.ioapic_irq = ioapic_irq; - - return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq)); -} - -int -vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) -{ - struct vm_isa_irq isa_irq; - - bzero(&isa_irq, sizeof(struct vm_isa_irq)); - isa_irq.atpic_irq = atpic_irq; - isa_irq.ioapic_irq = ioapic_irq; - - return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq)); -} - -int -vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) -{ - struct vm_isa_irq isa_irq; - - bzero(&isa_irq, sizeof(struct vm_isa_irq)); - isa_irq.atpic_irq = atpic_irq; - isa_irq.ioapic_irq = ioapic_irq; - - return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq)); -} - -int -vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq, - enum vm_intr_trigger trigger) -{ - struct vm_isa_irq_trigger isa_irq_trigger; - - bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger)); - isa_irq_trigger.atpic_irq = atpic_irq; - isa_irq_trigger.trigger = trigger; - - return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger)); -} - -int -vm_inject_nmi(struct vmctx *ctx, int vcpu) -{ - struct vm_nmi vmnmi; - - bzero(&vmnmi, sizeof(vmnmi)); - vmnmi.cpuid = vcpu; - - return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); -} - -static const char *capstrmap[] = { - [VM_CAP_HALT_EXIT] = "hlt_exit", - [VM_CAP_MTRAP_EXIT] = "mtrap_exit", - [VM_CAP_PAUSE_EXIT] = "pause_exit", - [VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest", - [VM_CAP_ENABLE_INVPCID] = "enable_invpcid", - [VM_CAP_BPT_EXIT] = "bpt_exit", -}; - -int vm_capability_name2type(const char *capname) { int i; - for (i = 0; i < (int)nitems(capstrmap); i++) { - if (strcmp(capstrmap[i], capname) == 0) + for (i = 0; i < VM_CAP_MAX; i++) { + if (vm_capstrmap[i] != NULL && + strcmp(vm_capstrmap[i], capname) == 0) return (i); } @@ -915,152 +709,40 @@ vm_capability_name2type(const char *capname) const char * vm_capability_type2name(int type) { - if (type >= 0 && type < (int)nitems(capstrmap)) - return (capstrmap[type]); + if (type >= 0 && type < VM_CAP_MAX) + return (vm_capstrmap[type]); return (NULL); } int -vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, - int *retval) +vm_get_capability(struct vcpu *vcpu, enum vm_cap_type cap, int *retval) { int error; struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); - vmcap.cpuid = vcpu; vmcap.captype = cap; - error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); + error = vcpu_ioctl(vcpu, VM_GET_CAPABILITY, &vmcap); *retval = vmcap.capval; return (error); } int -vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) +vm_set_capability(struct vcpu *vcpu, enum vm_cap_type cap, int val) { struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); - vmcap.cpuid = vcpu; vmcap.captype = cap; vmcap.capval = val; - return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); -} - -int -vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) -{ - struct vm_pptdev pptdev; - - bzero(&pptdev, sizeof(pptdev)); - pptdev.bus = bus; - pptdev.slot = slot; - pptdev.func = func; - - return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); -} - -int -vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) -{ - struct vm_pptdev pptdev; - - bzero(&pptdev, sizeof(pptdev)); - pptdev.bus = bus; - pptdev.slot = slot; - pptdev.func = func; - - return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); -} - -int -vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, - vm_paddr_t gpa, size_t len, vm_paddr_t hpa) -{ - struct vm_pptdev_mmio pptmmio; - - bzero(&pptmmio, sizeof(pptmmio)); - pptmmio.bus = bus; - pptmmio.slot = slot; - pptmmio.func = func; - pptmmio.gpa = gpa; - pptmmio.len = len; - pptmmio.hpa = hpa; - - return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); -} - -int -vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, - vm_paddr_t gpa, size_t len) -{ - struct vm_pptdev_mmio pptmmio; - - bzero(&pptmmio, sizeof(pptmmio)); - pptmmio.bus = bus; - pptmmio.slot = slot; - pptmmio.func = func; - pptmmio.gpa = gpa; - pptmmio.len = len; - - return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio)); -} - -int -vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, - uint64_t addr, uint64_t msg, int numvec) -{ - struct vm_pptdev_msi pptmsi; - - bzero(&pptmsi, sizeof(pptmsi)); - pptmsi.vcpu = vcpu; - pptmsi.bus = bus; - pptmsi.slot = slot; - pptmsi.func = func; - pptmsi.msg = msg; - pptmsi.addr = addr; - pptmsi.numvec = numvec; - - return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); -} - -int -vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, - int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) -{ - struct vm_pptdev_msix pptmsix; - - bzero(&pptmsix, sizeof(pptmsix)); - pptmsix.vcpu = vcpu; - pptmsix.bus = bus; - pptmsix.slot = slot; - pptmsix.func = func; - pptmsix.idx = idx; - pptmsix.msg = msg; - pptmsix.addr = addr; - pptmsix.vector_control = vector_control; - - return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); -} - -int -vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func) -{ - struct vm_pptdev ppt; - - bzero(&ppt, sizeof(ppt)); - ppt.bus = bus; - ppt.slot = slot; - ppt.func = func; - - return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt); + return (vcpu_ioctl(vcpu, VM_SET_CAPABILITY, &vmcap)); } uint64_t * -vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, +vm_get_stats(struct vcpu *vcpu, struct timeval *ret_tv, int *ret_entries) { static _Thread_local uint64_t *stats_buf; @@ -1071,11 +753,10 @@ vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, bool have_stats; have_stats = false; - vmstats.cpuid = vcpu; count = 0; for (index = 0;; index += nitems(vmstats.statbuf)) { vmstats.index = index; - if (ioctl(ctx->fd, VM_STATS, &vmstats) != 0) + if (vcpu_ioctl(vcpu, VM_STATS, &vmstats) != 0) break; if (stats_count < index + vmstats.num_entries) { new_stats = realloc(stats_buf, @@ -1117,192 +798,7 @@ vm_get_stat_desc(struct vmctx *ctx, int index) return (NULL); } -int -vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) -{ - int error; - struct vm_x2apic x2apic; - - bzero(&x2apic, sizeof(x2apic)); - x2apic.cpuid = vcpu; - - error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); - *state = x2apic.state; - return (error); -} - -int -vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) -{ - int error; - struct vm_x2apic x2apic; - - bzero(&x2apic, sizeof(x2apic)); - x2apic.cpuid = vcpu; - x2apic.state = state; - - error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); - - return (error); -} - -/* - * From Intel Vol 3a: - * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT - */ -int -vcpu_reset(struct vmctx *vmctx, int vcpu) -{ - int error; - uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; - uint32_t desc_access, desc_limit; - uint16_t sel; - - zero = 0; - - rflags = 0x2; - error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); - if (error) - goto done; - - rip = 0xfff0; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) - goto done; - - cr0 = CR0_NE; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) - goto done; - - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) - goto done; - - cr4 = 0; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) - goto done; - - /* - * CS: present, r/w, accessed, 16-bit, byte granularity, usable - */ - desc_base = 0xffff0000; - desc_limit = 0xffff; - desc_access = 0x0093; - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, - desc_base, desc_limit, desc_access); - if (error) - goto done; - - sel = 0xf000; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) - goto done; - - /* - * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity - */ - desc_base = 0; - desc_limit = 0xffff; - desc_access = 0x0093; - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, - desc_base, desc_limit, desc_access); - if (error) - goto done; - - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, - desc_base, desc_limit, desc_access); - if (error) - goto done; - - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, - desc_base, desc_limit, desc_access); - if (error) - goto done; - - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, - desc_base, desc_limit, desc_access); - if (error) - goto done; - - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, - desc_base, desc_limit, desc_access); - if (error) - goto done; - - sel = 0; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) - goto done; - - /* General purpose registers */ - rdx = 0xf00; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) - goto done; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) - goto done; - - /* GDTR, IDTR */ - desc_base = 0; - desc_limit = 0xffff; - desc_access = 0; - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, - desc_base, desc_limit, desc_access); - if (error != 0) - goto done; - - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, - desc_base, desc_limit, desc_access); - if (error != 0) - goto done; - - /* TR */ - desc_base = 0; - desc_limit = 0xffff; - desc_access = 0x0000008b; - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); - if (error) - goto done; - - sel = 0; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) - goto done; - - /* LDTR */ - desc_base = 0; - desc_limit = 0xffff; - desc_access = 0x00000082; - error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, - desc_limit, desc_access); - if (error) - goto done; - - sel = 0; - if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) - goto done; - - /* XXX cr2, debug registers */ - - error = 0; -done: - return (error); -} - +#ifdef __amd64__ int vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num) { @@ -1324,53 +820,39 @@ vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num) } int -vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities) -{ - int error; - struct vm_hpet_cap cap; - - bzero(&cap, sizeof(struct vm_hpet_cap)); - error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap); - if (capabilities != NULL) - *capabilities = cap.capabilities; - return (error); -} - -int -vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *fault) { struct vm_gla2gpa gg; int error; bzero(&gg, sizeof(struct vm_gla2gpa)); - gg.vcpuid = vcpu; gg.prot = prot; gg.gla = gla; gg.paging = *paging; - error = ioctl(ctx->fd, VM_GLA2GPA, &gg); + error = vcpu_ioctl(vcpu, VM_GLA2GPA, &gg); if (error == 0) { *fault = gg.fault; *gpa = gg.gpa; } return (error); } +#endif int -vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *fault) { struct vm_gla2gpa gg; int error; bzero(&gg, sizeof(struct vm_gla2gpa)); - gg.vcpuid = vcpu; gg.prot = prot; gg.gla = gla; gg.paging = *paging; - error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg); + error = vcpu_ioctl(vcpu, VM_GLA2GPA_NOFAULT, &gg); if (error == 0) { *fault = gg.fault; *gpa = gg.gpa; @@ -1382,8 +864,9 @@ vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, #define min(a,b) (((a) < (b)) ? (a) : (b)) #endif +#ifdef __amd64__ int -vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt, int *fault) { @@ -1398,14 +881,14 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, while (len) { assert(iovcnt > 0); - error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault); + error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault); if (error || *fault) return (error); off = gpa & PAGE_MASK; n = MIN(len, PAGE_SIZE - off); - va = vm_map_gpa(ctx, gpa, n); + va = vm_map_gpa(vcpu->ctx, gpa, n); if (va == NULL) return (EFAULT); @@ -1419,16 +902,20 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, } return (0); } +#endif void -vm_copy_teardown(struct vmctx *ctx __unused, int vcpu __unused, - struct iovec *iov __unused, int iovcnt __unused) +vm_copy_teardown(struct iovec *iov __unused, int iovcnt __unused) { + /* + * Intentionally empty. This is used by the instruction + * emulation code shared with the kernel. The in-kernel + * version of this is non-empty. + */ } void -vm_copyin(struct vmctx *ctx __unused, int vcpu __unused, struct iovec *iov, - void *vp, size_t len) +vm_copyin(struct iovec *iov, void *vp, size_t len) { const char *src; char *dst; @@ -1448,8 +935,7 @@ vm_copyin(struct vmctx *ctx __unused, int vcpu __unused, struct iovec *iov, } void -vm_copyout(struct vmctx *ctx __unused, int vcpu __unused, const void *vp, - struct iovec *iov, size_t len) +vm_copyout(const void *vp, struct iovec *iov, size_t len) { const char *src; char *dst; @@ -1505,135 +991,105 @@ vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus) } int -vm_activate_cpu(struct vmctx *ctx, int vcpu) +vm_activate_cpu(struct vcpu *vcpu) { struct vm_activate_cpu ac; int error; bzero(&ac, sizeof(struct vm_activate_cpu)); - ac.vcpuid = vcpu; - error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); + error = vcpu_ioctl(vcpu, VM_ACTIVATE_CPU, &ac); return (error); } int -vm_suspend_cpu(struct vmctx *ctx, int vcpu) +vm_suspend_all_cpus(struct vmctx *ctx) { struct vm_activate_cpu ac; int error; bzero(&ac, sizeof(struct vm_activate_cpu)); - ac.vcpuid = vcpu; + ac.vcpuid = -1; error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac); return (error); } int -vm_resume_cpu(struct vmctx *ctx, int vcpu) +vm_suspend_cpu(struct vcpu *vcpu) { struct vm_activate_cpu ac; int error; bzero(&ac, sizeof(struct vm_activate_cpu)); - ac.vcpuid = vcpu; - error = ioctl(ctx->fd, VM_RESUME_CPU, &ac); + error = vcpu_ioctl(vcpu, VM_SUSPEND_CPU, &ac); return (error); } int -vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2) +vm_resume_cpu(struct vcpu *vcpu) { - struct vm_intinfo vmii; - int error; - - bzero(&vmii, sizeof(struct vm_intinfo)); - vmii.vcpuid = vcpu; - error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii); - if (error == 0) { - *info1 = vmii.info1; - *info2 = vmii.info2; - } - return (error); -} - -int -vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) -{ - struct vm_intinfo vmii; - int error; - - bzero(&vmii, sizeof(struct vm_intinfo)); - vmii.vcpuid = vcpu; - vmii.info1 = info1; - error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); - return (error); -} - -int -vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value) -{ - struct vm_rtc_data rtcdata; + struct vm_activate_cpu ac; int error; - bzero(&rtcdata, sizeof(struct vm_rtc_data)); - rtcdata.offset = offset; - rtcdata.value = value; - error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata); + bzero(&ac, sizeof(struct vm_activate_cpu)); + error = vcpu_ioctl(vcpu, VM_RESUME_CPU, &ac); return (error); } int -vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval) +vm_resume_all_cpus(struct vmctx *ctx) { - struct vm_rtc_data rtcdata; + struct vm_activate_cpu ac; int error; - bzero(&rtcdata, sizeof(struct vm_rtc_data)); - rtcdata.offset = offset; - error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata); - if (error == 0) - *retval = rtcdata.value; + bzero(&ac, sizeof(struct vm_activate_cpu)); + ac.vcpuid = -1; + error = ioctl(ctx->fd, VM_RESUME_CPU, &ac); return (error); } +#ifdef __amd64__ int -vm_rtc_settime(struct vmctx *ctx, time_t secs) +vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2) { - struct vm_rtc_time rtctime; + struct vm_intinfo vmii; int error; - bzero(&rtctime, sizeof(struct vm_rtc_time)); - rtctime.secs = secs; - error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime); + bzero(&vmii, sizeof(struct vm_intinfo)); + error = vcpu_ioctl(vcpu, VM_GET_INTINFO, &vmii); + if (error == 0) { + *info1 = vmii.info1; + *info2 = vmii.info2; + } return (error); } int -vm_rtc_gettime(struct vmctx *ctx, time_t *secs) +vm_set_intinfo(struct vcpu *vcpu, uint64_t info1) { - struct vm_rtc_time rtctime; + struct vm_intinfo vmii; int error; - bzero(&rtctime, sizeof(struct vm_rtc_time)); - error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime); - if (error == 0) - *secs = rtctime.secs; + bzero(&vmii, sizeof(struct vm_intinfo)); + vmii.info1 = info1; + error = vcpu_ioctl(vcpu, VM_SET_INTINFO, &vmii); return (error); } +#endif +#ifdef WITH_VMMAPI_SNAPSHOT int -vm_restart_instruction(void *arg, int vcpu) +vm_restart_instruction(struct vcpu *vcpu) { - struct vmctx *ctx = arg; + int arg; - return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu)); + return (vcpu_ioctl(vcpu, VM_RESTART_INSTRUCTION, &arg)); } int -vm_snapshot_req(struct vm_snapshot_meta *meta) +vm_snapshot_req(struct vmctx *ctx, struct vm_snapshot_meta *meta) { - if (ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, meta) == -1) { + if (ioctl(ctx->fd, VM_SNAPSHOT_REQ, meta) == -1) { #ifdef SNAPSHOT_DEBUG fprintf(stderr, "%s: snapshot failed for %s: %d\r\n", __func__, meta->dev_name, errno); @@ -1651,6 +1107,7 @@ vm_restore_time(struct vmctx *ctx) dummy = 0; return (ioctl(ctx->fd, VM_RESTORE_TIME, &dummy)); } +#endif int vm_set_topology(struct vmctx *ctx, @@ -1685,47 +1142,45 @@ vm_get_topology(struct vmctx *ctx, } int +vm_limit_rights(struct vmctx *ctx) +{ + cap_rights_t rights; + + cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); + if (caph_rights_limit(ctx->fd, &rights) != 0) + return (-1); + if (caph_ioctls_limit(ctx->fd, vm_ioctl_cmds, vm_ioctl_ncmds) != 0) + return (-1); + return (0); +} + +/* + * Avoid using in new code. Operations on the fd should be wrapped here so that + * capability rights can be kept in sync. + */ +int vm_get_device_fd(struct vmctx *ctx) { return (ctx->fd); } +/* Legacy interface, do not use. */ const cap_ioctl_t * vm_get_ioctls(size_t *len) { cap_ioctl_t *cmds; - /* keep in sync with machine/vmm_dev.h */ - static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT, - VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG, - VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER, - VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR, - VM_SET_REGISTER_SET, VM_GET_REGISTER_SET, - VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV, - VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ, - VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ, - VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ, - VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER, - VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV, - VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI, - VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX, - VM_INJECT_NMI, VM_STATS, VM_STAT_DESC, - VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE, - VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA, - VM_GLA2GPA_NOFAULT, - VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU, - VM_SET_INTINFO, VM_GET_INTINFO, - VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME, - VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY }; + size_t sz; if (len == NULL) { - cmds = malloc(sizeof(vm_ioctl_cmds)); + sz = vm_ioctl_ncmds * sizeof(vm_ioctl_cmds[0]); + cmds = malloc(sz); if (cmds == NULL) return (NULL); - bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds)); + bcopy(vm_ioctl_cmds, cmds, sz); return (cmds); } - *len = nitems(vm_ioctl_cmds); + *len = vm_ioctl_ncmds; return (NULL); } |