diff options
Diffstat (limited to 'sys/dev/vmm')
| -rw-r--r-- | sys/dev/vmm/vmm_dev.c | 143 | ||||
| -rw-r--r-- | sys/dev/vmm/vmm_dev.h | 20 | ||||
| -rw-r--r-- | sys/dev/vmm/vmm_mem.c | 40 | ||||
| -rw-r--r-- | sys/dev/vmm/vmm_mem.h | 6 | ||||
| -rw-r--r-- | sys/dev/vmm/vmm_param.h | 33 |
5 files changed, 206 insertions, 36 deletions
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 9f2b009d02ec..d6543bf6534e 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -14,8 +14,11 @@ #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mman.h> +#include <sys/module.h> +#include <sys/priv.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/smp.h> #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/ucred.h> @@ -77,6 +80,8 @@ struct vmmdev_softc { int flags; }; +static bool vmm_initialized = false; + static SLIST_HEAD(, vmmdev_softc) head; static unsigned pr_allow_flag; @@ -87,6 +92,10 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + static void devmem_destroy(void *arg); static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem); @@ -120,18 +129,18 @@ vcpu_unlock_one(struct vcpu *vcpu) vcpu_set_state(vcpu, VCPU_IDLE, false); } +#ifndef __amd64__ static int -vcpu_lock_all(struct vmmdev_softc *sc) +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) { struct vcpu *vcpu; int error; uint16_t i, j, maxcpus; error = 0; - vm_slock_vcpus(sc->vm); - maxcpus = vm_get_maxcpus(sc->vm); + maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = vm_vcpu(sc->vm, i); + vcpu = vm_vcpu(vm, i); if (vcpu == NULL) continue; error = vcpu_lock_one(vcpu); @@ -141,16 +150,32 @@ vcpu_lock_all(struct vmmdev_softc *sc) if (error) { for (j = 0; j < i; j++) { - vcpu = vm_vcpu(sc->vm, j); + vcpu = vm_vcpu(vm, j); if (vcpu == NULL) continue; vcpu_unlock_one(vcpu); } - vm_unlock_vcpus(sc->vm); } return (error); } +#endif + +static int +vcpu_lock_all(struct vmmdev_softc *sc) +{ + int error; + + /* + * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked + * in a consistent order so we need to serialize to avoid deadlocks. + */ + vm_lock_vcpus(sc->vm); + error = vcpu_set_state_all(sc->vm, VCPU_FROZEN); + if (error != 0) + vm_unlock_vcpus(sc->vm); + return (error); +} static void vcpu_unlock_all(struct vmmdev_softc *sc) @@ -454,6 +479,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, if (ioctl == NULL) return (ENOTTY); + if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) { + error = priv_check(td, PRIV_DRIVER); + if (error != 0) + return (error); + } + if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0) vm_xlock_memsegs(sc->vm); else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0) @@ -596,20 +627,16 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, } error = domainset_populate(&domain, mask, mseg->ds_policy, mseg->ds_mask_size); - if (error) { - free(mask, M_VMMDEV); + free(mask, M_VMMDEV); + if (error) break; - } domainset = domainset_create(&domain); if (domainset == NULL) { error = EINVAL; - free(mask, M_VMMDEV); break; } - free(mask, M_VMMDEV); } error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset); - break; } case VM_GET_MEMSEG: @@ -640,10 +667,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = EINVAL; break; } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); + regvals = mallocarray(vmregset->count, sizeof(regvals[0]), + M_VMMDEV, M_WAITOK); + regnums = mallocarray(vmregset->count, sizeof(regnums[0]), + M_VMMDEV, M_WAITOK); error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * vmregset->count); if (error == 0) @@ -666,10 +693,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = EINVAL; break; } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); + regvals = mallocarray(vmregset->count, sizeof(regvals[0]), + M_VMMDEV, M_WAITOK); + regnums = mallocarray(vmregset->count, sizeof(regnums[0]), + M_VMMDEV, M_WAITOK); error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * vmregset->count); if (error == 0) @@ -901,6 +928,7 @@ vmmdev_lookup_and_destroy(const char *name, struct ucred *cred) sc->cdev = NULL; sx_xunlock(&vmmdev_mtx); + vm_suspend(sc->vm, VM_SUSPEND_DESTROY); destroy_dev(cdev); vmmdev_destroy(sc); @@ -961,6 +989,9 @@ vmmdev_create(const char *name, struct ucred *cred) struct vm *vm; int error; + if (name == NULL || strlen(name) > VM_MAX_NAMELEN) + return (EINVAL); + sx_xlock(&vmmdev_mtx); sc = vmmdev_lookup(name, cred); if (sc != NULL) { @@ -1001,6 +1032,9 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) char *buf; int error, buflen; + if (!vmm_initialized) + return (ENXIO); + error = vmm_priv_check(req->td->td_ucred); if (error != 0) return (error); @@ -1086,7 +1120,7 @@ static struct cdevsw vmmctlsw = { .d_ioctl = vmmctl_ioctl, }; -int +static int vmmdev_init(void) { int error; @@ -1102,7 +1136,7 @@ vmmdev_init(void) return (error); } -int +static int vmmdev_cleanup(void) { sx_xlock(&vmmdev_mtx); @@ -1120,6 +1154,71 @@ vmmdev_cleanup(void) } static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + error = vmmdev_init(); + if (error != 0) + break; + + vm_maxcpu = mp_ncpus; + TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + if (vm_maxcpu == 0) + vm_maxcpu = 1; + + error = vmm_modinit(); + if (error == 0) + vmm_initialized = true; + else { + error = vmmdev_cleanup(); + KASSERT(error == 0, + ("%s: vmmdev_cleanup failed: %d", __func__, error)); + } + break; + case MOD_UNLOAD: + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = vmm_modcleanup(); + if (error) { + /* + * Something bad happened - prevent new + * VMs from being created + */ + vmm_initialized = false; + } + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - Initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). + * - vmm device initialization requires an initialized devfs. + */ +DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +static int devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, struct vm_object **objp, int nprot) { diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h index 410066c49cf2..f14176c8afad 100644 --- a/sys/dev/vmm/vmm_dev.h +++ b/sys/dev/vmm/vmm_dev.h @@ -11,15 +11,19 @@ #include <sys/types.h> #include <sys/ioccom.h> + #include <machine/vmm_dev.h> +#include <dev/vmm/vmm_param.h> + #ifdef _KERNEL struct thread; struct vm; struct vcpu; -int vmmdev_init(void); -int vmmdev_cleanup(void); +int vmm_modinit(void); +int vmm_modcleanup(void); + int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td); @@ -44,6 +48,7 @@ struct vmmdev_ioctl { #define VMMDEV_IOCTL_LOCK_ALL_VCPUS 0x08 #define VMMDEV_IOCTL_ALLOC_VCPU 0x10 #define VMMDEV_IOCTL_MAYBE_ALLOC_VCPU 0x20 +#define VMMDEV_IOCTL_PRIV_CHECK_DRIVER 0x40 int flags; }; @@ -52,6 +57,17 @@ struct vmmdev_ioctl { extern const struct vmmdev_ioctl vmmdev_machdep_ioctls[]; extern const size_t vmmdev_machdep_ioctl_count; +/* + * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU counts as + * well as range of vpid values for VT-x on amd64 and by the capacity of + * cpuset_t masks. The call to new_unrhdr() in vpid_init() in vmx.c requires + * 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + +/* Maximum number of vCPUs in a single VM. */ +extern u_int vm_maxcpu; + #endif /* _KERNEL */ struct vmmctl_vm_create { diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c index be59e37de33d..5ae944713c81 100644 --- a/sys/dev/vmm/vmm_mem.c +++ b/sys/dev/vmm/vmm_mem.c @@ -26,10 +26,14 @@ static void vm_free_memmap(struct vm *vm, int ident); -void -vm_mem_init(struct vm_mem *mem) +int +vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi) { + mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi); + if (mem->mem_vmspace == NULL) + return (ENOMEM); sx_init(&mem->mem_segs_lock, "vm_mem_segs"); + return (0); } static bool @@ -93,10 +97,21 @@ vm_mem_destroy(struct vm *vm) for (int i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); + vmmops_vmspace_free(mem->mem_vmspace); + sx_xunlock(&mem->mem_segs_lock); sx_destroy(&mem->mem_segs_lock); } +struct vmspace * +vm_vmspace(struct vm *vm) +{ + struct vm_mem *mem; + + mem = vm_mem(vm); + return (mem->mem_vmspace); +} + void vm_slock_memsegs(struct vm *vm) { @@ -246,7 +261,7 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, struct vm_mem *mem; struct vm_mem_seg *seg; struct vm_mem_map *m, *map; - struct vmspace *vmspace; + struct vm_map *vmmap; vm_ooffset_t last; int i, error; @@ -264,8 +279,10 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, if (seg->object == NULL) return (EINVAL); + if (first + len < first || gpa + len < gpa) + return (EINVAL); last = first + len; - if (first < 0 || first >= last || last > seg->len) + if (first >= last || last > seg->len) return (EINVAL); if ((gpa | first | last) & PAGE_MASK) @@ -282,19 +299,20 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, if (map == NULL) return (ENOSPC); - vmspace = vm_vmspace(vm); - error = vm_map_find(&vmspace->vm_map, seg->object, first, &gpa, - len, 0, VMFS_NO_SPACE, prot, prot, 0); + vmmap = &mem->mem_vmspace->vm_map; + vm_map_lock(vmmap); + error = vm_map_insert(vmmap, seg->object, first, gpa, gpa + len, + prot, prot, 0); + vm_map_unlock(vmmap); if (error != KERN_SUCCESS) - return (EFAULT); - + return (vm_mmap_to_errno(error)); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { - error = vm_map_wire(&vmspace->vm_map, gpa, gpa + len, + error = vm_map_wire(vmmap, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { - vm_map_remove(&vmspace->vm_map, gpa, gpa + len); + vm_map_remove(vmmap, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : EFAULT); } diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h index 856470cf2590..f3d22058c7b8 100644 --- a/sys/dev/vmm/vmm_mem.h +++ b/sys/dev/vmm/vmm_mem.h @@ -36,6 +36,7 @@ enum { struct vm; struct vm_object; +struct vmspace; struct vm_mem_seg { size_t len; @@ -56,12 +57,15 @@ struct vm_mem { struct vm_mem_map mem_maps[VM_MAX_MEMMAPS]; struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS]; struct sx mem_segs_lock; + struct vmspace *mem_vmspace; }; -void vm_mem_init(struct vm_mem *mem); +int vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi); void vm_mem_cleanup(struct vm *vm); void vm_mem_destroy(struct vm *vm); +struct vmspace *vm_vmspace(struct vm *vm); + /* * APIs that modify the guest memory map require all vcpus to be frozen. */ diff --git a/sys/dev/vmm/vmm_param.h b/sys/dev/vmm/vmm_param.h new file mode 100644 index 000000000000..a5040eb0f58c --- /dev/null +++ b/sys/dev/vmm/vmm_param.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + */ + +#ifndef _DEV_VMM_PARAM_H_ +#define _DEV_VMM_PARAM_H_ + +/* + * The VM name has to fit into the pathname length constraints of devfs, + * governed primarily by SPECNAMELEN. The length is the total number of + * characters in the full path, relative to the mount point and not + * including any leading '/' characters. + * A prefix and a suffix are added to the name specified by the user. + * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters + * longer for future use. + * The suffix is a string that identifies a bootrom image or some similar + * image that is attached to the VM. A separator character gets added to + * the suffix automatically when generating the full path, so it must be + * accounted for, reducing the effective length by 1. + * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 + * bytes for FreeBSD 12. A minimum length is set for safety and supports + * a SPECNAMELEN as small as 32 on old systems. + */ +#define VM_MAX_PREFIXLEN 10 +#define VM_MAX_SUFFIXLEN 15 +#define VM_MIN_NAMELEN 6 +#define VM_MAX_NAMELEN \ + (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) + +#endif /* !_DEV_VMM_PARAM_H_ */ |
