aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/vmm
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/vmm')
-rw-r--r-- sys/dev/vmm/vmm_dev.c 143
-rw-r--r-- sys/dev/vmm/vmm_dev.h 20
-rw-r--r-- sys/dev/vmm/vmm_mem.c 40
-rw-r--r-- sys/dev/vmm/vmm_mem.h 6
-rw-r--r-- sys/dev/vmm/vmm_param.h 33
5 files changed, 206 insertions, 36 deletions
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 9f2b009d02ec..d6543bf6534e 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -14,8 +14,11 @@
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
+#include <sys/module.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
@@ -77,6 +80,8 @@ struct vmmdev_softc {
int flags;
};
+static bool vmm_initialized = false;
+
static SLIST_HEAD(, vmmdev_softc) head;
static unsigned pr_allow_flag;
@@ -87,6 +92,10 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
@@ -120,18 +129,18 @@ vcpu_unlock_one(struct vcpu *vcpu)
vcpu_set_state(vcpu, VCPU_IDLE, false);
}
+#ifndef __amd64__
static int
-vcpu_lock_all(struct vmmdev_softc *sc)
+vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
struct vcpu *vcpu;
int error;
uint16_t i, j, maxcpus;
error = 0;
- vm_slock_vcpus(sc->vm);
- maxcpus = vm_get_maxcpus(sc->vm);
+ maxcpus = vm_get_maxcpus(vm);
for (i = 0; i < maxcpus; i++) {
- vcpu = vm_vcpu(sc->vm, i);
+ vcpu = vm_vcpu(vm, i);
if (vcpu == NULL)
continue;
error = vcpu_lock_one(vcpu);
@@ -141,16 +150,32 @@ vcpu_lock_all(struct vmmdev_softc *sc)
if (error) {
for (j = 0; j < i; j++) {
- vcpu = vm_vcpu(sc->vm, j);
+ vcpu = vm_vcpu(vm, j);
if (vcpu == NULL)
continue;
vcpu_unlock_one(vcpu);
}
- vm_unlock_vcpus(sc->vm);
}
return (error);
}
+#endif
+
+/*
+ * Freeze every vCPU of the VM behind 'sc'.
+ *
+ * Returns 0 with the lock taken by vm_lock_vcpus() still held, or the error
+ * from vcpu_set_state_all() with that lock already dropped.
+ */
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+	int rc;
+
+	/*
+	 * The individual vCPUs are not locked in any consistent order, so
+	 * concurrent vcpu_lock_all() callers could deadlock against each
+	 * other.  Taking this lock first serializes them.
+	 */
+	vm_lock_vcpus(sc->vm);
+	rc = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
+	if (rc == 0)
+		return (0);
+
+	/* Failure: do not leave the serializing lock held. */
+	vm_unlock_vcpus(sc->vm);
+	return (rc);
+}
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
@@ -454,6 +479,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
if (ioctl == NULL)
return (ENOTTY);
+ if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
+ error = priv_check(td, PRIV_DRIVER);
+ if (error != 0)
+ return (error);
+ }
+
if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
vm_xlock_memsegs(sc->vm);
else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
@@ -596,20 +627,16 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
}
error = domainset_populate(&domain, mask, mseg->ds_policy,
mseg->ds_mask_size);
- if (error) {
- free(mask, M_VMMDEV);
+ free(mask, M_VMMDEV);
+ if (error)
break;
- }
domainset = domainset_create(&domain);
if (domainset == NULL) {
error = EINVAL;
- free(mask, M_VMMDEV);
break;
}
- free(mask, M_VMMDEV);
}
error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
-
break;
}
case VM_GET_MEMSEG:
@@ -640,10 +667,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = EINVAL;
break;
}
- regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
- regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
+ regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
+ M_VMMDEV, M_WAITOK);
+ regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
+ M_VMMDEV, M_WAITOK);
error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
vmregset->count);
if (error == 0)
@@ -666,10 +693,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = EINVAL;
break;
}
- regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
- regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
+ regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
+ M_VMMDEV, M_WAITOK);
+ regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
+ M_VMMDEV, M_WAITOK);
error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
vmregset->count);
if (error == 0)
@@ -901,6 +928,7 @@ vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
sc->cdev = NULL;
sx_xunlock(&vmmdev_mtx);
+ vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
destroy_dev(cdev);
vmmdev_destroy(sc);
@@ -961,6 +989,9 @@ vmmdev_create(const char *name, struct ucred *cred)
struct vm *vm;
int error;
+ if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
+ return (EINVAL);
+
sx_xlock(&vmmdev_mtx);
sc = vmmdev_lookup(name, cred);
if (sc != NULL) {
@@ -1001,6 +1032,9 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
char *buf;
int error, buflen;
+ if (!vmm_initialized)
+ return (ENXIO);
+
error = vmm_priv_check(req->td->td_ucred);
if (error != 0)
return (error);
@@ -1086,7 +1120,7 @@ static struct cdevsw vmmctlsw = {
.d_ioctl = vmmctl_ioctl,
};
-int
+static int
vmmdev_init(void)
{
int error;
@@ -1102,7 +1136,7 @@ vmmdev_init(void)
return (error);
}
-int
+static int
vmmdev_cleanup(void)
{
sx_xlock(&vmmdev_mtx);
@@ -1120,6 +1154,71 @@ vmmdev_cleanup(void)
}
+/*
+ * Module event handler for vmm.
+ *
+ * MOD_LOAD: run vmmdev_init(), choose vm_maxcpu (mp_ncpus by default,
+ * overridable through the "hw.vmm.maxcpu" tunable, then forced into the
+ * range [1, VM_MAXCPU]) and run vmm_modinit().  vmm_initialized is set only
+ * when vmm_modinit() succeeds; when it fails, vmmdev_cleanup() is called and
+ * the vmm_modinit() error is returned to the caller.
+ *
+ * MOD_UNLOAD: run vmmdev_cleanup() and, if that succeeded and the module had
+ * been initialized, vmm_modcleanup().
+ *
+ * Any other event is accepted and ignored.
+ *
+ * Returns 0 on success, otherwise the error reported by the failing step.
+ */
static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+	int error;
+
+	switch (what) {
+	case MOD_LOAD:
+		error = vmmdev_init();
+		if (error != 0)
+			break;
+
+		vm_maxcpu = mp_ncpus;
+		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+		if (vm_maxcpu > VM_MAXCPU) {
+			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+			vm_maxcpu = VM_MAXCPU;
+		}
+		if (vm_maxcpu == 0)
+			vm_maxcpu = 1;
+
+		error = vmm_modinit();
+		if (error == 0)
+			vmm_initialized = true;
+		else {
+			int error1 __diagused;
+
+			/*
+			 * Keep the vmm_modinit() failure in 'error'.  Storing
+			 * the cleanup result there would turn a failed load
+			 * into a reported success whenever cleanup returns 0.
+			 */
+			error1 = vmmdev_cleanup();
+			KASSERT(error1 == 0,
+			    ("%s: vmmdev_cleanup failed: %d", __func__, error1));
+		}
+		break;
+	case MOD_UNLOAD:
+		error = vmmdev_cleanup();
+		if (error == 0 && vmm_initialized) {
+			error = vmm_modcleanup();
+			if (error) {
+				/*
+				 * Something bad happened - prevent new
+				 * VMs from being created
+				 */
+				vmm_initialized = false;
+			}
+		}
+		break;
+	default:
+		error = 0;
+		break;
+	}
+	return (error);
+}
+
+/* Module description handed to DECLARE_MODULE(). */
+static moduledata_t vmm_kmod = {
+	.name = "vmm",
+	.evhand = vmm_handler,
+	.priv = NULL,
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - Initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ * - vmm device initialization requires an initialized devfs.
+ */
+DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
struct vm_object **objp, int nprot)
{
diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h
index 410066c49cf2..f14176c8afad 100644
--- a/sys/dev/vmm/vmm_dev.h
+++ b/sys/dev/vmm/vmm_dev.h
@@ -11,15 +11,19 @@
#include <sys/types.h>
#include <sys/ioccom.h>
+
#include <machine/vmm_dev.h>
+#include <dev/vmm/vmm_param.h>
+
#ifdef _KERNEL
struct thread;
struct vm;
struct vcpu;
-int vmmdev_init(void);
-int vmmdev_cleanup(void);
+int vmm_modinit(void);
+int vmm_modcleanup(void);
+
int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd,
caddr_t data, int fflag, struct thread *td);
@@ -44,6 +48,7 @@ struct vmmdev_ioctl {
#define VMMDEV_IOCTL_LOCK_ALL_VCPUS 0x08
#define VMMDEV_IOCTL_ALLOC_VCPU 0x10
#define VMMDEV_IOCTL_MAYBE_ALLOC_VCPU 0x20
+#define VMMDEV_IOCTL_PRIV_CHECK_DRIVER 0x40
int flags;
};
@@ -52,6 +57,17 @@ struct vmmdev_ioctl {
extern const struct vmmdev_ioctl vmmdev_machdep_ioctls[];
extern const size_t vmmdev_machdep_ioctl_count;
+/*
+ * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU counts as
+ * well as range of vpid values for VT-x on amd64 and by the capacity of
+ * cpuset_t masks. The call to new_unrhdr() in vpid_init() in vmx.c requires
+ * 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below.
+ */
+#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
+
+/* Maximum number of vCPUs in a single VM. */
+extern u_int vm_maxcpu;
+
#endif /* _KERNEL */
struct vmmctl_vm_create {
diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c
index be59e37de33d..5ae944713c81 100644
--- a/sys/dev/vmm/vmm_mem.c
+++ b/sys/dev/vmm/vmm_mem.c
@@ -26,10 +26,14 @@
static void vm_free_memmap(struct vm *vm, int ident);
-void
-vm_mem_init(struct vm_mem *mem)
+int
+vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi)
{
+ mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi);
+ if (mem->mem_vmspace == NULL)
+ return (ENOMEM);
sx_init(&mem->mem_segs_lock, "vm_mem_segs");
+ return (0);
}
static bool
@@ -93,10 +97,21 @@ vm_mem_destroy(struct vm *vm)
for (int i = 0; i < VM_MAX_MEMSEGS; i++)
vm_free_memseg(vm, i);
+ vmmops_vmspace_free(mem->mem_vmspace);
+
sx_xunlock(&mem->mem_segs_lock);
sx_destroy(&mem->mem_segs_lock);
}
+/*
+ * Return the vmspace pointer recorded in the VM's memory state
+ * (the mem_vmspace field of the structure returned by vm_mem()).
+ */
+struct vmspace *
+vm_vmspace(struct vm *vm)
+{
+	return (vm_mem(vm)->mem_vmspace);
+}
+
void
vm_slock_memsegs(struct vm *vm)
{
@@ -246,7 +261,7 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
struct vm_mem *mem;
struct vm_mem_seg *seg;
struct vm_mem_map *m, *map;
- struct vmspace *vmspace;
+ struct vm_map *vmmap;
vm_ooffset_t last;
int i, error;
@@ -264,8 +279,10 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
if (seg->object == NULL)
return (EINVAL);
+ if (first + len < first || gpa + len < gpa)
+ return (EINVAL);
last = first + len;
- if (first < 0 || first >= last || last > seg->len)
+ if (first >= last || last > seg->len)
return (EINVAL);
if ((gpa | first | last) & PAGE_MASK)
@@ -282,19 +299,20 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
if (map == NULL)
return (ENOSPC);
- vmspace = vm_vmspace(vm);
- error = vm_map_find(&vmspace->vm_map, seg->object, first, &gpa,
- len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ vmmap = &mem->mem_vmspace->vm_map;
+ vm_map_lock(vmmap);
+ error = vm_map_insert(vmmap, seg->object, first, gpa, gpa + len,
+ prot, prot, 0);
+ vm_map_unlock(vmmap);
if (error != KERN_SUCCESS)
- return (EFAULT);
-
+ return (vm_mmap_to_errno(error));
vm_object_reference(seg->object);
if (flags & VM_MEMMAP_F_WIRED) {
- error = vm_map_wire(&vmspace->vm_map, gpa, gpa + len,
+ error = vm_map_wire(vmmap, gpa, gpa + len,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
if (error != KERN_SUCCESS) {
- vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
+ vm_map_remove(vmmap, gpa, gpa + len);
return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
EFAULT);
}
diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h
index 856470cf2590..f3d22058c7b8 100644
--- a/sys/dev/vmm/vmm_mem.h
+++ b/sys/dev/vmm/vmm_mem.h
@@ -36,6 +36,7 @@ enum {
struct vm;
struct vm_object;
+struct vmspace;
struct vm_mem_seg {
size_t len;
@@ -56,12 +57,15 @@ struct vm_mem {
struct vm_mem_map mem_maps[VM_MAX_MEMMAPS];
struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS];
struct sx mem_segs_lock;
+ struct vmspace *mem_vmspace;
};
-void vm_mem_init(struct vm_mem *mem);
+int vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi);
void vm_mem_cleanup(struct vm *vm);
void vm_mem_destroy(struct vm *vm);
+struct vmspace *vm_vmspace(struct vm *vm);
+
/*
* APIs that modify the guest memory map require all vcpus to be frozen.
*/
diff --git a/sys/dev/vmm/vmm_param.h b/sys/dev/vmm/vmm_param.h
new file mode 100644
index 000000000000..a5040eb0f58c
--- /dev/null
+++ b/sys/dev/vmm/vmm_param.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _DEV_VMM_PARAM_H_
+#define _DEV_VMM_PARAM_H_
+
+/*
+ * The VM name has to fit into the pathname length constraints of devfs,
+ * governed primarily by SPECNAMELEN. The length is the total number of
+ * characters in the full path, relative to the mount point and not
+ * including any leading '/' characters.
+ * A prefix and a suffix are added to the name specified by the user.
+ * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
+ * longer for future use.
+ * The suffix is a string that identifies a bootrom image or some similar
+ * image that is attached to the VM. A separator character gets added to
+ * the suffix automatically when generating the full path, so it must be
+ * accounted for, reducing the effective length by 1.
+ * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
+ * bytes for FreeBSD 12. A minimum length is set for safety and supports
+ * a SPECNAMELEN as small as 32 on old systems.
+ */
+#define VM_MAX_PREFIXLEN 10
+#define VM_MAX_SUFFIXLEN 15
+#define VM_MIN_NAMELEN 6
+#define VM_MAX_NAMELEN \
+ (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)
+
+#endif /* !_DEV_VMM_PARAM_H_ */