aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/vmm/vmm_dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/vmm/vmm_dev.c')
-rw-r--r--  sys/dev/vmm/vmm_dev.c | 266
1 file changed, 231 insertions(+), 35 deletions(-)
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 819debadd1ac..d6543bf6534e 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -14,8 +14,11 @@
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
+#include <sys/module.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
@@ -30,7 +33,8 @@
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
int segid;
size_t len;
@@ -42,7 +46,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
-#endif
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+struct vm_memseg_14 {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
+ "COMPAT_FREEBSD14 ABI");
+
+#define VM_ALLOC_MEMSEG_14 \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
+#define VM_GET_MEMSEG_14 \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
struct devmem_softc {
int segid;
@@ -61,6 +80,8 @@ struct vmmdev_softc {
int flags;
};
+static bool vmm_initialized = false;
+
static SLIST_HEAD(, vmmdev_softc) head;
static unsigned pr_allow_flag;
@@ -71,6 +92,10 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
@@ -104,18 +129,18 @@ vcpu_unlock_one(struct vcpu *vcpu)
vcpu_set_state(vcpu, VCPU_IDLE, false);
}
+#ifndef __amd64__
static int
-vcpu_lock_all(struct vmmdev_softc *sc)
+vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
struct vcpu *vcpu;
int error;
uint16_t i, j, maxcpus;
error = 0;
- vm_slock_vcpus(sc->vm);
- maxcpus = vm_get_maxcpus(sc->vm);
+ maxcpus = vm_get_maxcpus(vm);
for (i = 0; i < maxcpus; i++) {
- vcpu = vm_vcpu(sc->vm, i);
+ vcpu = vm_vcpu(vm, i);
if (vcpu == NULL)
continue;
error = vcpu_lock_one(vcpu);
@@ -125,16 +150,32 @@ vcpu_lock_all(struct vmmdev_softc *sc)
if (error) {
for (j = 0; j < i; j++) {
- vcpu = vm_vcpu(sc->vm, j);
+ vcpu = vm_vcpu(vm, j);
if (vcpu == NULL)
continue;
vcpu_unlock_one(vcpu);
}
- vm_unlock_vcpus(sc->vm);
}
return (error);
}
+#endif
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ int error;
+
+ /*
+ * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked
+ * in a consistent order so we need to serialize to avoid deadlocks.
+ */
+ vm_lock_vcpus(sc->vm);
+ error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
+ if (error != 0)
+ vm_unlock_vcpus(sc->vm);
+ return (error);
+}
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
@@ -257,7 +298,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
}
static int
-alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
+ struct domainset *domainset)
{
char *name;
int error;
@@ -278,8 +320,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
if (error)
goto done;
}
-
- error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
if (error)
goto done;
@@ -295,6 +336,20 @@ done:
return (error);
}
+#if defined(__amd64__) && \
+ (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
+/*
+ * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
+ */
+static void
+adjust_segid(struct vm_memseg *mseg)
+{
+ if (mseg->segid != VM_SYSMEM) {
+ mseg->segid += (VM_BOOTROM - 1);
+ }
+}
+#endif
+
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
uint64_t *regval)
@@ -353,10 +408,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_STAT_DESC, 0),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
+ VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MMAP_MEMSEG,
@@ -366,9 +427,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_REINIT,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#if defined(COMPAT_FREEBSD12)
VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
@@ -388,6 +454,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vmmdev_softc *sc;
struct vcpu *vcpu;
const struct vmmdev_ioctl *ioctl;
+ struct vm_memseg *mseg;
int error, vcpuid;
sc = vmmdev_lookup2(cdev);
@@ -412,6 +479,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
if (ioctl == NULL)
return (ENOTTY);
+ if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
+ error = priv_check(td, PRIV_DRIVER);
+ if (error != 0)
+ return (error);
+ }
+
if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
vm_xlock_memsegs(sc->vm);
else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
@@ -499,20 +572,73 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
}
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
case VM_ALLOC_MEMSEG_12:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg_12 *)0)->name));
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_12 *)0)->name), NULL);
break;
case VM_GET_MEMSEG_12:
- error = get_memseg(sc, (struct vm_memseg *)data,
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
-#endif
- case VM_ALLOC_MEMSEG:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg *)0)->name));
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+ case VM_ALLOC_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name), NULL);
+ break;
+ case VM_GET_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name));
+ break;
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
+ case VM_ALLOC_MEMSEG: {
+ domainset_t *mask;
+ struct domainset *domainset, domain;
+
+ domainset = NULL;
+ mseg = (struct vm_memseg *)data;
+ if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
+ if (mseg->ds_mask_size < sizeof(domainset_t) ||
+ mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ memset(&domain, 0, sizeof(domain));
+ mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
+ error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ error = domainset_populate(&domain, mask, mseg->ds_policy,
+ mseg->ds_mask_size);
+ free(mask, M_VMMDEV);
+ if (error)
+ break;
+ domainset = domainset_create(&domain);
+ if (domainset == NULL) {
+ error = EINVAL;
+ break;
+ }
+ }
+ error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
break;
+ }
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
@@ -541,10 +667,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = EINVAL;
break;
}
- regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
- regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
+ regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
+ M_VMMDEV, M_WAITOK);
+ regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
+ M_VMMDEV, M_WAITOK);
error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
vmregset->count);
if (error == 0)
@@ -567,10 +693,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = EINVAL;
break;
}
- regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
- regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
- M_WAITOK);
+ regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
+ M_VMMDEV, M_WAITOK);
+ regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
+ M_VMMDEV, M_WAITOK);
error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
vmregset->count);
if (error == 0)
@@ -802,6 +928,7 @@ vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
sc->cdev = NULL;
sx_xunlock(&vmmdev_mtx);
+ vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
destroy_dev(cdev);
vmmdev_destroy(sc);
@@ -820,7 +947,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
@@ -830,7 +956,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_destroy, "A",
- NULL);
+ "Destroy a vmm(4) instance (legacy interface)");
static struct cdevsw vmmdevsw = {
.d_name = "vmmdev",
@@ -863,6 +989,9 @@ vmmdev_create(const char *name, struct ucred *cred)
struct vm *vm;
int error;
+ if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
+ return (EINVAL);
+
sx_xlock(&vmmdev_mtx);
sc = vmmdev_lookup(name, cred);
if (sc != NULL) {
@@ -903,13 +1032,15 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
char *buf;
int error, buflen;
+ if (!vmm_initialized)
+ return (ENXIO);
+
error = vmm_priv_check(req->td->td_ucred);
if (error != 0)
return (error);
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_create(buf, req->td->td_ucred);
@@ -919,7 +1050,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_create, "A",
- NULL);
+ "Create a vmm(4) instance (legacy interface)");
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
@@ -989,7 +1120,7 @@ static struct cdevsw vmmctlsw = {
.d_ioctl = vmmctl_ioctl,
};
-int
+static int
vmmdev_init(void)
{
int error;
@@ -1005,7 +1136,7 @@ vmmdev_init(void)
return (error);
}
-int
+static int
vmmdev_cleanup(void)
{
sx_xlock(&vmmdev_mtx);
@@ -1023,6 +1154,71 @@ vmmdev_cleanup(void)
}
static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ error = vmmdev_init();
+ if (error != 0)
+ break;
+
+ vm_maxcpu = mp_ncpus;
+ TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+ if (vm_maxcpu > VM_MAXCPU) {
+ printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+ vm_maxcpu = VM_MAXCPU;
+ }
+ if (vm_maxcpu == 0)
+ vm_maxcpu = 1;
+
+ error = vmm_modinit();
+ if (error == 0)
+ vmm_initialized = true;
+ else {
+ error = vmmdev_cleanup();
+ KASSERT(error == 0,
+ ("%s: vmmdev_cleanup failed: %d", __func__, error));
+ }
+ break;
+ case MOD_UNLOAD:
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = vmm_modcleanup();
+ if (error) {
+ /*
+ * Something bad happened - prevent new
+ * VMs from being created
+ */
+ vmm_initialized = false;
+ }
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - Initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ * - vmm device initialization requires an initialized devfs.
+ */
+DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
struct vm_object **objp, int nprot)
{