aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mark Johnston <markj@FreeBSD.org> 2026-01-08 21:54:06 +0000
committer: Mark Johnston <markj@FreeBSD.org> 2026-01-08 21:54:06 +0000
commit: ed85203fb7a0334041db6da07e45ddda4caef13d (patch)
tree: 624a98f63e3128d54d853c404c2b8244555fa88f
parent: a6411f6b7df46edc7167a7844ed443db7a91a031 (diff)
-rw-r--r-- sys/amd64/include/vmm.h | 37
-rw-r--r-- sys/amd64/vmm/io/vioapic.c | 1
-rw-r--r-- sys/amd64/vmm/io/vlapic.c | 1
-rw-r--r-- sys/amd64/vmm/vmm.c | 483
-rw-r--r-- sys/amd64/vmm/vmm_lapic.c | 1
-rw-r--r-- sys/amd64/vmm/x86.c | 1
-rw-r--r-- sys/arm64/include/vmm.h | 26
-rw-r--r-- sys/arm64/vmm/io/vgic_v3.c | 1
-rw-r--r-- sys/arm64/vmm/vmm.c | 317
-rw-r--r-- sys/dev/vmm/vmm_dev.c | 32
-rw-r--r-- sys/dev/vmm/vmm_vm.c | 476
-rw-r--r-- sys/dev/vmm/vmm_vm.h | 49
-rw-r--r-- sys/modules/vmm/Makefile | 3
-rw-r--r-- sys/riscv/include/vmm.h | 28
-rw-r--r-- sys/riscv/vmm/vmm.c | 317
-rw-r--r-- sys/riscv/vmm/vmm_aplic.c | 1
-rw-r--r-- sys/riscv/vmm/vmm_sbi.c | 2
-rw-r--r-- sys/riscv/vmm/vmm_vtimer.c | 2
18 files changed, 551 insertions, 1227 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index e1df85aa91b6..baf2cf42ad6c 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -233,19 +233,7 @@ struct vmm_ops {
extern const struct vmm_ops vmm_ops_intel;
extern const struct vmm_ops vmm_ops_amd;
-int vm_create(const char *name, struct vm **retvm);
-struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
-void vm_disable_vcpu_creation(struct vm *vm);
-void vm_lock_vcpus(struct vm *vm);
-void vm_unlock_vcpus(struct vm *vm);
-void vm_destroy(struct vm *vm);
-int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
-uint16_t vm_get_maxcpus(struct vm *vm);
-void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
- uint16_t *threads, uint16_t *maxcpus);
-int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
- uint16_t threads, uint16_t maxcpus);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
@@ -259,7 +247,6 @@ int vm_get_seg_desc(struct vcpu *vcpu, int reg,
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
struct seg_desc *desc);
int vm_run(struct vcpu *vcpu);
-int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vcpu *vcpu);
int vm_nmi_pending(struct vcpu *vcpu);
void vm_nmi_clear(struct vcpu *vcpu);
@@ -277,9 +264,6 @@ int vm_set_capability(struct vcpu *vcpu, int type, int val);
int vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
-int vm_activate_cpu(struct vcpu *vcpu);
-int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
-int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_restart_instruction(struct vcpu *vcpu);
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
cpuset_t *vm_exitinfo_cpuset(struct vcpu *vcpu);
@@ -292,24 +276,6 @@ int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
int vm_restore_time(struct vm *vm);
#ifdef _SYS__CPUSET_H_
-/*
- * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
- * The rendezvous 'func(arg)' is not allowed to do anything that will
- * cause the thread to be put to sleep.
- *
- * The caller cannot hold any locks when initiating the rendezvous.
- *
- * The implementation of this API may cause vcpus other than those specified
- * by 'dest' to be stalled. The caller should not rely on any vcpus making
- * forward progress when the rendezvous is in progress.
- */
-typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg);
-int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest,
- vm_rendezvous_func_t func, void *arg);
-
-cpuset_t vm_active_cpus(struct vm *vm);
-cpuset_t vm_debug_cpus(struct vm *vm);
-cpuset_t vm_suspended_cpus(struct vm *vm);
cpuset_t vm_start_cpus(struct vm *vm, const cpuset_t *tostart);
void vm_await_start(struct vm *vm, const cpuset_t *waiting);
#endif /* _SYS__CPUSET_H_ */
@@ -341,8 +307,6 @@ vcpu_reqidle(struct vm_eventinfo *info)
return (*info->iptr);
}
-int vcpu_debugged(struct vcpu *vcpu);
-
/*
* Return true if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
@@ -354,7 +318,6 @@ bool vmm_is_pptdev(int bus, int slot, int func);
void *vm_iommu_domain(struct vm *vm);
void *vcpu_stats(struct vcpu *vcpu);
-void vcpu_notify_event(struct vcpu *vcpu);
void vcpu_notify_lapic(struct vcpu *vcpu);
struct vm_mem *vm_mem(struct vm *vm);
struct vatpic *vm_atpic(struct vm *vm);
diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c
index 7df6193d6dc0..a3956785d049 100644
--- a/sys/amd64/vmm/io/vioapic.c
+++ b/sys/amd64/vmm/io/vioapic.c
@@ -43,6 +43,7 @@
#include <machine/vmm_snapshot.h>
#include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_vm.h>
#include "vmm_lapic.h"
#include "vlapic.h"
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index afd5045de574..6849ef7aa589 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -47,6 +47,7 @@
#include <machine/vmm_snapshot.h>
#include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_vm.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 4b93e020f8dc..b0712c3eb6ac 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -161,8 +161,7 @@ static MALLOC_DEFINE(M_VM, "vm", "vm");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
-SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
- NULL);
+SYSCTL_DECL(_hw_vmm);
/*
* Halt the guest if all vcpus are executing a HLT instruction with
@@ -173,10 +172,6 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
&halt_detection_enabled, 0,
"Halt VM if all vcpus execute HLT with interrupts disabled");
-static int vmm_ipinum;
-SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
- "IPI vector used for vcpu notifications");
-
static int trace_guest_exceptions;
SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
&trace_guest_exceptions, 0,
@@ -186,8 +181,6 @@ static int trap_wbinvd;
SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0,
"WBINVD triggers a VM-exit");
-static void vcpu_notify_event_locked(struct vcpu *vcpu);
-
/* global statistics */
VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
@@ -284,7 +277,6 @@ vcpu_init(struct vcpu *vcpu)
int
vcpu_trace_exceptions(struct vcpu *vcpu)
{
-
return (trace_guest_exceptions);
}
@@ -364,14 +356,6 @@ vm_init(struct vm *vm, bool create)
}
}
-void
-vm_disable_vcpu_creation(struct vm *vm)
-{
- sx_xlock(&vm->vcpus_init_lock);
- vm->dying = true;
- sx_xunlock(&vm->vcpus_init_lock);
-}
-
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
@@ -402,18 +386,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
return (vcpu);
}
-void
-vm_lock_vcpus(struct vm *vm)
-{
- sx_xlock(&vm->vcpus_init_lock);
-}
-
-void
-vm_unlock_vcpus(struct vm *vm)
-{
- sx_unlock(&vm->vcpus_init_lock);
-}
-
int
vm_create(const char *name, struct vm **retvm)
{
@@ -443,35 +415,6 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
-void
-vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
- uint16_t *threads, uint16_t *maxcpus)
-{
- *sockets = vm->sockets;
- *cores = vm->cores;
- *threads = vm->threads;
- *maxcpus = vm->maxcpus;
-}
-
-uint16_t
-vm_get_maxcpus(struct vm *vm)
-{
- return (vm->maxcpus);
-}
-
-int
-vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
- uint16_t threads, uint16_t maxcpus __unused)
-{
- /* Ignore maxcpus. */
- if ((sockets * cores * threads) > vm->maxcpus)
- return (EINVAL);
- vm->sockets = sockets;
- vm->cores = cores;
- vm->threads = threads;
- return(0);
-}
-
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -520,23 +463,11 @@ vm_destroy(struct vm *vm)
free(vm, M_VM);
}
-int
-vm_reinit(struct vm *vm)
+void
+vm_reset(struct vm *vm)
{
- int error;
-
- /*
- * A virtual machine can be reset only if all vcpus are suspended.
- */
- if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
- vm_cleanup(vm, false);
- vm_init(vm, false);
- error = 0;
- } else {
- error = EBUSY;
- }
-
- return (error);
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
}
const char *
@@ -810,210 +741,6 @@ save_guest_fpustate(struct vcpu *vcpu)
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
-/*
- * Invoke the rendezvous function on the specified vcpu if applicable. Return
- * true if the rendezvous is finished, false otherwise.
- */
-static bool
-vm_rendezvous(struct vcpu *vcpu)
-{
- struct vm *vm = vcpu->vm;
- int vcpuid;
-
- mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED);
- KASSERT(vcpu->vm->rendezvous_func != NULL,
- ("vm_rendezvous: no rendezvous pending"));
-
- /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
- CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
- &vm->active_cpus);
-
- vcpuid = vcpu->vcpuid;
- if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
- !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
- VMM_CTR0(vcpu, "Calling rendezvous func");
- (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
- CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
- }
- if (CPU_CMP(&vm->rendezvous_req_cpus,
- &vm->rendezvous_done_cpus) == 0) {
- VMM_CTR0(vcpu, "Rendezvous completed");
- CPU_ZERO(&vm->rendezvous_req_cpus);
- vm->rendezvous_func = NULL;
- wakeup(&vm->rendezvous_func);
- return (true);
- }
- return (false);
-}
-
-static void
-vcpu_wait_idle(struct vcpu *vcpu)
-{
- KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));
-
- vcpu->reqidle = 1;
- vcpu_notify_event_locked(vcpu);
- VMM_CTR1(vcpu, "vcpu state change from %s to "
- "idle requested", vcpu_state2str(vcpu->state));
- msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
-}
-
-static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
- bool from_idle)
-{
- int error;
-
- vcpu_assert_locked(vcpu);
-
- /*
- * State transitions from the vmmdev_ioctl() must always begin from
- * the VCPU_IDLE state. This guarantees that there is only a single
- * ioctl() operating on a vcpu at any point.
- */
- if (from_idle) {
- while (vcpu->state != VCPU_IDLE)
- vcpu_wait_idle(vcpu);
- } else {
- KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
- "vcpu idle state"));
- }
-
- if (vcpu->state == VCPU_RUNNING) {
- KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
- "mismatch for running vcpu", curcpu, vcpu->hostcpu));
- } else {
- KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
- "vcpu that is not running", vcpu->hostcpu));
- }
-
- /*
- * The following state transitions are allowed:
- * IDLE -> FROZEN -> IDLE
- * FROZEN -> RUNNING -> FROZEN
- * FROZEN -> SLEEPING -> FROZEN
- */
- switch (vcpu->state) {
- case VCPU_IDLE:
- case VCPU_RUNNING:
- case VCPU_SLEEPING:
- error = (newstate != VCPU_FROZEN);
- break;
- case VCPU_FROZEN:
- error = (newstate == VCPU_FROZEN);
- break;
- default:
- error = 1;
- break;
- }
-
- if (error)
- return (EBUSY);
-
- VMM_CTR2(vcpu, "vcpu state changed from %s to %s",
- vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
-
- vcpu->state = newstate;
- if (newstate == VCPU_RUNNING)
- vcpu->hostcpu = curcpu;
- else
- vcpu->hostcpu = NOCPU;
-
- if (newstate == VCPU_IDLE)
- wakeup(&vcpu->state);
-
- return (0);
-}
-
-/*
- * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
- * with vm_smp_rendezvous().
- *
- * The complexity here suggests that the rendezvous mechanism needs a rethink.
- */
-int
-vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
-{
- cpuset_t locked;
- struct vcpu *vcpu;
- int error, i;
- uint16_t maxcpus;
-
- KASSERT(newstate != VCPU_IDLE,
- ("vcpu_set_state_all: invalid target state %d", newstate));
-
- error = 0;
- CPU_ZERO(&locked);
- maxcpus = vm->maxcpus;
-
- mtx_lock(&vm->rendezvous_mtx);
-restart:
- if (vm->rendezvous_func != NULL) {
- /*
- * If we have a pending rendezvous, then the initiator may be
- * blocked waiting for other vCPUs to execute the callback. The
- * current thread may be a vCPU thread so we must not block
- * waiting for the initiator, otherwise we get a deadlock.
- * Thus, execute the callback on behalf of any idle vCPUs.
- */
- for (i = 0; i < maxcpus; i++) {
- vcpu = vm_vcpu(vm, i);
- if (vcpu == NULL)
- continue;
- vcpu_lock(vcpu);
- if (vcpu->state == VCPU_IDLE) {
- (void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
- true);
- CPU_SET(i, &locked);
- }
- if (CPU_ISSET(i, &locked)) {
- /*
- * We can safely execute the callback on this
- * vCPU's behalf.
- */
- vcpu_unlock(vcpu);
- (void)vm_rendezvous(vcpu);
- vcpu_lock(vcpu);
- }
- vcpu_unlock(vcpu);
- }
- }
-
- /*
- * Now wait for remaining vCPUs to become idle. This may include the
- * initiator of a rendezvous that is currently blocked on the rendezvous
- * mutex.
- */
- CPU_FOREACH_ISCLR(i, &locked) {
- if (i >= maxcpus)
- break;
- vcpu = vm_vcpu(vm, i);
- if (vcpu == NULL)
- continue;
- vcpu_lock(vcpu);
- while (vcpu->state != VCPU_IDLE) {
- mtx_unlock(&vm->rendezvous_mtx);
- vcpu_wait_idle(vcpu);
- vcpu_unlock(vcpu);
- mtx_lock(&vm->rendezvous_mtx);
- if (vm->rendezvous_func != NULL)
- goto restart;
- vcpu_lock(vcpu);
- }
- error = vcpu_set_state_locked(vcpu, newstate, true);
- vcpu_unlock(vcpu);
- if (error != 0) {
- /* Roll back state changes. */
- CPU_FOREACH_ISSET(i, &locked)
- (void)vcpu_set_state(vcpu, VCPU_IDLE, false);
- break;
- }
- CPU_SET(i, &locked);
- }
- mtx_unlock(&vm->rendezvous_mtx);
- return (error);
-}
-
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
@@ -1032,37 +759,6 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
panic("Error %d setting state to %d", error, newstate);
}
-static int
-vm_handle_rendezvous(struct vcpu *vcpu)
-{
- struct vm *vm;
- struct thread *td;
-
- td = curthread;
- vm = vcpu->vm;
-
- mtx_lock(&vm->rendezvous_mtx);
- while (vm->rendezvous_func != NULL) {
- if (vm_rendezvous(vcpu))
- break;
-
- VMM_CTR0(vcpu, "Wait for rendezvous completion");
- mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
- "vmrndv", hz);
- if (td_ast_pending(td, TDA_SUSPEND)) {
- int error;
-
- mtx_unlock(&vm->rendezvous_mtx);
- error = thread_check_susp(td, true);
- if (error != 0)
- return (error);
- mtx_lock(&vm->rendezvous_mtx);
- }
- }
- mtx_unlock(&vm->rendezvous_mtx);
- return (0);
-}
-
/*
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
*/
@@ -1386,33 +1082,6 @@ vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
return (0);
}
-int
-vm_suspend(struct vm *vm, enum vm_suspend_how how)
-{
- int i;
-
- if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
- return (EINVAL);
-
- if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
- VM_CTR2(vm, "virtual machine already suspended %d/%d",
- vm->suspend, how);
- return (EALREADY);
- }
-
- VM_CTR1(vm, "virtual machine successfully suspended %d", how);
-
- /*
- * Notify all active vcpus that they are now suspended.
- */
- for (i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm_vcpu(vm, i));
- }
-
- return (0);
-}
-
void
vm_exit_suspended(struct vcpu *vcpu, uint64_t rip)
{
@@ -2039,107 +1708,6 @@ vm_iommu_domain(struct vm *vm)
return (vm->iommu);
}
-int
-vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
-{
- int error;
-
- vcpu_lock(vcpu);
- error = vcpu_set_state_locked(vcpu, newstate, from_idle);
- vcpu_unlock(vcpu);
-
- return (error);
-}
-
-enum vcpu_state
-vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
-{
- enum vcpu_state state;
-
- vcpu_lock(vcpu);
- state = vcpu->state;
- if (hostcpu != NULL)
- *hostcpu = vcpu->hostcpu;
- vcpu_unlock(vcpu);
-
- return (state);
-}
-
-int
-vm_activate_cpu(struct vcpu *vcpu)
-{
- struct vm *vm = vcpu->vm;
-
- if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
- return (EBUSY);
-
- VMM_CTR0(vcpu, "activated");
- CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
- return (0);
-}
-
-int
-vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
-{
- if (vcpu == NULL) {
- vm->debug_cpus = vm->active_cpus;
- for (int i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm_vcpu(vm, i));
- }
- } else {
- if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
- return (EINVAL);
-
- CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
- vcpu_notify_event(vcpu);
- }
- return (0);
-}
-
-int
-vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
-{
-
- if (vcpu == NULL) {
- CPU_ZERO(&vm->debug_cpus);
- } else {
- if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
- return (EINVAL);
-
- CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
- }
- return (0);
-}
-
-int
-vcpu_debugged(struct vcpu *vcpu)
-{
-
- return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
-}
-
-cpuset_t
-vm_active_cpus(struct vm *vm)
-{
-
- return (vm->active_cpus);
-}
-
-cpuset_t
-vm_debug_cpus(struct vm *vm)
-{
-
- return (vm->debug_cpus);
-}
-
-cpuset_t
-vm_suspended_cpus(struct vm *vm)
-{
-
- return (vm->suspended_cpus);
-}
-
/*
* Returns the subset of vCPUs in tostart that are awaiting startup.
* These vCPUs are also marked as no longer awaiting startup.
@@ -2192,47 +1760,6 @@ vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
return (0);
}
-/*
- * This function is called to ensure that a vcpu "sees" a pending event
- * as soon as possible:
- * - If the vcpu thread is sleeping then it is woken up.
- * - If the vcpu is running on a different host_cpu then an IPI will be directed
- * to the host_cpu to cause the vcpu to trap into the hypervisor.
- */
-static void
-vcpu_notify_event_locked(struct vcpu *vcpu)
-{
- int hostcpu;
-
- hostcpu = vcpu->hostcpu;
- if (vcpu->state == VCPU_RUNNING) {
- KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
- if (hostcpu != curcpu) {
- ipi_cpu(hostcpu, vmm_ipinum);
- } else {
- /*
- * If the 'vcpu' is running on 'curcpu' then it must
- * be sending a notification to itself (e.g. SELF_IPI).
- * The pending event will be picked up when the vcpu
- * transitions back to guest context.
- */
- }
- } else {
- KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
- "with hostcpu %d", vcpu->state, hostcpu));
- if (vcpu->state == VCPU_SLEEPING)
- wakeup_one(vcpu);
- }
-}
-
-void
-vcpu_notify_event(struct vcpu *vcpu)
-{
- vcpu_lock(vcpu);
- vcpu_notify_event_locked(vcpu);
- vcpu_unlock(vcpu);
-}
-
void
vcpu_notify_lapic(struct vcpu *vcpu)
{
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index 63bdee69bb59..44bae5da31e5 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -34,6 +34,7 @@
#include <x86/apicreg.h>
#include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_vm.h>
#include <machine/vmm.h>
#include "vmm_lapic.h"
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index 2e2224595ab4..f32107124eb8 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -39,6 +39,7 @@
#include <machine/vmm.h>
#include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_vm.h>
#include "vmm_host.h"
#include "vmm_util.h"
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index 16292dd42f28..14b4d1be10a3 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -181,24 +181,11 @@ DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now));
#endif
#endif
-int vm_create(const char *name, struct vm **retvm);
-struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
-void vm_disable_vcpu_creation(struct vm *vm);
-void vm_lock_vcpus(struct vm *vm);
-void vm_unlock_vcpus(struct vm *vm);
-void vm_destroy(struct vm *vm);
-int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
-uint16_t vm_get_maxcpus(struct vm *vm);
-void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
- uint16_t *threads, uint16_t *maxcpus);
-int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
- uint16_t threads, uint16_t maxcpus);
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_run(struct vcpu *vcpu);
-int vm_suspend(struct vm *vm, enum vm_suspend_how how);
void* vm_get_cookie(struct vm *vm);
int vcpu_vcpuid(struct vcpu *vcpu);
void *vcpu_get_cookie(struct vcpu *vcpu);
@@ -206,9 +193,6 @@ struct vm *vcpu_vm(struct vcpu *vcpu);
struct vcpu *vm_vcpu(struct vm *vm, int cpu);
int vm_get_capability(struct vcpu *vcpu, int type, int *val);
int vm_set_capability(struct vcpu *vcpu, int type, int val);
-int vm_activate_cpu(struct vcpu *vcpu);
-int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
-int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far);
int vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr);
int vm_assert_irq(struct vm *vm, uint32_t irq);
@@ -218,13 +202,8 @@ int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
-void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
-cpuset_t vm_active_cpus(struct vm *vm);
-cpuset_t vm_debug_cpus(struct vm *vm);
-cpuset_t vm_suspended_cpus(struct vm *vm);
-
static __inline int
vcpu_rendezvous_pending(struct vm_eventinfo *info)
{
@@ -239,14 +218,9 @@ vcpu_suspended(struct vm_eventinfo *info)
return (*info->sptr);
}
-int vcpu_debugged(struct vcpu *vcpu);
-
void *vcpu_stats(struct vcpu *vcpu);
-void vcpu_notify_event(struct vcpu *vcpu);
struct vm_mem *vm_mem(struct vm *vm);
-enum vm_reg_name vm_segment_name(int seg_encoding);
-
struct vm_copyinfo {
uint64_t gpa;
size_t len;
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
index e4f7bb2af3ee..22cd06b09d7d 100644
--- a/sys/arm64/vmm/io/vgic_v3.c
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -69,6 +69,7 @@
#include <arm64/vmm/vmm_handlers.h>
#include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_vm.h>
#include "vgic.h"
#include "vgic_v3.h"
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 75f7f2fcaaf7..92500aa9febc 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -40,7 +40,6 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
-#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -74,20 +73,11 @@
#include "io/vgic.h"
#include "io/vtimer.h"
-static int vm_handle_wfi(struct vcpu *vcpu,
- struct vm_exit *vme, bool *retu);
-
static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
-SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
-
-static int vmm_ipinum;
-SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
- "IPI vector used for vcpu notifications");
-
struct vmm_regs {
uint64_t id_aa64afr0;
uint64_t id_aa64afr1;
@@ -142,8 +132,6 @@ static const struct vmm_regs vmm_arch_regs_masks = {
/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;
-static void vcpu_notify_event_locked(struct vcpu *vcpu);
-
/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
@@ -294,14 +282,6 @@ vm_init(struct vm *vm, bool create)
}
}
-void
-vm_disable_vcpu_creation(struct vm *vm)
-{
- sx_xlock(&vm->vcpus_init_lock);
- vm->dying = true;
- sx_xunlock(&vm->vcpus_init_lock);
-}
-
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
@@ -338,18 +318,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
return (vcpu);
}
-void
-vm_lock_vcpus(struct vm *vm)
-{
- sx_xlock(&vm->vcpus_init_lock);
-}
-
-void
-vm_unlock_vcpus(struct vm *vm)
-{
- sx_unlock(&vm->vcpus_init_lock);
-}
-
int
vm_create(const char *name, struct vm **retvm)
{
@@ -363,6 +331,7 @@ vm_create(const char *name, struct vm **retvm)
return (error);
}
strcpy(vm->name, name);
+ mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->sockets = 1;
@@ -379,35 +348,6 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
-void
-vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
- uint16_t *threads, uint16_t *maxcpus)
-{
- *sockets = vm->sockets;
- *cores = vm->cores;
- *threads = vm->threads;
- *maxcpus = vm->maxcpus;
-}
-
-uint16_t
-vm_get_maxcpus(struct vm *vm)
-{
- return (vm->maxcpus);
-}
-
-int
-vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
- uint16_t threads, uint16_t maxcpus)
-{
- /* Ignore maxcpus. */
- if ((sockets * cores * threads) > vm->maxcpus)
- return (EINVAL);
- vm->sockets = sockets;
- vm->cores = cores;
- vm->threads = threads;
- return(0);
-}
-
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -452,23 +392,11 @@ vm_destroy(struct vm *vm)
free(vm, M_VMM);
}
-int
-vm_reinit(struct vm *vm)
+void
+vm_reset(struct vm *vm)
{
- int error;
-
- /*
- * A virtual machine can be reset only if all vcpus are suspended.
- */
- if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
- vm_cleanup(vm, false);
- vm_init(vm, false);
- error = 0;
- } else {
- error = EBUSY;
- }
-
- return (error);
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
}
const char *
@@ -765,33 +693,6 @@ out_user:
return (0);
}
-int
-vm_suspend(struct vm *vm, enum vm_suspend_how how)
-{
- int i;
-
- if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
- return (EINVAL);
-
- if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
- VM_CTR2(vm, "virtual machine already suspended %d/%d",
- vm->suspend, how);
- return (EALREADY);
- }
-
- VM_CTR1(vm, "virtual machine successfully suspended %d", how);
-
- /*
- * Notify all active vcpus that they are now suspended.
- */
- for (i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm_vcpu(vm, i));
- }
-
- return (0);
-}
-
void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
@@ -819,82 +720,6 @@ vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
vmexit->exitcode = VM_EXITCODE_DEBUG;
}
-int
-vm_activate_cpu(struct vcpu *vcpu)
-{
- struct vm *vm = vcpu->vm;
-
- if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
- return (EBUSY);
-
- CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
- return (0);
-
-}
-
-int
-vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
-{
- if (vcpu == NULL) {
- vm->debug_cpus = vm->active_cpus;
- for (int i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm_vcpu(vm, i));
- }
- } else {
- if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
- return (EINVAL);
-
- CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
- vcpu_notify_event(vcpu);
- }
- return (0);
-}
-
-int
-vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
-{
-
- if (vcpu == NULL) {
- CPU_ZERO(&vm->debug_cpus);
- } else {
- if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
- return (EINVAL);
-
- CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
- }
- return (0);
-}
-
-int
-vcpu_debugged(struct vcpu *vcpu)
-{
-
- return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
-}
-
-cpuset_t
-vm_active_cpus(struct vm *vm)
-{
-
- return (vm->active_cpus);
-}
-
-cpuset_t
-vm_debug_cpus(struct vm *vm)
-{
-
- return (vm->debug_cpus);
-}
-
-cpuset_t
-vm_suspended_cpus(struct vm *vm)
-{
-
- return (vm->suspended_cpus);
-}
-
-
void *
vcpu_stats(struct vcpu *vcpu)
{
@@ -902,47 +727,6 @@ vcpu_stats(struct vcpu *vcpu)
return (vcpu->stats);
}
-/*
- * This function is called to ensure that a vcpu "sees" a pending event
- * as soon as possible:
- * - If the vcpu thread is sleeping then it is woken up.
- * - If the vcpu is running on a different host_cpu then an IPI will be directed
- * to the host_cpu to cause the vcpu to trap into the hypervisor.
- */
-static void
-vcpu_notify_event_locked(struct vcpu *vcpu)
-{
- int hostcpu;
-
- hostcpu = vcpu->hostcpu;
- if (vcpu->state == VCPU_RUNNING) {
- KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
- if (hostcpu != curcpu) {
- ipi_cpu(hostcpu, vmm_ipinum);
- } else {
- /*
- * If the 'vcpu' is running on 'curcpu' then it must
- * be sending a notification to itself (e.g. SELF_IPI).
- * The pending event will be picked up when the vcpu
- * transitions back to guest context.
- */
- }
- } else {
- KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
- "with hostcpu %d", vcpu->state, hostcpu));
- if (vcpu->state == VCPU_SLEEPING)
- wakeup_one(vcpu);
- }
-}
-
-void
-vcpu_notify_event(struct vcpu *vcpu)
-{
- vcpu_lock(vcpu);
- vcpu_notify_event_locked(vcpu);
- vcpu_unlock(vcpu);
-}
-
struct vm_mem *
vm_mem(struct vm *vm)
{
@@ -984,71 +768,6 @@ save_guest_fpustate(struct vcpu *vcpu)
KASSERT(PCPU_GET(fpcurthread) == NULL,
("%s: fpcurthread set with guest registers", __func__));
}
-static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
- bool from_idle)
-{
- int error;
-
- vcpu_assert_locked(vcpu);
-
- /*
- * State transitions from the vmmdev_ioctl() must always begin from
- * the VCPU_IDLE state. This guarantees that there is only a single
- * ioctl() operating on a vcpu at any point.
- */
- if (from_idle) {
- while (vcpu->state != VCPU_IDLE) {
- vcpu_notify_event_locked(vcpu);
- msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
- }
- } else {
- KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
- "vcpu idle state"));
- }
-
- if (vcpu->state == VCPU_RUNNING) {
- KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
- "mismatch for running vcpu", curcpu, vcpu->hostcpu));
- } else {
- KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
- "vcpu that is not running", vcpu->hostcpu));
- }
-
- /*
- * The following state transitions are allowed:
- * IDLE -> FROZEN -> IDLE
- * FROZEN -> RUNNING -> FROZEN
- * FROZEN -> SLEEPING -> FROZEN
- */
- switch (vcpu->state) {
- case VCPU_IDLE:
- case VCPU_RUNNING:
- case VCPU_SLEEPING:
- error = (newstate != VCPU_FROZEN);
- break;
- case VCPU_FROZEN:
- error = (newstate == VCPU_FROZEN);
- break;
- default:
- error = 1;
- break;
- }
-
- if (error)
- return (EBUSY);
-
- vcpu->state = newstate;
- if (newstate == VCPU_RUNNING)
- vcpu->hostcpu = curcpu;
- else
- vcpu->hostcpu = NOCPU;
-
- if (newstate == VCPU_IDLE)
- wakeup(&vcpu->state);
-
- return (0);
-}
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
@@ -1111,32 +830,6 @@ vm_vcpu(struct vm *vm, int vcpuid)
}
int
-vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
-{
- int error;
-
- vcpu_lock(vcpu);
- error = vcpu_set_state_locked(vcpu, newstate, from_idle);
- vcpu_unlock(vcpu);
-
- return (error);
-}
-
-enum vcpu_state
-vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
-{
- enum vcpu_state state;
-
- vcpu_lock(vcpu);
- state = vcpu->state;
- if (hostcpu != NULL)
- *hostcpu = vcpu->hostcpu;
- vcpu_unlock(vcpu);
-
- return (state);
-}
-
-int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
if (reg < 0 || reg >= VM_REG_LAST)
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index eda15cc88b28..a5322f05d28f 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -141,38 +141,6 @@ vcpu_unlock_one(struct vcpu *vcpu)
vcpu_set_state(vcpu, VCPU_IDLE, false);
}
-#ifndef __amd64__
-static int
-vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
-{
- struct vcpu *vcpu;
- int error;
- uint16_t i, j, maxcpus;
-
- error = 0;
- maxcpus = vm_get_maxcpus(vm);
- for (i = 0; i < maxcpus; i++) {
- vcpu = vm_vcpu(vm, i);
- if (vcpu == NULL)
- continue;
- error = vcpu_lock_one(vcpu);
- if (error)
- break;
- }
-
- if (error) {
- for (j = 0; j < i; j++) {
- vcpu = vm_vcpu(vm, j);
- if (vcpu == NULL)
- continue;
- vcpu_unlock_one(vcpu);
- }
- }
-
- return (error);
-}
-#endif
-
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
diff --git a/sys/dev/vmm/vmm_vm.c b/sys/dev/vmm/vmm_vm.c
new file mode 100644
index 000000000000..7941038ed671
--- /dev/null
+++ b/sys/dev/vmm/vmm_vm.c
@@ -0,0 +1,476 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+
+#include <machine/smp.h>
+
+#include <dev/vmm/vmm_vm.h>
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL);
+
+int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+/*
+ * Run the pending rendezvous callback on behalf of 'vcpu' if it is one of the
+ * requested vCPUs and has not run it yet.  Once every requested vCPU has run
+ * the callback, retire the rendezvous and wake up threads sleeping on it.
+ *
+ * The rendezvous mutex must be held and a rendezvous must be pending.
+ * Returns true if the rendezvous is finished, false otherwise.
+ */
+static bool
+vm_rendezvous(struct vcpu *vcpu)
+{
+	struct vm *vm;
+	int id;
+
+	vm = vcpu->vm;
+	id = vcpu->vcpuid;
+
+	mtx_assert(&vm->rendezvous_mtx, MA_OWNED);
+	KASSERT(vm->rendezvous_func != NULL,
+	    ("vm_rendezvous: no rendezvous pending"));
+
+	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
+	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
+	    &vm->active_cpus);
+
+	if (!CPU_ISSET(id, &vm->rendezvous_done_cpus) &&
+	    CPU_ISSET(id, &vm->rendezvous_req_cpus)) {
+		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
+		CPU_SET(id, &vm->rendezvous_done_cpus);
+	}
+
+	/* Some requested vCPUs have not run the callback yet. */
+	if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) != 0)
+		return (false);
+
+	/* All requested vCPUs are done: retire the request, wake sleepers. */
+	CPU_ZERO(&vm->rendezvous_req_cpus);
+	vm->rendezvous_func = NULL;
+	wakeup(&vm->rendezvous_func);
+	return (true);
+}
+
+/*
+ * Take part in a pending rendezvous on behalf of 'vcpu' and wait until no
+ * rendezvous is pending any more.
+ *
+ * Returns 0 once the rendezvous is over, or the non-zero value returned by
+ * thread_check_susp() if a suspension request was pending for the current
+ * thread while waiting.  The rendezvous mutex is not held on return.
+ */
+int
+vm_handle_rendezvous(struct vcpu *vcpu)
+{
+ struct vm *vm;
+ struct thread *td;
+
+ td = curthread;
+ vm = vcpu->vm;
+
+ mtx_lock(&vm->rendezvous_mtx);
+ while (vm->rendezvous_func != NULL) {
+ /* Run the callback for this vCPU; stop if that finished it. */
+ if (vm_rendezvous(vcpu))
+ break;
+
+ /*
+ * Sleep on the channel that vm_rendezvous() wakes at completion.
+ * The hz timeout bounds each sleep, so the suspension check
+ * below is re-evaluated after every wait.
+ */
+ mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
+ "vmrndv", hz);
+ if (td_ast_pending(td, TDA_SUSPEND)) {
+ int error;
+
+ /* Drop the mutex around thread_check_susp(). */
+ mtx_unlock(&vm->rendezvous_mtx);
+ error = thread_check_susp(td, true);
+ if (error != 0)
+ return (error);
+ mtx_lock(&vm->rendezvous_mtx);
+ }
+ }
+ mtx_unlock(&vm->rendezvous_mtx);
+ return (0);
+}
+
+/*
+ * Request that a non-idle vCPU go idle and wait for that to happen.
+ *
+ * Sets 'reqidle' (consumed outside this file; presumably by the vCPU run
+ * loop), notifies the vCPU, and then sleeps on &vcpu->state until woken or
+ * until the hz timeout expires.  vcpu_set_state_locked() issues the wakeup on
+ * that channel when a vCPU enters VCPU_IDLE.
+ *
+ * The sleep releases and reacquires the vCPU spin mutex, so the caller is
+ * expected to hold the vCPU lock.  Because of the timeout, callers must
+ * recheck the vCPU state after this returns.
+ */
+static void
+vcpu_wait_idle(struct vcpu *vcpu)
+{
+ KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));
+
+ vcpu->reqidle = 1;
+ vcpu_notify_event_locked(vcpu);
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+}
+
+/*
+ * Attempt to move 'vcpu' into 'newstate'.  The vCPU lock must be held.
+ *
+ * When 'from_idle' is true the call first waits, via vcpu_wait_idle(), until
+ * the vCPU is in VCPU_IDLE; otherwise the vCPU must not currently be idle.
+ *
+ * Returns 0 on success, or EBUSY if the requested transition is not allowed.
+ */
+int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+    bool from_idle)
+{
+	bool allowed;
+
+	vcpu_assert_locked(vcpu);
+
+	/*
+	 * State transitions from the vmmdev_ioctl() must always begin from
+	 * the VCPU_IDLE state. This guarantees that there is only a single
+	 * ioctl() operating on a vcpu at any point.
+	 */
+	if (!from_idle) {
+		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+		    "vcpu idle state"));
+	} else {
+		while (vcpu->state != VCPU_IDLE)
+			vcpu_wait_idle(vcpu);
+	}
+
+	/* 'hostcpu' is only meaningful while the vCPU is running. */
+	if (vcpu->state != VCPU_RUNNING) {
+		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+		    "vcpu that is not running", vcpu->hostcpu));
+	} else {
+		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+	}
+
+	/*
+	 * The following state transitions are allowed:
+	 * IDLE -> FROZEN -> IDLE
+	 * FROZEN -> RUNNING -> FROZEN
+	 * FROZEN -> SLEEPING -> FROZEN
+	 *
+	 * That is, FROZEN may go anywhere else, and everything else may only
+	 * go to FROZEN.
+	 */
+	switch (vcpu->state) {
+	case VCPU_FROZEN:
+		allowed = (newstate != VCPU_FROZEN);
+		break;
+	case VCPU_IDLE:
+	case VCPU_RUNNING:
+	case VCPU_SLEEPING:
+		allowed = (newstate == VCPU_FROZEN);
+		break;
+	default:
+		allowed = false;
+		break;
+	}
+	if (!allowed)
+		return (EBUSY);
+
+	vcpu->state = newstate;
+	vcpu->hostcpu = (newstate == VCPU_RUNNING) ? curcpu : NOCPU;
+
+	/* Release anyone sleeping in vcpu_wait_idle(). */
+	if (newstate == VCPU_IDLE)
+		wakeup(&vcpu->state);
+
+	return (0);
+}
+
+/*
+ * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
+ * with vm_smp_rendezvous().
+ *
+ * Every allocated vCPU is moved out of VCPU_IDLE.  vCPUs found idle while a
+ * rendezvous is pending are put in VCPU_FROZEN so that the rendezvous
+ * callback can be run on their behalf; the remaining vCPUs are moved to
+ * 'newstate' once they become idle.  'newstate' must not be VCPU_IDLE.
+ *
+ * Returns 0 on success.  If a transition fails, every vCPU locked so far is
+ * put back in VCPU_IDLE and the error from vcpu_set_state_locked() is
+ * returned.
+ *
+ * The complexity here suggests that the rendezvous mechanism needs a rethink.
+ */
+int
+vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
+{
+	cpuset_t locked;
+	struct vcpu *vcpu;
+	int error, i, j;
+	uint16_t maxcpus;
+
+	KASSERT(newstate != VCPU_IDLE,
+	    ("vcpu_set_state_all: invalid target state %d", newstate));
+
+	error = 0;
+	CPU_ZERO(&locked);
+	maxcpus = vm->maxcpus;
+
+	mtx_lock(&vm->rendezvous_mtx);
+restart:
+	if (vm->rendezvous_func != NULL) {
+		/*
+		 * If we have a pending rendezvous, then the initiator may be
+		 * blocked waiting for other vCPUs to execute the callback. The
+		 * current thread may be a vCPU thread so we must not block
+		 * waiting for the initiator, otherwise we get a deadlock.
+		 * Thus, execute the callback on behalf of any idle vCPUs.
+		 */
+		for (i = 0; i < maxcpus; i++) {
+			vcpu = vm_vcpu(vm, i);
+			if (vcpu == NULL)
+				continue;
+			vcpu_lock(vcpu);
+			if (vcpu->state == VCPU_IDLE) {
+				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
+				    true);
+				CPU_SET(i, &locked);
+			}
+			if (CPU_ISSET(i, &locked)) {
+				/*
+				 * We can safely execute the callback on this
+				 * vCPU's behalf.  The vCPU spin lock is dropped
+				 * around the call.
+				 */
+				vcpu_unlock(vcpu);
+				(void)vm_rendezvous(vcpu);
+				vcpu_lock(vcpu);
+			}
+			vcpu_unlock(vcpu);
+		}
+	}
+
+	/*
+	 * Now wait for remaining vCPUs to become idle. This may include the
+	 * initiator of a rendezvous that is currently blocked on the rendezvous
+	 * mutex.
+	 */
+	CPU_FOREACH_ISCLR(i, &locked) {
+		if (i >= maxcpus)
+			break;
+		vcpu = vm_vcpu(vm, i);
+		if (vcpu == NULL)
+			continue;
+		vcpu_lock(vcpu);
+		while (vcpu->state != VCPU_IDLE) {
+			/*
+			 * Release the rendezvous mutex while sleeping so that
+			 * a rendezvous can be started or completed meanwhile;
+			 * if one is pending afterwards, service it first.
+			 */
+			mtx_unlock(&vm->rendezvous_mtx);
+			vcpu_wait_idle(vcpu);
+			vcpu_unlock(vcpu);
+			mtx_lock(&vm->rendezvous_mtx);
+			if (vm->rendezvous_func != NULL)
+				goto restart;
+			vcpu_lock(vcpu);
+		}
+		error = vcpu_set_state_locked(vcpu, newstate, true);
+		vcpu_unlock(vcpu);
+		if (error != 0) {
+			/*
+			 * Roll back state changes.  Release each vCPU recorded
+			 * in 'locked'; the vCPU that just failed is still idle
+			 * and was never added to the set, so it must not be
+			 * touched here.
+			 */
+			CPU_FOREACH_ISSET(j, &locked)
+				(void)vcpu_set_state(vm_vcpu(vm, j), VCPU_IDLE,
+				    false);
+			break;
+		}
+		CPU_SET(i, &locked);
+	}
+	mtx_unlock(&vm->rendezvous_mtx);
+	return (error);
+}
+
+
+/*
+ * Locked wrapper around vcpu_set_state_locked(): take the vCPU lock, attempt
+ * the transition to 'newstate', drop the lock and return the result.
+ */
+int
+vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
+{
+	int rv;
+
+	vcpu_lock(vcpu);
+	rv = vcpu_set_state_locked(vcpu, newstate, from_idle);
+	vcpu_unlock(vcpu);
+	return (rv);
+}
+
+/*
+ * Return the current state of 'vcpu', read under the vCPU lock.  If 'hostcpu'
+ * is not NULL, the vCPU's 'hostcpu' field is stored through it as part of the
+ * same locked snapshot.
+ */
+enum vcpu_state
+vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
+{
+	enum vcpu_state snapshot;
+
+	vcpu_lock(vcpu);
+	if (hostcpu != NULL)
+		*hostcpu = vcpu->hostcpu;
+	snapshot = vcpu->state;
+	vcpu_unlock(vcpu);
+
+	return (snapshot);
+}
+
+/*
+ * Make sure that a vcpu "sees" a pending event as soon as possible.  The
+ * callers in this file invoke it with the vCPU lock held.
+ *
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI is directed
+ *   to that host_cpu to cause the vcpu to trap into the hypervisor.
+ * - Otherwise there is nothing to do.
+ */
+void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+	int hostcpu = vcpu->hostcpu;
+
+	if (vcpu->state != VCPU_RUNNING) {
+		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+		    "with hostcpu %d", vcpu->state, hostcpu));
+		if (vcpu->state == VCPU_SLEEPING)
+			wakeup_one(vcpu);
+		return;
+	}
+
+	KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+
+	/*
+	 * If the 'vcpu' is running on 'curcpu' then it must be sending a
+	 * notification to itself (e.g. SELF_IPI).  The pending event will be
+	 * picked up when the vcpu transitions back to guest context, so an
+	 * IPI is only needed for a remote host CPU.
+	 */
+	if (hostcpu != curcpu)
+		ipi_cpu(hostcpu, vmm_ipinum);
+}
+
+/*
+ * Locked wrapper around vcpu_notify_event_locked(): take the vCPU lock,
+ * deliver the notification, and drop the lock.
+ */
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu);
+ vcpu_unlock(vcpu);
+}
+
+/*
+ * Return non-zero if this vCPU's id is set in its VM's 'debug_cpus' set,
+ * zero otherwise.  The set is read without taking any lock.
+ */
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+ return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+/*
+ * Acquire the VM's 'vcpus_init_lock' exclusively.  Paired with
+ * vm_unlock_vcpus().
+ */
+void
+vm_lock_vcpus(struct vm *vm)
+{
+ sx_xlock(&vm->vcpus_init_lock);
+}
+
+/*
+ * Release the VM's 'vcpus_init_lock', e.g. after vm_lock_vcpus().
+ */
+void
+vm_unlock_vcpus(struct vm *vm)
+{
+ sx_unlock(&vm->vcpus_init_lock);
+}
+
+/*
+ * Mark the VM as dying.  The flag is set while holding 'vcpus_init_lock'
+ * exclusively.
+ *
+ * NOTE(review): the code that consults 'dying' is not visible here;
+ * presumably vCPU allocation checks it under the same lock -- confirm.
+ */
+void
+vm_disable_vcpu_creation(struct vm *vm)
+{
+ sx_xlock(&vm->vcpus_init_lock);
+ vm->dying = true;
+ sx_xunlock(&vm->vcpus_init_lock);
+}
+
+/*
+ * Return the VM's 'maxcpus' value.
+ */
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+/*
+ * Copy the VM's topology (sockets, cores, threads) and its 'maxcpus' value
+ * into the caller's variables.  All four output pointers are dereferenced
+ * unconditionally and so must be non-NULL.
+ */
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus)
+{
+ *sockets = vm->sockets;
+ *cores = vm->cores;
+ *threads = vm->threads;
+ *maxcpus = vm->maxcpus;
+}
+
+/*
+ * Set the VM's topology.  The 'maxcpus' argument is ignored; the VM's own
+ * 'maxcpus' is the limit.
+ *
+ * Returns 0 on success, or EINVAL if sockets * cores * threads exceeds the
+ * VM's 'maxcpus', in which case the topology is left unchanged.
+ */
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus __unused)
+{
+	/*
+	 * Ignore maxcpus.
+	 *
+	 * Compute the product in 64 bits: the operands would otherwise be
+	 * promoted to int, where the product of three 16-bit values can
+	 * overflow and wrap around to a value that passes the check.
+	 */
+	if ((uint64_t)sockets * cores * threads > vm->maxcpus)
+		return (EINVAL);
+	vm->sockets = sockets;
+	vm->cores = cores;
+	vm->threads = threads;
+	return (0);
+}
+
+/*
+ * Record a suspend request of type 'how' for the VM and notify its active
+ * vCPUs.
+ *
+ * Returns EINVAL if 'how' is outside (VM_SUSPEND_NONE, VM_SUSPEND_LAST),
+ * EALREADY if 'suspend' was already non-zero, and 0 otherwise.
+ */
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+	if (!(how > VM_SUSPEND_NONE && how < VM_SUSPEND_LAST))
+		return (EINVAL);
+
+	/* Only the first request takes effect: 'suspend' goes 0 -> how once. */
+	if (!atomic_cmpset_int(&vm->suspend, 0, how))
+		return (EALREADY);
+
+	/*
+	 * Notify all active vcpus that they are now suspended.
+	 */
+	for (int i = 0; i < vm->maxcpus; i++) {
+		if (!CPU_ISSET(i, &vm->active_cpus))
+			continue;
+		vcpu_notify_event(vm_vcpu(vm, i));
+	}
+
+	return (0);
+}
+
+/*
+ * Reset the VM via vm_reset().
+ *
+ * A virtual machine can be reset only if all vcpus are suspended, i.e. the
+ * suspended set equals the active set.  Returns 0 after resetting, or EBUSY
+ * if that condition does not hold.
+ */
+int
+vm_reinit(struct vm *vm)
+{
+	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) != 0)
+		return (EBUSY);
+
+	vm_reset(vm);
+	return (0);
+}
+
+/*
+ * Add 'vcpu' to its VM's set of active vCPUs.
+ *
+ * Returns EBUSY if the vCPU is already marked active, 0 otherwise.
+ */
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+	cpuset_t *active;
+
+	active = &vcpu->vm->active_cpus;
+	if (CPU_ISSET(vcpu->vcpuid, active))
+		return (EBUSY);
+
+	CPU_SET_ATOMIC(vcpu->vcpuid, active);
+	return (0);
+}
+
+/*
+ * Mark vCPUs as being debugged and notify them.
+ *
+ * With a non-NULL 'vcpu', only that vCPU is affected; EINVAL is returned if
+ * it is not active.  With a NULL 'vcpu', 'debug_cpus' is set to a copy of
+ * 'active_cpus' and every active vCPU is notified.  Returns 0 on success.
+ */
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+	if (vcpu != NULL) {
+		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+			return (EINVAL);
+
+		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+		vcpu_notify_event(vcpu);
+		return (0);
+	}
+
+	/* No vCPU given: apply to all active vCPUs. */
+	vm->debug_cpus = vm->active_cpus;
+	for (int i = 0; i < vm->maxcpus; i++) {
+		if (!CPU_ISSET(i, &vm->active_cpus))
+			continue;
+		vcpu_notify_event(vm_vcpu(vm, i));
+	}
+	return (0);
+}
+
+/*
+ * Clear the debug mark set by vm_suspend_cpu().
+ *
+ * With a non-NULL 'vcpu', only that vCPU is cleared; EINVAL is returned if it
+ * is not currently in 'debug_cpus'.  With a NULL 'vcpu', the whole set is
+ * emptied.  Returns 0 on success.
+ */
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+	if (vcpu != NULL) {
+		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+			return (EINVAL);
+
+		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+		return (0);
+	}
+
+	CPU_ZERO(&vm->debug_cpus);
+	return (0);
+}
+
+/*
+ * Return a by-value copy of the VM's 'active_cpus' set, read without taking
+ * any lock.
+ */
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+ return (vm->active_cpus);
+}
+
+/*
+ * Return a by-value copy of the VM's 'debug_cpus' set, read without taking
+ * any lock.
+ */
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+ return (vm->debug_cpus);
+}
+
+/*
+ * Return a by-value copy of the VM's 'suspended_cpus' set, read without
+ * taking any lock.
+ */
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+ return (vm->suspended_cpus);
+}
diff --git a/sys/dev/vmm/vmm_vm.h b/sys/dev/vmm/vmm_vm.h
index 23f40e079da4..053eeb11d843 100644
--- a/sys/dev/vmm/vmm_vm.h
+++ b/sys/dev/vmm/vmm_vm.h
@@ -9,6 +9,7 @@
#define _DEV_VMM_VM_H_
#ifdef _KERNEL
+#include <sys/_cpuset.h>
#include <machine/vmm.h>
@@ -50,11 +51,16 @@ struct vcpu {
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+extern int vmm_ipinum;
+
int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
-#ifdef __amd64__
+int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle);
int vcpu_set_state_all(struct vm *vm, enum vcpu_state state);
-#endif
enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
+void vcpu_notify_event(struct vcpu *vcpu);
+void vcpu_notify_event_locked(struct vcpu *vcpu);
+int vcpu_debugged(struct vcpu *vcpu);
static int __inline
vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
@@ -74,6 +80,21 @@ vcpu_should_yield(struct vcpu *vcpu)
#endif
typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg);
+int vm_handle_rendezvous(struct vcpu *vcpu);
+
+/*
+ * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
+ * The rendezvous 'func(arg)' is not allowed to do anything that will
+ * cause the thread to be put to sleep.
+ *
+ * The caller cannot hold any locks when initiating the rendezvous.
+ *
+ * The implementation of this API may cause vcpus other than those specified
+ * by 'dest' to be stalled. The caller should not rely on any vcpus making
+ * forward progress when the rendezvous is in progress.
+ */
+int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg);
/*
* Initialization:
@@ -116,6 +137,30 @@ struct vm {
VMM_VM_MD_FIELDS;
};
+int vm_create(const char *name, struct vm **retvm);
+struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+void vm_reset(struct vm *vm);
+
+void vm_lock_vcpus(struct vm *vm);
+void vm_unlock_vcpus(struct vm *vm);
+void vm_disable_vcpu_creation(struct vm *vm);
+
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+int vm_activate_cpu(struct vcpu *vcpu);
+int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
+
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
#endif /* _KERNEL */
#endif /* !_DEV_VMM_VM_H_ */
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index 066b4d814348..dcb401d2026d 100644
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -18,7 +18,8 @@ SRCS+= vmm.c \
vmm_dev_machdep.c \
vmm_instruction_emul.c \
vmm_mem.c \
- vmm_stat.c
+ vmm_stat.c \
+ vmm_vm.c
.if ${MACHINE_CPUARCH} == "aarch64"
CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h
index a7318294464c..3f321a1a285a 100644
--- a/sys/riscv/include/vmm.h
+++ b/sys/riscv/include/vmm.h
@@ -160,34 +160,18 @@ DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
vm_offset_t max));
DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace));
-int vm_create(const char *name, struct vm **retvm);
-struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
-void vm_disable_vcpu_creation(struct vm *vm);
-void vm_lock_vcpus(struct vm *vm);
-void vm_unlock_vcpus(struct vm *vm);
-void vm_destroy(struct vm *vm);
-int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
-uint16_t vm_get_maxcpus(struct vm *vm);
-void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
- uint16_t *threads, uint16_t *maxcpus);
-int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
- uint16_t threads, uint16_t maxcpus);
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_run(struct vcpu *vcpu);
-int vm_suspend(struct vm *vm, enum vm_suspend_how how);
-void* vm_get_cookie(struct vm *vm);
+void *vm_get_cookie(struct vm *vm);
int vcpu_vcpuid(struct vcpu *vcpu);
void *vcpu_get_cookie(struct vcpu *vcpu);
struct vm *vcpu_vm(struct vcpu *vcpu);
struct vcpu *vm_vcpu(struct vm *vm, int cpu);
int vm_get_capability(struct vcpu *vcpu, int type, int *val);
int vm_set_capability(struct vcpu *vcpu, int type, int val);
-int vm_activate_cpu(struct vcpu *vcpu);
-int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
-int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_inject_exception(struct vcpu *vcpu, uint64_t scause);
int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr);
int vm_assert_irq(struct vm *vm, uint32_t irq);
@@ -197,13 +181,8 @@ int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
-void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
-cpuset_t vm_active_cpus(struct vm *vm);
-cpuset_t vm_debug_cpus(struct vm *vm);
-cpuset_t vm_suspended_cpus(struct vm *vm);
-
static __inline int
vcpu_rendezvous_pending(struct vm_eventinfo *info)
{
@@ -218,14 +197,9 @@ vcpu_suspended(struct vm_eventinfo *info)
return (*info->sptr);
}
-int vcpu_debugged(struct vcpu *vcpu);
-
void *vcpu_stats(struct vcpu *vcpu);
-void vcpu_notify_event(struct vcpu *vcpu);
struct vm_mem *vm_mem(struct vm *vm);
-enum vm_reg_name vm_segment_name(int seg_encoding);
-
#endif /* _KERNEL */
#define VM_DIR_READ 0
diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
index fea6421962dc..b3ba626962f3 100644
--- a/sys/riscv/vmm/vmm.c
+++ b/sys/riscv/vmm/vmm.c
@@ -45,7 +45,6 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
-#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -83,14 +82,6 @@ static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
-SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
-
-static int vmm_ipinum;
-SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
- "IPI vector used for vcpu notifications");
-
-static void vcpu_notify_event_locked(struct vcpu *vcpu);
-
/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
@@ -179,14 +170,6 @@ vm_init(struct vm *vm, bool create)
}
}
-void
-vm_disable_vcpu_creation(struct vm *vm)
-{
- sx_xlock(&vm->vcpus_init_lock);
- vm->dying = true;
- sx_xunlock(&vm->vcpus_init_lock);
-}
-
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
@@ -217,18 +200,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
return (vcpu);
}
-void
-vm_lock_vcpus(struct vm *vm)
-{
- sx_xlock(&vm->vcpus_init_lock);
-}
-
-void
-vm_unlock_vcpus(struct vm *vm)
-{
- sx_unlock(&vm->vcpus_init_lock);
-}
-
int
vm_create(const char *name, struct vm **retvm)
{
@@ -242,6 +213,7 @@ vm_create(const char *name, struct vm **retvm)
return (error);
}
strcpy(vm->name, name);
+ mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->sockets = 1;
@@ -258,35 +230,6 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
-void
-vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
- uint16_t *threads, uint16_t *maxcpus)
-{
- *sockets = vm->sockets;
- *cores = vm->cores;
- *threads = vm->threads;
- *maxcpus = vm->maxcpus;
-}
-
-uint16_t
-vm_get_maxcpus(struct vm *vm)
-{
- return (vm->maxcpus);
-}
-
-int
-vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
- uint16_t threads, uint16_t maxcpus)
-{
- /* Ignore maxcpus. */
- if ((sockets * cores * threads) > vm->maxcpus)
- return (EINVAL);
- vm->sockets = sockets;
- vm->cores = cores;
- vm->threads = threads;
- return(0);
-}
-
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -318,29 +261,15 @@ vm_cleanup(struct vm *vm, bool destroy)
void
vm_destroy(struct vm *vm)
{
-
vm_cleanup(vm, true);
-
free(vm, M_VMM);
}
-int
-vm_reinit(struct vm *vm)
+void
+vm_reset(struct vm *vm)
{
- int error;
-
- /*
- * A virtual machine can be reset only if all vcpus are suspended.
- */
- if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
- vm_cleanup(vm, false);
- vm_init(vm, false);
- error = 0;
- } else {
- error = EBUSY;
- }
-
- return (error);
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
}
const char *
@@ -437,33 +366,6 @@ out_user:
return (0);
}
-int
-vm_suspend(struct vm *vm, enum vm_suspend_how how)
-{
- int i;
-
- if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
- return (EINVAL);
-
- if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
- VM_CTR2(vm, "virtual machine already suspended %d/%d",
- vm->suspend, how);
- return (EALREADY);
- }
-
- VM_CTR1(vm, "virtual machine successfully suspended %d", how);
-
- /*
- * Notify all active vcpus that they are now suspended.
- */
- for (i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm_vcpu(vm, i));
- }
-
- return (0);
-}
-
void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
@@ -491,82 +393,6 @@ vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
vmexit->exitcode = VM_EXITCODE_DEBUG;
}
-int
-vm_activate_cpu(struct vcpu *vcpu)
-{
- struct vm *vm = vcpu->vm;
-
- if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
- return (EBUSY);
-
- CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
- return (0);
-
-}
-
-int
-vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
-{
- if (vcpu == NULL) {
- vm->debug_cpus = vm->active_cpus;
- for (int i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm_vcpu(vm, i));
- }
- } else {
- if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
- return (EINVAL);
-
- CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
- vcpu_notify_event(vcpu);
- }
- return (0);
-}
-
-int
-vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
-{
-
- if (vcpu == NULL) {
- CPU_ZERO(&vm->debug_cpus);
- } else {
- if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
- return (EINVAL);
-
- CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
- }
- return (0);
-}
-
-int
-vcpu_debugged(struct vcpu *vcpu)
-{
-
- return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
-}
-
-cpuset_t
-vm_active_cpus(struct vm *vm)
-{
-
- return (vm->active_cpus);
-}
-
-cpuset_t
-vm_debug_cpus(struct vm *vm)
-{
-
- return (vm->debug_cpus);
-}
-
-cpuset_t
-vm_suspended_cpus(struct vm *vm)
-{
-
- return (vm->suspended_cpus);
-}
-
-
void *
vcpu_stats(struct vcpu *vcpu)
{
@@ -574,47 +400,6 @@ vcpu_stats(struct vcpu *vcpu)
return (vcpu->stats);
}
-/*
- * This function is called to ensure that a vcpu "sees" a pending event
- * as soon as possible:
- * - If the vcpu thread is sleeping then it is woken up.
- * - If the vcpu is running on a different host_cpu then an IPI will be directed
- * to the host_cpu to cause the vcpu to trap into the hypervisor.
- */
-static void
-vcpu_notify_event_locked(struct vcpu *vcpu)
-{
- int hostcpu;
-
- hostcpu = vcpu->hostcpu;
- if (vcpu->state == VCPU_RUNNING) {
- KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
- if (hostcpu != curcpu) {
- ipi_cpu(hostcpu, vmm_ipinum);
- } else {
- /*
- * If the 'vcpu' is running on 'curcpu' then it must
- * be sending a notification to itself (e.g. SELF_IPI).
- * The pending event will be picked up when the vcpu
- * transitions back to guest context.
- */
- }
- } else {
- KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
- "with hostcpu %d", vcpu->state, hostcpu));
- if (vcpu->state == VCPU_SLEEPING)
- wakeup_one(vcpu);
- }
-}
-
-void
-vcpu_notify_event(struct vcpu *vcpu)
-{
- vcpu_lock(vcpu);
- vcpu_notify_event_locked(vcpu);
- vcpu_unlock(vcpu);
-}
-
struct vm_mem *
vm_mem(struct vm *vm)
{
@@ -655,72 +440,6 @@ save_guest_fpustate(struct vcpu *vcpu)
("%s: fpcurthread set with guest registers", __func__));
}
-static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
- bool from_idle)
-{
- int error;
-
- vcpu_assert_locked(vcpu);
-
- /*
- * State transitions from the vmmdev_ioctl() must always begin from
- * the VCPU_IDLE state. This guarantees that there is only a single
- * ioctl() operating on a vcpu at any point.
- */
- if (from_idle) {
- while (vcpu->state != VCPU_IDLE) {
- vcpu_notify_event_locked(vcpu);
- msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
- }
- } else {
- KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
- "vcpu idle state"));
- }
-
- if (vcpu->state == VCPU_RUNNING) {
- KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
- "mismatch for running vcpu", curcpu, vcpu->hostcpu));
- } else {
- KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
- "vcpu that is not running", vcpu->hostcpu));
- }
-
- /*
- * The following state transitions are allowed:
- * IDLE -> FROZEN -> IDLE
- * FROZEN -> RUNNING -> FROZEN
- * FROZEN -> SLEEPING -> FROZEN
- */
- switch (vcpu->state) {
- case VCPU_IDLE:
- case VCPU_RUNNING:
- case VCPU_SLEEPING:
- error = (newstate != VCPU_FROZEN);
- break;
- case VCPU_FROZEN:
- error = (newstate == VCPU_FROZEN);
- break;
- default:
- error = 1;
- break;
- }
-
- if (error)
- return (EBUSY);
-
- vcpu->state = newstate;
- if (newstate == VCPU_RUNNING)
- vcpu->hostcpu = curcpu;
- else
- vcpu->hostcpu = NOCPU;
-
- if (newstate == VCPU_IDLE)
- wakeup(&vcpu->state);
-
- return (0);
-}
-
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
@@ -788,32 +507,6 @@ vm_vcpu(struct vm *vm, int vcpuid)
}
int
-vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
-{
- int error;
-
- vcpu_lock(vcpu);
- error = vcpu_set_state_locked(vcpu, newstate, from_idle);
- vcpu_unlock(vcpu);
-
- return (error);
-}
-
-enum vcpu_state
-vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
-{
- enum vcpu_state state;
-
- vcpu_lock(vcpu);
- state = vcpu->state;
- if (hostcpu != NULL)
- *hostcpu = vcpu->hostcpu;
- vcpu_unlock(vcpu);
-
- return (state);
-}
-
-int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
if (reg < 0 || reg >= VM_REG_LAST)
diff --git a/sys/riscv/vmm/vmm_aplic.c b/sys/riscv/vmm/vmm_aplic.c
index eaf2caa4d313..7c1cd260d352 100644
--- a/sys/riscv/vmm/vmm_aplic.c
+++ b/sys/riscv/vmm/vmm_aplic.c
@@ -48,6 +48,7 @@
#include <machine/vmm_instruction_emul.h>
#include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_vm.h>
MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC");
diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c
index 426276444357..c1e6022097e3 100644
--- a/sys/riscv/vmm/vmm_sbi.c
+++ b/sys/riscv/vmm/vmm_sbi.c
@@ -36,6 +36,8 @@
#include <machine/sbi.h>
+#include <dev/vmm/vmm_vm.h>
+
#include "riscv.h"
#include "vmm_fence.h"
diff --git a/sys/riscv/vmm/vmm_vtimer.c b/sys/riscv/vmm/vmm_vtimer.c
index 0dadc962114f..cb2ca878116c 100644
--- a/sys/riscv/vmm/vmm_vtimer.c
+++ b/sys/riscv/vmm/vmm_vtimer.c
@@ -39,6 +39,8 @@
#include <dev/ofw/ofw_bus_subr.h>
#include <dev/ofw/openfirm.h>
+#include <dev/vmm/vmm_vm.h>
+
#include "riscv.h"
#define VTIMER_DEFAULT_FREQ 1000000