aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Corvin Köhne <CorvinK@beckhoff.com> 2022-09-07 07:07:03 +0000
committer: Emmanuel Vadot <manu@FreeBSD.org> 2022-10-14 10:03:05 +0000
commit: 0bda8d3e9f7a5c04881219723436616b23041e5f (patch)
tree: 002165f18a43151623c42652b630450f622c9e34
parent: a974702e274cbed52ae9ad9ecef8501e267b822d (diff)
downloadsrc-0bda8d3e9f7a5c04881219723436616b23041e5f.tar.gz
src-0bda8d3e9f7a5c04881219723436616b23041e5f.zip
vmm: permit some IPIs to be handled by userspace

Add VM_EXITCODE_IPI to permit returning unhandled IPIs to userland. INIT and STARTUP IPIs are now returned to userland. For backward compatibility, a new capability is added for enabling VM_EXITCODE_IPI.

Reviewed by: jhb
Differential Revision: https://reviews.freebsd.org/D35623
Sponsored by: Beckhoff Automation GmbH & Co. KG

-rw-r--r-- sys/amd64/include/vmm.h 8
-rw-r--r-- sys/amd64/vmm/amd/svm.c 10
-rw-r--r-- sys/amd64/vmm/intel/vmx.c 8
-rw-r--r-- sys/amd64/vmm/io/vlapic.c 192
-rw-r--r-- sys/amd64/vmm/io/vlapic.h 2
-rw-r--r-- sys/amd64/vmm/io/vlapic_priv.h 2
-rw-r--r-- sys/amd64/vmm/vmm.c 9
-rw-r--r-- usr.sbin/bhyve/bhyverun.c 34
-rw-r--r-- usr.sbin/bhyve/spinup_ap.c 3
9 files changed, 198 insertions, 70 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index dcf862c34264..37a74f053fb3 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -31,6 +31,7 @@
#ifndef _VMM_H_
#define _VMM_H_
+#include <sys/cpuset.h>
#include <sys/sdt.h>
#include <x86/segments.h>
@@ -483,6 +484,7 @@ enum vm_cap_type {
VM_CAP_BPT_EXIT,
VM_CAP_RDPID,
VM_CAP_RDTSCP,
+ VM_CAP_IPI_EXIT,
VM_CAP_MAX
};
@@ -630,6 +632,7 @@ enum vm_exitcode {
VM_EXITCODE_DEBUG,
VM_EXITCODE_VMINSN,
VM_EXITCODE_BPT,
+ VM_EXITCODE_IPI,
VM_EXITCODE_MAX
};
@@ -737,6 +740,11 @@ struct vm_exit {
struct {
enum vm_suspend_how how;
} suspended;
+ struct {
+ uint32_t mode;
+ uint8_t vector;
+ cpuset_t dmask;
+ } ipi;
struct vm_task_switch task_switch;
} u;
};
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 35e8d9833d0e..4195cc5bd049 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -2315,6 +2315,7 @@ static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
struct svm_softc *sc;
+ struct vlapic *vlapic;
int error;
sc = arg;
@@ -2333,6 +2334,10 @@ svm_setcap(void *arg, int vcpu, int type, int val)
if (val == 0)
error = EINVAL;
break;
+ case VM_CAP_IPI_EXIT:
+ vlapic = vm_lapic(sc->vm, vcpu);
+ vlapic->ipi_exit = val;
+ break;
default:
error = ENOENT;
break;
@@ -2344,6 +2349,7 @@ static int
svm_getcap(void *arg, int vcpu, int type, int *retval)
{
struct svm_softc *sc;
+ struct vlapic *vlapic;
int error;
sc = arg;
@@ -2361,6 +2367,10 @@ svm_getcap(void *arg, int vcpu, int type, int *retval)
case VM_CAP_UNRESTRICTED_GUEST:
*retval = 1; /* unrestricted guest is always enabled */
break;
+ case VM_CAP_IPI_EXIT:
+ vlapic = vm_lapic(sc->vm, vcpu);
+ *retval = vlapic->ipi_exit;
+ break;
default:
error = ENOENT;
break;
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 64544a6e7955..857028dcd0f1 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -3504,6 +3504,7 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
ret = 0;
break;
case VM_CAP_BPT_EXIT:
+ case VM_CAP_IPI_EXIT:
ret = 0;
break;
default:
@@ -3521,6 +3522,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
{
struct vmx *vmx = arg;
struct vmcs *vmcs = &vmx->vmcs[vcpu];
+ struct vlapic *vlapic;
uint32_t baseval;
uint32_t *pptr;
int error;
@@ -3599,6 +3601,12 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
reg = VMCS_EXCEPTION_BITMAP;
}
break;
+ case VM_CAP_IPI_EXIT:
+ retval = 0;
+
+ vlapic = vm_lapic(vmx->vm, vcpu);
+ vlapic->ipi_exit = val;
+ break;
default:
break;
}
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 9599b4b4e62c..8283c3cb422c 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);
+static void vlapic_reset(struct vlapic *vlapic);
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
@@ -957,13 +958,12 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
int i;
bool phys;
- cpuset_t dmask;
+ cpuset_t dmask, ipimask;
uint64_t icrval;
- uint32_t dest, vec, mode;
+ uint32_t dest, vec, mode, shorthand;
struct vlapic *vlapic2;
struct vm_exit *vmexit;
struct LAPIC *lapic;
- uint16_t maxcpus;
lapic = vlapic->apic_page;
lapic->icr_lo &= ~APIC_DELSTAT_PEND;
@@ -975,97 +975,147 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
dest = icrval >> (32 + 24);
vec = icrval & APIC_VECTOR_MASK;
mode = icrval & APIC_DELMODE_MASK;
+ phys = (icrval & APIC_DESTMODE_LOG) == 0;
+ shorthand = icrval & APIC_DEST_MASK;
- if (mode == APIC_DELMODE_FIXED && vec < 16) {
- vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
- VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
- return (0);
+ VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
+
+ switch (shorthand) {
+ case APIC_DEST_DESTFLD:
+ vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic));
+ break;
+ case APIC_DEST_SELF:
+ CPU_SETOF(vlapic->vcpuid, &dmask);
+ break;
+ case APIC_DEST_ALLISELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ break;
+ case APIC_DEST_ALLESELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ CPU_CLR(vlapic->vcpuid, &dmask);
+ break;
+ default:
+ __assert_unreachable();
}
- VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
+ /*
+ * ipimask is a set of vCPUs needing userland handling of the current
+ * IPI.
+ */
+ CPU_ZERO(&ipimask);
- if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
- switch (icrval & APIC_DEST_MASK) {
- case APIC_DEST_DESTFLD:
- phys = ((icrval & APIC_DESTMODE_LOG) == 0);
- vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
- x2apic(vlapic));
- break;
- case APIC_DEST_SELF:
- CPU_SETOF(vlapic->vcpuid, &dmask);
- break;
- case APIC_DEST_ALLISELF:
- dmask = vm_active_cpus(vlapic->vm);
- break;
- case APIC_DEST_ALLESELF:
- dmask = vm_active_cpus(vlapic->vm);
- CPU_CLR(vlapic->vcpuid, &dmask);
- break;
- default:
- CPU_ZERO(&dmask); /* satisfy gcc */
- break;
+ switch (mode) {
+ case APIC_DELMODE_FIXED:
+ if (vec < 16) {
+ vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
+ false);
+ VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
+ return (0);
}
CPU_FOREACH_ISSET(i, &dmask) {
- if (mode == APIC_DELMODE_FIXED) {
- lapic_intr_edge(vlapic->vm, i, vec);
- vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
- IPIS_SENT, i, 1);
- VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
- "to vcpuid %d", vec, i);
- } else {
- vm_inject_nmi(vlapic->vm, i);
- VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
- "to vcpuid %d", i);
- }
+ lapic_intr_edge(vlapic->vm, i, vec);
+ vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+ IPIS_SENT, i, 1);
+ VLAPIC_CTR2(vlapic,
+ "vlapic sending ipi %d to vcpuid %d", vec, i);
}
- return (0); /* handled completely in the kernel */
- }
+ break;
+ case APIC_DELMODE_NMI:
+ CPU_FOREACH_ISSET(i, &dmask) {
+ vm_inject_nmi(vlapic->vm, i);
+ VLAPIC_CTR1(vlapic,
+ "vlapic sending ipi nmi to vcpuid %d", i);
+ }
- maxcpus = vm_get_maxcpus(vlapic->vm);
- if (mode == APIC_DELMODE_INIT) {
+ break;
+ case APIC_DELMODE_INIT:
if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
- return (0);
-
- if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
- vlapic2 = vm_lapic(vlapic->vm, dest);
-
- /* move from INIT to waiting-for-SIPI state */
- if (vlapic2->boot_state == BS_INIT) {
- vlapic2->boot_state = BS_SIPI;
- }
+ break;
- return (0);
+ CPU_FOREACH_ISSET(i, &dmask) {
+ /*
+ * Userland which doesn't support the IPI exit requires
+ * that the boot state is set to SIPI here.
+ */
+ vlapic2 = vm_lapic(vlapic->vm, i);
+ vlapic2->boot_state = BS_SIPI;
+ CPU_SET(i, &ipimask);
}
- }
-
- if (mode == APIC_DELMODE_STARTUP) {
- if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
- vlapic2 = vm_lapic(vlapic->vm, dest);
+ break;
+ case APIC_DELMODE_STARTUP:
+ CPU_FOREACH_ISSET(i, &dmask) {
+ vlapic2 = vm_lapic(vlapic->vm, i);
/*
* Ignore SIPIs in any state other than wait-for-SIPI
*/
if (vlapic2->boot_state != BS_SIPI)
- return (0);
-
+ continue;
vlapic2->boot_state = BS_RUNNING;
+ CPU_SET(i, &ipimask);
+ }
+
+ break;
+ default:
+ return (1);
+ }
- *retu = true;
- vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
- vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
- vmexit->u.spinup_ap.vcpu = dest;
- vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+ if (!CPU_EMPTY(&ipimask)) {
+ vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
+ vmexit->exitcode = VM_EXITCODE_IPI;
+ vmexit->u.ipi.mode = mode;
+ vmexit->u.ipi.vector = vec;
+ vmexit->u.ipi.dmask = dmask;
- return (0);
+ *retu = true;
+
+ /*
+ * Old bhyve versions don't support the IPI exit. Translate it
+ * into the old style.
+ */
+ if (!vlapic->ipi_exit) {
+ if (mode == APIC_DELMODE_STARTUP) {
+ vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
+ vmexit->u.spinup_ap.vcpu = CPU_FFS(&ipimask) - 1;
+ vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+ } else {
+ *retu = false;
+ }
}
}
- /*
- * This will cause a return to userland.
- */
- return (1);
+ return (0);
+}
+
+static void
+vlapic_handle_init(struct vm *vm, int vcpuid, void *arg)
+{
+ struct vlapic *vlapic = vm_lapic(vm, vcpuid);
+
+ vlapic_reset(vlapic);
+
+ /* vlapic_reset modifies the boot state. */
+ vlapic->boot_state = BS_SIPI;
+}
+
+int
+vm_handle_ipi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ *retu = true;
+ switch (vme->u.ipi.mode) {
+ case APIC_DELMODE_INIT:
+ vm_smp_rendezvous(vm, vcpuid, vme->u.ipi.dmask,
+ vlapic_handle_init, NULL);
+ break;
+ case APIC_DELMODE_STARTUP:
+ break;
+ default:
+ return (1);
+ }
+
+ return (0);
}
void
@@ -1467,6 +1517,8 @@ vlapic_init(struct vlapic *vlapic)
if (vlapic->vcpuid == 0)
vlapic->msr_apicbase |= APICBASE_BSP;
+ vlapic->ipi_exit = false;
+
vlapic_reset(vlapic);
}
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
index b87657c8bb51..87f3d0c2660f 100644
--- a/sys/amd64/vmm/io/vlapic.h
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -115,4 +115,6 @@ void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val);
int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
#endif
+int vm_handle_ipi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu);
+
#endif /* _VLAPIC_H_ */
diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h
index fe7965cb65d7..4b3e9009e68c 100644
--- a/sys/amd64/vmm/io/vlapic_priv.h
+++ b/sys/amd64/vmm/io/vlapic_priv.h
@@ -183,6 +183,8 @@ struct vlapic {
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
+
+ bool ipi_exit;
};
void vlapic_init(struct vlapic *vlapic);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c504d4f26b3a..8daf2ae29737 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1818,6 +1818,15 @@ restart:
}
}
+ /*
+ * VM_EXITCODE_INST_EMUL could access the apic which could transform the
+ * exit code into VM_EXITCODE_IPI.
+ */
+ if (error == 0 && vme->exitcode == VM_EXITCODE_IPI) {
+ retu = false;
+ error = vm_handle_ipi(vm, vcpuid, vme, &retu);
+ }
+
if (error == 0 && retu == false)
goto restart;
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index fb6e3b8a13df..0a7e8e252918 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <amd64/vmm/intel/vmcs.h>
+#include <x86/apicreg.h>
#include <machine/atomic.h>
#include <machine/segments.h>
@@ -939,6 +940,35 @@ vmexit_breakpoint(struct vmctx *ctx __unused, struct vm_exit *vme, int *pvcpu)
return (VMEXIT_CONTINUE);
}
+static int
+vmexit_ipi(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ int error = -1;
+ int i;
+ switch (vmexit->u.ipi.mode) {
+ case APIC_DELMODE_INIT:
+ CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
+ error = vm_suspend_cpu(ctx, i);
+ if (error) {
+ warnx("%s: failed to suspend cpu %d\n",
+ __func__, i);
+ break;
+ }
+ }
+ break;
+ case APIC_DELMODE_STARTUP:
+ CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
+ spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT);
+ }
+ error = 0;
+ break;
+ default:
+ break;
+ }
+
+ return (error);
+}
+
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
@@ -955,6 +985,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
[VM_EXITCODE_DEBUG] = vmexit_debug,
[VM_EXITCODE_BPT] = vmexit_breakpoint,
+ [VM_EXITCODE_IPI] = vmexit_ipi,
};
static void
@@ -1155,6 +1186,9 @@ spinup_vcpu(struct vmctx *ctx, int vcpu, bool suspend)
error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
assert(error == 0);
+ error = vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1);
+ assert(error == 0);
+
fbsdrun_addcpu(ctx, vcpu, rip, suspend);
}
diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c
index 2b7e602f8003..438091e564e7 100644
--- a/usr.sbin/bhyve/spinup_ap.c
+++ b/usr.sbin/bhyve/spinup_ap.c
@@ -98,6 +98,9 @@ spinup_ap(struct vmctx *ctx, int newcpu, uint64_t rip)
error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
assert(error == 0);
+ error = vm_set_capability(ctx, newcpu, VM_CAP_IPI_EXIT, 1);
+ assert(error == 0);
+
spinup_ap_realmode(ctx, newcpu, &rip);
vm_resume_cpu(ctx, newcpu);