aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Johnston <markj@FreeBSD.org>2023-10-04 16:22:56 +0000
committerMark Johnston <markj@FreeBSD.org>2023-10-11 13:21:55 +0000
commit28c8a44993e73cba0ab78b8ff07e99592037c26c (patch)
treec984e3f436955f8d8a556ebd95a783c53a315748
parent013da5d9c4156a2a6128d184f2e5464e0146cbae (diff)
downloadsrc-28c8a44993e73cba0ab78b8ff07e99592037c26c.tar.gz
src-28c8a44993e73cba0ab78b8ff07e99592037c26c.zip
bhyve: Split vmexit handling into a separate file
Put it in amd64, since most of it is MD and won't be used on arm64. Add a bit of glue to bhyverun.h to make CPU startup and shutdown work without having to export more global variables. AP startup will be reworked further in a future revision.

This makes bhyverun.c much more machine-independent.

No functional change intended.

Reviewed by: corvink, jhb
MFC after: 1 week
Sponsored by: Innovate UK
Differential Revision: https://reviews.freebsd.org/D40556

(cherry picked from commit 72f9c9d82fce84fcb68c9aa1f32fabcf0c0038e9)
-rw-r--r--usr.sbin/bhyve/Makefile1
-rw-r--r--usr.sbin/bhyve/amd64/vmexit.c493
-rw-r--r--usr.sbin/bhyve/bhyverun.c468
-rw-r--r--usr.sbin/bhyve/bhyverun.h7
-rw-r--r--usr.sbin/bhyve/vmexit.h34
5 files changed, 551 insertions, 452 deletions
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index c2ccd60790ae..5f13d457d914 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -75,6 +75,7 @@ SRCS= \
usb_mouse.c \
vga.c \
virtio.c \
+ vmexit.c \
vmgenc.c \
xmsr.c
diff --git a/usr.sbin/bhyve/amd64/vmexit.c b/usr.sbin/bhyve/amd64/vmexit.c
new file mode 100644
index 000000000000..152fecaf424c
--- /dev/null
+++ b/usr.sbin/bhyve/amd64/vmexit.c
@@ -0,0 +1,493 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+#include <amd64/vmm/intel/vmcs.h>
+#include <x86/apicreg.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "config.h"
+#include "debug.h"
+#include "gdb.h"
+#include "inout.h"
+#include "mem.h"
+#ifdef BHYVE_SNAPSHOT
+#include "snapshot.h"
+#endif
+#include "spinup_ap.h"
+#include "vmexit.h"
+#include "xmsr.h"
+
+/*
+ * Handle an I/O port access exit by handing it to emulate_inout().
+ * Both VM_EXITCODE_INOUT and VM_EXITCODE_INOUT_STR dispatch here.
+ *
+ * Returns VMEXIT_CONTINUE if emulate_inout() reports success.  Otherwise
+ * the direction, access width, port and guest rip are logged to stderr and
+ * VMEXIT_ABORT is returned.
+ */
+static int
+vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+ int error;
+ int bytes, port, in;
+
+ vme = vmrun->vm_exit;
+ port = vme->u.inout.port;
+ bytes = vme->u.inout.bytes;
+ in = vme->u.inout.in;
+
+ error = emulate_inout(ctx, vcpu, vme);
+ if (error) {
+ /* Width suffix: 'b' for 1 byte, 'w' for 2, 'l' for anything else. */
+ fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
+ in ? "in" : "out",
+ bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
+ port, vme->rip);
+ return (VMEXIT_ABORT);
+ } else {
+ return (VMEXIT_CONTINUE);
+ }
+}
+
+/*
+ * Handle a guest RDMSR exit.
+ *
+ * The MSR value is requested from emulate_rdmsr() and then split into two
+ * 32-bit halves: the low half is written to guest RAX and the high half to
+ * guest RDX.
+ *
+ * If emulate_rdmsr() fails the access is logged.  When the "x86.strictmsr"
+ * config option is set, vm_inject_gp() is called and the registers are not
+ * written; otherwise whatever is left in val (initialised to 0 before the
+ * call) is handed back to the guest.
+ *
+ * Always returns VMEXIT_CONTINUE.
+ */
+static int
+vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
+ struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+ uint64_t val;
+ uint32_t eax, edx;
+ int error;
+
+ vme = vmrun->vm_exit;
+
+ val = 0;
+ error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
+ if (error != 0) {
+ fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
+ vme->u.msr.code, vcpu_id(vcpu));
+ if (get_config_bool("x86.strictmsr")) {
+ vm_inject_gp(vcpu);
+ return (VMEXIT_CONTINUE);
+ }
+ }
+
+ /* Low 32 bits of the result go to RAX. */
+ eax = val;
+ error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
+ assert(error == 0);
+
+ /* High 32 bits of the result go to RDX. */
+ edx = val >> 32;
+ error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
+ assert(error == 0);
+
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a guest WRMSR exit by passing the MSR number and value to
+ * emulate_wrmsr().
+ *
+ * If emulate_wrmsr() fails the access is logged.  When the "x86.strictmsr"
+ * config option is set, vm_inject_gp() is called as well; otherwise the
+ * failed write is simply dropped.
+ *
+ * Always returns VMEXIT_CONTINUE.
+ */
+static int
+vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
+ struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+ int error;
+
+ vme = vmrun->vm_exit;
+
+ error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
+ if (error != 0) {
+ fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
+ vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
+ if (get_config_bool("x86.strictmsr")) {
+ vm_inject_gp(vcpu);
+ return (VMEXIT_CONTINUE);
+ }
+ }
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Human-readable descriptions of VMX exit reasons, indexed by the
+ * EXIT_REASON_* constants.  Slots without a designated initializer are
+ * NULL; use vmexit_vmx_desc() rather than indexing this table directly.
+ */
+static const char * const vmx_exit_reason_desc[] = {
+ [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
+ [EXIT_REASON_EXT_INTR] = "External interrupt",
+ [EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
+ [EXIT_REASON_INIT] = "INIT signal",
+ [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
+ [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
+ [EXIT_REASON_SMI] = "Other SMI",
+ [EXIT_REASON_INTR_WINDOW] = "Interrupt window",
+ [EXIT_REASON_NMI_WINDOW] = "NMI window",
+ [EXIT_REASON_TASK_SWITCH] = "Task switch",
+ [EXIT_REASON_CPUID] = "CPUID",
+ [EXIT_REASON_GETSEC] = "GETSEC",
+ [EXIT_REASON_HLT] = "HLT",
+ [EXIT_REASON_INVD] = "INVD",
+ [EXIT_REASON_INVLPG] = "INVLPG",
+ [EXIT_REASON_RDPMC] = "RDPMC",
+ [EXIT_REASON_RDTSC] = "RDTSC",
+ [EXIT_REASON_RSM] = "RSM",
+ [EXIT_REASON_VMCALL] = "VMCALL",
+ [EXIT_REASON_VMCLEAR] = "VMCLEAR",
+ [EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
+ [EXIT_REASON_VMPTRLD] = "VMPTRLD",
+ [EXIT_REASON_VMPTRST] = "VMPTRST",
+ [EXIT_REASON_VMREAD] = "VMREAD",
+ [EXIT_REASON_VMRESUME] = "VMRESUME",
+ [EXIT_REASON_VMWRITE] = "VMWRITE",
+ [EXIT_REASON_VMXOFF] = "VMXOFF",
+ [EXIT_REASON_VMXON] = "VMXON",
+ [EXIT_REASON_CR_ACCESS] = "Control-register accesses",
+ [EXIT_REASON_DR_ACCESS] = "MOV DR",
+ [EXIT_REASON_INOUT] = "I/O instruction",
+ [EXIT_REASON_RDMSR] = "RDMSR",
+ [EXIT_REASON_WRMSR] = "WRMSR",
+ [EXIT_REASON_INVAL_VMCS] =
+ "VM-entry failure due to invalid guest state",
+ [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
+ [EXIT_REASON_MWAIT] = "MWAIT",
+ [EXIT_REASON_MTF] = "Monitor trap flag",
+ [EXIT_REASON_MONITOR] = "MONITOR",
+ [EXIT_REASON_PAUSE] = "PAUSE",
+ [EXIT_REASON_MCE_DURING_ENTRY] =
+ "VM-entry failure due to machine-check event",
+ [EXIT_REASON_TPR] = "TPR below threshold",
+ [EXIT_REASON_APIC_ACCESS] = "APIC access",
+ [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
+ [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
+ [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
+ [EXIT_REASON_EPT_FAULT] = "EPT violation",
+ [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
+ [EXIT_REASON_INVEPT] = "INVEPT",
+ [EXIT_REASON_RDTSCP] = "RDTSCP",
+ [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
+ [EXIT_REASON_INVVPID] = "INVVPID",
+ [EXIT_REASON_WBINVD] = "WBINVD",
+ [EXIT_REASON_XSETBV] = "XSETBV",
+ [EXIT_REASON_APIC_WRITE] = "APIC write",
+ [EXIT_REASON_RDRAND] = "RDRAND",
+ [EXIT_REASON_INVPCID] = "INVPCID",
+ [EXIT_REASON_VMFUNC] = "VMFUNC",
+ [EXIT_REASON_ENCLS] = "ENCLS",
+ [EXIT_REASON_RDSEED] = "RDSEED",
+ [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
+ [EXIT_REASON_XSAVES] = "XSAVES",
+ [EXIT_REASON_XRSTORS] = "XRSTORS"
+};
+
+/*
+ * Map a VMX exit reason to its description in vmx_exit_reason_desc[].
+ * Returns "Unknown" when the reason is past the end of the table or the
+ * table has no entry for it, so the result is never NULL.
+ */
+static const char *
+vmexit_vmx_desc(uint32_t exit_reason)
+{
+
+ if (exit_reason >= nitems(vmx_exit_reason_desc) ||
+ vmx_exit_reason_desc[exit_reason] == NULL)
+ return ("Unknown");
+ return (vmx_exit_reason_desc[exit_reason]);
+}
+
+/*
+ * DEBUG_EPT_MISCONFIG is defined unconditionally here, so the extra
+ * reporting for EPT misconfiguration exits in vmexit_vmx() is always
+ * compiled in.  The variables below are file-scope scratch storage filled
+ * in by vmexit_vmx(): the guest physical address, up to four page table
+ * entries, and the entry count reported by vm_get_gpa_pmap().
+ */
+#define DEBUG_EPT_MISCONFIG
+#ifdef DEBUG_EPT_MISCONFIG
+#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
+
+static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
+static int ept_misconfig_ptenum;
+#endif
+
+/*
+ * Report a VM_EXITCODE_VMX exit: dump the exit record (rip, instruction
+ * length, status, exit reason with its description, qualification,
+ * instruction type and error) to stderr.  Always returns VMEXIT_ABORT.
+ *
+ * With DEBUG_EPT_MISCONFIG defined, an EPT misconfiguration exit also
+ * prints the guest physical address and the page table entries obtained
+ * from vm_get_gpa_pmap().
+ */
+static int
+vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+
+ vme = vmrun->vm_exit;
+
+ fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
+ fprintf(stderr, "\treason\t\tVMX\n");
+ fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
+ fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
+ fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status);
+ fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason,
+ vmexit_vmx_desc(vme->u.vmx.exit_reason));
+ fprintf(stderr, "\tqualification\t0x%016lx\n",
+ vme->u.vmx.exit_qualification);
+ fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type);
+ fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error);
+#ifdef DEBUG_EPT_MISCONFIG
+ if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+ /*
+ * NOTE(review): the return values of vm_get_register() and
+ * vm_get_gpa_pmap() are not checked, so if either call fails
+ * the values printed below are whatever the static scratch
+ * variables already held.
+ */
+ vm_get_register(vcpu,
+ VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
+ &ept_misconfig_gpa);
+ vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
+ &ept_misconfig_ptenum);
+ fprintf(stderr, "\tEPT misconfiguration:\n");
+ fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
+ fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
+ ept_misconfig_ptenum, ept_misconfig_pte[0],
+ ept_misconfig_pte[1], ept_misconfig_pte[2],
+ ept_misconfig_pte[3]);
+ }
+#endif /* DEBUG_EPT_MISCONFIG */
+ return (VMEXIT_ABORT);
+}
+
+/*
+ * Report a VM_EXITCODE_SVM exit: dump rip, instruction length, exit code
+ * and the two exitinfo words to stderr.  Always returns VMEXIT_ABORT.
+ */
+static int
+vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+
+ vme = vmrun->vm_exit;
+
+ fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
+ fprintf(stderr, "\treason\t\tSVM\n");
+ fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
+ fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
+ fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode);
+ fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1);
+ fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2);
+ return (VMEXIT_ABORT);
+}
+
+/*
+ * Handle a VM_EXITCODE_BOGUS exit.  Nothing needs to be done other than
+ * checking that the exit record carries a zero instruction length; the
+ * vCPU is then resumed (VMEXIT_CONTINUE).
+ */
+static int
+vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+ struct vm_run *vmrun)
+{
+ assert(vmrun->vm_exit->inst_length == 0);
+
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a VM_EXITCODE_REQIDLE exit.  As with vmexit_bogus(), the only
+ * work is asserting a zero instruction length before resuming the vCPU
+ * (VMEXIT_CONTINUE).
+ */
+static int
+vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+ struct vm_run *vmrun)
+{
+ assert(vmrun->vm_exit->inst_length == 0);
+
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a VM_EXITCODE_HLT exit.  No arguments are used; the vCPU is
+ * simply resumed (VMEXIT_CONTINUE).
+ */
+static int
+vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+ struct vm_run *vmrun __unused)
+{
+ /*
+ * Just continue execution with the next instruction. We use
+ * the HLT VM exit as a way to be friendly with the host
+ * scheduler.
+ */
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a VM_EXITCODE_PAUSE exit.  No arguments are used and no work is
+ * done; the vCPU is resumed (VMEXIT_CONTINUE).
+ */
+static int
+vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+ struct vm_run *vmrun __unused)
+{
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a VM_EXITCODE_MTRAP exit by passing the vCPU to gdb_cpu_mtrap().
+ * The exit record must carry a zero instruction length.
+ *
+ * When built with BHYVE_SNAPSHOT, the gdb call is bracketed by
+ * checkpoint_cpu_suspend() and checkpoint_cpu_resume() for this vCPU.
+ *
+ * Always returns VMEXIT_CONTINUE.
+ */
+static int
+vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
+ struct vm_run *vmrun)
+{
+ assert(vmrun->vm_exit->inst_length == 0);
+
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_suspend(vcpu_id(vcpu));
+#endif
+ gdb_cpu_mtrap(vcpu);
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_resume(vcpu_id(vcpu));
+#endif
+
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle an instruction-emulation exit for a guest memory access.
+ *
+ * If the instruction in the exit record has not been decoded yet
+ * (vie->decoded is clear), it is decoded here and the guest rip is advanced
+ * past it.  The access to u.inst_emul.gpa is then handed to emulate_mem().
+ *
+ * Returns VMEXIT_CONTINUE on success.  On any failure the fetched
+ * instruction bytes and the guest rip are logged and VMEXIT_ABORT is
+ * returned.
+ */
+static int
+vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
+    struct vm_run *vmrun)
+{
+	struct vm_exit *vme;
+	struct vie *vie;
+	int err, i, cs_d;
+	enum vm_cpu_mode mode;
+
+	vme = vmrun->vm_exit;
+
+	vie = &vme->u.inst_emul.vie;
+	if (!vie->decoded) {
+		/*
+		 * Attempt to decode in userspace as a fallback.  This allows
+		 * updating instruction decode in bhyve without rebooting the
+		 * kernel (rapid prototyping), albeit with much slower
+		 * emulation.
+		 */
+		vie_restart(vie);
+		mode = vme->u.inst_emul.paging.cpu_mode;
+		cs_d = vme->u.inst_emul.cs_d;
+		if (vmm_decode_instruction(mode, cs_d, vie) != 0)
+			goto fail;
+		/* Step the guest past the instruction that was just decoded. */
+		if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
+		    vme->rip + vie->num_processed) != 0)
+			goto fail;
+	}
+
+	err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
+	    &vme->u.inst_emul.paging);
+	if (err) {
+		if (err == ESRCH) {
+			/*
+			 * EPRINTLN() terminates the line itself, so the
+			 * format string must not carry its own newline.
+			 */
+			EPRINTLN("Unhandled memory access to 0x%lx",
+			    vme->u.inst_emul.gpa);
+		}
+		goto fail;
+	}
+
+	return (VMEXIT_CONTINUE);
+
+fail:
+	/* Dump the instruction bytes that were fetched for this exit. */
+	fprintf(stderr, "Failed to emulate instruction sequence [ ");
+	for (i = 0; i < vie->num_valid; i++)
+		fprintf(stderr, "%02x", vie->inst[i]);
+	FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
+	return (VMEXIT_ABORT);
+}
+
+/*
+ * Handle a VM_EXITCODE_SUSPENDED exit.
+ *
+ * The calling vCPU is first removed with fbsdrun_deletecpu(); the process
+ * then exits with a status chosen by the suspend reason:
+ *
+ *	VM_SUSPEND_RESET	0
+ *	VM_SUSPEND_POWEROFF	1 (the VM is destroyed first when the
+ *				"destroy_on_poweroff" config option is true)
+ *	VM_SUSPEND_HALT		2
+ *	VM_SUSPEND_TRIPLEFAULT	3
+ *	anything else		100, after logging the unexpected reason
+ *
+ * Every arm of the switch calls exit(), so the trailing return is never
+ * reached.
+ */
+static int
+vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+ enum vm_suspend_how how;
+ int vcpuid = vcpu_id(vcpu);
+
+ vme = vmrun->vm_exit;
+
+ how = vme->u.suspended.how;
+
+ fbsdrun_deletecpu(vcpuid);
+
+ switch (how) {
+ case VM_SUSPEND_RESET:
+ exit(0);
+ case VM_SUSPEND_POWEROFF:
+ if (get_config_bool_default("destroy_on_poweroff", false))
+ vm_destroy(ctx);
+ exit(1);
+ case VM_SUSPEND_HALT:
+ exit(2);
+ case VM_SUSPEND_TRIPLEFAULT:
+ exit(3);
+ default:
+ fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
+ exit(100);
+ }
+ return (0); /* NOTREACHED */
+}
+
+/*
+ * Handle a VM_EXITCODE_DEBUG exit by passing the vCPU to
+ * gdb_cpu_suspend(), then sleeping briefly before resuming it.
+ *
+ * When built with BHYVE_SNAPSHOT, the gdb call is bracketed by
+ * checkpoint_cpu_suspend() and checkpoint_cpu_resume() for this vCPU.
+ *
+ * Always returns VMEXIT_CONTINUE.
+ */
+static int
+vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
+ struct vm_run *vmrun __unused)
+{
+
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_suspend(vcpu_id(vcpu));
+#endif
+ gdb_cpu_suspend(vcpu);
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_resume(vcpu_id(vcpu));
+#endif
+ /*
+ * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
+ * window between activation of the vCPU thread and the STARTUP IPI.
+ */
+ usleep(1000);
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a VM_EXITCODE_BPT exit by passing the vCPU and its exit record
+ * to gdb_cpu_breakpoint().  Always returns VMEXIT_CONTINUE.
+ */
+static int
+vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
+ struct vm_run *vmrun)
+{
+ gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
+ return (VMEXIT_CONTINUE);
+}
+
+/*
+ * Handle a VM_EXITCODE_IPI exit.  The target vCPUs are the members of
+ * vmrun->cpuset; the action depends on the delivery mode in the exit
+ * record:
+ *
+ *	APIC_DELMODE_INIT	each target is suspended with
+ *				fbsdrun_suspendcpu(), stopping at the
+ *				first failure.
+ *	APIC_DELMODE_STARTUP	each target is started with spinup_ap(),
+ *				passing the IPI vector shifted left by
+ *				PAGE_SHIFT.
+ *
+ * Returns 0 on success.  Returns the fbsdrun_suspendcpu() error if a
+ * suspend fails, and -1 for any other delivery mode or when an INIT
+ * request has no target vCPUs.
+ */
+static int
+vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
+ struct vm_run *vmrun)
+{
+ struct vm_exit *vme;
+ cpuset_t *dmask;
+ int error = -1;
+ int i;
+
+ dmask = vmrun->cpuset;
+ vme = vmrun->vm_exit;
+
+ switch (vme->u.ipi.mode) {
+ case APIC_DELMODE_INIT:
+ CPU_FOREACH_ISSET(i, dmask) {
+ error = fbsdrun_suspendcpu(i);
+ if (error) {
+ warnx("failed to suspend cpu %d", i);
+ break;
+ }
+ }
+ break;
+ case APIC_DELMODE_STARTUP:
+ CPU_FOREACH_ISSET(i, dmask) {
+ spinup_ap(fbsdrun_vcpu(i),
+ vme->u.ipi.vector << PAGE_SHIFT);
+ }
+ error = 0;
+ break;
+ default:
+ /* Unhandled delivery mode: fall out with error still -1. */
+ break;
+ }
+
+ return (error);
+}
+
+/* The task switch handler is only declared here; it is not defined in this file. */
+int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);
+
+/*
+ * VM exit dispatch table, indexed by VM_EXITCODE_*.  Exit codes without an
+ * entry are NULL, so callers must check for NULL before calling through
+ * the table.
+ */
+const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
+ [VM_EXITCODE_INOUT] = vmexit_inout,
+ [VM_EXITCODE_INOUT_STR] = vmexit_inout,
+ [VM_EXITCODE_VMX] = vmexit_vmx,
+ [VM_EXITCODE_SVM] = vmexit_svm,
+ [VM_EXITCODE_BOGUS] = vmexit_bogus,
+ [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
+ [VM_EXITCODE_RDMSR] = vmexit_rdmsr,
+ [VM_EXITCODE_WRMSR] = vmexit_wrmsr,
+ [VM_EXITCODE_MTRAP] = vmexit_mtrap,
+ [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+ [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
+ [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
+ [VM_EXITCODE_DEBUG] = vmexit_debug,
+ [VM_EXITCODE_BPT] = vmexit_breakpoint,
+ [VM_EXITCODE_IPI] = vmexit_ipi,
+ [VM_EXITCODE_HLT] = vmexit_hlt,
+ [VM_EXITCODE_PAUSE] = vmexit_pause,
+};
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 7088f11f63eb..31fda7028569 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -41,11 +41,7 @@
#include <sys/un.h>
#endif
-#include <amd64/vmm/intel/vmcs.h>
-#include <x86/apicreg.h>
-
#include <machine/atomic.h>
-#include <machine/segments.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
@@ -73,11 +69,6 @@
#include <libxo/xo.h>
#endif
-#include <machine/vmm.h>
-#ifndef WITHOUT_CAPSICUM
-#include <machine/vmm_dev.h>
-#endif
-#include <machine/vmm_instruction_emul.h>
#include <vmmapi.h>
#include "bhyverun.h"
@@ -112,83 +103,14 @@
#include "snapshot.h"
#endif
#include "tpm_device.h"
-#include "xmsr.h"
-#include "spinup_ap.h"
#include "rtc.h"
#include "vmgenc.h"
+#include "vmexit.h"
+#include "xmsr.h"
#define MB (1024UL * 1024)
#define GB (1024UL * MB)
-static const char * const vmx_exit_reason_desc[] = {
- [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
- [EXIT_REASON_EXT_INTR] = "External interrupt",
- [EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
- [EXIT_REASON_INIT] = "INIT signal",
- [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
- [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
- [EXIT_REASON_SMI] = "Other SMI",
- [EXIT_REASON_INTR_WINDOW] = "Interrupt window",
- [EXIT_REASON_NMI_WINDOW] = "NMI window",
- [EXIT_REASON_TASK_SWITCH] = "Task switch",
- [EXIT_REASON_CPUID] = "CPUID",
- [EXIT_REASON_GETSEC] = "GETSEC",
- [EXIT_REASON_HLT] = "HLT",
- [EXIT_REASON_INVD] = "INVD",
- [EXIT_REASON_INVLPG] = "INVLPG",
- [EXIT_REASON_RDPMC] = "RDPMC",
- [EXIT_REASON_RDTSC] = "RDTSC",
- [EXIT_REASON_RSM] = "RSM",
- [EXIT_REASON_VMCALL] = "VMCALL",
- [EXIT_REASON_VMCLEAR] = "VMCLEAR",
- [EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
- [EXIT_REASON_VMPTRLD] = "VMPTRLD",
- [EXIT_REASON_VMPTRST] = "VMPTRST",
- [EXIT_REASON_VMREAD] = "VMREAD",
- [EXIT_REASON_VMRESUME] = "VMRESUME",
- [EXIT_REASON_VMWRITE] = "VMWRITE",
- [EXIT_REASON_VMXOFF] = "VMXOFF",
- [EXIT_REASON_VMXON] = "VMXON",
- [EXIT_REASON_CR_ACCESS] = "Control-register accesses",
- [EXIT_REASON_DR_ACCESS] = "MOV DR",
- [EXIT_REASON_INOUT] = "I/O instruction",
- [EXIT_REASON_RDMSR] = "RDMSR",
- [EXIT_REASON_WRMSR] = "WRMSR",
- [EXIT_REASON_INVAL_VMCS] =
- "VM-entry failure due to invalid guest state",
- [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
- [EXIT_REASON_MWAIT] = "MWAIT",
- [EXIT_REASON_MTF] = "Monitor trap flag",
- [EXIT_REASON_MONITOR] = "MONITOR",
- [EXIT_REASON_PAUSE] = "PAUSE",
- [EXIT_REASON_MCE_DURING_ENTRY] =
- "VM-entry failure due to machine-check event",
- [EXIT_REASON_TPR] = "TPR below threshold",
- [EXIT_REASON_APIC_ACCESS] = "APIC access",
- [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
- [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
- [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
- [EXIT_REASON_EPT_FAULT] = "EPT violation",
- [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
- [EXIT_REASON_INVEPT] = "INVEPT",
- [EXIT_REASON_RDTSCP] = "RDTSCP",
- [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
- [EXIT_REASON_INVVPID] = "INVVPID",
- [EXIT_REASON_WBINVD] = "WBINVD",
- [EXIT_REASON_XSETBV] = "XSETBV",
- [EXIT_REASON_APIC_WRITE] = "APIC write",
- [EXIT_REASON_RDRAND] = "RDRAND",
- [EXIT_REASON_INVPCID] = "INVPCID",
- [EXIT_REASON_VMFUNC] = "VMFUNC",
- [EXIT_REASON_ENCLS] = "ENCLS",
- [EXIT_REASON_RDSEED] = "RDSEED",
- [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
- [EXIT_REASON_XSAVES] = "XSAVES",
- [EXIT_REASON_XRSTORS] = "XRSTORS"
-};
-
-typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *);
-
int guest_ncpus;
uint16_t cpu_cores, cpu_sockets, cpu_threads;
@@ -508,6 +430,12 @@ fbsdrun_virtio_msix(void)
return (get_config_bool_default("virtio_msix", true));
}
+/*
+ * Return the vcpu handle stored in vcpu_info[] for the given vCPU id.
+ * The id is used as an array index without a range check.
+ */
+struct vcpu *
+fbsdrun_vcpu(int vcpuid)
+{
+ return (vcpu_info[vcpuid].vcpu);
+}
+
static void *
fbsdrun_start_thread(void *param)
{
@@ -554,7 +482,7 @@ fbsdrun_addcpu(struct vcpu_info *vi)
assert(error == 0);
}
-static void
+void
fbsdrun_deletecpu(int vcpu)
{
static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
@@ -581,376 +509,12 @@ fbsdrun_deletecpu(int vcpu)
pthread_mutex_unlock(&resetcpu_mtx);
}
-static int
-vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
-{
- struct vm_exit *vme;
- int error;
- int bytes, port, in;
-
- vme = vmrun->vm_exit;
- port = vme->u.inout.port;
- bytes = vme->u.inout.bytes;
- in = vme->u.inout.in;
-
- error = emulate_inout(ctx, vcpu, vme);
- if (error) {
- fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
- in ? "in" : "out",
- bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
- port, vme->rip);
- return (VMEXIT_ABORT);
- } else {
- return (VMEXIT_CONTINUE);
- }
-}
-
-static int
-vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
- struct vm_run *vmrun)
-{
- struct vm_exit *vme;
- uint64_t val;
- uint32_t eax, edx;
- int error;
-
- vme = vmrun->vm_exit;
-
- val = 0;
- error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
- if (error != 0) {
- fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
- vme->u.msr.code, vcpu_id(vcpu));
- if (get_config_bool("x86.strictmsr")) {
- vm_inject_gp(vcpu);
- return (VMEXIT_CONTINUE);
- }
- }
-
- eax = val;
- error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
- assert(error == 0);
-
- edx = val >> 32;
- error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
- assert(error == 0);
-
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
- struct vm_run *vmrun)
-{
- struct vm_exit *vme;
- int error;
-
- vme = vmrun->vm_exit;
-
- error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
- if (error != 0) {
- fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
- vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
- if (get_config_bool("x86.strictmsr")) {
- vm_inject_gp(vcpu);
- return (VMEXIT_CONTINUE);
- }
- }
- return (VMEXIT_CONTINUE);
-}
-
-#define DEBUG_EPT_MISCONFIG
-#ifdef DEBUG_EPT_MISCONFIG
-#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
-
-static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
-static int ept_misconfig_ptenum;
-#endif
-
-static const char *
-vmexit_vmx_desc(uint32_t exit_reason)
-{
-
- if (exit_reason >= nitems(vmx_exit_reason_desc) ||
- vmx_exit_reason_desc[exit_reason] == NULL)
- return ("Unknown");
- return (vmx_exit_reason_desc[exit_reason]);
-}
-
-static int
-vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
-{
- struct vm_exit *vme;
-
- vme = vmrun->vm_exit;
-
- fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
- fprintf(stderr, "\treason\t\tVMX\n");
- fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
- fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
- fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status);
- fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason,
- vmexit_vmx_desc(vme->u.vmx.exit_reason));
- fprintf(stderr, "\tqualification\t0x%016lx\n",
- vme->u.vmx.exit_qualification);
- fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type);
- fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error);
-#ifdef DEBUG_EPT_MISCONFIG
- if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
- vm_get_register(vcpu,
- VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
- &ept_misconfig_gpa);
- vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
- &ept_misconfig_ptenum);
- fprintf(stderr, "\tEPT misconfiguration:\n");
- fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
- fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
- ept_misconfig_ptenum, ept_misconfig_pte[0],
- ept_misconfig_pte[1], ept_misconfig_pte[2],
- ept_misconfig_pte[3]);
- }
-#endif /* DEBUG_EPT_MISCONFIG */
- return (VMEXIT_ABORT);
-}
-
-static int
-vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
-{
- struct vm_exit *vme;
-
- vme = vmrun->vm_exit;
-
- fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
- fprintf(stderr, "\treason\t\tSVM\n");
- fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
- fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
- fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode);
- fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1);
- fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2);
- return (VMEXIT_ABORT);
-}
-
-static int
-vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
- struct vm_run *vmrun)
-{
- assert(vmrun->vm_exit->inst_length == 0);
-
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
- struct vm_run *vmrun)
-{
- assert(vmrun->vm_exit->inst_length == 0);
-
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
- struct vm_run *vmrun __unused)
-{
- /*
- * Just continue execution with the next instruction. We use
- * the HLT VM exit as a way to be friendly with the host
- * scheduler.
- */
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
- struct vm_run *vmrun __unused)
-{
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
- struct vm_run *vmrun)
-{
- assert(vmrun->vm_exit->inst_length == 0);
-
-#ifdef BHYVE_SNAPSHOT
- checkpoint_cpu_suspend(vcpu_id(vcpu));
-#endif
- gdb_cpu_mtrap(vcpu);
-#ifdef BHYVE_SNAPSHOT
- checkpoint_cpu_resume(vcpu_id(vcpu));
-#endif
-
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
- struct vm_run *vmrun)
-{
- struct vm_exit *vme;
- struct vie *vie;
- int err, i, cs_d;
- enum vm_cpu_mode mode;
-
- vme = vmrun->vm_exit;
-
- vie = &vme->u.inst_emul.vie;
- if (!vie->decoded) {
- /*
- * Attempt to decode in userspace as a fallback. This allows
- * updating instruction decode in bhyve without rebooting the
- * kernel (rapid prototyping), albeit with much slower
- * emulation.
- */
- vie_restart(vie);
- mode = vme->u.inst_emul.paging.cpu_mode;
- cs_d = vme->u.inst_emul.cs_d;
- if (vmm_decode_instruction(mode, cs_d, vie) != 0)
- goto fail;
- if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
- vme->rip + vie->num_processed) != 0)
- goto fail;
- }
-
- err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
- &vme->u.inst_emul.paging);
- if (err) {
- if (err == ESRCH) {
- EPRINTLN("Unhandled memory access to 0x%lx\n",
- vme->u.inst_emul.gpa);
- }
- goto fail;
- }
-
- return (VMEXIT_CONTINUE);
-
-fail:
- fprintf(stderr, "Failed to emulate instruction sequence [ ");
- for (i = 0; i < vie->num_valid; i++)
- fprintf(stderr, "%02x", vie->inst[i]);
- FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
- return (VMEXIT_ABORT);
-}
-
-static int
-vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
-{
- struct vm_exit *vme;
- enum vm_suspend_how how;
- int vcpuid = vcpu_id(vcpu);
-
- vme = vmrun->vm_exit;
-
- how = vme->u.suspended.how;
-
- fbsdrun_deletecpu(vcpuid);
-
- switch (how) {
- case VM_SUSPEND_RESET:
- exit(0);
- case VM_SUSPEND_POWEROFF:
- if (get_config_bool_default("destroy_on_poweroff", false))
- vm_destroy(ctx);
- exit(1);
- case VM_SUSPEND_HALT:
- exit(2);
- case VM_SUSPEND_TRIPLEFAULT:
- exit(3);
- default:
- fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
- exit(100);
- }
- return (0); /* NOTREACHED */
-}
-
-static int
-vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
- struct vm_run *vmrun __unused)
-{
-
-#ifdef BHYVE_SNAPSHOT
- checkpoint_cpu_suspend(vcpu_id(vcpu));
-#endif
- gdb_cpu_suspend(vcpu);
-#ifdef BHYVE_SNAPSHOT
- checkpoint_cpu_resume(vcpu_id(vcpu));
-#endif
- /*
- * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
- * window between activation of the vCPU thread and the STARTUP IPI.
- */
- usleep(1000);
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
- struct vm_run *vmrun)
-{
- gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
- return (VMEXIT_CONTINUE);
-}
-
-static int
-vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
- struct vm_run *vmrun)
+int
+fbsdrun_suspendcpu(int vcpuid)
{
- struct vm_exit *vme;
- cpuset_t *dmask;
- int error = -1;
- int i;
-
- dmask = vmrun->cpuset;
- vme = vmrun->vm_exit;
-
- switch (vme->u.ipi.mode) {
- case APIC_DELMODE_INIT:
- CPU_FOREACH_ISSET(i, dmask) {
- error = vm_suspend_cpu(vcpu_info[i].vcpu);
- if (error) {
- warnx("%s: failed to suspend cpu %d\n",
- __func__, i);
- break;
- }
- }
- break;
- case APIC_DELMODE_STARTUP:
- CPU_FOREACH_ISSET(i, dmask) {
- spinup_ap(vcpu_info[i].vcpu,
- vme->u.ipi.vector << PAGE_SHIFT);
- }
- error = 0;
- break;
- default:
- break;
- }
-
- return (error);
+ return (vm_suspend_cpu(vcpu_info[vcpuid].vcpu));
}
-int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);
-
-static const vmexit_handler_t handler[VM_EXITCODE_MAX] = {
- [VM_EXITCODE_INOUT] = vmexit_inout,
- [VM_EXITCODE_INOUT_STR] = vmexit_inout,
- [VM_EXITCODE_VMX] = vmexit_vmx,
- [VM_EXITCODE_SVM] = vmexit_svm,
- [VM_EXITCODE_BOGUS] = vmexit_bogus,
- [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
- [VM_EXITCODE_RDMSR] = vmexit_rdmsr,
- [VM_EXITCODE_WRMSR] = vmexit_wrmsr,
- [VM_EXITCODE_MTRAP] = vmexit_mtrap,
- [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
- [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
- [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
- [VM_EXITCODE_DEBUG] = vmexit_debug,
- [VM_EXITCODE_BPT] = vmexit_breakpoint,
- [VM_EXITCODE_IPI] = vmexit_ipi,
- [VM_EXITCODE_HLT] = vmexit_hlt,
- [VM_EXITCODE_PAUSE] = vmexit_pause,
-};
-
static void
vm_loop(struct vmctx *ctx, struct vcpu *vcpu)
{
@@ -973,13 +537,13 @@ vm_loop(struct vmctx *ctx, struct vcpu *vcpu)
break;
exitcode = vme.exitcode;
- if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
- fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
- exitcode);
+ if (exitcode >= VM_EXITCODE_MAX ||
+ vmexit_handlers[exitcode] == NULL) {
+ warnx("vm_loop: unexpected exitcode 0x%x", exitcode);
exit(4);
}
- rc = (*handler[exitcode])(ctx, vcpu, &vmrun);
+ rc = (*vmexit_handlers[exitcode])(ctx, vcpu, &vmrun);
switch (rc) {
case VMEXIT_CONTINUE:
diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h
index fc0d2595e66b..b1cfb99a964e 100644
--- a/usr.sbin/bhyve/bhyverun.h
+++ b/usr.sbin/bhyve/bhyverun.h
@@ -44,6 +44,13 @@ void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr);
#endif
+/*
+ * vCPU helpers implemented in bhyverun.c, so that code in other files can
+ * look up, delete and suspend vCPUs by id.
+ */
+struct vcpu;
+struct vcpu *fbsdrun_vcpu(int vcpuid);
+void fbsdrun_deletecpu(int vcpuid);
+int fbsdrun_suspendcpu(int vcpuid);
+
int fbsdrun_virtio_msix(void);
+/* Signature shared by every VM exit handler. */
+typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *);
+
#endif
diff --git a/usr.sbin/bhyve/vmexit.h b/usr.sbin/bhyve/vmexit.h
new file mode 100644
index 000000000000..c5c0d7ad5266
--- /dev/null
+++ b/usr.sbin/bhyve/vmexit.h
@@ -0,0 +1,34 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMEXIT_H_
+#define _VMEXIT_H_
+
+/*
+ * VM exit dispatch table, indexed by exit code.
+ *
+ * NOTE(review): this header includes nothing itself.  vmexit_handler_t is
+ * declared in bhyverun.h, and VM_EXITCODE_MAX must also be visible, so
+ * both have to be in scope before this header is included.
+ */
+extern const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX];
+
+#endif /* !_VMEXIT_H_ */