Diffstat (limited to 'sys/amd64/vmm')
69 files changed, 4016 insertions, 4828 deletions
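Several of the AMD IOMMU hunks below convert the iommu_ops callbacks from returning a length, with (uint64_t)-1 overloaded to mean failure (or from returning void), into returning an errno value, with the mapped length reported through a new res_len out-parameter. A minimal sketch of that conversion pattern, using hypothetical names rather than the actual vmm(4) ones:

/*
 * Hypothetical sketch (not FreeBSD code) of the callback conversion the
 * patch applies to iommu_ops: errno return plus out-parameter instead of
 * a return value that overloads length and failure.
 */
#include <errno.h>
#include <stdint.h>

/* Old style: error and payload share the return value. */
static uint64_t
create_mapping_old(void *ptp, uint64_t gpa, uint64_t hpa, uint64_t len)
{
	if (ptp == NULL)
		return ((uint64_t)-1);	/* indistinguishable from a huge length */
	return (len);
}

/* New style: errno return, length reported through *res_len. */
static int
create_mapping_new(void *ptp, uint64_t gpa, uint64_t hpa, uint64_t len,
    uint64_t *res_len)
{
	if (ptp == NULL)
		return (EINVAL);
	*res_len = len;
	return (0);
}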
diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c index 89a50ed7c0a9..c3a4547afeeb 100644 --- a/sys/amd64/vmm/amd/amdv.c +++ b/sys/amd64/vmm/amd/amdv.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/errno.h> diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c index 87270b24c9f6..831c31277570 100644 --- a/sys/amd64/vmm/amd/amdvi_hw.c +++ b/sys/amd64/vmm/amd/amdvi_hw.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016, Anish Gupta (anish@freebsd.org) * All rights reserved. @@ -26,9 +26,6 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> @@ -127,7 +124,7 @@ static inline uint32_t amdvi_pci_read(struct amdvi_softc *softc, int off) { - return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid), + return (pci_cfgregread(softc->pci_seg, PCI_RID2BUS(softc->pci_rid), PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid), off, 4)); } @@ -931,8 +928,8 @@ amdvi_teardown_hw(struct amdvi_softc *softc) dev = softc->dev; - /* - * Called after disable, h/w is stopped by now, free all the resources. + /* + * Called after disable, h/w is stopped by now, free all the resources. */ amdvi_free_evt_intr_res(dev); @@ -1158,9 +1155,9 @@ amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa, return (mapped); } -static uint64_t +static int amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, - uint64_t len) + uint64_t len, uint64_t *res_len) { struct amdvi_domain *domain; @@ -1168,7 +1165,7 @@ amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, if (domain->id && !domain->ptp) { printf("ptp is NULL"); - return (-1); + return (EINVAL); } /* @@ -1176,13 +1173,14 @@ amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, * table set-up. */ if (domain->ptp) - return (amdvi_update_mapping(domain, gpa, hpa, len, true)); + *res_len = amdvi_update_mapping(domain, gpa, hpa, len, true); else - return (len); + *res_len = len; + return (0); } -static uint64_t -amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) +static int +amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len) { struct amdvi_domain *domain; @@ -1192,9 +1190,10 @@ amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) * table set-up. 
*/ if (domain->ptp) - return (amdvi_update_mapping(domain, gpa, 0, len, false)); - return - (len); + *res_len = amdvi_update_mapping(domain, gpa, 0, len, false); + else + *res_len = len; + return (0); } static struct amdvi_softc * @@ -1271,8 +1270,8 @@ amdvi_inv_device(struct amdvi_softc *softc, uint16_t devid) amdvi_wait(softc); } -static void -amdvi_add_device(void *arg, uint16_t devid) +static int +amdvi_add_device(void *arg, device_t dev __unused, uint16_t devid) { struct amdvi_domain *domain; struct amdvi_softc *softc; @@ -1285,13 +1284,14 @@ amdvi_add_device(void *arg, uint16_t devid) #endif softc = amdvi_find_iommu(devid); if (softc == NULL) - return; + return (ENXIO); amdvi_set_dte(domain, softc, devid, true); amdvi_inv_device(softc, devid); + return (0); } -static void -amdvi_remove_device(void *arg, uint16_t devid) +static int +amdvi_remove_device(void *arg, device_t dev __unused, uint16_t devid) { struct amdvi_domain *domain; struct amdvi_softc *softc; @@ -1303,9 +1303,10 @@ amdvi_remove_device(void *arg, uint16_t devid) #endif softc = amdvi_find_iommu(devid); if (softc == NULL) - return; + return (ENXIO); amdvi_set_dte(domain, softc, devid, false); amdvi_inv_device(softc, devid); + return (0); } static void @@ -1360,7 +1361,7 @@ amdvi_disable(void) } } -static void +static int amdvi_invalidate_tlb(void *arg) { struct amdvi_domain *domain; @@ -1368,6 +1369,7 @@ amdvi_invalidate_tlb(void *arg) domain = (struct amdvi_domain *)arg; KASSERT(domain, ("domain is NULL")); amdvi_do_inv_domain(domain->id, false); + return (0); } const struct iommu_ops iommu_ops_amd = { @@ -1381,5 +1383,5 @@ const struct iommu_ops iommu_ops_amd = { .remove_mapping = amdvi_remove_mapping, .add_device = amdvi_add_device, .remove_device = amdvi_remove_device, - .invalidate_tlb = amdvi_invalidate_tlb + .invalidate_tlb = amdvi_invalidate_tlb, }; diff --git a/sys/amd64/vmm/amd/amdvi_priv.h b/sys/amd64/vmm/amd/amdvi_priv.h index 6960ef24d683..2a2646b6907e 100644 --- a/sys/amd64/vmm/amd/amdvi_priv.h +++ b/sys/amd64/vmm/amd/amdvi_priv.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Anish Gupta (anish@freebsd.org) * Copyright (c) 2021 The FreeBSD Foundation @@ -27,8 +27,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _AMDVI_PRIV_H_ @@ -213,8 +211,8 @@ struct amdvi_ctrl { uint64_t limit:40; uint16_t :12; } excl; - /* - * Revision 2 only. + /* + * Revision 2 only. */ uint64_t ex_feature; struct { @@ -255,8 +253,8 @@ CTASSERT(offsetof(struct amdvi_ctrl, pad2)== 0x2028); CTASSERT(offsetof(struct amdvi_ctrl, pad3)== 0x2040); #define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE) /* v1 size */ -/* - * AMF IOMMU v2 size including event counters +/* + * AMF IOMMU v2 size including event counters */ #define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE) diff --git a/sys/amd64/vmm/amd/amdiommu.c b/sys/amd64/vmm/amd/amdviiommu.c index 6b4349d1c160..5f5822a667b5 100644 --- a/sys/amd64/vmm/amd/amdiommu.c +++ b/sys/amd64/vmm/amd/amdviiommu.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 The FreeBSD Foundation * @@ -28,9 +28,6 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/bus.h> #include <sys/kernel.h> @@ -43,36 +40,36 @@ __FBSDID("$FreeBSD$"); #include "amdvi_priv.h" #include "ivhd_if.h" -struct amdiommu_softc { +struct amdviiommu_softc { struct resource *event_res; /* Event interrupt resource. */ void *event_tag; /* Event interrupt tag. */ int event_rid; }; -static int amdiommu_probe(device_t); -static int amdiommu_attach(device_t); -static int amdiommu_detach(device_t); +static int amdviiommu_probe(device_t); +static int amdviiommu_attach(device_t); +static int amdviiommu_detach(device_t); static int ivhd_setup_intr(device_t, driver_intr_t, void *, const char *); static int ivhd_teardown_intr(device_t); -static device_method_t amdiommu_methods[] = { +static device_method_t amdviiommu_methods[] = { /* device interface */ - DEVMETHOD(device_probe, amdiommu_probe), - DEVMETHOD(device_attach, amdiommu_attach), - DEVMETHOD(device_detach, amdiommu_detach), + DEVMETHOD(device_probe, amdviiommu_probe), + DEVMETHOD(device_attach, amdviiommu_attach), + DEVMETHOD(device_detach, amdviiommu_detach), DEVMETHOD(ivhd_setup_intr, ivhd_setup_intr), DEVMETHOD(ivhd_teardown_intr, ivhd_teardown_intr), DEVMETHOD_END }; -static driver_t amdiommu_driver = { - "amdiommu", - amdiommu_methods, - sizeof(struct amdiommu_softc), +static driver_t amdviiommu_driver = { + "amdviiommu", + amdviiommu_methods, + sizeof(struct amdviiommu_softc), }; static int -amdiommu_probe(device_t dev) +amdviiommu_probe(device_t dev) { int error; int capoff; @@ -102,7 +99,7 @@ amdiommu_probe(device_t dev) } static int -amdiommu_attach(device_t dev) +amdviiommu_attach(device_t dev) { device_set_desc(dev, "AMD-Vi/IOMMU PCI function"); @@ -110,7 +107,7 @@ amdiommu_attach(device_t dev) } static int -amdiommu_detach(device_t dev) +amdviiommu_detach(device_t dev) { return (0); @@ -120,7 +117,7 @@ static int ivhd_setup_intr(device_t dev, driver_intr_t handler, void *arg, const char *desc) { - struct amdiommu_softc *sc; + struct amdviiommu_softc *sc; int error, msicnt; sc = device_get_softc(dev); @@ -161,7 +158,7 @@ fail: static int ivhd_teardown_intr(device_t dev) { - struct amdiommu_softc *sc; + struct amdviiommu_softc *sc; sc = device_get_softc(dev); @@ -179,5 +176,5 @@ ivhd_teardown_intr(device_t dev) } /* This driver has to be loaded before ivhd */ -DRIVER_MODULE(amdiommu, pci, amdiommu_driver, 0, 0); -MODULE_DEPEND(amdiommu, pci, 1, 1, 1); +DRIVER_MODULE(amdviiommu, pci, amdviiommu_driver, 0, 0); +MODULE_DEPEND(amdviiommu, pci, 1, 1, 1); diff --git a/sys/amd64/vmm/amd/ivhd_if.m b/sys/amd64/vmm/amd/ivhd_if.m index f2994243c91e..3b37de9f4ba0 100644 --- a/sys/amd64/vmm/amd/ivhd_if.m +++ b/sys/amd64/vmm/amd/ivhd_if.m @@ -25,7 +25,6 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $FreeBSD$ # #include <sys/types.h> diff --git a/sys/amd64/vmm/amd/ivrs_drv.c b/sys/amd64/vmm/amd/ivrs_drv.c index 574c70cad383..c75e0fcc2d68 100644 --- a/sys/amd64/vmm/amd/ivrs_drv.c +++ b/sys/amd64/vmm/amd/ivrs_drv.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016, Anish Gupta (anish@freebsd.org) * Copyright (c) 2021 The FreeBSD Foundation @@ -28,8 +28,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_acpi.h" #include <sys/param.h> #include <sys/bus.h> @@ -53,7 +51,7 @@ __FBSDID("$FreeBSD$"); device_t *ivhd_devs; /* IVHD or AMD-Vi device list. */ int ivhd_count; /* Number of IVHD header. 
*/ -/* +/* * Cached IVHD header list. * Single entry for each IVHD, filtered the legacy one. */ @@ -227,7 +225,7 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc) break; default: - device_printf(softc->dev, + device_printf(softc->dev, "unknown type: 0x%x\n", ivhd->Header.Type); return (-1); } @@ -368,7 +366,7 @@ ivhd_identify(driver_t *driver, device_t parent) ivrs_ivinfo = ivrs->Info; printf("AMD-Vi: IVRS Info VAsize = %d PAsize = %d GVAsize = %d" " flags:%b\n", - REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8), + REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8), REG_BITS(ivrs_ivinfo, 7, 5), REG_BITS(ivrs_ivinfo, 22, 22), "\020\001EFRSup"); @@ -418,7 +416,7 @@ ivhd_identify(driver_t *driver, device_t parent) if (ivhd_devs[i] == NULL) { ivhd_devs[i] = device_find_child(parent, "ivhd", i); if (ivhd_devs[i] == NULL) { - printf("AMD-Vi: cant find ivhd%d\n", i); + printf("AMD-Vi: can't find ivhd%d\n", i); break; } } @@ -441,7 +439,7 @@ ivhd_probe(device_t dev) return (ENXIO); unit = device_get_unit(dev); - KASSERT((unit < ivhd_count), + KASSERT((unit < ivhd_count), ("ivhd unit %d > count %d", unit, ivhd_count)); ivhd = ivhd_hdrs[unit]; KASSERT(ivhd, ("ivhd is NULL")); @@ -508,7 +506,7 @@ ivhd_print_flag(device_t dev, enum IvrsType ivhd_type, uint8_t flag) * Feature in legacy IVHD type(0x10) and attribute in newer type(0x11 and 0x40). */ static void -ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature) +ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature) { switch (ivhd_type) { case IVRS_TYPE_HARDWARE_LEGACY: @@ -641,7 +639,7 @@ ivhd_attach(device_t dev) int status, unit; unit = device_get_unit(dev); - KASSERT((unit < ivhd_count), + KASSERT((unit < ivhd_count), ("ivhd unit %d > count %d", unit, ivhd_count)); /* Make sure its same device for which attach is called. */ KASSERT((ivhd_devs[unit] == dev), @@ -651,7 +649,8 @@ ivhd_attach(device_t dev) softc->dev = dev; ivhd = ivhd_hdrs[unit]; KASSERT(ivhd, ("ivhd is NULL")); - softc->pci_dev = pci_find_bsf(PCI_RID2BUS(ivhd->Header.DeviceId), + softc->pci_dev = pci_find_dbsf(ivhd->PciSegmentGroup, + PCI_RID2BUS(ivhd->Header.DeviceId), PCI_RID2SLOT(ivhd->Header.DeviceId), PCI_RID2FUNC(ivhd->Header.DeviceId)); @@ -659,12 +658,12 @@ ivhd_attach(device_t dev) softc->pci_seg = ivhd->PciSegmentGroup; softc->pci_rid = ivhd->Header.DeviceId; softc->ivhd_flag = ivhd->Header.Flags; - /* + /* * On lgeacy IVHD type(0x10), it is documented as feature * but in newer type it is attribute. */ softc->ivhd_feature = ivhd->FeatureReporting; - /* + /* * PCI capability has more capabilities that are not part of IVRS. */ softc->cap_off = ivhd->CapabilityOffset; @@ -695,7 +694,7 @@ ivhd_attach(device_t dev) status = amdvi_setup_hw(softc); if (status != 0) { - device_printf(dev, "couldn't be initialised, error=%d\n", + device_printf(dev, "couldn't be initialised, error=%d\n", status); goto fail; } diff --git a/sys/amd64/vmm/amd/npt.c b/sys/amd64/vmm/amd/npt.c index 01d5bc7b28fd..6fd6628053f2 100644 --- a/sys/amd64/vmm/amd/npt.c +++ b/sys/amd64/vmm/amd/npt.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -26,9 +26,6 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> @@ -61,7 +58,7 @@ svm_npt_init(int ipinum) npt_flags = ipinum & NPT_IPIMASK; TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage); if (enable_superpage) - npt_flags |= PMAP_PDE_SUPERPAGE; + npt_flags |= PMAP_PDE_SUPERPAGE; return (0); } diff --git a/sys/amd64/vmm/amd/npt.h b/sys/amd64/vmm/amd/npt.h index 35530d783397..9ab163cf9076 100644 --- a/sys/amd64/vmm/amd/npt.h +++ b/sys/amd64/vmm/amd/npt.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _SVM_NPT_H_ diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 91e747774b24..2fe6a5bc3584 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -43,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/pmap.h> #include <machine/cpufunc.h> @@ -55,9 +54,11 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm_instruction_emul.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_ktr.h> +#include <dev/vmm/vmm_mem.h> + #include "vmm_lapic.h" #include "vmm_stat.h" -#include "vmm_ktr.h" #include "vmm_ioport.h" #include "vatpic.h" #include "vlapic.h" @@ -69,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include "svm_softc.h" #include "svm_msr.h" #include "npt.h" +#include "io/ppt.h" SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, @@ -123,18 +125,16 @@ SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0, /* Current ASID generation for each host cpu */ static struct asid asid[MAXCPU]; -/* - * SVM host state saved area of size 4KB for each core. - */ -static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); +/* SVM host state saved area of size 4KB for each physical core. 
*/ +static uint8_t *hsave; static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery"); static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry"); static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window"); -static int svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); -static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val); - +static int svm_getdesc(void *vcpui, int reg, struct seg_desc *desc); +static int svm_setreg(void *vcpui, int ident, uint64_t val); +static int svm_getreg(void *vcpui, int ident, uint64_t *val); static __inline int flush_by_asid(void) { @@ -167,6 +167,10 @@ svm_modcleanup(void) { smp_rendezvous(NULL, svm_disable, NULL, NULL); + + if (hsave != NULL) + kmem_free(hsave, (mp_maxid + 1) * PAGE_SIZE); + return (0); } @@ -214,7 +218,7 @@ svm_enable(void *arg __unused) efer |= EFER_SVM; wrmsr(MSR_EFER, efer); - wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave[curcpu])); + wrmsr(MSR_VM_HSAVE_PA, vtophys(&hsave[curcpu * PAGE_SIZE])); } /* @@ -269,46 +273,76 @@ svm_modinit(int ipinum) svm_npt_init(ipinum); /* Enable SVM on all CPUs */ + hsave = kmem_malloc((mp_maxid + 1) * PAGE_SIZE, M_WAITOK | M_ZERO); smp_rendezvous(NULL, svm_enable, NULL, NULL); return (0); } static void +svm_modsuspend(void) +{ +} + +static void svm_modresume(void) { svm_enable(NULL); -} +} #ifdef BHYVE_SNAPSHOT -int -svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset) +void +svm_set_tsc_offset(struct svm_vcpu *vcpu, uint64_t offset) { - int error; struct vmcb_ctrl *ctrl; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); ctrl->tsc_offset = offset; - svm_set_dirty(sc, vcpu, VMCB_CACHE_I); - VCPU_CTR1(sc->vm, vcpu, "tsc offset changed to %#lx", offset); - - error = vm_set_tsc_offset(sc->vm, vcpu, offset); + svm_set_dirty(vcpu, VMCB_CACHE_I); + SVM_CTR1(vcpu, "tsc offset changed to %#lx", offset); - return (error); + vm_set_tsc_offset(vcpu->vcpu, offset); } #endif /* Pentium compatible MSRs */ -#define MSR_PENTIUM_START 0 +#define MSR_PENTIUM_START 0 #define MSR_PENTIUM_END 0x1FFF /* AMD 6th generation and Intel compatible MSRs */ -#define MSR_AMD6TH_START 0xC0000000UL -#define MSR_AMD6TH_END 0xC0001FFFUL +#define MSR_AMD6TH_START 0xC0000000UL +#define MSR_AMD6TH_END 0xC0001FFFUL /* AMD 7th and 8th generation compatible MSRs */ -#define MSR_AMD7TH_START 0xC0010000UL -#define MSR_AMD7TH_END 0xC0011FFFUL +#define MSR_AMD7TH_START 0xC0010000UL +#define MSR_AMD7TH_END 0xC0011FFFUL + +static void +svm_get_cs_info(struct vmcb *vmcb, struct vm_guest_paging *paging, int *cs_d, + uint64_t *base) +{ + struct vmcb_segment seg; + int error __diagused; + + error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); + KASSERT(error == 0, ("%s: vmcb_seg error %d", __func__, error)); + + switch (paging->cpu_mode) { + case CPU_MODE_REAL: + *base = seg.base; + *cs_d = 0; + break; + case CPU_MODE_PROTECTED: + case CPU_MODE_COMPATIBILITY: + *cs_d = !!(seg.attrib & VMCB_CS_ATTRIB_D); + *base = seg.base; + break; + default: + *base = 0; + *cs_d = 0; + break; + } +} /* * Get the index and bit position for a MSR in permission bitmap. 
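The hunk that follows is a whitespace cleanup inside svm_msr_index(), which maps an MSR number to its permission bits in the SVM MSR permission map by concatenating the three architected MSR ranges (Pentium, AMD 6th generation, AMD 7th/8th generation). A condensed sketch of that lookup for the first two ranges; the range/offset arithmetic mirrors the hunk, while the two-bits-per-MSR bit computation is an illustrative assumption, not copied code:

/*
 * Condensed sketch of the permission-bitmap lookup.  Two intercept bits
 * (read, write) per MSR means four MSRs per byte, hence the "/ 4".
 */
#include <errno.h>
#include <stdint.h>

#define MSR_PENTIUM_START	0
#define MSR_PENTIUM_END		0x1FFF
#define MSR_AMD6TH_START	0xC0000000UL
#define MSR_AMD6TH_END		0xC0001FFFUL

static int
msr_bitmap_index(uint64_t msr, int *index, int *bit)
{
	uint64_t base, off;

	*bit = (msr & 0x3) * 2;		/* 2 permission bits per MSR (assumed) */
	base = 0;
	if (msr <= MSR_PENTIUM_END) {
		*index = msr / 4;
		return (0);
	}
	base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
	if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
		off = msr - MSR_AMD6TH_START;
		*index = (off + base) / 4;
		return (0);
	}
	return (EINVAL);	/* outside the sketched ranges */
}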
@@ -328,12 +362,12 @@ svm_msr_index(uint64_t msr, int *index, int *bit) return (0); } - base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); + base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) { - off = (msr - MSR_AMD6TH_START); + off = (msr - MSR_AMD6TH_START); *index = (off + base) / 4; return (0); - } + } base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1); if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) { @@ -382,26 +416,25 @@ svm_msr_rd_ok(uint8_t *perm_bitmap, uint64_t msr) } static __inline int -svm_get_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask) +svm_get_intercept(struct svm_vcpu *vcpu, int idx, uint32_t bitmask) { struct vmcb_ctrl *ctrl; KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx)); - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); return (ctrl->intercept[idx] & bitmask ? 1 : 0); } static __inline void -svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask, - int enabled) +svm_set_intercept(struct svm_vcpu *vcpu, int idx, uint32_t bitmask, int enabled) { struct vmcb_ctrl *ctrl; uint32_t oldval; KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx)); - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); oldval = ctrl->intercept[idx]; if (enabled) @@ -410,28 +443,28 @@ svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask, ctrl->intercept[idx] &= ~bitmask; if (ctrl->intercept[idx] != oldval) { - svm_set_dirty(sc, vcpu, VMCB_CACHE_I); - VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified " - "from %#x to %#x", idx, oldval, ctrl->intercept[idx]); + svm_set_dirty(vcpu, VMCB_CACHE_I); + SVM_CTR3(vcpu, "intercept[%d] modified from %#x to %#x", idx, + oldval, ctrl->intercept[idx]); } } static __inline void -svm_disable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask) +svm_disable_intercept(struct svm_vcpu *vcpu, int off, uint32_t bitmask) { - svm_set_intercept(sc, vcpu, off, bitmask, 0); + svm_set_intercept(vcpu, off, bitmask, 0); } static __inline void -svm_enable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask) +svm_enable_intercept(struct svm_vcpu *vcpu, int off, uint32_t bitmask) { - svm_set_intercept(sc, vcpu, off, bitmask, 1); + svm_set_intercept(vcpu, off, bitmask, 1); } static void -vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, +vmcb_init(struct svm_softc *sc, struct svm_vcpu *vcpu, uint64_t iopm_base_pa, uint64_t msrpm_base_pa, uint64_t np_pml4) { struct vmcb_ctrl *ctrl; @@ -439,8 +472,8 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, uint32_t mask; int n; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - state = svm_get_vmcb_state(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); + state = svm_get_vmcb_state(vcpu); ctrl->iopm_base_pa = iopm_base_pa; ctrl->msrpm_base_pa = msrpm_base_pa; @@ -456,16 +489,16 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, for (n = 0; n < 16; n++) { mask = (BIT(n) << 16) | BIT(n); if (n == 0 || n == 2 || n == 3 || n == 4 || n == 8) - svm_disable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); + svm_disable_intercept(vcpu, VMCB_CR_INTCPT, mask); else - svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); + svm_enable_intercept(vcpu, VMCB_CR_INTCPT, mask); } /* * Intercept everything when tracing guest exceptions otherwise * just intercept machine check exception. 
*/ - if (vcpu_trace_exceptions(sc->vm, vcpu)) { + if (vcpu_trace_exceptions(vcpu->vcpu)) { for (n = 0; n < 32; n++) { /* * Skip unimplemented vectors in the exception bitmap. @@ -473,41 +506,40 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, if (n == 2 || n == 9) { continue; } - svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(n)); + svm_enable_intercept(vcpu, VMCB_EXC_INTCPT, BIT(n)); } } else { - svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC)); + svm_enable_intercept(vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC)); } /* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */ - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_MSR); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_CPUID); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INTR); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INIT); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_NMI); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SMI); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SHUTDOWN); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, - VMCB_INTCPT_FERR_FREEZE); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA); - - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR); - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_MSR); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_CPUID); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INTR); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INIT); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_NMI); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SMI); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SHUTDOWN); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_FERR_FREEZE); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA); + + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT); /* * Intercept SVM instructions since AMD enables them in guests otherwise. * Non-intercepted VMMCALL causes #UD, skip it. 
*/ - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD); - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE); - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI); - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI); - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT); - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_ICEBP); - if (vcpu_trap_wbinvd(sc->vm, vcpu)) { - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_ICEBP); + if (vcpu_trap_wbinvd(vcpu->vcpu)) { + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_WBINVD); } @@ -515,7 +547,7 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, * From section "Canonicalization and Consistency Checks" in APMv2 * the VMRUN intercept bit must be set to pass the consistency check. */ - svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN); + svm_enable_intercept(vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN); /* * The ASID will be set to a non-zero value just before VMRUN. @@ -559,14 +591,8 @@ static void * svm_init(struct vm *vm, pmap_t pmap) { struct svm_softc *svm_sc; - struct svm_vcpu *vcpu; - vm_paddr_t msrpm_pa, iopm_pa, pml4_pa; - int i; - uint16_t maxcpus; svm_sc = malloc(sizeof (*svm_sc), M_SVM, M_WAITOK | M_ZERO); - if (((uintptr_t)svm_sc & PAGE_MASK) != 0) - panic("malloc of svm_softc not aligned on page boundary"); svm_sc->msr_bitmap = contigmalloc(SVM_MSR_BITMAP_SIZE, M_SVM, M_WAITOK, 0, ~(vm_paddr_t)0, PAGE_SIZE, 0); @@ -578,7 +604,7 @@ svm_init(struct vm *vm, pmap_t pmap) panic("contigmalloc of SVM IO bitmap failed"); svm_sc->vm = vm; - svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pmltop); + svm_sc->nptp = vtophys(pmap->pm_pmltop); /* * Intercept read and write accesses to all MSRs. @@ -613,21 +639,30 @@ svm_init(struct vm *vm, pmap_t pmap) /* Intercept access to all I/O ports. */ memset(svm_sc->iopm_bitmap, 0xFF, SVM_IO_BITMAP_SIZE); - iopm_pa = vtophys(svm_sc->iopm_bitmap); - msrpm_pa = vtophys(svm_sc->msr_bitmap); - pml4_pa = svm_sc->nptp; - maxcpus = vm_get_maxcpus(svm_sc->vm); - for (i = 0; i < maxcpus; i++) { - vcpu = svm_get_vcpu(svm_sc, i); - vcpu->nextrip = ~0; - vcpu->lastcpu = NOCPU; - vcpu->vmcb_pa = vtophys(&vcpu->vmcb); - vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa); - svm_msr_guest_init(svm_sc, i); - } return (svm_sc); } +static void * +svm_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) +{ + struct svm_softc *sc = vmi; + struct svm_vcpu *vcpu; + + vcpu = malloc(sizeof(*vcpu), M_SVM, M_WAITOK | M_ZERO); + vcpu->sc = sc; + vcpu->vcpu = vcpu1; + vcpu->vcpuid = vcpuid; + vcpu->vmcb = malloc_aligned(sizeof(struct vmcb), PAGE_SIZE, M_SVM, + M_WAITOK | M_ZERO); + vcpu->nextrip = ~0; + vcpu->lastcpu = NOCPU; + vcpu->vmcb_pa = vtophys(vcpu->vmcb); + vmcb_init(sc, vcpu, vtophys(sc->iopm_bitmap), vtophys(sc->msr_bitmap), + sc->nptp); + svm_msr_guest_init(sc, vcpu); + return (vcpu); +} + /* * Collateral for a generic SVM VM-exit. 
*/ @@ -720,20 +755,39 @@ svm_inout_str_count(struct svm_regctx *regs, int rep) } static void -svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1, - int in, struct vm_inout_str *vis) +svm_inout_str_seginfo(struct svm_vcpu *vcpu, int64_t info1, int in, + struct vm_inout_str *vis) { int error __diagused, s; if (in) { vis->seg_name = VM_REG_GUEST_ES; - } else { - /* The segment field has standard encoding */ + } else if (decode_assist()) { + /* + * The effective segment number in EXITINFO1[12:10] is populated + * only if the processor has the DecodeAssist capability. + * + * XXX this is not specified explicitly in APMv2 but can be + * verified empirically. + */ s = (info1 >> 10) & 0x7; + + /* The segment field has standard encoding */ vis->seg_name = vm_segment_name(s); + } else { + /* + * The segment register need to be manually decoded by fetching + * the instructions near ip. However, we are unable to fetch it + * while the interrupts are disabled. Therefore, we leave the + * value unset until the generic ins/outs handler runs. + */ + vis->seg_name = VM_REG_LAST; + svm_get_cs_info(vcpu->vmcb, &vis->paging, &vis->cs_d, + &vis->cs_base); + return; } - error = svm_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc); + error = svm_getdesc(vcpu, vis->seg_name, &vis->seg_desc); KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error)); } @@ -774,7 +828,7 @@ svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging) * Handle guest I/O intercept. */ static int -svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) +svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; @@ -783,23 +837,13 @@ svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) uint64_t info1; int inout_string; - state = svm_get_vmcb_state(svm_sc, vcpu); - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - regs = svm_get_guest_regctx(svm_sc, vcpu); + state = svm_get_vmcb_state(vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); + regs = svm_get_guest_regctx(vcpu); info1 = ctrl->exitinfo1; inout_string = info1 & BIT(2) ? 1 : 0; - /* - * The effective segment number in EXITINFO1[12:10] is populated - * only if the processor has the DecodeAssist capability. - * - * XXX this is not specified explicitly in APMv2 but can be verified - * empirically. - */ - if (inout_string && !decode_assist()) - return (UNHANDLED); - vmexit->exitcode = VM_EXITCODE_INOUT; vmexit->u.inout.in = (info1 & BIT(0)) ? 
1 : 0; vmexit->u.inout.string = inout_string; @@ -811,14 +855,15 @@ svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) if (inout_string) { vmexit->exitcode = VM_EXITCODE_INOUT_STR; vis = &vmexit->u.inout_str; - svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging); + svm_paging_info(svm_get_vmcb(vcpu), &vis->paging); vis->rflags = state->rflags; vis->cr0 = state->cr0; vis->index = svm_inout_str_index(regs, vmexit->u.inout.in); vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep); vis->addrsize = svm_inout_str_addrsize(info1); - svm_inout_str_seginfo(svm_sc, vcpu, info1, - vmexit->u.inout.in, vis); + vis->cs_d = 0; + vis->cs_base = 0; + svm_inout_str_seginfo(vcpu, info1, vmexit->u.inout.in, vis); } return (UNHANDLED); @@ -852,17 +897,16 @@ svm_npf_emul_fault(uint64_t exitinfo1) return (false); } - return (true); + return (true); } static void svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) { struct vm_guest_paging *paging; - struct vmcb_segment seg; struct vmcb_ctrl *ctrl; char *inst_bytes; - int error __diagused, inst_len; + int inst_len; ctrl = &vmcb->ctrl; paging = &vmexit->u.inst_emul.paging; @@ -872,29 +916,8 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) vmexit->u.inst_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, paging); - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - - switch(paging->cpu_mode) { - case CPU_MODE_REAL: - vmexit->u.inst_emul.cs_base = seg.base; - vmexit->u.inst_emul.cs_d = 0; - break; - case CPU_MODE_PROTECTED: - case CPU_MODE_COMPATIBILITY: - vmexit->u.inst_emul.cs_base = seg.base; - - /* - * Section 4.8.1 of APM2, Default Operand Size or D bit. - */ - vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? - 1 : 0; - break; - default: - vmexit->u.inst_emul.cs_base = 0; - vmexit->u.inst_emul.cs_d = 0; - break; - } + svm_get_cs_info(vmcb, paging, &vmexit->u.inst_emul.cs_d, + &vmexit->u.inst_emul.cs_base); /* * Copy the instruction bytes into 'vie' if available. @@ -932,12 +955,12 @@ intrtype_to_str(int intr_type) * Inject an event to vcpu as described in section 15.20, "Event injection". 
*/ static void -svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, - uint32_t error, bool ec_valid) +svm_eventinject(struct svm_vcpu *vcpu, int intr_type, int vector, + uint32_t error, bool ec_valid) { struct vmcb_ctrl *ctrl; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event already pending %#lx", __func__, ctrl->eventinj)); @@ -962,24 +985,22 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, if (ec_valid) { ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID; ctrl->eventinj |= (uint64_t)error << 32; - VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %#x", + SVM_CTR3(vcpu, "Injecting %s at vector %d errcode %#x", intrtype_to_str(intr_type), vector, error); } else { - VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d", + SVM_CTR2(vcpu, "Injecting %s at vector %d", intrtype_to_str(intr_type), vector); } } static void -svm_update_virqinfo(struct svm_softc *sc, int vcpu) +svm_update_virqinfo(struct svm_vcpu *vcpu) { - struct vm *vm; struct vlapic *vlapic; struct vmcb_ctrl *ctrl; - vm = sc->vm; - vlapic = vm_lapic(vm, vcpu); - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + vlapic = vm_lapic(vcpu->vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); /* Update %cr8 in the emulated vlapic */ vlapic_set_cr8(vlapic, ctrl->v_tpr); @@ -990,13 +1011,13 @@ svm_update_virqinfo(struct svm_softc *sc, int vcpu) } static void -svm_save_intinfo(struct svm_softc *svm_sc, int vcpu) +svm_save_intinfo(struct svm_softc *svm_sc, struct svm_vcpu *vcpu) { struct vmcb_ctrl *ctrl; uint64_t intinfo; - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - intinfo = ctrl->exitintinfo; + ctrl = svm_get_vmcb_ctrl(vcpu); + intinfo = ctrl->exitintinfo; if (!VMCB_EXITINTINFO_VALID(intinfo)) return; @@ -1006,86 +1027,85 @@ svm_save_intinfo(struct svm_softc *svm_sc, int vcpu) * If a #VMEXIT happened during event delivery then record the event * that was being delivered. 
*/ - VCPU_CTR2(svm_sc->vm, vcpu, "SVM:Pending INTINFO(0x%lx), vector=%d.\n", - intinfo, VMCB_EXITINTINFO_VECTOR(intinfo)); - vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1); - vm_exit_intinfo(svm_sc->vm, vcpu, intinfo); + SVM_CTR2(vcpu, "SVM:Pending INTINFO(0x%lx), vector=%d.\n", intinfo, + VMCB_EXITINTINFO_VECTOR(intinfo)); + vmm_stat_incr(vcpu->vcpu, VCPU_EXITINTINFO, 1); + vm_exit_intinfo(vcpu->vcpu, intinfo); } #ifdef INVARIANTS static __inline int -vintr_intercept_enabled(struct svm_softc *sc, int vcpu) +vintr_intercept_enabled(struct svm_vcpu *vcpu) { - return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, - VMCB_INTCPT_VINTR)); + return (svm_get_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR)); } #endif static __inline void -enable_intr_window_exiting(struct svm_softc *sc, int vcpu) +enable_intr_window_exiting(struct svm_vcpu *vcpu) { struct vmcb_ctrl *ctrl; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); if (ctrl->v_irq && ctrl->v_intr_vector == 0) { KASSERT(ctrl->v_ign_tpr, ("%s: invalid v_ign_tpr", __func__)); - KASSERT(vintr_intercept_enabled(sc, vcpu), + KASSERT(vintr_intercept_enabled(vcpu), ("%s: vintr intercept should be enabled", __func__)); return; } - VCPU_CTR0(sc->vm, vcpu, "Enable intr window exiting"); + SVM_CTR0(vcpu, "Enable intr window exiting"); ctrl->v_irq = 1; ctrl->v_ign_tpr = 1; ctrl->v_intr_vector = 0; - svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); + svm_set_dirty(vcpu, VMCB_CACHE_TPR); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } static __inline void -disable_intr_window_exiting(struct svm_softc *sc, int vcpu) +disable_intr_window_exiting(struct svm_vcpu *vcpu) { struct vmcb_ctrl *ctrl; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); if (!ctrl->v_irq && ctrl->v_intr_vector == 0) { - KASSERT(!vintr_intercept_enabled(sc, vcpu), + KASSERT(!vintr_intercept_enabled(vcpu), ("%s: vintr intercept should be disabled", __func__)); return; } - VCPU_CTR0(sc->vm, vcpu, "Disable intr window exiting"); + SVM_CTR0(vcpu, "Disable intr window exiting"); ctrl->v_irq = 0; ctrl->v_intr_vector = 0; - svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); - svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); + svm_set_dirty(vcpu, VMCB_CACHE_TPR); + svm_disable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } static int -svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val) +svm_modify_intr_shadow(struct svm_vcpu *vcpu, uint64_t val) { struct vmcb_ctrl *ctrl; int oldval, newval; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); oldval = ctrl->intr_shadow; newval = val ? 1 : 0; if (newval != oldval) { ctrl->intr_shadow = newval; - VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval); + SVM_CTR1(vcpu, "Setting intr_shadow to %d", newval); } return (0); } static int -svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val) +svm_get_intr_shadow(struct svm_vcpu *vcpu, uint64_t *val) { struct vmcb_ctrl *ctrl; - ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); *val = ctrl->intr_shadow; return (0); } @@ -1096,31 +1116,30 @@ svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val) * to track when the vcpu is done handling the NMI. 
*/ static int -nmi_blocked(struct svm_softc *sc, int vcpu) +nmi_blocked(struct svm_vcpu *vcpu) { int blocked; - blocked = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, - VMCB_INTCPT_IRET); + blocked = svm_get_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); return (blocked); } static void -enable_nmi_blocking(struct svm_softc *sc, int vcpu) +enable_nmi_blocking(struct svm_vcpu *vcpu) { - KASSERT(!nmi_blocked(sc, vcpu), ("vNMI already blocked")); - VCPU_CTR0(sc->vm, vcpu, "vNMI blocking enabled"); - svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); + KASSERT(!nmi_blocked(vcpu), ("vNMI already blocked")); + SVM_CTR0(vcpu, "vNMI blocking enabled"); + svm_enable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); } static void -clear_nmi_blocking(struct svm_softc *sc, int vcpu) +clear_nmi_blocking(struct svm_vcpu *vcpu) { int error __diagused; - KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked")); - VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared"); + KASSERT(nmi_blocked(vcpu), ("vNMI already unblocked")); + SVM_CTR0(vcpu, "vNMI blocking cleared"); /* * When the IRET intercept is cleared the vcpu will attempt to execute * the "iret" when it runs next. However, it is possible to inject @@ -1132,30 +1151,31 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu) * * XXX this needs to be fixed */ - svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); + svm_disable_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); /* * Set 'intr_shadow' to prevent an NMI from being injected on the * immediate VMRUN. */ - error = svm_modify_intr_shadow(sc, vcpu, 1); + error = svm_modify_intr_shadow(vcpu, 1); KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error)); } #define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL static int -svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu) +svm_write_efer(struct svm_softc *sc, struct svm_vcpu *vcpu, uint64_t newval, + bool *retu) { struct vm_exit *vme; struct vmcb_state *state; uint64_t changed, lma, oldval; int error __diagused; - state = svm_get_vmcb_state(sc, vcpu); + state = svm_get_vmcb_state(vcpu); oldval = state->efer; - VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %#lx/%#lx", oldval, newval); + SVM_CTR2(vcpu, "wrmsr(efer) %#lx/%#lx", oldval, newval); newval &= ~0xFE; /* clear the Read-As-Zero (RAZ) bits */ changed = oldval ^ newval; @@ -1179,7 +1199,7 @@ svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu) goto gpf; if (newval & EFER_NXE) { - if (!vm_cpuid_capability(sc->vm, vcpu, VCC_NO_EXECUTE)) + if (!vm_cpuid_capability(vcpu->vcpu, VCC_NO_EXECUTE)) goto gpf; } @@ -1188,48 +1208,48 @@ svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu) * this is fixed flag guest attempt to set EFER_LMSLE as an error. 
*/ if (newval & EFER_LMSLE) { - vme = vm_exitinfo(sc->vm, vcpu); + vme = vm_exitinfo(vcpu->vcpu); vm_exit_svm(vme, VMCB_EXIT_MSR, 1, 0); *retu = true; return (0); } if (newval & EFER_FFXSR) { - if (!vm_cpuid_capability(sc->vm, vcpu, VCC_FFXSR)) + if (!vm_cpuid_capability(vcpu->vcpu, VCC_FFXSR)) goto gpf; } if (newval & EFER_TCE) { - if (!vm_cpuid_capability(sc->vm, vcpu, VCC_TCE)) + if (!vm_cpuid_capability(vcpu->vcpu, VCC_TCE)) goto gpf; } - error = svm_setreg(sc, vcpu, VM_REG_GUEST_EFER, newval); + error = svm_setreg(vcpu, VM_REG_GUEST_EFER, newval); KASSERT(error == 0, ("%s: error %d updating efer", __func__, error)); return (0); gpf: - vm_inject_gp(sc->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (0); } static int -emulate_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, - bool *retu) +emulate_wrmsr(struct svm_softc *sc, struct svm_vcpu *vcpu, u_int num, + uint64_t val, bool *retu) { int error; if (lapic_msr(num)) - error = lapic_wrmsr(sc->vm, vcpu, num, val, retu); + error = lapic_wrmsr(vcpu->vcpu, num, val, retu); else if (num == MSR_EFER) error = svm_write_efer(sc, vcpu, val, retu); else - error = svm_wrmsr(sc, vcpu, num, val, retu); + error = svm_wrmsr(vcpu, num, val, retu); return (error); } static int -emulate_rdmsr(struct svm_softc *sc, int vcpu, u_int num, bool *retu) +emulate_rdmsr(struct svm_vcpu *vcpu, u_int num, bool *retu) { struct vmcb_state *state; struct svm_regctx *ctx; @@ -1237,13 +1257,13 @@ emulate_rdmsr(struct svm_softc *sc, int vcpu, u_int num, bool *retu) int error; if (lapic_msr(num)) - error = lapic_rdmsr(sc->vm, vcpu, num, &result, retu); + error = lapic_rdmsr(vcpu->vcpu, num, &result, retu); else - error = svm_rdmsr(sc, vcpu, num, &result, retu); + error = svm_rdmsr(vcpu, num, &result, retu); if (error == 0) { - state = svm_get_vmcb_state(sc, vcpu); - ctx = svm_get_guest_regctx(sc, vcpu); + state = svm_get_vmcb_state(vcpu); + ctx = svm_get_guest_regctx(vcpu); state->rax = result & 0xffffffff; ctx->sctx_rdx = result >> 32; } @@ -1286,6 +1306,8 @@ exit_reason_to_str(uint64_t reason) { .reason = VMCB_EXIT_ICEBP, .str = "icebp" }, { .reason = VMCB_EXIT_INVD, .str = "invd" }, { .reason = VMCB_EXIT_INVLPGA, .str = "invlpga" }, + { .reason = VMCB_EXIT_POPF, .str = "popf" }, + { .reason = VMCB_EXIT_PUSHF, .str = "pushf" }, }; for (i = 0; i < nitems(reasons); i++) { @@ -1324,7 +1346,8 @@ nrip_valid(uint64_t exitcode) } static int -svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) +svm_vmexit(struct svm_softc *svm_sc, struct svm_vcpu *vcpu, + struct vm_exit *vmexit) { struct vmcb *vmcb; struct vmcb_state *state; @@ -1335,8 +1358,8 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) int error __diagused, errcode_valid, handled, idtvec, reflect; bool retu; - ctx = svm_get_guest_regctx(svm_sc, vcpu); - vmcb = svm_get_vmcb(svm_sc, vcpu); + ctx = svm_get_guest_regctx(vcpu); + vmcb = svm_get_vmcb(vcpu); state = &vmcb->state; ctrl = &vmcb->ctrl; @@ -1349,7 +1372,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) vmexit->rip = state->rip; vmexit->inst_length = nrip_valid(code) ? 
ctrl->nrip - state->rip : 0; - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_COUNT, 1); /* * #VMEXIT(INVALID) needs to be handled early because the VMCB is @@ -1368,7 +1391,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)", vmexit->inst_length, code, info1, info2)); - svm_update_virqinfo(svm_sc, vcpu); + svm_update_virqinfo(vcpu); svm_save_intinfo(svm_sc, vcpu); switch (code) { @@ -1377,22 +1400,22 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) * Restart execution at "iret" but with the intercept cleared. */ vmexit->inst_length = 0; - clear_nmi_blocking(svm_sc, vcpu); + clear_nmi_blocking(vcpu); handled = 1; break; case VMCB_EXIT_VINTR: /* interrupt window exiting */ - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_VINTR, 1); handled = 1; break; case VMCB_EXIT_INTR: /* external interrupt */ - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_EXTINT, 1); handled = 1; break; case VMCB_EXIT_NMI: /* external NMI */ handled = 1; break; case 0x40 ... 0x5F: - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_EXCEPTION, 1); reflect = 1; idtvec = code - 0x40; switch (idtvec) { @@ -1402,12 +1425,11 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) * reflect the machine check back into the guest. */ reflect = 0; - VCPU_CTR0(svm_sc->vm, vcpu, "Vectoring to MCE handler"); + SVM_CTR0(vcpu, "Vectoring to MCE handler"); __asm __volatile("int $18"); break; case IDT_PF: - error = svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2, - info2); + error = svm_setreg(vcpu, VM_REG_GUEST_CR2, info2); KASSERT(error == 0, ("%s: error %d updating cr2", __func__, error)); /* fallthru */ @@ -1423,8 +1445,76 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) errcode_valid = 1; info1 = 0; break; - + case IDT_DB: { + /* + * Check if we are being stepped (RFLAGS.TF) + * and bounce vmexit to userland. + */ + bool stepped = 0; + uint64_t dr6 = 0; + + svm_getreg(vcpu, VM_REG_GUEST_DR6, &dr6); + stepped = !!(dr6 & DBREG_DR6_BS); + if (stepped && (vcpu->caps & (1 << VM_CAP_RFLAGS_TF))) { + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 1; + vmexit->u.dbg.pushf_intercept = 0; + + if (vcpu->dbg.popf_sstep) { + /* + * DB# exit was caused by stepping over + * popf. + */ + uint64_t rflags; + + vcpu->dbg.popf_sstep = 0; + + /* + * Update shadowed TF bit so the next + * setcap(..., RFLAGS_SSTEP, 0) restores + * the correct value + */ + svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, + &rflags); + vcpu->dbg.rflags_tf = rflags & PSL_T; + } else if (vcpu->dbg.pushf_sstep) { + /* + * DB# exit was caused by stepping over + * pushf. + */ + vcpu->dbg.pushf_sstep = 0; + + /* + * Adjusting the pushed rflags after a + * restarted pushf instruction must be + * handled outside of svm.c due to the + * critical_enter() lock being held. + */ + vmexit->u.dbg.pushf_intercept = 1; + vmexit->u.dbg.tf_shadow_val = + vcpu->dbg.rflags_tf; + svm_paging_info(svm_get_vmcb(vcpu), + &vmexit->u.dbg.paging); + } + + /* Clear DR6 "single-step" bit. 
*/ + dr6 &= ~DBREG_DR6_BS; + error = svm_setreg(vcpu, VM_REG_GUEST_DR6, dr6); + KASSERT(error == 0, + ("%s: error %d updating DR6\r\n", __func__, + error)); + + reflect = 0; + } + break; + } case IDT_BP: + vmexit->exitcode = VM_EXITCODE_BPT; + vmexit->u.bpt.inst_length = vmexit->inst_length; + vmexit->inst_length = 0; + + reflect = 0; + break; case IDT_OF: case IDT_BR: /* @@ -1436,7 +1526,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) * event injection is identical to what it was when * the exception originally happened. */ - VCPU_CTR2(svm_sc->vm, vcpu, "Reset inst_length from %d " + SVM_CTR2(vcpu, "Reset inst_length from %d " "to zero before injecting exception %d", vmexit->inst_length, idtvec); vmexit->inst_length = 0; @@ -1446,32 +1536,32 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) info1 = 0; break; } - KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) " - "when reflecting exception %d into guest", - vmexit->inst_length, idtvec)); if (reflect) { + KASSERT(vmexit->inst_length == 0, + ("invalid inst_length (%d) " + "when reflecting exception %d into guest", + vmexit->inst_length, idtvec)); /* Reflect the exception back into the guest */ - VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " + SVM_CTR2(vcpu, "Reflecting exception " "%d/%#x into the guest", idtvec, (int)info1); - error = vm_inject_exception(svm_sc->vm, vcpu, idtvec, + error = vm_inject_exception(vcpu->vcpu, idtvec, errcode_valid, info1, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); + handled = 1; } - handled = 1; break; case VMCB_EXIT_MSR: /* MSR access. */ eax = state->rax; ecx = ctx->sctx_rcx; edx = ctx->sctx_rdx; - retu = false; + retu = false; if (info1) { - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_WRMSR, 1); val = (uint64_t)edx << 32 | eax; - VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx", - ecx, val); + SVM_CTR2(vcpu, "wrmsr %#x val %#lx", ecx, val); if (emulate_wrmsr(svm_sc, vcpu, ecx, val, &retu)) { vmexit->exitcode = VM_EXITCODE_WRMSR; vmexit->u.msr.code = ecx; @@ -1483,9 +1573,9 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) ("emulate_wrmsr retu with bogus exitcode")); } } else { - VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx); - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); - if (emulate_rdmsr(svm_sc, vcpu, ecx, &retu)) { + SVM_CTR1(vcpu, "rdmsr %#x", ecx); + vmm_stat_incr(vcpu->vcpu, VMEXIT_RDMSR, 1); + if (emulate_rdmsr(vcpu, ecx, &retu)) { vmexit->exitcode = VM_EXITCODE_RDMSR; vmexit->u.msr.code = ecx; } else if (!retu) { @@ -1497,41 +1587,43 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) } break; case VMCB_EXIT_IO: - handled = svm_handle_io(svm_sc, vcpu, vmexit); - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1); + handled = svm_handle_io(vcpu, vmexit); + vmm_stat_incr(vcpu->vcpu, VMEXIT_INOUT, 1); break; case VMCB_EXIT_CPUID: - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1); - handled = x86_emulate_cpuid(svm_sc->vm, vcpu, &state->rax, - &ctx->sctx_rbx, &ctx->sctx_rcx, &ctx->sctx_rdx); + vmm_stat_incr(vcpu->vcpu, VMEXIT_CPUID, 1); + handled = x86_emulate_cpuid(vcpu->vcpu, + &state->rax, &ctx->sctx_rbx, &ctx->sctx_rcx, + &ctx->sctx_rdx); break; case VMCB_EXIT_HLT: - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_HLT, 1); vmexit->exitcode = VM_EXITCODE_HLT; vmexit->u.hlt.rflags = state->rflags; break; case VMCB_EXIT_PAUSE: vmexit->exitcode = VM_EXITCODE_PAUSE; - 
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_PAUSE, 1); break; case VMCB_EXIT_NPF: /* EXITINFO2 contains the faulting guest physical address */ if (info1 & VMCB_NPF_INFO1_RSV) { - VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with " + SVM_CTR2(vcpu, "nested page fault with " "reserved bits set: info1(%#lx) info2(%#lx)", info1, info2); - } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) { + } else if (vm_mem_allocated(vcpu->vcpu, info2) || + ppt_is_mmio(svm_sc->vm, info2)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->u.paging.gpa = info2; vmexit->u.paging.fault_type = npf_fault_type(info1); - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_NESTED_FAULT, 1); - VCPU_CTR3(svm_sc->vm, vcpu, "nested page fault " + vmm_stat_incr(vcpu->vcpu, VMEXIT_NESTED_FAULT, 1); + SVM_CTR3(vcpu, "nested page fault " "on gpa %#lx/%#lx at rip %#lx", info2, info1, state->rip); } else if (svm_npf_emul_fault(info1)) { svm_handle_inst_emul(vmcb, info2, vmexit); - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1); - VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault " + vmm_stat_incr(vcpu->vcpu, VMEXIT_INST_EMUL, 1); + SVM_CTR3(vcpu, "inst_emul fault " "for gpa %#lx/%#lx at rip %#lx", info2, info1, state->rip); } @@ -1542,6 +1634,42 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) case VMCB_EXIT_MWAIT: vmexit->exitcode = VM_EXITCODE_MWAIT; break; + case VMCB_EXIT_PUSHF: { + if (vcpu->caps & (1 << VM_CAP_RFLAGS_TF)) { + uint64_t rflags; + + svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + /* Restart this instruction. */ + vmexit->inst_length = 0; + /* Disable PUSHF intercepts - avoid a loop. */ + svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_PUSHF, 0); + /* Trace restarted instruction. */ + svm_setreg(vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T)); + /* Let the IDT_DB handler know that pushf was stepped. + */ + vcpu->dbg.pushf_sstep = 1; + handled = 1; + } + break; + } + case VMCB_EXIT_POPF: { + if (vcpu->caps & (1 << VM_CAP_RFLAGS_TF)) { + uint64_t rflags; + + svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + /* Restart this instruction */ + vmexit->inst_length = 0; + /* Disable POPF intercepts - avoid a loop*/ + svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_POPF, 0); + /* Trace restarted instruction */ + svm_setreg(vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T)); + vcpu->dbg.popf_sstep = 1; + handled = 1; + } + break; + } case VMCB_EXIT_SHUTDOWN: case VMCB_EXIT_VMRUN: case VMCB_EXIT_VMMCALL: @@ -1551,21 +1679,21 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) case VMCB_EXIT_CLGI: case VMCB_EXIT_SKINIT: case VMCB_EXIT_ICEBP: - case VMCB_EXIT_INVD: case VMCB_EXIT_INVLPGA: - vm_inject_ud(svm_sc->vm, vcpu); + vm_inject_ud(vcpu->vcpu); handled = 1; break; + case VMCB_EXIT_INVD: case VMCB_EXIT_WBINVD: - /* ignore WBINVD */ + /* ignore exit */ handled = 1; break; default: - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_UNKNOWN, 1); break; - } + } - VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d", + SVM_CTR4(vcpu, "%s %s vmexit at %#lx/%d", handled ? 
"handled" : "unhandled", exit_reason_to_str(code), vmexit->rip, vmexit->inst_length); @@ -1591,48 +1719,51 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) } static void -svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu) +svm_inj_intinfo(struct svm_softc *svm_sc, struct svm_vcpu *vcpu) { uint64_t intinfo; - if (!vm_entry_intinfo(svm_sc->vm, vcpu, &intinfo)) + if (!vm_entry_intinfo(vcpu->vcpu, &intinfo)) return; KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not " "valid: %#lx", __func__, intinfo)); - svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo), + svm_eventinject(vcpu, VMCB_EXITINTINFO_TYPE(intinfo), VMCB_EXITINTINFO_VECTOR(intinfo), VMCB_EXITINTINFO_EC(intinfo), VMCB_EXITINTINFO_EC_VALID(intinfo)); - vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1); - VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %#lx", intinfo); + vmm_stat_incr(vcpu->vcpu, VCPU_INTINFO_INJECTED, 1); + SVM_CTR1(vcpu, "Injected entry intinfo: %#lx", intinfo); } /* * Inject event to virtual cpu. */ static void -svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) +svm_inj_interrupts(struct svm_softc *sc, struct svm_vcpu *vcpu, + struct vlapic *vlapic) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; - struct svm_vcpu *vcpustate; uint8_t v_tpr; int vector, need_intr_window; int extint_pending; - state = svm_get_vmcb_state(sc, vcpu); - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - vcpustate = svm_get_vcpu(sc, vcpu); + if (vcpu->caps & (1 << VM_CAP_MASK_HWINTR)) { + return; + } + + state = svm_get_vmcb_state(vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); need_intr_window = 0; - if (vcpustate->nextrip != state->rip) { + if (vcpu->nextrip != state->rip) { ctrl->intr_shadow = 0; - VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking " + SVM_CTR2(vcpu, "Guest interrupt blocking " "cleared due to rip change: %#lx/%#lx", - vcpustate->nextrip, state->rip); + vcpu->nextrip, state->rip); } /* @@ -1647,19 +1778,19 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) svm_inj_intinfo(sc, vcpu); /* NMI event has priority over interrupts. */ - if (vm_nmi_pending(sc->vm, vcpu)) { - if (nmi_blocked(sc, vcpu)) { + if (vm_nmi_pending(vcpu->vcpu)) { + if (nmi_blocked(vcpu)) { /* * Can't inject another NMI if the guest has not * yet executed an "iret" after the last NMI. */ - VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due " + SVM_CTR0(vcpu, "Cannot inject NMI due " "to NMI-blocking"); } else if (ctrl->intr_shadow) { /* * Can't inject an NMI if the vcpu is in an intr_shadow. */ - VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due to " + SVM_CTR0(vcpu, "Cannot inject NMI due to " "interrupt shadow"); need_intr_window = 1; goto done; @@ -1668,7 +1799,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) * If there is already an exception/interrupt pending * then defer the NMI until after that. */ - VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to " + SVM_CTR1(vcpu, "Cannot inject NMI due to " "eventinj %#lx", ctrl->eventinj); /* @@ -1683,20 +1814,20 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) */ ipi_cpu(curcpu, IPI_AST); /* XXX vmm_ipinum? 
*/ } else { - vm_nmi_clear(sc->vm, vcpu); + vm_nmi_clear(vcpu->vcpu); /* Inject NMI, vector number is not used */ - svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_NMI, + svm_eventinject(vcpu, VMCB_EVENTINJ_TYPE_NMI, IDT_NMI, 0, false); /* virtual NMI blocking is now in effect */ - enable_nmi_blocking(sc, vcpu); + enable_nmi_blocking(vcpu); - VCPU_CTR0(sc->vm, vcpu, "Injecting vNMI"); + SVM_CTR0(vcpu, "Injecting vNMI"); } } - extint_pending = vm_extint_pending(sc->vm, vcpu); + extint_pending = vm_extint_pending(vcpu->vcpu); if (!extint_pending) { if (!vlapic_pending_intr(vlapic, &vector)) goto done; @@ -1714,32 +1845,32 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) * then we cannot inject the pending interrupt. */ if ((state->rflags & PSL_I) == 0) { - VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " + SVM_CTR2(vcpu, "Cannot inject vector %d due to " "rflags %#lx", vector, state->rflags); need_intr_window = 1; goto done; } if (ctrl->intr_shadow) { - VCPU_CTR1(sc->vm, vcpu, "Cannot inject vector %d due to " + SVM_CTR1(vcpu, "Cannot inject vector %d due to " "interrupt shadow", vector); need_intr_window = 1; goto done; } if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { - VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " + SVM_CTR2(vcpu, "Cannot inject vector %d due to " "eventinj %#lx", vector, ctrl->eventinj); need_intr_window = 1; goto done; } - svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false); + svm_eventinject(vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false); if (!extint_pending) { vlapic_intr_accepted(vlapic, vector); } else { - vm_extint_clear(sc->vm, vcpu); + vm_extint_clear(vcpu->vcpu); vatpic_intr_accepted(sc->vm, vector); } @@ -1765,10 +1896,10 @@ done: v_tpr = vlapic_get_cr8(vlapic); KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr)); if (ctrl->v_tpr != v_tpr) { - VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x", + SVM_CTR2(vcpu, "VMCB V_TPR changed from %#x to %#x", ctrl->v_tpr, v_tpr); ctrl->v_tpr = v_tpr; - svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); + svm_set_dirty(vcpu, VMCB_CACHE_TPR); } if (need_intr_window) { @@ -1786,9 +1917,9 @@ done: ("Bogus intr_window_exiting: eventinj (%#lx), " "intr_shadow (%u), rflags (%#lx)", ctrl->eventinj, ctrl->intr_shadow, state->rflags)); - enable_intr_window_exiting(sc, vcpu); + enable_intr_window_exiting(vcpu); } else { - disable_intr_window_exiting(sc, vcpu); + disable_intr_window_exiting(vcpu); } } @@ -1810,9 +1941,8 @@ restore_host_tss(void) } static void -svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap) +svm_pmap_activate(struct svm_vcpu *vcpu, pmap_t pmap) { - struct svm_vcpu *vcpustate; struct vmcb_ctrl *ctrl; long eptgen; int cpu; @@ -1822,8 +1952,7 @@ svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap) CPU_SET_ATOMIC(cpu, &pmap->pm_active); smr_enter(pmap->pm_eptsmr); - vcpustate = svm_get_vcpu(sc, vcpuid); - ctrl = svm_get_vmcb_ctrl(sc, vcpuid); + ctrl = svm_get_vmcb_ctrl(vcpu); /* * The TLB entries associated with the vcpu's ASID are not valid @@ -1864,9 +1993,9 @@ svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap) eptgen = atomic_load_long(&pmap->pm_eptgen); ctrl->tlb_ctrl = VMCB_TLB_FLUSH_NOTHING; - if (vcpustate->asid.gen != asid[cpu].gen) { + if (vcpu->asid.gen != asid[cpu].gen) { alloc_asid = true; /* (c) and (d) */ - } else if (vcpustate->eptgen != eptgen) { + } else if (vcpu->eptgen != eptgen) { if (flush_by_asid()) ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST; /* (b1) */ else @@ -1894,11 +2023,11 @@ 
svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap) if (!flush_by_asid()) ctrl->tlb_ctrl = VMCB_TLB_FLUSH_ALL; } - vcpustate->asid.gen = asid[cpu].gen; - vcpustate->asid.num = asid[cpu].num; + vcpu->asid.gen = asid[cpu].gen; + vcpu->asid.num = asid[cpu].num; - ctrl->asid = vcpustate->asid.num; - svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID); + ctrl->asid = vcpu->asid.num; + svm_set_dirty(vcpu, VMCB_CACHE_ASID); /* * If this cpu supports "flush-by-asid" then the TLB * was not flushed after the generation bump. The TLB @@ -1907,11 +2036,11 @@ svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap) if (flush_by_asid()) ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST; } - vcpustate->eptgen = eptgen; + vcpu->eptgen = eptgen; KASSERT(ctrl->asid != 0, ("Guest ASID must be non-zero")); - KASSERT(ctrl->asid == vcpustate->asid.num, - ("ASID mismatch: %u/%u", ctrl->asid, vcpustate->asid.num)); + KASSERT(ctrl->asid == vcpu->asid.num, + ("ASID mismatch: %u/%u", ctrl->asid, vcpu->asid.num)); } static void @@ -1993,47 +2122,43 @@ svm_dr_leave_guest(struct svm_regctx *gctx) * Start vcpu with specified RIP. */ static int -svm_run(void *arg, int vcpu, register_t rip, pmap_t pmap, - struct vm_eventinfo *evinfo) +svm_run(void *vcpui, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { struct svm_regctx *gctx; struct svm_softc *svm_sc; - struct svm_vcpu *vcpustate; + struct svm_vcpu *vcpu; struct vmcb_state *state; struct vmcb_ctrl *ctrl; struct vm_exit *vmexit; struct vlapic *vlapic; - struct vm *vm; uint64_t vmcb_pa; int handled; uint16_t ldt_sel; - svm_sc = arg; - vm = svm_sc->vm; - - vcpustate = svm_get_vcpu(svm_sc, vcpu); - state = svm_get_vmcb_state(svm_sc, vcpu); - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - vmexit = vm_exitinfo(vm, vcpu); - vlapic = vm_lapic(vm, vcpu); + vcpu = vcpui; + svm_sc = vcpu->sc; + state = svm_get_vmcb_state(vcpu); + ctrl = svm_get_vmcb_ctrl(vcpu); + vmexit = vm_exitinfo(vcpu->vcpu); + vlapic = vm_lapic(vcpu->vcpu); - gctx = svm_get_guest_regctx(svm_sc, vcpu); - vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa; + gctx = svm_get_guest_regctx(vcpu); + vmcb_pa = vcpu->vmcb_pa; - if (vcpustate->lastcpu != curcpu) { + if (vcpu->lastcpu != curcpu) { /* * Force new ASID allocation by invalidating the generation. */ - vcpustate->asid.gen = 0; + vcpu->asid.gen = 0; /* * Invalidate the VMCB state cache by marking all fields dirty. */ - svm_set_dirty(svm_sc, vcpu, 0xffffffff); + svm_set_dirty(vcpu, 0xffffffff); /* * XXX - * Setting 'vcpustate->lastcpu' here is bit premature because + * Setting 'vcpu->lastcpu' here is a bit premature because * we may return from this function without actually executing * the VMRUN instruction. This could happen if a rendezvous * or an AST is pending on the first time through the loop. * * This works for now but any new side-effects of vcpu * migration should take this case into account.
*/ - vcpustate->lastcpu = curcpu; - vmm_stat_incr(vm, vcpu, VCPU_MIGRATIONS, 1); + vcpu->lastcpu = curcpu; + vmm_stat_incr(vcpu->vcpu, VCPU_MIGRATIONS, 1); } - svm_msr_guest_enter(svm_sc, vcpu); + svm_msr_guest_enter(vcpu); /* Update Guest RIP */ state->rip = rip; @@ -2062,32 +2187,32 @@ svm_run(void *arg, int vcpu, register_t rip, pmap_t pmap, if (vcpu_suspended(evinfo)) { enable_gintr(); - vm_exit_suspended(vm, vcpu, state->rip); + vm_exit_suspended(vcpu->vcpu, state->rip); break; } - if (vcpu_rendezvous_pending(evinfo)) { + if (vcpu_rendezvous_pending(vcpu->vcpu, evinfo)) { enable_gintr(); - vm_exit_rendezvous(vm, vcpu, state->rip); + vm_exit_rendezvous(vcpu->vcpu, state->rip); break; } if (vcpu_reqidle(evinfo)) { enable_gintr(); - vm_exit_reqidle(vm, vcpu, state->rip); + vm_exit_reqidle(vcpu->vcpu, state->rip); break; } /* We are asked to give the cpu by scheduler. */ - if (vcpu_should_yield(vm, vcpu)) { + if (vcpu_should_yield(vcpu->vcpu)) { enable_gintr(); - vm_exit_astpending(vm, vcpu, state->rip); + vm_exit_astpending(vcpu->vcpu, state->rip); break; } - if (vcpu_debugged(vm, vcpu)) { + if (vcpu_debugged(vcpu->vcpu)) { enable_gintr(); - vm_exit_debug(vm, vcpu, state->rip); + vm_exit_debug(vcpu->vcpu, state->rip); break; } @@ -2106,14 +2231,14 @@ svm_run(void *arg, int vcpu, register_t rip, pmap_t pmap, * Check the pmap generation and the ASID generation to * ensure that the vcpu does not use stale TLB mappings. */ - svm_pmap_activate(svm_sc, vcpu, pmap); + svm_pmap_activate(vcpu, pmap); - ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty; - vcpustate->dirty = 0; - VCPU_CTR1(vm, vcpu, "vmcb clean %#x", ctrl->vmcb_clean); + ctrl->vmcb_clean = vmcb_clean & ~vcpu->dirty; + vcpu->dirty = 0; + SVM_CTR1(vcpu, "vmcb clean %#x", ctrl->vmcb_clean); /* Launch Virtual Machine. */ - VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip); + SVM_CTR1(vcpu, "Resume execution at %#lx", state->rip); svm_dr_enter_guest(gctx); svm_launch(vmcb_pa, gctx, get_pcpu()); svm_dr_leave_guest(gctx); @@ -2130,28 +2255,37 @@ svm_run(void *arg, int vcpu, register_t rip, pmap_t pmap, /* Restore host LDTR. */ lldt(ldt_sel); - /* #VMEXIT disables interrupts so re-enable them here. */ + /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); /* Update 'nextrip' */ - vcpustate->nextrip = state->rip; + vcpu->nextrip = state->rip; /* Handle #VMEXIT and if required return to user space. 
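 * The enclosing do/while loop re-enters the guest for as long as the
 * exit was handled in-kernel; an unhandled exit terminates the loop
 * and is reported to userspace through the vm_exit structure.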
*/ handled = svm_vmexit(svm_sc, vcpu, vmexit); } while (handled); - svm_msr_guest_exit(svm_sc, vcpu); + svm_msr_guest_exit(vcpu); return (0); } static void -svm_cleanup(void *arg) +svm_vcpu_cleanup(void *vcpui) +{ + struct svm_vcpu *vcpu = vcpui; + + free(vcpu->vmcb, M_SVM); + free(vcpu, M_SVM); +} + +static void +svm_cleanup(void *vmi) { - struct svm_softc *sc = arg; + struct svm_softc *sc = vmi; - contigfree(sc->iopm_bitmap, SVM_IO_BITMAP_SIZE, M_SVM); - contigfree(sc->msr_bitmap, SVM_MSR_BITMAP_SIZE, M_SVM); + free(sc->iopm_bitmap, M_SVM); + free(sc->msr_bitmap, M_SVM); free(sc, M_SVM); } @@ -2202,52 +2336,52 @@ swctx_regptr(struct svm_regctx *regctx, int reg) } static int -svm_getreg(void *arg, int vcpu, int ident, uint64_t *val) +svm_getreg(void *vcpui, int ident, uint64_t *val) { - struct svm_softc *svm_sc; + struct svm_vcpu *vcpu; register_t *reg; - svm_sc = arg; + vcpu = vcpui; if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_get_intr_shadow(svm_sc, vcpu, val)); + return (svm_get_intr_shadow(vcpu, val)); } - if (vmcb_read(svm_sc, vcpu, ident, val) == 0) { + if (vmcb_read(vcpu, ident, val) == 0) { return (0); } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + reg = swctx_regptr(svm_get_guest_regctx(vcpu), ident); if (reg != NULL) { *val = *reg; return (0); } - VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident); + SVM_CTR1(vcpu, "svm_getreg: unknown register %#x", ident); return (EINVAL); } static int -svm_setreg(void *arg, int vcpu, int ident, uint64_t val) +svm_setreg(void *vcpui, int ident, uint64_t val) { - struct svm_softc *svm_sc; + struct svm_vcpu *vcpu; register_t *reg; - svm_sc = arg; + vcpu = vcpui; if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_modify_intr_shadow(svm_sc, vcpu, val)); + return (svm_modify_intr_shadow(vcpu, val)); } /* Do not permit user write access to VMCB fields by offset. */ if (!VMCB_ACCESS_OK(ident)) { - if (vmcb_write(svm_sc, vcpu, ident, val) == 0) { + if (vmcb_write(vcpu, ident, val) == 0) { return (0); } } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + reg = swctx_regptr(svm_get_guest_regctx(vcpu), ident); if (reg != NULL) { *reg = val; @@ -2265,32 +2399,31 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val) * whether 'running' is true/false. 
*/ - VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident); + SVM_CTR1(vcpu, "svm_setreg: unknown register %#x", ident); return (EINVAL); } static int -svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +svm_getdesc(void *vcpui, int reg, struct seg_desc *desc) { - return (vmcb_getdesc(arg, vcpu, reg, desc)); + return (vmcb_getdesc(vcpui, reg, desc)); } static int -svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +svm_setdesc(void *vcpui, int reg, struct seg_desc *desc) { - return (vmcb_setdesc(arg, vcpu, reg, desc)); + return (vmcb_setdesc(vcpui, reg, desc)); } #ifdef BHYVE_SNAPSHOT static int -svm_snapshot_reg(void *arg, int vcpu, int ident, - struct vm_snapshot_meta *meta) +svm_snapshot_reg(void *vcpui, int ident, struct vm_snapshot_meta *meta) { int ret; uint64_t val; if (meta->op == VM_SNAPSHOT_SAVE) { - ret = svm_getreg(arg, vcpu, ident, &val); + ret = svm_getreg(vcpui, ident, &val); if (ret != 0) goto done; @@ -2298,7 +2431,7 @@ svm_snapshot_reg(void *arg, int vcpu, int ident, } else if (meta->op == VM_SNAPSHOT_RESTORE) { SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); - ret = svm_setreg(arg, vcpu, ident, val); + ret = svm_setreg(vcpui, ident, val); if (ret != 0) goto done; } else { @@ -2312,20 +2445,22 @@ done: #endif static int -svm_setcap(void *arg, int vcpu, int type, int val) +svm_setcap(void *vcpui, int type, int val) { - struct svm_softc *sc; + struct svm_vcpu *vcpu; + struct vlapic *vlapic; int error; - sc = arg; + vcpu = vcpui; error = 0; + switch (type) { case VM_CAP_HALT_EXIT: - svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_HLT, val); break; case VM_CAP_PAUSE_EXIT: - svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PAUSE, val); break; case VM_CAP_UNRESTRICTED_GUEST: @@ -2333,6 +2468,61 @@ svm_setcap(void *arg, int vcpu, int type, int val) if (val == 0) error = EINVAL; break; + case VM_CAP_BPT_EXIT: + svm_set_intercept(vcpu, VMCB_EXC_INTCPT, BIT(IDT_BP), val); + break; + case VM_CAP_IPI_EXIT: + vlapic = vm_lapic(vcpu->vcpu); + vlapic->ipi_exit = val; + break; + case VM_CAP_MASK_HWINTR: + vcpu->caps &= ~(1 << VM_CAP_MASK_HWINTR); + vcpu->caps |= (val << VM_CAP_MASK_HWINTR); + break; + case VM_CAP_RFLAGS_TF: { + uint64_t rflags; + + /* Fetch RFLAGS. */ + if (svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, &rflags)) { + error = (EINVAL); + break; + } + if (val) { + /* Save current TF bit. */ + vcpu->dbg.rflags_tf = rflags & PSL_T; + /* Trace next instruction. 
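+ * The guest's own TF value was shadowed in dbg.rflags_tf above and
+ * is restored when the capability is cleared, so the single-step
+ * machinery stays transparent to the guest.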
*/ + if (svm_setreg(vcpu, VM_REG_GUEST_RFLAGS, + (rflags | PSL_T))) { + error = (EINVAL); + break; + } + vcpu->caps |= (1 << VM_CAP_RFLAGS_TF); + } else { + /* + * Restore shadowed RFLAGS.TF only if vCPU was + * previously stepped + */ + if (vcpu->caps & (1 << VM_CAP_RFLAGS_TF)) { + rflags &= ~PSL_T; + rflags |= vcpu->dbg.rflags_tf; + vcpu->dbg.rflags_tf = 0; + + if (svm_setreg(vcpu, VM_REG_GUEST_RFLAGS, + rflags)) { + error = (EINVAL); + break; + } + vcpu->caps &= ~(1 << VM_CAP_RFLAGS_TF); + } + } + + svm_set_intercept(vcpu, VMCB_EXC_INTCPT, BIT(IDT_DB), val); + svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_POPF, + val); + svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PUSHF, + val); + break; + } default: error = ENOENT; break; @@ -2341,26 +2531,40 @@ svm_setcap(void *arg, int vcpu, int type, int val) } static int -svm_getcap(void *arg, int vcpu, int type, int *retval) +svm_getcap(void *vcpui, int type, int *retval) { - struct svm_softc *sc; + struct svm_vcpu *vcpu; + struct vlapic *vlapic; int error; - sc = arg; + vcpu = vcpui; error = 0; switch (type) { case VM_CAP_HALT_EXIT: - *retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + *retval = svm_get_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_HLT); break; case VM_CAP_PAUSE_EXIT: - *retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + *retval = svm_get_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PAUSE); break; case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; /* unrestricted guest is always enabled */ break; + case VM_CAP_BPT_EXIT: + *retval = svm_get_intercept(vcpu, VMCB_EXC_INTCPT, BIT(IDT_BP)); + break; + case VM_CAP_IPI_EXIT: + vlapic = vm_lapic(vcpu->vcpu); + *retval = vlapic->ipi_exit; + break; + case VM_CAP_RFLAGS_TF: + *retval = !!(vcpu->caps & (1 << VM_CAP_RFLAGS_TF)); + break; + case VM_CAP_MASK_HWINTR: + *retval = !!(vcpu->caps & (1 << VM_CAP_MASK_HWINTR)); + break; default: error = ENOENT; break; @@ -2381,16 +2585,18 @@ svm_vmspace_free(struct vmspace *vmspace) } static struct vlapic * -svm_vlapic_init(void *arg, int vcpuid) +svm_vlapic_init(void *vcpui) { - struct svm_softc *svm_sc; + struct svm_vcpu *vcpu; struct vlapic *vlapic; - svm_sc = arg; + vcpu = vcpui; vlapic = malloc(sizeof(struct vlapic), M_SVM_VLAPIC, M_WAITOK | M_ZERO); - vlapic->vm = svm_sc->vm; - vlapic->vcpuid = vcpuid; - vlapic->apic_page = (struct LAPIC *)&svm_sc->apic_page[vcpuid]; + vlapic->vm = vcpu->sc->vm; + vlapic->vcpu = vcpu->vcpu; + vlapic->vcpuid = vcpu->vcpuid; + vlapic->apic_page = malloc_aligned(PAGE_SIZE, PAGE_SIZE, M_SVM_VLAPIC, + M_WAITOK | M_ZERO); vlapic_init(vlapic); @@ -2398,254 +2604,226 @@ svm_vlapic_init(void *arg, int vcpuid) } static void -svm_vlapic_cleanup(void *arg, struct vlapic *vlapic) +svm_vlapic_cleanup(struct vlapic *vlapic) { vlapic_cleanup(vlapic); + free(vlapic->apic_page, M_SVM_VLAPIC); free(vlapic, M_SVM_VLAPIC); } #ifdef BHYVE_SNAPSHOT static int -svm_snapshot(void *arg, struct vm_snapshot_meta *meta) +svm_vcpu_snapshot(void *vcpui, struct vm_snapshot_meta *meta) { - /* struct svm_softc is AMD's representation for SVM softc */ - struct svm_softc *sc; struct svm_vcpu *vcpu; - int i; - int ret; - - sc = arg; - - KASSERT(sc != NULL, ("%s: arg was NULL", __func__)); - - for (i = 0; i < VM_MAXCPU; i++) { - vcpu = &sc->vcpu[i]; - - /* Snapshot swctx for virtual cpu i */ - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, ret, done); - 
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, ret, done); - - /* Restore other svm_vcpu struct fields */ - - /* Restore NEXTRIP field */ - SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); - - /* Restore lastcpu field */ - SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, ret, done); - - /* Restore EPTGEN field - EPT is Extended Page Table */ - SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, ret, done); - - SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, ret, done); - - /* Set all caches dirty */ - if (meta->op == VM_SNAPSHOT_RESTORE) { - svm_set_dirty(sc, i, VMCB_CACHE_ASID); - svm_set_dirty(sc, i, VMCB_CACHE_IOPM); - svm_set_dirty(sc, i, VMCB_CACHE_I); - svm_set_dirty(sc, i, VMCB_CACHE_TPR); - svm_set_dirty(sc, i, VMCB_CACHE_CR2); - svm_set_dirty(sc, i, VMCB_CACHE_CR); - svm_set_dirty(sc, i, VMCB_CACHE_DT); - svm_set_dirty(sc, i, VMCB_CACHE_SEG); - svm_set_dirty(sc, i, VMCB_CACHE_NP); - } - } - - if (meta->op == VM_SNAPSHOT_RESTORE) - flush_by_asid(); - -done: - return (ret); -} - -static int -svm_vmcx_snapshot(void *arg, struct vm_snapshot_meta *meta, int vcpu) -{ - struct svm_softc *sc; int err, running, hostcpu; - sc = (struct svm_softc *)arg; + vcpu = vcpui; err = 0; - KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); - - running = vcpu_is_running(sc->vm, vcpu, &hostcpu); - if (running && hostcpu !=curcpu) { - printf("%s: %s%d is running", __func__, vm_name(sc->vm), vcpu); + running = vcpu_is_running(vcpu->vcpu, &hostcpu); + if (running && hostcpu != curcpu) { + printf("%s: %s%d is running", __func__, vm_name(vcpu->sc->vm), + vcpu->vcpuid); return (EINVAL); } - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR0, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR2, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR3, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR4, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_CR0, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_CR2, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_CR3, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_CR4, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DR6, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DR7, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_DR6, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_DR7, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RAX, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_RAX, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RSP, meta); - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RIP, meta); - err += 
svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RFLAGS, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_RSP, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_RIP, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_RFLAGS, meta); /* Guest segments */ /* ES */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_ES, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_ES, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_ES, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_ES, meta); /* CS */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CS, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_CS, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_CS, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_CS, meta); /* SS */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_SS, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_SS, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_SS, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_SS, meta); /* DS */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DS, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_DS, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_DS, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_DS, meta); /* FS */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_FS, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_FS, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_FS, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_FS, meta); /* GS */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_GS, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GS, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_GS, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_GS, meta); /* TR */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_TR, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_TR, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_TR, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_TR, meta); /* LDTR */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_LDTR, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_LDTR, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_LDTR, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_LDTR, meta); /* EFER */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_EFER, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_EFER, meta); /* IDTR and GDTR */ - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_IDTR, meta); - err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GDTR, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_IDTR, meta); + err += vmcb_snapshot_desc(vcpu, VM_REG_GUEST_GDTR, meta); /* Specific AMD registers */ - err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_INTR_SHADOW, meta); + err += svm_snapshot_reg(vcpu, VM_REG_GUEST_INTR_SHADOW, meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_CR_INTERCEPT, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_DR_INTERCEPT, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_EXC_INTERCEPT, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_INST1_INTERCEPT, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_INST2_INTERCEPT, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_PAUSE_FILTHRESH, 2), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += 
vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_PAUSE_FILCNT, 2), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_ASID, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_TLB_CTRL, 4), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_VIRQ, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_EXIT_REASON, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_EXITINFO1, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_EXITINFO2, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_EXITINTINFO, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_NP_ENABLE, 1), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_BAR, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_PAGE, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_LT, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_PT, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_CPL, 1), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_STAR, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_LSTAR, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_CSTAR, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_SFMASK, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_KERNELGBASE, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_SYSENTER_CS, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_SYSENTER_ESP, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_SYSENTER_EIP, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_GUEST_PAT, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_DBGCTL, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_BR_FROM, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_BR_TO, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_INT_FROM, 8), meta); - err += vmcb_snapshot_any(sc, vcpu, + err += vmcb_snapshot_any(vcpu, VMCB_ACCESS(VMCB_OFF_INT_TO, 8), meta); + if (err != 0) + goto done; + /* Snapshot swctx for virtual cpu */ + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, err, done); + 
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, err, done); + + /* Restore other svm_vcpu struct fields */ + + /* Restore NEXTRIP field */ + SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, err, done); + + /* Restore lastcpu field */ + SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, err, done); + + /* Restore EPTGEN field - EPT is Extended Page Table */ + SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, err, done); + + SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, err, done); + + SNAPSHOT_BUF_OR_LEAVE(&vcpu->mtrr, sizeof(vcpu->mtrr), meta, err, done); + + /* Set all caches dirty */ + if (meta->op == VM_SNAPSHOT_RESTORE) + svm_set_dirty(vcpu, 0xffffffff); + +done: return (err); } static int -svm_restore_tsc(void *arg, int vcpu, uint64_t offset) +svm_restore_tsc(void *vcpui, uint64_t offset) { - int err; + struct svm_vcpu *vcpu = vcpui; - err = svm_set_tsc_offset(arg, vcpu, offset); + svm_set_tsc_offset(vcpu, offset); - return (err); + return (0); } #endif @@ -2653,9 +2831,12 @@ const struct vmm_ops vmm_ops_amd = { .modinit = svm_modinit, .modcleanup = svm_modcleanup, .modresume = svm_modresume, + .modsuspend = svm_modsuspend, .init = svm_init, .run = svm_run, .cleanup = svm_cleanup, + .vcpu_init = svm_vcpu_init, + .vcpu_cleanup = svm_vcpu_cleanup, .getreg = svm_getreg, .setreg = svm_setreg, .getdesc = svm_getdesc, @@ -2667,8 +2848,7 @@ const struct vmm_ops vmm_ops_amd = { .vlapic_init = svm_vlapic_init, .vlapic_cleanup = svm_vlapic_cleanup, #ifdef BHYVE_SNAPSHOT - .snapshot = svm_snapshot, - .vmcx_snapshot = svm_vmcx_snapshot, + .vcpu_snapshot = svm_vcpu_snapshot, .restore_tsc = svm_restore_tsc, #endif }; diff --git a/sys/amd64/vmm/amd/svm.h b/sys/amd64/vmm/amd/svm.h index 30e58b9e130f..16459506832a 100644 --- a/sys/amd64/vmm/amd/svm.h +++ b/sys/amd64/vmm/amd/svm.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _SVM_H_ @@ -33,6 +31,7 @@ struct pcpu; struct svm_softc; +struct svm_vcpu; /* * Guest register state that is saved outside the VMCB. 
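
The vmm_ops_amd table above is the crux of this refactor: per-vCPU state moves out of fixed VM_MAXCPU arrays, and each callback now receives the opaque cookie returned by vcpu_init instead of a (softc, vcpuid) pair. A rough sketch of the shape such an init callback takes, built only from the svm_vcpu fields visible in this diff (the function name and exact body are illustrative; the commit's real svm_vcpu_init is not reproduced here):

	static void *
	example_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
	{
		struct svm_softc *sc = vmi;
		struct svm_vcpu *vcpu;

		/* One heap allocation per vcpu replaces the old embedded array. */
		vcpu = malloc(sizeof(*vcpu), M_SVM, M_WAITOK | M_ZERO);
		vcpu->sc = sc;		/* back-pointer replaces the (sc, vcpuid) args */
		vcpu->vcpu = vcpu1;
		vcpu->vcpuid = vcpuid;
		/* The returned pointer is the 'vcpui' cookie passed to the ops above. */
		return (vcpu);
	}

Its counterpart, svm_vcpu_cleanup above, is symmetric: it frees the VMCB and then the structure itself.
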
@@ -68,7 +67,7 @@ struct svm_regctx { void svm_launch(uint64_t pa, struct svm_regctx *gctx, struct pcpu *pcpu); #ifdef BHYVE_SNAPSHOT -int svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset); +void svm_set_tsc_offset(struct svm_vcpu *vcpu, uint64_t offset); #endif #endif /* _SVM_H_ */ diff --git a/sys/amd64/vmm/amd/svm_genassym.c b/sys/amd64/vmm/amd/svm_genassym.c index 3fd04e451753..21d008190028 100644 --- a/sys/amd64/vmm/amd/svm_genassym.c +++ b/sys/amd64/vmm/amd/svm_genassym.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -26,9 +26,6 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/assym.h> #include <x86/specialreg.h> diff --git a/sys/amd64/vmm/amd/svm_msr.c b/sys/amd64/vmm/amd/svm_msr.c index 1a22f16cf48e..1f7be6029e64 100644 --- a/sys/amd64/vmm/amd/svm_msr.c +++ b/sys/amd64/vmm/amd/svm_msr.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -61,7 +59,7 @@ static uint64_t host_msrs[HOST_MSR_NUM]; void svm_msr_init(void) { - /* + /* * It is safe to cache the values of the following MSRs because they * don't change based on curcpu, curproc or curthread. */ @@ -72,7 +70,7 @@ svm_msr_init(void) } void -svm_msr_guest_init(struct svm_softc *sc, int vcpu) +svm_msr_guest_init(struct svm_softc *sc, struct svm_vcpu *vcpu) { /* * All the MSRs accessible to the guest are either saved/restored by @@ -86,7 +84,7 @@ svm_msr_guest_init(struct svm_softc *sc, int vcpu) } void -svm_msr_guest_enter(struct svm_softc *sc, int vcpu) +svm_msr_guest_enter(struct svm_vcpu *vcpu) { /* * Save host MSRs (if any) and restore guest MSRs (if any). @@ -94,7 +92,7 @@ svm_msr_guest_enter(struct svm_softc *sc, int vcpu) } void -svm_msr_guest_exit(struct svm_softc *sc, int vcpu) +svm_msr_guest_exit(struct svm_vcpu *vcpu) { /* * Save guest MSRs (if any) and restore host MSRs. @@ -108,8 +106,7 @@ svm_msr_guest_exit(struct svm_softc *sc, int vcpu) } int -svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, - bool *retu) +svm_rdmsr(struct svm_vcpu *vcpu, u_int num, uint64_t *result, bool *retu) { int error = 0; @@ -124,8 +121,8 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: - if (vm_rdmtrr(&sc->mtrr[vcpu], num, result) != 0) { - vm_inject_gp(sc->vm, vcpu); + if (vm_rdmtrr(&vcpu->mtrr, num, result) != 0) { + vm_inject_gp(vcpu->vcpu); } break; case MSR_SYSCFG: @@ -142,7 +139,7 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, } int -svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) +svm_wrmsr(struct svm_vcpu *vcpu, u_int num, uint64_t val, bool *retu) { int error = 0; @@ -156,8 +153,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_MTRRVarBase ... 
MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: - if (vm_wrmtrr(&sc->mtrr[vcpu], num, val) != 0) { - vm_inject_gp(sc->vm, vcpu); + if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) { + vm_inject_gp(vcpu->vcpu); } break; case MSR_SYSCFG: @@ -174,7 +171,7 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) break; #ifdef BHYVE_SNAPSHOT case MSR_TSC: - error = svm_set_tsc_offset(sc, vcpu, val - rdtsc()); + svm_set_tsc_offset(vcpu, val - rdtsc()); break; #endif case MSR_EXTFEATURES: diff --git a/sys/amd64/vmm/amd/svm_msr.h b/sys/amd64/vmm/amd/svm_msr.h index 1dba8101ab35..0242e508cd0a 100644 --- a/sys/amd64/vmm/amd/svm_msr.h +++ b/sys/amd64/vmm/amd/svm_msr.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Neel Natu (neel@freebsd.org) * All rights reserved. @@ -24,23 +24,20 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _SVM_MSR_H_ #define _SVM_MSR_H_ struct svm_softc; +struct svm_vcpu; void svm_msr_init(void); -void svm_msr_guest_init(struct svm_softc *sc, int vcpu); -void svm_msr_guest_enter(struct svm_softc *sc, int vcpu); -void svm_msr_guest_exit(struct svm_softc *sc, int vcpu); +void svm_msr_guest_init(struct svm_softc *sc, struct svm_vcpu *vcpu); +void svm_msr_guest_enter(struct svm_vcpu *vcpu); +void svm_msr_guest_exit(struct svm_vcpu *vcpu); -int svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, - bool *retu); -int svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, - bool *retu); +int svm_wrmsr(struct svm_vcpu *vcpu, u_int num, uint64_t val, bool *retu); +int svm_rdmsr(struct svm_vcpu *vcpu, u_int num, uint64_t *result, bool *retu); #endif /* _SVM_MSR_H_ */ diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h index 5f6a267617d2..0fd2303a7242 100644 --- a/sys/amd64/vmm/amd/svm_softc.h +++ b/sys/amd64/vmm/amd/svm_softc.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _SVM_SOFTC_H_ @@ -36,17 +34,23 @@ #define SVM_IO_BITMAP_SIZE (3 * PAGE_SIZE) #define SVM_MSR_BITMAP_SIZE (2 * PAGE_SIZE) +struct svm_softc; + +struct dbg { + uint32_t rflags_tf; /* saved RFLAGS.TF value when single-stepping a vcpu */ + bool popf_sstep; /* indicates that we've stepped over popf */ + bool pushf_sstep; /* indicates that we've stepped over pushf */ +}; + struct asid { uint64_t gen; /* range is [1, ~0UL] */ uint32_t num; /* range is [1, nasid - 1] */ }; -/* - * XXX separate out 'struct vmcb' from 'svm_vcpu' to avoid wasting space - * due to VMCB alignment requirements. 
- */ struct svm_vcpu { - struct vmcb vmcb; /* hardware saved vcpu context */ + struct svm_softc *sc; + struct vcpu *vcpu; + struct vmcb *vmcb; /* hardware saved vcpu context */ struct svm_regctx swctx; /* software saved vcpu context */ uint64_t vmcb_pa; /* VMCB physical address */ uint64_t nextrip; /* next instruction to be executed by guest */ @@ -54,66 +58,70 @@ struct svm_vcpu { uint32_t dirty; /* state cache bits that must be cleared */ long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ struct asid asid; -} __aligned(PAGE_SIZE); + struct vm_mtrr mtrr; + int vcpuid; + struct dbg dbg; + int caps; /* optional vm capabilities */ +}; /* * SVM softc, one per virtual machine. */ struct svm_softc { - uint8_t apic_page[VM_MAXCPU][PAGE_SIZE]; - struct svm_vcpu vcpu[VM_MAXCPU]; - vm_offset_t nptp; /* nested page table */ + vm_paddr_t nptp; /* nested page table */ uint8_t *iopm_bitmap; /* shared by all vcpus */ uint8_t *msr_bitmap; /* shared by all vcpus */ struct vm *vm; - struct vm_mtrr mtrr[VM_MAXCPU]; }; -CTASSERT((offsetof(struct svm_softc, nptp) & PAGE_MASK) == 0); +#define SVM_CTR0(vcpu, format) \ + VCPU_CTR0((vcpu)->sc->vm, (vcpu)->vcpuid, format) -static __inline struct svm_vcpu * -svm_get_vcpu(struct svm_softc *sc, int vcpu) -{ +#define SVM_CTR1(vcpu, format, p1) \ + VCPU_CTR1((vcpu)->sc->vm, (vcpu)->vcpuid, format, p1) - return (&(sc->vcpu[vcpu])); -} +#define SVM_CTR2(vcpu, format, p1, p2) \ + VCPU_CTR2((vcpu)->sc->vm, (vcpu)->vcpuid, format, p1, p2) + +#define SVM_CTR3(vcpu, format, p1, p2, p3) \ + VCPU_CTR3((vcpu)->sc->vm, (vcpu)->vcpuid, format, p1, p2, p3) + +#define SVM_CTR4(vcpu, format, p1, p2, p3, p4) \ + VCPU_CTR4((vcpu)->sc->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) static __inline struct vmcb * -svm_get_vmcb(struct svm_softc *sc, int vcpu) +svm_get_vmcb(struct svm_vcpu *vcpu) { - return (&(sc->vcpu[vcpu].vmcb)); + return (vcpu->vmcb); } static __inline struct vmcb_state * -svm_get_vmcb_state(struct svm_softc *sc, int vcpu) +svm_get_vmcb_state(struct svm_vcpu *vcpu) { - return (&(sc->vcpu[vcpu].vmcb.state)); + return (&vcpu->vmcb->state); } static __inline struct vmcb_ctrl * -svm_get_vmcb_ctrl(struct svm_softc *sc, int vcpu) +svm_get_vmcb_ctrl(struct svm_vcpu *vcpu) { - return (&(sc->vcpu[vcpu].vmcb.ctrl)); + return (&vcpu->vmcb->ctrl); } static __inline struct svm_regctx * -svm_get_guest_regctx(struct svm_softc *sc, int vcpu) +svm_get_guest_regctx(struct svm_vcpu *vcpu) { - return (&(sc->vcpu[vcpu].swctx)); + return (&vcpu->swctx); } static __inline void -svm_set_dirty(struct svm_softc *sc, int vcpu, uint32_t dirtybits) +svm_set_dirty(struct svm_vcpu *vcpu, uint32_t dirtybits) { - struct svm_vcpu *vcpustate; - - vcpustate = svm_get_vcpu(sc, vcpu); - vcpustate->dirty |= dirtybits; + vcpu->dirty |= dirtybits; } #endif /* _SVM_SOFTC_H_ */ diff --git a/sys/amd64/vmm/amd/svm_support.S b/sys/amd64/vmm/amd/svm_support.S index bf9e9a0f89cc..26bf36b98f71 100644 --- a/sys/amd64/vmm/amd/svm_support.S +++ b/sys/amd64/vmm/amd/svm_support.S @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ */ #include <machine/asmacros.h> diff --git a/sys/amd64/vmm/amd/vmcb.c b/sys/amd64/vmm/amd/vmcb.c index 0341e4e6c07c..9a1008fa495c 100644 --- a/sys/amd64/vmm/amd/vmcb.c +++ b/sys/amd64/vmm/amd/vmcb.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -39,8 +37,9 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <machine/vmm_snapshot.h> -#include "vmm_ktr.h" +#include <dev/vmm/vmm_ktr.h> +#include "vlapic.h" #include "vmcb.h" #include "svm.h" #include "svm_softc.h" @@ -116,14 +115,13 @@ vmcb_segptr(struct vmcb *vmcb, int type) } static int -vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident, - uint64_t *val) +vmcb_access(struct svm_vcpu *vcpu, int write, int ident, uint64_t *val) { struct vmcb *vmcb; int off, bytes; char *ptr; - vmcb = svm_get_vmcb(softc, vcpu); + vmcb = svm_get_vmcb(vcpu); off = VMCB_ACCESS_OFFSET(ident); bytes = VMCB_ACCESS_BYTES(ident); @@ -146,14 +144,13 @@ vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident, memcpy(val, ptr + off, bytes); break; default: - VCPU_CTR1(softc->vm, vcpu, - "Invalid size %d for VMCB access: %d", bytes); + SVM_CTR1(vcpu, "Invalid size %d for VMCB access", bytes); return (EINVAL); } /* Invalidate all VMCB state cached by h/w. */ if (write) - svm_set_dirty(softc, vcpu, 0xffffffff); + svm_set_dirty(vcpu, 0xffffffff); return (0); } @@ -162,19 +159,19 @@ vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident, * Read from segment selector, control and general purpose register of VMCB. */ int -vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) +vmcb_read(struct svm_vcpu *vcpu, int ident, uint64_t *retval) { struct vmcb *vmcb; struct vmcb_state *state; struct vmcb_segment *seg; int err; - vmcb = svm_get_vmcb(sc, vcpu); + vmcb = svm_get_vmcb(vcpu); state = &vmcb->state; err = 0; if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 0, ident, retval)); + return (vmcb_access(vcpu, 0, ident, retval)); switch (ident) { case VM_REG_GUEST_CR0: @@ -235,6 +232,22 @@ vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) *retval = seg->selector; break; + case VM_REG_GUEST_FS_BASE: + case VM_REG_GUEST_GS_BASE: + seg = vmcb_segptr(vmcb, ident == VM_REG_GUEST_FS_BASE ? + VM_REG_GUEST_FS : VM_REG_GUEST_GS); + KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", + __func__, ident)); + *retval = seg->base; + break; + case VM_REG_GUEST_KGS_BASE: + *retval = state->kernelgsbase; + break; + + case VM_REG_GUEST_TPR: + *retval = vlapic_get_cr8(vm_lapic(vcpu->vcpu)); + break; + case VM_REG_GUEST_GDTR: case VM_REG_GUEST_IDTR: /* GDTR and IDTR don't have segment selectors */ @@ -252,56 +265,56 @@ vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) * Write to segment selector, control and general purpose register of VMCB.
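 * Each write below marks the matching VMCB clean-bit category dirty
 * (e.g. VMCB_CACHE_CR for control registers), so the processor reloads
 * the modified state from memory on the next VMRUN instead of using
 * its cached copy.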
*/ int -vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val) +vmcb_write(struct svm_vcpu *vcpu, int ident, uint64_t val) { struct vmcb *vmcb; struct vmcb_state *state; struct vmcb_segment *seg; int err, dirtyseg; - vmcb = svm_get_vmcb(sc, vcpu); + vmcb = svm_get_vmcb(vcpu); state = &vmcb->state; dirtyseg = 0; err = 0; if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 1, ident, &val)); + return (vmcb_access(vcpu, 1, ident, &val)); switch (ident) { case VM_REG_GUEST_CR0: state->cr0 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); + svm_set_dirty(vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_CR2: state->cr2 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2); + svm_set_dirty(vcpu, VMCB_CACHE_CR2); break; case VM_REG_GUEST_CR3: state->cr3 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); + svm_set_dirty(vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_CR4: state->cr4 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); + svm_set_dirty(vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_DR6: state->dr6 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); + svm_set_dirty(vcpu, VMCB_CACHE_DR); break; case VM_REG_GUEST_DR7: state->dr7 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); + svm_set_dirty(vcpu, VMCB_CACHE_DR); break; case VM_REG_GUEST_EFER: /* EFER_SVM must always be set when the guest is executing */ state->efer = val | EFER_SVM; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); + svm_set_dirty(vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_RAX: @@ -334,7 +347,7 @@ vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val) __func__, ident)); seg->selector = val; if (dirtyseg) - svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); + svm_set_dirty(vcpu, VMCB_CACHE_SEG); break; case VM_REG_GUEST_GDTR: @@ -365,15 +378,13 @@ vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2) } int -vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmcb_setdesc(struct svm_vcpu *vcpu, int reg, struct seg_desc *desc) { struct vmcb *vmcb; - struct svm_softc *sc; struct vmcb_segment *seg; uint16_t attrib; - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); + vmcb = svm_get_vmcb(vcpu); seg = vmcb_segptr(vmcb, reg); KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", @@ -395,7 +406,7 @@ vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) seg->attrib = attrib; } - VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), " + SVM_CTR4(vcpu, "Setting desc %d: base (%#lx), limit (%#x), " "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib); switch (reg) { @@ -403,11 +414,11 @@ vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) case VM_REG_GUEST_DS: case VM_REG_GUEST_ES: case VM_REG_GUEST_SS: - svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); + svm_set_dirty(vcpu, VMCB_CACHE_SEG); break; case VM_REG_GUEST_GDTR: case VM_REG_GUEST_IDTR: - svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); + svm_set_dirty(vcpu, VMCB_CACHE_DT); break; default: break; @@ -417,14 +428,12 @@ vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) } int -vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmcb_getdesc(struct svm_vcpu *vcpu, int reg, struct seg_desc *desc) { struct vmcb *vmcb; - struct svm_softc *sc; struct vmcb_segment *seg; - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); + vmcb = svm_get_vmcb(vcpu); seg = vmcb_segptr(vmcb, reg); KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", __func__, reg)); @@ -459,55 +468,46 @@ vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) #ifdef BHYVE_SNAPSHOT int -vmcb_getany(struct svm_softc *sc, int 
vcpu, int ident, uint64_t *val) +vmcb_getany(struct svm_vcpu *vcpu, int ident, uint64_t *val) { int error = 0; - if (vcpu < 0 || vcpu >= VM_MAXCPU) { - error = EINVAL; - goto err; - } - if (ident >= VM_REG_LAST) { error = EINVAL; goto err; } - error = vmcb_read(sc, vcpu, ident, val); + error = vmcb_read(vcpu, ident, val); err: return (error); } int -vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val) +vmcb_setany(struct svm_vcpu *vcpu, int ident, uint64_t val) { int error = 0; - if (vcpu < 0 || vcpu >= VM_MAXCPU) { - error = EINVAL; - goto err; - } - if (ident >= VM_REG_LAST) { error = EINVAL; goto err; } - error = vmcb_write(sc, vcpu, ident, val); + error = vmcb_write(vcpu, ident, val); err: return (error); } int -vmcb_snapshot_desc(void *arg, int vcpu, int reg, struct vm_snapshot_meta *meta) +vmcb_snapshot_desc(struct svm_vcpu *vcpu, int reg, + struct vm_snapshot_meta *meta) { int ret; struct seg_desc desc; if (meta->op == VM_SNAPSHOT_SAVE) { - ret = vmcb_getdesc(arg, vcpu, reg, &desc); + ret = vmcb_getdesc(vcpu, reg, &desc); if (ret != 0) goto done; @@ -519,7 +519,7 @@ vmcb_snapshot_desc(void *arg, int vcpu, int reg, struct vm_snapshot_meta *meta) SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); - ret = vmcb_setdesc(arg, vcpu, reg, &desc); + ret = vmcb_setdesc(vcpu, reg, &desc); if (ret != 0) goto done; } else { @@ -532,14 +532,14 @@ done: } int -vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident, - struct vm_snapshot_meta *meta) +vmcb_snapshot_any(struct svm_vcpu *vcpu, int ident, + struct vm_snapshot_meta *meta) { int ret; uint64_t val; if (meta->op == VM_SNAPSHOT_SAVE) { - ret = vmcb_getany(sc, vcpu, ident, &val); + ret = vmcb_getany(vcpu, ident, &val); if (ret != 0) goto done; @@ -547,7 +547,7 @@ vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident, } else if (meta->op == VM_SNAPSHOT_RESTORE) { SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); - ret = vmcb_setany(sc, vcpu, ident, val); + ret = vmcb_setany(vcpu, ident, val); if (ret != 0) goto done; } else { diff --git a/sys/amd64/vmm/amd/vmcb.h b/sys/amd64/vmm/amd/vmcb.h index 084f4465cb49..09150fc26a72 100644 --- a/sys/amd64/vmm/amd/vmcb.h +++ b/sys/amd64/vmm/amd/vmcb.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMCB_H_ @@ -169,7 +167,7 @@ #define VMCB_NPF_INFO1_GPT BIT(33) /* Guest page table. */ /* - * EXITINTINFO, Interrupt exit info for all intrecepts. + * EXITINTINFO, Interrupt exit info for all intercepts. * Section 15.7.2, Intercepts during IDT Interrupt Delivery. 
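 * For example, a raw value of 0x80000b0e decodes with the macros below
 * as: valid (bit 31), error code valid (bit 11), type 3 (exception),
 * vector 0x0e (#PF).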
*/ #define VMCB_EXITINTINFO_VECTOR(x) ((x) & 0xFF) @@ -234,6 +232,7 @@ #ifdef _KERNEL struct svm_softc; +struct svm_vcpu; struct vm_snapshot_meta; /* VMCB save state area segment format */ @@ -353,17 +352,17 @@ struct vmcb { CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); CTASSERT(offsetof(struct vmcb, state) == 0x400); -int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval); -int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val); -int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); +int vmcb_read(struct svm_vcpu *vcpu, int ident, uint64_t *retval); +int vmcb_write(struct svm_vcpu *vcpu, int ident, uint64_t val); +int vmcb_setdesc(struct svm_vcpu *vcpu, int ident, struct seg_desc *desc); +int vmcb_getdesc(struct svm_vcpu *vcpu, int ident, struct seg_desc *desc); int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg); #ifdef BHYVE_SNAPSHOT -int vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val); -int vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val); -int vmcb_snapshot_desc(void *arg, int vcpu, int reg, +int vmcb_getany(struct svm_vcpu *vcpu, int ident, uint64_t *val); +int vmcb_setany(struct svm_vcpu *vcpu, int ident, uint64_t val); +int vmcb_snapshot_desc(struct svm_vcpu *vcpu, int reg, struct vm_snapshot_meta *meta); -int vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident, +int vmcb_snapshot_any(struct svm_vcpu *vcpu, int ident, struct vm_snapshot_meta *meta); #endif diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c index 4d75d5547890..5432c7da5df7 100644 --- a/sys/amd64/vmm/intel/ept.c +++ b/sys/amd64/vmm/intel/ept.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/kernel.h> #include <sys/types.h> @@ -142,7 +137,7 @@ ept_dump(uint64_t *ptp, int nlevels) if (ptpval == 0) continue; - + for (t = 0; t < tabs; t++) printf("\t"); printf("%3d 0x%016lx\n", i, ptpval); diff --git a/sys/amd64/vmm/intel/ept.h b/sys/amd64/vmm/intel/ept.h index 4a029e8b22df..93aa9ca3c041 100644 --- a/sys/amd64/vmm/intel/ept.h +++ b/sys/amd64/vmm/intel/ept.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _EPT_H_ diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c index aed21d2f4d15..35c2ee5b6eff 100644 --- a/sys/amd64/vmm/intel/vmcs.c +++ b/sys/amd64/vmm/intel/vmcs.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,16 +24,11 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
- * - * $FreeBSD$ */ #include "opt_bhyve_snapshot.h" #include "opt_ddb.h" -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/sysctl.h> #include <sys/systm.h> @@ -124,10 +119,13 @@ vmcs_field_encoding(int ident) return (VMCS_GUEST_PDPTE3); case VM_REG_GUEST_ENTRY_INST_LENGTH: return (VMCS_ENTRY_INST_LENGTH); + case VM_REG_GUEST_FS_BASE: + return (VMCS_GUEST_FS_BASE); + case VM_REG_GUEST_GS_BASE: + return (VMCS_GUEST_GS_BASE); default: return (-1); } - } static int diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h index 5c5214c65609..f247370fc60f 100644 --- a/sys/amd64/vmm/intel/vmcs.h +++ b/sys/amd64/vmm/intel/vmcs.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMCS_H_ diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index f1aea1d2a9db..842281ab862e 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -25,13 +25,9 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -46,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/pmap.h> #include <machine/psl.h> @@ -61,10 +58,12 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm_instruction_emul.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_ktr.h> +#include <dev/vmm/vmm_mem.h> + #include "vmm_lapic.h" #include "vmm_host.h" #include "vmm_ioport.h" -#include "vmm_ktr.h" #include "vmm_stat.h" #include "vatpic.h" #include "vlapic.h" @@ -76,6 +75,7 @@ __FBSDID("$FreeBSD$"); #include "vmx_msr.h" #include "x86.h" #include "vmx_controls.h" +#include "io/ppt.h" #define PINBASED_CTLS_ONE_SETTING \ (PINBASED_EXTINT_EXITING | \ @@ -127,12 +127,14 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); +bool vmx_have_msr_tsc_aux; + SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); int vmxon_enabled[MAXCPU]; -static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); +static uint8_t *vmxon_region; static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; static uint32_t exit_ctls, entry_ctls; @@ -193,15 +195,18 @@ SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid, 0, "Guests are allowed to use INVPCID"); static int tpr_shadowing; -SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD, +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &tpr_shadowing, 0, "TPR shadowing support"); static int virtual_interrupt_delivery; -SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD, +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery 
support"); static int posted_interrupts; -SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, posted_interrupts, CTLFLAG_RD, +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, posted_interrupts, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &posted_interrupts, 0, "APICv posted interrupt support"); static int pirvec = -1; @@ -214,10 +219,10 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); int guest_l1d_flush; -SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD, +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &guest_l1d_flush, 0, NULL); int guest_l1d_flush_sw; -SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD, +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &guest_l1d_flush_sw, 0, NULL); static struct msr_entry msr_load_list[1] __aligned(16); @@ -308,12 +313,12 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return, */ #define APIC_ACCESS_ADDRESS 0xFFFFF000 -static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); -static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); +static int vmx_getdesc(void *vcpui, int reg, struct seg_desc *desc); +static int vmx_getreg(void *vcpui, int reg, uint64_t *retval); static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); #ifdef BHYVE_SNAPSHOT -static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now); +static int vmx_restore_tsc(void *vcpui, uint64_t now); #endif static inline bool @@ -517,49 +522,38 @@ vpid_free(int vpid) panic("vpid_free: invalid vpid %d", vpid); /* - * VPIDs [0,VM_MAXCPU] are special and are not allocated from + * VPIDs [0,vm_maxcpu] are special and are not allocated from * the unit number allocator. */ - if (vpid > VM_MAXCPU) + if (vpid > vm_maxcpu) free_unr(vpid_unr, vpid); } -static void -vpid_alloc(uint16_t *vpid, int num) +static uint16_t +vpid_alloc(int vcpuid) { - int i, x; - - if (num <= 0 || num > VM_MAXCPU) - panic("invalid number of vpids requested: %d", num); + int x; /* * If the "enable vpid" execution control is not enabled then the * VPID is required to be 0 for all vcpus. */ - if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) { - for (i = 0; i < num; i++) - vpid[i] = 0; - return; - } + if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) + return (0); /* - * Allocate a unique VPID for each vcpu from the unit number allocator. + * Try to allocate a unique VPID for each from the unit number + * allocator. */ - for (i = 0; i < num; i++) { - x = alloc_unr(vpid_unr); - if (x == -1) - break; - else - vpid[i] = x; - } + x = alloc_unr(vpid_unr); - if (i < num) { + if (x == -1) { atomic_add_int(&vpid_alloc_failed, 1); /* * If the unit number allocator does not have enough unique - * VPIDs then we need to allocate from the [1,VM_MAXCPU] range. + * VPIDs then we need to allocate from the [1,vm_maxcpu] range. * * These VPIDs are not be unique across VMs but this does not * affect correctness because the combined mappings are also @@ -568,12 +562,10 @@ vpid_alloc(uint16_t *vpid, int num) * It is still sub-optimal because the invvpid will invalidate * combined mappings for a particular VPID across all EP4TAs. */ - while (i-- > 0) - vpid_free(vpid[i]); - - for (i = 0; i < num; i++) - vpid[i] = i + 1; + return (vcpuid + 1); } + + return (x); } static void @@ -583,13 +575,13 @@ vpid_init(void) * VPID 0 is required when the "enable VPID" execution control is * disabled. 
* - * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the + * VPIDs [1,vm_maxcpu] are used as the "overflow namespace" when the * unit number allocator does not have sufficient unique VPIDs to * satisfy the allocation. * * The remaining VPIDs are managed by the unit number allocator. */ - vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL); + vpid_unr = new_unrhdr(vm_maxcpu + 1, 0xffff, NULL); } static void @@ -630,6 +622,9 @@ vmx_modcleanup(void) smp_rendezvous(NULL, vmx_disable, NULL, NULL); + if (vmxon_region != NULL) + kmem_free(vmxon_region, (mp_maxid + 1) * PAGE_SIZE); + return (0); } @@ -649,18 +644,26 @@ vmx_enable(void *arg __unused) load_cr4(rcr4() | CR4_VMXE); - *(uint32_t *)vmxon_region[curcpu] = vmx_revision(); - error = vmxon(vmxon_region[curcpu]); + *(uint32_t *)&vmxon_region[curcpu * PAGE_SIZE] = vmx_revision(); + error = vmxon(&vmxon_region[curcpu * PAGE_SIZE]); if (error == 0) vmxon_enabled[curcpu] = 1; } static void +vmx_modsuspend(void) +{ + + if (vmxon_enabled[curcpu]) + vmx_disable(NULL); +} + +static void vmx_modresume(void) { if (vmxon_enabled[curcpu]) - vmxon(vmxon_region[curcpu]); + vmx_enable(NULL); } static int @@ -821,8 +824,10 @@ vmx_modinit(int ipinum) PROCBASED2_ENABLE_RDTSCP, 0, &tmp); cap_rdpid = error == 0 && host_has_rdpid(); cap_rdtscp = error == 0 && host_has_rdtscp(); - if (cap_rdpid || cap_rdtscp) + if (cap_rdpid || cap_rdtscp) { procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP; + vmx_have_msr_tsc_aux = true; + } cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, @@ -841,8 +846,12 @@ vmx_modinit(int ipinum) &tmp); if (error == 0) { tpr_shadowing = 1; +#ifndef BURN_BRIDGES TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing", &tpr_shadowing); +#endif + TUNABLE_INT_FETCH("hw.vmm.vmx.cap.tpr_shadowing", + &tpr_shadowing); } if (tpr_shadowing) { @@ -863,8 +872,12 @@ vmx_modinit(int ipinum) procbased2_vid_bits, 0, &tmp); if (error == 0 && tpr_shadowing) { virtual_interrupt_delivery = 1; +#ifndef BURN_BRIDGES TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid", &virtual_interrupt_delivery); +#endif + TUNABLE_INT_FETCH("hw.vmm.vmx.cap.virtual_interrupt_delivery", + &virtual_interrupt_delivery); } if (virtual_interrupt_delivery) { @@ -890,8 +903,12 @@ vmx_modinit(int ipinum) } } else { posted_interrupts = 1; +#ifndef BURN_BRIDGES TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_pir", &posted_interrupts); +#endif + TUNABLE_INT_FETCH("hw.vmm.vmx.cap.posted_interrupts", + &posted_interrupts); } } } @@ -908,7 +925,10 @@ vmx_modinit(int ipinum) guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; +#ifndef BURN_BRIDGES TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); +#endif + TUNABLE_INT_FETCH("hw.vmm.vmx.l1d_flush", &guest_l1d_flush); /* * L1D cache flush is enabled. 
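
In the vmx_modinit()/vmx_enable() hunks above, the static vmxon_region[MAXCPU][PAGE_SIZE] array becomes a single run-time allocation of (mp_maxid + 1) pages, indexed by curcpu * PAGE_SIZE. A userspace sketch of the same layout, with aligned_alloc() standing in for kmem_malloc(), which returns page-aligned wired kernel memory:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

static uint8_t *vmxon_region;

static int
vmxon_region_init(unsigned int mp_maxid, uint32_t revision)
{
        size_t size = ((size_t)mp_maxid + 1) * PAGE_SIZE;
        unsigned int cpu;

        vmxon_region = aligned_alloc(PAGE_SIZE, size);
        if (vmxon_region == NULL)
                return (-1);
        memset(vmxon_region, 0, size);

        /* Each CPU's page starts with the VMCS revision id before VMXON. */
        for (cpu = 0; cpu <= mp_maxid; cpu++)
                *(uint32_t *)&vmxon_region[cpu * PAGE_SIZE] = revision;
        return (0);
}

The payoff is that the kernel no longer reserves MAXCPU pages of BSS whether or not VMX is ever used, and vmx_modcleanup() can return the pages with kmem_free().
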
Use IA32_FLUSH_CMD MSR when @@ -920,8 +940,12 @@ vmx_modinit(int ipinum) if (guest_l1d_flush) { if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) { guest_l1d_flush_sw = 1; +#ifndef BURN_BRIDGES TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw", &guest_l1d_flush_sw); +#endif + TUNABLE_INT_FETCH("hw.vmm.vmx.l1d_flush_sw", + &guest_l1d_flush_sw); } if (guest_l1d_flush_sw) { if (nmi_flush_l1d_sw <= 1) @@ -962,6 +986,8 @@ vmx_modinit(int ipinum) vmx_msr_init(); /* enable VMX operation */ + vmxon_region = kmem_malloc((mp_maxid + 1) * PAGE_SIZE, + M_WAITOK | M_ZERO); smp_rendezvous(NULL, vmx_enable, NULL, NULL); vmx_initialized = 1; @@ -1029,18 +1055,10 @@ vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) static void * vmx_init(struct vm *vm, pmap_t pmap) { - uint16_t vpid[VM_MAXCPU]; - int i, error; + int error __diagused; struct vmx *vmx; - struct vmcs *vmcs; - uint32_t exc_bitmap; - uint16_t maxcpus; vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO); - if ((uintptr_t)vmx & PAGE_MASK) { - panic("malloc of struct vmx not aligned on %d byte boundary", - PAGE_SIZE); - } vmx->vm = vm; vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pmltop)); @@ -1056,6 +1074,8 @@ vmx_init(struct vm *vm, pmap_t pmap) */ ept_invalidate_mappings(vmx->eptp); + vmx->msr_bitmap = malloc_aligned(PAGE_SIZE, PAGE_SIZE, M_VMX, + M_WAITOK | M_ZERO); msr_bitmap_initialize(vmx->msr_bitmap); /* @@ -1097,8 +1117,6 @@ vmx_init(struct vm *vm, pmap_t pmap) ((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX))) panic("vmx_init: error setting guest msr access"); - vpid_alloc(vpid, VM_MAXCPU); - if (virtual_interrupt_delivery) { error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, APIC_ACCESS_ADDRESS); @@ -1106,143 +1124,159 @@ vmx_init(struct vm *vm, pmap_t pmap) KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); } - maxcpus = vm_get_maxcpus(vm); - for (i = 0; i < maxcpus; i++) { - vmcs = &vmx->vmcs[i]; - vmcs->identifier = vmx_revision(); - error = vmclear(vmcs); - if (error != 0) { - panic("vmx_init: vmclear error %d on vcpu %d\n", - error, i); - } + vmx->pmap = pmap; + return (vmx); +} - vmx_msr_guest_init(vmx, i); +static void * +vmx_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) +{ + struct vmx *vmx = vmi; + struct vmcs *vmcs; + struct vmx_vcpu *vcpu; + uint32_t exc_bitmap; + uint16_t vpid; + int error; - error = vmcs_init(vmcs); - KASSERT(error == 0, ("vmcs_init error %d", error)); + vpid = vpid_alloc(vcpuid); + + vcpu = malloc(sizeof(*vcpu), M_VMX, M_WAITOK | M_ZERO); + vcpu->vmx = vmx; + vcpu->vcpu = vcpu1; + vcpu->vcpuid = vcpuid; + vcpu->vmcs = malloc_aligned(sizeof(*vmcs), PAGE_SIZE, M_VMX, + M_WAITOK | M_ZERO); + vcpu->apic_page = malloc_aligned(PAGE_SIZE, PAGE_SIZE, M_VMX, + M_WAITOK | M_ZERO); + vcpu->pir_desc = malloc_aligned(sizeof(*vcpu->pir_desc), 64, M_VMX, + M_WAITOK | M_ZERO); + + vmcs = vcpu->vmcs; + vmcs->identifier = vmx_revision(); + error = vmclear(vmcs); + if (error != 0) { + panic("vmx_init: vmclear error %d on vcpu %d\n", + error, vcpuid); + } - VMPTRLD(vmcs); - error = 0; - error += vmwrite(VMCS_HOST_RSP, (u_long)&vmx->ctx[i]); - error += vmwrite(VMCS_EPTP, vmx->eptp); - error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls); - error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls); - if (vcpu_trap_wbinvd(vm, i)) { - KASSERT(cap_wbinvd_exit, ("WBINVD trap not available")); - procbased_ctls2 |= PROCBASED2_WBINVD_EXITING; - } - error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2); - error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); - error += 
vmwrite(VMCS_ENTRY_CTLS, entry_ctls); - error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap)); - error += vmwrite(VMCS_VPID, vpid[i]); - - if (guest_l1d_flush && !guest_l1d_flush_sw) { - vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( - (vm_offset_t)&msr_load_list[0])); - vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, - nitems(msr_load_list)); - vmcs_write(VMCS_EXIT_MSR_STORE, 0); - vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0); - } + vmx_msr_guest_init(vmx, vcpu); - /* exception bitmap */ - if (vcpu_trace_exceptions(vm, i)) - exc_bitmap = 0xffffffff; - else - exc_bitmap = 1 << IDT_MC; - error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap); + error = vmcs_init(vmcs); + KASSERT(error == 0, ("vmcs_init error %d", error)); - vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; - error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); + VMPTRLD(vmcs); + error = 0; + error += vmwrite(VMCS_HOST_RSP, (u_long)&vcpu->ctx); + error += vmwrite(VMCS_EPTP, vmx->eptp); + error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls); + error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls); + if (vcpu_trap_wbinvd(vcpu->vcpu)) { + KASSERT(cap_wbinvd_exit, ("WBINVD trap not available")); + procbased_ctls2 |= PROCBASED2_WBINVD_EXITING; + } + error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2); + error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); + error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); + error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap)); + error += vmwrite(VMCS_VPID, vpid); + + if (guest_l1d_flush && !guest_l1d_flush_sw) { + vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( + (vm_offset_t)&msr_load_list[0])); + vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, + nitems(msr_load_list)); + vmcs_write(VMCS_EXIT_MSR_STORE, 0); + vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0); + } - if (tpr_shadowing) { - error += vmwrite(VMCS_VIRTUAL_APIC, - vtophys(&vmx->apic_page[i])); - } + /* exception bitmap */ + if (vcpu_trace_exceptions(vcpu->vcpu)) + exc_bitmap = 0xffffffff; + else + exc_bitmap = 1 << IDT_MC; + error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap); - if (virtual_interrupt_delivery) { - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); - error += vmwrite(VMCS_EOI_EXIT0, 0); - error += vmwrite(VMCS_EOI_EXIT1, 0); - error += vmwrite(VMCS_EOI_EXIT2, 0); - error += vmwrite(VMCS_EOI_EXIT3, 0); - } - if (posted_interrupts) { - error += vmwrite(VMCS_PIR_VECTOR, pirvec); - error += vmwrite(VMCS_PIR_DESC, - vtophys(&vmx->pir_desc[i])); - } - VMCLEAR(vmcs); - KASSERT(error == 0, ("vmx_init: error customizing the vmcs")); + vcpu->ctx.guest_dr6 = DBREG_DR6_RESERVED1; + error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); + + if (tpr_shadowing) { + error += vmwrite(VMCS_VIRTUAL_APIC, vtophys(vcpu->apic_page)); + } + + if (virtual_interrupt_delivery) { + error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + error += vmwrite(VMCS_EOI_EXIT0, 0); + error += vmwrite(VMCS_EOI_EXIT1, 0); + error += vmwrite(VMCS_EOI_EXIT2, 0); + error += vmwrite(VMCS_EOI_EXIT3, 0); + } + if (posted_interrupts) { + error += vmwrite(VMCS_PIR_VECTOR, pirvec); + error += vmwrite(VMCS_PIR_DESC, vtophys(vcpu->pir_desc)); + } + VMCLEAR(vmcs); + KASSERT(error == 0, ("vmx_init: error customizing the vmcs")); - vmx->cap[i].set = 0; - vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0; - vmx->cap[i].set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0; - vmx->cap[i].proc_ctls = procbased_ctls; - vmx->cap[i].proc_ctls2 = procbased_ctls2; - vmx->cap[i].exc_bitmap = exc_bitmap; + vcpu->cap.set = 0; + vcpu->cap.set |= cap_rdpid != 0 ? 
1 << VM_CAP_RDPID : 0; + vcpu->cap.set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0; + vcpu->cap.proc_ctls = procbased_ctls; + vcpu->cap.proc_ctls2 = procbased_ctls2; + vcpu->cap.exc_bitmap = exc_bitmap; - vmx->state[i].nextrip = ~0; - vmx->state[i].lastcpu = NOCPU; - vmx->state[i].vpid = vpid[i]; + vcpu->state.nextrip = ~0; + vcpu->state.lastcpu = NOCPU; + vcpu->state.vpid = vpid; - /* - * Set up the CR0/4 shadows, and init the read shadow - * to the power-on register value from the Intel Sys Arch. - * CR0 - 0x60000010 - * CR4 - 0 - */ - error = vmx_setup_cr0_shadow(vmcs, 0x60000010); - if (error != 0) - panic("vmx_setup_cr0_shadow %d", error); + /* + * Set up the CR0/4 shadows, and init the read shadow + * to the power-on register value from the Intel Sys Arch. + * CR0 - 0x60000010 + * CR4 - 0 + */ + error = vmx_setup_cr0_shadow(vmcs, 0x60000010); + if (error != 0) + panic("vmx_setup_cr0_shadow %d", error); - error = vmx_setup_cr4_shadow(vmcs, 0); - if (error != 0) - panic("vmx_setup_cr4_shadow %d", error); + error = vmx_setup_cr4_shadow(vmcs, 0); + if (error != 0) + panic("vmx_setup_cr4_shadow %d", error); - vmx->ctx[i].pmap = pmap; - } + vcpu->ctx.pmap = vmx->pmap; - return (vmx); + return (vcpu); } static int -vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) +vmx_handle_cpuid(struct vmx_vcpu *vcpu, struct vmxctx *vmxctx) { int handled; - handled = x86_emulate_cpuid(vm, vcpu, (uint64_t *)&vmxctx->guest_rax, + handled = x86_emulate_cpuid(vcpu->vcpu, (uint64_t *)&vmxctx->guest_rax, (uint64_t *)&vmxctx->guest_rbx, (uint64_t *)&vmxctx->guest_rcx, (uint64_t *)&vmxctx->guest_rdx); return (handled); } static __inline void -vmx_run_trace(struct vmx *vmx, int vcpu) +vmx_run_trace(struct vmx_vcpu *vcpu) { -#ifdef KTR - VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); -#endif + VMX_CTR1(vcpu, "Resume execution at %#lx", vmcs_guest_rip()); } static __inline void -vmx_exit_trace(struct vmx *vmx, int vcpu, uint64_t rip, uint32_t exit_reason, - int handled) +vmx_exit_trace(struct vmx_vcpu *vcpu, uint64_t rip, uint32_t exit_reason, + int handled) { -#ifdef KTR - VCPU_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", + VMX_CTR3(vcpu, "%s %s vmexit at 0x%0lx", handled ? "handled" : "unhandled", exit_reason_to_str(exit_reason), rip); -#endif } static __inline void -vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) +vmx_astpending_trace(struct vmx_vcpu *vcpu, uint64_t rip) { -#ifdef KTR - VCPU_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip); -#endif + VMX_CTR1(vcpu, "astpending vmexit at 0x%0lx", rip); } static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); @@ -1252,12 +1286,12 @@ static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); * Invalidate guest mappings identified by its vpid from the TLB. 
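
The vmx_vcpu_init() hunk above allocates each vCPU's VMCS, virtual-APIC page, and posted-interrupt descriptor separately with malloc_aligned(). The alignments are architectural: the VMCS and virtual-APIC page must be 4KB-aligned and the PIR descriptor 64-byte aligned. A rough standalone sketch using C11 aligned_alloc() in place of the FreeBSD allocator, with a stand-in descriptor type:

#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

struct pir_desc_sketch {                /* stand-in, sized like the real one */
        unsigned char pir[32];
        unsigned char pad[32];
};

struct vcpu_pages {
        void *vmcs;                     /* must be page-aligned */
        void *apic_page;                /* must be page-aligned */
        struct pir_desc_sketch *pir;    /* must be 64-byte aligned */
};

static int
vcpu_pages_init(struct vcpu_pages *p)
{
        p->vmcs = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
        p->apic_page = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
        p->pir = aligned_alloc(64, sizeof(*p->pir));
        if (p->vmcs == NULL || p->apic_page == NULL || p->pir == NULL)
                return (-1);
        memset(p->vmcs, 0, PAGE_SIZE);
        memset(p->apic_page, 0, PAGE_SIZE);
        memset(p->pir, 0, sizeof(*p->pir));
        return (0);
}
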
*/ static __inline void -vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) +vmx_invvpid(struct vmx *vmx, struct vmx_vcpu *vcpu, pmap_t pmap, int running) { struct vmxstate *vmxstate; struct invvpid_desc invvpid_desc; - vmxstate = &vmx->state[vcpu]; + vmxstate = &vcpu->state; if (vmxstate->vpid == 0) return; @@ -1273,7 +1307,7 @@ vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) } KASSERT(curthread->td_critnest > 0, ("%s: vcpu %d running outside " - "critical section", __func__, vcpu)); + "critical section", __func__, vcpu->vcpuid)); /* * Invalidate all mappings tagged with 'vpid' @@ -1296,7 +1330,7 @@ vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) invvpid_desc.vpid = vmxstate->vpid; invvpid_desc.linear_addr = 0; invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); - vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_DONE, 1); + vmm_stat_incr(vcpu->vcpu, VCPU_INVVPID_DONE, 1); } else { /* * The invvpid can be skipped if an invept is going to @@ -1304,22 +1338,22 @@ vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) * will invalidate combined mappings tagged with * 'vmx->eptp' for all vpids. */ - vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1); + vmm_stat_incr(vcpu->vcpu, VCPU_INVVPID_SAVED, 1); } } static void -vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) +vmx_set_pcpu_defaults(struct vmx *vmx, struct vmx_vcpu *vcpu, pmap_t pmap) { struct vmxstate *vmxstate; - vmxstate = &vmx->state[vcpu]; + vmxstate = &vcpu->state; if (vmxstate->lastcpu == curcpu) return; vmxstate->lastcpu = curcpu; - vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); + vmm_stat_incr(vcpu->vcpu, VCPU_MIGRATIONS, 1); vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); @@ -1333,64 +1367,64 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0); static void __inline -vmx_set_int_window_exiting(struct vmx *vmx, int vcpu) +vmx_set_int_window_exiting(struct vmx_vcpu *vcpu) { - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) { - vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); + if ((vcpu->cap.proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) { + vcpu->cap.proc_ctls |= PROCBASED_INT_WINDOW_EXITING; + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vcpu->cap.proc_ctls); + VMX_CTR0(vcpu, "Enabling interrupt window exiting"); } } static void __inline -vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) +vmx_clear_int_window_exiting(struct vmx_vcpu *vcpu) { - KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, - ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls)); - vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); + KASSERT((vcpu->cap.proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, + ("intr_window_exiting not set: %#x", vcpu->cap.proc_ctls)); + vcpu->cap.proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vcpu->cap.proc_ctls); + VMX_CTR0(vcpu, "Disabling interrupt window exiting"); } static void __inline -vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu) +vmx_set_nmi_window_exiting(struct vmx_vcpu *vcpu) { - if ((vmx->cap[vcpu].proc_ctls & 
PROCBASED_NMI_WINDOW_EXITING) == 0) { - vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting"); + if ((vcpu->cap.proc_ctls & PROCBASED_NMI_WINDOW_EXITING) == 0) { + vcpu->cap.proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vcpu->cap.proc_ctls); + VMX_CTR0(vcpu, "Enabling NMI window exiting"); } } static void __inline -vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) +vmx_clear_nmi_window_exiting(struct vmx_vcpu *vcpu) { - KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0, - ("nmi_window_exiting not set %#x", vmx->cap[vcpu].proc_ctls)); - vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); + KASSERT((vcpu->cap.proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0, + ("nmi_window_exiting not set %#x", vcpu->cap.proc_ctls)); + vcpu->cap.proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vcpu->cap.proc_ctls); + VMX_CTR0(vcpu, "Disabling NMI window exiting"); } int -vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) +vmx_set_tsc_offset(struct vmx_vcpu *vcpu, uint64_t offset) { int error; - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { - vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); + if ((vcpu->cap.proc_ctls & PROCBASED_TSC_OFFSET) == 0) { + vcpu->cap.proc_ctls |= PROCBASED_TSC_OFFSET; + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vcpu->cap.proc_ctls); + VMX_CTR0(vcpu, "Enabling TSC offsetting"); } error = vmwrite(VMCS_TSC_OFFSET, offset); #ifdef BHYVE_SNAPSHOT if (error == 0) - error = vm_set_tsc_offset(vmx->vm, vcpu, offset); + vm_set_tsc_offset(vcpu->vcpu, offset); #endif return (error); } @@ -1401,7 +1435,7 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) static void -vmx_inject_nmi(struct vmx *vmx, int vcpu) +vmx_inject_nmi(struct vmx_vcpu *vcpu) { uint32_t gi __diagused, info; @@ -1420,32 +1454,36 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu) info = IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID; vmcs_write(VMCS_ENTRY_INTR_INFO, info); - VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI"); + VMX_CTR0(vcpu, "Injecting vNMI"); /* Clear the request */ - vm_nmi_clear(vmx->vm, vcpu); + vm_nmi_clear(vcpu->vcpu); } static void -vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, +vmx_inject_interrupts(struct vmx_vcpu *vcpu, struct vlapic *vlapic, uint64_t guestrip) { int vector, need_nmi_exiting, extint_pending; uint64_t rflags, entryinfo; uint32_t gi, info; - if (vmx->state[vcpu].nextrip != guestrip) { + if (vcpu->cap.set & (1 << VM_CAP_MASK_HWINTR)) { + return; + } + + if (vcpu->state.nextrip != guestrip) { gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); if (gi & HWINTR_BLOCKING) { - VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking " + VMX_CTR2(vcpu, "Guest interrupt blocking " "cleared due to rip change: %#lx/%#lx", - vmx->state[vcpu].nextrip, guestrip); + vcpu->state.nextrip, guestrip); gi &= ~HWINTR_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } } - if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { + if (vm_entry_intinfo(vcpu->vcpu, &entryinfo)) { KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " "intinfo is not valid: %#lx", 
__func__, entryinfo)); @@ -1470,7 +1508,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, vmcs_write(VMCS_ENTRY_INTR_INFO, info); } - if (vm_nmi_pending(vmx->vm, vcpu)) { + if (vm_nmi_pending(vcpu->vcpu)) { /* * If there are no conditions blocking NMI injection then * inject it directly here otherwise enable "NMI window @@ -1487,22 +1525,22 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) { info = vmcs_read(VMCS_ENTRY_INTR_INFO); if ((info & VMCS_INTR_VALID) == 0) { - vmx_inject_nmi(vmx, vcpu); + vmx_inject_nmi(vcpu); need_nmi_exiting = 0; } else { - VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI " + VMX_CTR1(vcpu, "Cannot inject NMI " "due to VM-entry intr info %#x", info); } } else { - VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to " + VMX_CTR1(vcpu, "Cannot inject NMI due to " "Guest Interruptibility-state %#x", gi); } if (need_nmi_exiting) - vmx_set_nmi_window_exiting(vmx, vcpu); + vmx_set_nmi_window_exiting(vcpu); } - extint_pending = vm_extint_pending(vmx->vm, vcpu); + extint_pending = vm_extint_pending(vcpu->vcpu); if (!extint_pending && virtual_interrupt_delivery) { vmx_inject_pir(vlapic); @@ -1514,8 +1552,8 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, * checking for pending interrupts. This is just an optimization and * not needed for correctness. */ - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0) { - VCPU_CTR0(vmx->vm, vcpu, "Skip interrupt injection due to " + if ((vcpu->cap.proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0) { + VMX_CTR0(vcpu, "Skip interrupt injection due to " "pending int_window_exiting"); return; } @@ -1535,7 +1573,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, ("invalid vector %d from local APIC", vector)); } else { /* Ask the legacy pic for a vector to inject */ - vatpic_pending_intr(vmx->vm, &vector); + vatpic_pending_intr(vcpu->vmx->vm, &vector); /* * From the Intel SDM, Volume 3, Section "Maskable @@ -1550,14 +1588,14 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, /* Check RFLAGS.IF and the interruptibility state of the guest */ rflags = vmcs_read(VMCS_GUEST_RFLAGS); if ((rflags & PSL_I) == 0) { - VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " + VMX_CTR2(vcpu, "Cannot inject vector %d due to " "rflags %#lx", vector, rflags); goto cantinject; } gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); if (gi & HWINTR_BLOCKING) { - VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " + VMX_CTR2(vcpu, "Cannot inject vector %d due to " "Guest Interruptibility-state %#x", vector, gi); goto cantinject; } @@ -1571,7 +1609,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, * - An exception was injected above. 
* - An NMI was injected above or after "NMI window exiting" */ - VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " + VMX_CTR2(vcpu, "Cannot inject vector %d due to " "VM-entry intr info %#x", vector, info); goto cantinject; } @@ -1585,8 +1623,8 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, /* Update the Local APIC ISR */ vlapic_intr_accepted(vlapic, vector); } else { - vm_extint_clear(vmx->vm, vcpu); - vatpic_intr_accepted(vmx->vm, vector); + vm_extint_clear(vcpu->vcpu); + vatpic_intr_accepted(vcpu->vmx->vm, vector); /* * After we accepted the current ExtINT the PIC may @@ -1599,10 +1637,10 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, * as soon as possible. This applies both for the software * emulated vlapic and the hardware assisted virtual APIC. */ - vmx_set_int_window_exiting(vmx, vcpu); + vmx_set_int_window_exiting(vcpu); } - VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); + VMX_CTR1(vcpu, "Injecting hwintr at vector %d", vector); return; @@ -1611,7 +1649,7 @@ cantinject: * Set the Interrupt Window Exiting execution control so we can inject * the interrupt as soon as blocking condition goes away. */ - vmx_set_int_window_exiting(vmx, vcpu); + vmx_set_int_window_exiting(vcpu); } /* @@ -1624,29 +1662,29 @@ cantinject: * hypervisor needs to restore virtual-NMI blocking before resuming the guest. */ static void -vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid) +vmx_restore_nmi_blocking(struct vmx_vcpu *vcpu) { uint32_t gi; - VCPU_CTR0(vmx->vm, vcpuid, "Restore Virtual-NMI blocking"); + VMX_CTR0(vcpu, "Restore Virtual-NMI blocking"); gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } static void -vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid) +vmx_clear_nmi_blocking(struct vmx_vcpu *vcpu) { uint32_t gi; - VCPU_CTR0(vmx->vm, vcpuid, "Clear Virtual-NMI blocking"); + VMX_CTR0(vcpu, "Clear Virtual-NMI blocking"); gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } static void -vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) +vmx_assert_nmi_blocking(struct vmx_vcpu *vcpu) { uint32_t gi __diagused; @@ -1656,13 +1694,14 @@ vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) } static int -vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) +vmx_emulate_xsetbv(struct vmx *vmx, struct vmx_vcpu *vcpu, + struct vm_exit *vmexit) { struct vmxctx *vmxctx; uint64_t xcrval; const struct xsave_limits *limits; - vmxctx = &vmx->ctx[vcpu]; + vmxctx = &vcpu->ctx; limits = vmm_get_xsave_limits(); /* @@ -1673,31 +1712,31 @@ vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) /* Only xcr0 is supported. */ if (vmxctx->guest_rcx != 0) { - vm_inject_gp(vmx->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (HANDLED); } /* We only handle xcr0 if both the host and guest have XSAVE enabled. */ if (!limits->xsave_enabled || !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) { - vm_inject_ud(vmx->vm, vcpu); + vm_inject_ud(vcpu->vcpu); return (HANDLED); } xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff); if ((xcrval & ~limits->xcr0_allowed) != 0) { - vm_inject_gp(vmx->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (HANDLED); } if (!(xcrval & XFEATURE_ENABLED_X87)) { - vm_inject_gp(vmx->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (HANDLED); } /* AVX (YMM_Hi128) requires SSE. 
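
The xsetbv emulation above (its checks continue into the next hunk) enforces the architectural dependency rules for XCR0 before accepting the guest's value; any violation injects #GP instead. Consolidated as a standalone predicate, with illustrative feature-bit names standing in for the machine-header constants:

#include <stdbool.h>
#include <stdint.h>

#define XF_X87          (1ull << 0)
#define XF_SSE          (1ull << 1)
#define XF_AVX          (1ull << 2)
#define XF_BNDREGS      (1ull << 3)
#define XF_BNDCSR       (1ull << 4)
#define XF_AVX512       (7ull << 5)     /* opmask, ZMM_Hi256, Hi16_ZMM */

static bool
xcr0_valid(uint64_t xcrval, uint64_t allowed)
{
        if ((xcrval & ~allowed) != 0)
                return (false);         /* feature bit the host can't back */
        if (!(xcrval & XF_X87))
                return (false);         /* x87 state must always be enabled */
        if ((xcrval & XF_AVX) && !(xcrval & XF_SSE))
                return (false);         /* AVX (YMM_Hi128) requires SSE */
        if ((xcrval & XF_AVX512) &&
            (xcrval & (XF_AVX512 | XF_AVX)) != (XF_AVX512 | XF_AVX))
                return (false);         /* AVX512 needs all its bits + AVX */
        if (((xcrval & XF_BNDREGS) != 0) != ((xcrval & XF_BNDCSR) != 0))
                return (false);         /* MPX bits must toggle together */
        return (true);
}
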
*/ if (xcrval & XFEATURE_ENABLED_AVX && (xcrval & XFEATURE_AVX) != XFEATURE_AVX) { - vm_inject_gp(vmx->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (HANDLED); } @@ -1708,7 +1747,7 @@ vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) if (xcrval & XFEATURE_AVX512 && (xcrval & (XFEATURE_AVX512 | XFEATURE_AVX)) != (XFEATURE_AVX512 | XFEATURE_AVX)) { - vm_inject_gp(vmx->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (HANDLED); } @@ -1718,7 +1757,7 @@ vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ if (((xcrval & XFEATURE_ENABLED_BNDREGS) != 0) != ((xcrval & XFEATURE_ENABLED_BNDCSR) != 0)) { - vm_inject_gp(vmx->vm, vcpu); + vm_inject_gp(vcpu->vcpu); return (HANDLED); } @@ -1732,11 +1771,11 @@ vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } static uint64_t -vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident) +vmx_get_guest_reg(struct vmx_vcpu *vcpu, int ident) { const struct vmxctx *vmxctx; - vmxctx = &vmx->ctx[vcpu]; + vmxctx = &vcpu->ctx; switch (ident) { case 0: @@ -1777,11 +1816,11 @@ vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident) } static void -vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval) +vmx_set_guest_reg(struct vmx_vcpu *vcpu, int ident, uint64_t regval) { struct vmxctx *vmxctx; - vmxctx = &vmx->ctx[vcpu]; + vmxctx = &vcpu->ctx; switch (ident) { case 0: @@ -1838,7 +1877,7 @@ vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval) } static int -vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) +vmx_emulate_cr0_access(struct vmx_vcpu *vcpu, uint64_t exitqual) { uint64_t crval, regval; @@ -1846,7 +1885,7 @@ vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) if ((exitqual & 0xf0) != 0x00) return (UNHANDLED); - regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf); + regval = vmx_get_guest_reg(vcpu, (exitqual >> 8) & 0xf); vmcs_write(VMCS_CR0_SHADOW, regval); @@ -1876,7 +1915,7 @@ vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) } static int -vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual) +vmx_emulate_cr4_access(struct vmx_vcpu *vcpu, uint64_t exitqual) { uint64_t crval, regval; @@ -1884,7 +1923,7 @@ vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual) if ((exitqual & 0xf0) != 0x00) return (UNHANDLED); - regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf); + regval = vmx_get_guest_reg(vcpu, (exitqual >> 8) & 0xf); vmcs_write(VMCS_CR4_SHADOW, regval); @@ -1896,7 +1935,8 @@ vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual) } static int -vmx_emulate_cr8_access(struct vmx *vmx, int vcpu, uint64_t exitqual) +vmx_emulate_cr8_access(struct vmx *vmx, struct vmx_vcpu *vcpu, + uint64_t exitqual) { struct vlapic *vlapic; uint64_t cr8; @@ -1907,13 +1947,13 @@ vmx_emulate_cr8_access(struct vmx *vmx, int vcpu, uint64_t exitqual) return (UNHANDLED); } - vlapic = vm_lapic(vmx->vm, vcpu); + vlapic = vm_lapic(vcpu->vcpu); regnum = (exitqual >> 8) & 0xf; if (exitqual & 0x10) { cr8 = vlapic_get_cr8(vlapic); - vmx_set_guest_reg(vmx, vcpu, regnum, cr8); + vmx_set_guest_reg(vcpu, regnum, cr8); } else { - cr8 = vmx_get_guest_reg(vmx, vcpu, regnum); + cr8 = vmx_get_guest_reg(vcpu, regnum); vlapic_set_cr8(vlapic, cr8); } @@ -1969,26 +2009,26 @@ vmx_paging_mode(void) } static uint64_t -inout_str_index(struct vmx *vmx, int vcpuid, int in) +inout_str_index(struct vmx_vcpu *vcpu, int in) { uint64_t val; int error __diagused; enum vm_reg_name reg; reg = in ? 
VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; - error = vmx_getreg(vmx, vcpuid, reg, &val); + error = vmx_getreg(vcpu, reg, &val); KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error)); return (val); } static uint64_t -inout_str_count(struct vmx *vmx, int vcpuid, int rep) +inout_str_count(struct vmx_vcpu *vcpu, int rep) { uint64_t val; int error __diagused; if (rep) { - error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val); + error = vmx_getreg(vcpu, VM_REG_GUEST_RCX, &val); KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error)); } else { val = 1; @@ -2015,7 +2055,7 @@ inout_str_addrsize(uint32_t inst_info) } static void -inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in, +inout_str_seginfo(struct vmx_vcpu *vcpu, uint32_t inst_info, int in, struct vm_inout_str *vis) { int error __diagused, s; @@ -2027,7 +2067,7 @@ inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in, vis->seg_name = vm_segment_name(s); } - error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc); + error = vmx_getdesc(vcpu, vis->seg_name, &vis->seg_desc); KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error)); } @@ -2116,25 +2156,25 @@ ept_emulation_fault(uint64_t ept_qual) } static __inline int -apic_access_virtualization(struct vmx *vmx, int vcpuid) +apic_access_virtualization(struct vmx_vcpu *vcpu) { uint32_t proc_ctls2; - proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; + proc_ctls2 = vcpu->cap.proc_ctls2; return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) ? 1 : 0); } static __inline int -x2apic_virtualization(struct vmx *vmx, int vcpuid) +x2apic_virtualization(struct vmx_vcpu *vcpu) { uint32_t proc_ctls2; - proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; + proc_ctls2 = vcpu->cap.proc_ctls2; return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE) ? 1 : 0); } static int -vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, +vmx_handle_apic_write(struct vmx_vcpu *vcpu, struct vlapic *vlapic, uint64_t qual) { int error, handled, offset; @@ -2144,7 +2184,7 @@ vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, handled = HANDLED; offset = APIC_WRITE_OFFSET(qual); - if (!apic_access_virtualization(vmx, vcpuid)) { + if (!apic_access_virtualization(vcpu)) { /* * In general there should not be any APIC write VM-exits * unless APIC-access virtualization is enabled. @@ -2152,7 +2192,7 @@ vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, * However self-IPI virtualization can legitimately trigger * an APIC-write VM-exit so treat it specially. 
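
The CR-access emulation in the hunks above works from the VM-exit qualification, which per the Intel SDM encodes the control-register number in bits 3:0, the access type in bits 5:4, and the operand GPR in bits 11:8. A sketch of that decode; note the real code rejects access types 2 (CLTS) and 3 (LMSW) up front via the (exitqual & 0xf0) != 0x00 test, so only MOV to/from CR is decoded here:

#include <stdint.h>

struct cr_access {
        int cr;         /* control register number */
        int gpr;        /* operand GPR: 0 = RAX ... 15 = R15 */
        int from_cr;    /* 1 = MOV from CR (read), 0 = MOV to CR (write) */
};

static struct cr_access
decode_cr_qual(uint64_t qual)
{
        struct cr_access a;

        a.cr = (int)(qual & 0xf);               /* bits 3:0 */
        a.from_cr = (qual & 0x10) != 0;         /* access-type bit 4 */
        a.gpr = (int)((qual >> 8) & 0xf);       /* bits 11:8 */
        return (a);
}

This is exactly the shape of the CR8 path above: bit 4 picks the direction, and the register number feeds vmx_get_guest_reg()/vmx_set_guest_reg().
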
*/ - if (x2apic_virtualization(vmx, vcpuid) && + if (x2apic_virtualization(vcpu) && offset == APIC_OFFSET_SELF_IPI) { apic_regs = (uint32_t *)(vlapic->apic_page); vector = apic_regs[APIC_OFFSET_SELF_IPI / 4]; @@ -2202,10 +2242,10 @@ vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, } static bool -apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa) +apic_access_fault(struct vmx_vcpu *vcpu, uint64_t gpa) { - if (apic_access_virtualization(vmx, vcpuid) && + if (apic_access_virtualization(vcpu) && (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE)) return (true); else @@ -2213,12 +2253,12 @@ apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa) } static int -vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) +vmx_handle_apic_access(struct vmx_vcpu *vcpu, struct vm_exit *vmexit) { uint64_t qual; int access_type, offset, allowed; - if (!apic_access_virtualization(vmx, vcpuid)) + if (!apic_access_virtualization(vcpu)) return (UNHANDLED); qual = vmexit->u.vmx.exit_qualification; @@ -2299,20 +2339,20 @@ vmx_task_switch_reason(uint64_t qual) } static int -emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) +emulate_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu) { int error; if (lapic_msr(num)) - error = lapic_wrmsr(vmx->vm, vcpuid, num, val, retu); + error = lapic_wrmsr(vcpu->vcpu, num, val, retu); else - error = vmx_wrmsr(vmx, vcpuid, num, val, retu); + error = vmx_wrmsr(vcpu, num, val, retu); return (error); } static int -emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) +emulate_rdmsr(struct vmx_vcpu *vcpu, u_int num, bool *retu) { struct vmxctx *vmxctx; uint64_t result; @@ -2320,13 +2360,13 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) int error; if (lapic_msr(num)) - error = lapic_rdmsr(vmx->vm, vcpuid, num, &result, retu); + error = lapic_rdmsr(vcpu->vcpu, num, &result, retu); else - error = vmx_rdmsr(vmx, vcpuid, num, &result, retu); + error = vmx_rdmsr(vcpu, num, &result, retu); if (error == 0) { eax = result; - vmxctx = &vmx->ctx[vcpuid]; + vmxctx = &vcpu->ctx; error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax); KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error)); @@ -2339,7 +2379,7 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) } static int -vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) +vmx_exit_process(struct vmx *vmx, struct vmx_vcpu *vcpu, struct vm_exit *vmexit) { int error, errcode, errcode_valid, handled, in; struct vmxctx *vmxctx; @@ -2349,20 +2389,26 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; uint32_t intr_type, intr_vec, reason; uint64_t exitintinfo, qual, gpa; +#ifdef KDTRACE_HOOKS + int vcpuid; +#endif bool retu; CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0); handled = UNHANDLED; - vmxctx = &vmx->ctx[vcpu]; + vmxctx = &vcpu->ctx; +#ifdef KDTRACE_HOOKS + vcpuid = vcpu->vcpuid; +#endif qual = vmexit->u.vmx.exit_qualification; reason = vmexit->u.vmx.exit_reason; vmexit->exitcode = VM_EXITCODE_BOGUS; - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); - SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit); + vmm_stat_incr(vcpu->vcpu, VMEXIT_COUNT, 1); + SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpuid, vmexit); /* * VM-entry failures during or after loading guest state. 
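
A note on the MSR plumbing in emulate_rdmsr()/emulate_wrmsr() above: a 64-bit MSR value travels to and from the guest as the EDX:EAX pair, and on RDMSR the upper halves of RAX and RDX are cleared just as on real hardware. A minimal sketch of both directions:

#include <stdint.h>

static uint64_t
wrmsr_value(uint64_t rax, uint64_t rdx)         /* guest wrote EDX:EAX */
{
        return ((rdx & 0xffffffffu) << 32 | (rax & 0xffffffffu));
}

static void
rdmsr_commit(uint64_t result, uint64_t *rax, uint64_t *rdx)
{
        *rax = (uint32_t)result;                /* low half; top bits clear */
        *rdx = (uint32_t)(result >> 32);        /* high half */
}
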
@@ -2371,7 +2417,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * as most VM-exit fields are not populated as usual. */ if (__predict_false(reason == EXIT_REASON_MCE_DURING_ENTRY)) { - VCPU_CTR0(vmx->vm, vcpu, "Handling MCE during VM-entry"); + VMX_CTR0(vcpu, "Handling MCE during VM-entry"); __asm __volatile("int $18"); return (1); } @@ -2392,7 +2438,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) idtvec_err = vmcs_idt_vectoring_err(); exitintinfo |= (uint64_t)idtvec_err << 32; } - error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo); + error = vm_exit_intinfo(vcpu->vcpu, exitintinfo); KASSERT(error == 0, ("%s: vm_set_intinfo error %d", __func__, error)); @@ -2410,9 +2456,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) intr_type = idtvec_info & VMCS_INTR_T_MASK; if (intr_type == VMCS_INTR_T_NMI) { if (reason != EXIT_REASON_TASK_SWITCH) - vmx_clear_nmi_blocking(vmx, vcpu); + vmx_clear_nmi_blocking(vcpu); else - vmx_assert_nmi_blocking(vmx, vcpu); + vmx_assert_nmi_blocking(vcpu); } /* @@ -2465,21 +2511,21 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } } vmexit->exitcode = VM_EXITCODE_TASK_SWITCH; - SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts); - VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, " + SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpuid, vmexit, ts); + VMX_CTR4(vcpu, "task switch reason %d, tss 0x%04x, " "%s errcode 0x%016lx", ts->reason, ts->tsssel, ts->ext ? "external" : "internal", ((uint64_t)ts->errcode << 32) | ts->errcode_valid); break; case EXIT_REASON_CR_ACCESS: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); - SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual); + vmm_stat_incr(vcpu->vcpu, VMEXIT_CR_ACCESS, 1); + SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpuid, vmexit, qual); switch (qual & 0xf) { case 0: - handled = vmx_emulate_cr0_access(vmx, vcpu, qual); + handled = vmx_emulate_cr0_access(vcpu, qual); break; case 4: - handled = vmx_emulate_cr4_access(vmx, vcpu, qual); + handled = vmx_emulate_cr4_access(vcpu, qual); break; case 8: handled = vmx_emulate_cr8_access(vmx, vcpu, qual); @@ -2487,12 +2533,12 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } break; case EXIT_REASON_RDMSR: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_RDMSR, 1); retu = false; ecx = vmxctx->guest_rcx; - VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx); - SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpu, vmexit, ecx); - error = emulate_rdmsr(vmx, vcpu, ecx, &retu); + VMX_CTR1(vcpu, "rdmsr 0x%08x", ecx); + SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpuid, vmexit, ecx); + error = emulate_rdmsr(vcpu, ecx, &retu); if (error) { vmexit->exitcode = VM_EXITCODE_RDMSR; vmexit->u.msr.code = ecx; @@ -2505,17 +2551,17 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } break; case EXIT_REASON_WRMSR: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_WRMSR, 1); retu = false; eax = vmxctx->guest_rax; ecx = vmxctx->guest_rcx; edx = vmxctx->guest_rdx; - VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx", + VMX_CTR2(vcpu, "wrmsr 0x%08x value 0x%016lx", ecx, (uint64_t)edx << 32 | eax); - SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vmexit, vcpu, ecx, + SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vmexit, vcpuid, ecx, (uint64_t)edx << 32 | eax); - error = emulate_wrmsr(vmx, vcpu, ecx, - (uint64_t)edx << 32 | eax, &retu); + error = emulate_wrmsr(vcpu, ecx, (uint64_t)edx << 32 | 
eax, + &retu); if (error) { vmexit->exitcode = VM_EXITCODE_WRMSR; vmexit->u.msr.code = ecx; @@ -2529,8 +2575,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } break; case EXIT_REASON_HLT: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); - SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit); + vmm_stat_incr(vcpu->vcpu, VMEXIT_HLT, 1); + SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpuid, vmexit); vmexit->exitcode = VM_EXITCODE_HLT; vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); if (virtual_interrupt_delivery) @@ -2540,20 +2586,20 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmexit->u.hlt.intr_status = 0; break; case EXIT_REASON_MTF: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); - SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit); + vmm_stat_incr(vcpu->vcpu, VMEXIT_MTRAP, 1); + SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpuid, vmexit); vmexit->exitcode = VM_EXITCODE_MTRAP; vmexit->inst_length = 0; break; case EXIT_REASON_PAUSE: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); - SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit); + vmm_stat_incr(vcpu->vcpu, VMEXIT_PAUSE, 1); + SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpuid, vmexit); vmexit->exitcode = VM_EXITCODE_PAUSE; break; case EXIT_REASON_INTR_WINDOW: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); - SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit); - vmx_clear_int_window_exiting(vmx, vcpu); + vmm_stat_incr(vcpu->vcpu, VMEXIT_INTR_WINDOW, 1); + SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpuid, vmexit); + vmx_clear_int_window_exiting(vcpu); return (1); case EXIT_REASON_EXT_INTR: /* @@ -2567,7 +2613,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); SDT_PROBE4(vmm, vmx, exit, interrupt, - vmx, vcpu, vmexit, intr_info); + vmx, vcpuid, vmexit, intr_info); /* * XXX: Ignore this exit if VMCS_INTR_VALID is not set. @@ -2584,18 +2630,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * This is special. We want to treat this as an 'handled' * VM-exit but not increment the instruction pointer. */ - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_EXTINT, 1); return (1); case EXIT_REASON_NMI_WINDOW: - SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpuid, vmexit); /* Exit to allow the pending virtual NMI to be injected */ - if (vm_nmi_pending(vmx->vm, vcpu)) - vmx_inject_nmi(vmx, vcpu); - vmx_clear_nmi_window_exiting(vmx, vcpu); - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1); + if (vm_nmi_pending(vcpu->vcpu)) + vmx_inject_nmi(vcpu); + vmx_clear_nmi_window_exiting(vcpu); + vmm_stat_incr(vcpu->vcpu, VMEXIT_NMI_WINDOW, 1); return (1); case EXIT_REASON_INOUT: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_INOUT, 1); vmexit->exitcode = VM_EXITCODE_INOUT; vmexit->u.inout.bytes = (qual & 0x7) + 1; vmexit->u.inout.in = in = (qual & 0x8) ? 
1 : 0; @@ -2610,20 +2656,22 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmx_paging_info(&vis->paging); vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS); vis->cr0 = vmcs_read(VMCS_GUEST_CR0); - vis->index = inout_str_index(vmx, vcpu, in); - vis->count = inout_str_count(vmx, vcpu, vis->inout.rep); + vis->index = inout_str_index(vcpu, in); + vis->count = inout_str_count(vcpu, vis->inout.rep); vis->addrsize = inout_str_addrsize(inst_info); - inout_str_seginfo(vmx, vcpu, inst_info, in, vis); + vis->cs_d = 0; + vis->cs_base = 0; + inout_str_seginfo(vcpu, inst_info, in, vis); } - SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpuid, vmexit); break; case EXIT_REASON_CPUID: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1); - SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpu, vmexit); - handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); + vmm_stat_incr(vcpu->vcpu, VMEXIT_CPUID, 1); + SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpuid, vmexit); + handled = vmx_handle_cpuid(vcpu, vmxctx); break; case EXIT_REASON_EXCEPTION: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_EXCEPTION, 1); intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, ("VM exit interruption info invalid: %#x", intr_info)); @@ -2643,7 +2691,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && (intr_vec != IDT_DF) && (intr_info & EXIT_QUAL_NMIUDTI) != 0) - vmx_restore_nmi_blocking(vmx, vcpu); + vmx_restore_nmi_blocking(vcpu); /* * The NMI has already been handled in vmx_exit_handle_nmi(). @@ -2656,7 +2704,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * the machine check back into the guest. */ if (intr_vec == IDT_MC) { - VCPU_CTR0(vmx->vm, vcpu, "Vectoring to MCE handler"); + VMX_CTR0(vcpu, "Vectoring to MCE handler"); __asm __volatile("int $18"); return (1); } @@ -2666,7 +2714,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * debug exceptions, bounce them out to userland. */ if (intr_type == VMCS_INTR_T_SWEXCEPTION && intr_vec == IDT_BP && - (vmx->cap[vcpu].set & (1 << VM_CAP_BPT_EXIT))) { + (vcpu->cap.set & (1 << VM_CAP_BPT_EXIT))) { vmexit->exitcode = VM_EXITCODE_BPT; vmexit->u.bpt.inst_length = vmexit->inst_length; vmexit->inst_length = 0; @@ -2694,11 +2742,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) errcode_valid = 1; errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); } - VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into " + VMX_CTR2(vcpu, "Reflecting exception %d/%#x into " "the guest", intr_vec, errcode); SDT_PROBE5(vmm, vmx, exit, exception, - vmx, vcpu, vmexit, intr_vec, errcode); - error = vm_inject_exception(vmx->vm, vcpu, intr_vec, + vmx, vcpuid, vmexit, intr_vec, errcode); + error = vm_inject_exception(vcpu->vcpu, intr_vec, errcode_valid, errcode, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); @@ -2711,20 +2759,20 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * this must be an instruction that accesses MMIO space. 
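
The EPT-violation triage in the hunk above splits faults two ways: a fault on memory the VM actually owns (or on passthrough-device MMIO, per the new ppt_is_mmio() check, or on the virtual-APIC page) becomes a VM_EXITCODE_PAGING exit for the host to resolve, while a fault that looks like an instruction touching unbacked MMIO space goes to the instruction-emulation path. A sketch of the decision, with boolean parameters standing in for vm_mem_allocated()/ppt_is_mmio()/apic_access_fault() and ept_emulation_fault():

#include <stdbool.h>

enum paging_exit { EXIT_PAGING, EXIT_INST_EMUL, EXIT_UNHANDLED };

static enum paging_exit
classify_ept_violation(bool gpa_backed_or_mmio, bool emulable_access)
{
        if (gpa_backed_or_mmio)
                return (EXIT_PAGING);           /* host pager fills it in */
        if (emulable_access)
                return (EXIT_INST_EMUL);        /* decode and emulate MMIO */
        return (EXIT_UNHANDLED);                /* bounce to userland */
}
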
*/ gpa = vmcs_gpa(); - if (vm_mem_allocated(vmx->vm, vcpu, gpa) || - apic_access_fault(vmx, vcpu, gpa)) { + if (vm_mem_allocated(vcpu->vcpu, gpa) || + ppt_is_mmio(vmx->vm, gpa) || apic_access_fault(vcpu, gpa)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->inst_length = 0; vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_NESTED_FAULT, 1); SDT_PROBE5(vmm, vmx, exit, nestedfault, - vmx, vcpu, vmexit, gpa, qual); + vmx, vcpuid, vmexit, gpa, qual); } else if (ept_emulation_fault(qual)) { vmexit_inst_emul(vmexit, gpa, vmcs_gla()); - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1); + vmm_stat_incr(vcpu->vcpu, VMEXIT_INST_EMUL, 1); SDT_PROBE4(vmm, vmx, exit, mmiofault, - vmx, vcpu, vmexit, gpa); + vmx, vcpuid, vmexit, gpa); } /* * If Virtual NMIs control is 1 and the VM-exit is due to an @@ -2736,17 +2784,17 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && (qual & EXIT_QUAL_NMIUDTI) != 0) - vmx_restore_nmi_blocking(vmx, vcpu); + vmx_restore_nmi_blocking(vcpu); break; case EXIT_REASON_VIRTUALIZED_EOI: vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI; vmexit->u.ioapic_eoi.vector = qual & 0xFF; - SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpuid, vmexit); vmexit->inst_length = 0; /* trap-like */ break; case EXIT_REASON_APIC_ACCESS: - SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit); - handled = vmx_handle_apic_access(vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpuid, vmexit); + handled = vmx_handle_apic_access(vcpu, vmexit); break; case EXIT_REASON_APIC_WRITE: /* @@ -2754,25 +2802,25 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * pointing to the next instruction. 
*/ vmexit->inst_length = 0; - vlapic = vm_lapic(vmx->vm, vcpu); + vlapic = vm_lapic(vcpu->vcpu); SDT_PROBE4(vmm, vmx, exit, apicwrite, - vmx, vcpu, vmexit, vlapic); - handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual); + vmx, vcpuid, vmexit, vlapic); + handled = vmx_handle_apic_write(vcpu, vlapic, qual); break; case EXIT_REASON_XSETBV: - SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpuid, vmexit); handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit); break; case EXIT_REASON_MONITOR: - SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpuid, vmexit); vmexit->exitcode = VM_EXITCODE_MONITOR; break; case EXIT_REASON_MWAIT: - SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpuid, vmexit); vmexit->exitcode = VM_EXITCODE_MWAIT; break; case EXIT_REASON_TPR: - vlapic = vm_lapic(vmx->vm, vcpu); + vlapic = vm_lapic(vcpu->vcpu); vlapic_sync_tpr(vlapic); vmexit->inst_length = 0; handled = HANDLED; @@ -2787,17 +2835,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) case EXIT_REASON_VMWRITE: case EXIT_REASON_VMXOFF: case EXIT_REASON_VMXON: - SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpu, vmexit); + SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpuid, vmexit); vmexit->exitcode = VM_EXITCODE_VMINSN; break; + case EXIT_REASON_INVD: case EXIT_REASON_WBINVD: - /* ignore WBINVD */ + /* ignore exit */ handled = HANDLED; break; default: SDT_PROBE4(vmm, vmx, exit, unknown, - vmx, vcpu, vmexit, reason); - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); + vmx, vcpuid, vmexit, reason); + vmm_stat_incr(vcpu->vcpu, VMEXIT_UNKNOWN, 1); break; } @@ -2834,7 +2883,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } SDT_PROBE4(vmm, vmx, exit, return, - vmx, vcpu, vmexit, handled); + vmx, vcpuid, vmexit, handled); return (handled); } @@ -2872,7 +2921,7 @@ vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) * clear NMI blocking. 
*/ static __inline void -vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) +vmx_exit_handle_nmi(struct vmx_vcpu *vcpu, struct vm_exit *vmexit) { uint32_t intr_info; @@ -2888,7 +2937,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due " "to NMI has invalid vector: %#x", intr_info)); - VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler"); + VMX_CTR0(vcpu, "Vectoring to NMI handler"); __asm __volatile("int $2"); } } @@ -2986,12 +3035,11 @@ vmx_pmap_deactivate(struct vmx *vmx, pmap_t pmap) } static int -vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, - struct vm_eventinfo *evinfo) +vmx_run(void *vcpui, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { int rc, handled, launched; struct vmx *vmx; - struct vm *vm; + struct vmx_vcpu *vcpu; struct vmxctx *vmxctx; struct vmcs *vmcs; struct vm_exit *vmexit; @@ -3000,18 +3048,18 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct region_descriptor gdtr, idtr; uint16_t ldt_sel; - vmx = arg; - vm = vmx->vm; - vmcs = &vmx->vmcs[vcpu]; - vmxctx = &vmx->ctx[vcpu]; - vlapic = vm_lapic(vm, vcpu); - vmexit = vm_exitinfo(vm, vcpu); + vcpu = vcpui; + vmx = vcpu->vmx; + vmcs = vcpu->vmcs; + vmxctx = &vcpu->ctx; + vlapic = vm_lapic(vcpu->vcpu); + vmexit = vm_exitinfo(vcpu->vcpu); launched = 0; KASSERT(vmxctx->pmap == pmap, ("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap)); - vmx_msr_guest_enter(vmx, vcpu); + vmx_msr_guest_enter(vcpu); VMPTRLD(vmcs); @@ -3051,7 +3099,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, * pmap_invalidate_ept(). */ disable_intr(); - vmx_inject_interrupts(vmx, vcpu, vlapic, rip); + vmx_inject_interrupts(vcpu, vlapic, rip); /* * Check for vcpu suspension after injecting events because @@ -3060,33 +3108,33 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, */ if (vcpu_suspended(evinfo)) { enable_intr(); - vm_exit_suspended(vmx->vm, vcpu, rip); + vm_exit_suspended(vcpu->vcpu, rip); break; } - if (vcpu_rendezvous_pending(evinfo)) { + if (vcpu_rendezvous_pending(vcpu->vcpu, evinfo)) { enable_intr(); - vm_exit_rendezvous(vmx->vm, vcpu, rip); + vm_exit_rendezvous(vcpu->vcpu, rip); break; } if (vcpu_reqidle(evinfo)) { enable_intr(); - vm_exit_reqidle(vmx->vm, vcpu, rip); + vm_exit_reqidle(vcpu->vcpu, rip); break; } - if (vcpu_should_yield(vm, vcpu)) { + if (vcpu_should_yield(vcpu->vcpu)) { enable_intr(); - vm_exit_astpending(vmx->vm, vcpu, rip); - vmx_astpending_trace(vmx, vcpu, rip); + vm_exit_astpending(vcpu->vcpu, rip); + vmx_astpending_trace(vcpu, rip); handled = HANDLED; break; } - if (vcpu_debugged(vm, vcpu)) { + if (vcpu_debugged(vcpu->vcpu)) { enable_intr(); - vm_exit_debug(vmx->vm, vcpu, rip); + vm_exit_debug(vcpu->vcpu, rip); break; } @@ -3095,7 +3143,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, * must be updated right before entering the guest. 
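
The vmx_run() hunks above preserve a subtle ordering: suspension, rendezvous, reqidle, AST, and debug checks all run with interrupts disabled, after event injection, so that a wakeup posted between the check and VM entry cannot be lost. A skeleton of that loop with trivial stand-ins for the kernel primitives:

#include <stdbool.h>

/* Stand-ins for the primitives vmx_run() uses. */
static void disable_intr(void) { }
static void enable_intr(void) { }
static void inject_pending_events(void) { }
static bool event_pending(void) { static int n; return (++n > 2); }
static void enter_guest(void) { }       /* VMLAUNCH/VMRESUME */
static bool process_exit(void) { return (true); }

static void
run_loop_sketch(void)
{
        bool handled = true;

        while (handled) {
                disable_intr();
                inject_pending_events();
                /*
                 * Checked with interrupts off: a pending event re-enables
                 * interrupts and is surfaced to the caller as a VM exit.
                 */
                if (event_pending()) {
                        enable_intr();
                        break;
                }
                enter_guest();          /* returns on VM exit, intrs off */
                enable_intr();
                handled = process_exit();
        }
}
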
*/ if (tpr_shadowing && !virtual_interrupt_delivery) { - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0) { + if ((vcpu->cap.proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0) { vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic)); } } @@ -3141,7 +3189,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, */ vmx_pmap_activate(vmx, pmap); - vmx_run_trace(vmx, vcpu); + vmx_run_trace(vcpu); rc = vmx_enter_guest(vmxctx, vmx, launched); vmx_pmap_deactivate(vmx, pmap); @@ -3159,10 +3207,10 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmexit->u.vmx.exit_qualification = vmcs_exit_qualification(); /* Update 'nextrip' */ - vmx->state[vcpu].nextrip = rip; + vcpu->state.nextrip = rip; if (rc == VMX_GUEST_VMEXIT) { - vmx_exit_handle_nmi(vmx, vcpu, vmexit); + vmx_exit_handle_nmi(vcpu, vmexit); enable_intr(); handled = vmx_exit_process(vmx, vcpu, vmexit); } else { @@ -3170,7 +3218,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx_exit_inst_error(vmxctx, rc, vmexit); } launched = 1; - vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled); + vmx_exit_trace(vcpu, rip, exit_reason, handled); rip = vmexit->rip; } while (handled); @@ -3184,29 +3232,36 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, handled, vmexit->exitcode); } - VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", + VMX_CTR1(vcpu, "returning from vmx_run: exitcode %d", vmexit->exitcode); VMCLEAR(vmcs); - vmx_msr_guest_exit(vmx, vcpu); + vmx_msr_guest_exit(vcpu); return (0); } static void -vmx_cleanup(void *arg) +vmx_vcpu_cleanup(void *vcpui) { - int i; - struct vmx *vmx = arg; - uint16_t maxcpus; + struct vmx_vcpu *vcpu = vcpui; - if (apic_access_virtualization(vmx, 0)) - vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); + vpid_free(vcpu->state.vpid); + free(vcpu->pir_desc, M_VMX); + free(vcpu->apic_page, M_VMX); + free(vcpu->vmcs, M_VMX); + free(vcpu, M_VMX); +} - maxcpus = vm_get_maxcpus(vmx->vm); - for (i = 0; i < maxcpus; i++) - vpid_free(vmx->state[i].vpid); +static void +vmx_cleanup(void *vmi) +{ + struct vmx *vmx = vmi; + if (virtual_interrupt_delivery) + vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); + + free(vmx->msr_bitmap, M_VMX); free(vmx, M_VMX); return; @@ -3290,19 +3345,19 @@ vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val) } static int -vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) +vmx_get_intr_shadow(struct vmx_vcpu *vcpu, int running, uint64_t *retval) { uint64_t gi; int error; - error = vmcs_getreg(&vmx->vmcs[vcpu], running, + error = vmcs_getreg(vcpu->vmcs, running, VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; return (error); } static int -vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) +vmx_modify_intr_shadow(struct vmx_vcpu *vcpu, int running, uint64_t val) { struct vmcs *vmcs; uint64_t gi; @@ -3316,7 +3371,7 @@ vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) goto done; } - vmcs = &vmx->vmcs[vcpu]; + vmcs = vcpu->vmcs; ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY); error = vmcs_getreg(vmcs, running, ident, &gi); if (error == 0) { @@ -3324,7 +3379,7 @@ vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) error = vmcs_setreg(vmcs, running, ident, gi); } done: - VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val, + VMX_CTR2(vcpu, "Setting intr_shadow to %#lx %s", val, error ? 
"failed" : "succeeded"); return (error); } @@ -3351,47 +3406,59 @@ vmx_shadow_reg(int reg) } static int -vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) +vmx_getreg(void *vcpui, int reg, uint64_t *retval) { int running, hostcpu; - struct vmx *vmx = arg; + struct vmx_vcpu *vcpu = vcpui; + struct vmx *vmx = vcpu->vmx; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + running = vcpu_is_running(vcpu->vcpu, &hostcpu); if (running && hostcpu != curcpu) - panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); + panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), + vcpu->vcpuid); - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_get_intr_shadow(vmx, vcpu, running, retval)); + switch (reg) { + case VM_REG_GUEST_INTR_SHADOW: + return (vmx_get_intr_shadow(vcpu, running, retval)); + case VM_REG_GUEST_KGS_BASE: + *retval = vcpu->guest_msrs[IDX_MSR_KGSBASE]; + return (0); + case VM_REG_GUEST_TPR: + *retval = vlapic_get_cr8(vm_lapic(vcpu->vcpu)); + return (0); + } - if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0) + if (vmxctx_getreg(&vcpu->ctx, reg, retval) == 0) return (0); - return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval)); + return (vmcs_getreg(vcpu->vmcs, running, reg, retval)); } static int -vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) +vmx_setreg(void *vcpui, int reg, uint64_t val) { int error, hostcpu, running, shadow; uint64_t ctls; pmap_t pmap; - struct vmx *vmx = arg; + struct vmx_vcpu *vcpu = vcpui; + struct vmx *vmx = vcpu->vmx; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + running = vcpu_is_running(vcpu->vcpu, &hostcpu); if (running && hostcpu != curcpu) - panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu); + panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), + vcpu->vcpuid); if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_modify_intr_shadow(vmx, vcpu, running, val)); + return (vmx_modify_intr_shadow(vcpu, running, val)); - if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0) + if (vmxctx_setreg(&vcpu->ctx, reg, val) == 0) return (0); /* Do not permit user write access to VMCS fields by offset. */ if (reg < 0) return (EINVAL); - error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val); + error = vmcs_setreg(vcpu->vmcs, running, reg, val); if (error == 0) { /* @@ -3401,13 +3468,13 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) */ if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 && (reg == VM_REG_GUEST_EFER)) { - vmcs_getreg(&vmx->vmcs[vcpu], running, + vmcs_getreg(vcpu->vmcs, running, VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); if (val & EFER_LMA) ctls |= VM_ENTRY_GUEST_LMA; else ctls &= ~VM_ENTRY_GUEST_LMA; - vmcs_setreg(&vmx->vmcs[vcpu], running, + vmcs_setreg(vcpu->vmcs, running, VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); } @@ -3416,7 +3483,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) /* * Store the unmodified value in the shadow */ - error = vmcs_setreg(&vmx->vmcs[vcpu], running, + error = vmcs_setreg(vcpu->vmcs, running, VMCS_IDENT(shadow), val); } @@ -3428,7 +3495,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. 
*/ - pmap = vmx->ctx[vcpu].pmap; + pmap = vcpu->ctx.pmap; vmx_invvpid(vmx, vcpu, pmap, running); } } @@ -3437,41 +3504,45 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) } static int -vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_getdesc(void *vcpui, int reg, struct seg_desc *desc) { int hostcpu, running; - struct vmx *vmx = arg; + struct vmx_vcpu *vcpu = vcpui; + struct vmx *vmx = vcpu->vmx; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + running = vcpu_is_running(vcpu->vcpu, &hostcpu); if (running && hostcpu != curcpu) - panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); + panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), + vcpu->vcpuid); - return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc)); + return (vmcs_getdesc(vcpu->vmcs, running, reg, desc)); } static int -vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_setdesc(void *vcpui, int reg, struct seg_desc *desc) { int hostcpu, running; - struct vmx *vmx = arg; + struct vmx_vcpu *vcpu = vcpui; + struct vmx *vmx = vcpu->vmx; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + running = vcpu_is_running(vcpu->vcpu, &hostcpu); if (running && hostcpu != curcpu) - panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); + panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), + vcpu->vcpuid); - return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc)); + return (vmcs_setdesc(vcpu->vmcs, running, reg, desc)); } static int -vmx_getcap(void *arg, int vcpu, int type, int *retval) +vmx_getcap(void *vcpui, int type, int *retval) { - struct vmx *vmx = arg; + struct vmx_vcpu *vcpu = vcpui; int vcap; int ret; ret = ENOENT; - vcap = vmx->cap[vcpu].set; + vcap = vcpu->cap.set; switch (type) { case VM_CAP_HALT_EXIT: @@ -3503,6 +3574,7 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval) ret = 0; break; case VM_CAP_BPT_EXIT: + case VM_CAP_IPI_EXIT: ret = 0; break; default: @@ -3516,10 +3588,11 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval) } static int -vmx_setcap(void *arg, int vcpu, int type, int val) +vmx_setcap(void *vcpui, int type, int val) { - struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; + struct vmx_vcpu *vcpu = vcpui; + struct vmcs *vmcs = vcpu->vmcs; + struct vlapic *vlapic; uint32_t baseval; uint32_t *pptr; int error; @@ -3534,7 +3607,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) case VM_CAP_HALT_EXIT: if (cap_halt_exit) { retval = 0; - pptr = &vmx->cap[vcpu].proc_ctls; + pptr = &vcpu->cap.proc_ctls; baseval = *pptr; flag = PROCBASED_HLT_EXITING; reg = VMCS_PRI_PROC_BASED_CTLS; @@ -3543,7 +3616,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) case VM_CAP_MTRAP_EXIT: if (cap_monitor_trap) { retval = 0; - pptr = &vmx->cap[vcpu].proc_ctls; + pptr = &vcpu->cap.proc_ctls; baseval = *pptr; flag = PROCBASED_MTF; reg = VMCS_PRI_PROC_BASED_CTLS; @@ -3552,7 +3625,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) case VM_CAP_PAUSE_EXIT: if (cap_pause_exit) { retval = 0; - pptr = &vmx->cap[vcpu].proc_ctls; + pptr = &vcpu->cap.proc_ctls; baseval = *pptr; flag = PROCBASED_PAUSE_EXITING; reg = VMCS_PRI_PROC_BASED_CTLS; @@ -3572,7 +3645,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) case VM_CAP_UNRESTRICTED_GUEST: if (cap_unrestricted_guest) { retval = 0; - pptr = &vmx->cap[vcpu].proc_ctls2; + pptr = &vcpu->cap.proc_ctls2; baseval = *pptr; flag = PROCBASED2_UNRESTRICTED_GUEST; reg = VMCS_SEC_PROC_BASED_CTLS; @@ -3581,7 +3654,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) case 
VM_CAP_ENABLE_INVPCID: if (cap_invpcid) { retval = 0; - pptr = &vmx->cap[vcpu].proc_ctls2; + pptr = &vcpu->cap.proc_ctls2; baseval = *pptr; flag = PROCBASED2_ENABLE_INVPCID; reg = VMCS_SEC_PROC_BASED_CTLS; @@ -3591,13 +3664,22 @@ vmx_setcap(void *arg, int vcpu, int type, int val) retval = 0; /* Don't change the bitmap if we are tracing all exceptions. */ - if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { - pptr = &vmx->cap[vcpu].exc_bitmap; + if (vcpu->cap.exc_bitmap != 0xffffffff) { + pptr = &vcpu->cap.exc_bitmap; baseval = *pptr; flag = (1 << IDT_BP); reg = VMCS_EXCEPTION_BITMAP; } break; + case VM_CAP_IPI_EXIT: + retval = 0; + + vlapic = vm_lapic(vcpu->vcpu); + vlapic->ipi_exit = val; + break; + case VM_CAP_MASK_HWINTR: + retval = 0; + break; default: break; } @@ -3626,9 +3708,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } if (val) { - vmx->cap[vcpu].set |= (1 << type); + vcpu->cap.set |= (1 << type); } else { - vmx->cap[vcpu].set &= ~(1 << type); + vcpu->cap.set &= ~(1 << type); } return (0); @@ -3649,21 +3731,21 @@ vmx_vmspace_free(struct vmspace *vmspace) struct vlapic_vtx { struct vlapic vlapic; struct pir_desc *pir_desc; - struct vmx *vmx; + struct vmx_vcpu *vcpu; u_int pending_prio; }; #define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4)) -#define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \ +#define VMX_CTR_PIR(vlapic, pir_desc, notify, vector, level, msg) \ do { \ - VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \ + VLAPIC_CTR2(vlapic, msg " assert %s-triggered vector %d", \ level ? "level" : "edge", vector); \ - VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]); \ - VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]); \ - VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]); \ - VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]); \ - VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\ + VLAPIC_CTR1(vlapic, msg " pir0 0x%016lx", pir_desc->pir[0]); \ + VLAPIC_CTR1(vlapic, msg " pir1 0x%016lx", pir_desc->pir[1]); \ + VLAPIC_CTR1(vlapic, msg " pir2 0x%016lx", pir_desc->pir[2]); \ + VLAPIC_CTR1(vlapic, msg " pir3 0x%016lx", pir_desc->pir[3]); \ + VLAPIC_CTR1(vlapic, msg " notify: %s", notify ? "yes" : "no"); \ } while (0) /* @@ -3721,8 +3803,8 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level) } } - VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector, - level, "vmx_set_intr_ready"); + VMX_CTR_PIR(vlapic, pir_desc, notify, vector, level, + "vmx_set_intr_ready"); return (notify); } @@ -3733,7 +3815,8 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) struct pir_desc *pir_desc; struct LAPIC *lapic; uint64_t pending, pirval; - uint32_t ppr, vpr; + uint8_t ppr, vpr, rvi; + struct vm_exit *vmexit; int i; /* @@ -3744,31 +3827,26 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) vlapic_vtx = (struct vlapic_vtx *)vlapic; pir_desc = vlapic_vtx->pir_desc; + lapic = vlapic->apic_page; - pending = atomic_load_acq_long(&pir_desc->pending); - if (!pending) { - /* - * While a virtual interrupt may have already been - * processed the actual delivery maybe pending the - * interruptibility of the guest. Recognize a pending - * interrupt by reevaluating virtual interrupts - * following Section 29.2.1 in the Intel SDM Volume 3. 
- */ - struct vm_exit *vmexit; - uint8_t rvi, ppr; - - vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); - KASSERT(vmexit->exitcode == VM_EXITCODE_HLT, - ("vmx_pending_intr: exitcode not 'HLT'")); - rvi = vmexit->u.hlt.intr_status & APIC_TPR_INT; - lapic = vlapic->apic_page; - ppr = lapic->ppr & APIC_TPR_INT; - if (rvi > ppr) { - return (1); - } + /* + * While a virtual interrupt may have already been + * processed, the actual delivery may be pending the + * interruptibility of the guest. Recognize a pending + * interrupt by reevaluating virtual interrupts + * following Section 30.2.1 in the Intel SDM Volume 3. + */ + vmexit = vm_exitinfo(vlapic->vcpu); + KASSERT(vmexit->exitcode == VM_EXITCODE_HLT, + ("vmx_pending_intr: exitcode not 'HLT'")); + rvi = vmexit->u.hlt.intr_status & APIC_TPR_INT; + ppr = lapic->ppr & APIC_TPR_INT; + if (rvi > ppr) + return (1); + pending = atomic_load_acq_long(&pir_desc->pending); + if (!pending) return (0); - } /* * If there is an interrupt pending then it will be recognized only @@ -3777,13 +3855,10 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) * Special case: if the processor priority is zero then any pending * interrupt will be recognized. */ - lapic = vlapic->apic_page; - ppr = lapic->ppr & APIC_TPR_INT; if (ppr == 0) return (1); - VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d", - lapic->ppr); + VLAPIC_CTR1(vlapic, "HLT with non-zero PPR %d", lapic->ppr); vpr = 0; for (i = 3; i >= 0; i--) { @@ -3823,17 +3898,15 @@ static void vmx_set_tmr(struct vlapic *vlapic, int vector, bool level) { struct vlapic_vtx *vlapic_vtx; - struct vmx *vmx; struct vmcs *vmcs; uint64_t mask, val; KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); - KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL), + KASSERT(!vcpu_is_running(vlapic->vcpu, NULL), ("vmx_set_tmr: vcpu cannot be running")); vlapic_vtx = (struct vlapic_vtx *)vlapic; - vmx = vlapic_vtx->vmx; - vmcs = &vmx->vmcs[vlapic->vcpuid]; + vmcs = vlapic_vtx->vcpu->vmcs; mask = 1UL << (vector % 64); VMPTRLD(vmcs); @@ -3849,20 +3922,20 @@ vmx_set_tmr(struct vlapic *vlapic, int vector, bool level) static void vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) { - struct vmx *vmx; + struct vlapic_vtx *vlapic_vtx; + struct vmx_vcpu *vcpu; struct vmcs *vmcs; uint32_t proc_ctls; - int vcpuid; - vcpuid = vlapic->vcpuid; - vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; + vlapic_vtx = (struct vlapic_vtx *)vlapic; + vcpu = vlapic_vtx->vcpu; + vmcs = vcpu->vmcs; - proc_ctls = vmx->cap[vcpuid].proc_ctls; + proc_ctls = vcpu->cap.proc_ctls; proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; proc_ctls |= PROCBASED_CR8_LOAD_EXITING; proc_ctls |= PROCBASED_CR8_STORE_EXITING; - vmx->cap[vcpuid].proc_ctls = proc_ctls; + vcpu->cap.proc_ctls = proc_ctls; VMPTRLD(vmcs); vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); @@ -3872,22 +3945,25 @@ vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) static void vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) { + struct vlapic_vtx *vlapic_vtx; struct vmx *vmx; + struct vmx_vcpu *vcpu; struct vmcs *vmcs; uint32_t proc_ctls2; - int vcpuid, error __diagused; + int error __diagused; - vcpuid = vlapic->vcpuid; - vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; + vlapic_vtx = (struct vlapic_vtx *)vlapic; + vcpu = vlapic_vtx->vcpu; + vmx = vcpu->vmx; + vmcs = vcpu->vmcs; - proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; + proc_ctls2 = vcpu->cap.proc_ctls2; KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, ("%s: invalid 
proc_ctls2 %#x", __func__, proc_ctls2)); proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES; proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; - vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; + vcpu->cap.proc_ctls2 = proc_ctls2; VMPTRLD(vmcs); vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); @@ -3936,7 +4012,7 @@ vmx_inject_pir(struct vlapic *vlapic) vlapic_vtx = (struct vlapic_vtx *)vlapic; pir_desc = vlapic_vtx->pir_desc; if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) { - VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: " + VLAPIC_CTR0(vlapic, "vmx_inject_pir: " "no posted interrupt pending"); return; } @@ -4006,7 +4082,7 @@ vmx_inject_pir(struct vlapic *vlapic) intr_status_new = (intr_status_old & 0xFF00) | rvi; if (intr_status_new > intr_status_old) { vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new); - VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: " + VLAPIC_CTR2(vlapic, "vmx_inject_pir: " "guest_intr_status changed from 0x%04x to 0x%04x", intr_status_old, intr_status_new); } @@ -4014,22 +4090,25 @@ vmx_inject_pir(struct vlapic *vlapic) } static struct vlapic * -vmx_vlapic_init(void *arg, int vcpuid) +vmx_vlapic_init(void *vcpui) { struct vmx *vmx; + struct vmx_vcpu *vcpu; struct vlapic *vlapic; struct vlapic_vtx *vlapic_vtx; - vmx = arg; + vcpu = vcpui; + vmx = vcpu->vmx; vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO); vlapic->vm = vmx->vm; - vlapic->vcpuid = vcpuid; - vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid]; + vlapic->vcpu = vcpu->vcpu; + vlapic->vcpuid = vcpu->vcpuid; + vlapic->apic_page = (struct LAPIC *)vcpu->apic_page; vlapic_vtx = (struct vlapic_vtx *)vlapic; - vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid]; - vlapic_vtx->vmx = vmx; + vlapic_vtx->pir_desc = vcpu->pir_desc; + vlapic_vtx->vcpu = vcpu; if (tpr_shadowing) { vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts; @@ -4052,7 +4131,7 @@ vmx_vlapic_init(void *arg, int vcpuid) } static void -vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic) +vmx_vlapic_cleanup(struct vlapic *vlapic) { vlapic_cleanup(vlapic); @@ -4061,65 +4140,23 @@ vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic) #ifdef BHYVE_SNAPSHOT static int -vmx_snapshot(void *arg, struct vm_snapshot_meta *meta) -{ - struct vmx *vmx; - struct vmxctx *vmxctx; - int i; - int ret; - - vmx = arg; - - KASSERT(vmx != NULL, ("%s: arg was NULL", __func__)); - - for (i = 0; i < VM_MAXCPU; i++) { - SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i], - sizeof(vmx->guest_msrs[i]), meta, ret, done); - - vmxctx = &vmx->ctx[i]; - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, ret, done); - 
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, ret, done); - } - -done: - return (ret); -} - -static int -vmx_vmcx_snapshot(void *arg, struct vm_snapshot_meta *meta, int vcpu) +vmx_vcpu_snapshot(void *vcpui, struct vm_snapshot_meta *meta) { struct vmcs *vmcs; struct vmx *vmx; + struct vmx_vcpu *vcpu; + struct vmxctx *vmxctx; int err, run, hostcpu; - vmx = (struct vmx *)arg; err = 0; + vcpu = vcpui; + vmx = vcpu->vmx; + vmcs = vcpu->vmcs; - KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); - vmcs = &vmx->vmcs[vcpu]; - - run = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + run = vcpu_is_running(vcpu->vcpu, &hostcpu); if (run && hostcpu != curcpu) { - printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu); + printf("%s: %s%d is running", __func__, vm_name(vmx->vm), + vcpu->vcpuid); return (EINVAL); } @@ -4175,30 +4212,67 @@ vmx_vmcx_snapshot(void *arg, struct vm_snapshot_meta *meta, int vcpu) err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_ACTIVITY, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_ENTRY_CTLS, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_EXIT_CTLS, meta); + if (err != 0) + goto done; + + SNAPSHOT_BUF_OR_LEAVE(vcpu->guest_msrs, + sizeof(vcpu->guest_msrs), meta, err, done); + + SNAPSHOT_BUF_OR_LEAVE(vcpu->pir_desc, + sizeof(*vcpu->pir_desc), meta, err, done); + + SNAPSHOT_BUF_OR_LEAVE(&vcpu->mtrr, + sizeof(vcpu->mtrr), meta, err, done); + + vmxctx = &vcpu->ctx; + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, err, done); +done: return (err); } static int -vmx_restore_tsc(void *arg, int vcpu, uint64_t offset) +vmx_restore_tsc(void *vcpui, uint64_t offset) { + struct vmx_vcpu *vcpu = vcpui; struct vmcs *vmcs; - struct vmx *vmx = (struct vmx *)arg; + struct vmx *vmx; int error, running, hostcpu; - KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); - vmcs = &vmx->vmcs[vcpu]; + vmx = vcpu->vmx; + vmcs = vcpu->vmcs; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + running = vcpu_is_running(vcpu->vcpu, &hostcpu); if (running && hostcpu != curcpu) { - printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu); + printf("%s: 
%s%d is running", __func__, vm_name(vmx->vm), + vcpu->vcpuid); return (EINVAL); } if (!running) VMPTRLD(vmcs); - error = vmx_set_tsc_offset(vmx, vcpu, offset); + error = vmx_set_tsc_offset(vcpu, offset); if (!running) VMCLEAR(vmcs); @@ -4209,10 +4283,13 @@ vmx_restore_tsc(void *arg, int vcpu, uint64_t offset) const struct vmm_ops vmm_ops_intel = { .modinit = vmx_modinit, .modcleanup = vmx_modcleanup, + .modsuspend = vmx_modsuspend, .modresume = vmx_modresume, .init = vmx_init, .run = vmx_run, .cleanup = vmx_cleanup, + .vcpu_init = vmx_vcpu_init, + .vcpu_cleanup = vmx_vcpu_cleanup, .getreg = vmx_getreg, .setreg = vmx_setreg, .getdesc = vmx_getdesc, @@ -4224,8 +4301,7 @@ const struct vmm_ops vmm_ops_intel = { .vlapic_init = vmx_vlapic_init, .vlapic_cleanup = vmx_vlapic_cleanup, #ifdef BHYVE_SNAPSHOT - .snapshot = vmx_snapshot, - .vmcx_snapshot = vmx_vmcx_snapshot, + .vcpu_snapshot = vmx_vcpu_snapshot, .restore_tsc = vmx_restore_tsc, #endif }; diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h index 81e508e30d3d..af4437d1eda4 100644 --- a/sys/amd64/vmm/intel/vmx.h +++ b/sys/amd64/vmm/intel/vmx.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,17 +24,19 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMX_H_ #define _VMX_H_ +#include <vm/vm.h> +#include <vm/pmap.h> + #include "vmcs.h" #include "x86.h" struct pmap; +struct vmx; struct vmxctx { register_t guest_rdi; /* Guest state */ @@ -122,24 +124,45 @@ enum { GUEST_MSR_NUM /* must be the last enumeration */ }; +struct vmx_vcpu { + struct vmx *vmx; + struct vcpu *vcpu; + struct vmcs *vmcs; + struct apic_page *apic_page; + struct pir_desc *pir_desc; + uint64_t guest_msrs[GUEST_MSR_NUM]; + struct vmxctx ctx; + struct vmxcap cap; + struct vmxstate state; + struct vm_mtrr mtrr; + int vcpuid; +}; + /* virtual machine softc */ struct vmx { - struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */ - struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */ - char msr_bitmap[PAGE_SIZE]; - struct pir_desc pir_desc[VM_MAXCPU]; - uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM]; - struct vmxctx ctx[VM_MAXCPU]; - struct vmxcap cap[VM_MAXCPU]; - struct vmxstate state[VM_MAXCPU]; - uint64_t eptp; struct vm *vm; + char *msr_bitmap; + uint64_t eptp; long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */ - struct vm_mtrr mtrr[VM_MAXCPU]; + pmap_t pmap; }; -CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0); -CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0); -CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0); + +extern bool vmx_have_msr_tsc_aux; + +#define VMX_CTR0(vcpu, format) \ + VCPU_CTR0((vcpu)->vmx->vm, (vcpu)->vcpuid, format) + +#define VMX_CTR1(vcpu, format, p1) \ + VCPU_CTR1((vcpu)->vmx->vm, (vcpu)->vcpuid, format, p1) + +#define VMX_CTR2(vcpu, format, p1, p2) \ + VCPU_CTR2((vcpu)->vmx->vm, (vcpu)->vcpuid, format, p1, p2) + +#define VMX_CTR3(vcpu, format, p1, p2, p3) \ + VCPU_CTR3((vcpu)->vmx->vm, (vcpu)->vcpuid, format, p1, p2, p3) + +#define VMX_CTR4(vcpu, format, p1, p2, p3, p4) \ + VCPU_CTR4((vcpu)->vmx->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 @@ -150,23 +173,9 @@ void vmx_call_isr(uintptr_t entry); u_long vmx_fix_cr0(u_long cr0); u_long vmx_fix_cr4(u_long cr4); -int 
vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset); +int vmx_set_tsc_offset(struct vmx_vcpu *vcpu, uint64_t offset); extern char vmx_exit_guest[]; extern char vmx_exit_guest_flush_rsb[]; -static inline bool -vmx_have_msr_tsc_aux(struct vmx *vmx) -{ - int rdpid_rdtscp_bits = ((1 << VM_CAP_RDPID) | (1 << VM_CAP_RDTSCP)); - - /* - * Since the values of these bits are uniform across all vCPUs - * (see discussion in vmx_modinit() and initialization of these bits - * in vmx_init()), just always use vCPU-zero's capability set and - * remove the need to require a vcpuid argument. - */ - return ((vmx->cap[0].set & rdpid_rdtscp_bits) != 0); -} - #endif diff --git a/sys/amd64/vmm/intel/vmx_controls.h b/sys/amd64/vmm/intel/vmx_controls.h index 5408d129ad45..2e4e7cc8a028 100644 --- a/sys/amd64/vmm/intel/vmx_controls.h +++ b/sys/amd64/vmm/intel/vmx_controls.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMX_CONTROLS_H_ diff --git a/sys/amd64/vmm/intel/vmx_cpufunc.h b/sys/amd64/vmm/intel/vmx_cpufunc.h index 05ac56290cb9..26ef54472436 100644 --- a/sys/amd64/vmm/intel/vmx_cpufunc.h +++ b/sys/amd64/vmm/intel/vmx_cpufunc.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMX_CPUFUNC_H_ diff --git a/sys/amd64/vmm/intel/vmx_genassym.c b/sys/amd64/vmm/intel/vmx_genassym.c index d428dad2b71c..06d6b494103a 100644 --- a/sys/amd64/vmm/intel/vmx_genassym.c +++ b/sys/amd64/vmm/intel/vmx_genassym.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c index a135518cb1c3..40dbec290f2d 100644 --- a/sys/amd64/vmm/intel/vmx_msr.c +++ b/sys/amd64/vmm/intel/vmx_msr.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> @@ -314,17 +309,13 @@ vmx_msr_init(void) } void -vmx_msr_guest_init(struct vmx *vmx, int vcpuid) +vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu) { - uint64_t *guest_msrs; - - guest_msrs = vmx->guest_msrs[vcpuid]; - /* * The permissions bitmap is shared between all vcpus so initialize it * once when initializing the vBSP. */ - if (vcpuid == 0) { + if (vcpu->vcpuid == 0) { guest_msr_rw(vmx, MSR_LSTAR); guest_msr_rw(vmx, MSR_CSTAR); guest_msr_rw(vmx, MSR_STAR); @@ -335,7 +326,7 @@ vmx_msr_guest_init(struct vmx *vmx, int vcpuid) /* * Initialize guest IA32_PAT MSR with default value after reset. */ - guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | + vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | @@ -348,40 +339,38 @@ vmx_msr_guest_init(struct vmx *vmx, int vcpuid) } void -vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) +vmx_msr_guest_enter(struct vmx_vcpu *vcpu) { - uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ update_pcb_bases(curpcb); - wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); - wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); - wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); - wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); - wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); + wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]); + wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]); + wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]); + wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]); + wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]); } void -vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid) +vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu) { - uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX]; + uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX]; uint32_t host_aux = cpu_auxmsr(); - if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux) + if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux) wrmsr(MSR_TSC_AUX, guest_tsc_aux); } void -vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) +vmx_msr_guest_exit(struct vmx_vcpu *vcpu) { - uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; /* Save guest MSRs */ - guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); - guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); - guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); - guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); - guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); + vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); + vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); + vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); + vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); + vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); /* Restore host MSRs */ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); @@ -393,15 +382,15 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) } void -vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid) +vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu) { - uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX]; + uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX]; uint32_t host_aux = cpu_auxmsr(); - if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux) + if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux) /* * Note that it is not necessary to save the 
guest value - * here; vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] always + * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always * contains the current value since it is updated whenever * the guest writes to it (which is expected to be very * rare). @@ -410,12 +399,10 @@ vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid) } int -vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) +vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu) { - const uint64_t *guest_msrs; int error; - guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; switch (num) { @@ -429,8 +416,8 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: - if (vm_rdmtrr(&vmx->mtrr[vcpuid], num, val) != 0) { - vm_inject_gp(vmx->vm, vcpuid); + if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) { + vm_inject_gp(vcpu->vcpu); } break; case MSR_IA32_MISC_ENABLE: @@ -444,7 +431,7 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) *val = turbo_ratio_limit; break; case MSR_PAT: - *val = guest_msrs[IDX_MSR_PAT]; + *val = vcpu->guest_msrs[IDX_MSR_PAT]; break; default: error = EINVAL; @@ -454,13 +441,11 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) } int -vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) +vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu) { - uint64_t *guest_msrs; uint64_t changed; int error; - guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; switch (num) { @@ -473,8 +458,8 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: - if (vm_wrmtrr(&vmx->mtrr[vcpuid], num, val) != 0) { - vm_inject_gp(vmx->vm, vcpuid); + if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) { + vm_inject_gp(vcpu->vcpu); } break; case MSR_IA32_MISC_ENABLE: @@ -499,23 +484,23 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) break; case MSR_PAT: if (pat_valid(val)) - guest_msrs[IDX_MSR_PAT] = val; + vcpu->guest_msrs[IDX_MSR_PAT] = val; else - vm_inject_gp(vmx->vm, vcpuid); + vm_inject_gp(vcpu->vcpu); break; case MSR_TSC: - error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc()); + error = vmx_set_tsc_offset(vcpu, val - rdtsc()); break; case MSR_TSC_AUX: - if (vmx_have_msr_tsc_aux(vmx)) + if (vmx_have_msr_tsc_aux) /* * vmx_msr_guest_enter_tsc_aux() will apply this * value when it is called immediately before guest * entry. */ - guest_msrs[IDX_MSR_TSC_AUX] = val; + vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val; else - vm_inject_gp(vmx->vm, vcpuid); + vm_inject_gp(vcpu->vcpu); break; default: error = EINVAL; diff --git a/sys/amd64/vmm/intel/vmx_msr.h b/sys/amd64/vmm/intel/vmx_msr.h index e3a570545e57..f88f37bd9163 100644 --- a/sys/amd64/vmm/intel/vmx_msr.h +++ b/sys/amd64/vmm/intel/vmx_msr.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ #ifndef _VMX_MSR_H_ @@ -34,13 +32,13 @@ struct vmx; void vmx_msr_init(void); -void vmx_msr_guest_init(struct vmx *vmx, int vcpuid); -void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid); -void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid); -void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid); -void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid); -int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu); -int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu); +void vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu); +void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu); +void vmx_msr_guest_enter(struct vmx_vcpu *vcpu); +void vmx_msr_guest_exit(struct vmx_vcpu *vcpu); +void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu); +int vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu); +int vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu); uint32_t vmx_revision(void); diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S index b4101e1b78a1..877e377f892d 100644 --- a/sys/amd64/vmm/intel/vmx_support.S +++ b/sys/amd64/vmm/intel/vmx_support.S @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * Copyright (c) 2013 Neel Natu <neel@freebsd.org> @@ -25,8 +25,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #include <machine/asmacros.h> @@ -34,12 +32,6 @@ #include "vmx_assym.h" -#ifdef SMP -#define LK lock ; -#else -#define LK -#endif - /* Be friendly to DTrace FBT's prologue/epilogue pattern matching */ #define VENTER push %rbp ; mov %rsp,%rbp #define VLEAVE pop %rbp @@ -179,13 +171,11 @@ do_launch: */ movq %rsp, %rdi /* point %rdi back to 'vmxctx' */ movl $VMX_VMLAUNCH_ERROR, %eax - jmp decode_inst_error - + /* FALLTHROUGH */ decode_inst_error: movl $VM_FAIL_VALID, %r11d - jz inst_error - movl $VM_FAIL_INVALID, %r11d -inst_error: + movl $VM_FAIL_INVALID, %esi + cmovnzl %esi, %r11d movl %r11d, VMXCTX_INST_FAIL_STATUS(%rdi) /* diff --git a/sys/amd64/vmm/intel/vtd.c b/sys/amd64/vmm/intel/vtd.c index 8f06dc823364..b56541290a9d 100644 --- a/sys/amd64/vmm/intel/vtd.c +++ b/sys/amd64/vmm/intel/vtd.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> @@ -117,7 +112,7 @@ struct domain { static SLIST_HEAD(, domain) domhead; -#define DRHD_MAX_UNITS 8 +#define DRHD_MAX_UNITS 16 static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS]; static int drhd_num; static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; @@ -436,8 +431,8 @@ vtd_disable(void) } } -static void -vtd_add_device(void *arg, uint16_t rid) +static int +vtd_add_device(void *arg, device_t dev __unused, uint16_t rid) { int idx; uint64_t *ctxp; @@ -446,6 +441,8 @@ vtd_add_device(void *arg, uint16_t rid) struct vtdmap *vtdmap; uint8_t bus; + KASSERT(dom != NULL, ("domain is NULL")); + bus = PCI_RID2BUS(rid); ctxp = ctx_tables[bus]; pt_paddr = vtophys(dom->ptp); @@ -478,10 +475,11 @@ vtd_add_device(void *arg, uint16_t rid) * 'Not Present' entries are not cached in either the Context Cache * or in the IOTLB, so there is no need to invalidate either of them. */ + return (0); } -static void -vtd_remove_device(void *arg, uint16_t rid) +static int +vtd_remove_device(void *arg, device_t dev __unused, uint16_t rid) { int i, idx; uint64_t *ctxp; @@ -509,6 +507,7 @@ vtd_remove_device(void *arg, uint16_t rid) vtd_ctx_global_invalidate(vtdmap); vtd_iotlb_global_invalidate(vtdmap); } + return (0); } #define CREATE_MAPPING 0 @@ -603,21 +602,24 @@ vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, return (1UL << ptpshift); } -static uint64_t -vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) +static int +vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, + uint64_t *res_len) { - return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING)); + *res_len = vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING); + return (0); } -static uint64_t -vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) +static int +vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len) { - return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING)); + *res_len = vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING); + return (0); } -static void +static int vtd_invalidate_tlb(void *dom) { int i; @@ -631,6 +633,7 @@ vtd_invalidate_tlb(void *dom) vtdmap = vtdmaps[i]; vtd_iotlb_global_invalidate(vtdmap); } + return (0); } static void * diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c index 6a589f153815..9fc612244699 100644 --- a/sys/amd64/vmm/io/iommu.c +++ b/sys/amd64/vmm/io/iommu.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/bus.h> #include <sys/eventhandler.h> @@ -63,6 +58,8 @@ static const struct iommu_ops *ops; static void *host_domain; static eventhandler_tag add_tag, delete_tag; +static void iommu_cleanup_int(bool iommu_disable); + static __inline int IOMMU_INIT(void) { @@ -97,48 +94,51 @@ IOMMU_DESTROY_DOMAIN(void *dom) (*ops->destroy_domain)(dom); } -static __inline uint64_t -IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) +static __inline int +IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, + uint64_t len, uint64_t *res_len) { if (ops != NULL && iommu_avail) - return ((*ops->create_mapping)(domain, gpa, hpa, len)); - else - return (len); /* XXX */ + return ((*ops->create_mapping)(domain, gpa, hpa, len, res_len)); + return (EOPNOTSUPP); } static __inline uint64_t -IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len) +IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len, + uint64_t *res_len) { if (ops != NULL && iommu_avail) - return ((*ops->remove_mapping)(domain, gpa, len)); - else - return (len); /* XXX */ + return ((*ops->remove_mapping)(domain, gpa, len, res_len)); + return (EOPNOTSUPP); } -static __inline void -IOMMU_ADD_DEVICE(void *domain, uint16_t rid) +static __inline int +IOMMU_ADD_DEVICE(void *domain, device_t dev, uint16_t rid) { if (ops != NULL && iommu_avail) - (*ops->add_device)(domain, rid); + return ((*ops->add_device)(domain, dev, rid)); + return (EOPNOTSUPP); } -static __inline void -IOMMU_REMOVE_DEVICE(void *domain, uint16_t rid) +static __inline int +IOMMU_REMOVE_DEVICE(void *domain, device_t dev, uint16_t rid) { if (ops != NULL && iommu_avail) - (*ops->remove_device)(domain, rid); + return ((*ops->remove_device)(domain, dev, rid)); + return (0); /* To allow ppt_attach() to succeed. */ } -static __inline void +static __inline int IOMMU_INVALIDATE_TLB(void *domain) { if (ops != NULL && iommu_avail) - (*ops->invalidate_tlb)(domain); + return ((*ops->invalidate_tlb)(domain)); + return (0); } static __inline void @@ -162,14 +162,14 @@ iommu_pci_add(void *arg, device_t dev) { /* Add new devices to the host domain. */ - iommu_add_device(host_domain, pci_get_rid(dev)); + iommu_add_device(host_domain, dev, pci_get_rid(dev)); } static void iommu_pci_delete(void *arg, device_t dev) { - iommu_remove_device(host_domain, pci_get_rid(dev)); + iommu_remove_device(host_domain, dev, pci_get_rid(dev)); } static void @@ -235,17 +235,24 @@ iommu_init(void) * Everything else belongs to the host * domain. 
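* (The host domain created above is an identity map, conceptually * iommu_create_mapping(host_domain, addr, addr, len) over each range of host * memory, so DMA from devices that are never handed to a guest keeps working * once the IOMMU is enabled; only passed-through devices leave it.)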
*/ - iommu_add_device(host_domain, + error = iommu_add_device(host_domain, dev, pci_get_rid(dev)); + if (error != 0 && error != ENXIO) { + printf( + "iommu_add_device(%s rid %#x) failed, error %d\n", + device_get_name(dev), + pci_get_rid(dev), error); + iommu_cleanup_int(false); + return; + } } } } IOMMU_ENABLE(); - } -void -iommu_cleanup(void) +static void +iommu_cleanup_int(bool iommu_disable) { if (add_tag != NULL) { @@ -256,11 +263,19 @@ iommu_cleanup(void) EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag); delete_tag = NULL; } - IOMMU_DISABLE(); + if (iommu_disable) + IOMMU_DISABLE(); IOMMU_DESTROY_DOMAIN(host_domain); + host_domain = NULL; IOMMU_CLEANUP(); } +void +iommu_cleanup(void) +{ + iommu_cleanup_int(true); +} + void * iommu_create_domain(vm_paddr_t maxaddr) { @@ -284,33 +299,39 @@ iommu_destroy_domain(void *dom) IOMMU_DESTROY_DOMAIN(dom); } -void +int iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len) { uint64_t mapped, remaining; - - remaining = len; - - while (remaining > 0) { - mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining); - gpa += mapped; - hpa += mapped; - remaining -= mapped; + int error; + + for (remaining = len; remaining > 0; gpa += mapped, hpa += mapped, + remaining -= mapped) { + error = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining, + &mapped); + if (error != 0) { + /* XXXKIB rollback */ + return (error); + } } + return (0); } -void +int iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len) { uint64_t unmapped, remaining; - - remaining = len; - - while (remaining > 0) { - unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining); - gpa += unmapped; - remaining -= unmapped; + int error; + + for (remaining = len; remaining > 0; gpa += unmapped, + remaining -= unmapped) { + error = IOMMU_REMOVE_MAPPING(dom, gpa, remaining, &unmapped); + if (error != 0) { + /* XXXKIB ? */ + return (error); + } } + return (0); } void * @@ -320,23 +341,23 @@ iommu_host_domain(void) return (host_domain); } -void -iommu_add_device(void *dom, uint16_t rid) +int +iommu_add_device(void *dom, device_t dev, uint16_t rid) { - IOMMU_ADD_DEVICE(dom, rid); + return (IOMMU_ADD_DEVICE(dom, dev, rid)); } -void -iommu_remove_device(void *dom, uint16_t rid) +int +iommu_remove_device(void *dom, device_t dev, uint16_t rid) { - IOMMU_REMOVE_DEVICE(dom, rid); + return (IOMMU_REMOVE_DEVICE(dom, dev, rid)); } -void +int iommu_invalidate_tlb(void *domain) { - IOMMU_INVALIDATE_TLB(domain); + return (IOMMU_INVALIDATE_TLB(domain)); } diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h index 090415b57505..5294a9d92a6b 100644 --- a/sys/amd64/vmm/io/iommu.h +++ b/sys/amd64/vmm/io/iommu.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ #ifndef _IO_IOMMU_H_ @@ -37,13 +35,13 @@ typedef void (*iommu_enable_func_t)(void); typedef void (*iommu_disable_func_t)(void); typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr); typedef void (*iommu_destroy_domain_t)(void *domain); -typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa, - vm_paddr_t hpa, uint64_t len); -typedef uint64_t (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa, - uint64_t len); -typedef void (*iommu_add_device_t)(void *domain, uint16_t rid); -typedef void (*iommu_remove_device_t)(void *dom, uint16_t rid); -typedef void (*iommu_invalidate_tlb_t)(void *dom); +typedef int (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa, + vm_paddr_t hpa, uint64_t len, uint64_t *res_len); +typedef int (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa, + uint64_t len, uint64_t *res_len); +typedef int (*iommu_add_device_t)(void *domain, device_t dev, uint16_t rid); +typedef int (*iommu_remove_device_t)(void *dom, device_t dev, uint16_t rid); +typedef int (*iommu_invalidate_tlb_t)(void *dom); struct iommu_ops { iommu_init_func_t init; /* module wide */ @@ -67,10 +65,10 @@ void iommu_cleanup(void); void *iommu_host_domain(void); void *iommu_create_domain(vm_paddr_t maxaddr); void iommu_destroy_domain(void *dom); -void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, - size_t len); -void iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len); -void iommu_add_device(void *dom, uint16_t rid); -void iommu_remove_device(void *dom, uint16_t rid); -void iommu_invalidate_tlb(void *domain); +int iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, + size_t len); +int iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len); +int iommu_add_device(void *dom, device_t dev, uint16_t rid); +int iommu_remove_device(void *dom, device_t dev, uint16_t rid); +int iommu_invalidate_tlb(void *domain); #endif diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 6bcd8f78bbab..2cb459fb848f 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -46,12 +41,12 @@ __FBSDID("$FreeBSD$"); #include <dev/pci/pcireg.h> #include <machine/resource.h> - #include <machine/vmm.h> #include <machine/vmm_dev.h> +#include <dev/vmm/vmm_ktr.h> + #include "vmm_lapic.h" -#include "vmm_ktr.h" #include "iommu.h" #include "ppt.h" @@ -156,10 +151,19 @@ static int ppt_attach(device_t dev) { struct pptdev *ppt; + uint16_t cmd, cmd1; + int error; ppt = device_get_softc(dev); - iommu_remove_device(iommu_host_domain(), pci_get_rid(dev)); + cmd1 = cmd = pci_read_config(dev, PCIR_COMMAND, 2); + cmd &= ~(PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + pci_write_config(dev, PCIR_COMMAND, cmd, 2); + error = iommu_remove_device(iommu_host_domain(), dev, pci_get_rid(dev)); + if (error != 0) { + pci_write_config(dev, PCIR_COMMAND, cmd1, 2); + return (error); + } num_pptdevs++; TAILQ_INSERT_TAIL(&pptdev_list, ppt, next); ppt->dev = dev; @@ -174,15 +178,22 @@ static int ppt_detach(device_t dev) { struct pptdev *ppt; + int error; ppt = device_get_softc(dev); if (ppt->vm != NULL) return (EBUSY); + if (iommu_host_domain() != NULL) { + error = iommu_add_device(iommu_host_domain(), dev, + pci_get_rid(dev)); + } else { + error = 0; + } + if (error != 0) + return (error); num_pptdevs--; TAILQ_REMOVE(&pptdev_list, ppt, next); - pci_disable_busmaster(dev); - iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); return (0); } @@ -257,7 +268,7 @@ ppt_teardown_msi(struct pptdev *ppt) if (res != NULL) bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); - + ppt->msi.res[i] = NULL; ppt->msi.cookie[i] = NULL; } @@ -268,7 +279,7 @@ ppt_teardown_msi(struct pptdev *ppt) ppt->msi.num_msgs = 0; } -static void +static void ppt_teardown_msix_intr(struct pptdev *ppt, int idx) { int rid; @@ -279,25 +290,25 @@ ppt_teardown_msix_intr(struct pptdev *ppt, int idx) res = ppt->msix.res[idx]; cookie = ppt->msix.cookie[idx]; - if (cookie != NULL) + if (cookie != NULL) bus_teardown_intr(ppt->dev, res, cookie); - if (res != NULL) + if (res != NULL) bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); ppt->msix.res[idx] = NULL; ppt->msix.cookie[idx] = NULL; } -static void +static void ppt_teardown_msix(struct pptdev *ppt) { int i; - if (ppt->msix.num_msgs == 0) + if (ppt->msix.num_msgs == 0) return; - for (i = 0; i < ppt->msix.num_msgs; i++) + for (i = 0; i < ppt->msix.num_msgs; i++) ppt_teardown_msix_intr(ppt, i); free(ppt->msix.res, M_PPTMSIX); @@ -307,14 +318,14 @@ ppt_teardown_msix(struct pptdev *ppt) pci_release_msi(ppt->dev); if (ppt->msix.msix_table_res) { - bus_release_resource(ppt->dev, SYS_RES_MEMORY, + bus_release_resource(ppt->dev, SYS_RES_MEMORY, ppt->msix.msix_table_rid, ppt->msix.msix_table_res); ppt->msix.msix_table_res = NULL; ppt->msix.msix_table_rid = 0; } if (ppt->msix.msix_pba_res) { - bus_release_resource(ppt->dev, SYS_RES_MEMORY, + bus_release_resource(ppt->dev, SYS_RES_MEMORY, ppt->msix.msix_pba_rid, ppt->msix.msix_pba_res); ppt->msix.msix_pba_res = NULL; @@ -379,11 +390,28 @@ ppt_pci_reset(device_t dev) pci_power_reset(dev); } +static uint16_t +ppt_bar_enables(struct pptdev *ppt) +{ + struct pci_map *pm; + uint16_t cmd; + + cmd = 0; + for (pm = pci_first_bar(ppt->dev); pm != NULL; pm = pci_next_bar(pm)) { + if (PCI_BAR_IO(pm->pm_value)) + cmd |= PCIM_CMD_PORTEN; + if (PCI_BAR_MEM(pm->pm_value)) + cmd |= PCIM_CMD_MEMEN; + } + return (cmd); +} + int ppt_assign_device(struct vm *vm, int bus, int slot, int func) { struct 
pptdev *ppt; int error; + uint16_t cmd; /* Passing NULL requires the device to be unowned. */ error = ppt_find(NULL, bus, slot, func, &ppt); @@ -393,8 +421,14 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func) pci_save_state(ppt->dev); ppt_pci_reset(ppt->dev); pci_restore_state(ppt->dev); + error = iommu_add_device(vm_iommu_domain(vm), ppt->dev, + pci_get_rid(ppt->dev)); + if (error != 0) + return (error); ppt->vm = vm; - iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + cmd = pci_read_config(ppt->dev, PCIR_COMMAND, 2); + cmd |= PCIM_CMD_BUSMASTEREN | ppt_bar_enables(ppt); + pci_write_config(ppt->dev, PCIR_COMMAND, cmd, 2); return (0); } @@ -403,20 +437,25 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; int error; + uint16_t cmd; error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); + cmd = pci_read_config(ppt->dev, PCIR_COMMAND, 2); + cmd &= ~(PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + pci_write_config(ppt->dev, PCIR_COMMAND, cmd, 2); pci_save_state(ppt->dev); ppt_pci_reset(ppt->dev); pci_restore_state(ppt->dev); ppt_unmap_all_mmio(vm, ppt); ppt_teardown_msi(ppt); ppt_teardown_msix(ppt); - iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + error = iommu_remove_device(vm_iommu_domain(vm), ppt->dev, + pci_get_rid(ppt->dev)); ppt->vm = NULL; - return (0); + return (error); } int @@ -439,6 +478,23 @@ ppt_unassign_all(struct vm *vm) return (0); } +static bool +ppt_valid_bar_mapping(struct pptdev *ppt, vm_paddr_t hpa, size_t len) +{ + struct pci_map *pm; + pci_addr_t base, size; + + for (pm = pci_first_bar(ppt->dev); pm != NULL; pm = pci_next_bar(pm)) { + if (!PCI_BAR_MEM(pm->pm_value)) + continue; + base = pm->pm_value & PCIM_BAR_MEM_BASE; + size = (pci_addr_t)1 << pm->pm_size; + if (hpa >= base && hpa + len <= base + size) + return (true); + } + return (false); +} + int ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) @@ -447,10 +503,17 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func, struct pptseg *seg; struct pptdev *ppt; + if (len % PAGE_SIZE != 0 || len == 0 || gpa % PAGE_SIZE != 0 || + hpa % PAGE_SIZE != 0 || gpa + len < gpa || hpa + len < hpa) + return (EINVAL); + error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); + if (!ppt_valid_bar_mapping(ppt, hpa, len)) + return (EINVAL); + for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) { @@ -520,7 +583,7 @@ pptintr(void *arg) } int -ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, +ppt_setup_msi(struct vm *vm, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec) { int i, rid, flags; @@ -613,7 +676,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, } int -ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, +ppt_setup_msix(struct vm *vm, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) { struct pptdev *ppt; @@ -630,10 +693,10 @@ ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, return (EBUSY); dinfo = device_get_ivars(ppt->dev); - if (!dinfo) + if (!dinfo) return (ENXIO); - /* + /* * First-time configuration: * Allocate the MSI-X table * Allocate the IRQ resources @@ -687,6 +750,9 @@ ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, } } + if (idx >= ppt->msix.num_msgs) + return (EINVAL); + if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { /* Tear down the IRQ if 
it's already set up */ ppt_teardown_msix_intr(ppt, idx); diff --git a/sys/amd64/vmm/io/ppt.h b/sys/amd64/vmm/io/ppt.h index e6339f57b8ad..f97c399564d7 100644 --- a/sys/amd64/vmm/io/ppt.h +++ b/sys/amd64/vmm/io/ppt.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _IO_PPT_H_ @@ -36,9 +34,9 @@ int ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int ppt_unmap_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len); -int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, +int ppt_setup_msi(struct vm *vm, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec); -int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, +int ppt_setup_msix(struct vm *vm, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); int ppt_disable_msix(struct vm *vm, int bus, int slot, int func); int ppt_assigned_devices(struct vm *vm); diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c index de34b82c0cff..a003cd7e8c07 100644 --- a/sys/amd64/vmm/io/vatpic.c +++ b/sys/amd64/vmm/io/vatpic.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * All rights reserved. @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -41,12 +39,12 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <x86/apicreg.h> -#include <dev/ic/i8259.h> - #include <machine/vmm.h> #include <machine/vmm_snapshot.h> -#include "vmm_ktr.h" +#include <dev/ic/i8259.h> +#include <dev/vmm/vmm_ktr.h> + #include "vmm_lapic.h" #include "vioapic.h" #include "vatpic.h" @@ -262,7 +260,7 @@ vatpic_notify_intr(struct vatpic *vatpic) * interrupt. 
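* (Wiring note: the master 8259's INT output reaches the guest both as ExtINT * on the local APICs' LINT0, which is why the call below now takes a NULL vcpu * to mean "every vcpu", and as pin 0 of the virtual I/O APIC, so the pulse * covers whichever routing the guest OS actually programmed.)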
*/ atpic->intr_raised = true; - lapic_set_local_intr(vatpic->vm, -1, APIC_LVT_LINT0); + lapic_set_local_intr(vatpic->vm, NULL, APIC_LVT_LINT0); vioapic_pulse_irq(vatpic->vm, 0); } else { VATPIC_CTR3(vatpic, "atpic master no eligible interrupts " @@ -712,7 +710,7 @@ vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port, } int -vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +vatpic_master_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; @@ -732,7 +730,7 @@ vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +vatpic_slave_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; @@ -752,7 +750,7 @@ vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +vatpic_elc_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; @@ -809,6 +807,7 @@ vatpic_init(struct vm *vm) void vatpic_cleanup(struct vatpic *vatpic) { + mtx_destroy(&vatpic->mtx); free(vatpic, M_VATPIC); } diff --git a/sys/amd64/vmm/io/vatpic.h b/sys/amd64/vmm/io/vatpic.h index 8990a2a5fcb0..352c55a3089d 100644 --- a/sys/amd64/vmm/io/vatpic.h +++ b/sys/amd64/vmm/io/vatpic.h @@ -22,8 +22,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VATPIC_H_ @@ -41,11 +39,11 @@ struct vm_snapshot_meta; struct vatpic *vatpic_init(struct vm *vm); void vatpic_cleanup(struct vatpic *vatpic); -int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, - int bytes, uint32_t *eax); -int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, - int bytes, uint32_t *eax); -int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +int vatpic_master_handler(struct vm *vm, bool in, int port, int bytes, + uint32_t *eax); +int vatpic_slave_handler(struct vm *vm, bool in, int port, int bytes, + uint32_t *eax); +int vatpic_elc_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax); int vatpic_assert_irq(struct vm *vm, int irq); diff --git a/sys/amd64/vmm/io/vatpit.c b/sys/amd64/vmm/io/vatpit.c index 27bbf227cbbb..31b6c2ad0f89 100644 --- a/sys/amd64/vmm/io/vatpit.c +++ b/sys/amd64/vmm/io/vatpit.c @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -43,7 +41,8 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <machine/vmm_snapshot.h> -#include "vmm_ktr.h" +#include <dev/vmm/vmm_ktr.h> + #include "vatpic.h" #include "vioapic.h" #include "vatpit.h" @@ -336,8 +335,7 @@ vatpit_update_mode(struct vatpit *vatpit, uint8_t val) } int -vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax) +vatpit_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax) { struct vatpit *vatpit; struct channel *c; @@ -419,7 +417,7 @@ vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +vatpit_nmisc_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax) { struct vatpit *vatpit; @@ -471,6 +469,7 @@ vatpit_cleanup(struct vatpit *vatpit) for (i = 0; i < 3; i++) 
callout_drain(&vatpit->channel[i].callout); + mtx_destroy(&vatpit->mtx); free(vatpit, M_VATPIT); } diff --git a/sys/amd64/vmm/io/vatpit.h b/sys/amd64/vmm/io/vatpit.h index 65e06ec9bf58..c18071069d3c 100644 --- a/sys/amd64/vmm/io/vatpit.h +++ b/sys/amd64/vmm/io/vatpit.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * Copyright (c) 2011 NetApp, Inc. @@ -25,14 +25,12 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VATPIT_H_ #define _VATPIT_H_ -#include <machine/timerreg.h> +#include <x86/timerreg.h> #define NMISC_PORT 0x61 @@ -41,10 +39,9 @@ struct vm_snapshot_meta; struct vatpit *vatpit_init(struct vm *vm); void vatpit_cleanup(struct vatpit *vatpit); -int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +int vatpit_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax); +int vatpit_nmisc_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *eax); -int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, - int bytes, uint32_t *eax); #ifdef BHYVE_SNAPSHOT int vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta); #endif diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c index 530f5d49f8f1..88063f2952e5 100644 --- a/sys/amd64/vmm/io/vhpet.c +++ b/sys/amd64/vmm/io/vhpet.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * Copyright (c) 2013 Neel Natu <neel@freebsd.org> @@ -25,13 +25,9 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -41,19 +37,18 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/systm.h> -#include <dev/acpica/acpi_hpet.h> - #include <machine/vmm.h> #include <machine/vmm_dev.h> #include <machine/vmm_snapshot.h> +#include <dev/acpica/acpi_hpet.h> +#include <dev/vmm/vmm_ktr.h> + #include "vmm_lapic.h" #include "vatpic.h" #include "vioapic.h" #include "vhpet.h" -#include "vmm_ktr.h" - static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet"); #define HPET_FREQ 16777216 /* 16.7 (2^24) Mhz */ @@ -236,7 +231,7 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n) lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32, vhpet->timer[n].msireg & 0xffffffff); return; - } + } pin = vhpet_timer_ioapic_pin(vhpet, n); if (pin == 0) { @@ -472,7 +467,7 @@ vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data, } int -vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, +vhpet_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t val, int size, void *arg) { struct vhpet *vhpet; @@ -481,7 +476,7 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, sbintime_t now, *nowptr; int i, offset; - vhpet = vm_hpet(vm); + vhpet = vm_hpet(vcpu_vm(vcpu)); offset = gpa - VHPET_BASE; VHPET_LOCK(vhpet); @@ -498,7 +493,7 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, if ((offset & 0x4) != 0) { mask <<= 32; data <<= 32; - } + } break; default: VM_CTR2(vhpet->vm, "hpet invalid mmio write: " @@ -622,14 +617,14 @@ done: } int -vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, +vhpet_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { int i, offset; struct vhpet *vhpet; uint64_t data; - vhpet = vm_hpet(vm); + vhpet = vm_hpet(vcpu_vm(vcpu)); offset = gpa - VHPET_BASE; VHPET_LOCK(vhpet); @@ -652,7 +647,7 @@ vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) { data = vhpet_capabilities(); - goto done; + goto done; } if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { @@ -754,6 +749,7 @@ vhpet_cleanup(struct vhpet *vhpet) for (i = 0; i < VHPET_NUM_TIMERS; i++) callout_drain(&vhpet->timer[i].callout); + mtx_destroy(&vhpet->mtx); free(vhpet, M_VHPET); } @@ -779,7 +775,7 @@ vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta) /* at restore time the countbase should have the value it had when the * snapshot was created; since the value is not directly kept in * vhpet->countbase, but rather computed relative to the current system - * uptime using countbase_sbt, save the value retured by vhpet_counter + * uptime using countbase_sbt, save the value returned by vhpet_counter */ if (meta->op == VM_SNAPSHOT_SAVE) countbase = vhpet_counter(vhpet, NULL); diff --git a/sys/amd64/vmm/io/vhpet.h b/sys/amd64/vmm/io/vhpet.h index 113683c09b33..4cc000ec70cf 100644 --- a/sys/amd64/vmm/io/vhpet.h +++ b/sys/amd64/vmm/io/vhpet.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * Copyright (c) 2013 Neel Natu <neel@freebsd.org> @@ -25,8 +25,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ #ifndef _VHPET_H_ @@ -35,13 +33,14 @@ #define VHPET_BASE 0xfed00000 #define VHPET_SIZE 1024 +#ifdef _KERNEL struct vm_snapshot_meta; struct vhpet *vhpet_init(struct vm *vm); void vhpet_cleanup(struct vhpet *vhpet); -int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, +int vhpet_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t val, int size, void *arg); -int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val, +int vhpet_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *val, int size, void *arg); int vhpet_getcap(struct vm_hpet_cap *cap); #ifdef BHYVE_SNAPSHOT @@ -49,4 +48,6 @@ int vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta); int vhpet_restore_time(struct vhpet *vhpet); #endif +#endif /* _KERNEL */ + #endif /* _VHPET_H_ */ diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c index 639c1b07eb08..7df6193d6dc0 100644 --- a/sys/amd64/vmm/io/vioapic.c +++ b/sys/amd64/vmm/io/vioapic.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * Copyright (c) 2013 Neel Natu <neel@freebsd.org> @@ -25,13 +25,9 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -46,7 +42,8 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <machine/vmm_snapshot.h> -#include "vmm_ktr.h" +#include <dev/vmm/vmm_ktr.h> + #include "vmm_lapic.h" #include "vlapic.h" #include "vioapic.h" @@ -133,6 +130,15 @@ vioapic_send_intr(struct vioapic *vioapic, int pin) vector = low & IOART_INTVEC; dest = high >> APIC_ID_SHIFT; + /* + * Ideally we'd just call lapic_intr_msi() here with the + * constructed MSI instead of interpreting it for ourselves. + * But until/unless we support emulated IOMMUs with interrupt + * remapping, interpretation is simple. We just need to mask + * in the Extended Destination ID bits for the 15-bit + * enlightenment (http://david.woodhou.se/ExtDestId.pdf) + */ + dest |= ((high & APIC_EXT_ID_MASK) >> APIC_EXT_ID_SHIFT) << 8; vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector); } @@ -237,7 +243,7 @@ vioapic_pulse_irq(struct vm *vm, int irq) * configuration. 
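The Extended Destination ID hunk in vioapic_send_intr widens the classic 8-bit destination using otherwise-unused redirection entry bits. A sketch of the bit assembly, assuming the RTE layout described in the linked ExtDestId note (classic ID in bits 31:24 of the high dword, extended bits below it feeding destination bits 14:8); rte_destination is an illustrative name:

#include <stdint.h>

/* Assemble up to a 15-bit APIC destination from the high dword of an
 * I/O APIC redirection table entry, per the 15-bit enlightenment. */
static uint32_t
rte_destination(uint32_t high)
{
	uint32_t dest = high >> 24;		/* classic 8-bit ID */
	uint32_t edid = (high >> 16) & 0x7f;	/* extended bits (assumed position) */

	return (dest | (edid << 8));
}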
*/ static void -vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg) +vioapic_update_tmr(struct vcpu *vcpu, void *arg) { struct vioapic *vioapic; struct vlapic *vlapic; @@ -245,8 +251,8 @@ vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg) int delmode, pin, vector; bool level, phys; - vlapic = vm_lapic(vm, vcpuid); - vioapic = vm_ioapic(vm); + vlapic = vm_lapic(vcpu); + vioapic = vm_ioapic(vcpu_vm(vcpu)); VIOAPIC_LOCK(vioapic); /* @@ -277,7 +283,7 @@ vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg) } static uint32_t -vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr) +vioapic_read(struct vioapic *vioapic, struct vcpu *vcpu, uint32_t addr) { int regnum, pin, rshift; @@ -312,7 +318,8 @@ vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr) } static void -vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data) +vioapic_write(struct vioapic *vioapic, struct vcpu *vcpu, uint32_t addr, + uint32_t data) { uint64_t data64, mask64; uint64_t last, changed; @@ -372,7 +379,7 @@ vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data) "vlapic trigger-mode register", pin); VIOAPIC_UNLOCK(vioapic); allvcpus = vm_active_cpus(vioapic->vm); - (void)vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus, + (void)vm_smp_rendezvous(vcpu, allvcpus, vioapic_update_tmr, NULL); VIOAPIC_LOCK(vioapic); } @@ -392,7 +399,7 @@ vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data) } static int -vioapic_mmio_rw(struct vioapic *vioapic, int vcpuid, uint64_t gpa, +vioapic_mmio_rw(struct vioapic *vioapic, struct vcpu *vcpu, uint64_t gpa, uint64_t *data, int size, bool doread) { uint64_t offset; @@ -417,10 +424,10 @@ vioapic_mmio_rw(struct vioapic *vioapic, int vcpuid, uint64_t gpa, vioapic->ioregsel = *data; } else { if (doread) { - *data = vioapic_read(vioapic, vcpuid, + *data = vioapic_read(vioapic, vcpu, vioapic->ioregsel); } else { - vioapic_write(vioapic, vcpuid, vioapic->ioregsel, + vioapic_write(vioapic, vcpu, vioapic->ioregsel, *data); } } @@ -430,31 +437,31 @@ vioapic_mmio_rw(struct vioapic *vioapic, int vcpuid, uint64_t gpa, } int -vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, +vioapic_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { int error; struct vioapic *vioapic; - vioapic = vm_ioapic(vm); - error = vioapic_mmio_rw(vioapic, vcpuid, gpa, rval, size, true); + vioapic = vm_ioapic(vcpu_vm(vcpu)); + error = vioapic_mmio_rw(vioapic, vcpu, gpa, rval, size, true); return (error); } int -vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t wval, +vioapic_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) { int error; struct vioapic *vioapic; - vioapic = vm_ioapic(vm); - error = vioapic_mmio_rw(vioapic, vcpuid, gpa, &wval, size, false); + vioapic = vm_ioapic(vcpu_vm(vcpu)); + error = vioapic_mmio_rw(vioapic, vcpu, gpa, &wval, size, false); return (error); } void -vioapic_process_eoi(struct vm *vm, int vcpuid, int vector) +vioapic_process_eoi(struct vm *vm, int vector) { struct vioapic *vioapic; int pin; @@ -507,6 +514,7 @@ void vioapic_cleanup(struct vioapic *vioapic) { + mtx_destroy(&vioapic->mtx); free(vioapic, M_VIOAPIC); } diff --git a/sys/amd64/vmm/io/vioapic.h b/sys/amd64/vmm/io/vioapic.h index 19dbffe3ec24..ac05d95357ff 100644 --- a/sys/amd64/vmm/io/vioapic.h +++ b/sys/amd64/vmm/io/vioapic.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * 
Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * Copyright (c) 2013 Neel Natu <neel@freebsd.org> @@ -25,18 +25,17 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VIOAPIC_H_ #define _VIOAPIC_H_ -struct vm_snapshot_meta; - #define VIOAPIC_BASE 0xFEC00000 #define VIOAPIC_SIZE 4096 +#ifdef _KERNEL +struct vm_snapshot_meta; + struct vioapic *vioapic_init(struct vm *vm); void vioapic_cleanup(struct vioapic *vioapic); @@ -44,16 +43,18 @@ int vioapic_assert_irq(struct vm *vm, int irq); int vioapic_deassert_irq(struct vm *vm, int irq); int vioapic_pulse_irq(struct vm *vm, int irq); -int vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, +int vioapic_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size, void *arg); -int vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, +int vioapic_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg); int vioapic_pincount(struct vm *vm); -void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector); +void vioapic_process_eoi(struct vm *vm, int vector); #ifdef BHYVE_SNAPSHOT int vioapic_snapshot(struct vioapic *vioapic, struct vm_snapshot_meta *meta); #endif -#endif +#endif /* _KERNEL */ + +#endif /* _VIOAPIC_H_ */ diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 9599b4b4e62c..9879dfa164a4 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -25,13 +25,9 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -47,12 +43,12 @@ __FBSDID("$FreeBSD$"); #include <machine/clock.h> #include <machine/smp.h> - #include <machine/vmm.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_ktr.h> + #include "vmm_lapic.h" -#include "vmm_ktr.h" #include "vmm_stat.h" #include "vlapic.h" @@ -61,7 +57,7 @@ __FBSDID("$FreeBSD$"); #define PRIO(x) ((x) >> 4) -#define VLAPIC_VERSION (16) +#define VLAPIC_VERSION (0x14) #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 
1 : 0) @@ -84,6 +80,7 @@ __FBSDID("$FreeBSD$"); static void vlapic_set_error(struct vlapic *, uint32_t, bool); static void vlapic_callout_handler(void *arg); +static void vlapic_reset(struct vlapic *vlapic); static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) @@ -386,7 +383,7 @@ vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) int idx; lapic = vlapic->apic_page; - lvtptr = vlapic_get_lvtptr(vlapic, offset); + lvtptr = vlapic_get_lvtptr(vlapic, offset); val = *lvtptr; idx = lvt_off_to_idx(offset); @@ -459,13 +456,13 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) return (0); } if (vlapic_set_intr_ready(vlapic, vec, false)) - vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true); + vcpu_notify_event(vlapic->vcpu, true); break; case APIC_LVT_DM_NMI: - vm_inject_nmi(vlapic->vm, vlapic->vcpuid); + vm_inject_nmi(vlapic->vcpu); break; case APIC_LVT_DM_EXTINT: - vm_inject_extint(vlapic->vm, vlapic->vcpuid); + vm_inject_extint(vlapic->vcpu); break; default: // Other modes ignored @@ -587,20 +584,18 @@ vlapic_process_eoi(struct vlapic *vlapic) } isrptr[idx] &= ~(1 << bitpos); vector = i * 32 + bitpos; - VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", - vector); + VLAPIC_CTR1(vlapic, "EOI vector %d", vector); VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); vlapic->isrvec_stk_top--; vlapic_update_ppr(vlapic); if ((tmrptr[idx] & (1 << bitpos)) != 0) { - vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, - vector); + vioapic_process_eoi(vlapic->vm, vector); } return; } } - VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI"); - vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1); + VLAPIC_CTR0(vlapic, "Gratuitous EOI"); + vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1); } static __inline int @@ -636,7 +631,7 @@ vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) return; if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { - vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1); + vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1); } } @@ -650,7 +645,7 @@ vlapic_fire_timer(struct vlapic *vlapic) if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { VLAPIC_CTR0(vlapic, "vlapic timer fired"); - vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); + vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1); } } @@ -662,7 +657,7 @@ vlapic_fire_cmci(struct vlapic *vlapic) { if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { - vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1); + vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1); } } @@ -681,10 +676,10 @@ vlapic_trigger_lvt(struct vlapic *vlapic, int vector) */ switch (vector) { case APIC_LVT_LINT0: - vm_inject_extint(vlapic->vm, vlapic->vcpuid); + vm_inject_extint(vlapic->vcpu); break; case APIC_LVT_LINT1: - vm_inject_nmi(vlapic->vm, vlapic->vcpuid); + vm_inject_nmi(vlapic->vcpu); break; default: break; @@ -701,8 +696,8 @@ vlapic_trigger_lvt(struct vlapic *vlapic, int vector) case APIC_LVT_THERMAL: case APIC_LVT_CMCI: if (vlapic_fire_lvt(vlapic, vector)) { - vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, - LVTS_TRIGGERRED, vector, 1); + vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERRED, + vector, 1); } break; default: @@ -809,7 +804,7 @@ vlapic_icrtmr_write_handler(struct vlapic *vlapic) /* * This function populates 'dmask' with the set of vcpus that match the * addressing specified by the (dest, phys, lowprio) tuple. - * + * * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) * or xAPIC (8-bit) destination field. 
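As a concrete companion to the vlapic_calcdest comment above, here is a heavily simplified sketch of the logical-mode match in the xAPIC flat model, where each vCPU owns one LDR bit; the real function also handles cluster mode, lowest-priority arbitration, and x2APIC. flat_logical_match is an illustrative name:

#include <stdbool.h>
#include <stdint.h>

/* Flat-model logical destination match: LDR bits 31:24 hold a one-hot
 * logical ID, and a logical-mode interrupt targets every CPU whose
 * LDR bit intersects the 8-bit destination field. */
static bool
flat_logical_match(uint32_t ldr, uint8_t dest)
{
	return (((ldr >> 24) & dest) != 0);
}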
*/ @@ -867,7 +862,7 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, CPU_ZERO(dmask); amask = vm_active_cpus(vm); CPU_FOREACH_ISSET(vcpuid, &amask) { - vlapic = vm_lapic(vm, vcpuid); + vlapic = vm_lapic(vm_vcpu(vm, vcpuid)); dfr = vlapic->apic_page->dfr; ldr = vlapic->apic_page->ldr; @@ -906,7 +901,8 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, } } -static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); +static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu"); +static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu"); static void vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) @@ -914,8 +910,8 @@ vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) struct LAPIC *lapic = vlapic->apic_page; if (lapic->tpr != val) { - VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed " - "from %#x to %#x", lapic->tpr, val); + VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x", + lapic->tpr, val); lapic->tpr = val; vlapic_update_ppr(vlapic); } @@ -935,7 +931,7 @@ vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) uint8_t tpr; if (val & ~0xf) { - vm_inject_gp(vlapic->vm, vlapic->vcpuid); + vm_inject_gp(vlapic->vcpu); return; } @@ -952,18 +948,97 @@ vlapic_get_cr8(struct vlapic *vlapic) return (tpr >> 4); } +static bool +vlapic_is_icr_valid(uint64_t icrval) +{ + uint32_t mode = icrval & APIC_DELMODE_MASK; + uint32_t level = icrval & APIC_LEVEL_MASK; + uint32_t trigger = icrval & APIC_TRIGMOD_MASK; + uint32_t shorthand = icrval & APIC_DEST_MASK; + + switch (mode) { + case APIC_DELMODE_FIXED: + if (trigger == APIC_TRIGMOD_EDGE) + return (true); + /* + * AMD allows a level assert IPI and Intel converts a level + * assert IPI into an edge IPI. + */ + if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT) + return (true); + break; + case APIC_DELMODE_LOWPRIO: + case APIC_DELMODE_SMI: + case APIC_DELMODE_NMI: + case APIC_DELMODE_INIT: + if (trigger == APIC_TRIGMOD_EDGE && + (shorthand == APIC_DEST_DESTFLD || + shorthand == APIC_DEST_ALLESELF)) + return (true); + /* + * AMD allows a level assert IPI and Intel converts a level + * assert IPI into an edge IPI. + */ + if (trigger == APIC_TRIGMOD_LEVEL && + level == APIC_LEVEL_ASSERT && + (shorthand == APIC_DEST_DESTFLD || + shorthand == APIC_DEST_ALLESELF)) + return (true); + /* + * A level-triggered deassert INIT is defined in the Intel + * Multiprocessor Specification and the Intel Software Developer + * Manual. Due to the MPS it's required to send a level assert + * INIT to a cpu and then a level deassert INIT. Some operating + * systems, e.g. FreeBSD or Linux, use that algorithm. According + * to the SDM a level deassert INIT is only supported by Pentium + * and P6 processors. It's always sent to all cpus regardless of + * the destination or shorthand field. It resets the arbitration + * id register. This register is not software accessible and + * only required for the APIC bus arbitration. So, the level + * deassert INIT doesn't need any emulation and we should ignore + * it. The SDM also defines that newer processors don't support + * the level deassert INIT and it's not valid any more. As it's + * defined for older systems, it can't be invalid per se. + * Otherwise, backward compatibility would be broken. However, + * when returning false here, it'll be ignored, which is the + * desired behaviour.
+ */ + if (mode == APIC_DELMODE_INIT && + trigger == APIC_TRIGMOD_LEVEL && + level == APIC_LEVEL_DEASSERT) + return (false); + break; + case APIC_DELMODE_STARTUP: + if (shorthand == APIC_DEST_DESTFLD || + shorthand == APIC_DEST_ALLESELF) + return (true); + break; + case APIC_DELMODE_RR: + /* Only available on AMD! */ + if (trigger == APIC_TRIGMOD_EDGE && + shorthand == APIC_DEST_DESTFLD) + return (true); + break; + case APIC_DELMODE_RESV: + return (false); + default: + __assert_unreachable(); + } + + return (false); +} + int vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) { int i; bool phys; - cpuset_t dmask; + cpuset_t dmask, ipimask; uint64_t icrval; - uint32_t dest, vec, mode; - struct vlapic *vlapic2; + uint32_t dest, vec, mode, shorthand; + struct vcpu *vcpu; struct vm_exit *vmexit; struct LAPIC *lapic; - uint16_t maxcpus; lapic = vlapic->apic_page; lapic->icr_lo &= ~APIC_DELSTAT_PEND; @@ -975,97 +1050,166 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) dest = icrval >> (32 + 24); vec = icrval & APIC_VECTOR_MASK; mode = icrval & APIC_DELMODE_MASK; + phys = (icrval & APIC_DESTMODE_LOG) == 0; + shorthand = icrval & APIC_DEST_MASK; + + VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); - if (mode == APIC_DELMODE_FIXED && vec < 16) { - vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false); - VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); + switch (shorthand) { + case APIC_DEST_DESTFLD: + vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); + break; + case APIC_DEST_SELF: + CPU_SETOF(vlapic->vcpuid, &dmask); + break; + case APIC_DEST_ALLISELF: + dmask = vm_active_cpus(vlapic->vm); + break; + case APIC_DEST_ALLESELF: + dmask = vm_active_cpus(vlapic->vm); + CPU_CLR(vlapic->vcpuid, &dmask); + break; + default: + __assert_unreachable(); + } + + /* + * Ignore invalid combinations of the icr. + */ + if (!vlapic_is_icr_valid(icrval)) { + VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval); return (0); } - VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); + /* + * ipimask is a set of vCPUs needing userland handling of the current + * IPI. 
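vlapic_is_icr_valid and the rewritten icrlo handler both work on the raw ICR fields. The decomposition below restates the layout they rely on, with bit positions per the Intel SDM; the struct and function names are illustrative, not part of the patch:

#include <stdbool.h>
#include <stdint.h>

struct icr_fields {
	uint32_t vector;	/* bits 7:0 */
	uint32_t mode;		/* bits 10:8: fixed/lowprio/SMI/NMI/INIT/SIPI */
	bool	 logical;	/* bit 11: destination mode */
	bool	 level_assert;	/* bit 14: level */
	bool	 level_trig;	/* bit 15: trigger mode */
	uint32_t shorthand;	/* bits 19:18: none/self/all/all-but-self */
};

static struct icr_fields
icr_decode(uint64_t icrval)
{
	struct icr_fields f;

	f.vector = icrval & 0xff;
	f.mode = (icrval >> 8) & 0x7;
	f.logical = (icrval >> 11) & 1;
	f.level_assert = (icrval >> 14) & 1;
	f.level_trig = (icrval >> 15) & 1;
	f.shorthand = (icrval >> 18) & 0x3;
	return (f);
}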
+ */ + CPU_ZERO(&ipimask); - if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { - switch (icrval & APIC_DEST_MASK) { - case APIC_DEST_DESTFLD: - phys = ((icrval & APIC_DESTMODE_LOG) == 0); - vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, - x2apic(vlapic)); - break; - case APIC_DEST_SELF: - CPU_SETOF(vlapic->vcpuid, &dmask); - break; - case APIC_DEST_ALLISELF: - dmask = vm_active_cpus(vlapic->vm); - break; - case APIC_DEST_ALLESELF: - dmask = vm_active_cpus(vlapic->vm); - CPU_CLR(vlapic->vcpuid, &dmask); - break; - default: - CPU_ZERO(&dmask); /* satisfy gcc */ - break; + switch (mode) { + case APIC_DELMODE_FIXED: + if (vec < 16) { + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, + false); + VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); + return (0); } CPU_FOREACH_ISSET(i, &dmask) { - if (mode == APIC_DELMODE_FIXED) { - lapic_intr_edge(vlapic->vm, i, vec); - vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, - IPIS_SENT, i, 1); - VLAPIC_CTR2(vlapic, "vlapic sending ipi %d " - "to vcpuid %d", vec, i); - } else { - vm_inject_nmi(vlapic->vm, i); - VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi " - "to vcpuid %d", i); - } + vcpu = vm_vcpu(vlapic->vm, i); + lapic_intr_edge(vcpu, vec); + vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1); + vmm_stat_incr(vcpu, VLAPIC_IPI_RECV, 1); + VLAPIC_CTR2(vlapic, + "vlapic sending ipi %d to vcpuid %d", vec, i); } - return (0); /* handled completely in the kernel */ - } + break; + case APIC_DELMODE_NMI: + CPU_FOREACH_ISSET(i, &dmask) { + vcpu = vm_vcpu(vlapic->vm, i); + vm_inject_nmi(vcpu); + VLAPIC_CTR1(vlapic, + "vlapic sending ipi nmi to vcpuid %d", i); + } - maxcpus = vm_get_maxcpus(vlapic->vm); - if (mode == APIC_DELMODE_INIT) { - if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) - return (0); + break; + case APIC_DELMODE_INIT: + case APIC_DELMODE_STARTUP: + if (!vlapic->ipi_exit) { + if (!phys) + break; - if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { - vlapic2 = vm_lapic(vlapic->vm, dest); + i = vm_apicid2vcpuid(vlapic->vm, dest); + if (i >= vm_get_maxcpus(vlapic->vm) || + i == vlapic->vcpuid) + break; - /* move from INIT to waiting-for-SIPI state */ - if (vlapic2->boot_state == BS_INIT) { - vlapic2->boot_state = BS_SIPI; - } + CPU_SETOF(i, &ipimask); - return (0); + break; } + + CPU_COPY(&dmask, &ipimask); + break; + default: + return (1); } - if (mode == APIC_DELMODE_STARTUP) { - if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { - vlapic2 = vm_lapic(vlapic->vm, dest); + if (!CPU_EMPTY(&ipimask)) { + vmexit = vm_exitinfo(vlapic->vcpu); + vmexit->exitcode = VM_EXITCODE_IPI; + vmexit->u.ipi.mode = mode; + vmexit->u.ipi.vector = vec; + *vm_exitinfo_cpuset(vlapic->vcpu) = ipimask; - /* - * Ignore SIPIs in any state other than wait-for-SIPI - */ - if (vlapic2->boot_state != BS_SIPI) - return (0); + *retu = true; + } - vlapic2->boot_state = BS_RUNNING; + return (0); +} - *retu = true; - vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); - vmexit->exitcode = VM_EXITCODE_SPINUP_AP; - vmexit->u.spinup_ap.vcpu = dest; - vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; +static void +vlapic_handle_init(struct vcpu *vcpu, void *arg) +{ + struct vlapic *vlapic = vm_lapic(vcpu); - return (0); + vlapic_reset(vlapic); +} + +int +vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) +{ + struct vlapic *vlapic = vm_lapic(vcpu); + cpuset_t *dmask = vm_exitinfo_cpuset(vcpu); + uint8_t vec = vme->u.ipi.vector; + + *retu = true; + switch (vme->u.ipi.mode) { + case APIC_DELMODE_INIT: { + cpuset_t active, reinit; + + 
active = vm_active_cpus(vcpu_vm(vcpu)); + CPU_AND(&reinit, &active, dmask); + if (!CPU_EMPTY(&reinit)) { + vm_smp_rendezvous(vcpu, reinit, vlapic_handle_init, + NULL); } + vm_await_start(vcpu_vm(vcpu), dmask); + + if (!vlapic->ipi_exit) + *retu = false; + + break; } + case APIC_DELMODE_STARTUP: + /* + * Ignore SIPIs in any state other than wait-for-SIPI + */ + *dmask = vm_start_cpus(vcpu_vm(vcpu), dmask); - /* - * This will cause a return to userland. - */ - return (1); + if (CPU_EMPTY(dmask)) { + *retu = false; + break; + } + + /* + * Old bhyve versions don't support the IPI + * exit. Translate it into the old style. + */ + if (!vlapic->ipi_exit) { + vme->exitcode = VM_EXITCODE_SPINUP_AP; + vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1; + vme->u.spinup_ap.rip = vec << PAGE_SHIFT; + } + + break; + default: + __assert_unreachable(); + } + + return (0); } void @@ -1076,9 +1220,9 @@ vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); vec = val & 0xff; - lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); - vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, - vlapic->vcpuid, 1); + lapic_intr_edge(vlapic->vcpu, vec); + vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1); + vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_RECV, 1); VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); } @@ -1107,7 +1251,7 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) if (vecptr != NULL) *vecptr = vector; return (1); - } else + } else break; } } @@ -1125,7 +1269,7 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) return ((*vlapic->ops.intr_accepted)(vlapic, vector)); /* - * clear the ready bit for vector being accepted in irr + * clear the ready bit for vector being accepted in irr * and set the vector as in service in isr. */ idx = (vector / 32) * 4; @@ -1265,17 +1409,17 @@ vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, case APIC_OFFSET_ESR: *data = lapic->esr; break; - case APIC_OFFSET_ICR_LOW: + case APIC_OFFSET_ICR_LOW: *data = lapic->icr_lo; if (x2apic(vlapic)) *data |= (uint64_t)lapic->icr_hi << 32; break; - case APIC_OFFSET_ICR_HI: + case APIC_OFFSET_ICR_HI: *data = lapic->icr_hi; break; case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: - *data = vlapic_get_lvt(vlapic, offset); + *data = vlapic_get_lvt(vlapic, offset); #ifdef INVARIANTS reg = vlapic_get_lvtptr(vlapic, offset); KASSERT(*data == *reg, ("inconsistent lvt value at " @@ -1365,7 +1509,7 @@ vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, lapic->svr = data; vlapic_svr_write_handler(vlapic); break; - case APIC_OFFSET_ICR_LOW: + case APIC_OFFSET_ICR_LOW: lapic->icr_lo = data; if (x2apic(vlapic)) lapic->icr_hi = data >> 32; @@ -1434,11 +1578,6 @@ vlapic_reset(struct vlapic *vlapic) lapic->dcr_timer = 0; vlapic_dcr_write_handler(vlapic); - if (vlapic->vcpuid == 0) - vlapic->boot_state = BS_RUNNING; /* BSP */ - else - vlapic->boot_state = BS_INIT; /* AP */ - vlapic->svr_last = lapic->svr; } @@ -1467,6 +1606,8 @@ vlapic_init(struct vlapic *vlapic) if (vlapic->vcpuid == 0) vlapic->msr_apicbase |= APICBASE_BSP; + vlapic->ipi_exit = false; + vlapic_reset(vlapic); } @@ -1475,6 +1616,7 @@ vlapic_cleanup(struct vlapic *vlapic) { callout_drain(&vlapic->callout); + mtx_destroy(&vlapic->timer_mtx); } uint64_t @@ -1498,12 +1640,12 @@ vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) } void -vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) +vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) { struct vlapic *vlapic; struct LAPIC *lapic; - vlapic = vm_lapic(vm, vcpuid); + vlapic = vm_lapic(vcpu); if (state == X2APIC_DISABLED) vlapic->msr_apicbase &= ~APICBASE_X2APIC; @@ -1536,6 +1678,7 @@ void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, int delmode, int vec) { + struct vcpu *vcpu; bool lowprio; int vcpuid; cpuset_t dmask; @@ -1556,10 +1699,11 @@ vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); CPU_FOREACH_ISSET(vcpuid, &dmask) { + vcpu = vm_vcpu(vm, vcpuid); if (delmode == IOART_DELEXINT) { - vm_inject_extint(vm, vcpuid); + vm_inject_extint(vcpu); } else { - lapic_set_intr(vm, vcpuid, vec, level); + lapic_set_intr(vcpu, vec, level); } } } @@ -1693,17 +1837,23 @@ vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr) int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) { - int i, ret; + int ret; + struct vcpu *vcpu; struct vlapic *vlapic; struct LAPIC *lapic; uint32_t ccr; + uint16_t i, maxcpus; KASSERT(vm != NULL, ("%s: arg was NULL", __func__)); ret = 0; - for (i = 0; i < VM_MAXCPU; i++) { - vlapic = vm_lapic(vm, i); + maxcpus = vm_get_maxcpus(vm); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vlapic = vm_lapic(vcpu); /* snapshot the page first; timer period depends on icr_timer */ lapic = vlapic->apic_page; @@ -1729,7 +1879,6 @@ vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) sizeof(vlapic->isrvec_stk), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, sizeof(vlapic->lvt_last), diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h index b87657c8bb51..569dce6b272f 100644 --- a/sys/amd64/vmm/io/vlapic.h +++ b/sys/amd64/vmm/io/vlapic.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. 
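With vCPUs now allocated on demand, the vlapic_snapshot loop above has to tolerate holes in the table, and any other walk over the vCPU array needs the same probe. The pattern, extracted as a self-contained sketch (for_each_vcpu is an illustrative helper, not kernel API):

#include <stddef.h>

struct vcpu;	/* opaque here */

/* Sparse vCPU-table walk: slots stay NULL until vm_alloc_vcpu
 * instantiates them, so every iteration must check for NULL. */
static void
for_each_vcpu(struct vcpu **tab, size_t maxcpus, void (*fn)(struct vcpu *))
{
	for (size_t i = 0; i < maxcpus; i++) {
		if (tab[i] == NULL)
			continue;
		fn(tab[i]);
	}
}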
@@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VLAPIC_H_ @@ -79,7 +77,7 @@ void vlapic_sync_tpr(struct vlapic *vlapic); uint64_t vlapic_get_apicbase(struct vlapic *vlapic); int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); -void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s); +void vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state s); bool vlapic_enabled(struct vlapic *vlapic); void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, @@ -115,4 +113,6 @@ void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val); int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta); #endif +int vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); + #endif /* _VLAPIC_H_ */ diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h index fe7965cb65d7..1dce593b9444 100644 --- a/sys/amd64/vmm/io/vlapic_priv.h +++ b/sys/amd64/vmm/io/vlapic_priv.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Neel Natu <neel@freebsd.org> * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VLAPIC_PRIV_H_ @@ -125,12 +123,6 @@ do { \ VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \ } while (0) -enum boot_state { - BS_INIT, - BS_SIPI, - BS_RUNNING -}; - /* * 16 priority levels with at most one vector injected per level. */ @@ -138,6 +130,7 @@ enum boot_state { #define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI +struct vcpu; struct vlapic; struct vlapic_ops { @@ -151,6 +144,7 @@ struct vlapic_ops { struct vlapic { struct vm *vm; + struct vcpu *vcpu; int vcpuid; struct LAPIC *apic_page; struct vlapic_ops ops; @@ -173,7 +167,6 @@ struct vlapic { int isrvec_stk_top; uint64_t msr_apicbase; - enum boot_state boot_state; /* * Copies of some registers in the virtual APIC page. We do this for @@ -183,6 +176,8 @@ struct vlapic { */ uint32_t svr_last; uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1]; + + bool ipi_exit; }; void vlapic_init(struct vlapic *vlapic); diff --git a/sys/amd64/vmm/io/vpmtmr.c b/sys/amd64/vmm/io/vpmtmr.c index f79e94f6d0fe..fb0775f12aab 100644 --- a/sys/amd64/vmm/io/vpmtmr.c +++ b/sys/amd64/vmm/io/vpmtmr.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -83,8 +81,7 @@ vpmtmr_cleanup(struct vpmtmr *vpmtmr) } int -vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val) +vpmtmr_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *val) { struct vpmtmr *vpmtmr; sbintime_t now, delta; diff --git a/sys/amd64/vmm/io/vpmtmr.h b/sys/amd64/vmm/io/vpmtmr.h index a10c0b4e8309..cdb87b9d77f6 100644 --- a/sys/amd64/vmm/io/vpmtmr.h +++ b/sys/amd64/vmm/io/vpmtmr.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Neel Natu (neel@freebsd.org) * All rights reserved. 
@@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VPMTMR_H_ @@ -39,8 +37,7 @@ struct vm_snapshot_meta; struct vpmtmr *vpmtmr_init(struct vm *vm); void vpmtmr_cleanup(struct vpmtmr *pmtmr); -int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val); +int vpmtmr_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *val); #ifdef BHYVE_SNAPSHOT int vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta); diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c index 65b2cd2cb39f..a56c77b7bf73 100644 --- a/sys/amd64/vmm/io/vrtc.c +++ b/sys/amd64/vmm/io/vrtc.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. @@ -27,8 +27,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -46,7 +44,8 @@ __FBSDID("$FreeBSD$"); #include <isa/rtc.h> -#include "vmm_ktr.h" +#include <dev/vmm/vmm_ktr.h> + #include "vatpic.h" #include "vioapic.h" #include "vrtc.h" @@ -347,7 +346,7 @@ rtc_to_secs(struct vrtc *vrtc) /* * Ignore 'rtc->dow' because some guests like Linux don't bother - * setting it at all while others like OpenBSD/i386 set it incorrectly. + * setting it at all while others like OpenBSD/i386 set it incorrectly. * * clock_ct_to_ts() does not depend on 'ct.dow' anyways so ignore it. */ @@ -844,8 +843,7 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) } int -vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val) +vrtc_addr_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *val) { struct vrtc *vrtc; @@ -867,8 +865,7 @@ vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val) +vrtc_data_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *val) { struct vrtc *vrtc; struct rtcdev *rtc; @@ -915,24 +912,24 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } else { *val = *((uint8_t *)rtc + offset); } - VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x", + VM_CTR2(vm, "Read value %#x from RTC offset %#x", *val, offset); } else { switch (offset) { case 10: - VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val); + VM_CTR1(vm, "RTC reg_a set to %#x", *val); vrtc_set_reg_a(vrtc, *val); break; case 11: - VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val); + VM_CTR1(vm, "RTC reg_b set to %#x", *val); error = vrtc_set_reg_b(vrtc, *val); break; case 12: - VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)", + VM_CTR1(vm, "RTC reg_c set to %#x (ignored)", *val); break; case 13: - VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)", + VM_CTR1(vm, "RTC reg_d set to %#x (ignored)", *val); break; case 0: @@ -942,7 +939,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, *val &= 0x7f; /* FALLTHRU */ default: - VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x", + VM_CTR2(vm, "RTC offset %#x set to %#x", offset, *val); *((uint8_t *)rtc + offset) = *val; break; @@ -1020,6 +1017,7 @@ vrtc_cleanup(struct vrtc *vrtc) { callout_drain(&vrtc->callout); + mtx_destroy(&vrtc->mtx); free(vrtc, M_VRTC); } diff --git 
a/sys/amd64/vmm/io/vrtc.h b/sys/amd64/vmm/io/vrtc.h index 791fb7db3e26..ee596389b945 100644 --- a/sys/amd64/vmm/io/vrtc.h +++ b/sys/amd64/vmm/io/vrtc.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Neel Natu (neel@freebsd.org) * All rights reserved. @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VRTC_H_ @@ -45,9 +43,9 @@ int vrtc_set_time(struct vm *vm, time_t secs); int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value); int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval); -int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +int vrtc_addr_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *val); -int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +int vrtc_data_handler(struct vm *vm, bool in, int port, int bytes, uint32_t *val); #ifdef BHYVE_SNAPSHOT diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 47299301f5f0..473887240b9b 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -46,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/sched.h> #include <sys/smp.h> +#include <sys/sx.h> #include <sys/vnode.h> #include <vm/vm.h> @@ -70,12 +66,14 @@ __FBSDID("$FreeBSD$"); #include <x86/ifunc.h> #include <machine/vmm.h> -#include <machine/vmm_dev.h> #include <machine/vmm_instruction_emul.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_dev.h> +#include <dev/vmm/vmm_ktr.h> +#include <dev/vmm/vmm_mem.h> + #include "vmm_ioport.h" -#include "vmm_ktr.h" #include "vmm_host.h" #include "vmm_mem.h" #include "vmm_util.h" @@ -104,8 +102,11 @@ struct vlapic; struct vcpu { struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ enum vcpu_state state; /* (o) vcpu state */ + int vcpuid; /* (o) */ int hostcpu; /* (o) vcpu's host cpu */ int reqidle; /* (i) request vcpu to idle */ + struct vm *vm; /* (o) */ + void *cookie; /* (i) cpu-specific data */ struct vlapic *vlapic; /* (i) APIC device model */ enum x2apic_state x2apic_state; /* (i) APIC mode */ uint64_t exitintinfo; /* (i) events pending at VM exit */ @@ -119,38 +120,27 @@ struct vcpu { uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ + cpuset_t exitinfo_cpuset; /* (x) storage for vmexit handlers */ uint64_t nextrip; /* (x) next instruction to execute */ uint64_t tsc_offset; /* (o) TSC offsetting */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) +#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 
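The reworked vcpu lock macros above add vcpu_lock_destroy so the spin mutex dies together with the now heap-allocated vCPU. The implied lifecycle, sketched against the kernel mutex API; the demo_* names are illustrative:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct demo_vcpu {
	struct mtx	mtx;	/* protects state/hostcpu, as in struct vcpu */
	int		state;
};

static void
demo_vcpu_setup(struct demo_vcpu *v)
{
	mtx_init(&v->mtx, "vcpu lock", NULL, MTX_SPIN);
	v->state = 0;
}

static void
demo_vcpu_teardown(struct demo_vcpu *v)
{
	/* The mutex must be unowned; the memory can be freed afterwards. */
	mtx_destroy(&v->mtx);
}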
-struct mem_seg { - size_t len; - bool sysmem; - struct vm_object *object; -}; -#define VM_MAX_MEMSEGS 4 - -struct mem_map { - vm_paddr_t gpa; - size_t len; - vm_ooffset_t segoff; - int segid; - int prot; - int flags; -}; -#define VM_MAX_MEMMAPS 8 - /* * Initialization: * (o) initialized the first time the VM is created * (i) initialized when VM is created and when it is reinitialized * (x) initialized before use + * + * Locking: + * [m] mem_segs_lock + * [r] rendezvous_mtx + * [v] reads require one frozen vcpu, writes require freezing all vcpus */ struct vm { void *cookie; /* (i) cpu-specific data */ @@ -163,26 +153,42 @@ struct vm { struct vrtc *vrtc; /* (o) virtual RTC */ volatile cpuset_t active_cpus; /* (i) active vcpus */ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ + cpuset_t startup_cpus; /* (i) [r] waiting for startup */ int suspend; /* (i) stop VM execution */ + bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ - cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ - void *rendezvous_arg; /* (x) rendezvous func/arg */ + cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ + cpuset_t rendezvous_done_cpus; /* (x) [r] rendezvous finished */ + void *rendezvous_arg; /* (x) [r] rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; struct mtx rendezvous_mtx; /* (o) rendezvous lock */ - struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ - struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ - struct vmspace *vmspace; /* (o) guest's address space */ + struct vm_mem mem; /* (i) [m+v] guest memory */ char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ - struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ + struct vcpu **vcpu; /* (o) guest vcpus */ /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ + struct sx vcpus_init_lock; /* (o) */ }; +#define VMM_CTR0(vcpu, format) \ + VCPU_CTR0((vcpu)->vm, (vcpu)->vcpuid, format) + +#define VMM_CTR1(vcpu, format, p1) \ + VCPU_CTR1((vcpu)->vm, (vcpu)->vcpuid, format, p1) + +#define VMM_CTR2(vcpu, format, p1, p2) \ + VCPU_CTR2((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2) + +#define VMM_CTR3(vcpu, format, p1, p2, p3) \ + VCPU_CTR3((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3) + +#define VMM_CTR4(vcpu, format, p1, p2, p3, p4) \ + VCPU_CTR4((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) + static int vmm_initialized; static void vmmops_panic(void); @@ -194,7 +200,7 @@ vmmops_panic(void) } #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ - DEFINE_IFUNC(static, ret_type, vmmops_##opname, args) \ + DEFINE_IFUNC(, ret_type, vmmops_##opname, args) \ { \ if (vmm_is_intel()) \ return (vmm_ops_intel.opname); \ @@ -206,37 +212,32 @@ vmmops_panic(void) DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) +DEFINE_VMMOPS_IFUNC(void, modsuspend, (void)) DEFINE_VMMOPS_IFUNC(void, modresume, (void)) DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) -DEFINE_VMMOPS_IFUNC(int, run, (void *vmi, int vcpu, register_t rip, - struct pmap *pmap, struct vm_eventinfo *info)) +DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t rip, struct pmap *pmap, + struct vm_eventinfo *info)) 
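The DEFINE_VMMOPS_IFUNC wrappers (note the dropped `static`, making them visible outside vmm.c) select the Intel or AMD implementation once, at symbol resolution time, instead of branching on the vendor at every call. A rough approximation of what the macro expands to, written with a plain resolver function since the real one relies on FreeBSD's DEFINE_IFUNC machinery:

#include <stdbool.h>

struct vmm_ops {
	int (*modinit)(int ipinum);
};

extern const struct vmm_ops vmm_ops_intel, vmm_ops_amd;
extern bool vmm_is_intel(void);

/* Resolver: runs once; the symbol thereafter binds directly to the
 * chosen vendor implementation, with no per-call indirection test. */
static int (*
resolve_vmmops_modinit(void))(int)
{
	return (vmm_is_intel() ? vmm_ops_intel.modinit :
	    vmm_ops_amd.modinit);
}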
DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) -DEFINE_VMMOPS_IFUNC(int, getreg, (void *vmi, int vcpu, int num, - uint64_t *retval)) -DEFINE_VMMOPS_IFUNC(int, setreg, (void *vmi, int vcpu, int num, - uint64_t val)) -DEFINE_VMMOPS_IFUNC(int, getdesc, (void *vmi, int vcpu, int num, - struct seg_desc *desc)) -DEFINE_VMMOPS_IFUNC(int, setdesc, (void *vmi, int vcpu, int num, - struct seg_desc *desc)) -DEFINE_VMMOPS_IFUNC(int, getcap, (void *vmi, int vcpu, int num, int *retval)) -DEFINE_VMMOPS_IFUNC(int, setcap, (void *vmi, int vcpu, int num, int val)) +DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)) +DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) +DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) +DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) +DEFINE_VMMOPS_IFUNC(int, getdesc, (void *vcpui, int num, struct seg_desc *desc)) +DEFINE_VMMOPS_IFUNC(int, setdesc, (void *vcpui, int num, struct seg_desc *desc)) +DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) +DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, vm_offset_t max)) DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) -DEFINE_VMMOPS_IFUNC(struct vlapic *, vlapic_init, (void *vmi, int vcpu)) -DEFINE_VMMOPS_IFUNC(void, vlapic_cleanup, (void *vmi, struct vlapic *vlapic)) +DEFINE_VMMOPS_IFUNC(struct vlapic *, vlapic_init, (void *vcpui)) +DEFINE_VMMOPS_IFUNC(void, vlapic_cleanup, (struct vlapic *vlapic)) #ifdef BHYVE_SNAPSHOT -DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta - *meta)) -DEFINE_VMMOPS_IFUNC(int, vmcx_snapshot, (void *vmi, struct vm_snapshot_meta - *meta, int vcpu)) -DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vmi, int vcpuid, uint64_t now)) +DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, + struct vm_snapshot_meta *meta)) +DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) #endif -#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) -#define fpu_stop_emulating() clts() - SDT_PROVIDER_DEFINE(vmm); static MALLOC_DEFINE(M_VM, "vm", "vm"); @@ -269,10 +270,43 @@ static int trap_wbinvd; SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0, "WBINVD triggers a VM-exit"); -static void vm_free_memmap(struct vm *vm, int ident); -static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); +/* global statistics */ +VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); +VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); +VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt"); +VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted"); +VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted"); +VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted"); +VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted"); +VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits"); +VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted"); +VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening"); +VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); +VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); +VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); 
+VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); +VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); +VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); +VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); +VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit"); +VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); +VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); +VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); + +/* + * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU + * counts as well as range of vpid values for VT-x and by the capacity + * of cpuset_t masks. The call to new_unrhdr() in vpid_init() in + * vmx.c requires 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + #ifdef KTR static const char * vcpu_state2str(enum vcpu_state state) @@ -294,40 +328,45 @@ vcpu_state2str(enum vcpu_state state) #endif static void -vcpu_cleanup(struct vm *vm, int i, bool destroy) +vcpu_cleanup(struct vcpu *vcpu, bool destroy) { - struct vcpu *vcpu = &vm->vcpu[i]; - - vmmops_vlapic_cleanup(vm->cookie, vcpu->vlapic); + vmmops_vlapic_cleanup(vcpu->vlapic); + vmmops_vcpu_cleanup(vcpu->cookie); + vcpu->cookie = NULL; if (destroy) { - vmm_stat_free(vcpu->stats); + vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); + vcpu_lock_destroy(vcpu); + free(vcpu, M_VM); } } -static void -vcpu_init(struct vm *vm, int vcpu_id, bool create) +static struct vcpu * +vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_init: invalid vcpu %d", vcpu_id)); - - vcpu = &vm->vcpu[vcpu_id]; - - if (create) { - KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " - "initialized", vcpu_id)); - vcpu_lock_init(vcpu); - vcpu->state = VCPU_IDLE; - vcpu->hostcpu = NOCPU; - vcpu->guestfpu = fpu_save_area_alloc(); - vcpu->stats = vmm_stat_alloc(); - vcpu->tsc_offset = 0; - } - vcpu->vlapic = vmmops_vlapic_init(vm->cookie, vcpu_id); - vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); + vcpu = malloc(sizeof(*vcpu), M_VM, M_WAITOK | M_ZERO); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + vcpu->vm = vm; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + vcpu->tsc_offset = 0; + return (vcpu); +} + +static void +vcpu_init(struct vcpu *vcpu) +{ + vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); + vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie); + vm_set_x2apic_state(vcpu, X2APIC_DISABLED); vcpu->reqidle = 0; vcpu->exitintinfo = 0; vcpu->nmi_pending = 0; @@ -339,39 +378,46 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) } int -vcpu_trace_exceptions(struct vm *vm, int vcpuid) +vcpu_trace_exceptions(struct vcpu *vcpu) { return (trace_guest_exceptions); } int -vcpu_trap_wbinvd(struct vm *vm, int vcpuid) +vcpu_trap_wbinvd(struct vcpu *vcpu) { return (trap_wbinvd); } struct vm_exit * -vm_exitinfo(struct vm *vm, int cpuid) +vm_exitinfo(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (cpuid < 0 || cpuid >= vm->maxcpus) - panic("vm_exitinfo: invalid cpuid %d", cpuid); - - vcpu = &vm->vcpu[cpuid]; - return (&vcpu->exitinfo); } +cpuset_t * +vm_exitinfo_cpuset(struct vcpu *vcpu) +{ + return (&vcpu->exitinfo_cpuset); +} + static int vmm_init(void) { - int error; - if (!vmm_is_hw_supported()) return (ENXIO); + vm_maxcpu = mp_ncpus; + 
TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + if (vm_maxcpu == 0) + vm_maxcpu = 1; + vmm_host_state_init(); vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) : @@ -379,10 +425,7 @@ vmm_init(void) if (vmm_ipinum < 0) vmm_ipinum = IPI_AST; - error = vmm_mem_init(); - if (error) - return (error); - + vmm_suspend_p = vmmops_modsuspend; vmm_resume_p = vmmops_modresume; return (vmmops_modinit(vmm_ipinum)); @@ -396,10 +439,14 @@ vmm_handler(module_t mod, int what, void *arg) switch (what) { case MOD_LOAD: if (vmm_is_hw_supported()) { - vmmdev_init(); + error = vmmdev_init(); + if (error != 0) + break; error = vmm_init(); if (error == 0) vmm_initialized = 1; + else + (void)vmmdev_cleanup(); } else { error = ENXIO; } @@ -408,6 +455,7 @@ vmm_handler(module_t mod, int what, void *arg) if (vmm_is_hw_supported()) { error = vmmdev_cleanup(); if (error == 0) { + vmm_suspend_p = NULL; vmm_resume_p = NULL; iommu_cleanup(); if (vmm_ipinum != IPI_AST) @@ -442,16 +490,15 @@ static moduledata_t vmm_kmod = { * * - VT-x initialization requires smp_rendezvous() and therefore must happen * after SMP is fully functional (after SI_SUB_SMP). + * - vmm device initialization requires an initialized devfs. */ -DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); +DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { - int i; - - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); vm->vhpet = vhpet_init(vm); @@ -463,12 +510,67 @@ vm_init(struct vm *vm, bool create) CPU_ZERO(&vm->active_cpus); CPU_ZERO(&vm->debug_cpus); + CPU_ZERO(&vm->startup_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); - for (i = 0; i < vm->maxcpus; i++) - vcpu_init(vm, i, create); + if (!create) { + for (int i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_init(vm->vcpu[i]); + } + } +} + +void +vm_disable_vcpu_creation(struct vm *vm) +{ + sx_xlock(&vm->vcpus_init_lock); + vm->dying = true; + sx_xunlock(&vm->vcpus_init_lock); +} + +struct vcpu * +vm_alloc_vcpu(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) + return (NULL); + + vcpu = (struct vcpu *) + atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); + if (__predict_true(vcpu != NULL)) + return (vcpu); + + sx_xlock(&vm->vcpus_init_lock); + vcpu = vm->vcpu[vcpuid]; + if (vcpu == NULL && !vm->dying) { + vcpu = vcpu_alloc(vm, vcpuid); + vcpu_init(vcpu); + + /* + * Ensure vCPU is fully created before updating pointer + * to permit unlocked reads above. 
+ */ + atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], + (uintptr_t)vcpu); + } + sx_xunlock(&vm->vcpus_init_lock); + return (vcpu); +} + +void +vm_lock_vcpus(struct vm *vm) +{ + sx_xlock(&vm->vcpus_init_lock); +} + +void +vm_unlock_vcpus(struct vm *vm) +{ + sx_unlock(&vm->vcpus_init_lock); } /* @@ -481,7 +583,7 @@ int vm_create(const char *name, struct vm **retvm) { struct vm *vm; - struct vmspace *vmspace; + int error; /* * If vmm.ko could not be successfully initialized then don't attempt @@ -494,19 +596,22 @@ vm_create(const char *name, struct vm **retvm) VM_MAX_NAMELEN + 1) return (EINVAL); - vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48); - if (vmspace == NULL) - return (ENOMEM); - vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); + error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48); + if (error != 0) { + free(vm, M_VM); + return (error); + } strcpy(vm->name, name); - vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); + sx_init(&vm->vcpus_init_lock, "vm vcpus"); + vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK | + M_ZERO); vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ vm->threads = threads_per_core; /* XXX backwards compatibility */ - vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ + vm->maxcpus = vm_maxcpu; vm_init(vm, true); @@ -532,25 +637,24 @@ vm_get_maxcpus(struct vm *vm) int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, - uint16_t threads, uint16_t maxcpus) + uint16_t threads, uint16_t maxcpus __unused) { - if (maxcpus != 0) - return (EINVAL); /* XXX remove when supported */ + /* Ignore maxcpus. */ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); - /* XXX need to check sockets * cores * threads == vCPU, how? */ vm->sockets = sockets; vm->cores = cores; vm->threads = threads; - vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ return(0); } static void vm_cleanup(struct vm *vm, bool destroy) { - struct mem_map *mm; - int i; + if (destroy) + vm_xlock_memsegs(vm); + else + vm_assert_memseg_xlocked(vm); ppt_unassign_all(vm); @@ -567,31 +671,21 @@ vm_cleanup(struct vm *vm, bool destroy) vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); - for (i = 0; i < vm->maxcpus; i++) - vcpu_cleanup(vm, i, destroy); + for (int i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_cleanup(vm->vcpu[i], destroy); + } vmmops_cleanup(vm->cookie); - /* - * System memory is removed from the guest address space only when - * the VM is destroyed. This is because the mapping remains the same - * across VM reset. - * - * Device memory can be relocated by the guest (e.g. using PCI BARs) - * so those mappings are removed on a VM reset. 
- */ - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (destroy || !sysmem_mapping(vm, mm)) - vm_free_memmap(vm, i); - } + vm_mem_cleanup(vm); if (destroy) { - for (i = 0; i < VM_MAX_MEMSEGS; i++) - vm_free_memseg(vm, i); + vm_mem_destroy(vm); - vmmops_vmspace_free(vm->vmspace); - vm->vmspace = NULL; + free(vm->vcpu, M_VM); + sx_destroy(&vm->vcpus_init_lock); + mtx_destroy(&vm->rendezvous_mtx); } } @@ -632,7 +726,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { vm_object_t obj; - if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) + if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL) return (ENOMEM); else return (0); @@ -642,317 +736,87 @@ int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) { - vmm_mmio_free(vm->vmspace, gpa, len); + vmm_mmio_free(vm_vmspace(vm), gpa, len); return (0); } -/* - * Return 'true' if 'gpa' is allocated in the guest address space. - * - * This function is called in the context of a running vcpu which acts as - * an implicit lock on 'vm->mem_maps[]'. - */ -bool -vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa) -{ - struct mem_map *mm; - int i; - -#ifdef INVARIANTS - int hostcpu, state; - state = vcpu_get_state(vm, vcpuid, &hostcpu); - KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, - ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); -#endif - - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) - return (true); /* 'gpa' is sysmem or devmem */ - } - - if (ppt_is_mmio(vm, gpa)) - return (true); /* 'gpa' is pci passthru mmio */ - - return (false); -} - -int -vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) -{ - struct mem_seg *seg; - vm_object_t obj; - - if (ident < 0 || ident >= VM_MAX_MEMSEGS) - return (EINVAL); - - if (len == 0 || (len & PAGE_MASK)) - return (EINVAL); - - seg = &vm->mem_segs[ident]; - if (seg->object != NULL) { - if (seg->len == len && seg->sysmem == sysmem) - return (EEXIST); - else - return (EINVAL); - } - - obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); - if (obj == NULL) - return (ENOMEM); - - seg->len = len; - seg->object = obj; - seg->sysmem = sysmem; - return (0); -} - -int -vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, - vm_object_t *objptr) -{ - struct mem_seg *seg; - - if (ident < 0 || ident >= VM_MAX_MEMSEGS) - return (EINVAL); - - seg = &vm->mem_segs[ident]; - if (len) - *len = seg->len; - if (sysmem) - *sysmem = seg->sysmem; - if (objptr) - *objptr = seg->object; - return (0); -} - -void -vm_free_memseg(struct vm *vm, int ident) -{ - struct mem_seg *seg; - - KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, - ("%s: invalid memseg ident %d", __func__, ident)); - - seg = &vm->mem_segs[ident]; - if (seg->object != NULL) { - vm_object_deallocate(seg->object); - bzero(seg, sizeof(struct mem_seg)); - } -} - -int -vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, - size_t len, int prot, int flags) +static int +vm_iommu_map(struct vm *vm) { - struct mem_seg *seg; - struct mem_map *m, *map; - vm_ooffset_t last; - int i, error; - - if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) - return (EINVAL); - - if (flags & ~VM_MEMMAP_F_WIRED) - return (EINVAL); - - if (segid < 0 || segid >= VM_MAX_MEMSEGS) - return (EINVAL); - - seg = &vm->mem_segs[segid]; - if (seg->object == NULL) - return (EINVAL); + pmap_t pmap; + vm_paddr_t gpa, hpa; + struct vm_mem_map *mm; + int error, i; - last = first + 
len; - if (first < 0 || first >= last || last > seg->len) - return (EINVAL); + sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED); - if ((gpa | first | last) & PAGE_MASK) - return (EINVAL); - - map = NULL; + pmap = vmspace_pmap(vm_vmspace(vm)); for (i = 0; i < VM_MAX_MEMMAPS; i++) { - m = &vm->mem_maps[i]; - if (m->len == 0) { - map = m; - break; - } - } + if (!vm_memseg_sysmem(vm, i)) + continue; - if (map == NULL) - return (ENOSPC); + mm = &vm->mem.mem_maps[i]; + KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, + ("iommu map found invalid memmap %#lx/%#lx/%#x", + mm->gpa, mm->len, mm->flags)); + if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) + continue; + mm->flags |= VM_MEMMAP_F_IOMMU; - error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, - len, 0, VMFS_NO_SPACE, prot, prot, 0); - if (error != KERN_SUCCESS) - return (EFAULT); + for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) { + hpa = pmap_extract(pmap, gpa); - vm_object_reference(seg->object); + /* + * All mappings in the vmm vmspace must be + * present since they are managed by vmm in this way. + * Because we are in pass-through mode, the + * mappings must also be wired. This implies + * that all pages must be mapped and wired, + * allowing to use pmap_extract() and avoiding the + * need to use vm_gpa_hold_global(). + * + * This could change if/when we start + * supporting page faults on IOMMU maps. + */ + KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(hpa)), + ("vm_iommu_map: vm %p gpa %jx hpa %jx not wired", + vm, (uintmax_t)gpa, (uintmax_t)hpa)); - if (flags & VM_MEMMAP_F_WIRED) { - error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, - VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); - if (error != KERN_SUCCESS) { - vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); - return (error == KERN_RESOURCE_SHORTAGE ? 
ENOMEM : - EFAULT); + iommu_create_mapping(vm->iommu, gpa, hpa, PAGE_SIZE); } } - map->gpa = gpa; - map->len = len; - map->segoff = first; - map->segid = segid; - map->prot = prot; - map->flags = flags; - return (0); + error = iommu_invalidate_tlb(iommu_host_domain()); + return (error); } -int -vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) +static int +vm_iommu_unmap(struct vm *vm) { - struct mem_map *m; - int i; - - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - m = &vm->mem_maps[i]; - if (m->gpa == gpa && m->len == len && - (m->flags & VM_MEMMAP_F_IOMMU) == 0) { - vm_free_memmap(vm, i); - return (0); - } - } - - return (EINVAL); -} + vm_paddr_t gpa; + struct vm_mem_map *mm; + int error, i; -int -vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, - vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) -{ - struct mem_map *mm, *mmnext; - int i; + sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED); - mmnext = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (mm->len == 0 || mm->gpa < *gpa) + if (!vm_memseg_sysmem(vm, i)) continue; - if (mmnext == NULL || mm->gpa < mmnext->gpa) - mmnext = mm; - } - - if (mmnext != NULL) { - *gpa = mmnext->gpa; - if (segid) - *segid = mmnext->segid; - if (segoff) - *segoff = mmnext->segoff; - if (len) - *len = mmnext->len; - if (prot) - *prot = mmnext->prot; - if (flags) - *flags = mmnext->flags; - return (0); - } else { - return (ENOENT); - } -} - -static void -vm_free_memmap(struct vm *vm, int ident) -{ - struct mem_map *mm; - int error __diagused; - - mm = &vm->mem_maps[ident]; - if (mm->len) { - error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, - mm->gpa + mm->len); - KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", - __func__, error)); - bzero(mm, sizeof(struct mem_map)); - } -} - -static __inline bool -sysmem_mapping(struct vm *vm, struct mem_map *mm) -{ - if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) - return (true); - else - return (false); -} - -vm_paddr_t -vmm_sysmem_maxaddr(struct vm *vm) -{ - struct mem_map *mm; - vm_paddr_t maxaddr; - int i; - - maxaddr = 0; - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (sysmem_mapping(vm, mm)) { - if (maxaddr < mm->gpa + mm->len) - maxaddr = mm->gpa + mm->len; - } - } - return (maxaddr); -} - -static void -vm_iommu_modify(struct vm *vm, bool map) -{ - int i, sz; - vm_paddr_t gpa, hpa; - struct mem_map *mm; - void *vp, *cookie, *host_domain; - - sz = PAGE_SIZE; - host_domain = iommu_host_domain(); - - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (!sysmem_mapping(vm, mm)) + mm = &vm->mem.mem_maps[i]; + if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) continue; - - if (map) { - KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, - ("iommu map found invalid memmap %#lx/%#lx/%#x", - mm->gpa, mm->len, mm->flags)); - if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) - continue; - mm->flags |= VM_MEMMAP_F_IOMMU; - } else { - if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) - continue; - mm->flags &= ~VM_MEMMAP_F_IOMMU; - KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, - ("iommu unmap found invalid memmap %#lx/%#lx/%#x", - mm->gpa, mm->len, mm->flags)); - } - - gpa = mm->gpa; - while (gpa < mm->gpa + mm->len) { - vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE, - &cookie); - KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", - vm_name(vm), gpa)); - - vm_gpa_release(cookie); - - hpa = DMAP_TO_PHYS((uintptr_t)vp); - if (map) { - iommu_create_mapping(vm->iommu, gpa, hpa, sz); - } else { - iommu_remove_mapping(vm->iommu, gpa, sz); - } 
- - gpa += PAGE_SIZE; + mm->flags &= ~VM_MEMMAP_F_IOMMU; + KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, + ("iommu unmap found invalid memmap %#lx/%#lx/%#x", + mm->gpa, mm->len, mm->flags)); + + for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) { + KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract( + vmspace_pmap(vm_vmspace(vm)), gpa))), + ("vm_iommu_unmap: vm %p gpa %jx not wired", + vm, (uintmax_t)gpa)); + iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE); } } @@ -960,15 +824,10 @@ vm_iommu_modify(struct vm *vm, bool map) * Invalidate the cached translations associated with the domain * from which pages were removed. */ - if (map) - iommu_invalidate_tlb(host_domain); - else - iommu_invalidate_tlb(vm->iommu); + error = iommu_invalidate_tlb(vm->iommu); + return (error); } -#define vm_iommu_unmap(vm) vm_iommu_modify((vm), false) -#define vm_iommu_map(vm) vm_iommu_modify((vm), true) - int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) { @@ -979,9 +838,9 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) return (error); if (ppt_assigned_devices(vm) == 0) - vm_iommu_unmap(vm); + error = vm_iommu_unmap(vm); - return (0); + return (error); } int @@ -989,6 +848,7 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) { int error; vm_paddr_t maxaddr; + bool map = false; /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ if (ppt_assigned_devices(vm) == 0) { @@ -998,100 +858,40 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) vm->iommu = iommu_create_domain(maxaddr); if (vm->iommu == NULL) return (ENXIO); - vm_iommu_map(vm); + map = true; } error = ppt_assign_device(vm, bus, slot, func); + if (error == 0 && map) + error = vm_iommu_map(vm); return (error); } -void * -vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot, - void **cookie) -{ - int i, count, pageoff; - struct mem_map *mm; - vm_page_t m; -#ifdef INVARIANTS - /* - * All vcpus are frozen by ioctls that modify the memory map - * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is - * guaranteed if at least one vcpu is in the VCPU_FROZEN state. - */ - int state; - KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d", - __func__, vcpuid)); - for (i = 0; i < vm->maxcpus; i++) { - if (vcpuid != -1 && vcpuid != i) - continue; - state = vcpu_get_state(vm, i, NULL); - KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", - __func__, state)); - } -#endif - pageoff = gpa & PAGE_MASK; - if (len > PAGE_SIZE - pageoff) - panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); - - count = 0; - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) { - count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, - trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); - break; - } - } - - if (count == 1) { - *cookie = m; - return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); - } else { - *cookie = NULL; - return (NULL); - } -} - -void -vm_gpa_release(void *cookie) -{ - vm_page_t m = cookie; - - vm_page_unwire(m, PQ_ACTIVE); -} - int -vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) +vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { - - if (vcpu < 0 || vcpu >= vm->maxcpus) - return (EINVAL); - + /* Negative values represent VM control structure fields. 
*/ if (reg >= VM_REG_LAST) return (EINVAL); - return (vmmops_getreg(vm->cookie, vcpu, reg, retval)); + return (vmmops_getreg(vcpu->cookie, reg, retval)); } int -vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) +vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { - struct vcpu *vcpu; int error; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - + /* Negative values represent VM control structure fields. */ if (reg >= VM_REG_LAST) return (EINVAL); - error = vmmops_setreg(vm->cookie, vcpuid, reg, val); + error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_RIP) return (error); /* Set 'nextrip' to match the value of %rip */ - VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); - vcpu = &vm->vcpu[vcpuid]; + VMM_CTR1(vcpu, "Setting nextrip to %#lx", val); vcpu->nextrip = val; return (0); } @@ -1129,30 +929,23 @@ is_segment_register(int reg) } int -vm_get_seg_desc(struct vm *vm, int vcpu, int reg, - struct seg_desc *desc) +vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) { - if (vcpu < 0 || vcpu >= vm->maxcpus) - return (EINVAL); - if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); - return (vmmops_getdesc(vm->cookie, vcpu, reg, desc)); + return (vmmops_getdesc(vcpu->cookie, reg, desc)); } int -vm_set_seg_desc(struct vm *vm, int vcpu, int reg, - struct seg_desc *desc) +vm_set_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) { - if (vcpu < 0 || vcpu >= vm->maxcpus) - return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); - return (vmmops_setdesc(vm->cookie, vcpu, reg, desc)); + return (vmmops_setdesc(vcpu->cookie, reg, desc)); } static void @@ -1163,7 +956,7 @@ restore_guest_fpustate(struct vcpu *vcpu) fpuexit(curthread); /* restore guest FPU state */ - fpu_stop_emulating(); + fpu_enable(); fpurestore(vcpu->guestfpu); /* restore guest XCR0 if XSAVE is enabled in the host */ @@ -1171,10 +964,10 @@ restore_guest_fpustate(struct vcpu *vcpu) load_xcr(0, vcpu->guest_xcr0); /* - * The FPU is now "dirty" with the guest's state so turn on emulation - * to trap any access to the FPU by the host. + * The FPU is now "dirty" with the guest's state so disable + * the FPU to trap any access by the host. */ - fpu_start_emulating(); + fpu_disable(); } static void @@ -1191,21 +984,67 @@ save_guest_fpustate(struct vcpu *vcpu) } /* save guest FPU state */ - fpu_stop_emulating(); + fpu_enable(); fpusave(vcpu->guestfpu); - fpu_start_emulating(); + fpu_disable(); } static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); +/* + * Invoke the rendezvous function on the specified vcpu if applicable. Return + * true if the rendezvous is finished, false otherwise. 
+ */ +static bool +vm_rendezvous(struct vcpu *vcpu) +{ + struct vm *vm = vcpu->vm; + int vcpuid; + + mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED); + KASSERT(vcpu->vm->rendezvous_func != NULL, + ("vm_rendezvous: no rendezvous pending")); + + /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ + CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, + &vm->active_cpus); + + vcpuid = vcpu->vcpuid; + if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && + !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { + VMM_CTR0(vcpu, "Calling rendezvous func"); + (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); + CPU_SET(vcpuid, &vm->rendezvous_done_cpus); + } + if (CPU_CMP(&vm->rendezvous_req_cpus, + &vm->rendezvous_done_cpus) == 0) { + VMM_CTR0(vcpu, "Rendezvous completed"); + CPU_ZERO(&vm->rendezvous_req_cpus); + vm->rendezvous_func = NULL; + wakeup(&vm->rendezvous_func); + return (true); + } + return (false); +} + +static void +vcpu_wait_idle(struct vcpu *vcpu) +{ + KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle")); + + vcpu->reqidle = 1; + vcpu_notify_event_locked(vcpu, false); + VMM_CTR1(vcpu, "vcpu state change from %s to " + "idle requested", vcpu_state2str(vcpu->state)); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); +} + static int -vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, +vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { - struct vcpu *vcpu; int error; - vcpu = &vm->vcpu[vcpuid]; vcpu_assert_locked(vcpu); /* @@ -1214,13 +1053,8 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, * ioctl() operating on a vcpu at any point. */ if (from_idle) { - while (vcpu->state != VCPU_IDLE) { - vcpu->reqidle = 1; - vcpu_notify_event_locked(vcpu, false); - VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to " - "idle requested", vcpu_state2str(vcpu->state)); - msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); - } + while (vcpu->state != VCPU_IDLE) + vcpu_wait_idle(vcpu); } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); @@ -1257,7 +1091,7 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, if (error) return (EBUSY); - VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s", + VMM_CTR2(vcpu, "vcpu state changed from %s to %s", vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); vcpu->state = newstate; @@ -1272,66 +1106,133 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, return (0); } +/* + * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks + * with vm_smp_rendezvous(). + * + * The complexity here suggests that the rendezvous mechanism needs a rethink. + */ +int +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) +{ + cpuset_t locked; + struct vcpu *vcpu; + int error, i; + uint16_t maxcpus; + + KASSERT(newstate != VCPU_IDLE, + ("vcpu_set_state_all: invalid target state %d", newstate)); + + error = 0; + CPU_ZERO(&locked); + maxcpus = vm->maxcpus; + + mtx_lock(&vm->rendezvous_mtx); +restart: + if (vm->rendezvous_func != NULL) { + /* + * If we have a pending rendezvous, then the initiator may be + * blocked waiting for other vCPUs to execute the callback. The + * current thread may be a vCPU thread so we must not block + * waiting for the initiator, otherwise we get a deadlock. + * Thus, execute the callback on behalf of any idle vCPUs. 
+ */ + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + if (vcpu->state == VCPU_IDLE) { + (void)vcpu_set_state_locked(vcpu, VCPU_FROZEN, + true); + CPU_SET(i, &locked); + } + if (CPU_ISSET(i, &locked)) { + /* + * We can safely execute the callback on this + * vCPU's behalf. + */ + vcpu_unlock(vcpu); + (void)vm_rendezvous(vcpu); + vcpu_lock(vcpu); + } + vcpu_unlock(vcpu); + } + } + + /* + * Now wait for remaining vCPUs to become idle. This may include the + * initiator of a rendezvous that is currently blocked on the rendezvous + * mutex. + */ + CPU_FOREACH_ISCLR(i, &locked) { + if (i >= maxcpus) + break; + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + while (vcpu->state != VCPU_IDLE) { + mtx_unlock(&vm->rendezvous_mtx); + vcpu_wait_idle(vcpu); + vcpu_unlock(vcpu); + mtx_lock(&vm->rendezvous_mtx); + if (vm->rendezvous_func != NULL) + goto restart; + vcpu_lock(vcpu); + } + error = vcpu_set_state_locked(vcpu, newstate, true); + vcpu_unlock(vcpu); + if (error != 0) { + /* Roll back state changes. */ + CPU_FOREACH_ISSET(i, &locked) + (void)vcpu_set_state(vcpu, VCPU_IDLE, false); + break; + } + CPU_SET(i, &locked); + } + mtx_unlock(&vm->rendezvous_mtx); + return (error); +} + static void -vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) +vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { int error; - if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0) + if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d\n", error, newstate); } static void -vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate) +vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) { int error; - if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0) + if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } -#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ - do { \ - if (vcpuid >= 0) \ - VCPU_CTR0(vm, vcpuid, fmt); \ - else \ - VM_CTR0(vm, fmt); \ - } while (0) - static int -vm_handle_rendezvous(struct vm *vm, int vcpuid) +vm_handle_rendezvous(struct vcpu *vcpu) { + struct vm *vm; struct thread *td; - int error; - - KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), - ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); - error = 0; td = curthread; + vm = vcpu->vm; + mtx_lock(&vm->rendezvous_mtx); while (vm->rendezvous_func != NULL) { - /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ - CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); - - if (vcpuid != -1 && - CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && - !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { - VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); - (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); - CPU_SET(vcpuid, &vm->rendezvous_done_cpus); - } - if (CPU_CMP(&vm->rendezvous_req_cpus, - &vm->rendezvous_done_cpus) == 0) { - VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); - vm->rendezvous_func = NULL; - wakeup(&vm->rendezvous_func); + if (vm_rendezvous(vcpu)) break; - } - RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); + + VMM_CTR0(vcpu, "Wait for rendezvous completion"); mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, "vmrndv", hz); - if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { + if (td_ast_pending(td, TDA_SUSPEND)) { + int error; + mtx_unlock(&vm->rendezvous_mtx); error = 
thread_check_susp(td, true); if (error != 0) @@ -1347,21 +1248,21 @@ vm_handle_rendezvous(struct vm *vm, int vcpuid) * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. */ static int -vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) +vm_handle_hlt(struct vcpu *vcpu, bool intr_disabled, bool *retu) { - struct vcpu *vcpu; + struct vm *vm = vcpu->vm; const char *wmesg; struct thread *td; - int error, t, vcpu_halted, vm_halted; - - KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); + int error, t, vcpuid, vcpu_halted, vm_halted; - vcpu = &vm->vcpu[vcpuid]; + vcpuid = vcpu->vcpuid; vcpu_halted = 0; vm_halted = 0; error = 0; td = curthread; + KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); + vcpu_lock(vcpu); while (1) { /* @@ -1375,20 +1276,20 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) */ if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) break; - if (vm_nmi_pending(vm, vcpuid)) + if (vm_nmi_pending(vcpu)) break; if (!intr_disabled) { - if (vm_extint_pending(vm, vcpuid) || + if (vm_extint_pending(vcpu) || vlapic_pending_intr(vcpu->vlapic, NULL)) { break; } } /* Don't go to sleep if the vcpu thread needs to yield */ - if (vcpu_should_yield(vm, vcpuid)) + if (vcpu_should_yield(vcpu)) break; - if (vcpu_debugged(vm, vcpuid)) + if (vcpu_debugged(vcpu)) break; /* @@ -1399,7 +1300,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) */ if (intr_disabled) { wmesg = "vmhalt"; - VCPU_CTR0(vm, vcpuid, "Halted"); + VMM_CTR0(vcpu, "Halted"); if (!vcpu_halted && halt_detection_enabled) { vcpu_halted = 1; CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); @@ -1413,19 +1314,24 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) } t = ticks; - vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. */ msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); - vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); - vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); - if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + vmm_stat_incr(vcpu, VCPU_IDLE_TICKS, ticks - t); + if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); - if (error != 0) + if (error != 0) { + if (vcpu_halted) { + CPU_CLR_ATOMIC(vcpuid, + &vm->halted_cpus); + } return (error); + } vcpu_lock(vcpu); } } @@ -1442,14 +1348,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) } static int -vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) +vm_handle_paging(struct vcpu *vcpu, bool *retu) { + struct vm *vm = vcpu->vm; int rv, ftype; struct vm_map *map; - struct vcpu *vcpu; struct vm_exit *vme; - vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", @@ -1461,20 +1366,20 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) ("vm_handle_paging: invalid fault_type %d", ftype)); if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { - rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), + rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm_vmspace(vm)), vme->u.paging.gpa, ftype); if (rv == 0) { - VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", + VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx", ftype == VM_PROT_READ ? 
"accessed" : "dirty", vme->u.paging.gpa); goto done; } } - map = &vm->vmspace->vm_map; + map = &vm_vmspace(vm)->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); - VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " + VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != KERN_SUCCESS) @@ -1484,10 +1389,9 @@ done: } static int -vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) +vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) { struct vie *vie; - struct vcpu *vcpu; struct vm_exit *vme; uint64_t gla, gpa, cs_base; struct vm_guest_paging *paging; @@ -1496,7 +1400,6 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) enum vm_cpu_mode cpu_mode; int cs_d, error, fault; - vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", @@ -1510,12 +1413,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) paging = &vme->u.inst_emul.paging; cpu_mode = paging->cpu_mode; - VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); + VMM_CTR1(vcpu, "inst_emul fault accessing gpa %#lx", gpa); /* Fetch, decode and emulate the faulting instruction */ if (vie->num_valid == 0) { - error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + - cs_base, VIE_INST_SIZE, vie, &fault); + error = vmm_fetch_instruction(vcpu, paging, vme->rip + cs_base, + VIE_INST_SIZE, vie, &fault); } else { /* * The instruction bytes have already been copied into 'vie' @@ -1525,8 +1428,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) if (error || fault) return (error); - if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { - VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", + if (vmm_decode_instruction(vcpu, gla, cpu_mode, cs_d, vie) != 0) { + VMM_CTR1(vcpu, "Error decoding instruction at %#lx", vme->rip + cs_base); *retu = true; /* dump instruction bytes in userspace */ return (0); @@ -1537,8 +1440,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) */ vme->inst_length = vie->num_processed; vcpu->nextrip += vie->num_processed; - VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " - "decoding", vcpu->nextrip); + VMM_CTR1(vcpu, "nextrip updated to %#lx after instruction decoding", + vcpu->nextrip); /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { @@ -1555,24 +1458,23 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) return (0); } - error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, - mread, mwrite, retu); + error = vmm_emulate_instruction(vcpu, gpa, vie, paging, mread, mwrite, + retu); return (error); } static int -vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) +vm_handle_suspend(struct vcpu *vcpu, bool *retu) { + struct vm *vm = vcpu->vm; int error, i; - struct vcpu *vcpu; struct thread *td; error = 0; - vcpu = &vm->vcpu[vcpuid]; td = curthread; - CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); /* * Wait until all 'active_cpus' have suspended themselves. 
@@ -1584,24 +1486,24 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) vcpu_lock(vcpu); while (error == 0) { if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { - VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); + VMM_CTR0(vcpu, "All vcpus suspended"); break; } if (vm->rendezvous_func == NULL) { - VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); - vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); + VMM_CTR0(vcpu, "Sleeping during suspend"); + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); - vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); - if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); vcpu_lock(vcpu); } } else { - VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); + VMM_CTR0(vcpu, "Rendezvous during suspend"); vcpu_unlock(vcpu); - error = vm_handle_rendezvous(vm, vcpuid); + error = vm_handle_rendezvous(vcpu); vcpu_lock(vcpu); } } @@ -1612,7 +1514,7 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { - vcpu_notify_event(vm, i, false); + vcpu_notify_event(vm_vcpu(vm, i), false); } } @@ -1621,10 +1523,8 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) } static int -vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) +vm_handle_reqidle(struct vcpu *vcpu, bool *retu) { - struct vcpu *vcpu = &vm->vcpu[vcpuid]; - vcpu_lock(vcpu); KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); vcpu->reqidle = 0; @@ -1633,6 +1533,40 @@ vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) return (0); } +static int +vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) +{ + int error, fault; + uint64_t rsp; + uint64_t rflags; + struct vm_copyinfo copyinfo[2]; + + *retu = true; + if (!vme->u.dbg.pushf_intercept || vme->u.dbg.tf_shadow_val != 0) { + return (0); + } + + vm_get_register(vcpu, VM_REG_GUEST_RSP, &rsp); + error = vm_copy_setup(vcpu, &vme->u.dbg.paging, rsp, sizeof(uint64_t), + VM_PROT_RW, copyinfo, nitems(copyinfo), &fault); + if (error != 0 || fault != 0) { + *retu = false; + return (EINVAL); + } + + /* Read pushed rflags value from top of stack. */ + vm_copyin(copyinfo, &rflags, sizeof(uint64_t)); + + /* Clear TF bit. */ + rflags &= ~(PSL_T); + + /* Write updated value back to memory. 
*/ + vm_copyout(&rflags, copyinfo, sizeof(uint64_t)); + vm_copy_teardown(copyinfo, nitems(copyinfo)); + + return (0); +} + int vm_suspend(struct vm *vm, enum vm_suspend_how how) { @@ -1654,21 +1588,22 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how) */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) - vcpu_notify_event(vm, i, false); + vcpu_notify_event(vm_vcpu(vm, i), false); } return (0); } void -vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) +vm_exit_suspended(struct vcpu *vcpu, uint64_t rip) { + struct vm *vm = vcpu->vm; struct vm_exit *vmexit; KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); - vmexit = vm_exitinfo(vm, vcpuid); + vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_SUSPENDED; @@ -1676,70 +1611,65 @@ vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) } void -vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip) +vm_exit_debug(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; - vmexit = vm_exitinfo(vm, vcpuid); + vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_DEBUG; } void -vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) +vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; - KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); - - vmexit = vm_exitinfo(vm, vcpuid); + vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; - vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); + vmm_stat_incr(vcpu, VMEXIT_RENDEZVOUS, 1); } void -vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip) +vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; - vmexit = vm_exitinfo(vm, vcpuid); + vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_REQIDLE; - vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1); + vmm_stat_incr(vcpu, VMEXIT_REQIDLE, 1); } void -vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) +vm_exit_astpending(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; - vmexit = vm_exitinfo(vm, vcpuid); + vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_BOGUS; - vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); + vmm_stat_incr(vcpu, VMEXIT_ASTPENDING, 1); } int -vm_run(struct vm *vm, struct vm_run *vmrun) +vm_run(struct vcpu *vcpu) { + struct vm *vm = vcpu->vm; struct vm_eventinfo evinfo; int error, vcpuid; - struct vcpu *vcpu; struct pcb *pcb; uint64_t tscval; struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; - vcpuid = vmrun->cpuid; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); + vcpuid = vcpu->vcpuid; if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); @@ -1747,10 +1677,9 @@ vm_run(struct vm *vm, struct vm_run *vmrun) if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); - vcpu = &vm->vcpu[vcpuid]; + pmap = vmspace_pmap(vm_vmspace(vm)); vme = &vcpu->exitinfo; - evinfo.rptr = &vm->rendezvous_func; + evinfo.rptr = &vm->rendezvous_req_cpus; evinfo.sptr = &vm->suspend; evinfo.iptr = &vcpu->reqidle; restart: @@ -1766,13 +1695,13 @@ restart: restore_guest_fpustate(vcpu); - vcpu_require_state(vm, vcpuid, VCPU_RUNNING); - error = vmmops_run(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo); - vcpu_require_state(vm, vcpuid, 
VCPU_FROZEN); + vcpu_require_state(vcpu, VCPU_RUNNING); + error = vmmops_run(vcpu->cookie, vcpu->nextrip, pmap, &evinfo); + vcpu_require_state(vcpu, VCPU_FROZEN); save_guest_fpustate(vcpu); - vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); + vmm_stat_incr(vcpu, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); critical_exit(); @@ -1781,36 +1710,38 @@ restart: vcpu->nextrip = vme->rip + vme->inst_length; switch (vme->exitcode) { case VM_EXITCODE_REQIDLE: - error = vm_handle_reqidle(vm, vcpuid, &retu); + error = vm_handle_reqidle(vcpu, &retu); break; case VM_EXITCODE_SUSPENDED: - error = vm_handle_suspend(vm, vcpuid, &retu); + error = vm_handle_suspend(vcpu, &retu); break; case VM_EXITCODE_IOAPIC_EOI: - vioapic_process_eoi(vm, vcpuid, - vme->u.ioapic_eoi.vector); + vioapic_process_eoi(vm, vme->u.ioapic_eoi.vector); break; case VM_EXITCODE_RENDEZVOUS: - error = vm_handle_rendezvous(vm, vcpuid); + error = vm_handle_rendezvous(vcpu); break; case VM_EXITCODE_HLT: intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); - error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); + error = vm_handle_hlt(vcpu, intr_disabled, &retu); break; case VM_EXITCODE_PAGING: - error = vm_handle_paging(vm, vcpuid, &retu); + error = vm_handle_paging(vcpu, &retu); break; case VM_EXITCODE_INST_EMUL: - error = vm_handle_inst_emul(vm, vcpuid, &retu); + error = vm_handle_inst_emul(vcpu, &retu); break; case VM_EXITCODE_INOUT: case VM_EXITCODE_INOUT_STR: - error = vm_handle_inout(vm, vcpuid, vme, &retu); + error = vm_handle_inout(vcpu, vme, &retu); + break; + case VM_EXITCODE_DB: + error = vm_handle_db(vcpu, vme, &retu); break; case VM_EXITCODE_MONITOR: case VM_EXITCODE_MWAIT: case VM_EXITCODE_VMINSN: - vm_inject_ud(vm, vcpuid); + vm_inject_ud(vcpu); break; default: retu = true; /* handled in userland */ @@ -1818,32 +1749,30 @@ restart: } } + /* + * VM_EXITCODE_INST_EMUL could access the apic which could transform the + * exit code into VM_EXITCODE_IPI. + */ + if (error == 0 && vme->exitcode == VM_EXITCODE_IPI) + error = vm_handle_ipi(vcpu, vme, &retu); + if (error == 0 && retu == false) goto restart; - vmm_stat_incr(vm, vcpuid, VMEXIT_USERSPACE, 1); - VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); + vmm_stat_incr(vcpu, VMEXIT_USERSPACE, 1); + VMM_CTR2(vcpu, "retu %d/%d", error, vme->exitcode); - /* copy the exit information */ - bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); return (error); } int -vm_restart_instruction(void *arg, int vcpuid) +vm_restart_instruction(struct vcpu *vcpu) { - struct vm *vm; - struct vcpu *vcpu; enum vcpu_state state; uint64_t rip; int error __diagused; - vm = arg; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - vcpu = &vm->vcpu[vcpuid]; - state = vcpu_get_state(vm, vcpuid, NULL); + state = vcpu_get_state(vcpu, NULL); if (state == VCPU_RUNNING) { /* * When a vcpu is "running" the next instruction is determined @@ -1852,7 +1781,7 @@ vm_restart_instruction(void *arg, int vcpuid) * instruction to be restarted. */ vcpu->exitinfo.inst_length = 0; - VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " + VMM_CTR1(vcpu, "restarting instruction at %#lx by " "setting inst_length to zero", vcpu->exitinfo.rip); } else if (state == VCPU_FROZEN) { /* @@ -1861,9 +1790,9 @@ vm_restart_instruction(void *arg, int vcpuid) * instruction. Thus instruction restart is achieved by setting * 'nextrip' to the vcpu's %rip. 
*/ - error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); + error = vm_get_register(vcpu, VM_REG_GUEST_RIP, &rip); KASSERT(!error, ("%s: error %d getting rip", __func__, error)); - VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " + VMM_CTR2(vcpu, "restarting instruction by updating " "nextrip from %#lx to %#lx", vcpu->nextrip, rip); vcpu->nextrip = rip; } else { @@ -1873,16 +1802,10 @@ vm_restart_instruction(void *arg, int vcpuid) } int -vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) +vm_exit_intinfo(struct vcpu *vcpu, uint64_t info) { - struct vcpu *vcpu; int type, vector; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - vcpu = &vm->vcpu[vcpuid]; - if (info & VM_INTINFO_VALID) { type = info & VM_INTINFO_TYPE; vector = info & 0xff; @@ -1895,7 +1818,7 @@ vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) } else { info = 0; } - VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); + VMM_CTR2(vcpu, "%s: info1(%#lx)", __func__, info); vcpu->exitintinfo = info; return (0); } @@ -1955,7 +1878,7 @@ exception_class(uint64_t info) } static int -nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, +nested_fault(struct vcpu *vcpu, uint64_t info1, uint64_t info2, uint64_t *retinfo) { enum exc_class exc1, exc2; @@ -1971,9 +1894,9 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, type1 = info1 & VM_INTINFO_TYPE; vector1 = info1 & 0xff; if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { - VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", + VMM_CTR2(vcpu, "triple fault: info1(%#lx), info2(%#lx)", info1, info2); - vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); + vm_suspend(vcpu->vm, VM_SUSPEND_TRIPLEFAULT); *retinfo = 0; return (0); } @@ -2013,17 +1936,11 @@ vcpu_exception_intinfo(struct vcpu *vcpu) } int -vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) +vm_entry_intinfo(struct vcpu *vcpu, uint64_t *retinfo) { - struct vcpu *vcpu; uint64_t info1, info2; int valid; - KASSERT(vcpuid >= 0 && - vcpuid < vm->maxcpus, ("invalid vcpu %d", vcpuid)); - - vcpu = &vm->vcpu[vcpuid]; - info1 = vcpu->exitintinfo; vcpu->exitintinfo = 0; @@ -2031,12 +1948,12 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) if (vcpu->exception_pending) { info2 = vcpu_exception_intinfo(vcpu); vcpu->exception_pending = 0; - VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", + VMM_CTR2(vcpu, "Exception %d delivered: %#lx", vcpu->exc_vector, info2); } if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { - valid = nested_fault(vm, vcpuid, info1, info2, retinfo); + valid = nested_fault(vcpu, info1, info2, retinfo); } else if (info1 & VM_INTINFO_VALID) { *retinfo = info1; valid = 1; @@ -2048,7 +1965,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) } if (valid) { - VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " + VMM_CTR4(vcpu, "%s: info1(%#lx), info2(%#lx), " "retinfo(%#lx)", __func__, info1, info2, *retinfo); } @@ -2056,30 +1973,20 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) } int -vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) +vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - vcpu = &vm->vcpu[vcpuid]; *info1 = vcpu->exitintinfo; *info2 = vcpu_exception_intinfo(vcpu); return (0); } int -vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, +vm_inject_exception(struct vcpu 
*vcpu, int vector, int errcode_valid, uint32_t errcode, int restart_instruction) { - struct vcpu *vcpu; uint64_t regval; int error __diagused; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - if (vector < 0 || vector >= 32) return (EINVAL); @@ -2091,10 +1998,8 @@ vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, if (vector == IDT_DF) return (EINVAL); - vcpu = &vm->vcpu[vcpuid]; - if (vcpu->exception_pending) { - VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " + VMM_CTR2(vcpu, "Unable to inject exception %d due to " "pending exception %d", vector, vcpu->exc_vector); return (EBUSY); } @@ -2103,7 +2008,7 @@ /* * Exceptions don't deliver an error code in real mode. */ - error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &regval); + error = vm_get_register(vcpu, VM_REG_GUEST_CR0, &regval); KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); if (!(regval & CR0_PE)) errcode_valid = 0; @@ -2115,174 +2020,141 @@ * Event blocking by "STI" or "MOV SS" is cleared after guest executes * one instruction or incurs an exception. */ - error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); + error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW, 0); KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", __func__, error)); if (restart_instruction) - vm_restart_instruction(vm, vcpuid); + vm_restart_instruction(vcpu); vcpu->exception_pending = 1; vcpu->exc_vector = vector; vcpu->exc_errcode = errcode; vcpu->exc_errcode_valid = errcode_valid; - VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector); + VMM_CTR1(vcpu, "Exception %d pending", vector); return (0); } void -vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, - int errcode) +vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, int errcode) { - struct vm *vm; int error __diagused, restart_instruction; - vm = vmarg; restart_instruction = 1; - error = vm_inject_exception(vm, vcpuid, vector, errcode_valid, + error = vm_inject_exception(vcpu, vector, errcode_valid, errcode, restart_instruction); KASSERT(error == 0, ("vm_inject_exception error %d", error)); } void -vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) +vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2) { - struct vm *vm; int error __diagused; - vm = vmarg; - VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", + VMM_CTR2(vcpu, "Injecting page fault: error_code %#x, cr2 %#lx", error_code, cr2); - error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); + error = vm_set_register(vcpu, VM_REG_GUEST_CR2, cr2); KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); - vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); + vm_inject_fault(vcpu, IDT_PF, 1, error_code); } static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); int -vm_inject_nmi(struct vm *vm, int vcpuid) +vm_inject_nmi(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - vcpu = &vm->vcpu[vcpuid]; vcpu->nmi_pending = 1; - vcpu_notify_event(vm, vcpuid, false); + vcpu_notify_event(vcpu, false); return (0); } int -vm_nmi_pending(struct vm *vm, int vcpuid) +vm_nmi_pending(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); - - vcpu = &vm->vcpu[vcpuid]; -
return (vcpu->nmi_pending); } void -vm_nmi_clear(struct vm *vm, int vcpuid) +vm_nmi_clear(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); - - vcpu = &vm->vcpu[vcpuid]; - if (vcpu->nmi_pending == 0) panic("vm_nmi_clear: inconsistent nmi_pending state"); vcpu->nmi_pending = 0; - vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); + vmm_stat_incr(vcpu, VCPU_NMI_COUNT, 1); } static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); int -vm_inject_extint(struct vm *vm, int vcpuid) +vm_inject_extint(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - vcpu = &vm->vcpu[vcpuid]; vcpu->extint_pending = 1; - vcpu_notify_event(vm, vcpuid, false); + vcpu_notify_event(vcpu, false); return (0); } int -vm_extint_pending(struct vm *vm, int vcpuid) +vm_extint_pending(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - panic("vm_extint_pending: invalid vcpuid %d", vcpuid); - - vcpu = &vm->vcpu[vcpuid]; - return (vcpu->extint_pending); } void -vm_extint_clear(struct vm *vm, int vcpuid) +vm_extint_clear(struct vcpu *vcpu) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - panic("vm_extint_pending: invalid vcpuid %d", vcpuid); - - vcpu = &vm->vcpu[vcpuid]; - if (vcpu->extint_pending == 0) panic("vm_extint_clear: inconsistent extint_pending state"); vcpu->extint_pending = 0; - vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); + vmm_stat_incr(vcpu, VCPU_EXTINT_COUNT, 1); } int -vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) +vm_get_capability(struct vcpu *vcpu, int type, int *retval) { - if (vcpu < 0 || vcpu >= vm->maxcpus) - return (EINVAL); - if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); - return (vmmops_getcap(vm->cookie, vcpu, type, retval)); + return (vmmops_getcap(vcpu->cookie, type, retval)); } int -vm_set_capability(struct vm *vm, int vcpu, int type, int val) +vm_set_capability(struct vcpu *vcpu, int type, int val) { - if (vcpu < 0 || vcpu >= vm->maxcpus) - return (EINVAL); - if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); - return (vmmops_setcap(vm->cookie, vcpu, type, val)); + return (vmmops_setcap(vcpu->cookie, type, val)); +} + +struct vm * +vcpu_vm(struct vcpu *vcpu) +{ + return (vcpu->vm); +} + +int +vcpu_vcpuid(struct vcpu *vcpu) +{ + return (vcpu->vcpuid); +} + +struct vcpu * +vm_vcpu(struct vm *vm, int vcpuid) +{ + return (vm->vcpu[vcpuid]); } struct vlapic * -vm_lapic(struct vm *vm, int cpu) +vm_lapic(struct vcpu *vcpu) { - return (vm->vcpu[cpu].vlapic); + return (vcpu->vlapic); } struct vioapic * @@ -2330,7 +2202,7 @@ vmm_is_pptdev(int bus, int slot, int func) found = true; break; } - + if (cp2 != NULL) *cp2++ = ' '; @@ -2349,35 +2221,22 @@ vm_iommu_domain(struct vm *vm) } int -vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, - bool from_idle) +vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - panic("vm_set_run_state: invalid vcpuid %d", vcpuid); - - vcpu = &vm->vcpu[vcpuid]; vcpu_lock(vcpu); - error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle); + error = vcpu_set_state_locked(vcpu, newstate, from_idle); vcpu_unlock(vcpu); return (error); } enum vcpu_state -vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) +vcpu_get_state(struct vcpu *vcpu, int *hostcpu) { - struct vcpu *vcpu; enum vcpu_state state; - if 
(vcpuid < 0 || vcpuid >= vm->maxcpus) - panic("vm_get_run_state: invalid vcpuid %d", vcpuid); - - vcpu = &vm->vcpu[vcpuid]; - vcpu_lock(vcpu); state = vcpu->state; if (hostcpu != NULL) @@ -2388,67 +2247,57 @@ vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) } int -vm_activate_cpu(struct vm *vm, int vcpuid) +vm_activate_cpu(struct vcpu *vcpu) { + struct vm *vm = vcpu->vm; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - if (CPU_ISSET(vcpuid, &vm->active_cpus)) + if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EBUSY); - VCPU_CTR0(vm, vcpuid, "activated"); - CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); + VMM_CTR0(vcpu, "activated"); + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); return (0); } int -vm_suspend_cpu(struct vm *vm, int vcpuid) +vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) { - int i; - - if (vcpuid < -1 || vcpuid >= vm->maxcpus) - return (EINVAL); - - if (vcpuid == -1) { + if (vcpu == NULL) { vm->debug_cpus = vm->active_cpus; - for (i = 0; i < vm->maxcpus; i++) { + for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) - vcpu_notify_event(vm, i, false); + vcpu_notify_event(vm_vcpu(vm, i), false); } } else { - if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); - CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus); - vcpu_notify_event(vm, vcpuid, false); + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); + vcpu_notify_event(vcpu, false); } return (0); } int -vm_resume_cpu(struct vm *vm, int vcpuid) +vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) { - if (vcpuid < -1 || vcpuid >= vm->maxcpus) - return (EINVAL); - - if (vcpuid == -1) { + if (vcpu == NULL) { CPU_ZERO(&vm->debug_cpus); } else { - if (!CPU_ISSET(vcpuid, &vm->debug_cpus)) + if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) return (EINVAL); - CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus); + CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); } return (0); } int -vcpu_debugged(struct vm *vm, int vcpuid) +vcpu_debugged(struct vcpu *vcpu) { - return (CPU_ISSET(vcpuid, &vm->debug_cpus)); + return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); } cpuset_t @@ -2472,36 +2321,54 @@ vm_suspended_cpus(struct vm *vm) return (vm->suspended_cpus); } +/* + * Returns the subset of vCPUs in tostart that are awaiting startup. + * These vCPUs are also marked as no longer awaiting startup. 
+ */ +cpuset_t +vm_start_cpus(struct vm *vm, const cpuset_t *tostart) +{ + cpuset_t set; + + mtx_lock(&vm->rendezvous_mtx); + CPU_AND(&set, &vm->startup_cpus, tostart); + CPU_ANDNOT(&vm->startup_cpus, &vm->startup_cpus, &set); + mtx_unlock(&vm->rendezvous_mtx); + return (set); +} + +void +vm_await_start(struct vm *vm, const cpuset_t *waiting) +{ + mtx_lock(&vm->rendezvous_mtx); + CPU_OR(&vm->startup_cpus, &vm->startup_cpus, waiting); + mtx_unlock(&vm->rendezvous_mtx); +} + void * -vcpu_stats(struct vm *vm, int vcpuid) +vcpu_stats(struct vcpu *vcpu) { - return (vm->vcpu[vcpuid].stats); + return (vcpu->stats); } int -vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) +vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state) { - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - - *state = vm->vcpu[vcpuid].x2apic_state; + *state = vcpu->x2apic_state; return (0); } int -vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) +vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) { - if (vcpuid < 0 || vcpuid >= vm->maxcpus) - return (EINVAL); - if (state >= X2APIC_STATE_LAST) return (EINVAL); - vm->vcpu[vcpuid].x2apic_state = state; + vcpu->x2apic_state = state; - vlapic_set_x2apic_state(vm, vcpuid, state); + vlapic_set_x2apic_state(vcpu, state); return (0); } @@ -2545,20 +2412,17 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) } void -vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) +vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) { - struct vcpu *vcpu = &vm->vcpu[vcpuid]; - vcpu_lock(vcpu); vcpu_notify_event_locked(vcpu, lapic_intr); vcpu_unlock(vcpu); } -struct vmspace * -vm_get_vmspace(struct vm *vm) +struct vm_mem * +vm_mem(struct vm *vm) { - - return (vm->vmspace); + return (&vm->mem); } int @@ -2571,29 +2435,28 @@ vm_apicid2vcpuid(struct vm *vm, int apicid) } int -vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, +vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest, vm_rendezvous_func_t func, void *arg) { + struct vm *vm = vcpu->vm; int error, i; /* * Enforce that this function is called without any locks */ WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); - KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), - ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); restart: mtx_lock(&vm->rendezvous_mtx); if (vm->rendezvous_func != NULL) { /* * If a rendezvous is already in progress then we need to - * call the rendezvous handler in case this 'vcpuid' is one + * call the rendezvous handler in case this 'vcpu' is one * of the targets of the rendezvous. 
*/ - RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); + VMM_CTR0(vcpu, "Rendezvous already in progress"); mtx_unlock(&vm->rendezvous_mtx); - error = vm_handle_rendezvous(vm, vcpuid); + error = vm_handle_rendezvous(vcpu); if (error != 0) return (error); goto restart; @@ -2601,7 +2464,7 @@ restart: KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " "rendezvous is still in progress")); - RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); + VMM_CTR0(vcpu, "Initiating rendezvous"); vm->rendezvous_req_cpus = dest; CPU_ZERO(&vm->rendezvous_done_cpus); vm->rendezvous_arg = arg; @@ -2614,10 +2477,10 @@ restart: */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &dest)) - vcpu_notify_event(vm, i, false); + vcpu_notify_event(vm_vcpu(vm, i), false); } - return (vm_handle_rendezvous(vm, vcpuid)); + return (vm_handle_rendezvous(vcpu)); } struct vatpic * @@ -2664,8 +2527,7 @@ vm_segment_name(int seg) } void -vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, - int num_copyinfo) +vm_copy_teardown(struct vm_copyinfo *copyinfo, int num_copyinfo) { int idx; @@ -2677,7 +2539,7 @@ vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, } int -vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, +vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, int num_copyinfo, int *fault) { @@ -2691,8 +2553,9 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, nused = 0; remaining = len; while (remaining > 0) { - KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); - error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault); + if (nused >= num_copyinfo) + return (EFAULT); + error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault); if (error || *fault) return (error); off = gpa & PAGE_MASK; @@ -2705,7 +2568,7 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, } for (idx = 0; idx < nused; idx++) { - hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa, + hva = vm_gpa_hold(vcpu, copyinfo[idx].gpa, copyinfo[idx].len, prot, &cookie); if (hva == NULL) break; @@ -2714,7 +2577,7 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, } if (idx != nused) { - vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); + vm_copy_teardown(copyinfo, num_copyinfo); return (EFAULT); } else { *fault = 0; @@ -2723,8 +2586,7 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, } void -vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, - size_t len) +vm_copyin(struct vm_copyinfo *copyinfo, void *kaddr, size_t len) { char *dst; int idx; @@ -2740,8 +2602,7 @@ vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, } void -vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, - struct vm_copyinfo *copyinfo, size_t len) +vm_copyout(const void *kaddr, struct vm_copyinfo *copyinfo, size_t len) { const char *src; int idx; @@ -2764,23 +2625,23 @@ VMM_STAT_DECLARE(VMM_MEM_RESIDENT); VMM_STAT_DECLARE(VMM_MEM_WIRED); static void -vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) +vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat) { - if (vcpu == 0) { - vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, - PAGE_SIZE * vmspace_resident_count(vm->vmspace)); - } + if (vcpu->vcpuid == 0) { + vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * + vmspace_resident_count(vm_vmspace(vcpu->vm))); + } } static void 
-vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) +vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat) { - if (vcpu == 0) { - vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, - PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); - } + if (vcpu->vcpuid == 0) { + vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE * + pmap_wired_count(vmspace_pmap(vm_vmspace(vcpu->vm)))); + } } VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); @@ -2790,12 +2651,17 @@ VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); static int vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta) { + uint64_t tsc, now; int ret; - int i; struct vcpu *vcpu; + uint16_t i, maxcpus; - for (i = 0; i < VM_MAXCPU; i++) { - vcpu = &vm->vcpu[i]; + now = rdtsc(); + maxcpus = vm_get_maxcpus(vm); + for (i = 0; i < maxcpus; i++) { + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); @@ -2805,13 +2671,17 @@ vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta) SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); - /* XXX we're cheating here, since the value of tsc_offset as - * saved here is actually the value of the guest's TSC value. + + /* + * Save the absolute TSC value by adding now to tsc_offset. * * It will be turned back into an actual offset when the * TSC restore function is called */ - SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done); + tsc = now + vcpu->tsc_offset; + SNAPSHOT_VAR_OR_LEAVE(tsc, meta, ret, done); + if (meta->op == VM_SNAPSHOT_RESTORE) + vcpu->tsc_offset = tsc; } done: @@ -2822,47 +2692,32 @@ static int vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta) { int ret; - int i; - uint64_t now; - - ret = 0; - now = rdtsc(); - - if (meta->op == VM_SNAPSHOT_SAVE) { - /* XXX make tsc_offset take the value TSC proper as seen by the - * guest - */ - for (i = 0; i < VM_MAXCPU; i++) - vm->vcpu[i].tsc_offset += now; - } ret = vm_snapshot_vcpus(vm, meta); - if (ret != 0) { - printf("%s: failed to copy vm data to user buffer", __func__); + if (ret != 0) goto done; - } - - if (meta->op == VM_SNAPSHOT_SAVE) { - /* XXX turn tsc_offset back into an offset; actual value is only - * required for restore; using it otherwise would be wrong - */ - for (i = 0; i < VM_MAXCPU; i++) - vm->vcpu[i].tsc_offset -= now; - } + SNAPSHOT_VAR_OR_LEAVE(vm->startup_cpus, meta, ret, done); done: return (ret); } static int -vm_snapshot_vmcx(struct vm *vm, struct vm_snapshot_meta *meta) +vm_snapshot_vcpu(struct vm *vm, struct vm_snapshot_meta *meta) { - int i, error; + int error; + struct vcpu *vcpu; + uint16_t i, maxcpus; error = 0; - for (i = 0; i < VM_MAXCPU; i++) { - error = vmmops_vmcx_snapshot(vm->cookie, meta, i); + maxcpus = vm_get_maxcpus(vm); + for (i = 0; i < maxcpus; i++) { + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; + + error = vmmops_vcpu_snapshot(vcpu->cookie, meta); if (error != 0) { printf("%s: failed to snapshot vmcs/vmcb data for " "vCPU: %d; error: %d\n", __func__, i, error); @@ -2883,11 +2738,8 @@ vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta) int ret = 0; switch (meta->dev_req) { - case STRUCT_VMX: - ret = vmmops_snapshot(vm->cookie, meta); - break; case STRUCT_VMCX: - ret = vm_snapshot_vmcx(vm, meta); + ret = vm_snapshot_vcpu(vm, meta); break; case STRUCT_VM: ret = vm_snapshot_vm(vm, 
meta); @@ -2921,26 +2773,19 @@ vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta) return (ret); } -int -vm_set_tsc_offset(struct vm *vm, int vcpuid, uint64_t offset) +void +vm_set_tsc_offset(struct vcpu *vcpu, uint64_t offset) { - struct vcpu *vcpu; - - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) - return (EINVAL); - - vcpu = &vm->vcpu[vcpuid]; vcpu->tsc_offset = offset; - - return (0); } int vm_restore_time(struct vm *vm) { - int error, i; + int error; uint64_t now; struct vcpu *vcpu; + uint16_t i, maxcpus; now = rdtsc(); @@ -2948,11 +2793,14 @@ vm_restore_time(struct vm *vm) if (error) return (error); - for (i = 0; i < nitems(vm->vcpu); i++) { - vcpu = &vm->vcpu[i]; + maxcpus = vm_get_maxcpus(vm); + for (i = 0; i < maxcpus; i++) { + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; - error = vmmops_restore_tsc(vm->cookie, i, vcpu->tsc_offset - - now); + error = vmmops_restore_tsc(vcpu->cookie, + vcpu->tsc_offset - now); if (error) return (error); } diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c deleted file mode 100644 index db8563c91830..000000000000 --- a/sys/amd64/vmm/vmm_dev.c +++ /dev/null @@ -1,1286 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "opt_bhyve_snapshot.h" - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/jail.h> -#include <sys/queue.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/malloc.h> -#include <sys/conf.h> -#include <sys/sysctl.h> -#include <sys/libkern.h> -#include <sys/ioccom.h> -#include <sys/mman.h> -#include <sys/uio.h> -#include <sys/proc.h> - -#include <vm/vm.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <vm/vm_object.h> - -#include <machine/vmparam.h> -#include <machine/vmm.h> -#include <machine/vmm_dev.h> -#include <machine/vmm_instruction_emul.h> -#include <machine/vmm_snapshot.h> -#include <x86/apicreg.h> - -#include "vmm_lapic.h" -#include "vmm_stat.h" -#include "vmm_mem.h" -#include "io/ppt.h" -#include "io/vatpic.h" -#include "io/vioapic.h" -#include "io/vhpet.h" -#include "io/vrtc.h" - -#ifdef COMPAT_FREEBSD13 -struct vm_stats_old { - int cpuid; /* in */ - int num_entries; /* out */ - struct timeval tv; - uint64_t statbuf[MAX_VM_STATS]; -}; - -#define VM_STATS_OLD \ - _IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old) -#endif - -struct devmem_softc { - int segid; - char *name; - struct cdev *cdev; - struct vmmdev_softc *sc; - SLIST_ENTRY(devmem_softc) link; -}; - -struct vmmdev_softc { - struct vm *vm; /* vm instance cookie */ - struct cdev *cdev; - struct ucred *ucred; - SLIST_ENTRY(vmmdev_softc) link; - SLIST_HEAD(, devmem_softc) devmem; - int flags; -}; -#define VSC_LINKED 0x01 - -static SLIST_HEAD(, vmmdev_softc) head; - -static unsigned pr_allow_flag; -static struct mtx vmmdev_mtx; - -static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); - -SYSCTL_DECL(_hw_vmm); - -static int vmm_priv_check(struct ucred *ucred); -static int devmem_create_cdev(const char *vmname, int id, char *devmem); -static void devmem_destroy(void *arg); - -static int -vmm_priv_check(struct ucred *ucred) -{ - - if (jailed(ucred) && - !(ucred->cr_prison->pr_allow & pr_allow_flag)) - return (EPERM); - - return (0); -} - -static int -vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) -{ - int error; - - if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm)) - return (EINVAL); - - error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); - return (error); -} - -static void -vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu) -{ - enum vcpu_state state; - - state = vcpu_get_state(sc->vm, vcpu, NULL); - if (state != VCPU_FROZEN) { - panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), - vcpu, state); - } - - vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); -} - -static int -vcpu_lock_all(struct vmmdev_softc *sc) -{ - int error, vcpu; - uint16_t maxcpus; - - maxcpus = vm_get_maxcpus(sc->vm); - for (vcpu = 0; vcpu < maxcpus; vcpu++) { - error = vcpu_lock_one(sc, vcpu); - if (error) - break; - } - - if (error) { - while (--vcpu >= 0) - vcpu_unlock_one(sc, vcpu); - } - - return (error); -} - -static void -vcpu_unlock_all(struct vmmdev_softc *sc) -{ - int vcpu; - uint16_t maxcpus; - - maxcpus = vm_get_maxcpus(sc->vm); - for (vcpu = 0; vcpu < maxcpus; vcpu++) - vcpu_unlock_one(sc, vcpu); -} - -static struct vmmdev_softc * -vmmdev_lookup(const char *name) -{ - struct vmmdev_softc *sc; - -#ifdef notyet /* XXX kernel is not compiled with invariants */ - mtx_assert(&vmmdev_mtx, MA_OWNED); -#endif - - SLIST_FOREACH(sc, &head, link) { - if (strcmp(name, vm_name(sc->vm)) == 0) - break; - } - - if (sc == NULL) - return (NULL); - - if (cr_cansee(curthread->td_ucred, sc->ucred)) - return (NULL); - - return (sc); -} - -static 
struct vmmdev_softc * -vmmdev_lookup2(struct cdev *cdev) -{ - - return (cdev->si_drv1); -} - -static int -vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) -{ - int error, off, c, prot; - vm_paddr_t gpa, maxaddr; - void *hpa, *cookie; - struct vmmdev_softc *sc; - uint16_t lastcpu; - - error = vmm_priv_check(curthread->td_ucred); - if (error) - return (error); - - sc = vmmdev_lookup2(cdev); - if (sc == NULL) - return (ENXIO); - - /* - * Get a read lock on the guest memory map by freezing any vcpu. - */ - lastcpu = vm_get_maxcpus(sc->vm) - 1; - error = vcpu_lock_one(sc, lastcpu); - if (error) - return (error); - - prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); - maxaddr = vmm_sysmem_maxaddr(sc->vm); - while (uio->uio_resid > 0 && error == 0) { - gpa = uio->uio_offset; - off = gpa & PAGE_MASK; - c = min(uio->uio_resid, PAGE_SIZE - off); - - /* - * The VM has a hole in its physical memory map. If we want to - * use 'dd' to inspect memory beyond the hole we need to - * provide bogus data for memory that lies in the hole. - * - * Since this device does not support lseek(2), dd(1) will - * read(2) blocks of data to simulate the lseek(2). - */ - hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c, - prot, &cookie); - if (hpa == NULL) { - if (uio->uio_rw == UIO_READ && gpa < maxaddr) - error = uiomove(__DECONST(void *, zero_region), - c, uio); - else - error = EFAULT; - } else { - error = uiomove(hpa, c, uio); - vm_gpa_release(cookie); - } - } - vcpu_unlock_one(sc, lastcpu); - return (error); -} - -CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); - -static int -get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) -{ - struct devmem_softc *dsc; - int error; - bool sysmem; - - error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); - if (error || mseg->len == 0) - return (error); - - if (!sysmem) { - SLIST_FOREACH(dsc, &sc->devmem, link) { - if (dsc->segid == mseg->segid) - break; - } - KASSERT(dsc != NULL, ("%s: devmem segment %d not found", - __func__, mseg->segid)); - error = copystr(dsc->name, mseg->name, len, NULL); - } else { - bzero(mseg->name, len); - } - - return (error); -} - -static int -alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) -{ - char *name; - int error; - bool sysmem; - - error = 0; - name = NULL; - sysmem = true; - - /* - * The allocation is lengthened by 1 to hold a terminating NUL. It'll - * be stripped off when devfs processes the full string. 
- */ - if (VM_MEMSEG_NAME(mseg)) { - sysmem = false; - name = malloc(len, M_VMMDEV, M_WAITOK); - error = copystr(mseg->name, name, len, NULL); - if (error) - goto done; - } - - error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); - if (error) - goto done; - - if (VM_MEMSEG_NAME(mseg)) { - error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); - if (error) - vm_free_memseg(sc->vm, mseg->segid); - else - name = NULL; /* freed when 'cdev' is destroyed */ - } -done: - free(name, M_VMMDEV); - return (error); -} - -static int -vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, - uint64_t *regval) -{ - int error, i; - - error = 0; - for (i = 0; i < count; i++) { - error = vm_get_register(vm, vcpu, regnum[i], &regval[i]); - if (error) - break; - } - return (error); -} - -static int -vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, - uint64_t *regval) -{ - int error, i; - - error = 0; - for (i = 0; i < count; i++) { - error = vm_set_register(vm, vcpu, regnum[i], regval[i]); - if (error) - break; - } - return (error); -} - -static int -vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, - struct thread *td) -{ - int error, vcpu, state_changed, size; - cpuset_t *cpuset; - struct vmmdev_softc *sc; - struct vm_register *vmreg; - struct vm_seg_desc *vmsegdesc; - struct vm_register_set *vmregset; - struct vm_run *vmrun; - struct vm_exception *vmexc; - struct vm_lapic_irq *vmirq; - struct vm_lapic_msi *vmmsi; - struct vm_ioapic_irq *ioapic_irq; - struct vm_isa_irq *isa_irq; - struct vm_isa_irq_trigger *isa_irq_trigger; - struct vm_capability *vmcap; - struct vm_pptdev *pptdev; - struct vm_pptdev_mmio *pptmmio; - struct vm_pptdev_msi *pptmsi; - struct vm_pptdev_msix *pptmsix; - struct vm_nmi *vmnmi; -#ifdef COMPAT_FREEBSD13 - struct vm_stats_old *vmstats_old; -#endif - struct vm_stats *vmstats; - struct vm_stat_desc *statdesc; - struct vm_x2apic *x2apic; - struct vm_gpa_pte *gpapte; - struct vm_suspend *vmsuspend; - struct vm_gla2gpa *gg; - struct vm_activate_cpu *vac; - struct vm_cpuset *vm_cpuset; - struct vm_intinfo *vmii; - struct vm_rtc_time *rtctime; - struct vm_rtc_data *rtcdata; - struct vm_memmap *mm; - struct vm_munmap *mu; - struct vm_cpu_topology *topology; - struct vm_readwrite_kernemu_device *kernemu; - uint64_t *regvals; - int *regnums; -#ifdef BHYVE_SNAPSHOT - struct vm_snapshot_meta *snapshot_meta; -#endif - - error = vmm_priv_check(curthread->td_ucred); - if (error) - return (error); - - sc = vmmdev_lookup2(cdev); - if (sc == NULL) - return (ENXIO); - - vcpu = -1; - state_changed = 0; - - /* - * Some VMM ioctls can operate only on vcpus that are not running. - */ - switch (cmd) { - case VM_RUN: - case VM_GET_REGISTER: - case VM_SET_REGISTER: - case VM_GET_SEGMENT_DESCRIPTOR: - case VM_SET_SEGMENT_DESCRIPTOR: - case VM_GET_REGISTER_SET: - case VM_SET_REGISTER_SET: - case VM_INJECT_EXCEPTION: - case VM_GET_CAPABILITY: - case VM_SET_CAPABILITY: - case VM_PPTDEV_MSI: - case VM_PPTDEV_MSIX: - case VM_SET_X2APIC_STATE: - case VM_GLA2GPA: - case VM_GLA2GPA_NOFAULT: - case VM_ACTIVATE_CPU: - case VM_SET_INTINFO: - case VM_GET_INTINFO: - case VM_RESTART_INSTRUCTION: - /* - * XXX fragile, handle with care - * Assumes that the first field of the ioctl data is the vcpu. 
- */ - vcpu = *(int *)data; - error = vcpu_lock_one(sc, vcpu); - if (error) - goto done; - state_changed = 1; - break; - - case VM_MAP_PPTDEV_MMIO: - case VM_UNMAP_PPTDEV_MMIO: - case VM_BIND_PPTDEV: - case VM_UNBIND_PPTDEV: -#ifdef COMPAT_FREEBSD12 - case VM_ALLOC_MEMSEG_FBSD12: -#endif - case VM_ALLOC_MEMSEG: - case VM_MMAP_MEMSEG: - case VM_MUNMAP_MEMSEG: - case VM_REINIT: - /* - * ioctls that operate on the entire virtual machine must - * prevent all vcpus from running. - */ - error = vcpu_lock_all(sc); - if (error) - goto done; - state_changed = 2; - break; - -#ifdef COMPAT_FREEBSD12 - case VM_GET_MEMSEG_FBSD12: -#endif - case VM_GET_MEMSEG: - case VM_MMAP_GETNEXT: - /* - * Lock a vcpu to make sure that the memory map cannot be - * modified while it is being inspected. - */ - vcpu = vm_get_maxcpus(sc->vm) - 1; - error = vcpu_lock_one(sc, vcpu); - if (error) - goto done; - state_changed = 1; - break; - - default: - break; - } - - switch(cmd) { - case VM_RUN: - vmrun = (struct vm_run *)data; - error = vm_run(sc->vm, vmrun); - break; - case VM_SUSPEND: - vmsuspend = (struct vm_suspend *)data; - error = vm_suspend(sc->vm, vmsuspend->how); - break; - case VM_REINIT: - error = vm_reinit(sc->vm); - break; - case VM_STAT_DESC: { - statdesc = (struct vm_stat_desc *)data; - error = vmm_stat_desc_copy(statdesc->index, - statdesc->desc, sizeof(statdesc->desc)); - break; - } -#ifdef COMPAT_FREEBSD13 - case VM_STATS_OLD: - vmstats_old = (struct vm_stats_old *)data; - getmicrotime(&vmstats_old->tv); - error = vmm_stat_copy(sc->vm, vmstats_old->cpuid, 0, - nitems(vmstats_old->statbuf), - &vmstats_old->num_entries, - vmstats_old->statbuf); - break; -#endif - case VM_STATS: { - vmstats = (struct vm_stats *)data; - getmicrotime(&vmstats->tv); - error = vmm_stat_copy(sc->vm, vmstats->cpuid, vmstats->index, - nitems(vmstats->statbuf), - &vmstats->num_entries, vmstats->statbuf); - break; - } - case VM_PPTDEV_MSI: - pptmsi = (struct vm_pptdev_msi *)data; - error = ppt_setup_msi(sc->vm, pptmsi->vcpu, - pptmsi->bus, pptmsi->slot, pptmsi->func, - pptmsi->addr, pptmsi->msg, - pptmsi->numvec); - break; - case VM_PPTDEV_MSIX: - pptmsix = (struct vm_pptdev_msix *)data; - error = ppt_setup_msix(sc->vm, pptmsix->vcpu, - pptmsix->bus, pptmsix->slot, - pptmsix->func, pptmsix->idx, - pptmsix->addr, pptmsix->msg, - pptmsix->vector_control); - break; - case VM_PPTDEV_DISABLE_MSIX: - pptdev = (struct vm_pptdev *)data; - error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot, - pptdev->func); - break; - case VM_MAP_PPTDEV_MMIO: - pptmmio = (struct vm_pptdev_mmio *)data; - error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, - pptmmio->func, pptmmio->gpa, pptmmio->len, - pptmmio->hpa); - break; - case VM_UNMAP_PPTDEV_MMIO: - pptmmio = (struct vm_pptdev_mmio *)data; - error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot, - pptmmio->func, pptmmio->gpa, pptmmio->len); - break; - case VM_BIND_PPTDEV: - pptdev = (struct vm_pptdev *)data; - error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, - pptdev->func); - break; - case VM_UNBIND_PPTDEV: - pptdev = (struct vm_pptdev *)data; - error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, - pptdev->func); - break; - case VM_INJECT_EXCEPTION: - vmexc = (struct vm_exception *)data; - error = vm_inject_exception(sc->vm, vmexc->cpuid, - vmexc->vector, vmexc->error_code_valid, vmexc->error_code, - vmexc->restart_instruction); - break; - case VM_INJECT_NMI: - vmnmi = (struct vm_nmi *)data; - error = vm_inject_nmi(sc->vm, vmnmi->cpuid); - break; - case 
VM_LAPIC_IRQ: - vmirq = (struct vm_lapic_irq *)data; - error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); - break; - case VM_LAPIC_LOCAL_IRQ: - vmirq = (struct vm_lapic_irq *)data; - error = lapic_set_local_intr(sc->vm, vmirq->cpuid, - vmirq->vector); - break; - case VM_LAPIC_MSI: - vmmsi = (struct vm_lapic_msi *)data; - error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); - break; - case VM_IOAPIC_ASSERT_IRQ: - ioapic_irq = (struct vm_ioapic_irq *)data; - error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); - break; - case VM_IOAPIC_DEASSERT_IRQ: - ioapic_irq = (struct vm_ioapic_irq *)data; - error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); - break; - case VM_IOAPIC_PULSE_IRQ: - ioapic_irq = (struct vm_ioapic_irq *)data; - error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); - break; - case VM_IOAPIC_PINCOUNT: - *(int *)data = vioapic_pincount(sc->vm); - break; - case VM_SET_KERNEMU_DEV: - case VM_GET_KERNEMU_DEV: { - mem_region_write_t mwrite; - mem_region_read_t mread; - bool arg; - - kernemu = (void *)data; - - if (kernemu->access_width > 0) - size = (1u << kernemu->access_width); - else - size = 1; - - if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { - mread = lapic_mmio_read; - mwrite = lapic_mmio_write; - } else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { - mread = vioapic_mmio_read; - mwrite = vioapic_mmio_write; - } else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) { - mread = vhpet_mmio_read; - mwrite = vhpet_mmio_write; - } else { - error = EINVAL; - break; - } - - if (cmd == VM_SET_KERNEMU_DEV) - error = mwrite(sc->vm, kernemu->vcpuid, kernemu->gpa, - kernemu->value, size, &arg); - else - error = mread(sc->vm, kernemu->vcpuid, kernemu->gpa, - &kernemu->value, size, &arg); - break; - } - case VM_ISA_ASSERT_IRQ: - isa_irq = (struct vm_isa_irq *)data; - error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); - if (error == 0 && isa_irq->ioapic_irq != -1) - error = vioapic_assert_irq(sc->vm, - isa_irq->ioapic_irq); - break; - case VM_ISA_DEASSERT_IRQ: - isa_irq = (struct vm_isa_irq *)data; - error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); - if (error == 0 && isa_irq->ioapic_irq != -1) - error = vioapic_deassert_irq(sc->vm, - isa_irq->ioapic_irq); - break; - case VM_ISA_PULSE_IRQ: - isa_irq = (struct vm_isa_irq *)data; - error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); - if (error == 0 && isa_irq->ioapic_irq != -1) - error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); - break; - case VM_ISA_SET_IRQ_TRIGGER: - isa_irq_trigger = (struct vm_isa_irq_trigger *)data; - error = vatpic_set_irq_trigger(sc->vm, - isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); - break; - case VM_MMAP_GETNEXT: - mm = (struct vm_memmap *)data; - error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, - &mm->segoff, &mm->len, &mm->prot, &mm->flags); - break; - case VM_MMAP_MEMSEG: - mm = (struct vm_memmap *)data; - error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, - mm->len, mm->prot, mm->flags); - break; - case VM_MUNMAP_MEMSEG: - mu = (struct vm_munmap *)data; - error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); - break; -#ifdef COMPAT_FREEBSD12 - case VM_ALLOC_MEMSEG_FBSD12: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg_fbsd12 *)0)->name)); - break; -#endif - case VM_ALLOC_MEMSEG: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg *)0)->name)); - break; -#ifdef COMPAT_FREEBSD12 - case 
VM_GET_MEMSEG_FBSD12: - error = get_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg_fbsd12 *)0)->name)); - break; -#endif - case VM_GET_MEMSEG: - error = get_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg *)0)->name)); - break; - case VM_GET_REGISTER: - vmreg = (struct vm_register *)data; - error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, - &vmreg->regval); - break; - case VM_SET_REGISTER: - vmreg = (struct vm_register *)data; - error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, - vmreg->regval); - break; - case VM_SET_SEGMENT_DESCRIPTOR: - vmsegdesc = (struct vm_seg_desc *)data; - error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, - vmsegdesc->regnum, - &vmsegdesc->desc); - break; - case VM_GET_SEGMENT_DESCRIPTOR: - vmsegdesc = (struct vm_seg_desc *)data; - error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, - vmsegdesc->regnum, - &vmsegdesc->desc); - break; - case VM_GET_REGISTER_SET: - vmregset = (struct vm_register_set *)data; - if (vmregset->count > VM_REG_LAST) { - error = EINVAL; - break; - } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * - vmregset->count); - if (error == 0) - error = vm_get_register_set(sc->vm, vmregset->cpuid, - vmregset->count, regnums, regvals); - if (error == 0) - error = copyout(regvals, vmregset->regvals, - sizeof(regvals[0]) * vmregset->count); - free(regvals, M_VMMDEV); - free(regnums, M_VMMDEV); - break; - case VM_SET_REGISTER_SET: - vmregset = (struct vm_register_set *)data; - if (vmregset->count > VM_REG_LAST) { - error = EINVAL; - break; - } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * - vmregset->count); - if (error == 0) - error = copyin(vmregset->regvals, regvals, - sizeof(regvals[0]) * vmregset->count); - if (error == 0) - error = vm_set_register_set(sc->vm, vmregset->cpuid, - vmregset->count, regnums, regvals); - free(regvals, M_VMMDEV); - free(regnums, M_VMMDEV); - break; - case VM_GET_CAPABILITY: - vmcap = (struct vm_capability *)data; - error = vm_get_capability(sc->vm, vmcap->cpuid, - vmcap->captype, - &vmcap->capval); - break; - case VM_SET_CAPABILITY: - vmcap = (struct vm_capability *)data; - error = vm_set_capability(sc->vm, vmcap->cpuid, - vmcap->captype, - vmcap->capval); - break; - case VM_SET_X2APIC_STATE: - x2apic = (struct vm_x2apic *)data; - error = vm_set_x2apic_state(sc->vm, - x2apic->cpuid, x2apic->state); - break; - case VM_GET_X2APIC_STATE: - x2apic = (struct vm_x2apic *)data; - error = vm_get_x2apic_state(sc->vm, - x2apic->cpuid, &x2apic->state); - break; - case VM_GET_GPA_PMAP: - gpapte = (struct vm_gpa_pte *)data; - pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), - gpapte->gpa, gpapte->pte, &gpapte->ptenum); - error = 0; - break; - case VM_GET_HPET_CAPABILITIES: - error = vhpet_getcap((struct vm_hpet_cap *)data); - break; - case VM_GLA2GPA: { - CTASSERT(PROT_READ == VM_PROT_READ); - CTASSERT(PROT_WRITE == VM_PROT_WRITE); - CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); - gg = (struct vm_gla2gpa *)data; - error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, - gg->prot, &gg->gpa, &gg->fault); - KASSERT(error == 0 || error == EFAULT, - ("%s: vm_gla2gpa unknown error %d", __func__, error)); - break; - } - case 
VM_GLA2GPA_NOFAULT: - gg = (struct vm_gla2gpa *)data; - error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging, - gg->gla, gg->prot, &gg->gpa, &gg->fault); - KASSERT(error == 0 || error == EFAULT, - ("%s: vm_gla2gpa unknown error %d", __func__, error)); - break; - case VM_ACTIVATE_CPU: - vac = (struct vm_activate_cpu *)data; - error = vm_activate_cpu(sc->vm, vac->vcpuid); - break; - case VM_GET_CPUS: - error = 0; - vm_cpuset = (struct vm_cpuset *)data; - size = vm_cpuset->cpusetsize; - if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { - error = ERANGE; - break; - } - cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); - if (vm_cpuset->which == VM_ACTIVE_CPUS) - *cpuset = vm_active_cpus(sc->vm); - else if (vm_cpuset->which == VM_SUSPENDED_CPUS) - *cpuset = vm_suspended_cpus(sc->vm); - else if (vm_cpuset->which == VM_DEBUG_CPUS) - *cpuset = vm_debug_cpus(sc->vm); - else - error = EINVAL; - if (error == 0) - error = copyout(cpuset, vm_cpuset->cpus, size); - free(cpuset, M_TEMP); - break; - case VM_SUSPEND_CPU: - vac = (struct vm_activate_cpu *)data; - error = vm_suspend_cpu(sc->vm, vac->vcpuid); - break; - case VM_RESUME_CPU: - vac = (struct vm_activate_cpu *)data; - error = vm_resume_cpu(sc->vm, vac->vcpuid); - break; - case VM_SET_INTINFO: - vmii = (struct vm_intinfo *)data; - error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); - break; - case VM_GET_INTINFO: - vmii = (struct vm_intinfo *)data; - error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, - &vmii->info2); - break; - case VM_RTC_WRITE: - rtcdata = (struct vm_rtc_data *)data; - error = vrtc_nvram_write(sc->vm, rtcdata->offset, - rtcdata->value); - break; - case VM_RTC_READ: - rtcdata = (struct vm_rtc_data *)data; - error = vrtc_nvram_read(sc->vm, rtcdata->offset, - &rtcdata->value); - break; - case VM_RTC_SETTIME: - rtctime = (struct vm_rtc_time *)data; - error = vrtc_set_time(sc->vm, rtctime->secs); - break; - case VM_RTC_GETTIME: - error = 0; - rtctime = (struct vm_rtc_time *)data; - rtctime->secs = vrtc_get_time(sc->vm); - break; - case VM_RESTART_INSTRUCTION: - error = vm_restart_instruction(sc->vm, vcpu); - break; - case VM_SET_TOPOLOGY: - topology = (struct vm_cpu_topology *)data; - error = vm_set_topology(sc->vm, topology->sockets, - topology->cores, topology->threads, topology->maxcpus); - break; - case VM_GET_TOPOLOGY: - topology = (struct vm_cpu_topology *)data; - vm_get_topology(sc->vm, &topology->sockets, &topology->cores, - &topology->threads, &topology->maxcpus); - error = 0; - break; -#ifdef BHYVE_SNAPSHOT - case VM_SNAPSHOT_REQ: - snapshot_meta = (struct vm_snapshot_meta *)data; - error = vm_snapshot_req(sc->vm, snapshot_meta); - break; - case VM_RESTORE_TIME: - error = vm_restore_time(sc->vm); - break; -#endif - default: - error = ENOTTY; - break; - } - - if (state_changed == 1) - vcpu_unlock_one(sc, vcpu); - else if (state_changed == 2) - vcpu_unlock_all(sc); - -done: - /* - * Make sure that no handler returns a kernel-internal - * error value to userspace. 
- */ - KASSERT(error == ERESTART || error >= 0, - ("vmmdev_ioctl: invalid error return %d", error)); - return (error); -} - -static int -vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, - struct vm_object **objp, int nprot) -{ - struct vmmdev_softc *sc; - vm_paddr_t gpa; - size_t len; - vm_ooffset_t segoff, first, last; - int error, found, segid; - uint16_t lastcpu; - bool sysmem; - - error = vmm_priv_check(curthread->td_ucred); - if (error) - return (error); - - first = *offset; - last = first + mapsize; - if ((nprot & PROT_EXEC) || first < 0 || first >= last) - return (EINVAL); - - sc = vmmdev_lookup2(cdev); - if (sc == NULL) { - /* virtual machine is in the process of being created */ - return (EINVAL); - } - - /* - * Get a read lock on the guest memory map by freezing any vcpu. - */ - lastcpu = vm_get_maxcpus(sc->vm) - 1; - error = vcpu_lock_one(sc, lastcpu); - if (error) - return (error); - - gpa = 0; - found = 0; - while (!found) { - error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, - NULL, NULL); - if (error) - break; - - if (first >= gpa && last <= gpa + len) - found = 1; - else - gpa += len; - } - - if (found) { - error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); - KASSERT(error == 0 && *objp != NULL, - ("%s: invalid memory segment %d", __func__, segid)); - if (sysmem) { - vm_object_reference(*objp); - *offset = segoff + (first - gpa); - } else { - error = EINVAL; - } - } - vcpu_unlock_one(sc, lastcpu); - return (error); -} - -static void -vmmdev_destroy(void *arg) -{ - struct vmmdev_softc *sc = arg; - struct devmem_softc *dsc; - int error __diagused; - - error = vcpu_lock_all(sc); - KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); - - while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { - KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); - SLIST_REMOVE_HEAD(&sc->devmem, link); - free(dsc->name, M_VMMDEV); - free(dsc, M_VMMDEV); - } - - if (sc->cdev != NULL) - destroy_dev(sc->cdev); - - if (sc->vm != NULL) - vm_destroy(sc->vm); - - if (sc->ucred != NULL) - crfree(sc->ucred); - - if ((sc->flags & VSC_LINKED) != 0) { - mtx_lock(&vmmdev_mtx); - SLIST_REMOVE(&head, sc, vmmdev_softc, link); - mtx_unlock(&vmmdev_mtx); - } - - free(sc, M_VMMDEV); -} - -static int -sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) -{ - struct devmem_softc *dsc; - struct vmmdev_softc *sc; - struct cdev *cdev; - char *buf; - int error, buflen; - - error = vmm_priv_check(req->td->td_ucred); - if (error) - return (error); - - buflen = VM_MAX_NAMELEN + 1; - buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); - error = sysctl_handle_string(oidp, buf, buflen, req); - if (error != 0 || req->newptr == NULL) - goto out; - - mtx_lock(&vmmdev_mtx); - sc = vmmdev_lookup(buf); - if (sc == NULL || sc->cdev == NULL) { - mtx_unlock(&vmmdev_mtx); - error = EINVAL; - goto out; - } - - /* - * Setting 'sc->cdev' to NULL is used to indicate that the VM - * is scheduled for destruction. - */ - cdev = sc->cdev; - sc->cdev = NULL; - mtx_unlock(&vmmdev_mtx); - - /* - * Destroy all cdevs: - * - * - any new operations on the 'cdev' will return an error (ENXIO). 
- * - * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' - */ - SLIST_FOREACH(dsc, &sc->devmem, link) { - KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); - destroy_dev(dsc->cdev); - devmem_destroy(dsc); - } - destroy_dev(cdev); - vmmdev_destroy(sc); - error = 0; - -out: - free(buf, M_VMMDEV); - return (error); -} -SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, - CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, - NULL, 0, sysctl_vmm_destroy, "A", - NULL); - -static struct cdevsw vmmdevsw = { - .d_name = "vmmdev", - .d_version = D_VERSION, - .d_ioctl = vmmdev_ioctl, - .d_mmap_single = vmmdev_mmap_single, - .d_read = vmmdev_rw, - .d_write = vmmdev_rw, -}; - -static int -sysctl_vmm_create(SYSCTL_HANDLER_ARGS) -{ - struct vm *vm; - struct cdev *cdev; - struct vmmdev_softc *sc, *sc2; - char *buf; - int error, buflen; - - error = vmm_priv_check(req->td->td_ucred); - if (error) - return (error); - - buflen = VM_MAX_NAMELEN + 1; - buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); - error = sysctl_handle_string(oidp, buf, buflen, req); - if (error != 0 || req->newptr == NULL) - goto out; - - mtx_lock(&vmmdev_mtx); - sc = vmmdev_lookup(buf); - mtx_unlock(&vmmdev_mtx); - if (sc != NULL) { - error = EEXIST; - goto out; - } - - error = vm_create(buf, &vm); - if (error != 0) - goto out; - - sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); - sc->ucred = crhold(curthread->td_ucred); - sc->vm = vm; - SLIST_INIT(&sc->devmem); - - /* - * Lookup the name again just in case somebody sneaked in when we - * dropped the lock. - */ - mtx_lock(&vmmdev_mtx); - sc2 = vmmdev_lookup(buf); - if (sc2 == NULL) { - SLIST_INSERT_HEAD(&head, sc, link); - sc->flags |= VSC_LINKED; - } - mtx_unlock(&vmmdev_mtx); - - if (sc2 != NULL) { - vmmdev_destroy(sc); - error = EEXIST; - goto out; - } - - error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, - UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); - if (error != 0) { - vmmdev_destroy(sc); - goto out; - } - - mtx_lock(&vmmdev_mtx); - sc->cdev = cdev; - sc->cdev->si_drv1 = sc; - mtx_unlock(&vmmdev_mtx); - -out: - free(buf, M_VMMDEV); - return (error); -} -SYSCTL_PROC(_hw_vmm, OID_AUTO, create, - CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, - NULL, 0, sysctl_vmm_create, "A", - NULL); - -void -vmmdev_init(void) -{ - mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); - pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, - "Allow use of vmm in a jail."); -} - -int -vmmdev_cleanup(void) -{ - int error; - - if (SLIST_EMPTY(&head)) - error = 0; - else - error = EBUSY; - - return (error); -} - -static int -devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, - struct vm_object **objp, int nprot) -{ - struct devmem_softc *dsc; - vm_ooffset_t first, last; - size_t seglen; - int error; - uint16_t lastcpu; - bool sysmem; - - dsc = cdev->si_drv1; - if (dsc == NULL) { - /* 'cdev' has been created but is not ready for use */ - return (ENXIO); - } - - first = *offset; - last = *offset + len; - if ((nprot & PROT_EXEC) || first < 0 || first >= last) - return (EINVAL); - - lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1; - error = vcpu_lock_one(dsc->sc, lastcpu); - if (error) - return (error); - - error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); - KASSERT(error == 0 && !sysmem && *objp != NULL, - ("%s: invalid devmem segment %d", __func__, dsc->segid)); - - vcpu_unlock_one(dsc->sc, lastcpu); - - if (seglen >= last) { - 
vm_object_reference(*objp); - return (0); - } else { - return (EINVAL); - } -} - -static struct cdevsw devmemsw = { - .d_name = "devmem", - .d_version = D_VERSION, - .d_mmap_single = devmem_mmap_single, -}; - -static int -devmem_create_cdev(const char *vmname, int segid, char *devname) -{ - struct devmem_softc *dsc; - struct vmmdev_softc *sc; - struct cdev *cdev; - int error; - - error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, - UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); - if (error) - return (error); - - dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); - - mtx_lock(&vmmdev_mtx); - sc = vmmdev_lookup(vmname); - KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); - if (sc->cdev == NULL) { - /* virtual machine is being created or destroyed */ - mtx_unlock(&vmmdev_mtx); - free(dsc, M_VMMDEV); - destroy_dev_sched_cb(cdev, NULL, 0); - return (ENODEV); - } - - dsc->segid = segid; - dsc->name = devname; - dsc->cdev = cdev; - dsc->sc = sc; - SLIST_INSERT_HEAD(&sc->devmem, dsc, link); - mtx_unlock(&vmmdev_mtx); - - /* The 'cdev' is ready for use after 'si_drv1' is initialized */ - cdev->si_drv1 = dsc; - return (0); -} - -static void -devmem_destroy(void *arg) -{ - struct devmem_softc *dsc = arg; - - KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); - dsc->cdev = NULL; - dsc->sc = NULL; -} diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c new file mode 100644 index 000000000000..b84be809ea24 --- /dev/null +++ b/sys/amd64/vmm/vmm_dev_machdep.c @@ -0,0 +1,596 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_bhyve_snapshot.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/libkern.h> +#include <sys/ioccom.h> +#include <sys/mman.h> +#include <sys/uio.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> + +#include <machine/vmparam.h> +#include <machine/vmm.h> +#include <machine/vmm_instruction_emul.h> +#include <machine/vmm_snapshot.h> +#include <x86/apicreg.h> + +#include <dev/vmm/vmm_dev.h> +#include <dev/vmm/vmm_mem.h> +#include <dev/vmm/vmm_stat.h> + +#include "vmm_lapic.h" +#include "vmm_mem.h" +#include "io/ppt.h" +#include "io/vatpic.h" +#include "io/vioapic.h" +#include "io/vhpet.h" +#include "io/vrtc.h" + +#ifdef COMPAT_FREEBSD13 +struct vm_stats_13 { + int cpuid; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; + +#define VM_STATS_13 _IOWR('v', IOCNUM_VM_STATS, struct vm_stats_13) + +struct vm_snapshot_meta_13 { + void *ctx; /* unused */ + void *dev_data; + const char *dev_name; /* identify userspace devices */ + enum snapshot_req dev_req; /* identify kernel structs */ + + struct vm_snapshot_buffer buffer; + + enum vm_snapshot_op op; +}; + +#define VM_SNAPSHOT_REQ_13 \ + _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_13) + +struct vm_exit_ipi_13 { + uint32_t mode; + uint8_t vector; + __BITSET_DEFINE(, 256) dmask; +}; + +struct vm_exit_13 { + uint32_t exitcode; + int32_t inst_length; + uint64_t rip; + uint64_t u[120 / sizeof(uint64_t)]; +}; + +struct vm_run_13 { + int cpuid; + struct vm_exit_13 vm_exit; +}; + +#define VM_RUN_13 \ + _IOWR('v', IOCNUM_RUN, struct vm_run_13) + +#endif /* COMPAT_FREEBSD13 */ + +const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { + VMMDEV_IOCTL(VM_RUN, VMMDEV_IOCTL_LOCK_ONE_VCPU), +#ifdef COMPAT_FREEBSD13 + VMMDEV_IOCTL(VM_RUN_13, VMMDEV_IOCTL_LOCK_ONE_VCPU), +#endif + VMMDEV_IOCTL(VM_GET_SEGMENT_DESCRIPTOR, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_SEGMENT_DESCRIPTOR, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_X2APIC_STATE, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA_NOFAULT, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_INTINFO, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_INTINFO, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_RESTART_INSTRUCTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_KERNEMU_DEV, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_KERNEMU_DEV, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_BIND_PPTDEV, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_UNBIND_PPTDEV, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + + VMMDEV_IOCTL(VM_MAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_UNMAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), +#ifdef BHYVE_SNAPSHOT +#ifdef COMPAT_FREEBSD13 + VMMDEV_IOCTL(VM_SNAPSHOT_REQ_13, VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif + VMMDEV_IOCTL(VM_SNAPSHOT_REQ, VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_RESTORE_TIME, VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif + +#ifdef COMPAT_FREEBSD13 + VMMDEV_IOCTL(VM_STATS_13, VMMDEV_IOCTL_LOCK_ONE_VCPU), +#endif + VMMDEV_IOCTL(VM_INJECT_NMI, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_LAPIC_IRQ, VMMDEV_IOCTL_LOCK_ONE_VCPU), + 
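+	/*
+	 * A note on this table: each VMMDEV_IOCTL() entry pairs a command
+	 * with flags that the generic dispatcher in dev/vmm/vmm_dev.c
+	 * applies before invoking vmmdev_machdep_ioctl(). As the names
+	 * suggest (semantics inferred from the flags and from the
+	 * hand-rolled locking this table replaces): LOCK_ONE_VCPU freezes
+	 * the target vCPU, LOCK_ALL_VCPUS freezes every vCPU,
+	 * XLOCK_MEMSEGS takes the memory-segment lock exclusively, and
+	 * PRIV_CHECK_DRIVER adds a driver privilege check.
+	 */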
VMMDEV_IOCTL(VM_GET_X2APIC_STATE, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_LAPIC_LOCAL_IRQ, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), + + VMMDEV_IOCTL(VM_PPTDEV_MSI, VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_PPTDEV_MSIX, VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_PPTDEV_DISABLE_MSIX, VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_LAPIC_MSI, 0), + VMMDEV_IOCTL(VM_IOAPIC_ASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_IOAPIC_DEASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_IOAPIC_PULSE_IRQ, 0), + VMMDEV_IOCTL(VM_IOAPIC_PINCOUNT, 0), + VMMDEV_IOCTL(VM_ISA_ASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_ISA_DEASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_ISA_PULSE_IRQ, 0), + VMMDEV_IOCTL(VM_ISA_SET_IRQ_TRIGGER, 0), + VMMDEV_IOCTL(VM_GET_GPA_PMAP, 0), + VMMDEV_IOCTL(VM_GET_HPET_CAPABILITIES, 0), + VMMDEV_IOCTL(VM_RTC_READ, 0), + VMMDEV_IOCTL(VM_RTC_WRITE, 0), + VMMDEV_IOCTL(VM_RTC_GETTIME, 0), + VMMDEV_IOCTL(VM_RTC_SETTIME, 0), +}; +const size_t vmmdev_machdep_ioctl_count = nitems(vmmdev_machdep_ioctls); + +int +vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, + int fflag, struct thread *td) +{ + int error; + + error = 0; + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + struct vm_run *vmrun; + + vmrun = (struct vm_run *)data; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + if (error != 0) + break; + if (vme->exitcode == VM_EXITCODE_IPI) { + error = copyout(vm_exitinfo_cpuset(vcpu), + vmrun->cpuset, + min(vmrun->cpusetsize, sizeof(cpuset_t))); + if (error != 0) + break; + if (sizeof(cpuset_t) < vmrun->cpusetsize) { + uint8_t *p; + + p = (uint8_t *)vmrun->cpuset + + sizeof(cpuset_t); + while (p < (uint8_t *)vmrun->cpuset + + vmrun->cpusetsize) { + if (subyte(p++, 0) != 0) { + error = EFAULT; + break; + } + } + } + } + break; + } +#ifdef COMPAT_FREEBSD13 + case VM_RUN_13: { + struct vm_exit *vme; + struct vm_exit_13 *vme_13; + struct vm_run_13 *vmrun_13; + + vmrun_13 = (struct vm_run_13 *)data; + vme_13 = &vmrun_13->vm_exit; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error == 0) { + vme_13->exitcode = vme->exitcode; + vme_13->inst_length = vme->inst_length; + vme_13->rip = vme->rip; + memcpy(vme_13->u, &vme->u, sizeof(vme_13->u)); + if (vme->exitcode == VM_EXITCODE_IPI) { + struct vm_exit_ipi_13 *ipi; + cpuset_t *dmask; + int cpu; + + dmask = vm_exitinfo_cpuset(vcpu); + ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0]; + BIT_ZERO(256, &ipi->dmask); + CPU_FOREACH_ISSET(cpu, dmask) { + if (cpu >= 256) + break; + BIT_SET(256, cpu, &ipi->dmask); + } + } + } + break; + } + case VM_STATS_13: { + struct vm_stats_13 *vmstats_13; + + vmstats_13 = (struct vm_stats_13 *)data; + getmicrotime(&vmstats_13->tv); + error = vmm_stat_copy(vcpu, 0, nitems(vmstats_13->statbuf), + &vmstats_13->num_entries, vmstats_13->statbuf); + break; + } +#endif + case VM_PPTDEV_MSI: { + struct vm_pptdev_msi *pptmsi; + + pptmsi = (struct vm_pptdev_msi *)data; + error = ppt_setup_msi(vm, pptmsi->bus, pptmsi->slot, + pptmsi->func, pptmsi->addr, pptmsi->msg, pptmsi->numvec); + break; + } + case VM_PPTDEV_MSIX: { + struct vm_pptdev_msix *pptmsix; + + pptmsix = (struct vm_pptdev_msix *)data; + error = ppt_setup_msix(vm, pptmsix->bus, pptmsix->slot, + pptmsix->func, pptmsix->idx, pptmsix->addr, pptmsix->msg, + pptmsix->vector_control); + break; + } + case VM_PPTDEV_DISABLE_MSIX: { + struct vm_pptdev *pptdev; + + pptdev = (struct vm_pptdev *)data; + error = ppt_disable_msix(vm, pptdev->bus, pptdev->slot, + pptdev->func); + break; 
+ } + case VM_MAP_PPTDEV_MMIO: { + struct vm_pptdev_mmio *pptmmio; + + pptmmio = (struct vm_pptdev_mmio *)data; + error = ppt_map_mmio(vm, pptmmio->bus, pptmmio->slot, + pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa); + break; + } + case VM_UNMAP_PPTDEV_MMIO: { + struct vm_pptdev_mmio *pptmmio; + + pptmmio = (struct vm_pptdev_mmio *)data; + error = ppt_unmap_mmio(vm, pptmmio->bus, pptmmio->slot, + pptmmio->func, pptmmio->gpa, pptmmio->len); + break; + } + case VM_BIND_PPTDEV: { + struct vm_pptdev *pptdev; + + pptdev = (struct vm_pptdev *)data; + error = vm_assign_pptdev(vm, pptdev->bus, pptdev->slot, + pptdev->func); + break; + } + case VM_UNBIND_PPTDEV: { + struct vm_pptdev *pptdev; + + pptdev = (struct vm_pptdev *)data; + error = vm_unassign_pptdev(vm, pptdev->bus, pptdev->slot, + pptdev->func); + break; + } + case VM_INJECT_EXCEPTION: { + struct vm_exception *vmexc; + + vmexc = (struct vm_exception *)data; + error = vm_inject_exception(vcpu, + vmexc->vector, vmexc->error_code_valid, vmexc->error_code, + vmexc->restart_instruction); + break; + } + case VM_INJECT_NMI: + error = vm_inject_nmi(vcpu); + break; + case VM_LAPIC_IRQ: { + struct vm_lapic_irq *vmirq; + + vmirq = (struct vm_lapic_irq *)data; + error = lapic_intr_edge(vcpu, vmirq->vector); + break; + } + case VM_LAPIC_LOCAL_IRQ: { + struct vm_lapic_irq *vmirq; + + vmirq = (struct vm_lapic_irq *)data; + error = lapic_set_local_intr(vm, vcpu, vmirq->vector); + break; + } + case VM_LAPIC_MSI: { + struct vm_lapic_msi *vmmsi; + + vmmsi = (struct vm_lapic_msi *)data; + error = lapic_intr_msi(vm, vmmsi->addr, vmmsi->msg); + break; + } + case VM_IOAPIC_ASSERT_IRQ: { + struct vm_ioapic_irq *ioapic_irq; + + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_assert_irq(vm, ioapic_irq->irq); + break; + } + case VM_IOAPIC_DEASSERT_IRQ: { + struct vm_ioapic_irq *ioapic_irq; + + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_deassert_irq(vm, ioapic_irq->irq); + break; + } + case VM_IOAPIC_PULSE_IRQ: { + struct vm_ioapic_irq *ioapic_irq; + + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_pulse_irq(vm, ioapic_irq->irq); + break; + } + case VM_IOAPIC_PINCOUNT: + *(int *)data = vioapic_pincount(vm); + break; + case VM_SET_KERNEMU_DEV: + case VM_GET_KERNEMU_DEV: { + struct vm_readwrite_kernemu_device *kernemu; + mem_region_write_t mwrite; + mem_region_read_t mread; + int size; + bool arg; + + kernemu = (void *)data; + + if (kernemu->access_width > 0) + size = (1u << kernemu->access_width); + else + size = 1; + + if (kernemu->gpa >= DEFAULT_APIC_BASE && + kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { + mread = lapic_mmio_read; + mwrite = lapic_mmio_write; + } else if (kernemu->gpa >= VIOAPIC_BASE && + kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { + mread = vioapic_mmio_read; + mwrite = vioapic_mmio_write; + } else if (kernemu->gpa >= VHPET_BASE && + kernemu->gpa < VHPET_BASE + VHPET_SIZE) { + mread = vhpet_mmio_read; + mwrite = vhpet_mmio_write; + } else { + error = EINVAL; + break; + } + + if (cmd == VM_SET_KERNEMU_DEV) + error = mwrite(vcpu, kernemu->gpa, + kernemu->value, size, &arg); + else + error = mread(vcpu, kernemu->gpa, + &kernemu->value, size, &arg); + break; + } + case VM_ISA_ASSERT_IRQ: { + struct vm_isa_irq *isa_irq; + + isa_irq = (struct vm_isa_irq *)data; + error = vatpic_assert_irq(vm, isa_irq->atpic_irq); + if (error == 0 && isa_irq->ioapic_irq != -1) + error = vioapic_assert_irq(vm, isa_irq->ioapic_irq); + break; + } + case VM_ISA_DEASSERT_IRQ: { + struct vm_isa_irq *isa_irq; + + 
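+		/*
+		 * As in the assert case above: the legacy IRQ goes to the
+		 * ATPIC and, when a pin is given (ioapic_irq != -1), is
+		 * mirrored to the I/O APIC.
+		 */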
isa_irq = (struct vm_isa_irq *)data; + error = vatpic_deassert_irq(vm, isa_irq->atpic_irq); + if (error == 0 && isa_irq->ioapic_irq != -1) + error = vioapic_deassert_irq(vm, isa_irq->ioapic_irq); + break; + } + case VM_ISA_PULSE_IRQ: { + struct vm_isa_irq *isa_irq; + + isa_irq = (struct vm_isa_irq *)data; + error = vatpic_pulse_irq(vm, isa_irq->atpic_irq); + if (error == 0 && isa_irq->ioapic_irq != -1) + error = vioapic_pulse_irq(vm, isa_irq->ioapic_irq); + break; + } + case VM_ISA_SET_IRQ_TRIGGER: { + struct vm_isa_irq_trigger *isa_irq_trigger; + + isa_irq_trigger = (struct vm_isa_irq_trigger *)data; + error = vatpic_set_irq_trigger(vm, + isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); + break; + } + case VM_SET_SEGMENT_DESCRIPTOR: { + struct vm_seg_desc *vmsegdesc; + + vmsegdesc = (struct vm_seg_desc *)data; + error = vm_set_seg_desc(vcpu, vmsegdesc->regnum, + &vmsegdesc->desc); + break; + } + case VM_GET_SEGMENT_DESCRIPTOR: { + struct vm_seg_desc *vmsegdesc; + + vmsegdesc = (struct vm_seg_desc *)data; + error = vm_get_seg_desc(vcpu, vmsegdesc->regnum, + &vmsegdesc->desc); + break; + } + case VM_SET_X2APIC_STATE: { + struct vm_x2apic *x2apic; + + x2apic = (struct vm_x2apic *)data; + error = vm_set_x2apic_state(vcpu, x2apic->state); + break; + } + case VM_GET_X2APIC_STATE: { + struct vm_x2apic *x2apic; + + x2apic = (struct vm_x2apic *)data; + error = vm_get_x2apic_state(vcpu, &x2apic->state); + break; + } + case VM_GET_GPA_PMAP: { + struct vm_gpa_pte *gpapte; + + gpapte = (struct vm_gpa_pte *)data; + pmap_get_mapping(vmspace_pmap(vm_vmspace(vm)), gpapte->gpa, + gpapte->pte, &gpapte->ptenum); + break; + } + case VM_GET_HPET_CAPABILITIES: + error = vhpet_getcap((struct vm_hpet_cap *)data); + break; + case VM_GLA2GPA: { + struct vm_gla2gpa *gg; + + CTASSERT(PROT_READ == VM_PROT_READ); + CTASSERT(PROT_WRITE == VM_PROT_WRITE); + CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + } + case VM_GLA2GPA_NOFAULT: { + struct vm_gla2gpa *gg; + + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + } + case VM_SET_INTINFO: { + struct vm_intinfo *vmii; + + vmii = (struct vm_intinfo *)data; + error = vm_exit_intinfo(vcpu, vmii->info1); + break; + } + case VM_GET_INTINFO: { + struct vm_intinfo *vmii; + + vmii = (struct vm_intinfo *)data; + error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2); + break; + } + case VM_RTC_WRITE: { + struct vm_rtc_data *rtcdata; + + rtcdata = (struct vm_rtc_data *)data; + error = vrtc_nvram_write(vm, rtcdata->offset, + rtcdata->value); + break; + } + case VM_RTC_READ: { + struct vm_rtc_data *rtcdata; + + rtcdata = (struct vm_rtc_data *)data; + error = vrtc_nvram_read(vm, rtcdata->offset, + &rtcdata->value); + break; + } + case VM_RTC_SETTIME: { + struct vm_rtc_time *rtctime; + + rtctime = (struct vm_rtc_time *)data; + error = vrtc_set_time(vm, rtctime->secs); + break; + } + case VM_RTC_GETTIME: { + struct vm_rtc_time *rtctime; + + rtctime = (struct vm_rtc_time *)data; + rtctime->secs = vrtc_get_time(vm); + break; + } + case VM_RESTART_INSTRUCTION: + error = vm_restart_instruction(vcpu); + break; +#ifdef BHYVE_SNAPSHOT + case VM_SNAPSHOT_REQ: { + struct vm_snapshot_meta 
*snapshot_meta; + + snapshot_meta = (struct vm_snapshot_meta *)data; + error = vm_snapshot_req(vm, snapshot_meta); + break; + } +#ifdef COMPAT_FREEBSD13 + case VM_SNAPSHOT_REQ_13: { + struct vm_snapshot_meta *snapshot_meta; + struct vm_snapshot_meta_13 *snapshot_13; + + /* + * The old structure just has an additional pointer at + * the start that is ignored. + */ + snapshot_13 = (struct vm_snapshot_meta_13 *)data; + snapshot_meta = + (struct vm_snapshot_meta *)&snapshot_13->dev_data; + error = vm_snapshot_req(vm, snapshot_meta); + break; + } +#endif + case VM_RESTORE_TIME: + error = vm_restore_time(vm); + break; +#endif + default: + error = ENOTTY; + break; + } + + return (error); +} diff --git a/sys/amd64/vmm/vmm_host.c b/sys/amd64/vmm/vmm_host.c index 4a36296beab2..78811b488fdb 100644 --- a/sys/amd64/vmm/vmm_host.c +++ b/sys/amd64/vmm/vmm_host.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/pcpu.h> diff --git a/sys/amd64/vmm/vmm_host.h b/sys/amd64/vmm/vmm_host.h index f750a977324b..eebb794843b6 100644 --- a/sys/amd64/vmm/vmm_host.h +++ b/sys/amd64/vmm/vmm_host.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMM_HOST_H_ diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index 06dffe6a80b9..c54b6e6d0074 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 Sandvine, Inc. * Copyright (c) 2012 NetApp, Inc. @@ -25,13 +25,9 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #ifdef _KERNEL #include <sys/param.h> #include <sys/pcpu.h> @@ -43,6 +39,8 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> #include <machine/vmm.h> + +#include <dev/vmm/vmm_mem.h> #else /* !_KERNEL */ #include <sys/types.h> #include <sys/errno.h> @@ -67,30 +65,6 @@ __FBSDID("$FreeBSD$"); #include <x86/psl.h> #include <x86/specialreg.h> -/* struct vie_op.op_type */ -enum { - VIE_OP_TYPE_NONE = 0, - VIE_OP_TYPE_MOV, - VIE_OP_TYPE_MOVSX, - VIE_OP_TYPE_MOVZX, - VIE_OP_TYPE_AND, - VIE_OP_TYPE_OR, - VIE_OP_TYPE_SUB, - VIE_OP_TYPE_TWO_BYTE, - VIE_OP_TYPE_PUSH, - VIE_OP_TYPE_CMP, - VIE_OP_TYPE_POP, - VIE_OP_TYPE_MOVS, - VIE_OP_TYPE_GROUP1, - VIE_OP_TYPE_STOS, - VIE_OP_TYPE_BITTEST, - VIE_OP_TYPE_TWOB_GRP15, - VIE_OP_TYPE_ADD, - VIE_OP_TYPE_TEST, - VIE_OP_TYPE_BEXTR, - VIE_OP_TYPE_LAST -}; - /* struct vie_op.op_flags */ #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ @@ -154,6 +128,16 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x3B, .op_type = VIE_OP_TYPE_CMP, }, + [0x6E] = { + .op_byte = 0x6E, + .op_type = VIE_OP_TYPE_OUTS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION, + }, + [0x6F] = { + .op_byte = 0x6F, + .op_type = VIE_OP_TYPE_OUTS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION, + }, [0x88] = { .op_byte = 0x88, .op_type = VIE_OP_TYPE_MOV, @@ -238,6 +222,12 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x8F, .op_type = VIE_OP_TYPE_POP, }, + [0xF6] = { + /* XXX Group 3 extended opcode - not just TEST */ + .op_byte = 0xF6, + .op_type = VIE_OP_TYPE_TEST, + .op_flags = VIE_OP_F_IMM8, + }, [0xF7] = { /* XXX Group 3 extended opcode - not just TEST */ .op_byte = 0xF7, @@ -290,11 +280,11 @@ static uint64_t size2mask[] = { }; static int -vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) +vie_read_register(struct vcpu *vcpu, enum vm_reg_name reg, uint64_t *rval) { int error; - error = vm_get_register(vm, vcpuid, reg, rval); + error = vm_get_register(vcpu, reg, rval); return (error); } @@ -326,14 +316,14 @@ vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) } static int -vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) +vie_read_bytereg(struct vcpu *vcpu, struct vie *vie, uint8_t *rval) { uint64_t val; int error, lhbr; enum vm_reg_name reg; vie_calc_bytereg(vie, ®, &lhbr); - error = vm_get_register(vm, vcpuid, reg, &val); + error = vm_get_register(vcpu, reg, &val); /* * To obtain the value of a legacy high byte register shift the @@ -347,14 +337,14 @@ vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) } static int -vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) +vie_write_bytereg(struct vcpu *vcpu, struct vie *vie, uint8_t byte) { uint64_t origval, val, mask; int error, lhbr; enum vm_reg_name reg; vie_calc_bytereg(vie, ®, &lhbr); - error = vm_get_register(vm, vcpuid, reg, &origval); + error = vm_get_register(vcpu, reg, &origval); if (error == 0) { val = byte; mask = 0xff; @@ -367,13 +357,13 @@ vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) mask <<= 8; } val |= origval & ~mask; - error = vm_set_register(vm, vcpuid, reg, val); + error = vm_set_register(vcpu, reg, val); } return (error); } int -vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, +vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg, uint64_t val, int size) { int 
error; @@ -382,7 +372,7 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, switch (size) { case 1: case 2: - error = vie_read_register(vm, vcpuid, reg, &origval); + error = vie_read_register(vcpu, reg, &origval); if (error) return (error); val &= size2mask[size]; @@ -397,7 +387,7 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, return (EINVAL); } - error = vm_set_register(vm, vcpuid, reg, val); + error = vm_set_register(vcpu, reg, val); return (error); } @@ -509,7 +499,7 @@ getandflags(int opsize, uint64_t x, uint64_t y) } static int -emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, +emulate_mov(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; @@ -528,9 +518,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) */ size = 1; /* override for byte operation */ - error = vie_read_bytereg(vm, vcpuid, vie, &byte); + error = vie_read_bytereg(vcpu, vie, &byte); if (error == 0) - error = memwrite(vm, vcpuid, gpa, byte, size, arg); + error = memwrite(vcpu, gpa, byte, size, arg); break; case 0x89: /* @@ -540,10 +530,10 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX.W + 89/r mov r/m64, r64 */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val); + error = vie_read_register(vcpu, reg, &val); if (error == 0) { val &= size2mask[size]; - error = memwrite(vm, vcpuid, gpa, val, size, arg); + error = memwrite(vcpu, gpa, val, size, arg); } break; case 0x8A: @@ -553,9 +543,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX + 8A/r: mov r8, r/m8 */ size = 1; /* override for byte operation */ - error = memread(vm, vcpuid, gpa, &val, size, arg); + error = memread(vcpu, gpa, &val, size, arg); if (error == 0) - error = vie_write_bytereg(vm, vcpuid, vie, val); + error = vie_write_bytereg(vcpu, vie, val); break; case 0x8B: /* @@ -564,10 +554,10 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * 8B/r: mov r32, r/m32 * REX.W 8B/r: mov r64, r/m64 */ - error = memread(vm, vcpuid, gpa, &val, size, arg); + error = memread(vcpu, gpa, &val, size, arg); if (error == 0) { reg = gpr_map[vie->reg]; - error = vie_update_register(vm, vcpuid, reg, val, size); + error = vie_update_register(vcpu, reg, val, size); } break; case 0xA1: @@ -577,23 +567,23 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * A1: mov EAX, moffs32 * REX.W + A1: mov RAX, moffs64 */ - error = memread(vm, vcpuid, gpa, &val, size, arg); + error = memread(vcpu, gpa, &val, size, arg); if (error == 0) { reg = VM_REG_GUEST_RAX; - error = vie_update_register(vm, vcpuid, reg, val, size); + error = vie_update_register(vcpu, reg, val, size); } break; case 0xA3: /* * MOV from AX/EAX/RAX to seg:moffset * A3: mov moffs16, AX - * A3: mov moffs32, EAX + * A3: mov moffs32, EAX * REX.W + A3: mov moffs64, RAX */ - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); + error = vie_read_register(vcpu, VM_REG_GUEST_RAX, &val); if (error == 0) { val &= size2mask[size]; - error = memwrite(vm, vcpuid, gpa, val, size, arg); + error = memwrite(vcpu, gpa, val, size, arg); } break; case 0xC6: @@ -603,7 +593,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX + C6/0 mov r/m8, imm8 */ size = 1; /* override for byte operation */ - error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg); + error = memwrite(vcpu, gpa, vie->immediate, 
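vie_update_register(), converted above to take a struct vcpu, encodes the standard x86-64 width rule: 1- and 2-byte writes merge into the existing register value, a 4-byte write zero-extends to 64 bits, and an 8-byte write replaces the register outright. A self-contained model of that rule (a sketch, not the kernel function):

#include <stdint.h>
#include <stdio.h>

static uint64_t
update_register(uint64_t orig, uint64_t val, int size)
{
        static const uint64_t mask[9] = {
                [1] = 0xff, [2] = 0xffff,
                [4] = 0xffffffff, [8] = ~0ull,
        };

        if (size == 1 || size == 2)
                return ((orig & ~mask[size]) | (val & mask[size]));
        return (val & mask[size]);      /* 4/8 bytes: no merging */
}

int
main(void)
{
        printf("%#llx\n", (unsigned long long)
            update_register(~0ull, 0x1234, 2));  /* 0xffffffffffff1234 */
        printf("%#llx\n", (unsigned long long)
            update_register(~0ull, 0x1234, 4));  /* 0x1234 */
        return (0);
}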
size, arg); break; case 0xC7: /* @@ -613,7 +603,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits) */ val = vie->immediate & size2mask[size]; - error = memwrite(vm, vcpuid, gpa, val, size, arg); + error = memwrite(vcpu, gpa, val, size, arg); break; default: break; @@ -623,9 +613,8 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, - void *arg) +emulate_movx(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg) { int error, size; enum vm_reg_name reg; @@ -646,7 +635,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val, 1, arg); + error = memread(vcpu, gpa, &val, 1, arg); if (error) break; @@ -657,7 +646,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, val = (uint8_t)val; /* write the result */ - error = vie_update_register(vm, vcpuid, reg, val, size); + error = vie_update_register(vcpu, reg, val, size); break; case 0xB7: /* @@ -667,7 +656,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * 0F B7/r movzx r32, r/m16 * REX.W + 0F B7/r movzx r64, r/m16 */ - error = memread(vm, vcpuid, gpa, &val, 2, arg); + error = memread(vcpu, gpa, &val, 2, arg); if (error) return (error); @@ -676,7 +665,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* zero-extend word */ val = (uint16_t)val; - error = vie_update_register(vm, vcpuid, reg, val, size); + error = vie_update_register(vcpu, reg, val, size); break; case 0xBE: /* @@ -689,7 +678,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val, 1, arg); + error = memread(vcpu, gpa, &val, 1, arg); if (error) break; @@ -700,7 +689,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, val = (int8_t)val; /* write the result */ - error = vie_update_register(vm, vcpuid, reg, val, size); + error = vie_update_register(vcpu, reg, val, size); break; default: break; @@ -712,47 +701,47 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * Helper function to calculate and validate a linear address. 
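The byte-wide MOV cases (0x88/0x8A) above route through vie_read_bytereg() and vie_write_bytereg(), which place a legacy high-byte register (%ah, %ch, %dh, %bh) in bits 8..15 of the underlying GPR. A standalone model of that shift-and-mask logic, mirroring the two functions shown earlier in this file:

#include <stdint.h>
#include <stdio.h>

static uint8_t
read_bytereg(uint64_t gpr, int lhbr)
{
        /* High-byte registers live one byte up in the full register. */
        return ((uint8_t)(lhbr ? gpr >> 8 : gpr));
}

static uint64_t
write_bytereg(uint64_t gpr, int lhbr, uint8_t byte)
{
        uint64_t mask = 0xff, val = byte;

        if (lhbr) {
                mask <<= 8;
                val <<= 8;
        }
        return ((gpr & ~mask) | val);
}

int
main(void)
{
        uint64_t rax = 0x1234;

        printf("%%ah = 0x%02x\n", read_bytereg(rax, 1));      /* 0x12 */
        printf("rax  = 0x%llx\n",
            (unsigned long long)write_bytereg(rax, 1, 0xab)); /* 0xab34 */
        return (0);
}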
*/ static int -get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, - int opsize, int addrsize, int prot, enum vm_reg_name seg, - enum vm_reg_name gpr, uint64_t *gla, int *fault) +get_gla(struct vcpu *vcpu, struct vie *vie __unused, + struct vm_guest_paging *paging, int opsize, int addrsize, int prot, + enum vm_reg_name seg, enum vm_reg_name gpr, uint64_t *gla, int *fault) { struct seg_desc desc; uint64_t cr0, val, rflags; int error __diagused; - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); + error = vie_read_register(vcpu, VM_REG_GUEST_CR0, &cr0); KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); - error = vm_get_seg_desc(vm, vcpuid, seg, &desc); + error = vm_get_seg_desc(vcpu, seg, &desc); KASSERT(error == 0, ("%s: error %d getting segment descriptor %d", __func__, error, seg)); - error = vie_read_register(vm, vcpuid, gpr, &val); + error = vie_read_register(vcpu, gpr, &val); KASSERT(error == 0, ("%s: error %d getting register %d", __func__, error, gpr)); if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize, addrsize, prot, gla)) { if (seg == VM_REG_GUEST_SS) - vm_inject_ss(vm, vcpuid, 0); + vm_inject_ss(vcpu, 0); else - vm_inject_gp(vm, vcpuid); + vm_inject_gp(vcpu); goto guest_fault; } if (vie_canonical_check(paging->cpu_mode, *gla)) { if (seg == VM_REG_GUEST_SS) - vm_inject_ss(vm, vcpuid, 0); + vm_inject_ss(vcpu, 0); else - vm_inject_gp(vm, vcpuid); + vm_inject_gp(vcpu); goto guest_fault; } if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) { - vm_inject_ac(vm, vcpuid, 0); + vm_inject_ac(vcpu, 0); goto guest_fault; } @@ -765,7 +754,7 @@ guest_fault: } static int -emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, +emulate_movs(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -792,7 +781,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, repeat = vie->repz_present | vie->repnz_present; if (repeat) { - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); + error = vie_read_register(vcpu, VM_REG_GUEST_RCX, &rcx); KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); /* @@ -822,12 +811,12 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS; - error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, + error = get_gla(vcpu, vie, paging, opsize, vie->addrsize, PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault); if (error || fault) goto done; - error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ, + error = vm_copy_setup(vcpu, paging, srcaddr, opsize, PROT_READ, copyinfo, nitems(copyinfo), &fault); if (error == 0) { if (fault) @@ -836,9 +825,9 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* * case (2): read from system memory and write to mmio. 
*/ - vm_copyin(vm, vcpuid, copyinfo, &val, opsize); - vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); - error = memwrite(vm, vcpuid, gpa, val, opsize, arg); + vm_copyin(copyinfo, &val, opsize); + vm_copy_teardown(copyinfo, nitems(copyinfo)); + error = memwrite(vcpu, gpa, val, opsize, arg); if (error) goto done; } else { @@ -847,13 +836,13 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * if 'srcaddr' is in the mmio space. */ - error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, + error = get_gla(vcpu, vie, paging, opsize, vie->addrsize, PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr, &fault); if (error || fault) goto done; - error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize, + error = vm_copy_setup(vcpu, paging, dstaddr, opsize, PROT_WRITE, copyinfo, nitems(copyinfo), &fault); if (error == 0) { if (fault) @@ -868,12 +857,12 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * injected into the guest then it will happen * before the MMIO read is attempted. */ - error = memread(vm, vcpuid, gpa, &val, opsize, arg); + error = memread(vcpu, gpa, &val, opsize, arg); if (error) goto done; - vm_copyout(vm, vcpuid, &val, copyinfo, opsize); - vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + vm_copyout(&val, copyinfo, opsize); + vm_copy_teardown(copyinfo, nitems(copyinfo)); } else { /* * Case (4): read from and write to mmio. @@ -883,33 +872,33 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * instruction is not going to be restarted due * to address translation faults. */ - error = vm_gla2gpa(vm, vcpuid, paging, srcaddr, + error = vm_gla2gpa(vcpu, paging, srcaddr, PROT_READ, &srcgpa, &fault); if (error || fault) goto done; - error = vm_gla2gpa(vm, vcpuid, paging, dstaddr, + error = vm_gla2gpa(vcpu, paging, dstaddr, PROT_WRITE, &dstgpa, &fault); if (error || fault) goto done; - error = memread(vm, vcpuid, srcgpa, &val, opsize, arg); + error = memread(vcpu, srcgpa, &val, opsize, arg); if (error) goto done; - error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg); + error = memwrite(vcpu, dstgpa, val, opsize, arg); if (error) goto done; } } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi); + error = vie_read_register(vcpu, VM_REG_GUEST_RSI, &rsi); KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); + error = vie_read_register(vcpu, VM_REG_GUEST_RDI, &rdi); KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); if (rflags & PSL_D) { @@ -920,17 +909,17 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, rdi += opsize; } - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi, + error = vie_update_register(vcpu, VM_REG_GUEST_RSI, rsi, vie->addrsize); KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error)); - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi, + error = vie_update_register(vcpu, VM_REG_GUEST_RDI, rdi, vie->addrsize); KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error)); if (repeat) { rcx = rcx - 1; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, + error = vie_update_register(vcpu, VM_REG_GUEST_RCX, rcx, vie->addrsize); KASSERT(!error, ("%s: error %d updating rcx", __func__, error)); @@ -938,7 
+927,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * Repeat the instruction if the count register is not zero. */ if ((rcx & vie_size2mask(vie->addrsize)) != 0) - vm_restart_instruction(vm, vcpuid); + vm_restart_instruction(vcpu); } done: KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d", @@ -947,8 +936,8 @@ done: } static int -emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, +emulate_stos(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging __unused, mem_region_read_t memread __unused, mem_region_write_t memwrite, void *arg) { int error, opsize, repeat; @@ -959,7 +948,7 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, repeat = vie->repz_present | vie->repnz_present; if (repeat) { - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); + error = vie_read_register(vcpu, VM_REG_GUEST_RCX, &rcx); KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); /* @@ -970,17 +959,17 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (0); } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); + error = vie_read_register(vcpu, VM_REG_GUEST_RAX, &val); KASSERT(!error, ("%s: error %d getting rax", __func__, error)); - error = memwrite(vm, vcpuid, gpa, val, opsize, arg); + error = memwrite(vcpu, gpa, val, opsize, arg); if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); + error = vie_read_register(vcpu, VM_REG_GUEST_RDI, &rdi); KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); if (rflags & PSL_D) @@ -988,13 +977,13 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, else rdi += opsize; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi, + error = vie_update_register(vcpu, VM_REG_GUEST_RDI, rdi, vie->addrsize); KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error)); if (repeat) { rcx = rcx - 1; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, + error = vie_update_register(vcpu, VM_REG_GUEST_RCX, rcx, vie->addrsize); KASSERT(!error, ("%s: error %d updating rcx", __func__, error)); @@ -1002,14 +991,14 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * Repeat the instruction if the count register is not zero. 
*/ if ((rcx & vie_size2mask(vie->addrsize)) != 0) - vm_restart_instruction(vm, vcpuid); + vm_restart_instruction(vcpu); } return (0); } static int -emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, +emulate_and(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; @@ -1032,18 +1021,18 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vie_read_register(vcpu, reg, &val1); if (error) break; /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = memread(vcpu, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ result = val1 & val2; - error = vie_update_register(vm, vcpuid, reg, result, size); + error = vie_update_register(vcpu, reg, result, size); break; case 0x81: case 0x83: @@ -1061,7 +1050,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val1, size, arg); + error = memread(vcpu, gpa, &val1, size, arg); if (error) break; @@ -1070,7 +1059,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * operand and write the result */ result = val1 & vie->immediate; - error = memwrite(vm, vcpuid, gpa, result, size, arg); + error = memwrite(vcpu, gpa, result, size, arg); break; default: break; @@ -1078,7 +1067,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1092,12 +1081,12 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int -emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, +emulate_or(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { int error, size; @@ -1120,18 +1109,18 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vie_read_register(vcpu, reg, &val1); if (error) break; - + /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = memread(vcpu, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ result = val1 | val2; - error = vie_update_register(vm, vcpuid, reg, result, size); + error = vie_update_register(vcpu, reg, result, size); break; case 0x81: case 0x83: @@ -1149,7 +1138,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val1, size, arg); + error = memread(vcpu, gpa, &val1, size, arg); if (error) break; @@ -1158,7 +1147,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * operand and write the result */ result = val1 | vie->immediate; - error = memwrite(vm, vcpuid, gpa, result, size, arg); + error = memwrite(vcpu, gpa, result, size, arg); break; default: break; @@ -1166,7 +1155,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, 
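emulate_movs() and emulate_stos() share the REP bookkeeping visible above: RCX is masked to the effective address size, the emulation returns early when the count is already zero, and the instruction is restarted while the decremented count remains nonzero. A compact model of one REP iteration, assuming only the semantics shown in the hunks:

#include <stdint.h>
#include <stdio.h>

/* vie_size2mask() equivalent for 2/4/8-byte address sizes. */
static uint64_t
size2mask(int addrsize)
{
        return (addrsize == 8 ? ~0ull : (1ull << (addrsize * 8)) - 1);
}

/* Returns 1 if the string instruction must be restarted. */
static int
rep_step(uint64_t *rcx, int addrsize)
{
        if ((*rcx & size2mask(addrsize)) == 0)
                return (0);             /* count exhausted before start */
        *rcx -= 1;                      /* one element emulated */
        return ((*rcx & size2mask(addrsize)) != 0);
}

int
main(void)
{
        uint64_t rcx = 3;
        int restarts = 0;

        while (rep_step(&rcx, 4))
                restarts++;
        printf("restarts: %d, rcx: %llu\n", restarts,
            (unsigned long long)rcx);   /* restarts: 2, rcx: 0 */
        return (0);
}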
struct vie *vie, if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1180,13 +1169,13 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int -emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_cmp(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg) { int error, size; uint64_t regop, memop, op1, op2, rflags, rflags2; @@ -1213,12 +1202,12 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* Get the register operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, ®op); + error = vie_read_register(vcpu, reg, ®op); if (error) return (error); /* Get the memory operand */ - error = memread(vm, vcpuid, gpa, &memop, size, arg); + error = memread(vcpu, gpa, &memop, size, arg); if (error) return (error); @@ -1257,7 +1246,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, size = 1; /* get the first operand */ - error = memread(vm, vcpuid, gpa, &op1, size, arg); + error = memread(vcpu, gpa, &op1, size, arg); if (error) return (error); @@ -1266,19 +1255,19 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, default: return (EINVAL); } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & RFLAGS_STATUS_BITS; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int -emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_test(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg) { int error, size; uint64_t op1, rflags, rflags2; @@ -1287,6 +1276,12 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = EINVAL; switch (vie->op.op_byte) { + case 0xF6: + /* + * F6 /0 test r/m8, imm8 + */ + size = 1; /* override for byte operation */ + /* FALLTHROUGH */ case 0xF7: /* * F7 /0 test r/m16, imm16 @@ -1301,7 +1296,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if ((vie->reg & 7) != 0) return (EINVAL); - error = memread(vm, vcpuid, gpa, &op1, size, arg); + error = memread(vcpu, gpa, &op1, size, arg); if (error) return (error); @@ -1310,7 +1305,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, default: return (EINVAL); } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1321,18 +1316,18 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 
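The AND/OR/TEST paths all finish with the same pattern: clear RFLAGS_STATUS_BITS, then merge in PSL_PF, PSL_Z and PSL_N computed from the result, since logical operations always clear CF and OF. getandflags() itself is outside this diff, so the sketch below reconstructs the conventional x86 rule; the PSL_* values mirror x86/psl.h, and __builtin_parity assumes GCC or Clang.

#include <stdint.h>
#include <stdio.h>

#define PSL_C   0x001
#define PSL_PF  0x004
#define PSL_Z   0x040
#define PSL_N   0x080
#define PSL_V   0x800
#define RFLAGS_STATUS_BITS      (PSL_C | PSL_PF | PSL_Z | PSL_N | PSL_V)

static uint64_t
logic_flags(uint64_t result, int size)
{
        uint64_t rf = 0;
        uint8_t lsb = result;

        if (__builtin_parity(lsb) == 0)         /* PF: even 1-bits in LSB */
                rf |= PSL_PF;
        if ((result & ((size == 8) ? ~0ull :
            (1ull << (size * 8)) - 1)) == 0)    /* ZF */
                rf |= PSL_Z;
        if (result & (1ull << (size * 8 - 1)))  /* SF */
                rf |= PSL_N;
        return (rf);
}

int
main(void)
{
        uint64_t rflags = PSL_C | PSL_V;        /* pretend CF/OF were set */

        rflags &= ~RFLAGS_STATUS_BITS;
        rflags |= logic_flags(0x80, 1) & (PSL_PF | PSL_Z | PSL_N);
        printf("rflags: %#llx\n", (unsigned long long)rflags); /* 0x80 */
        return (0);
}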
8); return (error); } static int -emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, +emulate_bextr(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *arg) + mem_region_write_t memwrite __unused, void *arg) { uint64_t src1, src2, dst, rflags; - unsigned start, len; - int error, size; + unsigned start, len, size; + int error; size = vie->opsize; error = EINVAL; @@ -1354,13 +1349,13 @@ emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * operand) using an index and length specified in the second /source/ * operand (third operand). */ - error = memread(vm, vcpuid, gpa, &src1, size, arg); + error = memread(vcpu, gpa, &src1, size, arg); if (error) return (error); - error = vie_read_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2); + error = vie_read_register(vcpu, gpr_map[vie->vex_reg], &src2); if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1386,7 +1381,7 @@ emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, dst = src1; done: - error = vie_update_register(vm, vcpuid, gpr_map[vie->reg], dst, size); + error = vie_update_register(vcpu, gpr_map[vie->reg], dst, size); if (error) return (error); @@ -1397,14 +1392,14 @@ done: rflags &= ~RFLAGS_STATUS_BITS; if (dst == 0) rflags |= PSL_Z; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); return (error); } static int -emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_add(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg) { int error, size; uint64_t nval, rflags, rflags2, val1, val2; @@ -1425,18 +1420,18 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vie_read_register(vcpu, reg, &val1); if (error) break; /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = memread(vcpu, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ nval = val1 + val2; - error = vie_update_register(vm, vcpuid, reg, nval, size); + error = vie_update_register(vcpu, reg, nval, size); break; default: break; @@ -1444,14 +1439,14 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (!error) { rflags2 = getaddflags(size, val1, val2); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & RFLAGS_STATUS_BITS; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); } @@ -1459,8 +1454,8 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_sub(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg) { int error, size; uint64_t nval, rflags, rflags2, val1, val2; @@ 
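emulate_bextr() above extracts a bit field from the memory operand using a start position and length taken from the VEX second-source register. The start/len encoding (bits 7:0 and 15:8 of the control operand) is BEXTR's architectural definition rather than something fully visible in this hunk, so treat this as a reconstruction of the bounds handling:

#include <stdint.h>
#include <stdio.h>

static uint64_t
bextr(uint64_t src, uint64_t ctrl, int opsize)
{
        unsigned start = ctrl & 0xff;           /* bits 7:0 */
        unsigned len = (ctrl >> 8) & 0xff;      /* bits 15:8 */
        unsigned width = opsize * 8;

        if (start >= width || len == 0)
                return (0);
        if (start + len > width)
                len = width - start;            /* clamp to operand size */
        src >>= start;
        if (len < 64)
                src &= (1ull << len) - 1;
        return (src);
}

int
main(void)
{
        /* Extract 8 bits starting at bit 8 of 0xdeadbeef -> 0xbe. */
        printf("%#llx\n",
            (unsigned long long)bextr(0xdeadbeef, (8 << 8) | 8, 4));
        return (0);
}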
-1473,7 +1468,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, case 0x2B: /* * SUB r/m from r and store the result in r - * + * * 2B/r SUB r16, r/m16 * 2B/r SUB r32, r/m32 * REX.W + 2B/r SUB r64, r/m64 @@ -1481,18 +1476,18 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vie_read_register(vcpu, reg, &val1); if (error) break; /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = memread(vcpu, gpa, &val2, size, arg); if (error) break; /* perform the operation and write the result */ nval = val1 - val2; - error = vie_update_register(vm, vcpuid, reg, nval, size); + error = vie_update_register(vcpu, reg, nval, size); break; default: break; @@ -1500,14 +1495,14 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (!error) { rflags2 = getcc(size, val1, val2); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); rflags &= ~RFLAGS_STATUS_BITS; rflags |= rflags2 & RFLAGS_STATUS_BITS; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); } @@ -1515,7 +1510,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, +emulate_stack_op(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -1553,7 +1548,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, * stack-segment descriptor determines the size of the * stack pointer. */ - error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc); + error = vm_get_seg_desc(vcpu, VM_REG_GUEST_SS, &ss_desc); KASSERT(error == 0, ("%s: error %d getting SS descriptor", __func__, error)); if (SEG_DESC_DEF32(ss_desc.access)) @@ -1562,13 +1557,13 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, stackaddrsize = 2; } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); + error = vie_read_register(vcpu, VM_REG_GUEST_CR0, &cr0); KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); + error = vie_read_register(vcpu, VM_REG_GUEST_RSP, &rsp); KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); if (pushop) { rsp -= size; @@ -1577,39 +1572,39 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ, &stack_gla)) { - vm_inject_ss(vm, vcpuid, 0); + vm_inject_ss(vcpu, 0); return (0); } if (vie_canonical_check(paging->cpu_mode, stack_gla)) { - vm_inject_ss(vm, vcpuid, 0); + vm_inject_ss(vcpu, 0); return (0); } if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { - vm_inject_ac(vm, vcpuid, 0); + vm_inject_ac(vcpu, 0); return (0); } - error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, + error = vm_copy_setup(vcpu, paging, stack_gla, size, pushop ? 
PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo), &fault); if (error || fault) return (error); if (pushop) { - error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); + error = memread(vcpu, mmio_gpa, &val, size, arg); if (error == 0) - vm_copyout(vm, vcpuid, &val, copyinfo, size); + vm_copyout(&val, copyinfo, size); } else { - vm_copyin(vm, vcpuid, copyinfo, &val, size); - error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg); + vm_copyin(copyinfo, &val, size); + error = memwrite(vcpu, mmio_gpa, val, size, arg); rsp += size; } - vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + vm_copy_teardown(copyinfo, nitems(copyinfo)); if (error == 0) { - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp, + error = vie_update_register(vcpu, VM_REG_GUEST_RSP, rsp, stackaddrsize); KASSERT(error == 0, ("error %d updating rsp", error)); } @@ -1617,7 +1612,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, } static int -emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, +emulate_push(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -1632,13 +1627,13 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, if ((vie->reg & 7) != 6) return (EINVAL); - error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, + error = emulate_stack_op(vcpu, mmio_gpa, vie, paging, memread, memwrite, arg); return (error); } static int -emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, +emulate_pop(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -1653,29 +1648,29 @@ emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, if ((vie->reg & 7) != 0) return (EINVAL); - error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, + error = emulate_stack_op(vcpu, mmio_gpa, vie, paging, memread, memwrite, arg); return (error); } static int -emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, +emulate_group1(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging __unused, mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) { int error; switch (vie->reg & 7) { case 0x1: /* OR */ - error = emulate_or(vm, vcpuid, gpa, vie, + error = emulate_or(vcpu, gpa, vie, memread, memwrite, memarg); break; case 0x4: /* AND */ - error = emulate_and(vm, vcpuid, gpa, vie, + error = emulate_and(vcpu, gpa, vie, memread, memwrite, memarg); break; case 0x7: /* CMP */ - error = emulate_cmp(vm, vcpuid, gpa, vie, + error = emulate_cmp(vcpu, gpa, vie, memread, memwrite, memarg); break; default: @@ -1687,8 +1682,9 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +emulate_bittest(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, + void *memarg) { uint64_t val, rflags; int error, bitmask, bitoff; @@ -1702,10 +1698,10 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if ((vie->reg & 7) != 4) return (EINVAL); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vie_read_register(vcpu, 
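emulate_stack_op() above captures the whole stack discipline: the pointer width is 8 bytes in long mode and otherwise follows the SS descriptor's D bit, PUSH decrements RSP before the store while POP increments it after the load, with SS-relative canonical and alignment checks in between. A reduced model of just the pointer arithmetic, with every fault check omitted:

#include <stdint.h>
#include <stdio.h>

struct stack {
        uint64_t rsp;
};

static uint64_t
push_addr(struct stack *st, int size)
{
        st->rsp -= size;
        return (st->rsp);               /* store goes here */
}

static uint64_t
pop_addr(struct stack *st, int size)
{
        uint64_t addr = st->rsp;

        st->rsp += size;                /* adjusted after the load */
        return (addr);
}

int
main(void)
{
        struct stack st = { .rsp = 0x1000 };

        printf("push at %#llx\n", (unsigned long long)push_addr(&st, 8));
        printf("pop  at %#llx\n", (unsigned long long)pop_addr(&st, 8));
        return (0);
}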
VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); - error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg); + error = memread(vcpu, gpa, &val, vie->opsize, memarg); if (error) return (error); @@ -1722,15 +1718,16 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, else rflags &= ~PSL_C; - error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error)); return (0); } static int -emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +emulate_twob_group15(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite __unused, + void *memarg) { int error; uint64_t buf; @@ -1748,7 +1745,7 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * CLFLUSH, CLFLUSHOPT. Only check for access * rights. */ - error = memread(vm, vcpuid, gpa, &buf, 1, memarg); + error = memread(vcpu, gpa, &buf, 1, memarg); } break; default: @@ -1760,7 +1757,7 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } int -vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, +vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) { @@ -1771,68 +1768,68 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, switch (vie->op.op_type) { case VIE_OP_TYPE_GROUP1: - error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread, + error = emulate_group1(vcpu, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_POP: - error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread, + error = emulate_pop(vcpu, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_PUSH: - error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, + error = emulate_push(vcpu, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_CMP: - error = emulate_cmp(vm, vcpuid, gpa, vie, + error = emulate_cmp(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_MOV: - error = emulate_mov(vm, vcpuid, gpa, vie, + error = emulate_mov(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_MOVSX: case VIE_OP_TYPE_MOVZX: - error = emulate_movx(vm, vcpuid, gpa, vie, + error = emulate_movx(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_MOVS: - error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread, + error = emulate_movs(vcpu, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_STOS: - error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread, + error = emulate_stos(vcpu, gpa, vie, paging, memread, memwrite, memarg); break; case VIE_OP_TYPE_AND: - error = emulate_and(vm, vcpuid, gpa, vie, + error = emulate_and(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_OR: - error = emulate_or(vm, vcpuid, gpa, vie, + error = emulate_or(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_SUB: - error = emulate_sub(vm, vcpuid, gpa, vie, + error = emulate_sub(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_BITTEST: - error = emulate_bittest(vm, vcpuid, gpa, vie, + error = emulate_bittest(vcpu, gpa, vie, memread, memwrite, memarg); break; case 
VIE_OP_TYPE_TWOB_GRP15: - error = emulate_twob_group15(vm, vcpuid, gpa, vie, + error = emulate_twob_group15(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_ADD: - error = emulate_add(vm, vcpuid, gpa, vie, memread, + error = emulate_add(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_TEST: - error = emulate_test(vm, vcpuid, gpa, vie, + error = emulate_test(vcpu, gpa, vie, memread, memwrite, memarg); break; case VIE_OP_TYPE_BEXTR: - error = emulate_bextr(vm, vcpuid, gpa, vie, paging, + error = emulate_bextr(vcpu, gpa, vie, paging, memread, memwrite, memarg); break; default: @@ -1915,7 +1912,7 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, if (SEG_DESC_UNUSABLE(desc->access)) return (-1); - /* + /* * The processor generates a #NP exception when a segment * register is loaded with a selector that points to a * descriptor that is not present. If this was the case then @@ -2055,17 +2052,17 @@ ptp_release(void **cookie) } static void * -ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) +ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) { void *ptr; ptp_release(cookie); - ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie); + ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie); return (ptr); } static int -_vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, +_vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault, bool check_only) { int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable; @@ -2093,7 +2090,7 @@ restart: * should be generated. */ if (!check_only) - vm_inject_gp(vm, vcpuid); + vm_inject_gp(vcpu); goto fault; } @@ -2108,7 +2105,7 @@ restart: /* Zero out the lower 12 bits. 
*/ ptpphys &= ~0xfff; - ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, + ptpbase32 = ptp_hold(vcpu, ptpphys, PAGE_SIZE, &cookie); if (ptpbase32 == NULL) @@ -2126,7 +2123,7 @@ restart: if (!check_only) { pfcode = pf_error_code(usermode, prot, 0, pte32); - vm_inject_pf(vm, vcpuid, pfcode, gla); + vm_inject_pf(vcpu, pfcode, gla); } goto fault; } @@ -2170,7 +2167,7 @@ restart: /* Zero out the lower 5 bits and the upper 32 bits */ ptpphys &= 0xffffffe0UL; - ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4, + ptpbase = ptp_hold(vcpu, ptpphys, sizeof(*ptpbase) * 4, &cookie); if (ptpbase == NULL) goto error; @@ -2182,7 +2179,7 @@ restart: if ((pte & PG_V) == 0) { if (!check_only) { pfcode = pf_error_code(usermode, prot, 0, pte); - vm_inject_pf(vm, vcpuid, pfcode, gla); + vm_inject_pf(vcpu, pfcode, gla); } goto fault; } @@ -2200,7 +2197,7 @@ restart: /* Zero out the lower 12 bits and the upper 12 bits */ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; - ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie); + ptpbase = ptp_hold(vcpu, ptpphys, PAGE_SIZE, &cookie); if (ptpbase == NULL) goto error; @@ -2215,7 +2212,7 @@ restart: (writable && (pte & PG_RW) == 0)) { if (!check_only) { pfcode = pf_error_code(usermode, prot, 0, pte); - vm_inject_pf(vm, vcpuid, pfcode, gla); + vm_inject_pf(vcpu, pfcode, gla); } goto fault; } @@ -2233,7 +2230,7 @@ restart: if (!check_only) { pfcode = pf_error_code(usermode, prot, 1, pte); - vm_inject_pf(vm, vcpuid, pfcode, gla); + vm_inject_pf(vcpu, pfcode, gla); } goto fault; } @@ -2266,25 +2263,25 @@ fault: } int -vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, +vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault) { - return (_vm_gla2gpa(vm, vcpuid, paging, gla, prot, gpa, guest_fault, + return (_vm_gla2gpa(vcpu, paging, gla, prot, gpa, guest_fault, false)); } int -vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, +vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault) { - return (_vm_gla2gpa(vm, vcpuid, paging, gla, prot, gpa, guest_fault, + return (_vm_gla2gpa(vcpu, paging, gla, prot, gpa, guest_fault, true)); } int -vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, +vmm_fetch_instruction(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t rip, int inst_length, struct vie *vie, int *faultptr) { struct vm_copyinfo copyinfo[2]; @@ -2294,13 +2291,13 @@ vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, panic("vmm_fetch_instruction: invalid length %d", inst_length); prot = PROT_READ | PROT_EXEC; - error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, + error = vm_copy_setup(vcpu, paging, rip, inst_length, prot, copyinfo, nitems(copyinfo), faultptr); if (error || *faultptr) return (error); - vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); - vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + vm_copyin(copyinfo, vie->inst, inst_length); + vm_copy_teardown(copyinfo, nitems(copyinfo)); vie->num_valid = inst_length; return (0); } @@ -2812,7 +2809,7 @@ decode_moffset(struct vie *vie) * page table fault matches with our instruction decoding. 
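The long-mode loop of _vm_gla2gpa() above consumes the guest linear address nine index bits per level, starting at bit 12 (the page offset width) for the leaf table. The index arithmetic in isolation, matching the ptpshift/ptpindex computation in the walk:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t gla = 0x7fffdeadbeef;  /* arbitrary example address */
        int nlevels = 4;                /* PML4 -> PDP -> PD -> PT */

        while (--nlevels >= 0) {
                int ptpshift = 12 + nlevels * 9;
                int ptpindex = (gla >> ptpshift) & 0x1ff;

                printf("level %d: shift %2d index %3d\n",
                    nlevels, ptpshift, ptpindex);
        }
        return (0);
}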
*/ static int -verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, +verify_gla(struct vcpu *vcpu, uint64_t gla, struct vie *vie, enum vm_cpu_mode cpu_mode) { int error; @@ -2826,7 +2823,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, base = 0; if (vie->base_register != VM_REG_LAST) { - error = vm_get_register(vm, cpuid, vie->base_register, &base); + error = vm_get_register(vcpu, vie->base_register, &base); if (error) { printf("verify_gla: error %d getting base reg %d\n", error, vie->base_register); @@ -2843,7 +2840,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, idx = 0; if (vie->index_register != VM_REG_LAST) { - error = vm_get_register(vm, cpuid, vie->index_register, &idx); + error = vm_get_register(vcpu, vie->index_register, &idx); if (error) { printf("verify_gla: error %d getting index reg %d\n", error, vie->index_register); @@ -2875,7 +2872,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, seg != VM_REG_GUEST_GS) { segbase = 0; } else { - error = vm_get_seg_desc(vm, cpuid, seg, &desc); + error = vm_get_seg_desc(vcpu, seg, &desc); if (error) { printf("verify_gla: error %d getting segment" " descriptor %d", error, @@ -2902,7 +2899,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, int #ifdef _KERNEL -vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, +vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla, enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) #else vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) @@ -2932,7 +2929,7 @@ vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) #ifdef _KERNEL if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) { - if (verify_gla(vm, cpuid, gla, vie, cpu_mode)) + if (verify_gla(vcpu, gla, vie, cpu_mode)) return (-1); } #endif diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c index e30f796d598c..8aab28f5e68e 100644 --- a/sys/amd64/vmm/vmm_ioport.c +++ b/sys/amd64/vmm/vmm_ioport.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * All rights reserved. @@ -26,21 +26,19 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <machine/vmm.h> #include <machine/vmm_instruction_emul.h> +#include <dev/vmm/vmm_ktr.h> + #include "vatpic.h" #include "vatpit.h" #include "vpmtmr.h" #include "vrtc.h" #include "vmm_ioport.h" -#include "vmm_ktr.h" #define MAX_IOPORTS 1280 @@ -100,11 +98,10 @@ inout_instruction(struct vm_exit *vmexit) #endif /* KTR */ static int -emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit, - bool *retu) +emulate_inout_port(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) { ioport_handler_func_t handler; - uint32_t mask, val; + uint32_t mask, val = 0; int error; /* @@ -122,7 +119,7 @@ emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit, val = vmexit->u.inout.eax & mask; } - error = (*handler)(vm, vcpuid, vmexit->u.inout.in, + error = (*handler)(vcpu_vm(vcpu), vmexit->u.inout.in, vmexit->u.inout.port, vmexit->u.inout.bytes, &val); if (error) { /* @@ -138,7 +135,7 @@ emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit, if (vmexit->u.inout.in) { vmexit->u.inout.eax &= ~mask; vmexit->u.inout.eax |= val & mask; - error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, + error = vm_set_register(vcpu, VM_REG_GUEST_RAX, vmexit->u.inout.eax); KASSERT(error == 0, ("emulate_ioport: error %d setting guest " "rax register", error)); @@ -148,14 +145,54 @@ emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit, } static int -emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) +decode_segment(struct vcpu *vcpu, enum vm_reg_name *segment) { + struct vm_guest_paging *paging; + struct vie vie; + struct vm_exit *vme; + int err; + int fault; + + vme = vm_exitinfo(vcpu); + paging = &vme->u.inout_str.paging; + + vie_init(&vie, NULL, 0); + err = vmm_fetch_instruction(vcpu, paging, + vme->rip + vme->u.inout_str.cs_base, VIE_INST_SIZE, &vie, &fault); + if (err || fault) + return (err); + + err = vmm_decode_instruction(vcpu, VIE_INVALID_GLA, paging->cpu_mode, + vme->u.inout_str.cs_d, &vie); + + if (err || vie.op.op_type != VIE_OP_TYPE_OUTS) + return (EINVAL); + if (vie.segment_override) + *segment = vie.segment_register; + else + *segment = VM_REG_GUEST_DS; + + return (0); +} + +static int +emulate_inout_str(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) +{ + int err; + *retu = true; + if (vmexit->u.inout_str.seg_name == VM_REG_LAST) { + err = decode_segment(vcpu, &vmexit->u.inout_str.seg_name); + if (err) + return (err); + return (vm_get_seg_desc(vcpu, vmexit->u.inout_str.seg_name, + &vmexit->u.inout_str.seg_desc)); + } return (0); /* Return to userspace to finish emulation */ } int -vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) +vm_handle_inout(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) { int bytes __diagused, error; @@ -164,11 +201,11 @@ vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) ("vm_handle_inout: invalid operand size %d", bytes)); if (vmexit->u.inout.string) - error = emulate_inout_str(vm, vcpuid, vmexit, retu); + error = emulate_inout_str(vcpu, vmexit, retu); else - error = emulate_inout_port(vm, vcpuid, vmexit, retu); + error = emulate_inout_port(vcpu, vmexit, retu); - VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s", + VCPU_CTR4(vcpu_vm(vcpu), vcpu_vcpuid(vcpu), "%s%s 0x%04x: %s", vmexit->u.inout.rep ? 
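The new decode_segment() above handles OUTS exits where the hypervisor did not report a source segment: it re-fetches and re-decodes the instruction (opcodes 0x6E/0x6F, registered earlier in one_byte_opcodes), rejects anything that is not OUTS, and falls back to DS unless a segment-override prefix was present. Only that final selection step is modeled here:

#include <stdio.h>

enum seg { SEG_DS, SEG_ES, SEG_CS, SEG_SS, SEG_FS, SEG_GS };

static enum seg
outs_source_segment(int has_override, enum seg override)
{
        /* OUTS reads from DS:(E)SI unless a prefix says otherwise. */
        return (has_override ? override : SEG_DS);
}

int
main(void)
{
        printf("%d %d\n",
            outs_source_segment(0, SEG_FS),     /* SEG_DS (0) */
            outs_source_segment(1, SEG_FS));    /* SEG_FS (4) */
        return (0);
}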
"rep " : "", inout_instruction(vmexit), vmexit->u.inout.port, diff --git a/sys/amd64/vmm/vmm_ioport.h b/sys/amd64/vmm/vmm_ioport.h index 14e315f400ce..e24e5ad57185 100644 --- a/sys/amd64/vmm/vmm_ioport.h +++ b/sys/amd64/vmm/vmm_ioport.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * All rights reserved. @@ -24,16 +24,14 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMM_IOPORT_H_ #define _VMM_IOPORT_H_ -typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid, +typedef int (*ioport_handler_func_t)(struct vm *vm, bool in, int port, int bytes, uint32_t *val); -int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu); +int vm_handle_inout(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); #endif /* _VMM_IOPORT_H_ */ diff --git a/sys/amd64/vmm/vmm_ktr.h b/sys/amd64/vmm/vmm_ktr.h deleted file mode 100644 index 414d0341cca9..000000000000 --- a/sys/amd64/vmm/vmm_ktr.h +++ /dev/null @@ -1,71 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _VMM_KTR_H_ -#define _VMM_KTR_H_ - -#include <sys/ktr.h> -#include <sys/pcpu.h> - -#ifndef KTR_VMM -#define KTR_VMM KTR_GEN -#endif - -#define VCPU_CTR0(vm, vcpuid, format) \ -CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid)) - -#define VCPU_CTR1(vm, vcpuid, format, p1) \ -CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1)) - -#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \ -CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2)) - -#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \ -CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3)) - -#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \ -CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), \ - (p1), (p2), (p3), (p4)) - -#define VM_CTR0(vm, format) \ -CTR1(KTR_VMM, "vm %s: " format, vm_name((vm))) - -#define VM_CTR1(vm, format, p1) \ -CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1)) - -#define VM_CTR2(vm, format, p1, p2) \ -CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2)) - -#define VM_CTR3(vm, format, p1, p2, p3) \ -CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3)) - -#define VM_CTR4(vm, format, p1, p2, p3, p4) \ -CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4)) -#endif diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 8191da758100..0cae01f172ec 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/smp.h> @@ -38,8 +33,9 @@ __FBSDID("$FreeBSD$"); #include <x86/specialreg.h> #include <x86/apicreg.h> +#include <dev/vmm/vmm_ktr.h> + #include <machine/vmm.h> -#include "vmm_ktr.h" #include "vmm_lapic.h" #include "vlapic.h" @@ -52,13 +48,10 @@ __FBSDID("$FreeBSD$"); #define MSI_X86_ADDR_LOG 0x00000004 /* Destination Mode */ int -lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) +lapic_set_intr(struct vcpu *vcpu, int vector, bool level) { struct vlapic *vlapic; - if (cpu < 0 || cpu >= vm_get_maxcpus(vm)) - return (EINVAL); - /* * According to section "Maskable Hardware Interrupts" in Intel SDM * vectors 16 through 255 can be delivered through the local APIC. 
@@ -66,32 +59,31 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) if (vector < 16 || vector > 255) return (EINVAL); - vlapic = vm_lapic(vm, cpu); + vlapic = vm_lapic(vcpu); if (vlapic_set_intr_ready(vlapic, vector, level)) - vcpu_notify_event(vm, cpu, true); + vcpu_notify_event(vcpu, true); return (0); } int -lapic_set_local_intr(struct vm *vm, int cpu, int vector) +lapic_set_local_intr(struct vm *vm, struct vcpu *vcpu, int vector) { struct vlapic *vlapic; cpuset_t dmask; - int error; + int cpu, error; - if (cpu < -1 || cpu >= vm_get_maxcpus(vm)) - return (EINVAL); - - if (cpu == -1) + if (vcpu == NULL) { + error = 0; dmask = vm_active_cpus(vm); - else - CPU_SETOF(cpu, &dmask); - error = 0; - CPU_FOREACH_ISSET(cpu, &dmask) { - vlapic = vm_lapic(vm, cpu); + CPU_FOREACH_ISSET(cpu, &dmask) { + vlapic = vm_lapic(vm_vcpu(vm, cpu)); + error = vlapic_trigger_lvt(vlapic, vector); + if (error) + break; + } + } else { + vlapic = vm_lapic(vcpu); error = vlapic_trigger_lvt(vlapic, vector); - if (error) - break; } return (error); @@ -123,6 +115,11 @@ lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg) * physical otherwise. */ dest = (addr >> 12) & 0xff; + /* + * Extended Destination ID support uses bits 5-11 of the address: + * http://david.woodhou.se/ExtDestId.pdf + */ + dest |= ((addr >> 5) & 0x7f) << 8; phys = ((addr & (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)) != (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)); delmode = msg & APIC_DELMODE_MASK; @@ -156,13 +153,13 @@ lapic_msr(u_int msr) } int -lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu) +lapic_rdmsr(struct vcpu *vcpu, u_int msr, uint64_t *rval, bool *retu) { int error; u_int offset; struct vlapic *vlapic; - vlapic = vm_lapic(vm, cpu); + vlapic = vm_lapic(vcpu); if (msr == MSR_APICBASE) { *rval = vlapic_get_apicbase(vlapic); @@ -176,13 +173,13 @@ lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu) } int -lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu) +lapic_wrmsr(struct vcpu *vcpu, u_int msr, uint64_t val, bool *retu) { int error; u_int offset; struct vlapic *vlapic; - vlapic = vm_lapic(vm, cpu); + vlapic = vm_lapic(vcpu); if (msr == MSR_APICBASE) { error = vlapic_set_apicbase(vlapic, val); @@ -195,7 +192,7 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu) } int -lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, +lapic_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) { int error; @@ -211,13 +208,13 @@ lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, if (size != 4 || off & 0xf) return (EINVAL); - vlapic = vm_lapic(vm, cpu); + vlapic = vm_lapic(vcpu); error = vlapic_write(vlapic, 1, off, wval, arg); return (error); } int -lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size, +lapic_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { int error; @@ -235,7 +232,7 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size, if (off & 0xf) return (EINVAL); - vlapic = vm_lapic(vm, cpu); + vlapic = vm_lapic(vcpu); error = vlapic_read(vlapic, 1, off, rval, arg); return (error); } diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h index 5fa6c4ef4f32..1c0e17b15c18 100644 --- a/sys/amd64/vmm/vmm_lapic.h +++ b/sys/amd64/vmm/vmm_lapic.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. 
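The two added lines in lapic_intr_msi() implement Extended Destination ID: bits 5..11 of the MSI address carry bits 8..14 of the APIC destination, raising the addressable CPU count beyond 255 without interrupt remapping. A standalone check of the decoding; the example address is hypothetical:

#include <stdint.h>
#include <stdio.h>

static uint32_t
msi_dest(uint64_t addr)
{
        uint32_t dest;

        dest = (addr >> 12) & 0xff;             /* classic 8-bit ID */
        dest |= ((addr >> 5) & 0x7f) << 8;      /* extended bits 8..14 */
        return (dest);
}

int
main(void)
{
        /* Hypothetical MSI address targeting APIC ID 0x123. */
        uint64_t addr = 0xfee00000ull | ((0x123ull & 0xff) << 12) |
            ((0x123ull >> 8) << 5);

        printf("dest = %#x\n", msi_dest(addr)); /* 0x123 */
        return (0);
}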
* All rights reserved. @@ -24,53 +24,50 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMM_LAPIC_H_ #define _VMM_LAPIC_H_ +struct vcpu; struct vm; bool lapic_msr(u_int num); -int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, - bool *retu); -int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval, - bool *retu); +int lapic_rdmsr(struct vcpu *vcpu, u_int msr, uint64_t *rval, bool *retu); +int lapic_wrmsr(struct vcpu *vcpu, u_int msr, uint64_t wval, bool *retu); -int lapic_mmio_read(void *vm, int cpu, uint64_t gpa, +int lapic_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg); -int lapic_mmio_write(void *vm, int cpu, uint64_t gpa, +int lapic_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size, void *arg); /* * Signals to the LAPIC that an interrupt at 'vector' needs to be generated * to the 'cpu', the state is recorded in IRR. */ -int lapic_set_intr(struct vm *vm, int cpu, int vector, bool trig); +int lapic_set_intr(struct vcpu *vcpu, int vector, bool trig); #define LAPIC_TRIG_LEVEL true #define LAPIC_TRIG_EDGE false static __inline int -lapic_intr_level(struct vm *vm, int cpu, int vector) +lapic_intr_level(struct vcpu *vcpu, int vector) { - return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_LEVEL)); + return (lapic_set_intr(vcpu, vector, LAPIC_TRIG_LEVEL)); } static __inline int -lapic_intr_edge(struct vm *vm, int cpu, int vector) +lapic_intr_edge(struct vcpu *vcpu, int vector) { - return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE)); + return (lapic_set_intr(vcpu, vector, LAPIC_TRIG_EDGE)); } /* * Triggers the LAPIC local interrupt (LVT) 'vector' on 'cpu'. 'cpu' can * be set to -1 to trigger the interrupt on all CPUs. */ -int lapic_set_local_intr(struct vm *vm, int cpu, int vector); +int lapic_set_local_intr(struct vm *vm, struct vcpu *vcpu, int vector); int lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg); diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h index 4a010a2f9f0a..41b9bf07c4fc 100644 --- a/sys/amd64/vmm/vmm_mem.h +++ b/sys/amd64/vmm/vmm_mem.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMM_MEM_H_ @@ -34,7 +32,6 @@ struct vmspace; struct vm_object; -int vmm_mem_init(void); struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size); diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem_machdep.c index a736d94bba64..e96c9e4bdc66 100644 --- a/sys/amd64/vmm/vmm_mem.c +++ b/sys/amd64/vmm/vmm_mem_machdep.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
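With the prototypes above, lapic_set_local_intr() broadcasts when its vcpu argument is NULL, replacing the old cpu == -1 convention, and targets a single vcpu otherwise. A caller-side sketch (APIC_LVT_LINT1 is assumed from x86/apicvar.h):

	/* Trigger the LINT1 entry on every active vcpu in the VM: */
	error = lapic_set_local_intr(vm, NULL, APIC_LVT_LINT1);

	/* ...or only on one vcpu that the caller already resolved: */
	error = lapic_set_local_intr(vm, vcpu, APIC_LVT_LINT1);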
- * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> @@ -50,13 +45,6 @@ __FBSDID("$FreeBSD$"); #include "vmm_mem.h" -int -vmm_mem_init(void) -{ - - return (0); -} - vm_object_t vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) @@ -100,7 +88,7 @@ vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, * has incremented the reference count on the sglist. Dropping the * initial reference count ensures that the sglist will be freed * when the object is deallocated. - * + * * If the object could not be allocated then we end up freeing the * sglist. */ diff --git a/sys/amd64/vmm/vmm_snapshot.c b/sys/amd64/vmm/vmm_snapshot.c index c77bb05f76b7..cd53f05a1603 100644 --- a/sys/amd64/vmm/vmm_snapshot.c +++ b/sys/amd64/vmm/vmm_snapshot.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Flavius Anton * Copyright (c) 2016 Mihai Tiganus @@ -33,9 +33,6 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/types.h> #include <sys/systm.h> @@ -57,14 +54,11 @@ vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op) } int -vm_snapshot_buf(volatile void *data, size_t data_size, - struct vm_snapshot_meta *meta) +vm_snapshot_buf(void *data, size_t data_size, struct vm_snapshot_meta *meta) { struct vm_snapshot_buffer *buffer; - int op; - void *nv_data; + int op, error; - nv_data = __DEVOLATILE(void *, data); buffer = &meta->buffer; op = meta->op; @@ -74,11 +68,14 @@ vm_snapshot_buf(volatile void *data, size_t data_size, } if (op == VM_SNAPSHOT_SAVE) - copyout(nv_data, buffer->buf, data_size); + error = copyout(data, buffer->buf, data_size); else if (op == VM_SNAPSHOT_RESTORE) - copyin(buffer->buf, nv_data, data_size); + error = copyin(buffer->buf, data, data_size); else - return (EINVAL); + error = EINVAL; + + if (error) + return (error); buffer->buf += data_size; buffer->buf_rem -= data_size; @@ -104,38 +101,3 @@ vm_get_snapshot_size(struct vm_snapshot_meta *meta) return (length); } - -int -vm_snapshot_buf_cmp(volatile void *data, size_t data_size, - struct vm_snapshot_meta *meta) -{ - struct vm_snapshot_buffer *buffer; - int op; - int ret; - void *_data = *(void **)(void *)&data; - - buffer = &meta->buffer; - op = meta->op; - - if (buffer->buf_rem < data_size) { - printf("%s: buffer too small\r\n", __func__); - ret = E2BIG; - goto done; - } - - if (op == VM_SNAPSHOT_SAVE) { - ret = 0; - copyout(_data, buffer->buf, data_size); - } else if (op == VM_SNAPSHOT_RESTORE) { - ret = memcmp(_data, buffer->buf, data_size); - } else { - ret = EINVAL; - goto done; - } - - buffer->buf += data_size; - buffer->buf_rem -= data_size; - -done: - return (ret); -} diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c deleted file mode 100644 index 497db4452f3b..000000000000 --- a/sys/amd64/vmm/vmm_stat.c +++ /dev/null @@ -1,185 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
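vm_snapshot_buf() now propagates copyin(9)/copyout(9) failures instead of silently dropping them, and the buffer cursor advances only after a successful copy. Device models normally reach it through the wrapper macro in vmm_snapshot.h; a hedged sketch of a snapshot callback body (softc->regs is a placeholder field):

	int ret = 0;

	/* Copies out on save, copies in on restore; jumps to the label on error. */
	SNAPSHOT_BUF_OR_LEAVE(softc->regs, sizeof(softc->regs), meta, ret, done);
done:
	return (ret);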
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/systm.h> -#include <sys/malloc.h> - -#include <machine/vmm.h> -#include "vmm_util.h" -#include "vmm_stat.h" - -/* - * 'vst_num_elems' is the total number of addressable statistic elements - * 'vst_num_types' is the number of unique statistic types - * - * It is always true that 'vst_num_elems' is greater than or equal to - * 'vst_num_types'. This is because a stat type may represent more than - * one element (for e.g. VMM_STAT_ARRAY). - */ -static int vst_num_elems, vst_num_types; -static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS]; - -static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat"); - -#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t)) - -void -vmm_stat_register(void *arg) -{ - struct vmm_stat_type *vst = arg; - - /* We require all stats to identify themselves with a description */ - if (vst->desc == NULL) - return; - - if (vst->scope == VMM_STAT_SCOPE_INTEL && !vmm_is_intel()) - return; - - if (vst->scope == VMM_STAT_SCOPE_AMD && !vmm_is_svm()) - return; - - if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) { - printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc); - return; - } - - vst->index = vst_num_elems; - vst_num_elems += vst->nelems; - - vsttab[vst_num_types++] = vst; -} - -int -vmm_stat_copy(struct vm *vm, int vcpu, int index, int count, int *num_stats, - uint64_t *buf) -{ - struct vmm_stat_type *vst; - uint64_t *stats; - int i, tocopy; - - if (vcpu < 0 || vcpu >= vm_get_maxcpus(vm)) - return (EINVAL); - - if (index < 0 || count < 0) - return (EINVAL); - - if (index > vst_num_elems) - return (ENOENT); - - if (index == vst_num_elems) { - *num_stats = 0; - return (0); - } - - tocopy = min(vst_num_elems - index, count); - - /* Let stats functions update their counters */ - for (i = 0; i < vst_num_types; i++) { - vst = vsttab[i]; - if (vst->func != NULL) - (*vst->func)(vm, vcpu, vst); - } - - /* Copy over the stats */ - stats = vcpu_stats(vm, vcpu); - memcpy(buf, stats + index, tocopy * sizeof(stats[0])); - *num_stats = tocopy; - return (0); -} - -void * -vmm_stat_alloc(void) -{ - - return (malloc(vst_size, M_VMM_STAT, M_WAITOK)); -} - -void -vmm_stat_init(void *vp) -{ - - bzero(vp, vst_size); -} - -void -vmm_stat_free(void *vp) -{ - free(vp, M_VMM_STAT); -} - -int -vmm_stat_desc_copy(int index, char *buf, int bufsize) -{ - int i; - struct vmm_stat_type *vst; - - for (i = 0; i < vst_num_types; i++) { - vst = vsttab[i]; - if (index >= vst->index && 
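The deleted registrar above (the implementation now lives under dev/vmm) assigns each stat type a slot in a flat per-vcpu uint64_t array at SYSINIT time, and vmm_stat_copy() gives types with an update callback a chance to refresh before the array is copied out. Against the old interface shown here, defining and bumping a counter looks roughly like this (the stat name is purely illustrative):

	VMM_STAT(VMEXIT_EXAMPLE, "example exit counter (illustration only)");

	/* In an exit handler, using the pre-move signatures above: */
	vmm_stat_incr(vm, vcpuid, VMEXIT_EXAMPLE, 1);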
index < vst->index + vst->nelems) { - if (vst->nelems > 1) { - snprintf(buf, bufsize, "%s[%d]", - vst->desc, index - vst->index); - } else { - strlcpy(buf, vst->desc, bufsize); - } - return (0); /* found it */ - } - } - - return (EINVAL); -} - -/* global statistics */ -VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); -VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); -VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt"); -VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted"); -VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted"); -VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted"); -VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted"); -VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits"); -VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted"); -VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening"); -VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); -VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); -VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); -VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); -VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); -VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); -VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); -VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit"); -VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); -VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); -VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); diff --git a/sys/amd64/vmm/vmm_stat.h b/sys/amd64/vmm/vmm_stat.h index 0e9c8db8429d..cf3895001528 100644 --- a/sys/amd64/vmm/vmm_stat.h +++ b/sys/amd64/vmm/vmm_stat.h @@ -27,115 +27,14 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
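vmm_stat_desc_copy() above maps a flat buffer index back to a display name, appending the element offset for array stats. With a hypothetical array stat, the resulting strings would look like:

	VMM_STAT_ARRAY(IPIS_SENT, 16, "ipis sent to vcpu");
	/*
	 * A buffer index of IPIS_SENT->index + 3 formats as
	 * "ipis sent to vcpu[3]"; scalar stats copy their
	 * description string unchanged.
	 */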
- * - * $FreeBSD$ */ #ifndef _VMM_STAT_H_ #define _VMM_STAT_H_ -struct vm; - -#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */ - -enum vmm_stat_scope { - VMM_STAT_SCOPE_ANY, - VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */ - VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */ -}; - -struct vmm_stat_type; -typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu, - struct vmm_stat_type *stat); - -struct vmm_stat_type { - int index; /* position in the stats buffer */ - int nelems; /* standalone or array */ - const char *desc; /* description of statistic */ - vmm_stat_func_t func; - enum vmm_stat_scope scope; -}; - -void vmm_stat_register(void *arg); - -#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \ - struct vmm_stat_type type[1] = { \ - { -1, nelems, desc, func, scope } \ - }; \ - SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) - -#define VMM_STAT_DEFINE(type, nelems, desc, scope) \ - VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope) - -#define VMM_STAT_DECLARE(type) \ - extern struct vmm_stat_type type[1] - -#define VMM_STAT(type, desc) \ - VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY) -#define VMM_STAT_INTEL(type, desc) \ - VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL) -#define VMM_STAT_AMD(type, desc) \ - VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD) - -#define VMM_STAT_FUNC(type, desc, func) \ - VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY) - -#define VMM_STAT_ARRAY(type, nelems, desc) \ - VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY) - -void *vmm_stat_alloc(void); -void vmm_stat_init(void *vp); -void vmm_stat_free(void *vp); - -int vmm_stat_copy(struct vm *vm, int vcpu, int index, int count, - int *num_stats, uint64_t *buf); -int vmm_stat_desc_copy(int index, char *buf, int buflen); +#include <dev/vmm/vmm_stat.h> -static void __inline -vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, - int statidx, uint64_t x) -{ -#ifdef VMM_KEEP_STATS - uint64_t *stats; - - stats = vcpu_stats(vm, vcpu); - - if (vst->index >= 0 && statidx < vst->nelems) - stats[vst->index + statidx] += x; -#endif -} - -static void __inline -vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, - int statidx, uint64_t val) -{ -#ifdef VMM_KEEP_STATS - uint64_t *stats; - - stats = vcpu_stats(vm, vcpu); - - if (vst->index >= 0 && statidx < vst->nelems) - stats[vst->index + statidx] = val; -#endif -} - -static void __inline -vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x) -{ - -#ifdef VMM_KEEP_STATS - vmm_stat_array_incr(vm, vcpu, vst, 0, x); -#endif -} - -static void __inline -vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val) -{ - -#ifdef VMM_KEEP_STATS - vmm_stat_array_set(vm, vcpu, vst, 0, val); -#endif -} +#include "vmm_util.h" VMM_STAT_DECLARE(VCPU_MIGRATIONS); VMM_STAT_DECLARE(VMEXIT_COUNT); @@ -158,4 +57,10 @@ VMM_STAT_DECLARE(VMEXIT_USERSPACE); VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS); VMM_STAT_DECLARE(VMEXIT_EXCEPTION); VMM_STAT_DECLARE(VMEXIT_REQIDLE); + +#define VMM_STAT_INTEL(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, vmm_is_intel) +#define VMM_STAT_AMD(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, vmm_is_svm) + #endif diff --git a/sys/amd64/vmm/vmm_util.c b/sys/amd64/vmm/vmm_util.c index b75a1b25eeb6..6c921e218a34 100644 --- a/sys/amd64/vmm/vmm_util.c +++ b/sys/amd64/vmm/vmm_util.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. 
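The vendor-scope enum is gone: VMM_STAT_DEFINE in dev/vmm/vmm_stat.h now takes a predicate function (vmm_is_intel and vmm_is_svm above), so vendor-specific stats exclude themselves at registration. The new registrar presumably gates on it along these lines (the field name is an assumption, not the committed code):

	/* Registration-time check replacing the old VMM_STAT_SCOPE_* tests: */
	if (vst->pred != NULL && !vst->pred())
		return;		/* stat not applicable to this CPU vendor */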
* All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/libkern.h> diff --git a/sys/amd64/vmm/vmm_util.h b/sys/amd64/vmm/vmm_util.h index 130330c2686a..c689f2f81721 100644 --- a/sys/amd64/vmm/vmm_util.h +++ b/sys/amd64/vmm/vmm_util.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _VMM_UTIL_H_ diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c index 67542fe3b661..2e2224595ab4 100644 --- a/sys/amd64/vmm/x86.c +++ b/sys/amd64/vmm/x86.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,13 +24,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/pcpu.h> #include <sys/systm.h> @@ -41,11 +36,11 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/segments.h> #include <machine/specialreg.h> - #include <machine/vmm.h> +#include <dev/vmm/vmm_ktr.h> + #include "vmm_host.h" -#include "vmm_ktr.h" #include "vmm_util.h" #include "x86.h" @@ -53,7 +48,12 @@ SYSCTL_DECL(_hw_vmm); static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, NULL); -#define CPUID_VM_HIGH 0x40000000 +#define CPUID_VM_SIGNATURE 0x40000000 +#define CPUID_BHYVE_FEATURES 0x40000001 +#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES + +/* Features advertised in CPUID_BHYVE_FEATURES %eax */ +#define CPUID_BHYVE_FEAT_EXT_DEST_ID (1UL << 0) /* MSI Extended Dest ID */ static const char bhyve_id[12] = "bhyve bhyve "; @@ -61,35 +61,26 @@ static uint64_t bhyve_xcpuids; SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0, "Number of times an unknown cpuid leaf was accessed"); -#if __FreeBSD_version < 1200060 /* Remove after 11 EOL helps MFCing */ -extern u_int threads_per_core; -SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN, - &threads_per_core, 0, NULL); - -extern u_int cores_per_package; -SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN, - &cores_per_package, 0, NULL); -#endif - static int cpuid_leaf_b = 1; SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN, &cpuid_leaf_b, 0, NULL); /* - * Round up to the next power of two, if necessary, and then take log2. - * Returns -1 if argument is zero. + * Compute ceil(log2(x)). Returns -1 if x is zero. */ static __inline int log2(u_int x) { - return (fls(x << (1 - powerof2(x))) - 1); + return (x == 0 ? 
-1 : order_base_2(x)); } int -x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, +x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx, uint64_t *rcx, uint64_t *rdx) { + struct vm *vm = vcpu_vm(vcpu); + int vcpu_id = vcpu_vcpuid(vcpu); const struct xsave_limits *limits; uint64_t cr4; int error, enable_invpcid, enable_rdpid, enable_rdtscp, level, @@ -114,7 +105,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, if (cpu_exthigh != 0 && func >= 0x80000000) { if (func > cpu_exthigh) func = cpu_exthigh; - } else if (func >= 0x40000000) { + } else if (func >= CPUID_VM_SIGNATURE) { if (func > CPUID_VM_HIGH) func = CPUID_VM_HIGH; } else if (func > cpu_high) { @@ -164,8 +155,6 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, * pkg_id_shift and other OSes may rely on it. */ width = MIN(0xF, log2(threads * cores)); - if (width < 0x4) - width = 0; logical_cpus = MIN(0xFF, threads * cores - 1); regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus; } @@ -202,7 +191,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, regs[2] &= ~AMDID2_MWAITX; /* Advertise RDTSCP if it is enabled. */ - error = vm_get_capability(vm, vcpu_id, + error = vm_get_capability(vcpu, VM_CAP_RDTSCP, &enable_rdtscp); if (error == 0 && enable_rdtscp) regs[3] |= AMDID_RDTSCP; @@ -247,7 +236,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, goto default_leaf; /* - * Similar to Intel, generate a ficticious cache + * Similar to Intel, generate a fictitious cache * topology for the guest with L3 shared by the * package, and L1 and L2 local to a core. */ @@ -270,7 +259,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, func = 3; /* unified cache */ break; default: - logical_cpus = 0; + logical_cpus = sockets * threads * cores; level = 0; func = 0; break; @@ -280,7 +269,14 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, regs[0] = (logical_cpus << 14) | (1 << 8) | (level << 5) | func; regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0; + + /* + * ecx: Number of cache ways for non-fully + * associative cache, minus 1. Reported value + * of zero means there is one way. + */ regs[2] = 0; + regs[3] = 0; break; @@ -309,7 +305,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, case CPUID_0000_0001: do_cpuid(1, regs); - error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state); + error = vm_get_x2apic_state(vcpu, &x2apic_state); if (error) { panic("x86_emulate_cpuid: error %d " "fetching x2apic state", error); @@ -349,7 +345,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, */ regs[2] &= ~CPUID2_OSXSAVE; if (regs[2] & CPUID2_XSAVE) { - error = vm_get_register(vm, vcpu_id, + error = vm_get_register(vcpu, VM_REG_GUEST_CR4, &cr4); if (error) panic("x86_emulate_cpuid: error %d " @@ -439,28 +435,32 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, /* * Expose known-safe features. 
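Two notes on the hunks above. First, the rewritten log2() computes ceil(log2(x)) in one step with order_base_2(); an equivalent userland sketch of the intended semantics, useful for sanity-checking:

	#include <strings.h>	/* fls() */

	/* ceil(log2(x)); -1 for x == 0, matching the kernel helper. */
	static int
	log2_ceil(unsigned int x)
	{
		if (x == 0)
			return (-1);
		return (x == 1 ? 0 : fls(x - 1));
	}
	/* log2_ceil(1) == 0, log2_ceil(8) == 3, log2_ceil(9) == 4. */

Second, the synthetic cache descriptor follows the Intel leaf-4 %eax layout, where the sharing count is encoded minus one. A hedged decode of the fields packed above:

	uint32_t eax = regs[0];
	int type     = eax & 0x1f;		/* 1 data, 2 insn, 3 unified; 0 ends the list */
	int level    = (eax >> 5) & 0x7;	/* cache level */
	int selfinit = (eax >> 8) & 0x1;	/* self-initializing; always 1 here */
	int sharing  = ((eax >> 14) & 0xfff) + 1; /* logical CPUs sharing the cache */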
*/ - regs[1] &= (CPUID_STDEXT_FSGSBASE | + regs[1] &= CPUID_STDEXT_FSGSBASE | CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE | CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP | CPUID_STDEXT_BMI2 | CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM | CPUID_STDEXT_AVX512F | + CPUID_STDEXT_AVX512DQ | CPUID_STDEXT_RDSEED | CPUID_STDEXT_SMAP | CPUID_STDEXT_AVX512PF | CPUID_STDEXT_AVX512ER | - CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA); - regs[2] = 0; + CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA | + CPUID_STDEXT_AVX512BW | + CPUID_STDEXT_AVX512VL; + regs[2] &= CPUID_STDEXT2_VAES | + CPUID_STDEXT2_VPCLMULQDQ; regs[3] &= CPUID_STDEXT3_MD_CLEAR; /* Advertise RDPID if it is enabled. */ - error = vm_get_capability(vm, vcpu_id, - VM_CAP_RDPID, &enable_rdpid); + error = vm_get_capability(vcpu, VM_CAP_RDPID, + &enable_rdpid); if (error == 0 && enable_rdpid) regs[2] |= CPUID_STDEXT2_RDPID; /* Advertise INVPCID if it is enabled. */ - error = vm_get_capability(vm, vcpu_id, + error = vm_get_capability(vcpu, VM_CAP_ENABLE_INVPCID, &enable_invpcid); if (error == 0 && enable_invpcid) regs[1] |= CPUID_STDEXT_INVPCID; @@ -606,13 +606,20 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, regs[3] = 0; break; - case 0x40000000: + case CPUID_VM_SIGNATURE: regs[0] = CPUID_VM_HIGH; bcopy(bhyve_id, ®s[1], 4); bcopy(bhyve_id + 4, ®s[2], 4); bcopy(bhyve_id + 8, ®s[3], 4); break; + case CPUID_BHYVE_FEATURES: + regs[0] = CPUID_BHYVE_FEAT_EXT_DEST_ID; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + break; + default: default_leaf: /* @@ -637,7 +644,7 @@ default_leaf: } bool -vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap) +vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability cap) { bool rv; diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h index 4785bb8f46d4..56364f4f5cb4 100644 --- a/sys/amd64/vmm/x86.h +++ b/sys/amd64/vmm/x86.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. @@ -24,8 +24,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _X86_H_ @@ -66,7 +64,7 @@ */ #define CPUID_0000_0001_FEAT0_VMX (1<<5) -int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, +int x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx, uint64_t *rcx, uint64_t *rdx); enum vm_cpuid_capability { @@ -81,7 +79,7 @@ enum vm_cpuid_capability { * Return 'true' if the capability 'cap' is enabled in this virtual cpu * and 'false' otherwise. */ -bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability); +bool vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability); #define VMM_MTRR_VAR_MAX 10 #define VMM_MTRR_DEF_MASK \ |
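A guest can probe the new leaf by matching the "bhyve bhyve " signature at CPUID_VM_SIGNATURE and, when %eax reports leaf 0x40000001 as present, testing bit 0 there for MSI Extended Destination ID support. A userland sketch using the GCC/Clang <cpuid.h> helpers:

	#include <cpuid.h>
	#include <stdbool.h>
	#include <string.h>

	static bool
	bhyve_has_ext_dest_id(void)
	{
		unsigned int eax, ebx, ecx, edx;
		char sig[13];

		__cpuid(0x40000000, eax, ebx, ecx, edx);
		memcpy(sig, &ebx, 4);
		memcpy(sig + 4, &ecx, 4);
		memcpy(sig + 8, &edx, 4);
		sig[12] = '\0';
		if (strcmp(sig, "bhyve bhyve ") != 0 || eax < 0x40000001)
			return (false);
		__cpuid(0x40000001, eax, ebx, ecx, edx);
		return ((eax & 1) != 0);	/* CPUID_BHYVE_FEAT_EXT_DEST_ID */
	}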
