From b053814333808bd3c86dc6a5f1fa3a7ec6ac2368 Mon Sep 17 00:00:00 2001
From: Neel Natu <neel@FreeBSD.org>
Date: Tue, 23 Dec 2014 02:14:49 +0000
Subject: Allow ktr(4) tracing of all guest exceptions via the tunable
 "hw.vmm.trace_guest_exceptions". To enable this feature, set the tunable
 to "1" before loading vmm.ko.

Tracing the guest exceptions can be useful when debugging guest triple
faults.

Note that there is a performance impact when exception tracing is enabled,
since every exception will now trigger a VM-exit.

Also, handle machine check exceptions that happen during guest execution
by vectoring to the host's machine check handler via "int $18".

Discussed with: grehan
MFC after:      2 weeks
---
 sys/amd64/include/vmm.h    |  2 +
 sys/amd64/vmm/amd/svm.c    | 96 ++++++++++++++++++++++++++++++++++++++++++++--
 sys/amd64/vmm/intel/vmcs.c |  6 ---
 sys/amd64/vmm/intel/vmcs.h |  2 +-
 sys/amd64/vmm/intel/vmx.c  | 76 +++++++++++++++++++++++++++++++++---
 sys/amd64/vmm/vmm.c        | 12 ++++++
 6 files changed, 177 insertions(+), 17 deletions(-)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 0f191102ca5d..8a8c3f471831 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -358,6 +358,8 @@ void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     void *kaddr, size_t len);
 void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
     struct vm_copyinfo *copyinfo, size_t len);
+
+int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
 #endif  /* KERNEL */
 
 #define VM_MAXCPU       16      /* maximum virtual cpus */

diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 753799af6d2b..7d7504619e97 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "vmm_lapic.h"
@@ -429,8 +430,24 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
                svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask);
        }
 
-       /* Intercept Machine Check exceptions. */
-       svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC));
+
+       /*
+        * Intercept everything when tracing guest exceptions; otherwise
+        * just intercept the machine check exception.
+        */
+       if (vcpu_trace_exceptions(sc->vm, vcpu)) {
+               for (n = 0; n < 32; n++) {
+                       /*
+                        * Skip unimplemented vectors in the exception bitmap.
+                        */
+                       if (n == 2 || n == 9) {
+                               continue;
+                       }
+                       svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(n));
+               }
+       } else {
+               svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC));
+       }
 
        /* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */
        svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO);
@@ -1176,9 +1193,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
        struct vmcb_state *state;
        struct vmcb_ctrl *ctrl;
        struct svm_regctx *ctx;
+       struct vm_exception exception;
        uint64_t code, info1, info2, val;
        uint32_t eax, ecx, edx;
-       int handled;
+       int error, errcode_valid, handled, idtvec, reflect;
        bool retu;
 
        ctx = svm_get_guest_regctx(svm_sc, vcpu);
@@ -1237,8 +1255,78 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
        case VMCB_EXIT_NMI:     /* external NMI */
                handled = 1;
                break;
-       case VMCB_EXIT_MC:      /* machine check */
+       case 0x40 ... 0x5F:
                vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1);
+               reflect = 1;
+               idtvec = code - 0x40;
+               switch (idtvec) {
+               case IDT_MC:
+                       /*
+                        * Call the machine check handler by hand. Also don't
+                        * reflect the machine check back into the guest.
+                        */
+                       reflect = 0;
+                       VCPU_CTR0(svm_sc->vm, vcpu, "Vectoring to MCE handler");
+                       __asm __volatile("int $18");
+                       break;
+               case IDT_PF:
+                       error = svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2,
+                           info2);
+                       KASSERT(error == 0, ("%s: error %d updating cr2",
+                           __func__, error));
+                       /* fallthru */
+               case IDT_NP:
+               case IDT_SS:
+               case IDT_GP:
+               case IDT_AC:
+               case IDT_TS:
+                       errcode_valid = 1;
+                       break;
+
+               case IDT_DF:
+                       errcode_valid = 1;
+                       info1 = 0;
+                       break;
+
+               case IDT_BP:
+               case IDT_OF:
+               case IDT_BR:
+                       /*
+                        * The 'nrip' field is populated for INT3, INTO and
+                        * BOUND exceptions and this also implies that
+                        * 'inst_length' is non-zero.
+                        *
+                        * Reset 'inst_length' to zero so the guest %rip at
+                        * event injection is identical to what it was when
+                        * the exception originally happened.
+                        */
+                       VCPU_CTR2(svm_sc->vm, vcpu, "Reset inst_length from %d "
+                           "to zero before injecting exception %d",
+                           vmexit->inst_length, idtvec);
+                       vmexit->inst_length = 0;
+                       /* fallthru */
+               default:
+                       errcode_valid = 0;
+                       break;
+               }
+               KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) "
+                   "when reflecting exception %d into guest",
+                   vmexit->inst_length, idtvec));
+
+               if (reflect) {
+                       /* Reflect the exception back into the guest */
+                       exception.vector = idtvec;
+                       exception.error_code_valid = errcode_valid;
+                       exception.error_code = errcode_valid ? info1 : 0;
+                       VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception "
+                           "%d/%#x into the guest", exception.vector,
+                           exception.error_code);
+                       error = vm_inject_exception(svm_sc->vm, vcpu,
+                           &exception);
+                       KASSERT(error == 0, ("%s: vm_inject_exception error %d",
+                           __func__, error));
+               }
+               handled = 1;
                break;
        case VMCB_EXIT_MSR:     /* MSR access. */
                eax = state->rax;

diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c
index 51e5c2c06f00..ae4d9db3274c 100644
--- a/sys/amd64/vmm/intel/vmcs.c
+++ b/sys/amd64/vmm/intel/vmcs.c
@@ -332,7 +332,6 @@ vmcs_init(struct vmcs *vmcs)
        int error, codesel, datasel, tsssel;
        u_long cr0, cr4, efer;
        uint64_t pat, fsbase, idtrbase;
-       uint32_t exc_bitmap;
 
        codesel = vmm_get_host_codesel();
        datasel = vmm_get_host_datasel();
@@ -417,11 +416,6 @@ vmcs_init(struct vmcs *vmcs)
        if ((error = vmwrite(VMCS_HOST_RIP, (u_long)vmx_exit_guest)) != 0)
                goto done;
 
-       /* exception bitmap */
-       exc_bitmap = 1 << IDT_MC;
-       if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
-               goto done;
-
        /* link pointer */
        if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
                goto done;

diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
index 6122de581fac..6d78a6928e20 100644
--- a/sys/amd64/vmm/intel/vmcs.h
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -321,7 +321,7 @@ vmcs_write(uint32_t encoding, uint64_t val)
 #define EXIT_REASON_MTF                 37
 #define EXIT_REASON_MONITOR             39
 #define EXIT_REASON_PAUSE               40
-#define EXIT_REASON_MCE                 41
+#define EXIT_REASON_MCE_DURING_ENTRY    41
 #define EXIT_REASON_TPR                 43
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_VIRTUALIZED_EOI     45

diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index c855697c6c0d..c3dd04e66e1f 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -283,8 +283,8 @@ exit_reason_to_str(int reason)
                return "monitor";
        case EXIT_REASON_PAUSE:
                return "pause";
-       case EXIT_REASON_MCE:
-               return "mce";
+       case EXIT_REASON_MCE_DURING_ENTRY:
+               return "mce-during-entry";
        case EXIT_REASON_TPR:
                return "tpr";
        case EXIT_REASON_APIC_ACCESS:
@@ -821,6 +821,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
        int i, error;
        struct vmx *vmx;
        struct vmcs *vmcs;
+       uint32_t exc_bitmap;
 
        vmx =
            malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO);
        if ((uintptr_t)vmx & PAGE_MASK) {
@@ -911,6 +912,14 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
                error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
                error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
                error += vmwrite(VMCS_VPID, vpid[i]);
+
+               /* exception bitmap */
+               if (vcpu_trace_exceptions(vm, i))
+                       exc_bitmap = 0xffffffff;
+               else
+                       exc_bitmap = 1 << IDT_MC;
+               error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap);
+
                if (virtual_interrupt_delivery) {
                        error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
                        error += vmwrite(VMCS_VIRTUAL_APIC,
@@ -2056,8 +2065,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
        struct vlapic *vlapic;
        struct vm_inout_str *vis;
        struct vm_task_switch *ts;
+       struct vm_exception vmexc;
        uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
-       uint32_t intr_type, reason;
+       uint32_t intr_type, intr_vec, reason;
        uint64_t exitintinfo, qual, gpa;
        bool retu;
 
@@ -2073,6 +2083,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 
        vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);
 
+       /*
+        * VM-entry failures during or after loading guest state.
+        *
+        * These VM-exits are uncommon but must be handled specially
+        * as most VM-exit fields are not populated as usual.
+        */
+       if (__predict_false(reason == EXIT_REASON_MCE_DURING_ENTRY)) {
+               VCPU_CTR0(vmx->vm, vcpu, "Handling MCE during VM-entry");
+               __asm __volatile("int $18");
+               return (1);
+       }
+
        /*
         * VM exits that can be triggered during event delivery need to
         * be handled specially by re-injecting the event if the IDT
@@ -2305,6 +2327,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
                KASSERT((intr_info & VMCS_INTR_VALID) != 0,
                    ("VM exit interruption info invalid: %#x", intr_info));
 
+               intr_vec = intr_info & 0xff;
+               intr_type = intr_info & VMCS_INTR_T_MASK;
+
                /*
                 * If Virtual NMIs control is 1 and the VM-exit is due to a
                 * fault encountered during the execution of IRET then we must
@@ -2315,16 +2340,55 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
                 * See "Information for VM Exits Due to Vectored Events".
                 */
                if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 &&
-                   (intr_info & 0xff) != IDT_DF &&
+                   (intr_vec != IDT_DF) &&
                    (intr_info & EXIT_QUAL_NMIUDTI) != 0)
                        vmx_restore_nmi_blocking(vmx, vcpu);
 
                /*
                 * The NMI has already been handled in vmx_exit_handle_nmi().
                 */
-               if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI)
+               if (intr_type == VMCS_INTR_T_NMI)
                        return (1);
-               break;
+
+               /*
+                * Call the machine check handler by hand. Also don't reflect
+                * the machine check back into the guest.
+                */
+               if (intr_vec == IDT_MC) {
+                       VCPU_CTR0(vmx->vm, vcpu, "Vectoring to MCE handler");
+                       __asm __volatile("int $18");
+                       return (1);
+               }
+
+               if (intr_vec == IDT_PF) {
+                       error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual);
+                       KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d",
+                           __func__, error));
+               }
+
+               /*
+                * Software exceptions exhibit trap-like behavior. This in
+                * turn requires populating the VM-entry instruction length
+                * so that the %rip in the trap frame is past the INT3/INTO
+                * instruction.
+                */
+               if (intr_type == VMCS_INTR_T_SWEXCEPTION)
+                       vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
+
+               /* Reflect all other exceptions back into the guest */
+               bzero(&vmexc, sizeof(struct vm_exception));
+               vmexc.vector = intr_vec;
+               if (intr_info & VMCS_INTR_DEL_ERRCODE) {
+                       vmexc.error_code_valid = 1;
+                       vmexc.error_code = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
+               }
+               VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
+                   "the guest", vmexc.vector, vmexc.error_code);
+               error = vm_inject_exception(vmx->vm, vcpu, &vmexc);
+               KASSERT(error == 0, ("%s: vm_inject_exception error %d",
+                   __func__, error));
+               return (1);
+
        case EXIT_REASON_EPT_FAULT:
                /*
                 * If 'gpa' lies within the address space allocated to

diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index e7fbada39fc2..d9cb6f3c1254 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -207,6 +207,11 @@ static int vmm_ipinum;
 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
     "IPI vector used for vcpu notifications");
 
+static int trace_guest_exceptions;
+SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
+    &trace_guest_exceptions, 0,
+    "Trap into hypervisor on all guest exceptions and reflect them back");
+
 static void
 vcpu_cleanup(struct vm *vm, int i, bool destroy)
 {
@@ -250,6 +255,13 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
        vmm_stat_init(vcpu->stats);
 }
 
+int
+vcpu_trace_exceptions(struct vm *vm, int vcpuid)
+{
+
+       return (trace_guest_exceptions);
+}
+
 struct vm_exit *
 vm_exitinfo(struct vm *vm, int cpuid)
 {
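Below is a self-contained sketch of the intercept policy the patch installs
on each vendor; it is an editor's illustration, not vmm source, and the
helper names are invented for the example. On Intel/VMX the per-vCPU
exception bitmap is either all ones (trace every exception) or just the
machine check bit; on AMD/SVM each of the 32 vectors gets an individual
intercept, skipping vector 2 (NMI, which has its own dedicated intercept)
and vector 9 (the legacy coprocessor segment overrun, which modern CPUs
never raise).

#include <stdint.h>
#include <stdio.h>

#define IDT_MC  18      /* machine check vector */

/* VMX: one bit per exception vector in the VMCS exception bitmap. */
static uint32_t
vmx_exc_bitmap(int trace_exceptions)
{

        return (trace_exceptions ? 0xffffffff : (uint32_t)1 << IDT_MC);
}

/* SVM: per-vector intercepts; vectors 2 and 9 are never intercepted. */
static uint32_t
svm_exc_intercepts(int trace_exceptions)
{
        uint32_t mask;
        int n;

        if (!trace_exceptions)
                return ((uint32_t)1 << IDT_MC);
        mask = 0;
        for (n = 0; n < 32; n++) {
                if (n == 2 || n == 9)
                        continue;
                mask |= (uint32_t)1 << n;
        }
        return (mask);
}

int
main(void)
{

        printf("VMX, tracing: %#010x\n", vmx_exc_bitmap(1));    /* 0xffffffff */
        printf("SVM, tracing: %#010x\n", svm_exc_intercepts(1)); /* 0xfffffdfb */
        printf("default (MC): %#010x\n", vmx_exc_bitmap(0));    /* 0x00040000 */
        return (0);
}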
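A usage note (this summarizes standard FreeBSD tunable and ktr(4) behavior,
not anything introduced by the commit): because the sysctl is declared with
CTLFLAG_RDTUN it is read-only at runtime and must be set before vmm.ko
initializes, e.g. hw.vmm.trace_guest_exceptions="1" in /boot/loader.conf,
or via kenv(1) immediately before "kldload vmm". The VCPU_CTR* records
emitted for each traced exception land in the ktr(4) buffer, so observing
them also requires a kernel built with KTR support; ktrdump(8) can then be
used to read the trace buffer.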