Diffstat (limited to 'sys/amd64/vmm/intel/vmx_msr.c')
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.c  511
1 file changed, 511 insertions, 0 deletions
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
new file mode 100644
index 000000000000..40dbec290f2d
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -0,0 +1,511 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <machine/clock.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/vmm.h>
+
+#include "vmx.h"
+#include "vmx_msr.h"
+#include "x86.h"
+
+static bool
+vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
+{
+
+	return ((msr_val & (1UL << (bitpos + 32))) != 0);
+}
+
+static bool
+vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
+{
+
+	return ((msr_val & (1UL << bitpos)) == 0);
+}
+
+uint32_t
+vmx_revision(void)
+{
+
+	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
+}
+
+/*
+ * Generate a bitmask to be used for the VMCS execution control fields.
+ *
+ * The caller specifies what bits should be set to one in 'ones_mask'
+ * and what bits should be set to zero in 'zeros_mask'. The don't-care
+ * bits are set to the default value. The default values are obtained
+ * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
+ * VMX Capabilities".
+ *
+ * Returns zero on success and non-zero on error.
+ */
+int
+vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
+    uint32_t zeros_mask, uint32_t *retval)
+{
+	int i;
+	uint64_t val, trueval;
+	bool true_ctls_avail, one_allowed, zero_allowed;
+
+	/* We cannot ask the same bit to be set to both '1' and '0' */
+	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
+		return (EINVAL);
+
+	true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;
+
+	val = rdmsr(ctl_reg);
+	if (true_ctls_avail)
+		trueval = rdmsr(true_ctl_reg);	/* step c */
+	else
+		trueval = val;			/* step a */
+
+	for (i = 0; i < 32; i++) {
+		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
+		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
+
+		KASSERT(one_allowed || zero_allowed,
+		    ("invalid zero/one setting for bit %d of ctl 0x%0x, "
+		    "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
+
+		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
+			if (ones_mask & (1 << i))
+				return (EINVAL);
+			*retval &= ~(1 << i);
+		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
+			if (zeros_mask & (1 << i))
+				return (EINVAL);
+			*retval |= 1 << i;
+		} else {
+			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
+				*retval &= ~(1 << i);
+			else if (ones_mask & (1 << i))	/* b(ii), c(ii) */
+				*retval |= 1 << i;
+			else if (!true_ctls_avail)
+				*retval &= ~(1 << i);	/* b(iii) */
+			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
+				*retval &= ~(1 << i);
+			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
+				*retval |= 1 << i;
+			else {
+				panic("vmx_set_ctlreg: unable to determine "
+				    "correct value of ctl bit %d for msr "
+				    "0x%0x and true msr 0x%0x", i, ctl_reg,
+				    true_ctl_reg);
+			}
+		}
+	}
+
+	return (0);
+}
+
+void
+msr_bitmap_initialize(char *bitmap)
+{
+
+	memset(bitmap, 0xff, PAGE_SIZE);
+}
+
+int
+msr_bitmap_change_access(char *bitmap, u_int msr, int access)
+{
+	int byte, bit;
+
+	if (msr <= 0x00001FFF)
+		byte = msr / 8;
+	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
+		byte = 1024 + (msr - 0xC0000000) / 8;
+	else
+		return (EINVAL);
+
+	bit = msr & 0x7;
+
+	if (access & MSR_BITMAP_ACCESS_READ)
+		bitmap[byte] &= ~(1 << bit);
+	else
+		bitmap[byte] |= 1 << bit;
+
+	byte += 2048;
+	if (access & MSR_BITMAP_ACCESS_WRITE)
+		bitmap[byte] &= ~(1 << bit);
+	else
+		bitmap[byte] |= 1 << bit;
+
+	return (0);
+}
+
+static uint64_t misc_enable;
+static uint64_t platform_info;
+static uint64_t turbo_ratio_limit;
+static uint64_t host_msrs[GUEST_MSR_NUM];
+
+static bool
+nehalem_cpu(void)
+{
+	u_int family, model;
+
+	/*
+	 * The family:model numbers belonging to the Nehalem microarchitecture
+	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
+	 */
+	family = CPUID_TO_FAMILY(cpu_id);
+	model = CPUID_TO_MODEL(cpu_id);
+	if (family == 0x6) {
+		switch (model) {
+		case 0x1A:
+		case 0x1E:
+		case 0x1F:
+		case 0x2E:
+			return (true);
+		default:
+			break;
+		}
+	}
+	return (false);
+}
+
+static bool
+westmere_cpu(void)
+{
+	u_int family, model;
+
+	/*
+	 * The family:model numbers belonging to the Westmere microarchitecture
+	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
+	 */
+	family = CPUID_TO_FAMILY(cpu_id);
+	model = CPUID_TO_MODEL(cpu_id);
+	if (family == 0x6) {
+		switch (model) {
+		case 0x25:
+		case 0x2C:
+			return (true);
+		default:
+			break;
+		}
+	}
+	return (false);
+}
+
+static bool
+pat_valid(uint64_t val)
+{
+	int i, pa;
+
+	/*
+	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
+	 *
+	 * Extract PA0 through PA7 and validate that each one encodes a
+	 * valid memory type.
+ */
+	for (i = 0; i < 8; i++) {
+		pa = (val >> (i * 8)) & 0xff;
+		if (pa == 2 || pa == 3 || pa >= 8)
+			return (false);
+	}
+	return (true);
+}
+
+void
+vmx_msr_init(void)
+{
+	uint64_t bus_freq, ratio;
+	int i;
+
+	/*
+	 * It is safe to cache the values of the following MSRs because
+	 * they don't change based on curcpu, curproc or curthread.
+	 */
+	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+
+	/*
+	 * Initialize emulated MSRs.
+	 */
+	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
+	/*
+	 * Set mandatory bits:
+	 *  11: branch trace disabled
+	 *  12: PEBS unavailable
+	 * Clear unsupported features:
+	 *  16: SpeedStep enable
+	 *  18: enable MONITOR FSM
+	 */
+	misc_enable |= (1 << 12) | (1 << 11);
+	misc_enable &= ~((1 << 18) | (1 << 16));
+
+	if (nehalem_cpu() || westmere_cpu())
+		bus_freq = 133330000;		/* 133.33 MHz */
+	else
+		bus_freq = 100000000;		/* 100 MHz */
+
+	/*
+	 * XXXtime
+	 * The ratio should really be based on the virtual TSC frequency as
+	 * opposed to the host TSC.
+	 */
+	ratio = (tsc_freq / bus_freq) & 0xff;
+
+	/*
+	 * The register definition is based on the micro-architecture
+	 * but the following bits are always the same:
+	 * [15:8]  Maximum Non-Turbo Ratio
+	 * [28]    Programmable Ratio Limit for Turbo Mode
+	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
+	 * [47:40] Maximum Efficiency Ratio
+	 *
+	 * The other bits can be safely set to 0 on all
+	 * micro-architectures up to Haswell.
+	 */
+	platform_info = (ratio << 8) | (ratio << 40);
+
+	/*
+	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
+	 * dependent on the maximum cores per package supported by the micro-
+	 * architecture. For example, Westmere supports 6 cores per package
+	 * and uses the low 48 bits. Sandybridge supports 8 cores per package
+	 * and uses up all 64 bits.
+	 *
+	 * However, the unused bits are reserved so we pretend that all bits
+	 * in this MSR are valid.
+	 */
+	for (i = 0; i < 8; i++)
+		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
+}
+
+void
+vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
+{
+	/*
+	 * The permissions bitmap is shared between all vcpus so initialize it
+	 * once when initializing the vBSP.
+	 */
+	if (vcpu->vcpuid == 0) {
+		guest_msr_rw(vmx, MSR_LSTAR);
+		guest_msr_rw(vmx, MSR_CSTAR);
+		guest_msr_rw(vmx, MSR_STAR);
+		guest_msr_rw(vmx, MSR_SF_MASK);
+		guest_msr_rw(vmx, MSR_KGSBASE);
+	}
+
+	/*
+	 * Initialize guest IA32_PAT MSR with the default value after reset.
+ */
+	vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
+	    PAT_VALUE(1, PAT_WRITE_THROUGH) |
+	    PAT_VALUE(2, PAT_UNCACHED) |
+	    PAT_VALUE(3, PAT_UNCACHEABLE) |
+	    PAT_VALUE(4, PAT_WRITE_BACK) |
+	    PAT_VALUE(5, PAT_WRITE_THROUGH) |
+	    PAT_VALUE(6, PAT_UNCACHED) |
+	    PAT_VALUE(7, PAT_UNCACHEABLE);
+}
+
+void
+vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
+{
+
+	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
+	update_pcb_bases(curpcb);
+	wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
+	wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
+	wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
+	wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
+	wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
+}
+
+void
+vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
+{
+	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
+	uint32_t host_aux = cpu_auxmsr();
+
+	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
+		wrmsr(MSR_TSC_AUX, guest_tsc_aux);
+}
+
+void
+vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
+{
+
+	/* Save guest MSRs */
+	vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+	vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+	vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+	vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+	vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
+
+	/* Restore host MSRs */
+	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
+	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
+	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
+	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
+
+	/* MSR_KGSBASE will be restored on the way back to userspace */
+}
+
+void
+vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
+{
+	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
+	uint32_t host_aux = cpu_auxmsr();
+
+	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
+		/*
+		 * Note that it is not necessary to save the guest value
+		 * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
+		 * contains the current value since it is updated whenever
+		 * the guest writes to it (which is expected to be very
+		 * rare).
+		 */
+		wrmsr(MSR_TSC_AUX, host_aux);
+}
+
+int
+vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
+{
+	int error;
+
+	error = 0;
+
+	switch (num) {
+	case MSR_MCG_CAP:
+	case MSR_MCG_STATUS:
+		*val = 0;
+		break;
+	case MSR_MTRRcap:
+	case MSR_MTRRdefType:
+	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+	case MSR_MTRR64kBase:
+	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
+		if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
+			vm_inject_gp(vcpu->vcpu);
+		}
+		break;
+	case MSR_IA32_MISC_ENABLE:
+		*val = misc_enable;
+		break;
+	case MSR_PLATFORM_INFO:
+		*val = platform_info;
+		break;
+	case MSR_TURBO_RATIO_LIMIT:
+	case MSR_TURBO_RATIO_LIMIT1:
+		*val = turbo_ratio_limit;
+		break;
+	case MSR_PAT:
+		*val = vcpu->guest_msrs[IDX_MSR_PAT];
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+int
+vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
+{
+	uint64_t changed;
+	int error;
+
+	error = 0;
+
+	switch (num) {
+	case MSR_MCG_CAP:
+	case MSR_MCG_STATUS:
+		break;		/* ignore writes */
+	case MSR_MTRRcap:
+	case MSR_MTRRdefType:
+	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+	case MSR_MTRR64kBase:
+	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
+		if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
+			vm_inject_gp(vcpu->vcpu);
+		}
+		break;
+	case MSR_IA32_MISC_ENABLE:
+		changed = val ^ misc_enable;
+		/*
+		 * If the host has disabled the NX feature then the guest
+		 * also cannot use it. However, a Linux guest will try to
+		 * enable the NX feature by writing to the MISC_ENABLE MSR.
+		 *
+		 * This can be safely ignored because the memory management
+		 * code looks at CPUID.80000001H:EDX.NX to check if the
+		 * functionality is actually enabled.
+		 */
+		changed &= ~(1UL << 34);
+
+		/*
+		 * Punt to userspace if any other bits are being modified.
+		 */
+		if (changed)
+			error = EINVAL;
+
+		break;
+	case MSR_PAT:
+		if (pat_valid(val))
+			vcpu->guest_msrs[IDX_MSR_PAT] = val;
+		else
+			vm_inject_gp(vcpu->vcpu);
+		break;
+	case MSR_TSC:
+		error = vmx_set_tsc_offset(vcpu, val - rdtsc());
+		break;
+	case MSR_TSC_AUX:
+		if (vmx_have_msr_tsc_aux)
+			/*
+			 * vmx_msr_guest_enter_tsc_aux() will apply this
+			 * value when it is called immediately before guest
+			 * entry.
+			 */
+			vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
+		else
+			vm_inject_gp(vcpu->vcpu);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
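
For context, a minimal sketch (not part of this diff) of how a caller such as the VMX module-init path might use vmx_set_ctlreg() to compute a VMCS control field. The EXAMPLE_* masks are placeholders; bhyve's real callers pass one/zero setting masks defined in vmx.c.

#include <machine/specialreg.h>

#include "vmx_msr.h"

#define	EXAMPLE_ONES_MASK	0x00000000U	/* bits that must be 1 (placeholder) */
#define	EXAMPLE_ZEROS_MASK	0x00000000U	/* bits that must be 0 (placeholder) */

static uint32_t procbased_ctls;

static int
example_compute_procbased_ctls(void)
{
	/*
	 * Bits named in the ones/zeros masks are forced; every other bit
	 * gets its default per "Algorithm 3" as implemented above.
	 * Returns EINVAL if the CPU cannot support a forced setting.
	 */
	return (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
	    MSR_VMX_TRUE_PROCBASED_CTLS, EXAMPLE_ONES_MASK,
	    EXAMPLE_ZEROS_MASK, &procbased_ctls));
}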
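A standalone (userland, illustrative) sketch of the 4 KB MSR-permission-bitmap layout that msr_bitmap_change_access() encodes: read bitmaps for the low and high MSR ranges at byte offsets 0 and 1024, the corresponding write bitmaps 2048 bytes later, with a cleared bit meaning the guest may access that MSR without a VM exit.

#include <stdint.h>
#include <stdio.h>

/* Returns the read-bitmap byte offset for 'msr', or -1 if out of range. */
static int
msr_read_bitmap_byte(uint32_t msr)
{
	if (msr <= 0x00001FFF)
		return (msr / 8);			/* low range: bytes 0..1023 */
	if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		return (1024 + (msr - 0xC0000000) / 8);	/* high range: 1024..2047 */
	return (-1);
}

int
main(void)
{
	uint32_t msr = 0xC0000102;	/* MSR_KGSBASE */
	int byte = msr_read_bitmap_byte(msr);

	/* The write bitmap lives 2048 bytes past the read bitmap. */
	printf("read:  byte %d, bit %u\n", byte, msr & 0x7);
	printf("write: byte %d, bit %u\n", byte + 2048, msr & 0x7);
	return (0);
}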
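Similarly, a standalone sketch of the rule pat_valid() enforces: each of the eight PAT entries must encode a valid memory type (0, 1, 4, 5, 6, 7), while 2, 3 and anything >= 8 are reserved. The numeric memory-type encodings below come from the Intel SDM; the default value mirrors the one vmx_msr_guest_init() installs.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	PAT_VALUE(i, x)	((uint64_t)(x) << ((i) * 8))

static bool
pat_ok(uint64_t val)
{
	/* Memory types 2, 3 and >= 8 are reserved encodings. */
	for (int i = 0; i < 8; i++) {
		int pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

int
main(void)
{
	/* Power-on default: WB(6), WT(4), UC-(7), UC(0), repeated. */
	uint64_t def = PAT_VALUE(0, 6) | PAT_VALUE(1, 4) | PAT_VALUE(2, 7) |
	    PAT_VALUE(3, 0) | PAT_VALUE(4, 6) | PAT_VALUE(5, 4) |
	    PAT_VALUE(6, 7) | PAT_VALUE(7, 0);

	printf("default PAT valid: %d\n", pat_ok(def));
	printf("reserved type 2 rejected: %d\n", !pat_ok(PAT_VALUE(0, 2)));
	return (0);
}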
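Finally, a worked example of the MSR_PLATFORM_INFO synthesis in vmx_msr_init(): the maximum non-turbo ratio goes in bits [15:8] and the maximum efficiency ratio in bits [47:40], both derived here from the host TSC frequency. The 2.4 GHz figure is hypothetical.

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	uint64_t tsc_freq = 2400000000ULL;	/* hypothetical 2.4 GHz host */
	uint64_t bus_freq = 100000000ULL;	/* 100 MHz bus */

	/* ratio = 24 here, so platform_info reads 0x180000001800. */
	uint64_t ratio = (tsc_freq / bus_freq) & 0xff;
	uint64_t platform_info = (ratio << 8) | (ratio << 40);

	printf("ratio %" PRIu64 ", platform_info %#" PRIx64 "\n",
	    ratio, platform_info);
	return (0);
}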