Diffstat (limited to 'sys/amd64/vmm/intel/vmx_msr.c')
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.c  511
1 file changed, 511 insertions(+), 0 deletions(-)
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
new file mode 100644
index 000000000000..40dbec290f2d
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -0,0 +1,511 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <machine/clock.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/vmm.h>
+
+#include "vmx.h"
+#include "vmx_msr.h"
+#include "x86.h"
+
+static bool
+vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
+{
+
+ return ((msr_val & (1UL << (bitpos + 32))) != 0);
+}
+
+static bool
+vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
+{
+
+ return ((msr_val & (1UL << bitpos)) == 0);
+}
+
+uint32_t
+vmx_revision(void)
+{
+
+ return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
+}
+
+/*
+ * Generate a bitmask to be used for the VMCS execution control fields.
+ *
+ * The caller specifies what bits should be set to one in 'ones_mask'
+ * and what bits should be set to zero in 'zeros_mask'. The don't-care
+ * bits are set to the default value. The default values are obtained
+ * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
+ * VMX Capabilities".
+ *
+ * Returns zero on success and non-zero on error.
+ */
+int
+vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
+ uint32_t zeros_mask, uint32_t *retval)
+{
+ int i;
+ uint64_t val, trueval;
+ bool true_ctls_avail, one_allowed, zero_allowed;
+
+ /* We cannot ask the same bit to be set to both '1' and '0' */
+ if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
+ return (EINVAL);
+
+ true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;
+
+ val = rdmsr(ctl_reg);
+ if (true_ctls_avail)
+ trueval = rdmsr(true_ctl_reg); /* step c */
+ else
+ trueval = val; /* step a */
+
+ for (i = 0; i < 32; i++) {
+ one_allowed = vmx_ctl_allows_one_setting(trueval, i);
+ zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
+
+ KASSERT(one_allowed || zero_allowed,
+ ("invalid zero/one setting for bit %d of ctl 0x%0x, "
+ "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
+
+ if (zero_allowed && !one_allowed) { /* b(i),c(i) */
+ if (ones_mask & (1 << i))
+ return (EINVAL);
+ *retval &= ~(1 << i);
+ } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
+ if (zeros_mask & (1 << i))
+ return (EINVAL);
+ *retval |= 1 << i;
+ } else {
+ if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
+ *retval &= ~(1 << i);
+ else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
+ *retval |= 1 << i;
+ else if (!true_ctls_avail)
+ *retval &= ~(1 << i); /* b(iii) */
+ else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
+ *retval &= ~(1 << i);
+ else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
+ *retval |= 1 << i;
+ else {
+ panic("vmx_set_ctlreg: unable to determine "
+ "correct value of ctl bit %d for msr "
+ "0x%0x and true msr 0x%0x", i, ctl_reg,
+ true_ctl_reg);
+ }
+ }
+ }
+
+ return (0);
+}
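
For orientation, here is a hedged sketch of how a consumer of vmx_set_ctlreg() typically computes an execution-control value. MSR_VMX_PINBASED_CTLS and MSR_VMX_TRUE_PINBASED_CTLS are the MSR numbers from <machine/specialreg.h>, but the PINBASED_CTLS_*_SETTING masks are placeholders for whatever bits the caller requires, not definitions from this file:

	/* Sketch only: the *_SETTING masks are assumed, caller-defined values. */
	uint32_t pinbased_ctls = 0;
	int error;

	error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS,
	    PINBASED_CTLS_ONE_SETTING, PINBASED_CTLS_ZERO_SETTING,
	    &pinbased_ctls);
	if (error != 0)
		printf("vmx_init: processor does not support desired "
		    "pin-based controls\n");

The don't-care bits in pinbased_ctls end up at their processor-reported defaults, so the resulting value can be loaded directly into the corresponding VMCS control field.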
+
+void
+msr_bitmap_initialize(char *bitmap)
+{
+
+ memset(bitmap, 0xff, PAGE_SIZE);
+}
+
+int
+msr_bitmap_change_access(char *bitmap, u_int msr, int access)
+{
+ int byte, bit;
+
+ if (msr <= 0x00001FFF)
+ byte = msr / 8;
+ else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
+ byte = 1024 + (msr - 0xC0000000) / 8;
+ else
+ return (EINVAL);
+
+ bit = msr & 0x7;
+
+ if (access & MSR_BITMAP_ACCESS_READ)
+ bitmap[byte] &= ~(1 << bit);
+ else
+ bitmap[byte] |= 1 << bit;
+
+ byte += 2048;
+ if (access & MSR_BITMAP_ACCESS_WRITE)
+ bitmap[byte] &= ~(1 << bit);
+ else
+ bitmap[byte] |= 1 << bit;
+
+ return (0);
+}
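
A note on the layout this function assumes, per the Intel SDM: the 4KB MSR permission bitmap holds read intercepts for MSRs 0x0-0x1FFF in bytes 0-1023, read intercepts for 0xC0000000-0xC0001FFF in bytes 1024-2047, and the matching write-intercept regions at offset 2048. A bit set to 1 forces a VM exit, which is why msr_bitmap_initialize() fills the page with 0xff. As a hedged usage sketch, with 'bitmap' assumed to point at the 4KB permission page and MSR_BITMAP_ACCESS_RW assumed to be the OR of the READ and WRITE flags from vmx_msr.h, granting the guest direct access to MSR_GSBASE would look like:

	/* Sketch: stop intercepting guest reads and writes of MSR_GSBASE. */
	int error;

	error = msr_bitmap_change_access(bitmap, MSR_GSBASE,
	    MSR_BITMAP_ACCESS_RW);
	KASSERT(error == 0, ("MSR outside the ranges covered by the bitmap"));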
+
+static uint64_t misc_enable;
+static uint64_t platform_info;
+static uint64_t turbo_ratio_limit;
+static uint64_t host_msrs[GUEST_MSR_NUM];
+
+static bool
+nehalem_cpu(void)
+{
+ u_int family, model;
+
+ /*
+ * The family:model numbers belonging to the Nehalem microarchitecture
+ * are documented in Section 35.5, Intel SDM dated Feb 2014.
+ */
+ family = CPUID_TO_FAMILY(cpu_id);
+ model = CPUID_TO_MODEL(cpu_id);
+ if (family == 0x6) {
+ switch (model) {
+ case 0x1A:
+ case 0x1E:
+ case 0x1F:
+ case 0x2E:
+ return (true);
+ default:
+ break;
+ }
+ }
+ return (false);
+}
+
+static bool
+westmere_cpu(void)
+{
+ u_int family, model;
+
+ /*
+ * The family:model numbers belonging to the Westmere microarchitecture
+ * are documented in Section 35.6, Intel SDM dated Feb 2014.
+ */
+ family = CPUID_TO_FAMILY(cpu_id);
+ model = CPUID_TO_MODEL(cpu_id);
+ if (family == 0x6) {
+ switch (model) {
+ case 0x25:
+ case 0x2C:
+ return (true);
+ default:
+ break;
+ }
+ }
+ return (false);
+}
+
+static bool
+pat_valid(uint64_t val)
+{
+ int i, pa;
+
+ /*
+ * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
+ *
+ * Extract PA0 through PA7 and validate that each one encodes a
+ * valid memory type.
+ */
+ for (i = 0; i < 8; i++) {
+ pa = (val >> (i * 8)) & 0xff;
+ if (pa == 2 || pa == 3 || pa >= 8)
+ return (false);
+ }
+ return (true);
+}
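
As a quick illustration of what pat_valid() accepts: the power-on default PAT value, 0x0007040600070406 (WB, WT, UC-, UC repeated across PA0-PA7), passes the check, while any value carrying the reserved encodings 0x02, 0x03, or anything >= 0x08 in a PAn field is rejected:

	/* Illustrative sanity checks only. */
	KASSERT(pat_valid(0x0007040600070406UL), ("reset-default PAT rejected"));
	KASSERT(!pat_valid(0x0000000000000002UL), ("reserved PA0 encoding accepted"));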
+
+void
+vmx_msr_init(void)
+{
+ uint64_t bus_freq, ratio;
+ int i;
+
+ /*
+ * It is safe to cache the values of the following MSRs because
+ * they don't change based on curcpu, curproc or curthread.
+ */
+ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+
+ /*
+ * Initialize emulated MSRs
+ */
+ misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
+ /*
+ * Set mandatory bits
+ * 11: branch trace disabled
+ * 12: PEBS unavailable
+ * Clear unsupported features
+ * 16: SpeedStep enable
+ * 18: enable MONITOR FSM
+ */
+ misc_enable |= (1 << 12) | (1 << 11);
+ misc_enable &= ~((1 << 18) | (1 << 16));
+
+ if (nehalem_cpu() || westmere_cpu())
+		bus_freq = 133330000;		/* 133 MHz */
+ else
+		bus_freq = 100000000;		/* 100 MHz */
+
+ /*
+ * XXXtime
+ * The ratio should really be based on the virtual TSC frequency as
+ * opposed to the host TSC.
+ */
+ ratio = (tsc_freq / bus_freq) & 0xff;
+
+ /*
+ * The register definition is based on the micro-architecture
+ * but the following bits are always the same:
+ * [15:8] Maximum Non-Turbo Ratio
+ * [28] Programmable Ratio Limit for Turbo Mode
+ * [29] Programmable TDC-TDP Limit for Turbo Mode
+ * [47:40] Maximum Efficiency Ratio
+ *
+ * The other bits can be safely set to 0 on all
+ * micro-architectures up to Haswell.
+ */
+ platform_info = (ratio << 8) | (ratio << 40);
+
+ /*
+ * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
+ * dependent on the maximum cores per package supported by the micro-
+	 * architecture. For example, Westmere supports 6 cores per package and
+	 * uses the low 48 bits. Sandy Bridge supports 8 cores per package and
+ * uses up all 64 bits.
+ *
+ * However, the unused bits are reserved so we pretend that all bits
+ * in this MSR are valid.
+ */
+ for (i = 0; i < 8; i++)
+ turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
+}
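
A worked example of the arithmetic above, under assumed (not measured) host values: on a non-Nehalem/Westmere part with tsc_freq = 2400000000, bus_freq is 100000000, so ratio = 24 (0x18). The emulated MSRs then come out as:

	platform_info     = (0x18 << 8) | (0x18ULL << 40) = 0x0000180000001800
	turbo_ratio_limit = 0x1818181818181818	/* ratio replicated into all 8 byte fields */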
+
+void
+vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
+{
+ /*
+ * The permissions bitmap is shared between all vcpus so initialize it
+ * once when initializing the vBSP.
+ */
+ if (vcpu->vcpuid == 0) {
+ guest_msr_rw(vmx, MSR_LSTAR);
+ guest_msr_rw(vmx, MSR_CSTAR);
+ guest_msr_rw(vmx, MSR_STAR);
+ guest_msr_rw(vmx, MSR_SF_MASK);
+ guest_msr_rw(vmx, MSR_KGSBASE);
+ }
+
+ /*
+ * Initialize guest IA32_PAT MSR with default value after reset.
+ */
+ vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+
+ return;
+}
+
+void
+vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
+{
+
+ /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
+ update_pcb_bases(curpcb);
+ wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
+ wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
+ wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
+ wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
+ wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
+}
+
+void
+vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
+{
+ uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
+ uint32_t host_aux = cpu_auxmsr();
+
+ if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
+ wrmsr(MSR_TSC_AUX, guest_tsc_aux);
+}
+
+void
+vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
+{
+
+ /* Save guest MSRs */
+ vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+ vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
+
+ /* Restore host MSRs */
+ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
+ wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
+ wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
+ wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
+
+ /* MSR_KGSBASE will be restored on the way back to userspace */
+}
+
+void
+vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
+{
+ uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
+ uint32_t host_aux = cpu_auxmsr();
+
+ if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
+ /*
+ * Note that it is not necessary to save the guest value
+ * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
+ * contains the current value since it is updated whenever
+ * the guest writes to it (which is expected to be very
+ * rare).
+ */
+ wrmsr(MSR_TSC_AUX, host_aux);
+}
+
+int
+vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
+{
+ int error;
+
+ error = 0;
+
+ switch (num) {
+ case MSR_MCG_CAP:
+ case MSR_MCG_STATUS:
+ *val = 0;
+ break;
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
+ if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
+ vm_inject_gp(vcpu->vcpu);
+ }
+ break;
+ case MSR_IA32_MISC_ENABLE:
+ *val = misc_enable;
+ break;
+ case MSR_PLATFORM_INFO:
+ *val = platform_info;
+ break;
+ case MSR_TURBO_RATIO_LIMIT:
+ case MSR_TURBO_RATIO_LIMIT1:
+ *val = turbo_ratio_limit;
+ break;
+ case MSR_PAT:
+ *val = vcpu->guest_msrs[IDX_MSR_PAT];
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
+int
+vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
+{
+ uint64_t changed;
+ int error;
+
+ error = 0;
+
+ switch (num) {
+ case MSR_MCG_CAP:
+ case MSR_MCG_STATUS:
+ break; /* ignore writes */
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
+ if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
+ vm_inject_gp(vcpu->vcpu);
+ }
+ break;
+ case MSR_IA32_MISC_ENABLE:
+ changed = val ^ misc_enable;
+ /*
+ * If the host has disabled the NX feature then the guest
+ * also cannot use it. However, a Linux guest will try to
+ * enable the NX feature by writing to the MISC_ENABLE MSR.
+ *
+ * This can be safely ignored because the memory management
+ * code looks at CPUID.80000001H:EDX.NX to check if the
+ * functionality is actually enabled.
+ */
+ changed &= ~(1UL << 34);
+
+ /*
+ * Punt to userspace if any other bits are being modified.
+ */
+ if (changed)
+ error = EINVAL;
+
+ break;
+ case MSR_PAT:
+ if (pat_valid(val))
+ vcpu->guest_msrs[IDX_MSR_PAT] = val;
+ else
+ vm_inject_gp(vcpu->vcpu);
+ break;
+ case MSR_TSC:
+ error = vmx_set_tsc_offset(vcpu, val - rdtsc());
+ break;
+ case MSR_TSC_AUX:
+ if (vmx_have_msr_tsc_aux)
+ /*
+ * vmx_msr_guest_enter_tsc_aux() will apply this
+ * value when it is called immediately before guest
+ * entry.
+ */
+ vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
+ else
+ vm_inject_gp(vcpu->vcpu);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
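
Finally, a hedged sketch of how these entry points are reached from a VM-exit handler; the vmxctx guest register field names and the wrapper function itself are assumptions for illustration, not code from this change. The guest supplies the MSR number in %ecx and, for WRMSR, the value in %edx:%eax:

	/* Sketch: dispatch a WRMSR exit to vmx_wrmsr(). */
	static int
	handle_wrmsr_exit(struct vmx_vcpu *vcpu, struct vmxctx *vmxctx)
	{
		uint32_t eax, ecx, edx;
		bool retu;

		eax = vmxctx->guest_rax;
		ecx = vmxctx->guest_rcx;	/* MSR number */
		edx = vmxctx->guest_rdx;
		retu = false;

		/* WRMSR passes the 64-bit value in edx:eax. */
		return (vmx_wrmsr(vcpu, ecx, (uint64_t)edx << 32 | eax, &retu));
	}

A non-zero return indicates the MSR is not emulated here, in which case the exit is typically bounced to userspace for further handling.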