1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
--- sys/amd64/amd64/pmap.c.orig
+++ sys/amd64/amd64/pmap.c
@@ -1226,6 +1226,51 @@
m->md.pat_mode = PAT_WRITE_BACK;
}
+static int pmap_allow_2m_x_ept; /* NOTE(review): nonzero blocks exec 2M EPT */
+SYSCTL_INT(_vm_pmap, OID_AUTO, allow_2m_x_ept, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+ &pmap_allow_2m_x_ept, 0,
+ "Allow executable superpage mappings in EPT");
+
+void
+pmap_allow_2m_x_ept_recalculate(void)
+{
+ /*
+ * SKL002, SKL012S. EPT is Intel-only, so the vendor check is a
+ * formality. The flag is set only when no exemption below holds.
+ */
+ if (!(cpu_vendor_id != CPU_VENDOR_INTEL ||
+ (cpu_ia32_arch_caps & IA32_ARCH_CAP_IF_PSCHANGE_MC_NO) != 0 ||
+ (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+ (CPUID_TO_MODEL(cpu_id) == 0x26 || /* Atoms */
+ CPUID_TO_MODEL(cpu_id) == 0x27 ||
+ CPUID_TO_MODEL(cpu_id) == 0x35 ||
+ CPUID_TO_MODEL(cpu_id) == 0x36 ||
+ CPUID_TO_MODEL(cpu_id) == 0x37 ||
+ CPUID_TO_MODEL(cpu_id) == 0x86 ||
+ CPUID_TO_MODEL(cpu_id) == 0x1c ||
+ CPUID_TO_MODEL(cpu_id) == 0x4a ||
+ CPUID_TO_MODEL(cpu_id) == 0x4c ||
+ CPUID_TO_MODEL(cpu_id) == 0x4d ||
+ CPUID_TO_MODEL(cpu_id) == 0x5a ||
+ CPUID_TO_MODEL(cpu_id) == 0x5c ||
+ CPUID_TO_MODEL(cpu_id) == 0x5d ||
+ CPUID_TO_MODEL(cpu_id) == 0x5f ||
+ CPUID_TO_MODEL(cpu_id) == 0x6e ||
+ CPUID_TO_MODEL(cpu_id) == 0x7a ||
+ CPUID_TO_MODEL(cpu_id) == 0x57 || /* Knights */
+ CPUID_TO_MODEL(cpu_id) == 0x85))))
+ pmap_allow_2m_x_ept = 1; /* nonzero vetoes exec 2M EPT pages */
+ TUNABLE_INT_FETCH("hw.allow_2m_x_ept", &pmap_allow_2m_x_ept);
+}
+
+static bool
+pmap_allow_2m_x_page(pmap_t pmap, bool executable)
+{
+ /* False only for an executable EPT mapping when the flag is set. */
+ return (pmap->pm_type != PT_EPT || !executable ||
+ !pmap_allow_2m_x_ept);
+}
+
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
@@ -1270,6 +1315,9 @@
}
}
+ /* IFU */
+ pmap_allow_2m_x_ept_recalculate();
+
/*
* Initialize the vm page array entries for the kernel pmap's
* page table pages.
@@ -4550,6 +4598,15 @@
}
#if VM_NRESERVLEVEL > 0
+static bool
+pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
+{
+ /* Only EPT pmaps are tested for the EPT_PG_EXECUTE bit in pde. */
+ if (pmap->pm_type != PT_EPT)
+ return (false);
+ return ((pde & EPT_PG_EXECUTE) != 0);
+}
+
/*
* Tries to promote the 512, contiguous 4KB page mappings that are within a
* single page table page (PTP) to a single 2MB page mapping. For promotion
@@ -4584,7 +4641,9 @@
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
setpde:
newpde = *firstpte;
- if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+ if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
+ !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+ newpde))) {
atomic_add_long(&pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
@@ -5010,6 +5069,12 @@
PG_V = pmap_valid_bit(pmap);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+ newpde))) {
+ CTR2(KTR_PMAP, "pmap_enter_pde: 2m x blocked for va %#lx"
+ " in pmap %p", va, pmap);
+ return (KERN_FAILURE);
+ }
if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
NULL : lockp)) == NULL) {
CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
@@ -5139,6 +5204,7 @@
va = start + ptoa(diff);
if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
m->psind == 1 && pmap_ps_enabled(pmap) &&
+ pmap_allow_2m_x_page(pmap, (prot & VM_PROT_EXECUTE) != 0) &&
pmap_enter_2mpage(pmap, va, m, prot, &lock))
m = &m[NBPDR / PAGE_SIZE - 1];
else
--- sys/amd64/include/pmap.h.orig
+++ sys/amd64/include/pmap.h
@@ -407,6 +407,7 @@
void pmap_activate_boot(pmap_t pmap);
void pmap_activate_sw(struct thread *);
+void pmap_allow_2m_x_ept_recalculate(void);
void pmap_bootstrap(vm_paddr_t *);
int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
--- sys/dev/cpuctl/cpuctl.c.orig
+++ sys/dev/cpuctl/cpuctl.c
@@ -48,6 +48,10 @@
#include <sys/pmckern.h>
#include <sys/cpuctl.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@@ -535,6 +539,9 @@
hw_ibrs_recalculate();
restore_cpu(oldcpu, is_bound, td);
hw_ssb_recalculate(true);
+#ifdef __amd64__
+ pmap_allow_2m_x_ept_recalculate();
+#endif
hw_mds_recalculate();
printcpuinfo();
return (0);
--- sys/x86/include/specialreg.h.orig
+++ sys/x86/include/specialreg.h
@@ -406,6 +406,7 @@
#define IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY 0x00000008
#define IA32_ARCH_CAP_SSB_NO 0x00000010
#define IA32_ARCH_CAP_MDS_NO 0x00000020
+#define IA32_ARCH_CAP_IF_PSCHANGE_MC_NO 0x00000040
/*
* CPUID manufacturers identifiers
|