author     Konstantin Belousov <kib@FreeBSD.org>    2022-10-10 23:08:55 +0000
committer  Konstantin Belousov <kib@FreeBSD.org>    2023-01-20 03:21:57 +0000
commit     567cc4e6bfd92d7351e385569f2bb4b7c89b6db0 (patch)
tree       aa9e95aa0636148b15933c8cebc2324415876a3d
parent     eb2e82b9ca8e39c0c2045b19cf93489063d57beb (diff)
amd64: for small cores, use (big hammer) INVPCID_CTXGLOB instead of INVLPG

PR:             261169, 266145
Tested by:      pho

(cherry picked from commit cde70e312c3fde5b37a29be1dacb7fde9a45b94a)
-rw-r--r--  sys/amd64/amd64/initcpu.c     |  5
-rw-r--r--  sys/amd64/amd64/mp_machdep.c  | 16
-rw-r--r--  sys/amd64/amd64/pmap.c        | 36
-rw-r--r--  sys/amd64/include/pcpu.h      |  3
-rw-r--r--  sys/amd64/include/pmap.h      | 20

5 files changed, 67 insertions(+), 13 deletions(-)
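
Background on the mechanism (editor's note): INVPCID with type 2, which
FreeBSD names INVPCID_CTXGLOB, invalidates cached translations for all
PCIDs, including global (PG_G) entries, so it is a correct, if heavy-handed,
substitute wherever INVLPG cannot be trusted to flush a global entry.
A minimal sketch of the primitive the patch relies on follows; the
authoritative definitions live in sys/amd64/include/cpufunc.h, and the
exact layout there may differ from this reconstruction.

    /*
     * Sketch of the INVPCID primitive used below (a reconstruction,
     * not the verbatim sys/amd64/include/cpufunc.h definitions).
     */
    #include <stdint.h>

    struct invpcid_descr {
            uint64_t        pcid:12;        /* PCID to target (types 0 and 1) */
            uint64_t        pad:52;         /* reserved, must be zero */
            uint64_t        addr;           /* linear address (type 0 only) */
    } __attribute__((packed));

    #define INVPCID_ADDR    0       /* one address within one PCID */
    #define INVPCID_CTX     1       /* single context (one PCID) */
    #define INVPCID_CTXGLOB 2       /* all contexts, including globals */
    #define INVPCID_ALLCTX  3       /* all contexts, except globals */

    static inline void
    invpcid(struct invpcid_descr *d, int type)
    {
            /* AT&T order: descriptor memory operand, then type register. */
            __asm __volatile("invpcid (%0),%1"
                : : "r" (d), "r" ((unsigned long)type) : "memory");
    }

Passing a zero-initialized descriptor, as the patch does with { 0 },
keeps the reserved bits clear; for type 2 the pcid and addr fields do
not narrow what is flushed.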
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 1b731821889e..08385d3095d0 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -324,6 +324,11 @@ initializecpu(void)
if ((r[0] & CPUID_HYBRID_CORE_MASK) ==
CPUID_HYBRID_SMALL_CORE) {
PCPU_SET(small_core, 1);
+ if (pmap_pcid_enabled &&
+ pmap_pcid_invlpg_workaround_uena) {
+ PCPU_SET(pcid_invlpg_workaround, 1);
+ pmap_pcid_invlpg_workaround = 1;
+ }
}
}
}
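
The hunk above arms the per-CPU flag only when PCID is in use and the
administrator has not disabled the workaround via the loader tunable
(pmap_pcid_invlpg_workaround_uena, fetched from
vm.pmap.pcid_invlpg_workaround in pmap_init() further below). The same
OID is exported read-only, so whether the workaround actually engaged
can be checked from userland; a small hypothetical checker, assuming a
FreeBSD host running a kernel with this patch:

    /* Hypothetical userland check of the sysctl this patch adds. */
    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int ena;
            size_t len = sizeof(ena);

            if (sysctlbyname("vm.pmap.pcid_invlpg_workaround", &ena, &len,
                NULL, 0) == -1) {
                    perror("sysctlbyname");  /* OID absent on older kernels */
                    return (1);
            }
            printf("pcid_invlpg_workaround: %d\n", ena);
            return (0);
    }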
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 5e94ba822871..650f83b1aad4 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -863,7 +863,7 @@ invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- invlpg(smp_tlb_addr1);
+ pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -933,10 +933,16 @@ invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
#endif /* COUNT_IPIS */
addr = smp_tlb_addr1;
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < smp_tlb_addr2);
+ if (smp_tlb_pmap == kernel_pmap && PCPU_GET(pcid_invlpg_workaround)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < smp_tlb_addr2);
+ }
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
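
Note the shape of the range path above: on an affected core the whole
per-page INVLPG loop is replaced by a single all-context invalidation,
not one per page, since repeating a global flush would be pointless.
This over-invalidates massively (the "big hammer" of the commit title),
but only on small cores with PCID active. Restated as a standalone
kernel-context sketch (demo_flush_kernel_range is hypothetical):

    /* Sketch of the dispatch used above; kernel context assumed. */
    static void
    demo_flush_kernel_range(vm_offset_t sva, vm_offset_t eva)
    {
            vm_offset_t va;

            if (PCPU_GET(pcid_invlpg_workaround)) {
                    struct invpcid_descr d = { 0 };

                    invpcid(&d, INVPCID_CTXGLOB); /* one flush covers it all */
            } else {
                    for (va = sva; va < eva; va += PAGE_SIZE)
                            invlpg(va);           /* precise per-page flush */
            }
    }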
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 233c58b83f2d..62fa64881c7b 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -517,6 +517,12 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
int invpcid_works = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
"Is the invpcid instruction available ?");
+int pmap_pcid_invlpg_workaround = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_workaround,
+ CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &pmap_pcid_invlpg_workaround, 0,
+ "Enable small core PCID/INVLPG workaround");
+int pmap_pcid_invlpg_workaround_uena = 1;
int __read_frequently pti = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
@@ -2518,6 +2524,9 @@ pmap_init(void)
VM_PAGE_TO_PHYS(m);
}
}
+
+ TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround",
+ &pmap_pcid_invlpg_workaround_uena);
}
SYSCTL_UINT(_vm_pmap, OID_AUTO, large_map_pml4_entries,
@@ -2749,7 +2758,7 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
- invlpg(va);
+ pmap_invlpg(pmap, va);
else if ((newpde & PG_G) == 0)
/*
* Promotion: flush every 4KB page mapping from the TLB
@@ -3088,7 +3097,7 @@ pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
vm_offset_t addr2 __unused)
{
if (pmap == kernel_pmap) {
- invlpg(va);
+ pmap_invlpg(kernel_pmap, va);
} else if (pmap == PCPU_GET(curpmap)) {
invlpg(va);
pmap_invalidate_page_cb(pmap, va);
@@ -3179,8 +3188,14 @@ pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
vm_offset_t addr;
if (pmap == kernel_pmap) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
+ if (PCPU_GET(pcid_invlpg_workaround)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ }
} else if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
@@ -3717,7 +3732,7 @@ pmap_flush_cache_phys_range(vm_paddr_t spa, vm_paddr_t epa, vm_memattr_t mattr)
for (; spa < epa; spa += PAGE_SIZE) {
sched_pin();
pte_store(pte, spa | pte_bits);
- invlpg(vaddr);
+ pmap_invlpg(kernel_pmap, vaddr);
/* XXXKIB atomic inside flush_cache_range are excessive */
pmap_flush_cache_range(vaddr, vaddr + PAGE_SIZE);
sched_unpin();
@@ -7527,7 +7542,7 @@ pmap_kenter_temporary(vm_paddr_t pa, int i)
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
pmap_kenter(va, pa);
- invlpg(va);
+ pmap_invlpg(kernel_pmap, va);
return ((void *)crashdumpmap);
}
@@ -10223,7 +10238,7 @@ pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
page[i]->md.pat_mode, 0);
pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
cache_bits);
- invlpg(vaddr[i]);
+ pmap_invlpg(kernel_pmap, vaddr[i]);
}
}
}
@@ -10272,7 +10287,14 @@ pmap_quick_remove_page(vm_offset_t addr)
if (addr != qframe)
return;
pte_store(vtopte(qframe), 0);
+
+ /*
+ * Since qframe is exclusively mapped by
+ * pmap_quick_enter_page() and that function doesn't set PG_G,
+ * we can use INVLPG here.
+ */
invlpg(qframe);
+
mtx_unlock_spin(&qframe_mtx);
}
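
The qframe hunk above is the flip side of the workaround: the erratum
only affects global (PG_G) TLB entries, so a mapping known to be created
without PG_G can keep the cheap instruction even on affected cores. A
hedged kernel-context sketch of that reasoning (demo_unmap_private and
its mapping are hypothetical, not from the commit):

    /*
     * Sketch: INVLPG stays correct for a kernel mapping installed
     * without PG_G, because no global TLB entry exists to be missed.
     */
    static void
    demo_unmap_private(vm_offset_t va)
    {
            pte_store(vtopte(va), 0);  /* PTE was created without PG_G */
            invlpg(va);                /* safe: nothing global to flush */
    }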
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index c0b8ee456f25..13de60f650de 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -100,7 +100,8 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
u_int pc_smp_tlb_op; \
uint64_t pc_ucr3_load_mask; \
u_int pc_small_core; \
- char __pad[2912] /* pad to UMA_PCPU_ALLOC_SIZE */
+ u_int pc_pcid_invlpg_workaround; \
+ char __pad[2908] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 8f1e77806a25..7b86f9e139e1 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -461,6 +461,8 @@ extern vm_offset_t virtual_end;
extern vm_paddr_t dmaplimit;
extern int pmap_pcid_enabled;
extern int invpcid_works;
+extern int pmap_pcid_invlpg_workaround;
+extern int pmap_pcid_invlpg_workaround_uena;
#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
#define pmap_page_is_write_mapped(m) (((m)->a.flags & PGA_WRITEABLE) != 0)
@@ -546,6 +548,24 @@ pmap_invalidate_cpu_mask(pmap_t pmap)
return (&pmap->pm_active);
}
+/*
+ * It seems that AlderLake+ small cores have some microarchitectural
+ * bug, which results in the INVLPG instruction failing to flush all
+ * global TLB entries when PCID is enabled. Work around it for now,
+ * by doing global invalidation on small cores instead of INVLPG.
+ */
+static __inline void
+pmap_invlpg(pmap_t pmap, vm_offset_t va)
+{
+ if (pmap == kernel_pmap && PCPU_GET(pcid_invlpg_workaround)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ invlpg(va);
+ }
+}
+
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
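
Taken together, the calling convention after this change is: any flush
of a kernel_pmap address that may be backed by a global mapping goes
through pmap_invlpg(), which reduces to a plain INVLPG when the per-CPU
flag is clear. For example, mirroring the pmap_kenter_temporary() hunk
above:

    pmap_kenter(va, pa);            /* may install a PG_G mapping */
    pmap_invlpg(kernel_pmap, va);   /* was: invlpg(va) */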