author     Konstantin Belousov <kib@FreeBSD.org>    2022-10-10 23:08:55 +0000
committer  Konstantin Belousov <kib@FreeBSD.org>    2023-01-20 03:21:57 +0000
commit     567cc4e6bfd92d7351e385569f2bb4b7c89b6db0 (patch)
tree       aa9e95aa0636148b15933c8cebc2324415876a3d
parent     eb2e82b9ca8e39c0c2045b19cf93489063d57beb (diff)
amd64: for small cores, use (big hammer) INVPCID_CTXGLOB instead of INVLPG

PR:             261169, 266145
Tested by:      pho

(cherry picked from commit cde70e312c3fde5b37a29be1dacb7fde9a45b94a)
-rw-r--r--  sys/amd64/amd64/initcpu.c     |  5
-rw-r--r--  sys/amd64/amd64/mp_machdep.c  | 16
-rw-r--r--  sys/amd64/amd64/pmap.c        | 36
-rw-r--r--  sys/amd64/include/pcpu.h      |  3
-rw-r--r--  sys/amd64/include/pmap.h      | 20

5 files changed, 67 insertions(+), 13 deletions(-)
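
Background on the mechanism (editor's note): INVPCID with type 2, which
FreeBSD names INVPCID_CTXGLOB, invalidates cached translations for all
PCIDs, including global (PG_G) entries, so it is a correct, if heavy-handed,
substitute wherever INVLPG cannot be trusted to flush a global entry.
A minimal sketch of the primitive the patch relies on follows; the
authoritative definitions live in sys/amd64/include/cpufunc.h, and the
exact layout there may differ from this reconstruction.

    /*
     * Sketch of the INVPCID primitive used below (a reconstruction,
     * not the verbatim sys/amd64/include/cpufunc.h definitions).
     */
    #include <stdint.h>

    struct invpcid_descr {
            uint64_t        pcid:12;        /* PCID to target (types 0 and 1) */
            uint64_t        pad:52;         /* reserved, must be zero */
            uint64_t        addr;           /* linear address (type 0 only) */
    } __attribute__((packed));

    #define INVPCID_ADDR    0       /* one address within one PCID */
    #define INVPCID_CTX     1       /* single context (one PCID) */
    #define INVPCID_CTXGLOB 2       /* all contexts, including globals */
    #define INVPCID_ALLCTX  3       /* all contexts, except globals */

    static inline void
    invpcid(struct invpcid_descr *d, int type)
    {
            /* AT&T order: descriptor memory operand, then type register. */
            __asm __volatile("invpcid (%0),%1"
                : : "r" (d), "r" ((unsigned long)type) : "memory");
    }

Passing a zero-initialized descriptor, as the patch does with { 0 },
keeps the reserved bits clear; for type 2 the pcid and addr fields do
not narrow what is flushed.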
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 1b731821889e..08385d3095d0 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -324,6 +324,11 @@ initializecpu(void)
if ((r[0] & CPUID_HYBRID_CORE_MASK) ==
CPUID_HYBRID_SMALL_CORE) {
PCPU_SET(small_core, 1);
+ if (pmap_pcid_enabled &&
+ pmap_pcid_invlpg_workaround_uena) {
+ PCPU_SET(pcid_invlpg_workaround, 1);
+ pmap_pcid_invlpg_workaround = 1;
+ }
}
}
}
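
The hunk above arms the per-CPU flag only when PCID is in use and the
administrator has not disabled the workaround via the loader tunable
(pmap_pcid_invlpg_workaround_uena, fetched from
vm.pmap.pcid_invlpg_workaround in pmap_init() further below). The same
OID is exported read-only, so whether the workaround actually engaged
can be checked from userland; a small hypothetical checker, assuming a
FreeBSD host running a kernel with this patch:

    /* Hypothetical userland check of the sysctl this patch adds. */
    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int ena;
            size_t len = sizeof(ena);

            if (sysctlbyname("vm.pmap.pcid_invlpg_workaround", &ena, &len,
                NULL, 0) == -1) {
                    perror("sysctlbyname");  /* OID absent on older kernels */
                    return (1);
            }
            printf("pcid_invlpg_workaround: %d\n", ena);
            return (0);
    }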
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 5e94ba822871..650f83b1aad4 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -863,7 +863,7 @@ invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- invlpg(smp_tlb_addr1);
+ pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -933,10 +933,16 @@ invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
#endif /* COUNT_IPIS */
addr = smp_tlb_addr1;
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < smp_tlb_addr2);
+ if (smp_tlb_pmap == kernel_pmap && PCPU_GET(pcid_invlpg_workaround)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < smp_tlb_addr2);
+ }
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
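
Note the shape of the range path above: on an affected core the whole
per-page INVLPG loop is replaced by a single all-context invalidation,
not one per page, since repeating a global flush would be pointless.
This over-invalidates massively (the "big hammer" of the commit title),
but only on small cores with PCID active. Restated as a standalone
kernel-context sketch (demo_flush_kernel_range is hypothetical):

    /* Sketch of the dispatch used above; kernel context assumed. */
    static void
    demo_flush_kernel_range(vm_offset_t sva, vm_offset_t eva)
    {
            vm_offset_t va;

            if (PCPU_GET(pcid_invlpg_workaround)) {
                    struct invpcid_descr d = { 0 };

                    invpcid(&d, INVPCID_CTXGLOB); /* one flush covers it all */
            } else {
                    for (va = sva; va < eva; va += PAGE_SIZE)
                            invlpg(va);           /* precise per-page flush */
            }
    }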
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 233c58b83f2d..62fa64881c7b 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -517,6 +517,12 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
int invpcid_works = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
"Is the invpcid instruction available ?");
+int pmap_pcid_invlpg_workaround = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_workaround,
+ CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &pmap_pcid_invlpg_workaround, 0,
+ "Enable small core PCID/INVLPG workaround");
+int pmap_pcid_invlpg_workaround_uena = 1;
int __read_frequently pti = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
@@ -2518,6 +2524,9 @@ pmap_init(void)
VM_PAGE_TO_PHYS(m);
}
}
+
+ TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround",
+ &pmap_pcid_invlpg_workaround_uena);
}
SYSCTL_UINT(_vm_pmap, OID_AUTO, large_map_pml4_entries,
@@ -2749,7 +2758,7 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
- invlpg(va);
+ pmap_invlpg(pmap, va);
else if ((newpde & PG_G) == 0)
/*
* Promotion: flush every 4KB page mapping from the TLB
@@ -3088,7 +3097,7 @@ pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
vm_offset_t addr2 __unused)
{
if (pmap == kernel_pmap) {
- invlpg(va);
+ pmap_invlpg(kernel_pmap, va);
} else if (pmap == PCPU_GET(curpmap)) {
invlpg(va);
pmap_invalidate_page_cb(pmap, va);
@@ -3179,8 +3188,14 @@ pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
vm_offset_t addr;
if (pmap == kernel_pmap) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
+ if (PCPU_GET(pcid_invlpg_workaround)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ }
} else if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
@@ -3717,7 +3732,7 @@ pmap_flush_cache_phys_range(vm_paddr_t spa, vm_paddr_t epa, vm_memattr_t mattr)
for (; spa < epa; spa += PAGE_SIZE) {
sched_pin();
pte_store(pte, spa | pte_bits);
- invlpg(vaddr);
+ pmap_invlpg(kernel_pmap, vaddr);
/* XXXKIB atomic inside flush_cache_range are excessive */
pmap_flush_cache_range(vaddr, vaddr + PAGE_SIZE);
sched_unpin();
@@ -7527,7 +7542,7 @@ pmap_kenter_temporary(vm_paddr_t pa, int i)
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
pmap_kenter(va, pa);
- invlpg(va);
+ pmap_invlpg(kernel_pmap, va);
return ((void *)crashdumpmap);
}
@@ -10223,7 +10238,7 @@ pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
page[i]->md.pat_mode, 0);
pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
cache_bits);
- invlpg(vaddr[i]);
+ pmap_invlpg(kernel_pmap, vaddr[i]);
}
}
}
@@ -10272,7 +10287,14 @@ pmap_quick_remove_page(vm_offset_t addr)
if (addr != qframe)
return;
pte_store(vtopte(qframe), 0);
+
+ /*
+ * Since qframe is exclusively mapped by
+ * pmap_quick_enter_page() and that function doesn't set PG_G,
+ * we can use INVLPG here.
+ */
invlpg(qframe);
+
mtx_unlock_spin(&qframe_mtx);
}
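
The qframe hunk above is the flip side of the workaround: the erratum
only affects global (PG_G) TLB entries, so a mapping known to be created
without PG_G can keep the cheap instruction even on affected cores. A
hedged kernel-context sketch of that reasoning (demo_unmap_private and
its mapping are hypothetical, not from the commit):

    /*
     * Sketch: INVLPG stays correct for a kernel mapping installed
     * without PG_G, because no global TLB entry exists to be missed.
     */
    static void
    demo_unmap_private(vm_offset_t va)
    {
            pte_store(vtopte(va), 0);  /* PTE was created without PG_G */
            invlpg(va);                /* safe: nothing global to flush */
    }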
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index c0b8ee456f25..13de60f650de 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -100,7 +100,8 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
u_int pc_smp_tlb_op; \
uint64_t pc_ucr3_load_mask; \
u_int pc_small_core; \
- char __pad[2912] /* pad to UMA_PCPU_ALLOC_SIZE */
+ u_int pc_pcid_invlpg_workaround; \
+ char __pad[2908] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 8f1e77806a25..7b86f9e139e1 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -461,6 +461,8 @@ extern vm_offset_t virtual_end;
extern vm_paddr_t dmaplimit;
extern int pmap_pcid_enabled;
extern int invpcid_works;
+extern int pmap_pcid_invlpg_workaround;
+extern int pmap_pcid_invlpg_workaround_uena;
#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
#define pmap_page_is_write_mapped(m) (((m)->a.flags & PGA_WRITEABLE) != 0)
@@ -546,6 +548,24 @@ pmap_invalidate_cpu_mask(pmap_t pmap)
return (&pmap->pm_active);
}
+/*
+ * It seems that AlderLake+ small cores have some microarchitectural
+ * bug, which results in the INVLPG instruction failing to flush all
+ * global TLB entries when PCID is enabled. Work around it for now,
+ * by doing global invalidation on small cores instead of INVLPG.
+ */
+static __inline void
+pmap_invlpg(pmap_t pmap, vm_offset_t va)
+{
+ if (pmap == kernel_pmap && PCPU_GET(pcid_invlpg_workaround)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ invlpg(va);
+ }
+}
+
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
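
Taken together, the calling convention after this change is: any flush
of a kernel_pmap address that may be backed by a global mapping goes
through pmap_invlpg(), which reduces to a plain INVLPG when the per-CPU
flag is clear. For example, mirroring the pmap_kenter_temporary() hunk
above:

    pmap_kenter(va, pa);            /* may install a PG_G mapping */
    pmap_invlpg(kernel_pmap, va);   /* was: invlpg(va) */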