aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mark Johnston <markj@FreeBSD.org>, 2019-09-02 21:57:57 +0000
committer: Mark Johnston <markj@FreeBSD.org>, 2019-09-02 21:57:57 +0000
commit: 209f2e983876efbc2e981b6380723d7e99986431 (patch)
tree: bd7ff8f48212cd6bace2fd69ede1d49cd262e2a9
parent: 87044fca73fe867bb63bb185459de312f54d500c (diff)
download: src-209f2e983876efbc2e981b6380723d7e99986431.tar.gz
download: src-209f2e983876efbc2e981b6380723d7e99986431.zip
Add a sysctl to dump kernel mappings and their properties on amd64.
The sysctl is called vm.pmap.kernel_maps. It dumps address ranges and their corresponding protection and mapping mode, as well as counts of 2MB and 1GB pages in the range.

Reviewed by: kib
MFC after: 2 weeks
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D21380
Notes
Notes: svn path=/head/; revision=351728
-rw-r--r-- sys/amd64/amd64/pmap.c 298
1 files changed, 298 insertions, 0 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a5f468871733..ef97fa081c8f 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -124,6 +124,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rangeset.h>
#include <sys/rwlock.h>
+#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
#include <sys/vmem.h>
@@ -2112,6 +2113,41 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
return (mask);
}
+/*
+ * Compute the PAT index selected by the caching bits of a page table entry.
+ *
+ * For PT_X86 and PT_RVI pmaps the index is assembled from the entry's PAT,
+ * PCD and PWT bits; "is_pde" chooses which PAT bit position is tested.  For
+ * PT_EPT pmaps the index is taken from the entry's memory type field, and an
+ * entry with EPT_PG_IGNORE_PAT set causes a panic.  Any other pmap type
+ * yields index 0.
+ */
+static int
+pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde)
+{
+	int idx;
+
+	idx = 0;
+	if (pmap->pm_type == PT_EPT) {
+		if ((pte & EPT_PG_IGNORE_PAT) != 0)
+			panic("EPT PTE %#lx has no PAT memory type", pte);
+		idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3;
+	} else if (pmap->pm_type == PT_X86 || pmap->pm_type == PT_RVI) {
+		/* PDEs and PTEs keep their PAT bit in different positions. */
+		if ((pte & (is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT)) != 0)
+			idx |= 0x4;
+		if ((pte & PG_NC_PCD) != 0)
+			idx |= 0x2;
+		if ((pte & PG_NC_PWT) != 0)
+			idx |= 0x1;
+	}
+
+	/* Fold aliased indices together; see pmap_init_pat(). */
+	switch (idx) {
+	case 4:
+		idx = 0;
+		break;
+	case 7:
+		idx = 3;
+		break;
+	default:
+		break;
+	}
+
+	return (idx);
+}
+
bool
pmap_ps_enabled(pmap_t pmap)
{
@@ -9981,6 +10017,268 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
return (error);
}
+/*
+ * Track a range of the kernel's virtual address space that is contiguous
+ * in various mapping attributes.
+ *
+ * While no run is open, "sva" holds a sentinel: the highest address that
+ * KVADDR() can form.  The counters are zeroed whenever a new run begins and
+ * are incremented by the page table walker for each leaf it adds to the run.
+ */
+struct pmap_kernel_map_range {
+ vm_offset_t sva; /* start address of the run, or the sentinel */
+ pt_entry_t attrs; /* combined attribute bits shared by the run */
+ int ptes; /* leaf entries found at the page table level */
+ int pdes; /* PG_PS leaf entries found at the PD level */
+ int pdpes; /* PG_PS leaf entries found at the PDP level */
+};
+
+/*
+ * Append one line describing the run [range->sva, eva) to "sb" and then mark
+ * the range as closed by storing the sentinel address in range->sva.
+ *
+ * The line holds the address bounds, the protection flags (write, execute,
+ * user/supervisor, global), the caching mode and the pdpes, pdes and ptes
+ * counters.  Nothing is emitted when eva <= range->sva, which is always the
+ * case while the range holds the sentinel.
+ */
+static void
+sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t eva)
+{
+	const char *mode;
+	int i, pat_idx;
+
+	if (eva <= range->sva)
+		return;
+
+	/* Search pat_index[] for the PAT_* mode that maps to this index. */
+	pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true);
+	for (i = 0; i < PAT_INDEX_SIZE; i++)
+		if (pat_index[i] == pat_idx)
+			break;
+
+	switch (i) {
+	case PAT_WRITE_BACK:
+		mode = "WB";
+		break;
+	case PAT_WRITE_THROUGH:
+		mode = "WT";
+		break;
+	case PAT_UNCACHEABLE:
+		mode = "UC";
+		break;
+	case PAT_UNCACHED:
+		mode = "U-";
+		break;
+	case PAT_WRITE_PROTECTED:
+		mode = "WP";
+		break;
+	case PAT_WRITE_COMBINING:
+		mode = "WC";
+		break;
+	default:
+		/*
+		 * Report the index that failed to resolve; when the search
+		 * above finds nothing, "i" is merely PAT_INDEX_SIZE.
+		 */
+		printf("%s: unknown PAT mode %#x for range 0x%016lx-0x%016lx\n",
+		    __func__, pat_idx, range->sva, eva);
+		mode = "??";
+		break;
+	}
+
+	/*
+	 * Spell out the "0x" prefix: with "%#016lx" the prefix is counted
+	 * against the field width and is omitted for a zero value, so the
+	 * address columns would not line up.
+	 */
+	sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %s %d %d %d\n",
+	    range->sva, eva,
+	    (range->attrs & X86_PG_RW) != 0 ? 'w' : '-',
+	    (range->attrs & pg_nx) != 0 ? '-' : 'x',
+	    (range->attrs & X86_PG_U) != 0 ? 'u' : 's',
+	    (range->attrs & X86_PG_G) != 0 ? 'g' : '-',
+	    mode, range->pdpes, range->pdes, range->ptes);
+
+	/* Reset to sentinel value. */
+	range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+}
+
+/*
+ * Report whether "attrs" describes the same mapping properties as the run
+ * tracked by "range".  Only the global, write, user, cache and no-execute
+ * bits take part in the comparison.  Bit-for-bit equality is sufficient but
+ * not necessary: when the PAT bit is the sole difference, the two values
+ * still match if they resolve to the same PAT index.
+ */
+static bool
+sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
+{
+	pt_entry_t delta;
+
+	delta = (range->attrs ^ attrs) &
+	    (X86_PG_G | X86_PG_RW | X86_PG_U | X86_PG_PDE_CACHE | pg_nx);
+	if (delta == 0)
+		return (true);
+
+	/* A difference in anything besides the PAT bit is a real mismatch. */
+	if ((delta & ~X86_PG_PDE_PAT) != 0)
+		return (false);
+
+	return (pmap_pat_index(kernel_pmap, range->attrs, true) ==
+	    pmap_pat_index(kernel_pmap, attrs, true));
+}
+
+/*
+ * Begin a new run at "va" with the given attributes; the leaf counters are
+ * reset to zero.
+ */
+static void
+sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
+    pt_entry_t attrs)
+{
+
+	/* Members not named here are implicitly zero-initialized. */
+	*range = (struct pmap_kernel_map_range){
+		.sva = va,
+		.attrs = attrs,
+	};
+}
+
+/*
+ * Given the page table entries leading to a leaf mapping at "va", derive the
+ * mapping's effective attributes. If they do not match those of the current
+ * run, or if no run is open, dump the current address range and its
+ * attributes and begin a new run at "va".
+ *
+ * The walker passes 0 for every level below the leaf, so "pde" and "pte" are
+ * only consulted when they are non-zero.
+ */
+static void
+sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
+ vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde,
+ pt_entry_t pte)
+{
+ pt_entry_t attrs;
+
+ /* Start from the write, user and no-execute bits of the PML4 entry. */
+ attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);
+
+ /*
+ * At each lower level the no-execute bit accumulates (it is OR'ed in),
+ * whereas write and user survive only if this level sets them as well
+ * (they are AND'ed).
+ */
+ attrs |= pdpe & pg_nx;
+ attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U));
+ if ((pdpe & PG_PS) != 0) {
+ /* Leaf at the PDP level: it supplies the global and cache bits. */
+ attrs |= pdpe & (X86_PG_G | X86_PG_PDE_CACHE);
+ } else if (pde != 0) {
+ attrs |= pde & pg_nx;
+ attrs &= pg_nx | (pde & (X86_PG_RW | X86_PG_U));
+ }
+ if ((pde & PG_PS) != 0) {
+ /* Leaf at the PD level: it supplies the global and cache bits. */
+ attrs |= pde & (X86_PG_G | X86_PG_PDE_CACHE);
+ } else if (pte != 0) {
+ attrs |= pte & pg_nx;
+ attrs &= pg_nx | (pte & (X86_PG_RW | X86_PG_U));
+ attrs |= pte & (X86_PG_G | X86_PG_PTE_CACHE);
+
+ /* Canonicalize by always using the PDE PAT bit. */
+ if ((attrs & X86_PG_PTE_PAT) != 0)
+ attrs ^= X86_PG_PDE_PAT | X86_PG_PTE_PAT;
+ }
+
+ /*
+ * range->sva > va holds while the range contains the sentinel, i.e. when
+ * no run is open; the dump call then returns without emitting anything.
+ */
+ if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
+ sysctl_kmaps_dump(sb, range, va);
+ sysctl_kmaps_reinit(range, va, attrs);
+ }
+}
+
+/*
+ * Sysctl handler that walks every PML4 slot of the kernel pmap and writes a
+ * textual description of the mappings it finds into the request's buffer.
+ *
+ * Consecutive leaf mappings with matching attributes are merged into a
+ * single run; a run is emitted when an invalid entry or a mapping with
+ * different attributes is reached.  A heading is printed on entering the
+ * recursive map, direct map, kernel map and large map slots.
+ *
+ * Returns the error from sysctl_wire_old_buffer() if wiring the output
+ * buffer fails, otherwise the result of sbuf_finish().
+ */
+static int
+sysctl_kmaps(SYSCTL_HANDLER_ARGS)
+{
+	struct pmap_kernel_map_range range;
+	struct sbuf sbuf, *sb;
+	pml4_entry_t pml4e;
+	pdp_entry_t *pdp, pdpe;
+	pd_entry_t *pd, pde;
+	pt_entry_t *pt, pte;
+	vm_offset_t sva;
+	vm_paddr_t pa;
+	int error, i, j, k, l;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	sb = &sbuf;
+	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
+
+	/* Sentinel value. */
+	range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+
+	/*
+	 * Iterate over the kernel page tables without holding the kernel pmap
+	 * lock.  Outside of the large map, kernel page table pages are never
+	 * freed, so at worst we will observe inconsistencies in the output.
+	 * Within the large map, ensure that PDP and PD page addresses are
+	 * valid before descending.
+	 */
+	for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) {
+		switch (i) {
+		case PML4PML4I:
+			sbuf_printf(sb, "\nRecursive map:\n");
+			break;
+		case DMPML4I:
+			sbuf_printf(sb, "\nDirect map:\n");
+			break;
+		case KPML4BASE:
+			sbuf_printf(sb, "\nKernel map:\n");
+			break;
+		case LMSPML4I:
+			sbuf_printf(sb, "\nLarge map:\n");
+			break;
+		}
+
+		/* Convert to canonical form. */
+		if (sva == 1ul << 47)
+			sva |= -1ul << 48;
+
+restart:
+		pml4e = kernel_pmap->pm_pml4[i];
+		if ((pml4e & X86_PG_V) == 0) {
+			sva = rounddown2(sva, NBPML4);
+			sysctl_kmaps_dump(sb, &range, sva);
+			sva += NBPML4;
+			continue;
+		}
+		pa = pml4e & PG_FRAME;
+		pdp = (pdp_entry_t *)PHYS_TO_DMAP(pa);
+
+		for (j = pmap_pdpe_index(sva); j < NPDPEPG; j++) {
+			pdpe = pdp[j];
+			if ((pdpe & X86_PG_V) == 0) {
+				sva = rounddown2(sva, NBPDP);
+				sysctl_kmaps_dump(sb, &range, sva);
+				sva += NBPDP;
+				continue;
+			}
+			pa = pdpe & PG_FRAME;
+			if ((pdpe & PG_PS) != 0) {
+				sva = rounddown2(sva, NBPDP);
+				sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe,
+				    0, 0);
+				range.pdpes++;
+				sva += NBPDP;
+				continue;
+			}
+			if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
+			    vm_phys_paddr_to_vm_page(pa) == NULL) {
+				/*
+				 * Only a non-leaf entry points at a page table
+				 * page, so the validity test must come after
+				 * the PG_PS check above.  A leaf's frame is the
+				 * mapped memory itself; if that memory has no
+				 * vm_page, testing it here would restart the
+				 * walk forever.
+				 */
+				goto restart;
+			}
+			pd = (pd_entry_t *)PHYS_TO_DMAP(pa);
+
+			for (k = pmap_pde_index(sva); k < NPDEPG; k++) {
+				pde = pd[k];
+				if ((pde & X86_PG_V) == 0) {
+					sva = rounddown2(sva, NBPDR);
+					sysctl_kmaps_dump(sb, &range, sva);
+					sva += NBPDR;
+					continue;
+				}
+				pa = pde & PG_FRAME;
+				if ((pde & PG_PS) != 0) {
+					sva = rounddown2(sva, NBPDR);
+					sysctl_kmaps_check(sb, &range, sva,
+					    pml4e, pdpe, pde, 0);
+					range.pdes++;
+					sva += NBPDR;
+					continue;
+				}
+				if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
+				    vm_phys_paddr_to_vm_page(pa) == NULL) {
+					/*
+					 * As above: validate only frames that
+					 * refer to a page table page.
+					 */
+					goto restart;
+				}
+				pt = (pt_entry_t *)PHYS_TO_DMAP(pa);
+
+				for (l = pmap_pte_index(sva); l < NPTEPG; l++,
+				    sva += PAGE_SIZE) {
+					pte = pt[l];
+					if ((pte & X86_PG_V) == 0) {
+						sysctl_kmaps_dump(sb, &range,
+						    sva);
+						continue;
+					}
+					sysctl_kmaps_check(sb, &range, sva,
+					    pml4e, pdpe, pde, pte);
+					range.ptes++;
+				}
+			}
+		}
+	}
+
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+	return (error);
+}
+/*
+ * Register sysctl_kmaps() as the handler of the read-only string node
+ * "kernel_maps" under the _vm_pmap parent.
+ */
+SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_kmaps, "A",
+ "Dump kernel address layout");
+
#ifdef DDB
DB_SHOW_COMMAND(pte, pmap_print_pte)
{