diff options
Diffstat (limited to 'sys/arm64/arm64/pmap.c')
-rw-r--r-- | sys/arm64/arm64/pmap.c | 876 |
1 files changed, 789 insertions, 87 deletions
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index d9ea4e329766..961c5b1e68d2 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -106,6 +106,7 @@ __FBSDID("$FreeBSD$"); */ #include <sys/param.h> +#include <sys/bitstring.h> #include <sys/bus.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -134,6 +135,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> +#include <vm/vm_phys.h> #include <vm/vm_radix.h> #include <vm/vm_reserv.h> #include <vm/uma.h> @@ -176,6 +178,7 @@ __FBSDID("$FreeBSD$"); #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) +#define pa_to_pvh(pa) (&pv_table[pmap_l2_pindex(pa)]) #define NPV_LIST_LOCKS MAXCPU @@ -218,6 +221,14 @@ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; +/* + * Data for the pv entry allocation mechanism. + * Updates to pv_invl_gen are protected by the pv_list_locks[] + * elements, but reads are not. + */ +static struct md_page *pv_table; +static struct md_page pv_dummy; + vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ @@ -231,7 +242,7 @@ extern pt_entry_t pagetable_dmap[]; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); -static int superpages_enabled = 1; +static int superpages_enabled = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, "Are large page mappings enabled?"); @@ -498,6 +509,17 @@ pmap_l3_valid(pt_entry_t l3) return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); } + +/* Is a level 1 or 2entry a valid block and cacheable */ +CTASSERT(L1_BLOCK == L2_BLOCK); +static __inline int +pmap_pte_valid_cacheable(pt_entry_t pte) +{ + + return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) && + ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); +} + static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { @@ -855,7 +877,8 @@ pmap_page_init(vm_page_t m) void pmap_init(void) { - int i; + vm_size_t s; + int i, pv_npg; /* * Are large page mappings enabled? @@ -872,8 +895,39 @@ pmap_init(void) */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); + + /* + * Calculate the size of the pv head table for superpages. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, + M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); } +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, + "2MB page mapping counters"); + +static u_long pmap_l2_demotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_l2_demotions, 0, "2MB page demotions"); + +static u_long pmap_l2_p_failures; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_l2_p_failures, 0, "2MB page promotion failures"); + +static u_long pmap_l2_promotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_l2_promotions, 0, "2MB page promotions"); + /* * Invalidate a single TLB entry. */ @@ -980,6 +1034,7 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; + vm_offset_t off; vm_paddr_t pa; vm_page_t m; int lvl; @@ -1001,9 +1056,21 @@ retry: tpte & ATTR_DESCR_MASK)); if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) + switch(lvl) { + case 1: + off = va & L1_OFFSET; + break; + case 2: + off = va & L2_OFFSET; + break; + case 3: + default: + off = 0; + } + if (vm_page_pa_tryrelock(pmap, + (tpte & ~ATTR_MASK) | off, &pa)) goto retry; - m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); + m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); vm_page_hold(m); } } @@ -1371,6 +1438,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l0 = kernel_pmap->pm_l0; + pmap->pm_root.rt_root = 0; } int @@ -1392,6 +1460,7 @@ pmap_pinit(pmap_t pmap) if ((l0pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l0); + pmap->pm_root.rt_root = 0; bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); @@ -1537,6 +1606,9 @@ pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pde, tpde; +#ifdef INVARIANTS + pt_entry_t *pte; +#endif vm_page_t m; int lvl; @@ -1555,13 +1627,33 @@ retry: * and activate it. If we get a level 2 pde it will point to a level 3 * table. */ - if (lvl == 2) { + switch (lvl) { + case -1: + break; + case 0: +#ifdef INVARIANTS + pte = pmap_l0_to_l1(pde, va); + KASSERT(pmap_load(pte) == 0, + ("pmap_alloc_l3: TODO: l0 superpages")); +#endif + break; + case 1: +#ifdef INVARIANTS + pte = pmap_l1_to_l2(pde, va); + KASSERT(pmap_load(pte) == 0, + ("pmap_alloc_l3: TODO: l1 superpages")); +#endif + break; + case 2: tpde = pmap_load(pde); if (tpde != 0) { m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); m->wire_count++; return (m); } + break; + default: + panic("pmap_alloc_l3: Invalid level %d", lvl); } /* @@ -1592,6 +1684,8 @@ pmap_release(pmap_t pmap) KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); + KASSERT(vm_radix_is_empty(&pmap->pm_root), + ("pmap_release: pmap has reserved page table page(s)")); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); @@ -1876,6 +1970,68 @@ retry: } /* + * Ensure that the number of spare PV entries in the specified pmap meets or + * exceeds the given count, "needed". + * + * The given PV list lock may be released. + */ +static void +reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) +{ + struct pch new_tail; + struct pv_chunk *pc; + int avail, free; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); + + /* + * Newly allocated PV chunks must be stored in a private list until + * the required number of PV chunks have been allocated. Otherwise, + * reclaim_pv_chunk() could recycle one of these chunks. In + * contrast, these chunks must be added to the pmap upon allocation. + */ + TAILQ_INIT(&new_tail); +retry: + avail = 0; + TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { + bit_count((bitstr_t *)pc->pc_map, 0, + sizeof(pc->pc_map) * NBBY, &free); + if (free == 0) + break; + avail += free; + if (avail >= needed) + break; + } + for (; avail < needed; avail += _NPCPV) { + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + m = reclaim_pv_chunk(pmap, lockp); + if (m == NULL) + goto retry; + } + PV_STAT(atomic_add_int(&pc_chunk_count, 1)); + PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); + dump_add_page(m->phys_addr); + pc = (void *)PHYS_TO_DMAP(m->phys_addr); + pc->pc_pmap = pmap; + pc->pc_map[0] = PC_FREE0; + pc->pc_map[1] = PC_FREE1; + pc->pc_map[2] = PC_FREE2; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); + PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); + } + if (!TAILQ_EMPTY(&new_tail)) { + mtx_lock(&pv_chunks_mutex); + TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); + mtx_unlock(&pv_chunks_mutex); + } +} + +/* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or @@ -1897,6 +2053,74 @@ pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) } /* + * After demotion from a 2MB page mapping to 512 4KB page mappings, + * destroy the pv entry for the 2MB page mapping and reinstantiate the pv + * entries for each of the 4KB page mappings. + */ +static void +pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + struct pv_chunk *pc; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + int bit, field; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((pa & L2_OFFSET) == 0, + ("pmap_pv_demote_l2: pa is not 2mpage aligned")); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the 2mpage's pv entry for this mapping to the first + * page's pv list. Once this transfer begins, the pv list lock + * must not be released until the last pv entry is reinstantiated. + */ + pvh = pa_to_pvh(pa); + va = va & ~L2_OFFSET; + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */ + PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); + va_last = va + L2_SIZE - PAGE_SIZE; + for (;;) { + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || + pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); + for (field = 0; field < _NPCM; field++) { + while (pc->pc_map[field]) { + bit = ffsl(pc->pc_map[field]) - 1; + pc->pc_map[field] &= ~(1ul << bit); + pv = &pc->pc_pventry[field * 64 + bit]; + va += PAGE_SIZE; + pv->pv_va = va; + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_l2: page %p is not managed", m)); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (va == va_last) + goto out; + } + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } +out: + if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } + PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1)); + PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); +} + +/* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. @@ -1940,6 +2164,7 @@ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { + struct md_page *pvh; pt_entry_t old_l3; vm_page_t m; @@ -1960,6 +2185,12 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } } return (pmap_unuse_l3(pmap, va, l2e, free)); } @@ -2027,8 +2258,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) l3_paddr = pmap_load(l2); if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { - KASSERT((l3_paddr & ATTR_SW_MANAGED) == 0, - ("%s: TODO: Demote managed pages", __func__)); + /* TODO: Add pmap_remove_l2 */ if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET, &lock) == NULL) continue; @@ -2096,28 +2326,56 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_remove_all(vm_page_t m) { + struct md_page *pvh; pv_entry_t pv; pmap_t pmap; struct rwlock *lock; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; + vm_offset_t va; struct spglist free; - int lvl, md_gen; + int lvl, pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: rw_wlock(lock); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + pte = pmap_pte(pmap, va, &lvl); + KASSERT(pte != NULL, + ("pmap_remove_all: no page table entry found")); + KASSERT(lvl == 2, + ("pmap_remove_all: invalid pte level %d", lvl)); + + pmap_demote_l2_locked(pmap, pte, va, &lock); + PMAP_UNLOCK(pmap); + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; @@ -2204,9 +2462,17 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); - if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) + if (pmap_load(l2) == 0) continue; + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { + l3p = pmap_demote_l2(pmap, l2, sva); + if (l3p == NULL) + continue; + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_protect: Invalid L2 entry after demotion")); + if (va_next > eva) va_next = eva; @@ -2229,13 +2495,53 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) } /* + * Inserts the specified page table page into the specified pmap's collection + * of idle page table pages. Each of a pmap's page table pages is responsible + * for mapping a distinct range of virtual addresses. The pmap's collection is + * ordered by this virtual address range. + */ +static __inline int +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_insert(&pmap->pm_root, mpte)); +} + +/* + * Looks for a page table page mapping the specified virtual address in the + * specified pmap's collection of idle page table pages. Returns NULL if there + * is no page table page corresponding to the specified virtual address. + */ +static __inline vm_page_t +pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(va))); +} + +/* + * Removes the specified page table page from the specified pmap's collection + * of idle page table pages. The specified page table page must be a member of + * the pmap's collection. + */ +static __inline void +pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + vm_radix_remove(&pmap->pm_root, mpte->pindex); +} + +/* * Performs a break-before-make update of a pmap entry. This is needed when * either promoting or demoting pages to ensure the TLB doesn't get into an * inconsistent state. */ static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, - vm_offset_t va) + vm_offset_t va, vm_size_t size) { register_t intr; @@ -2252,7 +2558,7 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, /* Clear the old mapping */ pmap_load_clear(pte); PTE_SYNC(pte); - pmap_invalidate_page(pmap, va); + pmap_invalidate_range(pmap, va, va + size); /* Create the new mapping */ pmap_load_store(pte, newpte); @@ -2263,6 +2569,47 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, } /* + * After promotion from 512 4KB page mappings to a single 2MB page mapping, + * replace the many pv entries for the 4KB page mappings by a single pv entry + * for the 2MB page mapping. + */ +static void +pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + + KASSERT((pa & L2_OFFSET) == 0, + ("pmap_pv_promote_l2: pa is not 2mpage aligned")); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the first page's pv entry for this mapping to the 2mpage's + * pv list. Aside from avoiding the cost of a call to get_pv_entry(), + * a transfer avoids the possibility that get_pv_entry() calls + * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the + * mappings that is being promoted. + */ + m = PHYS_TO_VM_PAGE(pa); + va = va & ~L2_OFFSET; + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + /* Free the remaining NPTEPG - 1 pv entries. */ + va_last = va + L2_SIZE - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} + +/* * Tries to promote the 512, contiguous 4KB page mappings that are within a * single level 2 table entry to a single 2MB page mapping. For promotion * to occur, two conditions must be met: (1) the 4KB page mappings must map @@ -2274,51 +2621,65 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp) { pt_entry_t *firstl3, *l3, newl2, oldl3, pa; - register_t intr; + vm_page_t mpte; + vm_offset_t sva; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); + sva = va & ~L2_OFFSET; + firstl3 = pmap_l2_to_l3(l2, sva); newl2 = pmap_load(firstl3); - /* Ignore managed pages for now */ - if ((newl2 & ATTR_SW_MANAGED) != 0) - return; /* Check the alingment is valid */ - if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) + if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" + " in pmap %p", va, pmap); return; + } pa = newl2 + L2_SIZE - PAGE_SIZE; for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { oldl3 = pmap_load(l3); - if (oldl3 != pa) + if (oldl3 != pa) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" + " in pmap %p", va, pmap); return; + } pa -= PAGE_SIZE; } - newl2 &= ~ATTR_DESCR_MASK; - newl2 |= L2_BLOCK; - /* - * Ensure we don't get switched out with the page table in an - * inconsistent state. We also need to ensure no interrupts fire - * as they may make use of an address we are about to invalidate. + * Save the page table page in its current state until the L2 + * mapping the superpage is demoted by pmap_demote_l2() or + * destroyed by pmap_remove_l3(). */ - intr = intr_disable(); - critical_enter(); + mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + KASSERT(mpte >= vm_page_array && + mpte < &vm_page_array[vm_page_array_size], + ("pmap_promote_l2: page table page is out of range")); + KASSERT(mpte->pindex == pmap_l2_pindex(va), + ("pmap_promote_l2: page table page's pindex is wrong")); + if (pmap_insert_pt_page(pmap, mpte)) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx in pmap %p", va, + pmap); + return; + } - /* Clear the old mapping */ - pmap_load_clear(l2); - PTE_SYNC(l2); - pmap_invalidate_range(pmap, rounddown2(va, L2_SIZE), - roundup2(va, L2_SIZE)); + if ((newl2 & ATTR_SW_MANAGED) != 0) + pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp); - /* Create the new mapping */ - pmap_load_store(l2, newl2); - PTE_SYNC(l2); + newl2 &= ~ATTR_DESCR_MASK; + newl2 |= L2_BLOCK; - critical_exit(); - intr_restore(intr); + pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE); + + atomic_add_long(&pmap_l2_promotions, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, + pmap); } /* @@ -2399,7 +2760,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l3 = pmap_l2_to_l3(pde, va); } else { - pde = pmap_pde(pmap, va, &lvl); /* * If we get a level 2 pde it must point to a level 3 entry * otherwise we will need to create the intermediate tables @@ -2542,10 +2902,11 @@ havel3: */ if (orig_l3 != 0) { validate: - orig_l3 = pmap_load_store(l3, new_l3); + orig_l3 = pmap_load(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { + pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE); if ((orig_l3 & ATTR_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) @@ -2554,9 +2915,18 @@ validate: vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); + if ((om->aflags & PGA_WRITEABLE) != 0 && + TAILQ_EMPTY(&om->md.pv_list) && + ((om->flags & PG_FICTITIOUS) != 0 || + TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) + vm_page_aflag_clear(om, PGA_WRITEABLE); } - } else if (pmap_page_dirty(orig_l3)) { - if ((orig_l3 & ATTR_SW_MANAGED) != 0) + } else { + pmap_load_store(l3, new_l3); + PTE_SYNC(l3); + pmap_invalidate_page(pmap, va); + if (pmap_page_dirty(orig_l3) && + (orig_l3 & ATTR_SW_MANAGED) != 0) vm_page_dirty(m); } } else { @@ -2569,15 +2939,12 @@ validate: if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); - /* XXX: Not yet, not all demotions are handled */ -#if 0 if ((mpte == NULL || mpte->wire_count == NL3PG) && pmap_superpages_enabled() && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { KASSERT(lvl == 2, ("Invalid pde level %d", lvl)); pmap_promote_l2(pmap, pde, va, &lock); } -#endif if (lock != NULL) rw_wunlock(lock); @@ -2651,7 +3018,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, { struct spglist free; pd_entry_t *pde; - pt_entry_t *l3; + pt_entry_t *l2, *l3; vm_paddr_t pa; int lvl; @@ -2686,6 +3053,12 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ + if (lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == + L2_BLOCK) + return (NULL); + } if (lvl == 2 && pmap_load(pde) != 0) { mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); @@ -2743,7 +3116,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | - ATTR_AP(ATTR_AP_RW) | L3_PAGE; + ATTR_AP(ATTR_AP_RO) | L3_PAGE; /* * Now validate mapping with RO protection @@ -2813,6 +3186,14 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) if (pmap_load(l2) == 0) continue; + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { + l3 = pmap_demote_l2(pmap, l2, sva); + if (l3 == NULL) + continue; + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_unwire: Invalid l2 entry after demotion")); + if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, @@ -2967,6 +3348,7 @@ pmap_quick_remove_page(vm_offset_t addr) boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { + struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; @@ -2986,6 +3368,18 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) if (loops >= 16) break; } + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } rw_runlock(lock); return (rv); } @@ -3000,10 +3394,11 @@ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; + struct md_page *pvh; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; - int count, lvl, md_gen; + int count, lvl, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); @@ -3028,6 +3423,29 @@ restart: count++; PMAP_UNLOCK(pmap); } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if (pte != NULL && + (pmap_load(pte) & ATTR_SW_WIRED) != 0) + count++; + PMAP_UNLOCK(pmap); + } + } rw_runlock(lock); return (count); } @@ -3054,8 +3472,9 @@ pmap_remove_pages(pmap_t pmap) pd_entry_t *pde; pt_entry_t *pte, tpte; struct spglist free; - vm_page_t m; + vm_page_t m, ml3, mt; pv_entry_t pv; + struct md_page *pvh; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; @@ -3082,14 +3501,30 @@ pmap_remove_pages(pmap_t pmap) pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("Attempting to remove an unmapped page")); - KASSERT(lvl == 2, - ("Invalid page directory level: %d", lvl)); - - pte = pmap_l2_to_l3(pde, pv->pv_va); - KASSERT(pte != NULL, - ("Attempting to remove an unmapped page")); - tpte = pmap_load(pte); + switch(lvl) { + case 1: + pte = pmap_l1_to_l2(pde, pv->pv_va); + tpte = pmap_load(pte); + KASSERT((tpte & ATTR_DESCR_MASK) == + L2_BLOCK, + ("Attempting to remove an invalid " + "block: %lx", tpte)); + tpte = pmap_load(pte); + break; + case 2: + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); + KASSERT((tpte & ATTR_DESCR_MASK) == + L3_PAGE, + ("Attempting to remove an invalid " + "page: %lx", tpte)); + break; + default: + panic( + "Invalid page directory level: %d", + lvl); + } /* * We cannot remove wired pages from a process' mapping at this time @@ -3112,10 +3547,17 @@ pmap_remove_pages(pmap_t pmap) ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); - /* XXX: assumes tpte is level 3 */ - if (pmap_is_current(pmap) && - pmap_l3_valid_cacheable(tpte)) - cpu_dcache_wb_range(pv->pv_va, L3_SIZE); + if (pmap_is_current(pmap)) { + if (lvl == 2 && + pmap_l3_valid_cacheable(tpte)) { + cpu_dcache_wb_range(pv->pv_va, + L3_SIZE); + } else if (lvl == 1 && + pmap_pte_valid_cacheable(tpte)) { + cpu_dcache_wb_range(pv->pv_va, + L2_SIZE); + } + } pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); @@ -3123,18 +3565,66 @@ pmap_remove_pages(pmap_t pmap) /* * Update the vm_page_t clean/reference bits. */ - if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) - vm_page_dirty(m); + if ((tpte & ATTR_AP_RW_BIT) == + ATTR_AP(ATTR_AP_RW)) { + switch (lvl) { + case 1: + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + vm_page_dirty(m); + break; + case 2: + vm_page_dirty(m); + break; + } + } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; - - pmap_resident_count_dec(pmap, 1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - m->md.pv_gen++; - + switch (lvl) { + case 1: + pmap_resident_count_dec(pmap, + L2_SIZE / PAGE_SIZE); + pvh = pa_to_pvh(tpte & ~ATTR_MASK); + TAILQ_REMOVE(&pvh->pv_list, pv,pv_next); + pvh->pv_gen++; + if (TAILQ_EMPTY(&pvh->pv_list)) { + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + if ((mt->aflags & PGA_WRITEABLE) != 0 && + TAILQ_EMPTY(&mt->md.pv_list)) + vm_page_aflag_clear(mt, PGA_WRITEABLE); + } + ml3 = pmap_lookup_pt_page(pmap, + pv->pv_va); + if (ml3 != NULL) { + pmap_remove_pt_page(pmap, ml3); + pmap_resident_count_dec(pmap,1); + KASSERT(ml3->wire_count == NL3PG, + ("pmap_remove_pages: l3 page wire count error")); + ml3->wire_count = 0; + pmap_add_delayed_free_list(ml3, + &free, FALSE); + atomic_subtract_int( + &vm_cnt.v_wire_count, 1); + } + break; + case 2: + pmap_resident_count_dec(pmap, 1); + TAILQ_REMOVE(&m->md.pv_list, pv, + pv_next); + m->md.pv_gen++; + if ((m->aflags & PGA_WRITEABLE) != 0 && + TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh( + VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, + PGA_WRITEABLE); + } + break; + } pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), &free); freed++; @@ -3165,9 +3655,10 @@ pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; + struct md_page *pvh; pt_entry_t *pte, mask, value; pmap_t pmap; - int lvl, md_gen; + int lvl, md_gen, pvh_gen; boolean_t rv; rv = FALSE; @@ -3204,6 +3695,41 @@ restart: if (rv) goto out; } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + KASSERT(lvl == 2, + ("pmap_page_test_mappings: Invalid level %d", lvl)); + mask = 0; + value = 0; + if (modified) { + mask |= ATTR_AP_RW_BIT; + value |= ATTR_AP(ATTR_AP_RW); + } + if (accessed) { + mask |= ATTR_AF | ATTR_DESCR_MASK; + value |= ATTR_AF | L2_BLOCK; + } + rv = (pmap_load(pte) & mask) == value; + PMAP_UNLOCK(pmap); + if (rv) + goto out; + } + } out: rw_runlock(lock); return (rv); @@ -3277,11 +3803,13 @@ pmap_is_referenced(vm_page_t m) void pmap_remove_write(vm_page_t m) { + struct md_page *pvh; pmap_t pmap; struct rwlock *lock; - pv_entry_t pv; + pv_entry_t next_pv, pv; pt_entry_t oldpte, *pte; - int lvl, md_gen; + vm_offset_t va; + int lvl, md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); @@ -3295,16 +3823,43 @@ pmap_remove_write(vm_page_t m) if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: rw_wlock(lock); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + rw_wunlock(lock); + goto retry_pv_loop; + } + } + va = pv->pv_va; + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) + pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET, + &lock); + KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), + ("inconsistent pv lock %p %p for page %p", + lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); + PMAP_UNLOCK(pmap); + } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || + md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; @@ -3351,14 +3906,18 @@ safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) int pmap_ts_referenced(vm_page_t m) { + struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; + pt_entry_t *l3; + vm_offset_t va; vm_paddr_t pa; - int cleared, md_gen, not_cleared, lvl; + int cleared, md_gen, not_cleared, lvl, pvh_gen; struct spglist free; + bool demoted; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); @@ -3366,9 +3925,106 @@ pmap_ts_referenced(vm_page_t m) cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); rw_wlock(lock); retry: not_cleared = 0; + if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) + goto small_mappings; + pv = pvf; + do { + if (pvf == NULL) + pvf = pv; + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found")); + KASSERT(lvl == 1, + ("pmap_ts_referenced: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE, + ("pmap_ts_referenced: found an invalid l1 table")); + pte = pmap_l1_to_l2(pde, pv->pv_va); + tpte = pmap_load(pte); + if ((tpte & ATTR_AF) != 0) { + /* + * Since this reference bit is shared by 512 4KB + * pages, it should not be cleared every time it is + * tested. Apply a simple "hash" function on the + * physical page number, the virtual superpage number, + * and the pmap address to select one 4KB page out of + * the 512 on which testing the reference bit will + * result in clearing that reference bit. This + * function is designed to avoid the selection of the + * same 4KB page for every 2MB page mapping. + * + * On demotion, a mapping that hasn't been referenced + * is simply destroyed. To avoid the possibility of a + * subsequent page fault on a demoted wired mapping, + * always leave its reference bit set. Moreover, + * since the superpage is wired, the current state of + * its reference bit won't affect page replacement. + */ + if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ + (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && + (tpte & ATTR_SW_WIRED) == 0) { + if (safe_to_clear_referenced(pmap, tpte)) { + /* + * TODO: We don't handle the access + * flag at all. We need to be able + * to set it in the exception handler. + */ + panic("ARM64TODO: " + "safe_to_clear_referenced\n"); + } else if (pmap_demote_l2_locked(pmap, pte, + pv->pv_va, &lock) != NULL) { + demoted = true; + va += VM_PAGE_TO_PHYS(m) - + (tpte & ~ATTR_MASK); + l3 = pmap_l2_to_l3(pte, va); + pmap_remove_l3(pmap, l3, va, + pmap_load(pte), NULL, &lock); + } else + demoted = true; + + if (demoted) { + /* + * The superpage mapping was removed + * entirely and therefore 'pv' is no + * longer valid. + */ + if (pvf == pv) + pvf = NULL; + pv = NULL; + } + cleared++; + KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), + ("inconsistent pv lock %p %p for page %p", + lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); + } else + not_cleared++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. */ + if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + } + if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) + goto out; + } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); +small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; @@ -3377,11 +4033,12 @@ retry: pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } @@ -3598,7 +4255,8 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) l3 &= ~ATTR_IDX_MASK; l3 |= ATTR_IDX(mode); - pmap_update_entry(kernel_pmap, pte, l3, tmpva); + pmap_update_entry(kernel_pmap, pte, l3, tmpva, + PAGE_SIZE); /* * If moving to a non-cacheable entry flush @@ -3634,6 +4292,8 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) ("pmap_demote_l1: Demoting a non-block entry")); KASSERT((va & L1_OFFSET) == 0, ("pmap_demote_l1: Invalid virtual address %#lx", va)); + KASSERT((oldl1 & ATTR_SW_MANAGED) == 0, + ("pmap_demote_l1: Level 1 table shouldn't be managed")); tmpl1 = 0; if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) { @@ -3663,6 +4323,9 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) phys += L2_SIZE; } cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); + KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK), + ("Invalid l2 page (%lx != %lx)", l2[0], + (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK)); if (tmpl1 != 0) { pmap_kenter(tmpl1, PAGE_SIZE, @@ -3670,7 +4333,7 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK)); } - pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va); + pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE); if (tmpl1 != 0) { pmap_kremove(tmpl1); @@ -3694,13 +4357,12 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, int i; PMAP_LOCK_ASSERT(pmap, MA_OWNED); + l3 = NULL; oldl2 = pmap_load(l2); KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_demote_l2: Demoting a non-block entry")); KASSERT((va & L2_OFFSET) == 0, ("pmap_demote_l2: Invalid virtual address %#lx", va)); - KASSERT((oldl2 & ATTR_SW_MANAGED) == 0, - ("pmap_demote_l2: TODO: Demote managed pages")); tmpl2 = 0; if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) { @@ -3709,11 +4371,19 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, return (NULL); } - if ((ml3 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { - CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" - " in pmap %p", va, pmap); - return (NULL); + if ((ml3 = pmap_lookup_pt_page(pmap, va)) != NULL) { + pmap_remove_pt_page(pmap, ml3); + } else { + ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va), + (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + if (ml3 == NULL) { + CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + goto fail; + } + if (va < VM_MAXUSER_ADDRESS) + pmap_resident_count_inc(pmap, 1); } l3phys = VM_PAGE_TO_PHYS(ml3); @@ -3724,21 +4394,53 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, /* The attributed from the old l2 table to be copied */ newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; - /* Create the new entries */ - for (i = 0; i < Ln_ENTRIES; i++) { - l3[i] = newl3 | phys; - phys += L3_SIZE; + /* + * If the page table page is new, initialize it. + */ + if (ml3->wire_count == 1) { + for (i = 0; i < Ln_ENTRIES; i++) { + l3[i] = newl3 | phys; + phys += L3_SIZE; + } + cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE); } - cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE); + KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE), + ("Invalid l3 page (%lx != %lx)", l3[0], + (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE)); + /* + * Map the temporary page so we don't lose access to the l2 table. + */ if (tmpl2 != 0) { pmap_kenter(tmpl2, PAGE_SIZE, DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY); l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK)); } - pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va); + /* + * The spare PV entries must be reserved prior to demoting the + * mapping, that is, prior to changing the PDE. Otherwise, the state + * of the L2 and the PV lists will be inconsistent, which can result + * in reclaim_pv_chunk() attempting to remove a PV entry from the + * wrong PV list and pmap_pv_demote_l2() failing to find the expected + * PV entry for the 2MB page mapping that is being demoted. + */ + if ((oldl2 & ATTR_SW_MANAGED) != 0) + reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); + + pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE); + + /* + * Demote the PV entry. + */ + if ((oldl2 & ATTR_SW_MANAGED) != 0) + pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp); + + atomic_add_long(&pmap_l2_demotions, 1); + CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx" + " in pmap %p %lx", va, pmap, l3[0]); +fail: if (tmpl2 != 0) { pmap_kremove(tmpl2); kva_free(tmpl2, PAGE_SIZE); |