diff options
author | Mark Johnston <markj@FreeBSD.org> | 2020-04-24 21:21:23 +0000 |
---|---|---|
committer | Mark Johnston <markj@FreeBSD.org> | 2020-04-24 21:21:23 +0000 |
commit | 117d10f3c19e32d90ea6c0e4128a38f6869e7572 (patch) | |
tree | 5eabab3f137fc15a3f78084811c542e0cf829bbe /sys/mips | |
parent | b6fd3860b52303ff5a2c0f2dc97e78ad2b42a781 (diff) | |
download | src-117d10f3c19e32d90ea6c0e4128a38f6869e7572.tar.gz src-117d10f3c19e32d90ea6c0e4128a38f6869e7572.zip |
Fix a race between _pmap_unwire_ptp() and MipsDoTLBMiss().
MipsDoTLBMiss() will load a segmap entry or pde, check that it isn't
zero, and then chase that pointer to a physical page. If that page has
been freed in the interim, it will read garbage and go on to populate
the TLB with it.
This can happen because pmap_unwire_ptp zeros out the pde and
vm_page_free_zero()s the ptp (or, recursively, zeros out the segmap
entry and vm_page_free_zero()s the pdp) without interlocking against
MipsDoTLBMiss(). The pmap is locked, and pvh_global_lock may or may not
be held, but this is not enough. Solve this issue by inserting TLB
shootdowns within _pmap_unwire_ptp(); as MipsDoTLBMiss() runs with IRQs
deferred, the IPIs involved in TLB shootdown are sufficient to ensure
that MipsDoTLBMiss() sees either a zero segmap entry / pde or a non-zero
entry and the pointed-to page still not freed.
Submitted by: Nathaniel Filardo <nwf20@cl.cam.ac.uk>
Reviewed by: kib
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D24491
Notes
Notes:
svn path=/head/; revision=360280
Diffstat (limited to 'sys/mips')
-rw-r--r-- | sys/mips/mips/pmap.c | 118 |
1 files changed, 97 insertions, 21 deletions
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index e1c4cfb3bc88..7dcb8bf6e107 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -1004,18 +1004,26 @@ static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) { pd_entry_t *pde; + vm_offset_t sva, eva; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ #ifdef __mips_n64 - if (m->pindex < NUPDE) + if (m->pindex < NUPDE) { pde = pmap_pde(pmap, va); - else + sva = va & ~PDRMASK; + eva = sva + NBPDR; + } else { pde = pmap_segmap(pmap, va); + sva = va & ~SEGMASK; + eva = sva + NBSEG; + } #else pde = pmap_pde(pmap, va); + sva = va & ~SEGMASK; + eva = sva + NBSEG; #endif *pde = 0; pmap->pm_stats.resident_count--; @@ -1026,12 +1034,22 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) vm_page_t pdpg; /* - * Recursively decrement next level pagetable refcount + * Recursively decrement next level pagetable refcount. + * Either that shoots down a larger range from TLBs (below) + * or we're to shoot down just the page in question. */ pdp = (pd_entry_t *)*pmap_segmap(pmap, va); pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp)); - pmap_unwire_ptp(pmap, va, pdpg); + if (!pmap_unwire_ptp(pmap, va, pdpg)) { + pmap_invalidate_range(pmap, sva, eva); + } + } else { + /* Segmap entry shootdown */ + pmap_invalidate_range(pmap, sva, eva); } +#else + /* Segmap entry shootdown */ + pmap_invalidate_range(pmap, sva, eva); #endif /* @@ -1485,7 +1503,15 @@ pmap_pv_reclaim(pmap_t locked_pmap) if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); pc->pc_map[field] |= 1UL << bit; - pmap_unuse_pt(pmap, va, *pde); + + /* + * For simplicity, we will unconditionally shoot + * down TLBs either at the end of this function + * or at the top of the loop above if we switch + * to a different pmap. + */ + (void)pmap_unuse_pt(pmap, va, *pde); + freed++; } } @@ -1714,6 +1740,23 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, /* * pmap_remove_pte: do the things to unmap a page in a process + * + * Returns true if this was the last PTE in the PT (and possibly the last PT in + * the PD, and possibly the last PD in the segmap), in which case... + * + * 1) the TLB has been invalidated for the whole PT's span (at least), + * already, to ensure that MipsDoTLBMiss does not attempt to follow a + * dangling pointer into a freed page. No additional TLB shootdown is + * required. + * + * 2) if this removal was part of a sweep to remove PTEs, it is safe to jump + * to the PT span boundary and continue. + * + * 3) The given pde may now point onto a freed page and must not be + * dereferenced + * + * If the return value is false, the TLB has not been shot down (and the segmap + * entry, PD, and PT all remain in place). */ static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, @@ -1782,8 +1825,12 @@ pmap_remove_page(struct pmap *pmap, vm_offset_t va) if (!pte_test(ptq, PTE_V)) return; - (void)pmap_remove_pte(pmap, ptq, va, *pde); - pmap_invalidate_page(pmap, va); + /* + * Remove this PTE from the PT. If this is the last one, then + * the TLB has already been shot down, so don't bother again + */ + if (!pmap_remove_pte(pmap, ptq, va, *pde)) + pmap_invalidate_page(pmap, va); } /* @@ -1797,7 +1844,9 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { pd_entry_t *pde, *pdpe; pt_entry_t *pte; - vm_offset_t va, va_next; + vm_offset_t va_next; + vm_offset_t va_init, va_fini; + bool need_tlb_shootdown; /* * Perform an unsynchronized read. This is, however, safe. @@ -1826,6 +1875,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) continue; } #endif + + /* Scan up to the end of the page table pointed to by pde */ va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; @@ -1842,25 +1893,44 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) if (va_next > eva) va_next = eva; - va = va_next; + need_tlb_shootdown = false; + va_init = sva; + va_fini = va_next; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { + + /* Skip over invalid entries; no need to shootdown */ if (!pte_test(pte, PTE_V)) { - if (va != va_next) { - pmap_invalidate_range(pmap, va, sva); - va = va_next; - } + /* + * If we have not yet found a valid entry, then + * we can move the lower edge of the region to + * invalidate to the next PTE. + */ + if (!need_tlb_shootdown) + va_init = sva + PAGE_SIZE; continue; } - if (va == va_next) - va = sva; + + /* + * A valid entry; the range we are shooting down must + * include this page. va_fini is used instead of sva + * so that if the range ends with a run of !PTE_V PTEs, + * but doesn't clear out so much that pmap_remove_pte + * removes the entire PT, we won't include these !PTE_V + * entries in the region to be shot down. + */ + va_fini = sva + PAGE_SIZE; + if (pmap_remove_pte(pmap, pte, sva, *pde)) { - sva += PAGE_SIZE; + /* Entire PT removed and TLBs shot down. */ + need_tlb_shootdown = false; break; + } else { + need_tlb_shootdown = true; } } - if (va != va_next) - pmap_invalidate_range(pmap, va, sva); + if (need_tlb_shootdown) + pmap_invalidate_range(pmap, va_init, va_fini); } out: rw_wunlock(&pvh_global_lock); @@ -1930,10 +2000,11 @@ pmap_remove_all(vm_page_t m) __func__, (void *)pv->pv_va, (uintmax_t)tpte)); vm_page_dirty(m); } - pmap_invalidate_page(pmap, pv->pv_va); + + if (!pmap_unuse_pt(pmap, pv->pv_va, *pde)) + pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - pmap_unuse_pt(pmap, pv->pv_va, *pde); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -2812,7 +2883,12 @@ pmap_remove_pages(pmap_t pmap) TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); - pmap_unuse_pt(pmap, pv->pv_va, *pde); + + /* + * For simplicity, unconditionally call + * pmap_invalidate_all(), below. + */ + (void)pmap_unuse_pt(pmap, pv->pv_va, *pde); } } if (allfree) { |