author		Mark Johnston <markj@FreeBSD.org>	2021-06-06 20:41:35 +0000
committer	Mark Johnston <markj@FreeBSD.org>	2021-06-14 20:25:15 +0000
commit		25b73f21441893423d00b6631ad22c7d0ad4b0d2 (patch)
tree		b35e1234b7f27fa485e57010af927435e857c757
parent		5346c8bc543a09932b168f459e8a7a601af4bfdc (diff)
riscv: Handle hardware-managed dirty bit updates in pmap_promote_l2()
pmap_promote_l2() failed to handle implementations which set the accessed
and dirty flags. In particular, when comparing the attributes of a run of
512 PTEs, we must handle the possibility that the hardware will set PTE_D
on a clean, writable mapping.

Following the example of amd64 and arm64, change riscv's pmap_promote_l2()
to downgrade clean, writable mappings to read-only, so that updates are
synchronized by the pmap lock.

Fixes:		f6893f09d
Reported by:	Nathaniel Filardo <nwf20@cl.cam.ac.uk>
Tested by:	Nathaniel Filardo <nwf20@cl.cam.ac.uk>
Reviewed by:	jrtc27, alc, Nathaniel Filardo
Sponsored by:	The FreeBSD Foundation

(cherry picked from commit c05748e028b84c216d0161e70418f8cb09e074e4)
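The crux of the change is the write-bit downgrade loop visible in the diff below: a clean, writable PTE (PTE_W set, PTE_D clear) is made read-only with a compare-and-swap loop before its attributes are compared, so a concurrent hardware PTE_D update cannot race the comparison; any later write instead takes a page fault, which is serialized by the pmap lock. The following is a minimal standalone sketch of that pattern only, not the kernel code: it uses C11 atomics in place of FreeBSD's atomic_fcmpset_64(), placeholder PTE_W/PTE_D bit values rather than the real riscv encoding, and a made-up helper name (pte_downgrade_if_clean).

#include <stdatomic.h>
#include <stdint.h>

/* Placeholder bit values for illustration; the real definitions live in the riscv PTE headers. */
#define PTE_W ((uint64_t)1 << 2)
#define PTE_D ((uint64_t)1 << 7)

/*
 * Downgrade a clean, writable PTE to read-only and return the value to use
 * for the attribute comparison.  If the dirty bit is set concurrently, the
 * CAS fails, the freshly observed value is kept, and the loop exits because
 * the mapping is no longer both writable and clean.
 */
static uint64_t
pte_downgrade_if_clean(_Atomic uint64_t *pte)
{
	uint64_t e = atomic_load(pte);

	while ((e & (PTE_W | PTE_D)) == PTE_W) {
		if (atomic_compare_exchange_weak(pte, &e, e & ~PTE_W)) {
			e &= ~PTE_W;
			break;
		}
	}
	return (e);
}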
-rw-r--r--	sys/riscv/riscv/pmap.c	41
1 file changed, 32 insertions(+), 9 deletions(-)
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index f30dda17afae..0f2834febd41 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -2540,7 +2540,7 @@ static void
pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
    struct rwlock **lockp)
{
-	pt_entry_t *firstl3, *l3;
+	pt_entry_t *firstl3, firstl3e, *l3, l3e;
	vm_paddr_t pa;
	vm_page_t ml3;
@@ -2551,7 +2551,8 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
	    ("pmap_promote_l2: invalid l2 entry %p", l2));

	firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
-	pa = PTE_TO_PHYS(pmap_load(firstl3));
+	firstl3e = pmap_load(firstl3);
+	pa = PTE_TO_PHYS(firstl3e);
	if ((pa & L2_OFFSET) != 0) {
		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
		    va, pmap);
@@ -2559,17 +2560,40 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
		return;
	}

+	/*
+	 * Downgrade a clean, writable mapping to read-only to ensure that the
+	 * hardware does not set PTE_D while we are comparing PTEs.
+	 *
+	 * Upon a write access to a clean mapping, the implementation will
+	 * either atomically check protections and set PTE_D, or raise a page
+	 * fault. In the latter case, the pmap lock provides atomicity. Thus,
+	 * we do not issue an sfence.vma here and instead rely on pmap_fault()
+	 * to do so lazily.
+	 */
+	while ((firstl3e & (PTE_W | PTE_D)) == PTE_W) {
+		if (atomic_fcmpset_64(firstl3, &firstl3e, firstl3e & ~PTE_W)) {
+			firstl3e &= ~PTE_W;
+			break;
+		}
+	}
+
	pa += PAGE_SIZE;
	for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) {
-		if (PTE_TO_PHYS(pmap_load(l3)) != pa) {
+		l3e = pmap_load(l3);
+		if (PTE_TO_PHYS(l3e) != pa) {
			CTR2(KTR_PMAP,
			    "pmap_promote_l2: failure for va %#lx pmap %p",
			    va, pmap);
			atomic_add_long(&pmap_l2_p_failures, 1);
			return;
		}
-		if ((pmap_load(l3) & PTE_PROMOTE) !=
-		    (pmap_load(firstl3) & PTE_PROMOTE)) {
+		while ((l3e & (PTE_W | PTE_D)) == PTE_W) {
+			if (atomic_fcmpset_64(l3, &l3e, l3e & ~PTE_W)) {
+				l3e &= ~PTE_W;
+				break;
+			}
+		}
+		if ((l3e & PTE_PROMOTE) != (firstl3e & PTE_PROMOTE)) {
			CTR2(KTR_PMAP,
			    "pmap_promote_l2: failure for va %#lx pmap %p",
			    va, pmap);
@@ -2589,11 +2613,10 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
		return;
	}

-	if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0)
-		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)),
-		    lockp);
+	if ((firstl3e & PTE_SW_MANAGED) != 0)
+		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(firstl3e), lockp);

-	pmap_store(l2, pmap_load(firstl3));
+	pmap_store(l2, firstl3e);

	atomic_add_long(&pmap_l2_promotions, 1);
	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,