aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mark Johnston <markj@FreeBSD.org> 2018-09-06 19:28:52 +0000
committer: Mark Johnston <markj@FreeBSD.org> 2018-09-06 19:28:52 +0000
commit: 23984ce5cd24ce6d4b67bf6ca58b2f81eb6e176d (patch)
tree: 357c8c461a1eb6473fd6ee4db017cce80b66f064
parent: 0c0c965a8f1982bc7c366490157cec9a38315418 (diff)
Avoid resource deadlocks when one domain has exhausted its memory. Attempt
other allowed domains if the requested domain is below the minimum paging
threshold. Block in fork only if all domains available to the forking
thread are below the severe threshold rather than any.

Submitted by: jeff
Reported by: mjg
Reviewed by: alc, kib, markj
Approved by: re (rgrimes)
Differential Revision: https://reviews.freebsd.org/D16191
Notes
Notes: svn path=/head/; revision=338507
-rw-r--r-- sys/sys/vmmeter.h 7
-rw-r--r-- sys/vm/vm_domainset.c 26
-rw-r--r-- sys/vm/vm_domainset.h 7
-rw-r--r-- sys/vm/vm_fault.c 9
-rw-r--r-- sys/vm/vm_glue.c 8
-rw-r--r-- sys/vm/vm_page.c 26
-rw-r--r-- sys/vm/vm_pageout.h 2
7 files changed, 65 insertions, 20 deletions
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index 3c570b0a6b7f..c41b151fa502 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -187,6 +187,13 @@ vm_page_count_severe(void)
return (!DOMAINSET_EMPTY(&vm_severe_domains));
}
+static inline int
+vm_page_count_severe_set(domainset_t *mask)
+{
+
+ return (DOMAINSET_SUBSET(&vm_severe_domains, mask));
+}
+
/*
* Return TRUE if we are under our minimum low-free-pages threshold.
*
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
index eae083adade6..93f7c2596de2 100644
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -100,6 +100,8 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
pindex += (((uintptr_t)obj) / sizeof(*obj));
di->di_offset = pindex;
}
+ /* Skip domains below min on the first pass. */
+ di->di_minskip = true;
}
static void
@@ -213,6 +215,8 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
VM_ALLOC_NOWAIT;
vm_domainset_iter_first(di, domain);
+ if (DOMAINSET_ISSET(*domain, &vm_min_domains))
+ vm_domainset_iter_page(di, domain, req);
}
int
@@ -227,8 +231,15 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
return (ENOMEM);
/* If there are more domains to visit we run the iterator. */
- if (--di->di_n != 0) {
+ while (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
+ if (!di->di_minskip ||
+ !DOMAINSET_ISSET(*domain, &vm_min_domains))
+ return (0);
+ }
+ if (di->di_minskip) {
+ di->di_minskip = false;
+ vm_domainset_iter_first(di, domain);
return (0);
}
@@ -258,6 +269,8 @@ vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
vm_domainset_iter_first(di, domain);
+ if (DOMAINSET_ISSET(*domain, &vm_min_domains))
+ vm_domainset_iter_malloc(di, domain, flags);
}
int
@@ -265,8 +278,17 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
{
/* If there are more domains to visit we run the iterator. */
- if (--di->di_n != 0) {
+ while (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
+ if (!di->di_minskip ||
+ !DOMAINSET_ISSET(*domain, &vm_min_domains))
+ return (0);
+ }
+
+ /* If we skipped domains below min, restart the search from the beginning. */
+ if (di->di_minskip) {
+ di->di_minskip = false;
+ vm_domainset_iter_first(di, domain);
return (0);
}
diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
index 542fe47da677..10da5caa0ea7 100644
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -34,9 +34,10 @@ struct vm_domainset_iter {
struct domainset *di_domain;
int *di_iter;
vm_pindex_t di_offset;
- int di_policy;
int di_flags;
- int di_n;
+ uint16_t di_policy;
+ domainid_t di_n;
+ bool di_minskip;
};
int vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *);
@@ -46,4 +47,6 @@ int vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *);
void vm_domainset_iter_malloc_init(struct vm_domainset_iter *,
struct vm_object *, int *, int *);
+void vm_wait_doms(const domainset_t *);
+
#endif /* __VM_DOMAINSET_H__ */
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index e35c31b5427f..d5a6b57f47e3 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -548,6 +548,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
{
struct faultstate fs;
struct vnode *vp;
+ struct domainset *dset;
vm_object_t next_object, retry_object;
vm_offset_t e_end, e_start;
vm_pindex_t retry_pindex;
@@ -791,7 +792,11 @@ RetryFault:;
* there, and allocation can fail, causing
* restart and new reading of the p_flag.
*/
- if (!vm_page_count_severe() || P_KILLED(curproc)) {
+ dset = fs.object->domain.dr_policy;
+ if (dset == NULL)
+ dset = curthread->td_domain.dr_policy;
+ if (!vm_page_count_severe_set(&dset->ds_mask) ||
+ P_KILLED(curproc)) {
#if VM_NRESERVLEVEL > 0
vm_object_color(fs.object, atop(vaddr) -
fs.pindex);
@@ -806,7 +811,7 @@ RetryFault:;
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
- vm_waitpfault();
+ vm_waitpfault(dset);
goto RetryFault;
}
}
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 7952c81a1afe..832dbce324ef 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
+#include <vm/vm_domainset.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
@@ -534,6 +535,7 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
struct vmspace *vm2, int flags)
{
struct proc *p1 = td->td_proc;
+ struct domainset *dset;
int error;
if ((flags & RFPROC) == 0) {
@@ -557,9 +559,9 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
p2->p_vmspace = p1->p_vmspace;
atomic_add_int(&p1->p_vmspace->vm_refcnt, 1);
}
-
- while (vm_page_count_severe()) {
- vm_wait_severe();
+ dset = td2->td_domain.dr_policy;
+ while (vm_page_count_severe_set(&dset->ds_mask)) {
+ vm_wait_doms(&dset->ds_mask);
}
if ((flags & RFMEM) == 0) {
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 539e6effd69a..850b0638841e 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2935,7 +2935,7 @@ vm_wait_count(void)
return (vm_severe_waiters + vm_min_waiters + vm_pageproc_waiters);
}
-static void
+void
vm_wait_doms(const domainset_t *wdoms)
{
@@ -2961,10 +2961,10 @@ vm_wait_doms(const domainset_t *wdoms)
mtx_lock(&vm_domainset_lock);
if (DOMAINSET_SUBSET(&vm_min_domains, wdoms)) {
vm_min_waiters++;
- msleep(&vm_min_domains, &vm_domainset_lock, PVM,
- "vmwait", 0);
- }
- mtx_unlock(&vm_domainset_lock);
+ msleep(&vm_min_domains, &vm_domainset_lock,
+ PVM | PDROP, "vmwait", 0);
+ } else
+ mtx_unlock(&vm_domainset_lock);
}
}
@@ -3069,15 +3069,21 @@ vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
* this balance without careful testing first.
*/
void
-vm_waitpfault(void)
+vm_waitpfault(struct domainset *dset)
{
+ /*
+ * XXX Ideally we would wait only until the allocation could
+ * be satisfied. This condition can cause new allocators to
+ * consume all freed pages while old allocators wait.
+ */
mtx_lock(&vm_domainset_lock);
- if (vm_page_count_min()) {
+ if (DOMAINSET_SUBSET(&vm_min_domains, &dset->ds_mask)) {
vm_min_waiters++;
- msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0);
- }
- mtx_unlock(&vm_domainset_lock);
+ msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
+ "pfault", 0);
+ } else
+ mtx_unlock(&vm_domainset_lock);
}
struct vm_pagequeue *
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 935088f1f301..3b238a0ba3fb 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -96,7 +96,7 @@ extern int vm_pageout_page_count;
*/
void vm_wait(vm_object_t obj);
-void vm_waitpfault(void);
+void vm_waitpfault(struct domainset *);
void vm_wait_domain(int domain);
void vm_wait_min(void);
void vm_wait_severe(void);