about summary refs log tree commit diff
diff options
context:
space:
mode:
authorKonstantin Belousov <kib@FreeBSD.org>2019-08-16 09:43:49 +0000
committerKonstantin Belousov <kib@FreeBSD.org>2019-08-16 09:43:49 +0000
commit245139c69d15c639b9e518349786ae46c3188f01 (patch)
treeb47b5b3af8d16c265572fc651d84cb55bd36b647
parent3a79e8e7721fbc40b8c63cf4473d92793be73a1a (diff)
downloadsrc-245139c69d15c639b9e518349786ae46c3188f01.tar.gz
src-245139c69d15c639b9e518349786ae46c3188f01.zip
Fix OOM handling of some corner cases.
In addition to the pagedaemon initiating OOM, also do it from the vm_fault()
internals. Namely, if a thread waits for a free page to satisfy a page fault
for some preconfigured amount of time, trigger OOM. These triggers are
rate-limited, due to the common case of several threads of the same
multi-threaded process entering the fault handler simultaneously. The faults
from pagedaemon threads participate in the calculation of the OOM rate, but
are not subject to the limit.

Reviewed by:	markj (previous version)
Tested by:	pho
Discussed with:	alc
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D13671
Notes
Notes: svn path=/head/; revision=351114
-rw-r--r--sys/vm/vm_fault.c31
-rw-r--r--sys/vm/vm_page.c4
-rw-r--r--sys/vm/vm_pageout.c31
-rw-r--r--sys/vm/vm_pageout.h5
4 files changed, 63 insertions(+), 8 deletions(-)
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index a93e67cef5d6..4efa5486a28c 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -135,6 +135,18 @@ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr,
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
int backward, int forward, bool obj_locked);
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+ &vm_pfault_oom_attempts, 0,
+ "Number of page allocation attempts in page fault handler before it "
+ "triggers OOM handling");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+ &vm_pfault_oom_wait, 0,
+ "Number of seconds to wait for free pages before retrying "
+ "the page fault handler");
+
static inline void
release_page(struct faultstate *fs)
{
@@ -570,7 +582,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
vm_pindex_t retry_pindex;
vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
- int locked, nera, result, rv;
+ int locked, nera, oom, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
bool dead, hardfault, is_first_object_locked;
@@ -581,7 +593,9 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
nera = -1;
hardfault = false;
-RetryFault:;
+RetryFault:
+ oom = 0;
+RetryFault_oom:
/*
* Find the backing store object and offset into it to begin the
@@ -827,7 +841,18 @@ RetryFault:;
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
- vm_waitpfault(dset);
+ if (vm_pfault_oom_attempts < 0 ||
+ oom < vm_pfault_oom_attempts) {
+ oom++;
+ vm_waitpfault(dset,
+ vm_pfault_oom_wait * hz);
+ goto RetryFault_oom;
+ }
+ if (bootverbose)
+ printf(
+ "proc %d (%s) failed to alloc page on fault, starting OOM\n",
+ curproc->p_pid, curproc->p_comm);
+ vm_pageout_oom(VM_OOM_MEM_PF);
goto RetryFault;
}
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 26398a7a7408..e8797a929055 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -3032,7 +3032,7 @@ vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
* this balance without careful testing first.
*/
void
-vm_waitpfault(struct domainset *dset)
+vm_waitpfault(struct domainset *dset, int timo)
{
/*
@@ -3044,7 +3044,7 @@ vm_waitpfault(struct domainset *dset)
if (vm_page_count_min_set(&dset->ds_mask)) {
vm_min_waiters++;
msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
- "pfault", 0);
+ "pfault", timo);
} else
mtx_unlock(&vm_domainset_lock);
}
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index dbd34e1ad1a8..dfe40e9c724d 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1720,6 +1720,12 @@ vm_pageout_oom_pagecount(struct vmspace *vmspace)
return (res);
}
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+ "");
+static struct mtx vm_oom_ratelim_mtx;
+
void
vm_pageout_oom(int shortage)
{
@@ -1727,9 +1733,31 @@ vm_pageout_oom(int shortage)
vm_offset_t size, bigsize;
struct thread *td;
struct vmspace *vm;
+ int now;
bool breakout;
/*
+ * For OOM requests originating from vm_fault(), there is a high
+ * chance that a single large process faults simultaneously in
+ * several threads. Also, on an active system running many
+ * processes of middle-size, like buildworld, all of them
+ * could fault almost simultaneously as well.
+ *
+ * To avoid killing too many processes, rate-limit OOMs
+ * initiated by vm_fault() time-outs on the waits for free
+ * pages.
+ */
+ mtx_lock(&vm_oom_ratelim_mtx);
+ now = ticks;
+ if (shortage == VM_OOM_MEM_PF &&
+ (u_int)(now - vm_oom_ratelim_last) < hz * vm_oom_pf_secs) {
+ mtx_unlock(&vm_oom_ratelim_mtx);
+ return;
+ }
+ vm_oom_ratelim_last = now;
+ mtx_unlock(&vm_oom_ratelim_mtx);
+
+ /*
* We keep the process bigproc locked once we find it to keep anyone
* from messing with it; however, there is a possibility of
* deadlock if process B is bigproc and one of its child processes
@@ -1793,7 +1821,7 @@ vm_pageout_oom(int shortage)
continue;
}
size = vmspace_swap_count(vm);
- if (shortage == VM_OOM_MEM)
+ if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
size += vm_pageout_oom_pagecount(vm);
vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
@@ -2048,6 +2076,7 @@ vm_pageout(void)
p = curproc;
td = curthread;
+ mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
for (first = -1, i = 0; i < vm_ndomains; i++) {
if (VM_DOMAIN_EMPTY(i)) {
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 57402801f580..ab7d6a945fa1 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -79,7 +79,8 @@ extern u_long vm_page_max_user_wired;
extern int vm_pageout_page_count;
#define VM_OOM_MEM 1
-#define VM_OOM_SWAPZ 2
+#define VM_OOM_MEM_PF 2
+#define VM_OOM_SWAPZ 3
/*
* vm_lowmem flags.
@@ -96,7 +97,7 @@ extern int vm_pageout_page_count;
*/
void vm_wait(vm_object_t obj);
-void vm_waitpfault(struct domainset *);
+void vm_waitpfault(struct domainset *, int timo);
void vm_wait_domain(int domain);
void vm_wait_min(void);
void vm_wait_severe(void);