author     Jeff Roberson <jeff@FreeBSD.org>    2018-03-22 19:21:11 +0000
committer  Jeff Roberson <jeff@FreeBSD.org>    2018-03-22 19:21:11 +0000
commit     5c930c894dad3d587fe7ff3c9abc496b5379d536 (patch)
tree       1983f1c520172dec42bf2d0103e3c860caeb4fea /sys/vm
parent     9a4b4cd3bcd466d22941bd4bd8f17fba46ea4dee (diff)
download   src-5c930c894dad3d587fe7ff3c9abc496b5379d536.tar.gz
           src-5c930c894dad3d587fe7ff3c9abc496b5379d536.zip
Lock reservations with a dedicated lock in each reservation.  Protect the
vmd_free_count with atomics.  This allows us to allocate and free from
reservations without the free lock, except where a superpage is allocated
from the physical layer, which is roughly 1/512 of the operations on amd64.
Use the counter API to eliminate cache contention on counters.

Reviewed by:	markj
Tested by:	pho
Sponsored by:	Netflix, Dell/EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D14707
Notes:
    svn path=/head/; revision=331369
Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/vm_page.c       | 167
-rw-r--r--  sys/vm/vm_pagequeue.h  |  19
-rw-r--r--  sys/vm/vm_reserv.c     | 340
-rw-r--r--  sys/vm/vm_reserv.h     |   4
4 files changed, 318 insertions, 212 deletions
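The heart of the change is vm_domain_allocate() in the vm_page.c hunk below: instead of checking the free-page count under the domain free lock, the allocator now reserves pages by atomically decrementing vmd_free_count in an atomic_fcmpset_int() loop, and only takes the free lock to pull pages out of the physical allocator. A minimal userland sketch of that admission pattern follows; it uses C11 <stdatomic.h> in place of the kernel atomics, and the names (free_count, try_allocate) are illustrative, not part of the commit.

/*
 * Sketch of the lock-free admission check: reserve "npages" from a
 * shared free-page counter, failing if the allocation would drop the
 * counter below "limit".  A weak compare-and-swap loop retries when
 * another thread updates the counter concurrently.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned int free_count;

static bool
try_allocate(unsigned int npages, unsigned int limit)
{
	unsigned int old, new;

	old = atomic_load(&free_count);
	do {
		if (old < limit + npages)
			return (false);	/* Below the reserve; fail. */
		new = old - npages;
		/* On failure, "old" is reloaded with the current value. */
	} while (!atomic_compare_exchange_weak(&free_count, &old, new));
	return (true);
}

int
main(void)
{

	atomic_store(&free_count, 1024);
	printf("allocated: %d\n", try_allocate(8, 128));
	return (0);
}

On success the caller owns the reservation; if the physical allocator then cannot supply the pages, the caller gives the reservation back by re-incrementing the counter, which is what vm_domain_freecnt_inc() does in the diff.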
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 8948ab68e87b..56af64a8ead9 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -177,7 +177,6 @@ static uma_zone_t fakepg_zone;
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_enqueue(uint8_t queue, vm_page_t m);
-static void vm_page_free_phys(struct vm_domain *vmd, vm_page_t m);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
@@ -1677,10 +1676,10 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
* for the request class and false otherwise.
*/
int
-vm_domain_available(struct vm_domain *vmd, int req, int npages)
+vm_domain_allocate(struct vm_domain *vmd, int req, int npages)
{
+ u_int limit, old, new;
- vm_domain_free_assert_locked(vmd);
req = req & VM_ALLOC_CLASS_MASK;
/*
@@ -1688,15 +1687,34 @@ vm_domain_available(struct vm_domain *vmd, int req, int npages)
*/
if (curproc == pageproc && req != VM_ALLOC_INTERRUPT)
req = VM_ALLOC_SYSTEM;
+ if (req == VM_ALLOC_INTERRUPT)
+ limit = 0;
+ else if (req == VM_ALLOC_SYSTEM)
+ limit = vmd->vmd_interrupt_free_min;
+ else
+ limit = vmd->vmd_free_reserved;
- if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved ||
- (req == VM_ALLOC_SYSTEM &&
- vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) ||
- (req == VM_ALLOC_INTERRUPT &&
- vmd->vmd_free_count >= npages))
- return (1);
+ /*
+ * Attempt to reserve the pages. Fail if we're below the limit.
+ */
+ limit += npages;
+ old = vmd->vmd_free_count;
+ do {
+ if (old < limit)
+ return (0);
+ new = old - npages;
+ } while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0);
- return (0);
+ /* Wake the page daemon if we've crossed the threshold. */
+ if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old))
+ pagedaemon_wakeup(vmd->vmd_domain);
+
+ /* Only update bitsets on transitions. */
+ if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) ||
+ (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe))
+ vm_domain_set(vmd);
+
+ return (1);
}
vm_page_t
@@ -1723,44 +1741,34 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
again:
m = NULL;
#if VM_NRESERVLEVEL > 0
+ /*
+ * Can we allocate the page from a reservation?
+ */
if (vm_object_reserv(object) &&
- (m = vm_reserv_extend(req, object, pindex, domain, mpred))
- != NULL) {
+ ((m = vm_reserv_extend(req, object, pindex, domain, mpred)) != NULL ||
+ (m = vm_reserv_alloc_page(req, object, pindex, domain, mpred)) != NULL)) {
domain = vm_phys_domain(m);
vmd = VM_DOMAIN(domain);
goto found;
}
#endif
vmd = VM_DOMAIN(domain);
- vm_domain_free_lock(vmd);
- if (vm_domain_available(vmd, req, 1)) {
+ if (vm_domain_allocate(vmd, req, 1)) {
/*
- * Can we allocate the page from a reservation?
+ * If not, allocate it from the free page queues.
*/
+ vm_domain_free_lock(vmd);
+ m = vm_phys_alloc_pages(domain, object != NULL ?
+ VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
+ vm_domain_free_unlock(vmd);
+ if (m == NULL) {
+ vm_domain_freecnt_inc(vmd, 1);
#if VM_NRESERVLEVEL > 0
- if (!vm_object_reserv(object) ||
- (m = vm_reserv_alloc_page(object, pindex,
- domain, mpred)) == NULL)
-#endif
- {
- /*
- * If not, allocate it from the free page queues.
- */
- m = vm_phys_alloc_pages(domain, object != NULL ?
- VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-#if VM_NRESERVLEVEL > 0
- if (m == NULL && vm_reserv_reclaim_inactive(domain)) {
- m = vm_phys_alloc_pages(domain,
- object != NULL ?
- VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
- 0);
- }
+ if (vm_reserv_reclaim_inactive(domain))
+ goto again;
#endif
}
}
- if (m != NULL)
- vm_domain_freecnt_dec(vmd, 1);
- vm_domain_free_unlock(vmd);
if (m == NULL) {
/*
* Not allocatable, give up.
@@ -1775,9 +1783,7 @@ again:
*/
KASSERT(m != NULL, ("missing page"));
-#if VM_NRESERVLEVEL > 0
found:
-#endif
vm_page_alloc_check(m);
/*
@@ -1934,9 +1940,14 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
*/
again:
#if VM_NRESERVLEVEL > 0
+ /*
+ * Can we allocate the pages from a reservation?
+ */
if (vm_object_reserv(object) &&
- (m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
- npages, low, high, alignment, boundary, mpred)) != NULL) {
+ ((m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
+ npages, low, high, alignment, boundary, mpred)) != NULL ||
+ (m_ret = vm_reserv_alloc_contig(req, object, pindex, domain,
+ npages, low, high, alignment, boundary, mpred)) != NULL)) {
domain = vm_phys_domain(m_ret);
vmd = VM_DOMAIN(domain);
goto found;
@@ -1944,31 +1955,23 @@ again:
#endif
m_ret = NULL;
vmd = VM_DOMAIN(domain);
- vm_domain_free_lock(vmd);
- if (vm_domain_available(vmd, req, npages)) {
+ if (vm_domain_allocate(vmd, req, npages)) {
/*
- * Can we allocate the pages from a reservation?
+ * allocate them from the free page queues.
*/
+ vm_domain_free_lock(vmd);
+ m_ret = vm_phys_alloc_contig(domain, npages, low, high,
+ alignment, boundary);
+ vm_domain_free_unlock(vmd);
+ if (m_ret == NULL) {
+ vm_domain_freecnt_inc(vmd, npages);
#if VM_NRESERVLEVEL > 0
-retry:
- if (!vm_object_reserv(object) ||
- (m_ret = vm_reserv_alloc_contig(object, pindex, domain,
- npages, low, high, alignment, boundary, mpred)) == NULL)
-#endif
- /*
- * If not, allocate them from the free page queues.
- */
- m_ret = vm_phys_alloc_contig(domain, npages, low, high,
- alignment, boundary);
-#if VM_NRESERVLEVEL > 0
- if (m_ret == NULL && vm_reserv_reclaim_contig(
- domain, npages, low, high, alignment, boundary))
- goto retry;
+ if (vm_reserv_reclaim_contig(domain, npages, low,
+ high, alignment, boundary))
+ goto again;
#endif
+ }
}
- if (m_ret != NULL)
- vm_domain_freecnt_dec(vmd, npages);
- vm_domain_free_unlock(vmd);
if (m_ret == NULL) {
if (vm_domain_alloc_fail(vmd, object, req))
goto again;
@@ -2109,13 +2112,14 @@ vm_page_alloc_freelist_domain(int domain, int freelist, int req)
*/
vmd = VM_DOMAIN(domain);
again:
- vm_domain_free_lock(vmd);
- if (vm_domain_available(vmd, req, 1))
+ if (vm_domain_allocate(vmd, req, 1)) {
+ vm_domain_free_lock(vmd);
m = vm_phys_alloc_freelist_pages(domain, freelist,
VM_FREEPOOL_DIRECT, 0);
- if (m != NULL)
- vm_domain_freecnt_dec(vmd, 1);
- vm_domain_free_unlock(vmd);
+ vm_domain_free_unlock(vmd);
+ if (m == NULL)
+ vm_domain_freecnt_inc(vmd, 1);
+ }
if (m == NULL) {
if (vm_domain_alloc_fail(vmd, NULL, req))
goto again;
@@ -2491,8 +2495,9 @@ retry:
vm_page_remque(m);
vm_page_replace_checked(m_new, object,
m->pindex, m);
- m->valid = 0;
- vm_page_undirty(m);
+ if (vm_page_free_prep(m, false))
+ SLIST_INSERT_HEAD(&free, m,
+ plinks.s.ss);
/*
* The new page must be deactivated
@@ -2504,10 +2509,12 @@ retry:
m->flags &= ~PG_ZERO;
vm_page_remque(m);
vm_page_remove(m);
+ if (vm_page_free_prep(m, false))
+ SLIST_INSERT_HEAD(&free, m,
+ plinks.s.ss);
KASSERT(m->dirty == 0,
("page %p is dirty", m));
}
- SLIST_INSERT_HEAD(&free, m, plinks.s.ss);
} else
error = EBUSY;
unlock:
@@ -2548,7 +2555,7 @@ unlock:
do {
MPASS(vm_phys_domain(m) == domain);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
- vm_page_free_phys(vmd, m);
+ vm_phys_free_pages(m, 0);
cnt++;
} while ((m = SLIST_FIRST(&free)) != NULL);
vm_domain_free_unlock(vmd);
@@ -3159,24 +3166,12 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
- return (true);
-}
-
-/*
- * Insert the page into the physical memory allocator's free page
- * queues. This is the last step to free a page. The caller is
- * responsible for adjusting the free page count.
- */
-static void
-vm_page_free_phys(struct vm_domain *vmd, vm_page_t m)
-{
-
- vm_domain_free_assert_locked(vmd);
-
#if VM_NRESERVLEVEL > 0
- if (!vm_reserv_free_page(m))
+ if (vm_reserv_free_page(m))
+ return (false);
#endif
- vm_phys_free_pages(m, 0);
+
+ return (true);
}
void
@@ -3200,7 +3195,7 @@ vm_page_free_phys_pglist(struct pglist *tq)
vmd = vm_pagequeue_domain(m);
vm_domain_free_lock(vmd);
}
- vm_page_free_phys(vmd, m);
+ vm_phys_free_pages(m, 0);
cnt++;
}
if (vmd != NULL) {
@@ -3227,7 +3222,7 @@ vm_page_free_toq(vm_page_t m)
return;
vmd = vm_pagequeue_domain(m);
vm_domain_free_lock(vmd);
- vm_page_free_phys(vmd, m);
+ vm_phys_free_pages(m, 0);
vm_domain_free_unlock(vmd);
vm_domain_freecnt_inc(vmd, 1);
}
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index b13657072717..894b4b57d31b 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -180,7 +180,7 @@ vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
void vm_domain_set(struct vm_domain *vmd);
void vm_domain_clear(struct vm_domain *vmd);
-int vm_domain_available(struct vm_domain *vmd, int req, int npages);
+int vm_domain_allocate(struct vm_domain *vmd, int req, int npages);
/*
* vm_pagequeue_domain:
@@ -266,22 +266,5 @@ vm_domain_freecnt_inc(struct vm_domain *vmd, int adj)
vm_domain_clear(vmd);
}
-static inline void
-vm_domain_freecnt_dec(struct vm_domain *vmd, int adj)
-{
- u_int old, new;
-
- old = atomic_fetchadd_int(&vmd->vmd_free_count, -adj);
- new = old - adj;
- KASSERT(new >= 0, ("vm_domain_freecnt_dec: free count underflow"));
- if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old))
- pagedaemon_wakeup(vmd->vmd_domain);
- /* Only update bitsets on transitions. */
- if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) ||
- (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe))
- vm_domain_set(vmd);
-}
-
-
#endif /* _KERNEL */
#endif /* !_VM_PAGEQUEUE_ */
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 5241f326fc1b..56d51588aa4c 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/counter.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
@@ -54,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
+#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -166,22 +169,37 @@ popmap_is_set(popmap_t popmap[], int i)
*
* A partially populated reservation can be broken and reclaimed at any time.
*
- * f - vm_domain_free_lock
+ * r - vm_reserv_lock
+ * d - vm_reserv_domain_lock
* o - vm_reserv_object_lock
* c - constant after boot
*/
struct vm_reserv {
- TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */
- LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */
- vm_object_t object; /* (o, f) containing object */
- vm_pindex_t pindex; /* (o, f) offset in object */
+ struct mtx lock; /* reservation lock. */
+ TAILQ_ENTRY(vm_reserv) partpopq; /* (d) per-domain queue. */
+ LIST_ENTRY(vm_reserv) objq; /* (o, r) object queue */
+ vm_object_t object; /* (o, r) containing object */
+ vm_pindex_t pindex; /* (o, r) offset in object */
vm_page_t pages; /* (c) first page */
- int domain; /* (c) NUMA domain. */
- int popcnt; /* (f) # of pages in use */
- char inpartpopq; /* (f) */
- popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */
+ uint16_t domain; /* (c) NUMA domain. */
+ uint16_t popcnt; /* (r) # of pages in use */
+ char inpartpopq; /* (d) */
+ popmap_t popmap[NPOPMAP]; /* (r) bit vector, used pages */
};
+#define vm_reserv_lockptr(rv) (&(rv)->lock)
+#define vm_reserv_assert_locked(rv) \
+ mtx_assert(vm_reserv_lockptr(rv), MA_OWNED)
+#define vm_reserv_lock(rv) mtx_lock(vm_reserv_lockptr(rv))
+#define vm_reserv_trylock(rv) mtx_trylock(vm_reserv_lockptr(rv))
+#define vm_reserv_unlock(rv) mtx_unlock(vm_reserv_lockptr(rv))
+
+static struct mtx_padalign vm_reserv_domain_locks[MAXMEMDOM];
+
+#define vm_reserv_domain_lockptr(d) &vm_reserv_domain_locks[(d)]
+#define vm_reserv_domain_lock(d) mtx_lock(vm_reserv_domain_lockptr(d))
+#define vm_reserv_domain_unlock(d) mtx_unlock(vm_reserv_domain_lockptr(d))
+
/*
* The reservation array
*
@@ -218,13 +236,13 @@ static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM];
static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
-static long vm_reserv_broken;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
- &vm_reserv_broken, 0, "Cumulative number of broken reservations");
+static counter_u64_t vm_reserv_broken = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
+ &vm_reserv_broken, "Cumulative number of broken reservations");
-static long vm_reserv_freed;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
- &vm_reserv_freed, 0, "Cumulative number of freed reservations");
+static counter_u64_t vm_reserv_freed = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
+ &vm_reserv_freed, "Cumulative number of freed reservations");
static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS);
@@ -236,9 +254,9 @@ static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues");
-static long vm_reserv_reclaimed;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
- &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
+static counter_u64_t vm_reserv_reclaimed = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
+ &vm_reserv_reclaimed, "Cumulative number of reclaimed reservations");
/*
* The object lock pool is used to synchronize the rvq. We can not use a
@@ -313,12 +331,12 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0;
unused_pages = 0;
- vm_domain_free_lock(VM_DOMAIN(domain));
+ vm_reserv_domain_lock(domain);
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
}
- vm_domain_free_unlock(VM_DOMAIN(domain));
+ vm_reserv_domain_unlock(domain);
sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
domain, level,
unused_pages * ((int)PAGE_SIZE / 1024), counter);
@@ -337,6 +355,9 @@ vm_reserv_remove(vm_reserv_t rv)
{
vm_object_t object;
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
KASSERT(rv->object != NULL,
("vm_reserv_remove: reserv %p is free", rv));
KASSERT(!rv->inpartpopq,
@@ -356,6 +377,11 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
{
int i;
+ vm_reserv_assert_locked(rv);
+ CTR6(KTR_VM,
+ "%s: rv %p(%p) object %p new %p popcnt %d",
+ __FUNCTION__, rv, rv->pages, rv->object, object,
+ rv->popcnt);
KASSERT(rv->object == NULL,
("vm_reserv_insert: reserv %p isn't free", rv));
KASSERT(rv->popcnt == 0,
@@ -377,14 +403,15 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
* becomes zero, the reservation is destroyed. Additionally, moves the
* reservation to the tail of the partially populated reservation queue if the
* population count is non-zero.
- *
- * The free page queue lock must be held.
*/
static void
vm_reserv_depopulate(vm_reserv_t rv, int index)
{
+ struct vm_domain *vmd;
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
KASSERT(rv->object != NULL,
("vm_reserv_depopulate: reserv %p is free", rv));
KASSERT(popmap_is_set(rv->popmap, index),
@@ -395,10 +422,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
rv, rv->domain));
- if (rv->inpartpopq) {
- TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
- rv->inpartpopq = FALSE;
- } else {
+ if (rv->popcnt == VM_LEVEL_0_NPAGES) {
KASSERT(rv->pages->psind == 1,
("vm_reserv_depopulate: reserv %p is already demoted",
rv));
@@ -406,14 +430,25 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
}
popmap_clear(rv->popmap, index);
rv->popcnt--;
+ vm_reserv_domain_lock(rv->domain);
+ if (rv->inpartpopq) {
+ TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
+ rv->inpartpopq = FALSE;
+ }
+ if (rv->popcnt != 0) {
+ rv->inpartpopq = TRUE;
+ TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
+ }
+ vm_reserv_domain_unlock(rv->domain);
+ vmd = VM_DOMAIN(rv->domain);
if (rv->popcnt == 0) {
vm_reserv_remove(rv);
+ vm_domain_free_lock(vmd);
vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
- vm_reserv_freed++;
- } else {
- rv->inpartpopq = TRUE;
- TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
+ vm_domain_free_unlock(vmd);
+ counter_u64_add(vm_reserv_freed, 1);
}
+ vm_domain_freecnt_inc(vmd, 1);
}
/*
@@ -484,7 +519,9 @@ static void
vm_reserv_populate(vm_reserv_t rv, int index)
{
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
KASSERT(rv->object != NULL,
("vm_reserv_populate: reserv %p is free", rv));
KASSERT(popmap_is_clear(rv->popmap, index),
@@ -497,17 +534,23 @@ vm_reserv_populate(vm_reserv_t rv, int index)
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_populate: reserv %p's domain is corrupted %d",
rv, rv->domain));
+ popmap_set(rv->popmap, index);
+ rv->popcnt++;
+ vm_reserv_domain_lock(rv->domain);
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
}
- popmap_set(rv->popmap, index);
- rv->popcnt++;
if (rv->popcnt < VM_LEVEL_0_NPAGES) {
rv->inpartpopq = TRUE;
TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
- } else
+ } else {
+ KASSERT(rv->pages->psind == 0,
+ ("vm_reserv_populate: reserv %p is already promoted",
+ rv));
rv->pages->psind = 1;
+ }
+ vm_reserv_domain_unlock(rv->domain);
}
/*
@@ -572,31 +615,29 @@ vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex,
return (NULL);
domain = rv->domain;
vmd = VM_DOMAIN(domain);
- vm_domain_free_lock(vmd);
- if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
- m = NULL;
+ vm_reserv_lock(rv);
+ if (rv->object != object)
goto out;
- }
m = &rv->pages[index];
pa = VM_PAGE_TO_PHYS(m);
if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
- ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
- m = NULL;
+ ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
goto out;
- }
/* Handle vm_page_rename(m, new_object, ...). */
for (i = 0; i < npages; i++) {
- if (popmap_is_set(rv->popmap, index + i)) {
- m = NULL;
+ if (popmap_is_set(rv->popmap, index + i))
goto out;
- }
}
+ if (!vm_domain_allocate(vmd, req, npages))
+ goto out;
for (i = 0; i < npages; i++)
vm_reserv_populate(rv, index + i);
- vm_domain_freecnt_dec(vmd, npages);
-out:
- vm_domain_free_unlock(vmd);
+ vm_reserv_unlock(rv);
return (m);
+
+out:
+ vm_reserv_unlock(rv);
+ return (NULL);
}
/*
@@ -618,10 +659,11 @@ out:
* The object and free page queue must be locked.
*/
vm_page_t
-vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
+vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex, int domain,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred)
{
+ struct vm_domain *vmd;
vm_paddr_t pa, size;
vm_page_t m, m_ret, msucc;
vm_pindex_t first, leftcap, rightcap;
@@ -629,7 +671,6 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
u_long allocpages, maxpages, minpages;
int i, index, n;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
@@ -737,9 +778,19 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
* specified index may not be the first page within the first new
* reservation.
*/
- m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
- VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
- if (m == NULL)
+ m = NULL;
+ vmd = VM_DOMAIN(domain);
+ if (vm_domain_allocate(vmd, req, npages)) {
+ vm_domain_free_lock(vmd);
+ m = vm_phys_alloc_contig(domain, allocpages, low, high,
+ ulmax(alignment, VM_LEVEL_0_SIZE),
+ boundary > VM_LEVEL_0_SIZE ? boundary : 0);
+ vm_domain_free_unlock(vmd);
+ if (m == NULL) {
+ vm_domain_freecnt_inc(vmd, npages);
+ return (NULL);
+ }
+ } else
return (NULL);
KASSERT(vm_phys_domain(m) == domain,
("vm_reserv_alloc_contig: Page domain does not match requested."));
@@ -757,6 +808,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
KASSERT(rv->pages == m,
("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
rv));
+ vm_reserv_lock(rv);
vm_reserv_insert(rv, object, first);
n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
for (i = 0; i < n; i++)
@@ -766,6 +818,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
m_ret = &rv->pages[index];
index = 0;
}
+ vm_reserv_unlock(rv);
m += VM_LEVEL_0_NPAGES;
first += VM_LEVEL_0_NPAGES;
allocpages -= VM_LEVEL_0_NPAGES;
@@ -813,18 +866,20 @@ vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain,
vmd = VM_DOMAIN(domain);
index = VM_RESERV_INDEX(object, pindex);
m = &rv->pages[index];
- vm_domain_free_lock(vmd);
- if (vm_domain_available(vmd, req, 1) == 0 ||
- /* Handle reclaim race. */
- rv->object != object ||
+ vm_reserv_lock(rv);
+ /* Handle reclaim race. */
+ if (rv->object != object ||
/* Handle vm_page_rename(m, new_object, ...). */
- popmap_is_set(rv->popmap, index))
+ popmap_is_set(rv->popmap, index)) {
m = NULL;
- if (m != NULL) {
- vm_reserv_populate(rv, index);
- vm_domain_freecnt_dec(vmd, 1);
+ goto out;
}
- vm_domain_free_unlock(vmd);
+ if (vm_domain_allocate(vmd, req, 1) == 0)
+ m = NULL;
+ else
+ vm_reserv_populate(rv, index);
+out:
+ vm_reserv_unlock(rv);
return (m);
}
@@ -840,15 +895,15 @@ vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain,
* The object and free page queue must be locked.
*/
vm_page_t
-vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
+vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex, int domain,
vm_page_t mpred)
{
+ struct vm_domain *vmd;
vm_page_t m, msucc;
vm_pindex_t first, leftcap, rightcap;
vm_reserv_t rv;
int index;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
/*
@@ -917,15 +972,28 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
/*
* Allocate and populate the new reservation.
*/
- m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
- if (m == NULL)
+ m = NULL;
+ vmd = VM_DOMAIN(domain);
+ if (vm_domain_allocate(vmd, req, 1)) {
+ vm_domain_free_lock(vmd);
+ m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+ VM_LEVEL_0_ORDER);
+ vm_domain_free_unlock(vmd);
+ if (m == NULL) {
+ vm_domain_freecnt_inc(vmd, 1);
+ return (NULL);
+ }
+ } else
return (NULL);
rv = vm_reserv_from_page(m);
+ vm_reserv_lock(rv);
KASSERT(rv->pages == m,
("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
vm_reserv_insert(rv, object, first);
index = VM_RESERV_INDEX(object, pindex);
vm_reserv_populate(rv, index);
+ vm_reserv_unlock(rv);
+
return (&rv->pages[index]);
}
@@ -942,7 +1010,9 @@ vm_reserv_break(vm_reserv_t rv)
{
int begin_zeroes, hi, i, lo;
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
vm_reserv_remove(rv);
rv->pages->psind = 0;
i = hi = 0;
@@ -981,12 +1051,14 @@ vm_reserv_break(vm_reserv_t rv)
if (i != NPOPMAP)
/* Convert from ffsl() to ordinary bit numbering. */
hi--;
+ vm_domain_free_lock(VM_DOMAIN(rv->domain));
vm_phys_free_contig(&rv->pages[begin_zeroes], NBPOPMAP * i +
hi - begin_zeroes);
+ vm_domain_free_unlock(VM_DOMAIN(rv->domain));
} while (i < NPOPMAP);
KASSERT(rv->popcnt == 0,
("vm_reserv_break: reserv %p's popcnt is corrupted", rv));
- vm_reserv_broken++;
+ counter_u64_add(vm_reserv_broken, 1);
}
/*
@@ -996,7 +1068,6 @@ void
vm_reserv_break_all(vm_object_t object)
{
vm_reserv_t rv;
- struct vm_domain *vmd;
/*
* This access of object->rvq is unsynchronized so that the
@@ -1005,27 +1076,22 @@ vm_reserv_break_all(vm_object_t object)
* lock prevents new additions, so we are guaranteed that when
* it returns NULL the object is properly empty.
*/
- vmd = NULL;
while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
- if (vmd != VM_DOMAIN(rv->domain)) {
- if (vmd != NULL)
- vm_domain_free_unlock(vmd);
- vmd = VM_DOMAIN(rv->domain);
- vm_domain_free_lock(vmd);
- }
+ vm_reserv_lock(rv);
/* Reclaim race. */
- if (rv->object != object)
+ if (rv->object != object) {
+ vm_reserv_unlock(rv);
continue;
- KASSERT(rv->object == object,
- ("vm_reserv_break_all: reserv %p is corrupted", rv));
+ }
+ vm_reserv_domain_lock(rv->domain);
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
}
+ vm_reserv_domain_unlock(rv->domain);
vm_reserv_break(rv);
+ vm_reserv_unlock(rv);
}
- if (vmd != NULL)
- vm_domain_free_unlock(vmd);
}
/*
@@ -1038,13 +1104,21 @@ boolean_t
vm_reserv_free_page(vm_page_t m)
{
vm_reserv_t rv;
+ boolean_t ret;
rv = vm_reserv_from_page(m);
if (rv->object == NULL)
return (FALSE);
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
- vm_reserv_depopulate(rv, m - rv->pages);
- return (TRUE);
+ vm_reserv_lock(rv);
+ /* Re-validate after lock. */
+ if (rv->object != NULL) {
+ vm_reserv_depopulate(rv, m - rv->pages);
+ ret = TRUE;
+ } else
+ ret = FALSE;
+ vm_reserv_unlock(rv);
+
+ return (ret);
}
/*
@@ -1058,6 +1132,7 @@ vm_reserv_init(void)
{
vm_paddr_t paddr;
struct vm_phys_seg *seg;
+ struct vm_reserv *rv;
int i, segind;
/*
@@ -1068,15 +1143,22 @@ vm_reserv_init(void)
seg = &vm_phys_segs[segind];
paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
- vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
- PHYS_TO_VM_PAGE(paddr);
- vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain =
- seg->domain;
+ rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
+ rv->pages = PHYS_TO_VM_PAGE(paddr);
+ rv->domain = seg->domain;
+ mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF);
paddr += VM_LEVEL_0_SIZE;
}
}
- for (i = 0; i < MAXMEMDOM; i++)
+ for (i = 0; i < MAXMEMDOM; i++) {
+ mtx_init(&vm_reserv_domain_locks[i], "VM reserv domain", NULL,
+ MTX_DEF);
TAILQ_INIT(&vm_rvq_partpop[i]);
+ }
+
+ for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
+ mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
+ MTX_DEF);
}
/*
@@ -1091,7 +1173,6 @@ vm_reserv_is_page_free(vm_page_t m)
rv = vm_reserv_from_page(m);
if (rv->object == NULL)
return (false);
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
return (popmap_is_clear(rv->popmap, m - rv->pages));
}
@@ -1131,7 +1212,10 @@ static void
vm_reserv_reclaim(vm_reserv_t rv)
{
- vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_assert_locked(rv);
+ CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
+ vm_reserv_domain_lock(rv->domain);
KASSERT(rv->inpartpopq,
("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
@@ -1139,8 +1223,9 @@ vm_reserv_reclaim(vm_reserv_t rv)
rv, rv->domain));
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
+ vm_reserv_domain_unlock(rv->domain);
vm_reserv_break(rv);
- vm_reserv_reclaimed++;
+ counter_u64_add(vm_reserv_reclaimed, 1);
}
/*
@@ -1155,9 +1240,14 @@ vm_reserv_reclaim_inactive(int domain)
{
vm_reserv_t rv;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
- if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
+ while ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
+ vm_reserv_lock(rv);
+ if (rv != TAILQ_FIRST(&vm_rvq_partpop[domain])) {
+ vm_reserv_unlock(rv);
+ continue;
+ }
vm_reserv_reclaim(rv);
+ vm_reserv_unlock(rv);
return (TRUE);
}
return (FALSE);
@@ -1176,14 +1266,16 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
vm_paddr_t pa, size;
- vm_reserv_t rv;
+ vm_reserv_t rv, rvn;
int hi, i, lo, low_index, next_free;
- vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (npages > VM_LEVEL_0_NPAGES - 1)
return (FALSE);
size = npages << PAGE_SHIFT;
- TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
+ vm_reserv_domain_lock(domain);
+again:
+ for (rv = TAILQ_FIRST(&vm_rvq_partpop[domain]); rv != NULL; rv = rvn) {
+ rvn = TAILQ_NEXT(rv, partpopq);
pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
if (pa + PAGE_SIZE - size < low) {
/* This entire reservation is too low; go to next. */
@@ -1194,6 +1286,17 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
/* This entire reservation is too high; go to next. */
continue;
}
+ if (vm_reserv_trylock(rv) == 0) {
+ vm_reserv_domain_unlock(domain);
+ vm_reserv_lock(rv);
+ if (!rv->inpartpopq) {
+ vm_reserv_domain_lock(domain);
+ if (!rvn->inpartpopq)
+ goto again;
+ continue;
+ }
+ } else
+ vm_reserv_domain_unlock(domain);
if (pa < low) {
/* Start the search for free pages at "low". */
low_index = (low + PAGE_MASK - pa) >> PAGE_SHIFT;
@@ -1239,6 +1342,7 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
if ((NBPOPMAP * i - next_free) * PAGE_SIZE >=
size) {
vm_reserv_reclaim(rv);
+ vm_reserv_unlock(rv);
return (TRUE);
}
hi = ffsl(rv->popmap[i]);
@@ -1249,10 +1353,16 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
if ((NBPOPMAP * i + hi - next_free) * PAGE_SIZE >=
size) {
vm_reserv_reclaim(rv);
+ vm_reserv_unlock(rv);
return (TRUE);
}
} while (i < NPOPMAP);
+ vm_reserv_unlock(rv);
+ vm_reserv_domain_lock(domain);
+ if (rvn != NULL && !rvn->inpartpopq)
+ goto again;
}
+ vm_reserv_domain_unlock(domain);
return (FALSE);
}
@@ -1270,7 +1380,11 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
VM_OBJECT_ASSERT_WLOCKED(new_object);
rv = vm_reserv_from_page(m);
if (rv->object == old_object) {
- vm_domain_free_lock(VM_DOMAIN(rv->domain));
+ vm_reserv_lock(rv);
+ CTR6(KTR_VM,
+ "%s: rv %p object %p new %p popcnt %d inpartpop %d",
+ __FUNCTION__, rv, rv->object, new_object, rv->popcnt,
+ rv->inpartpopq);
if (rv->object == old_object) {
vm_reserv_object_lock(old_object);
rv->object = NULL;
@@ -1282,7 +1396,7 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
vm_reserv_object_unlock(new_object);
}
- vm_domain_free_unlock(VM_DOMAIN(rv->domain));
+ vm_reserv_unlock(rv);
}
}
@@ -1312,7 +1426,6 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
{
vm_paddr_t new_end;
size_t size;
- int i;
/*
* Calculate the size (in bytes) of the reservation array. Round up
@@ -1332,10 +1445,6 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
VM_PROT_READ | VM_PROT_WRITE);
bzero(vm_reserv_array, size);
- for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
- mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
- MTX_DEF);
-
/*
* Return the next available physical address.
*/
@@ -1343,6 +1452,21 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
}
/*
+ * Initializes the reservation management system. Specifically, initializes
+ * the reservation counters.
+ */
+static void
+vm_reserv_counter_init(void *unused)
+{
+
+ vm_reserv_freed = counter_u64_alloc(M_WAITOK);
+ vm_reserv_broken = counter_u64_alloc(M_WAITOK);
+ vm_reserv_reclaimed = counter_u64_alloc(M_WAITOK);
+}
+SYSINIT(vm_reserv_counter_init, SI_SUB_CPU, SI_ORDER_ANY,
+ vm_reserv_counter_init, NULL);
+
+/*
* Returns the superpage containing the given page.
*/
vm_page_t
@@ -1352,8 +1476,12 @@ vm_reserv_to_superpage(vm_page_t m)
VM_OBJECT_ASSERT_LOCKED(m->object);
rv = vm_reserv_from_page(m);
- return (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES ?
- rv->pages : NULL);
+ if (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES)
+ m = rv->pages;
+ else
+ m = NULL;
+
+ return (m);
}
#endif /* VM_NRESERVLEVEL > 0 */
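The vm_reserv.c hunks above also convert the vm_reserv_broken, vm_reserv_freed and vm_reserv_reclaimed statistics from plain longs updated under the free lock to the counter(9) per-CPU API, with the backing storage allocated from a SYSINIT once the CPUs are known. A minimal sketch of that pattern, assuming the standard counter(9) and sysctl(9) interfaces; the names example_events and example_counter_init are illustrative, not part of this commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

/* Starts as EARLY_COUNTER so the sysctl can be registered before SMP is up. */
static counter_u64_t example_events = EARLY_COUNTER;
SYSCTL_COUNTER_U64(_vm, OID_AUTO, reserv_example_events, CTLFLAG_RD,
    &example_events, "Cumulative number of example events");

static void
example_counter_init(void *unused)
{

	/* Replace the early placeholder with real per-CPU storage. */
	example_events = counter_u64_alloc(M_WAITOK);
}
SYSINIT(example_counter_init, SI_SUB_CPU, SI_ORDER_ANY,
    example_counter_init, NULL);

static void
example_event(void)
{

	/* Per-CPU increment; no shared cache line is bounced. */
	counter_u64_add(example_events, 1);
}

counter_u64_add() is cheap on the update side because each CPU writes only its own slot; reads via counter_u64_fetch() sum all slots, which is why the API suits statistics that are incremented often and read rarely.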
diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h
index 91d2bf6aa389..e46b0ac59526 100644
--- a/sys/vm/vm_reserv.h
+++ b/sys/vm/vm_reserv.h
@@ -47,14 +47,14 @@
/*
* The following functions are only to be used by the virtual memory system.
*/
-vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
+vm_page_t vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_extend_contig(int req, vm_object_t object,
vm_pindex_t pindex, int domain, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred);
-vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
+vm_page_t vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex,
int domain, vm_page_t mpred);
vm_page_t vm_reserv_extend(int req, vm_object_t object,
vm_pindex_t pindex, int domain, vm_page_t mpred);