diff options
author | Konstantin Belousov <kib@FreeBSD.org> | 2013-08-07 16:36:38 +0000 |
---|---|---|
committer | Konstantin Belousov <kib@FreeBSD.org> | 2013-08-07 16:36:38 +0000 |
commit | 449c2e92c99cffdc00ebe516479f36154bf2c54c (patch) | |
tree | 958d5d7311cef3751ddc3af3de19306b77bec18d /sys/vm/vm_page.c | |
parent | 872d995f76b96d6fb14208334f73ba5918abb769 (diff) | |
download | src-449c2e92c99cffdc00ebe516479f36154bf2c54c.tar.gz src-449c2e92c99cffdc00ebe516479f36154bf2c54c.zip |
Split the pagequeues per NUMA domains, and split the pagedaemon process
into threads, each processing the queue in a single domain. The structure
of the pagedaemons and queues is kept intact, most of the changes come
from the need for code to find an owning page queue for given page,
calculated from the segment containing the page.
The tie between NUMA domain and pagedaemon thread/pagequeue split is
rather arbitrary, the multithreaded daemon could be allowed for the
single-domain machines, or one domain might be split into several page
domains, to further increase concurrency.
Right now, each pagedaemon thread tries to reach the global target,
precalculated at the start of the pass. This is not optimal, since it
could cause excessive page deactivation and freeing. The code should
be changed to re-check the global page deficit state in the loop after
some number of iterations.
The pagedaemons reach the quorum before starting the OOM, since one
thread's inability to meet the target is normal for split queues. Only
when all pagedaemons fail to produce enough reusable pages, OOM is
started by single selected thread.
Launder is modified to take into account the segments layout with
regard to the region for which cleaning is performed.
Based on the preliminary patch by jeff, sponsored by EMC / Isilon
Storage Division.
Reviewed by: alc
Tested by: pho
Sponsored by: The FreeBSD Foundation
Notes:
svn path=/head/; revision=254065
Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r-- | sys/vm/vm_page.c | 111 |
1 files changed, 67 insertions, 44 deletions
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 4a167c17af18..36689f641fb0 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -64,8 +64,7 @@ * GENERAL RULES ON VM_PAGE MANIPULATION * * - A page queue lock is required when adding or removing a page from a - * page queue (vm_pagequeues[]), regardless of other locks or the - * busy state of a page. + * page queue regardless of other locks or the busy state of a page. * * * In general, no thread besides the page daemon can acquire or * hold more than one page queue lock at a time. @@ -124,20 +123,7 @@ __FBSDID("$FreeBSD$"); * page structure. */ -struct vm_pagequeue vm_pagequeues[PQ_COUNT] = { - [PQ_INACTIVE] = { - .pq_pl = TAILQ_HEAD_INITIALIZER( - vm_pagequeues[PQ_INACTIVE].pq_pl), - .pq_cnt = &cnt.v_inactive_count, - .pq_name = "vm inactive pagequeue" - }, - [PQ_ACTIVE] = { - .pq_pl = TAILQ_HEAD_INITIALIZER( - vm_pagequeues[PQ_ACTIVE].pq_pl), - .pq_cnt = &cnt.v_active_count, - .pq_name = "vm active pagequeue" - } -}; +struct vm_domain vm_dom[MAXMEMDOM]; struct mtx_padalign vm_page_queue_free_mtx; struct mtx_padalign pa_lock[PA_LOCK_COUNT]; @@ -256,6 +242,34 @@ vm_page_blacklist_lookup(char *list, vm_paddr_t pa) return (0); } +static void +vm_page_domain_init(struct vm_domain *vmd) +{ + struct vm_pagequeue *pq; + int i; + + *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = + "vm inactive pagequeue"; + *__DECONST(int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = + &cnt.v_inactive_count; + *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = + "vm active pagequeue"; + *__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = + &cnt.v_active_count; + vmd->vmd_fullintervalcount = 0; + vmd->vmd_page_count = 0; + vmd->vmd_free_count = 0; + vmd->vmd_segs = 0; + vmd->vmd_oom = FALSE; + vmd->vmd_pass = 0; + for (i = 0; i < PQ_COUNT; i++) { + pq = &vmd->vmd_pagequeues[i]; + TAILQ_INIT(&pq->pq_pl); + mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", + MTX_DEF | MTX_DUPOK); + } +} + 
/* * vm_page_startup: * @@ -319,8 +333,8 @@ vm_page_startup(vm_offset_t vaddr) mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); - for (i = 0; i < PQ_COUNT; i++) - vm_pagequeue_init_lock(&vm_pagequeues[i]); + for (i = 0; i < vm_ndomains; i++) + vm_page_domain_init(&vm_dom[i]); /* * Allocate memory for use when boot strapping the kernel memory @@ -1055,7 +1069,7 @@ vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE, ("vm_page_cache_free: page %p has inconsistent flags", m)); cnt.v_cache_count--; - cnt.v_free_count++; + vm_phys_freecnt_adj(m, 1); } empty = vm_radix_is_empty(&object->cache); mtx_unlock(&vm_page_queue_free_mtx); @@ -1311,7 +1325,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) ("vm_page_alloc: page %p is not free", m)); KASSERT(m->valid == 0, ("vm_page_alloc: free page %p is valid", m)); - cnt.v_free_count--; + vm_phys_freecnt_adj(m, -1); } /* @@ -1569,7 +1583,7 @@ vm_page_alloc_init(vm_page_t m) ("vm_page_alloc_init: page %p is not free", m)); KASSERT(m->valid == 0, ("vm_page_alloc_init: free page %p is valid", m)); - cnt.v_free_count--; + vm_phys_freecnt_adj(m, -1); if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; } @@ -1711,6 +1725,13 @@ vm_waitpfault(void) "pfault", 0); } +struct vm_pagequeue * +vm_page_pagequeue(vm_page_t m) +{ + + return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); +} + /* * vm_page_dequeue: * @@ -1726,11 +1747,11 @@ vm_page_dequeue(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, ("vm_page_dequeue: page %p is not queued", m)); - pq = &vm_pagequeues[m->queue]; + pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, pageq); - (*pq->pq_cnt)--; + vm_pagequeue_cnt_dec(pq); vm_pagequeue_unlock(pq); } @@ -1747,11 +1768,11 @@ vm_page_dequeue_locked(vm_page_t 
m) struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); - pq = &vm_pagequeues[m->queue]; + pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, pageq); - (*pq->pq_cnt)--; + vm_pagequeue_cnt_dec(pq); } /* @@ -1767,11 +1788,11 @@ vm_page_enqueue(int queue, vm_page_t m) struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); - pq = &vm_pagequeues[queue]; + pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); - ++*pq->pq_cnt; + vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } @@ -1790,7 +1811,7 @@ vm_page_requeue(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, ("vm_page_requeue: page %p is not queued", m)); - pq = &vm_pagequeues[m->queue]; + pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, m, pageq); TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); @@ -1811,7 +1832,7 @@ vm_page_requeue_locked(vm_page_t m) KASSERT(m->queue != PQ_NONE, ("vm_page_requeue_locked: page %p is not queued", m)); - pq = &vm_pagequeues[m->queue]; + pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); TAILQ_REMOVE(&pq->pq_pl, m, pageq); TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); @@ -1948,7 +1969,7 @@ vm_page_free_toq(vm_page_t m) */ mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_FREE; - cnt.v_free_count++; + vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #else @@ -2081,14 +2102,14 @@ _vm_page_deactivate(vm_page_t m, int athead) if (queue != PQ_NONE) vm_page_dequeue(m); m->flags &= ~PG_WINATCFLS; - pq = &vm_pagequeues[PQ_INACTIVE]; + pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; vm_pagequeue_lock(pq); m->queue = PQ_INACTIVE; if (athead) TAILQ_INSERT_HEAD(&pq->pq_pl, m, pageq); else TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); - cnt.v_inactive_count++; + vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } } @@ -2888,18 +2909,20 @@ DB_SHOW_COMMAND(page, 
vm_page_print_page_info) DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) { - - db_printf("PQ_FREE:"); - db_printf(" %d", cnt.v_free_count); - db_printf("\n"); - - db_printf("PQ_CACHE:"); - db_printf(" %d", cnt.v_cache_count); - db_printf("\n"); - - db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", - *vm_pagequeues[PQ_ACTIVE].pq_cnt, - *vm_pagequeues[PQ_INACTIVE].pq_cnt); + int dom; + + db_printf("pq_free %d pq_cache %d\n", + cnt.v_free_count, cnt.v_cache_count); + for (dom = 0; dom < vm_ndomains; dom++) { + db_printf( + "dom %d page_cnt %d free %d pq_act %d pq_inact %d pass %d\n", + dom, + vm_dom[dom].vmd_page_count, + vm_dom[dom].vmd_free_count, + vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, + vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, + vm_dom[dom].vmd_pass); + } } DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) |