diff options
-rw-r--r-- | sys/vm/swap_pager.c | 37 | ||||
-rw-r--r-- | sys/vm/vm_object.c | 571 | ||||
-rw-r--r-- | sys/vm/vm_object.h | 2 |
3 files changed, 330 insertions, 280 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index e83403da0755..3347536e54a6 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -974,15 +974,12 @@ swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject, * Destination has no swapblk and is not resident, transfer source. * swp_pager_meta_build() can sleep. */ - vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); - vm_object_pip_add(dstobject, 1); dstaddr = swp_pager_meta_build(dstobject, pindex, addr); KASSERT(dstaddr == SWAPBLK_NONE, ("Unexpected destination swapblk")); - vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); - vm_object_pip_wakeup(srcobject); + return (true); } @@ -995,8 +992,7 @@ swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject, * we keep the destination's. * * This routine is allowed to sleep. It may sleep allocating metadata - * indirectly through swp_pager_meta_build() or if paging is still in - * progress on the source. + * indirectly through swp_pager_meta_build(). * * The source object contains no vm_page_t's (which is just as well) * @@ -1019,18 +1015,14 @@ swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, */ if (destroysource && (srcobject->flags & OBJ_ANON) == 0 && srcobject->handle != NULL) { - vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); - vm_object_pip_add(dstobject, 1); VM_OBJECT_WUNLOCK(dstobject); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(dstobject); - vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); - vm_object_pip_wakeup(srcobject); } /* @@ -1207,26 +1199,29 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, reqcount = count; - /* - * Determine the final number of read-behind pages and - * allocate them BEFORE releasing the object lock. Otherwise, - * there can be a problematic race with vm_object_split(). - * Specifically, vm_object_split() might first transfer pages - * that precede ma[0] in the current object to a new object, - * and then this function incorrectly recreates those pages as - * read-behind pages in the current object. - */ KASSERT(object->type == OBJT_SWAP, ("%s: object not swappable", __func__)); if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) return (VM_PAGER_FAIL); + KASSERT(reqcount - 1 <= maxahead, + ("page count %d extends beyond swap block", reqcount)); + + /* + * Do not transfer any pages other than those that are xbusied + * when running during a split or collapse operation. This + * prevents clustering from re-creating pages which are being + * moved into another object. + */ + if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) { + maxahead = reqcount - 1; + maxbehind = 0; + } + /* * Clip the readahead and readbehind ranges to exclude resident pages. */ if (rahead != NULL) { - KASSERT(reqcount - 1 <= maxahead, - ("page count %d extends beyond swap block", reqcount)); *rahead = imin(*rahead, maxahead - (reqcount - 1)); pindex = ma[reqcount - 1]->pindex; msucc = TAILQ_NEXT(ma[reqcount - 1], listq); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index a3f691390c11..395ba6bad8c2 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -116,8 +116,6 @@ static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean); -static void vm_object_qcollapse(vm_object_t object); -static void vm_object_vndeallocate(vm_object_t object); static void vm_object_backing_remove(vm_object_t object); /* @@ -164,12 +162,18 @@ SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, &object_bypasses, "VM object bypasses"); +static counter_u64_t object_collapse_waits = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD, + &object_collapse_waits, + "Number of sleeps for collapse"); + static void counter_startup(void) { object_collapses = counter_u64_alloc(M_WAITOK); object_bypasses = counter_u64_alloc(M_WAITOK); + object_collapse_waits = counter_u64_alloc(M_WAITOK); } SYSINIT(object_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL); @@ -376,6 +380,19 @@ vm_object_pip_wakeupn(vm_object_t object, short i) refcount_releasen(&object->paging_in_progress, i); } +/* + * Atomically drop the interlock and wait for pip to drain. This protects + * from sleep/wakeup races due to identity changes. The lock is not + * re-acquired on return. + */ +static void +vm_object_pip_sleep(vm_object_t object, char *waitid) +{ + + refcount_sleep_interlock(&object->paging_in_progress, + &object->lock, waitid, PVM); +} + void vm_object_pip_wait(vm_object_t object, char *waitid) { @@ -383,8 +400,7 @@ vm_object_pip_wait(vm_object_t object, char *waitid) VM_OBJECT_ASSERT_WLOCKED(object); while (REFCOUNT_COUNT(object->paging_in_progress) > 0) { - VM_OBJECT_WUNLOCK(object); - refcount_wait(&object->paging_in_progress, waitid, PVM); + vm_object_pip_sleep(object, waitid); VM_OBJECT_WLOCK(object); } } @@ -466,30 +482,17 @@ vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object, return (object); } - -/* - * vm_object_reference: - * - * Gets another reference to the given object. Note: OBJ_DEAD - * objects can be referenced during final cleaning. - */ -void -vm_object_reference(vm_object_t object) +static void +vm_object_reference_vnode(vm_object_t object) { struct vnode *vp; u_int old; - if (object == NULL) - return; - /* - * Many places assume exclusive access to objects with a single - * ref. vm_object_collapse() in particular will directly mainpulate - * references for objects in this state. vnode objects only need - * the lock for the first ref to reference the vnode. + * vnode objects need the lock for the first reference + * to serialize with vnode_object_deallocate(). */ - if (!refcount_acquire_if_gt(&object->ref_count, - object->type == OBJT_VNODE ? 0 : 1)) { + if (!refcount_acquire_if_gt(&object->ref_count, 0)) { VM_OBJECT_RLOCK(object); old = refcount_acquire(&object->ref_count); if (object->type == OBJT_VNODE && old == 0) { @@ -501,6 +504,26 @@ vm_object_reference(vm_object_t object) } /* + * vm_object_reference: + * + * Acquires a reference to the given object. + */ +void +vm_object_reference(vm_object_t object) +{ + + if (object == NULL) + return; + + if (object->type == OBJT_VNODE) + vm_object_reference_vnode(object); + else + refcount_acquire(&object->ref_count); + KASSERT((object->flags & OBJ_DEAD) == 0, + ("vm_object_reference: Referenced dead object.")); +} + +/* * vm_object_reference_locked: * * Gets another reference to the given object. @@ -516,23 +539,23 @@ vm_object_reference_locked(vm_object_t object) VM_OBJECT_ASSERT_LOCKED(object); old = refcount_acquire(&object->ref_count); if (object->type == OBJT_VNODE && old == 0) { - vp = object->handle; - vref(vp); - } + vp = object->handle; vref(vp); } + KASSERT((object->flags & OBJ_DEAD) == 0, + ("vm_object_reference: Referenced dead object.")); } /* * Handle deallocating an object of type OBJT_VNODE. */ static void -vm_object_vndeallocate(vm_object_t object) +vm_object_deallocate_vnode(vm_object_t object) { struct vnode *vp = (struct vnode *) object->handle; bool last; KASSERT(object->type == OBJT_VNODE, - ("vm_object_vndeallocate: not a vnode object")); - KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); + ("vm_object_deallocate_vnode: not a vnode object")); + KASSERT(vp != NULL, ("vm_object_deallocate_vnode: missing vp")); /* Object lock to protect handle lookup. */ last = refcount_release(&object->ref_count); @@ -548,6 +571,53 @@ vm_object_vndeallocate(vm_object_t object) vrele(vp); } + +/* + * We dropped a reference on an object and discovered that it had a + * single remaining shadow. This is a sibling of the reference we + * dropped. Attempt to collapse the sibling and backing object. + */ +static vm_object_t +vm_object_deallocate_anon(vm_object_t backing_object) +{ + vm_object_t object; + + /* Fetch the final shadow. */ + object = LIST_FIRST(&backing_object->shadow_head); + KASSERT(object != NULL && backing_object->shadow_count == 1, + ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d", + backing_object->ref_count, backing_object->shadow_count)); + KASSERT((object->flags & (OBJ_TMPFS_NODE | OBJ_ANON)) == OBJ_ANON, + ("invalid shadow object %p", object)); + + if (!VM_OBJECT_TRYWLOCK(object)) { + /* + * Prevent object from disappearing since we do not have a + * reference. + */ + vm_object_pip_add(object, 1); + VM_OBJECT_WUNLOCK(backing_object); + VM_OBJECT_WLOCK(object); + vm_object_pip_wakeup(object); + } else + VM_OBJECT_WUNLOCK(backing_object); + + /* + * Check for a collapse/terminate race with the last reference holder. + */ + if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 || + !refcount_acquire_if_not_zero(&object->ref_count)) { + VM_OBJECT_WUNLOCK(object); + return (NULL); + } + backing_object = object->backing_object; + if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0) + vm_object_collapse(object); + VM_OBJECT_WUNLOCK(object); + + return (object); +} + /* * vm_object_deallocate: * @@ -562,7 +632,7 @@ vm_object_vndeallocate(vm_object_t object) void vm_object_deallocate(vm_object_t object) { - vm_object_t robject, temp; + vm_object_t temp; bool released; while (object != NULL) { @@ -583,7 +653,7 @@ vm_object_deallocate(vm_object_t object) if (object->type == OBJT_VNODE) { VM_OBJECT_RLOCK(object); if (object->type == OBJT_VNODE) { - vm_object_vndeallocate(object); + vm_object_deallocate_vnode(object); return; } VM_OBJECT_RUNLOCK(object); @@ -594,92 +664,30 @@ vm_object_deallocate(vm_object_t object) ("vm_object_deallocate: object deallocated too many times: %d", object->type)); - if (refcount_release(&object->ref_count)) - goto doterm; - if (object->ref_count > 1) { - VM_OBJECT_WUNLOCK(object); - return; - } else if (object->ref_count == 1) { - if (object->shadow_count == 0 && - (object->flags & OBJ_ANON) != 0) { - vm_object_set_flag(object, OBJ_ONEMAPPING); - } else if (object->shadow_count == 1) { - KASSERT((object->flags & OBJ_ANON) != 0, - ("obj %p with shadow_count > 0 is not anon", - object)); - robject = LIST_FIRST(&object->shadow_head); - KASSERT(robject != NULL, - ("vm_object_deallocate: ref_count: %d, " - "shadow_count: %d", object->ref_count, - object->shadow_count)); - KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0, - ("shadowed tmpfs v_object %p", object)); - if (!VM_OBJECT_TRYWLOCK(robject)) { - /* - * Avoid a potential deadlock. - */ - refcount_acquire(&object->ref_count); - VM_OBJECT_WUNLOCK(object); - /* - * More likely than not the thread - * holding robject's lock has lower - * priority than the current thread. - * Let the lower priority thread run. - */ - pause("vmo_de", 1); - continue; - } - /* - * Collapse object into its shadow unless its - * shadow is dead. In that case, object will - * be deallocated by the thread that is - * deallocating its shadow. - */ - if ((robject->flags & - (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON) { - - refcount_acquire(&robject->ref_count); -retry: - if (REFCOUNT_COUNT(robject->paging_in_progress) > 0) { - VM_OBJECT_WUNLOCK(object); - vm_object_pip_wait(robject, - "objde1"); - temp = robject->backing_object; - if (object == temp) { - VM_OBJECT_WLOCK(object); - goto retry; - } - } else if (REFCOUNT_COUNT(object->paging_in_progress) > 0) { - VM_OBJECT_WUNLOCK(robject); - VM_OBJECT_WUNLOCK(object); - refcount_wait( - &object->paging_in_progress, - "objde2", PVM); - VM_OBJECT_WLOCK(robject); - temp = robject->backing_object; - if (object == temp) { - VM_OBJECT_WLOCK(object); - goto retry; - } - } else - VM_OBJECT_WUNLOCK(object); - - if (robject->ref_count == 1) { - refcount_release(&robject->ref_count); - object = robject; - goto doterm; - } - object = robject; - vm_object_collapse(object); - VM_OBJECT_WUNLOCK(object); - continue; - } - VM_OBJECT_WUNLOCK(robject); + /* + * If this is not the final reference to an anonymous + * object we may need to collapse the shadow chain. + */ + if (!refcount_release(&object->ref_count)) { + if (object->ref_count > 1 || + object->shadow_count == 0) { + if ((object->flags & OBJ_ANON) != 0 && + object->ref_count == 1) + vm_object_set_flag(object, + OBJ_ONEMAPPING); + VM_OBJECT_WUNLOCK(object); + return; } - VM_OBJECT_WUNLOCK(object); - return; + + /* Handle collapsing last ref on anonymous objects. */ + object = vm_object_deallocate_anon(object); + continue; } -doterm: + + /* + * Handle the final reference to an object. We restart + * the loop with the backing object to avoid recursion. + */ umtx_shm_object_terminated(object); temp = object->backing_object; if (temp != NULL) { @@ -687,16 +695,11 @@ doterm: ("shadowed tmpfs v_object 2 %p", object)); vm_object_backing_remove(object); } - /* - * Don't double-terminate, we could be in a termination - * recursion due to the terminate having to sync data - * to disk. - */ - if ((object->flags & OBJ_DEAD) == 0) { - vm_object_set_flag(object, OBJ_DEAD); - vm_object_terminate(object); - } else - VM_OBJECT_WUNLOCK(object); + + KASSERT((object->flags & OBJ_DEAD) == 0, + ("vm_object_deallocate: Terminating dead object.")); + vm_object_set_flag(object, OBJ_DEAD); + vm_object_terminate(object); object = temp; } } @@ -734,6 +737,9 @@ vm_object_backing_remove_locked(vm_object_t object) VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(backing_object); + KASSERT((object->flags & OBJ_COLLAPSING) == 0, + ("vm_object_backing_remove: Removing collapsing object.")); + if ((object->flags & OBJ_SHADOWLIST) != 0) { LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; @@ -788,6 +794,98 @@ vm_object_backing_insert(vm_object_t object, vm_object_t backing_object) object->backing_object = backing_object; } +/* + * Insert an object into a backing_object's shadow list with an additional + * reference to the backing_object added. + */ +static void +vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + + if ((backing_object->flags & OBJ_ANON) != 0) { + VM_OBJECT_WLOCK(backing_object); + KASSERT((backing_object->flags & OBJ_DEAD) == 0, + ("shadowing dead anonymous object")); + vm_object_reference_locked(backing_object); + vm_object_backing_insert_locked(object, backing_object); + vm_object_clear_flag(backing_object, OBJ_ONEMAPPING); + VM_OBJECT_WUNLOCK(backing_object); + } else { + vm_object_reference(backing_object); + object->backing_object = backing_object; + } +} + +/* + * Transfer a backing reference from backing_object to object. + */ +static void +vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object) +{ + vm_object_t new_backing_object; + + /* + * Note that the reference to backing_object->backing_object + * moves from within backing_object to within object. + */ + vm_object_backing_remove_locked(object); + new_backing_object = backing_object->backing_object; + if (new_backing_object == NULL) + return; + if ((new_backing_object->flags & OBJ_ANON) != 0) { + VM_OBJECT_WLOCK(new_backing_object); + vm_object_backing_remove_locked(backing_object); + vm_object_backing_insert_locked(object, new_backing_object); + VM_OBJECT_WUNLOCK(new_backing_object); + } else { + object->backing_object = new_backing_object; + backing_object->backing_object = NULL; + } +} + +/* + * Wait for a concurrent collapse to settle. + */ +static void +vm_object_collapse_wait(vm_object_t object) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + + while ((object->flags & OBJ_COLLAPSING) != 0) { + vm_object_pip_wait(object, "vmcolwait"); + counter_u64_add(object_collapse_waits, 1); + } +} + +/* + * Waits for a backing object to clear a pending collapse and returns + * it locked if it is an ANON object. + */ +static vm_object_t +vm_object_backing_collapse_wait(vm_object_t object) +{ + vm_object_t backing_object; + + VM_OBJECT_ASSERT_WLOCKED(object); + + for (;;) { + backing_object = object->backing_object; + if (backing_object == NULL || + (backing_object->flags & OBJ_ANON) == 0) + return (NULL); + VM_OBJECT_WLOCK(backing_object); + if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0) + break; + VM_OBJECT_WUNLOCK(object); + vm_object_pip_sleep(backing_object, "vmbckwait"); + counter_u64_add(object_collapse_waits, 1); + VM_OBJECT_WLOCK(object); + } + return (backing_object); +} /* * vm_object_terminate_pages removes any remaining pageable pages @@ -843,9 +941,14 @@ vm_object_terminate_pages(vm_object_t object) void vm_object_terminate(vm_object_t object) { + VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_DEAD) != 0, ("terminating non-dead obj %p", object)); + KASSERT((object->flags & OBJ_COLLAPSING) == 0, + ("terminating collapsing obj %p", object)); + KASSERT(object->backing_object == NULL, + ("terminating shadow obj %p", object)); /* * wait for the pageout daemon to be done with the object @@ -853,11 +956,11 @@ vm_object_terminate(vm_object_t object) vm_object_pip_wait(object, "objtrm"); KASSERT(!REFCOUNT_COUNT(object->paging_in_progress), - ("vm_object_terminate: pageout in progress")); + ("vm_object_terminate: pageout in progress")); - KASSERT(object->ref_count == 0, - ("vm_object_terminate: object with references, ref_count=%d", - object->ref_count)); + KASSERT(object->ref_count == 0, + ("vm_object_terminate: object with references, ref_count=%d", + object->ref_count)); if ((object->flags & OBJ_PG_DTOR) == 0) vm_object_terminate_pages(object); @@ -1397,11 +1500,13 @@ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_next; - vm_object_t orig_object, new_object, source; + vm_object_t orig_object, new_object, backing_object; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; + KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0, + ("vm_object_split: Splitting object with multiple mappings.")); if ((orig_object->flags & OBJ_ANON) == 0) return; if (orig_object->ref_count <= 1) @@ -1419,35 +1524,25 @@ vm_object_split(vm_map_entry_t entry) orig_object->cred, ptoa(size)); /* + * We must wait for the orig_object to complete any in-progress + * collapse so that the swap blocks are stable below. The + * additional reference on backing_object by new object will + * prevent further collapse operations until split completes. + */ + VM_OBJECT_WLOCK(orig_object); + vm_object_collapse_wait(orig_object); + + /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. */ VM_OBJECT_WLOCK(new_object); - VM_OBJECT_WLOCK(orig_object); new_object->domain = orig_object->domain; - source = orig_object->backing_object; - if (source != NULL) { - if ((source->flags & (OBJ_ANON | OBJ_DEAD)) != 0) { - VM_OBJECT_WLOCK(source); - if ((source->flags & OBJ_DEAD) != 0) { - VM_OBJECT_WUNLOCK(source); - VM_OBJECT_WUNLOCK(orig_object); - VM_OBJECT_WUNLOCK(new_object); - new_object->cred = NULL; - vm_object_deallocate(new_object); - VM_OBJECT_WLOCK(orig_object); - return; - } - vm_object_backing_insert_locked(new_object, source); - vm_object_reference_locked(source); /* for new_object */ - vm_object_clear_flag(source, OBJ_ONEMAPPING); - VM_OBJECT_WUNLOCK(source); - } else { - vm_object_backing_insert(new_object, source); - vm_object_reference(source); - } + backing_object = orig_object->backing_object; + if (backing_object != NULL) { + vm_object_backing_insert_ref(new_object, backing_object); new_object->backing_object_offset = - orig_object->backing_object_offset + entry->offset; + orig_object->backing_object_offset + entry->offset; } if (orig_object->cred != NULL) { crhold(orig_object->cred); @@ -1455,6 +1550,12 @@ vm_object_split(vm_map_entry_t entry) ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } + + /* + * Mark the split operation so that swap_pager_getpages() knows + * that the object is in transition. + */ + vm_object_set_flag(orig_object, OBJ_SPLIT); retry: m = vm_page_find_least(orig_object, offidxstart); for (; m != NULL && (idx = m->pindex - offidxstart) < size; @@ -1523,6 +1624,7 @@ retry: TAILQ_FOREACH(m, &new_object->memq, listq) vm_page_xunbusy(m); } + vm_object_clear_flag(orig_object, OBJ_SPLIT); VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); entry->object.vm_object = new_object; @@ -1531,12 +1633,8 @@ retry: VM_OBJECT_WLOCK(new_object); } -#define OBSC_COLLAPSE_NOWAIT 0x0002 -#define OBSC_COLLAPSE_WAIT 0x0004 - static vm_page_t -vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, - int op) +vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) { vm_object_t backing_object; @@ -1546,8 +1644,6 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, KASSERT(p == NULL || p->object == object || p->object == backing_object, ("invalid ownership %p %p %p", p, object, backing_object)); - if ((op & OBSC_COLLAPSE_NOWAIT) != 0) - return (next); /* The page is only NULL when rename fails. */ if (p == NULL) { VM_OBJECT_WUNLOCK(object); @@ -1632,8 +1728,8 @@ vm_object_scan_all_shadowed(vm_object_t object) return (true); } -static bool -vm_object_collapse_scan(vm_object_t object, int op) +static void +vm_object_collapse_scan(vm_object_t object) { vm_object_t backing_object; vm_page_t next, p, pp; @@ -1646,12 +1742,6 @@ vm_object_collapse_scan(vm_object_t object, int op) backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* - * Initial conditions - */ - if ((op & OBSC_COLLAPSE_WAIT) != 0) - vm_object_set_flag(backing_object, OBJ_DEAD); - - /* * Our scan */ for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { @@ -1662,12 +1752,16 @@ vm_object_collapse_scan(vm_object_t object, int op) * Check for busy page */ if (vm_page_tryxbusy(p) == 0) { - next = vm_object_collapse_scan_wait(object, p, next, op); + next = vm_object_collapse_scan_wait(object, p); continue; } + KASSERT(object->backing_object == backing_object, + ("vm_object_collapse_scan: backing object mismatch %p != %p", + object->backing_object, backing_object)); KASSERT(p->object == backing_object, - ("vm_object_collapse_scan: object mismatch")); + ("vm_object_collapse_scan: object mismatch %p != %p", + p->object, backing_object)); if (p->pindex < backing_offset_index || new_pindex >= object->size) { @@ -1689,16 +1783,9 @@ vm_object_collapse_scan(vm_object_t object, int op) * The page in the parent is busy and possibly not * (yet) valid. Until its state is finalized by the * busy bit owner, we can't tell whether it shadows the - * original page. Therefore, we must either skip it - * and the original (backing_object) page or wait for - * its state to be finalized. - * - * This is due to a race with vm_fault() where we must - * unbusy the original (backing_obj) page before we can - * (re)lock the parent. Hence we can get here. + * original page. */ - next = vm_object_collapse_scan_wait(object, pp, next, - op); + next = vm_object_collapse_scan_wait(object, pp); continue; } @@ -1742,10 +1829,7 @@ vm_object_collapse_scan(vm_object_t object, int op) */ if (vm_page_rename(p, object, new_pindex)) { vm_page_xunbusy(p); - if (pp != NULL) - vm_page_xunbusy(pp); - next = vm_object_collapse_scan_wait(object, NULL, next, - op); + next = vm_object_collapse_scan_wait(object, NULL); continue; } @@ -1763,27 +1847,7 @@ vm_object_collapse_scan(vm_object_t object, int op) #endif vm_page_xunbusy(p); } - return (true); -} - - -/* - * this version of collapse allows the operation to occur earlier and - * when paging_in_progress is true for an object... This is not a complete - * operation, but should plug 99.9% of the rest of the leaks. - */ -static void -vm_object_qcollapse(vm_object_t object) -{ - vm_object_t backing_object = object->backing_object; - - VM_OBJECT_ASSERT_WLOCKED(object); - VM_OBJECT_ASSERT_WLOCKED(backing_object); - - if (backing_object->ref_count != 1) - return; - - vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT); + return; } /* @@ -1801,53 +1865,48 @@ vm_object_collapse(vm_object_t object) VM_OBJECT_ASSERT_WLOCKED(object); while (TRUE) { - /* - * Verify that the conditions are right for collapse: - * - * The object exists and the backing object exists. - */ - if ((backing_object = object->backing_object) == NULL) - break; + KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON, + ("collapsing invalid object")); /* - * we check the backing object first, because it is most likely - * not collapsable. + * Wait for the backing_object to finish any pending + * collapse so that the caller sees the shortest possible + * shadow chain. */ - if ((backing_object->flags & OBJ_ANON) == 0) - break; - VM_OBJECT_WLOCK(backing_object); - if ((backing_object->flags & OBJ_DEAD) != 0 || - (object->flags & (OBJ_DEAD | OBJ_ANON)) != OBJ_ANON) { - VM_OBJECT_WUNLOCK(backing_object); - break; - } + backing_object = vm_object_backing_collapse_wait(object); + if (backing_object == NULL) + return; - if (REFCOUNT_COUNT(object->paging_in_progress) > 0 || - REFCOUNT_COUNT(backing_object->paging_in_progress) > 0) { - vm_object_qcollapse(object); - VM_OBJECT_WUNLOCK(backing_object); - break; - } + KASSERT(object->ref_count > 0 && + object->ref_count > object->shadow_count, + ("collapse with invalid ref %d or shadow %d count.", + object->ref_count, object->shadow_count)); + KASSERT((backing_object->flags & + (OBJ_COLLAPSING | OBJ_DEAD)) == 0, + ("vm_object_collapse: Backing object already collapsing.")); + KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0, + ("vm_object_collapse: object is already collapsing.")); /* - * We know that we can either collapse the backing object (if - * the parent is the only reference to it) or (perhaps) have + * We know that we can either collapse the backing object if + * the parent is the only reference to it, or (perhaps) have * the parent bypass the object if the parent happens to shadow * all the resident pages in the entire backing object. - * - * This is ignoring pager-backed pages such as swap pages. - * vm_object_collapse_scan fails the shadowing test in this - * case. */ if (backing_object->ref_count == 1) { + KASSERT(backing_object->shadow_count == 1, + ("vm_object_collapse: shadow_count: %d", + backing_object->shadow_count)); vm_object_pip_add(object, 1); + vm_object_set_flag(object, OBJ_COLLAPSING); vm_object_pip_add(backing_object, 1); + vm_object_set_flag(backing_object, OBJ_DEAD); /* * If there is exactly one reference to the backing * object, we can collapse it into the parent. */ - vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT); + vm_object_collapse_scan(object); #if VM_NRESERVLEVEL > 0 /* @@ -1866,31 +1925,24 @@ vm_object_collapse(vm_object_t object) * the backing_object's and object's locks are * released and reacquired. * Since swap_pager_copy() is being asked to - * destroy the source, it will change the - * backing_object's type to OBJT_DEFAULT. + * destroy backing_object, it will change the + * type to OBJT_DEFAULT. */ swap_pager_copy( backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); } + /* * Object now shadows whatever backing_object did. - * Note that the reference to - * backing_object->backing_object moves from within - * backing_object to within object. */ - vm_object_backing_remove_locked(object); - new_backing_object = backing_object->backing_object; - if (new_backing_object != NULL) { - VM_OBJECT_WLOCK(new_backing_object); - vm_object_backing_remove_locked(backing_object); - vm_object_backing_insert_locked(object, - new_backing_object); - VM_OBJECT_WUNLOCK(new_backing_object); - } + vm_object_clear_flag(object, OBJ_COLLAPSING); + vm_object_backing_transfer(object, backing_object); object->backing_object_offset += backing_object->backing_object_offset; + VM_OBJECT_WUNLOCK(object); + vm_object_pip_wakeup(object); /* * Discard backing_object. @@ -1903,17 +1955,17 @@ vm_object_collapse(vm_object_t object) "backing_object %p was somehow re-referenced during collapse!", backing_object)); vm_object_pip_wakeup(backing_object); - backing_object->type = OBJT_DEAD; - refcount_release(&backing_object->ref_count); - VM_OBJECT_WUNLOCK(backing_object); - vm_object_destroy(backing_object); - - vm_object_pip_wakeup(object); + (void)refcount_release(&backing_object->ref_count); + vm_object_terminate(backing_object); counter_u64_add(object_collapses, 1); + VM_OBJECT_WLOCK(object); } else { /* * If we do not entirely shadow the backing object, * there is nothing we can do so we give up. + * + * The object lock and backing_object lock must not + * be dropped during this sequence. */ if (!vm_object_scan_all_shadowed(object)) { VM_OBJECT_WUNLOCK(backing_object); @@ -1926,21 +1978,22 @@ vm_object_collapse(vm_object_t object) * it, since its reference count is at least 2. */ vm_object_backing_remove_locked(object); - new_backing_object = backing_object->backing_object; if (new_backing_object != NULL) { - vm_object_backing_insert(object, + vm_object_backing_insert_ref(object, new_backing_object); - vm_object_reference(new_backing_object); object->backing_object_offset += - backing_object->backing_object_offset; + backing_object->backing_object_offset; } /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish. */ - refcount_release(&backing_object->ref_count); + (void)refcount_release(&backing_object->ref_count); + KASSERT(backing_object->ref_count >= 1, ( +"backing_object %p was somehow dereferenced during collapse!", + backing_object)); VM_OBJECT_WUNLOCK(backing_object); counter_u64_add(object_bypasses, 1); } @@ -2155,7 +2208,7 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, VM_OBJECT_WLOCK(prev_object); /* - * Try to collapse the object first + * Try to collapse the object first. */ vm_object_collapse(prev_object); diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index b5b6e0c70dc1..780f7c7e83b7 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -190,6 +190,8 @@ struct vm_object { #define OBJ_SIZEVNLOCK 0x0040 /* lock vnode to check obj size */ #define OBJ_PG_DTOR 0x0080 /* dont reset object, leave that for dtor */ #define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */ +#define OBJ_SPLIT 0x0400 /* object is being split */ +#define OBJ_COLLAPSING 0x0800 /* Parent of collapse. */ #define OBJ_COLORED 0x1000 /* pg_color is defined */ #define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */ #define OBJ_SHADOWLIST 0x4000 /* Object is on the shadow list. */ |