diff options
author | Andriy Gapon <avg@FreeBSD.org> | 2019-10-15 14:29:18 +0000 |
---|---|---|
committer | Andriy Gapon <avg@FreeBSD.org> | 2019-10-15 14:29:18 +0000 |
commit | 563db1a9475896a9f6f14ca0eb4d4c47bfedd487 (patch) | |
tree | f14a033706eb95cd5f2688e9b888dafd33950f17 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c | |
parent | 436858933814e5b174aed0748591c93bab12792c (diff) | |
parent | a4a9c977b9efb8f606c662e31d221af7b15c2a2d (diff) | |
download | src-563db1a9475896a9f6f14ca0eb4d4c47bfedd487.tar.gz src-563db1a9475896a9f6f14ca0eb4d4c47bfedd487.zip |
MFV r353558: 10572 10579 Fix race in dnode_check_slots_free()
illumos/illumos-gate@aa02ea01948372a32cbf08bfc31c72c32e3fc81e
https://github.com/illumos/illumos-gate/commit/aa02ea01948372a32cbf08bfc31c72c32e3fc81e
10572 Fix race in dnode_check_slots_free()
https://www.illumos.org/issues/10572
The Fix from ZoL:
Currently, dnode_check_slots_free() works by checking dn->dn_type
in the dnode to determine if the dnode is reclaimable. However,
there is a small window of time between dnode_free_sync() in the
first call to dsl_dataset_sync() and when the useraccounting code
is run when the type is set DMU_OT_NONE, but the dnode is not yet
evictable, leading to crashes. This patch adds the ability for
dnodes to track which txg they were last dirtied in and adds a
check for this before performing the reclaim.
This patch also corrects several instances when dn_dirty_link was
treated as a list_node_t when it is technically a multilist_node_t.
10579 Don't allow dnode allocation if dn_holds != 0
https://www.illumos.org/issues/10579
The fix from ZoL:
This patch simply fixes a small bug where dnode_hold_impl() could
attempt to allocate a dnode that was in the process of being freed,
but which still had active references. This patch simply adds the
required check.
Author: Tom Caputi <tcaputi@datto.com>
Reported by: delphij
MFC after: 2 weeks
X-MFC with: r353176
Notes
Notes:
svn path=/head/; revision=353559
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c | 30 |
1 files changed, 20 insertions, 10 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c index 41e50e5ec5ca..ae37928da501 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c @@ -146,7 +146,7 @@ dnode_cons(void *arg, void *unused, int kmflag) bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { - list_link_init(&dn->dn_dirty_link[i]); + multilist_link_init(&dn->dn_dirty_link[i]); dn->dn_free_ranges[i] = NULL; list_create(&dn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), @@ -156,6 +156,7 @@ dnode_cons(void *arg, void *unused, int kmflag) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; + dn->dn_dirty_txg = 0; dn->dn_dirtyctx = 0; dn->dn_dirtyctx_firstset = NULL; dn->dn_bonus = NULL; @@ -194,7 +195,7 @@ dnode_dest(void *arg, void *unused) ASSERT(!list_link_active(&dn->dn_link)); for (i = 0; i < TXG_SIZE; i++) { - ASSERT(!list_link_active(&dn->dn_dirty_link[i])); + ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); list_destroy(&dn->dn_dirty_records[i]); ASSERT0(dn->dn_next_nblkptr[i]); @@ -209,6 +210,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); + ASSERT0(dn->dn_dirty_txg); ASSERT0(dn->dn_dirtyctx); ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL); ASSERT3P(dn->dn_bonus, ==, NULL); @@ -540,6 +542,7 @@ dnode_destroy(dnode_t *dn) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; + dn->dn_dirty_txg = 0; dn->dn_dirtyctx = 0; if (dn->dn_dirtyctx_firstset != NULL) { @@ -609,6 +612,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT(dn->dn_type == DMU_OT_NONE); ASSERT0(dn->dn_maxblkid); ASSERT0(dn->dn_allocated_txg); + ASSERT0(dn->dn_dirty_txg); ASSERT0(dn->dn_assigned_txg); ASSERT(refcount_is_zero(&dn->dn_tx_holds)); ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); @@ -622,7 +626,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); - ASSERT(!list_link_active(&dn->dn_dirty_link[i])); + ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); } @@ -799,6 +803,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; + ndn->dn_dirty_txg = odn->dn_dirty_txg; ndn->dn_dirtyctx = odn->dn_dirtyctx; ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; ASSERT(refcount_count(&odn->dn_tx_holds) == 0); @@ -865,6 +870,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; + odn->dn_dirty_txg = 0; odn->dn_dirtyctx = 0; odn->dn_dirtyctx_firstset = NULL; odn->dn_have_spill = B_FALSE; @@ -1091,6 +1097,10 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) { ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); + /* + * If all dnode slots are either already free or + * evictable return B_TRUE. + */ for (int i = idx; i < idx + slots; i++) { dnode_handle_t *dnh = &children->dnc_children[i]; dnode_t *dn = dnh->dnh_dnode; @@ -1099,18 +1109,18 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) continue; } else if (DN_SLOT_IS_PTR(dn)) { mutex_enter(&dn->dn_mtx); - dmu_object_type_t type = dn->dn_type; + boolean_t can_free = (dn->dn_type == DMU_OT_NONE && + refcount_is_zero(&dn->dn_holds) && + !DNODE_IS_DIRTY(dn)); mutex_exit(&dn->dn_mtx); - if (type != DMU_OT_NONE) + if (!can_free) return (B_FALSE); - - continue; + else + continue; } else { return (B_FALSE); } - - return (B_FALSE); } return (B_TRUE); @@ -1634,7 +1644,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) /* * If we are already marked dirty, we're done. */ - if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { + if (multilist_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { multilist_sublist_unlock(mls); return; } |