diff options
Diffstat (limited to 'sys/contrib/openzfs/module/zfs/dnode.c')
-rw-r--r-- | sys/contrib/openzfs/module/zfs/dnode.c | 168 |
1 files changed, 86 insertions, 82 deletions
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 963ff41232a3..e88d394b5229 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -173,9 +173,7 @@ dnode_cons(void *arg, void *unused, int kmflag) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; + dn->dn_dirtycnt = 0; dn->dn_bonus = NULL; dn->dn_have_spill = B_FALSE; dn->dn_zio = NULL; @@ -229,9 +227,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); - ASSERT0(dn->dn_dirty_txg); - ASSERT0(dn->dn_dirtyctx); - ASSERT0P(dn->dn_dirtyctx_firstset); + ASSERT0(dn->dn_dirtycnt); ASSERT0P(dn->dn_bonus); ASSERT(!dn->dn_have_spill); ASSERT0P(dn->dn_zio); @@ -692,10 +688,8 @@ dnode_destroy(dnode_t *dn) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; if (dn->dn_bonus != NULL) { mutex_enter(&dn->dn_bonus->db_mtx); dbuf_destroy(dn->dn_bonus); @@ -800,11 +794,9 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dn->dn_bonuslen = bonuslen; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; - dn->dn_dirtyctx = 0; dn->dn_free_txg = 0; - dn->dn_dirtyctx_firstset = NULL; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; dn->dn_allocated_txg = tx->tx_txg; dn->dn_id_flags = 0; @@ -955,9 +947,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; - ndn->dn_dirty_txg = odn->dn_dirty_txg; - ndn->dn_dirtyctx = odn->dn_dirtyctx; - ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; + ndn->dn_dirtycnt = odn->dn_dirtycnt; ASSERT0(zfs_refcount_count(&odn->dn_tx_holds)); zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ASSERT(avl_is_empty(&ndn->dn_dbufs)); @@ -1020,9 +1010,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; - odn->dn_dirty_txg = 0; - odn->dn_dirtyctx = 0; - odn->dn_dirtyctx_firstset = NULL; + odn->dn_dirtycnt = 0; odn->dn_have_spill = B_FALSE; odn->dn_zio = NULL; odn->dn_oldused = 0; @@ -1273,8 +1261,8 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) } else if (DN_SLOT_IS_PTR(dn)) { mutex_enter(&dn->dn_mtx); boolean_t can_free = (dn->dn_type == DMU_OT_NONE && - zfs_refcount_is_zero(&dn->dn_holds) && - !DNODE_IS_DIRTY(dn)); + dn->dn_dirtycnt == 0 && + zfs_refcount_is_zero(&dn->dn_holds)); mutex_exit(&dn->dn_mtx); if (!can_free) @@ -1757,17 +1745,23 @@ dnode_hold(objset_t *os, uint64_t object, const void *tag, dnode_t **dnp) * reference on the dnode. Returns FALSE if unable to add a * new reference. */ +static boolean_t +dnode_add_ref_locked(dnode_t *dn, const void *tag) +{ + ASSERT(MUTEX_HELD(&dn->dn_mtx)); + if (zfs_refcount_is_zero(&dn->dn_holds)) + return (FALSE); + VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + return (TRUE); +} + boolean_t dnode_add_ref(dnode_t *dn, const void *tag) { mutex_enter(&dn->dn_mtx); - if (zfs_refcount_is_zero(&dn->dn_holds)) { - mutex_exit(&dn->dn_mtx); - return (FALSE); - } - VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + boolean_t r = dnode_add_ref_locked(dn, tag); mutex_exit(&dn->dn_mtx); - return (TRUE); + return (r); } void @@ -1830,31 +1824,20 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) } /* - * Checks if the dnode itself is dirty, or is carrying any uncommitted records. - * It is important to check both conditions, as some operations (eg appending - * to a file) can dirty both as a single logical unit, but they are not synced - * out atomically, so checking one and not the other can result in an object - * appearing to be clean mid-way through a commit. + * Test if the dnode is dirty, or carrying uncommitted records. * - * Do not change this lightly! If you get it wrong, dmu_offset_next() can - * detect a hole where there is really data, leading to silent corruption. + * dn_dirtycnt is the number of txgs this dnode is dirty on. It's incremented + * in dnode_setdirty() the first time the dnode is dirtied on a txg, and + * decremented in either dnode_rele_task() or userquota_updates_task() when the + * txg is synced out. */ boolean_t dnode_is_dirty(dnode_t *dn) { mutex_enter(&dn->dn_mtx); - - for (int i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i]) || - !list_is_empty(&dn->dn_dirty_records[i])) { - mutex_exit(&dn->dn_mtx); - return (B_TRUE); - } - } - + boolean_t dirty = (dn->dn_dirtycnt != 0); mutex_exit(&dn->dn_mtx); - - return (B_FALSE); + return (dirty); } void @@ -1916,7 +1899,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) * dnode will hang around after we finish processing its * children. */ - VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); + mutex_enter(&dn->dn_mtx); + VERIFY(dnode_add_ref_locked(dn, (void *)(uintptr_t)tx->tx_txg)); + dn->dn_dirtycnt++; + ASSERT3U(dn->dn_dirtycnt, <=, 3); + mutex_exit(&dn->dn_mtx); (void) dbuf_dirty(dn->dn_dbuf, tx); @@ -2221,32 +2208,6 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, mutex_exit(&dn->dn_dbufs_mtx); } -void -dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag) -{ - /* - * Don't set dirtyctx to SYNC if we're just modifying this as we - * initialize the objset. - */ - if (dn->dn_dirtyctx == DN_UNDIRTIED) { - dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; - - if (ds != NULL) { - rrw_enter(&ds->ds_bp_rwlock, RW_READER, tag); - } - if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) { - if (dmu_tx_is_syncing(tx)) - dn->dn_dirtyctx = DN_DIRTY_SYNC; - else - dn->dn_dirtyctx = DN_DIRTY_OPEN; - dn->dn_dirtyctx_firstset = tag; - } - if (ds != NULL) { - rrw_exit(&ds->ds_bp_rwlock, tag); - } - } -} - static void dnode_partial_zero(dnode_t *dn, uint64_t off, uint64_t blkoff, uint64_t len, dmu_tx_t *tx) @@ -2695,6 +2656,32 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, } /* + * Adjust *offset to the next (or previous) block byte offset at lvl. + * Returns FALSE if *offset would overflow or underflow. + */ +static boolean_t +dnode_next_block(dnode_t *dn, int flags, uint64_t *offset, int lvl) +{ + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + int span = lvl * epbs + dn->dn_datablkshift; + uint64_t blkid, maxblkid; + + if (span >= 8 * sizeof (uint64_t)) + return (B_FALSE); + + blkid = *offset >> span; + maxblkid = 1ULL << (8 * sizeof (*offset) - span); + if (!(flags & DNODE_FIND_BACKWARDS) && blkid + 1 < maxblkid) + *offset = (blkid + 1) << span; + else if ((flags & DNODE_FIND_BACKWARDS) && blkid > 0) + *offset = (blkid << span) - 1; + else + return (B_FALSE); + + return (B_TRUE); +} + +/* * Find the next hole, data, or sparse region at or after *offset. * The value 'blkfill' tells us how many items we expect to find * in an L0 data block; this value is 1 for normal objects, @@ -2721,7 +2708,7 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, int minlvl, uint64_t blkfill, uint64_t txg) { - uint64_t initial_offset = *offset; + uint64_t matched = *offset; int lvl, maxlvl; int error = 0; @@ -2745,16 +2732,36 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, maxlvl = dn->dn_phys->dn_nlevels; - for (lvl = minlvl; lvl <= maxlvl; lvl++) { + for (lvl = minlvl; lvl <= maxlvl; ) { error = dnode_next_offset_level(dn, flags, offset, lvl, blkfill, txg); - if (error != ESRCH) + if (error == 0 && lvl > minlvl) { + --lvl; + matched = *offset; + } else if (error == ESRCH && lvl < maxlvl && + dnode_next_block(dn, flags, &matched, lvl)) { + /* + * Continue search at next/prev offset in lvl+1 block. + * + * Usually we only search upwards at the start of the + * search as higher level blocks point at a matching + * minlvl block in most cases, but we backtrack if not. + * + * This can happen for txg > 0 searches if the block + * contains only BPs/dnodes freed at that txg. It also + * happens if we are still syncing out the tree, and + * some BP's at higher levels are not updated yet. + * + * We must adjust offset to avoid coming back to the + * same offset and getting stuck looping forever. This + * also deals with the case where offset is already at + * the beginning or end of the object. + */ + ++lvl; + *offset = matched; + } else { break; - } - - while (error == 0 && --lvl >= minlvl) { - error = dnode_next_offset_level(dn, - flags, offset, lvl, blkfill, txg); + } } /* @@ -2766,9 +2773,6 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, error = 0; } - if (error == 0 && (flags & DNODE_FIND_BACKWARDS ? - initial_offset < *offset : initial_offset > *offset)) - error = SET_ERROR(ESRCH); out: if (!(flags & DNODE_FIND_HAVELOCK)) rw_exit(&dn->dn_struct_rwlock); |