about | summary | refs | log | tree | commit | diff
path: root/sys/contrib/openzfs/module/zfs/dbuf.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/zfs/dbuf.c')
-rw-r--r-- sys/contrib/openzfs/module/zfs/dbuf.c 398
1 files changed, 223 insertions, 175 deletions
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index 0a243a24266f..fccc4c5b5b94 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -523,7 +523,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
return;
/* Only data blocks support the attachment of user data. */
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
/* Clients must resolve a dbuf before attaching user data. */
ASSERT(db->db.db_data != NULL);
@@ -866,12 +866,33 @@ dbuf_evict_notify(uint64_t size)
* and grabbing the lock results in massive lock contention.
*/
if (size > dbuf_cache_target_bytes()) {
- if (size > dbuf_cache_hiwater_bytes())
+ /*
+ * Avoid calling dbuf_evict_one() from memory reclaim context
+ * (e.g. Linux kswapd, FreeBSD pagedaemon) to prevent deadlocks.
+ * Memory reclaim threads can get stuck waiting for the dbuf
+ * hash lock.
+ */
+ if (size > dbuf_cache_hiwater_bytes() &&
+ !current_is_reclaim_thread()) {
dbuf_evict_one();
+ }
cv_signal(&dbuf_evict_cv);
}
}
+/*
+ * Since dbuf cache size is a fraction of target ARC size, ARC calls this when
+ * its target size is reduced due to memory pressure.
+ *
+ * Nothing is evicted here directly. The current size of the DB_DBUF_CACHE
+ * cache is sampled once and, only if it exceeds dbuf_cache_target_bytes(),
+ * dbuf_evict_cv is signalled (presumably waking the dbuf evict thread --
+ * confirm against the waiter on that condition variable). When the cache
+ * is already at or below the target this is a no-op.
+ */
+void
+dbuf_cache_reduce_target_size(void)
+{
+ uint64_t size = zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
+
+ if (size > dbuf_cache_target_bytes())
+ cv_signal(&dbuf_evict_cv);
+}
+
static int
dbuf_kstat_update(kstat_t *ksp, int rw)
{
@@ -1107,8 +1128,8 @@ dbuf_verify(dmu_buf_impl_t *db)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (dn == NULL) {
- ASSERT(db->db_parent == NULL);
- ASSERT(db->db_blkptr == NULL);
+ ASSERT0P(db->db_parent);
+ ASSERT0P(db->db_blkptr);
} else {
ASSERT3U(db->db.db_object, ==, dn->dn_object);
ASSERT3P(db->db_objset, ==, dn->dn_objset);
@@ -1159,7 +1180,7 @@ dbuf_verify(dmu_buf_impl_t *db)
/* db is pointed to by the dnode */
/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
- ASSERT(db->db_parent == NULL);
+ ASSERT0P(db->db_parent);
else
ASSERT(db->db_parent != NULL);
if (db->db_blkid != DMU_SPILL_BLKID)
@@ -1172,16 +1193,9 @@ dbuf_verify(dmu_buf_impl_t *db)
ASSERT3U(db->db_parent->db_level, ==, db->db_level+1);
ASSERT3U(db->db_parent->db.db_object, ==,
db->db.db_object);
- /*
- * dnode_grow_indblksz() can make this fail if we don't
- * have the parent's rwlock. XXX indblksz no longer
- * grows. safe to do this now?
- */
- if (RW_LOCK_HELD(&db->db_parent->db_rwlock)) {
- ASSERT3P(db->db_blkptr, ==,
- ((blkptr_t *)db->db_parent->db.db_data +
- db->db_blkid % epb));
- }
+ ASSERT3P(db->db_blkptr, ==,
+ ((blkptr_t *)db->db_parent->db.db_data +
+ db->db_blkid % epb));
}
}
if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) &&
@@ -1205,7 +1219,7 @@ dbuf_verify(dmu_buf_impl_t *db)
int i;
for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
+ ASSERT0(buf[i]);
}
} else {
blkptr_t *bps = db->db.db_data;
@@ -1229,11 +1243,9 @@ dbuf_verify(dmu_buf_impl_t *db)
DVA_IS_EMPTY(&bp->blk_dva[1]) &&
DVA_IS_EMPTY(&bp->blk_dva[2]));
ASSERT0(bp->blk_fill);
- ASSERT0(bp->blk_pad[0]);
- ASSERT0(bp->blk_pad[1]);
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(BP_IS_HOLE(bp));
- ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
+ ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp));
}
}
}
@@ -1247,7 +1259,7 @@ dbuf_clear_data(dmu_buf_impl_t *db)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
dbuf_evict_user(db);
- ASSERT3P(db->db_buf, ==, NULL);
+ ASSERT0P(db->db_buf);
db->db.db_data = NULL;
if (db->db_state != DB_NOFILL) {
db->db_state = DB_UNCACHED;
@@ -1275,33 +1287,6 @@ dbuf_alloc_arcbuf(dmu_buf_impl_t *db)
}
/*
- * Loan out an arc_buf for read. Return the loaned arc_buf.
- */
-arc_buf_t *
-dbuf_loan_arcbuf(dmu_buf_impl_t *db)
-{
- arc_buf_t *abuf;
-
- ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- mutex_enter(&db->db_mtx);
- if (arc_released(db->db_buf) || zfs_refcount_count(&db->db_holds) > 1) {
- int blksz = db->db.db_size;
- spa_t *spa = db->db_objset->os_spa;
-
- mutex_exit(&db->db_mtx);
- abuf = arc_loan_buf(spa, B_FALSE, blksz);
- memcpy(abuf->b_data, db->db.db_data, blksz);
- } else {
- abuf = db->db_buf;
- arc_loan_inuse_buf(abuf, db);
- db->db_buf = NULL;
- dbuf_clear_data(db);
- mutex_exit(&db->db_mtx);
- }
- return (abuf);
-}
-
-/*
* Calculate which level n block references the data at the level 0 offset
* provided.
*/
@@ -1399,13 +1384,13 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
* All reads are synchronous, so we must have a hold on the dbuf
*/
ASSERT(zfs_refcount_count(&db->db_holds) > 0);
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
if (buf == NULL) {
/* i/o error */
ASSERT(zio == NULL || zio->io_error != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT3P(db->db_buf, ==, NULL);
+ ASSERT0P(db->db_buf);
db->db_state = DB_UNCACHED;
DTRACE_SET_STATE(db, "i/o error");
} else if (db->db_level == 0 && db->db_freed_in_flight) {
@@ -1437,6 +1422,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
static int
dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn)
{
+ void* db_data;
int bonuslen, max_bonuslen;
bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
@@ -1444,21 +1430,22 @@ dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(DB_DNODE_HELD(db));
ASSERT3U(bonuslen, <=, db->db.db_size);
- db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
+ db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
if (bonuslen < max_bonuslen)
- memset(db->db.db_data, 0, max_bonuslen);
+ memset(db_data, 0, max_bonuslen);
if (bonuslen)
- memcpy(db->db.db_data, DN_BONUS(dn->dn_phys), bonuslen);
+ memcpy(db_data, DN_BONUS(dn->dn_phys), bonuslen);
+ db->db.db_data = db_data;
db->db_state = DB_CACHED;
DTRACE_SET_STATE(db, "bonus buffer filled");
return (0);
}
static void
-dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp)
+dbuf_handle_indirect_hole(void *data, dnode_t *dn, blkptr_t *dbbp)
{
- blkptr_t *bps = db->db.db_data;
+ blkptr_t *bps = data;
uint32_t indbs = 1ULL << dn->dn_indblkshift;
int n_bps = indbs >> SPA_BLKPTRSHIFT;
@@ -1483,6 +1470,7 @@ static int
dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
+ arc_buf_t *db_data;
int is_hole = bp == NULL || BP_IS_HOLE(bp);
/*
@@ -1495,13 +1483,14 @@ dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
is_hole = dnode_block_freed(dn, db->db_blkid) || BP_IS_HOLE(bp);
if (is_hole) {
- dbuf_set_data(db, dbuf_alloc_arcbuf(db));
- memset(db->db.db_data, 0, db->db.db_size);
+ db_data = dbuf_alloc_arcbuf(db);
+ memset(db_data->b_data, 0, db->db.db_size);
if (bp != NULL && db->db_level > 0 && BP_IS_HOLE(bp) &&
BP_GET_LOGICAL_BIRTH(bp) != 0) {
- dbuf_handle_indirect_hole(db, dn, bp);
+ dbuf_handle_indirect_hole(db_data->b_data, dn, bp);
}
+ dbuf_set_data(db, db_data);
db->db_state = DB_CACHED;
DTRACE_SET_STATE(db, "hole read satisfied");
return (0);
@@ -1522,7 +1511,8 @@ dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
* decrypt / authenticate them when we need to read an encrypted bonus buffer.
*/
static int
-dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
+dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn,
+ dmu_flags_t flags)
{
objset_t *os = db->db_objset;
dmu_buf_impl_t *dndb;
@@ -1530,7 +1520,7 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
zbookmark_phys_t zb;
int err;
- if ((flags & DB_RF_NO_DECRYPT) != 0 ||
+ if ((flags & DMU_READ_NO_DECRYPT) != 0 ||
!os->os_encrypted || os->os_raw_receive ||
(dndb = dn->dn_dbuf) == NULL)
return (0);
@@ -1584,7 +1574,7 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
* returning.
*/
static int
-dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
+dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags,
db_lock_type_t dblt, blkptr_t *bp, const void *tag)
{
zbookmark_phys_t zb;
@@ -1594,7 +1584,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
- ASSERT(db->db_buf == NULL);
+ ASSERT0P(db->db_buf);
ASSERT(db->db_parent == NULL ||
RW_LOCK_HELD(&db->db_parent->db_rwlock));
@@ -1631,7 +1621,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
*/
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) {
spa_log_error(db->db_objset->os_spa, &zb,
- BP_GET_LOGICAL_BIRTH(bp));
+ BP_GET_PHYSICAL_BIRTH(bp));
err = SET_ERROR(EIO);
goto early_unlock;
}
@@ -1650,7 +1640,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
zio_flags = (flags & DB_RF_CANFAIL) ?
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
- if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+ if ((flags & DMU_READ_NO_DECRYPT) && BP_IS_PROTECTED(bp))
zio_flags |= ZIO_FLAG_RAW;
/*
@@ -1692,7 +1682,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db.db_data != NULL);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
if (dr == NULL ||
@@ -1751,7 +1741,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
}
int
-dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
+dbuf_read(dmu_buf_impl_t *db, zio_t *pio, dmu_flags_t flags)
{
dnode_t *dn;
boolean_t miss = B_TRUE, need_wait = B_FALSE, prefetch;
@@ -1771,12 +1761,14 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
goto done;
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
- (flags & DB_RF_NOPREFETCH) == 0;
+ (flags & DMU_READ_NO_PREFETCH) == 0;
mutex_enter(&db->db_mtx);
- if (flags & DB_RF_PARTIAL_FIRST)
+ if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING)))
+ db->db_pending_evict = B_FALSE;
+ if (flags & DMU_PARTIAL_FIRST)
db->db_partial_read = B_TRUE;
- else if (!(flags & DB_RF_PARTIAL_MORE))
+ else if (!(flags & (DMU_PARTIAL_MORE | DMU_KEEP_CACHING)))
db->db_partial_read = B_FALSE;
miss = (db->db_state != DB_CACHED);
@@ -1817,7 +1809,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
* unauthenticated blocks, which will verify their MAC if
* the key is now available.
*/
- if ((flags & DB_RF_NO_DECRYPT) == 0 && db->db_buf != NULL &&
+ if ((flags & DMU_READ_NO_DECRYPT) == 0 && db->db_buf != NULL &&
(arc_is_encrypted(db->db_buf) ||
arc_is_unauthenticated(db->db_buf) ||
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
@@ -1865,7 +1857,8 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
if (err == 0 && prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE, miss,
- flags & DB_RF_HAVESTRUCT);
+ flags & DB_RF_HAVESTRUCT, (flags & DMU_UNCACHEDIO) ||
+ db->db_pending_evict);
}
DB_DNODE_EXIT(db);
@@ -1897,16 +1890,19 @@ done:
}
static void
-dbuf_noread(dmu_buf_impl_t *db)
+dbuf_noread(dmu_buf_impl_t *db, dmu_flags_t flags)
{
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
mutex_enter(&db->db_mtx);
+ if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING)))
+ db->db_pending_evict = B_FALSE;
+ db->db_partial_read = B_FALSE;
while (db->db_state == DB_READ || db->db_state == DB_FILL)
cv_wait(&db->db_changed, &db->db_mtx);
if (db->db_state == DB_UNCACHED) {
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
dbuf_set_data(db, dbuf_alloc_arcbuf(db));
db->db_state = DB_FILL;
DTRACE_SET_STATE(db, "assigning filled buffer");
@@ -1933,7 +1929,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* comes from dbuf_dirty() callers who must also hold a range lock.
*/
ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
if (db->db_blkid == DMU_BONUS_BLKID ||
dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
@@ -1998,7 +1994,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
mutex_enter(&dn->dn_dbufs_mtx);
db = avl_find(&dn->dn_dbufs, db_search, &where);
- ASSERT3P(db, ==, NULL);
+ ASSERT0P(db);
db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
@@ -2021,7 +2017,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db.db_data);
mutex_exit(&db->db_mtx);
continue;
}
@@ -2164,6 +2160,12 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
ASSERT(arc_released(db->db_buf));
arc_buf_thaw(db->db_buf);
}
+
+ /*
+ * Clear the rewrite flag since this is now a logical
+ * modification.
+ */
+ dr->dt.dl.dr_rewrite = B_FALSE;
}
}
@@ -2214,8 +2216,8 @@ dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
kmem_free(dr, sizeof (*dr));
return (NULL);
}
- int err = dbuf_read(parent_db, NULL,
- (DB_RF_NOPREFETCH | DB_RF_CANFAIL));
+ int err = dbuf_read(parent_db, NULL, DB_RF_CANFAIL |
+ DMU_READ_NO_PREFETCH);
if (err != 0) {
dbuf_rele(parent_db, FTAG);
kmem_free(dr, sizeof (*dr));
@@ -2268,14 +2270,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
if (dn->dn_objset->os_dsl_dataset != NULL)
rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG);
#endif
- /*
- * We make this assert for private objects as well, but after we
- * check if we're already dirty. They are allowed to re-dirty
- * in syncing context.
- */
- ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
- dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
mutex_enter(&db->db_mtx);
/*
@@ -2287,12 +2281,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
db->db_state == DB_CACHED || db->db_state == DB_FILL ||
db->db_state == DB_NOFILL);
- mutex_enter(&dn->dn_mtx);
- dnode_set_dirtyctx(dn, tx, db);
- if (tx->tx_txg > dn->dn_dirty_txg)
- dn->dn_dirty_txg = tx->tx_txg;
- mutex_exit(&dn->dn_mtx);
-
if (db->db_blkid == DMU_SPILL_BLKID)
dn->dn_have_spill = B_TRUE;
@@ -2311,13 +2299,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr_next);
}
- /*
- * Only valid if not already dirty.
- */
- ASSERT(dn->dn_object == 0 ||
- dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
-
ASSERT3U(dn->dn_nlevels, >, db->db_level);
/*
@@ -2520,6 +2501,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
{
dmu_buf_impl_t *db = dr->dr_dbuf;
+ ASSERT(MUTEX_HELD(&db->db_mtx));
if (dr->dt.dl.dr_data != db->db.db_data) {
struct dnode *dn = dr->dr_dnode;
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
@@ -2554,12 +2536,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
/*
* Due to our use of dn_nlevels below, this can only be called
- * in open context, unless we are operating on the MOS.
- * From syncing context, dn_nlevels may be different from the
- * dn_nlevels used when dbuf was dirtied.
+ * in open context, unless we are operating on the MOS or it's
+ * a special object. From syncing context, dn_nlevels may be
+ * different from the dn_nlevels used when dbuf was dirtied.
*/
ASSERT(db->db_objset ==
dmu_objset_pool(db->db_objset)->dp_meta_objset ||
+ DMU_OBJECT_IS_SPECIAL(db->db.db_object) ||
txg != spa_syncing_txg(dmu_objset_spa(db->db_objset)));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT0(db->db_level);
@@ -2642,8 +2625,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (B_FALSE);
}
-static void
-dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
+void
+dmu_buf_will_dirty_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, dmu_flags_t flags)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
boolean_t undirty = B_FALSE;
@@ -2695,7 +2678,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
 * not the underlying block that is being replaced. dbuf_undirty() will
* do brt_pending_remove() before removing the dirty record.
*/
- (void) dbuf_read(db, NULL, flags);
+ (void) dbuf_read(db, NULL, flags | DB_RF_MUST_SUCCEED);
if (undirty) {
mutex_enter(&db->db_mtx);
VERIFY(!dbuf_undirty(db, tx));
@@ -2707,8 +2690,39 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
void
dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{ /* Thin wrapper: dirties db_fake in tx with the no-prefetch read flag. */
- dmu_buf_will_dirty_impl(db_fake,
- DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx);
+ dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
+}
+
+void
+dmu_buf_will_rewrite(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+ ASSERT(tx->tx_txg != 0);
+ ASSERT(!zfs_refcount_is_zero(&db->db_holds));
+
+ /*
+ * If the dbuf is already dirty in this txg, it will be written
+ * anyway, so there's nothing to do.
+ *
+ * Note that the existing dirty record is left exactly as it is in
+ * that case: dr_rewrite is not set on it.
+ */
+ mutex_enter(&db->db_mtx);
+ if (dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
+ mutex_exit(&db->db_mtx);
+ return;
+ }
+ mutex_exit(&db->db_mtx);
+
+ /*
+ * The dbuf is not dirty, so we need to make it dirty and
+ * mark it for rewrite (preserve logical birth time).
+ *
+ * db_mtx is not held across dmu_buf_will_dirty_flags(); it is
+ * retaken below and the dirty record for this txg is looked up
+ * again rather than assumed. dr_rewrite is set only when such a
+ * record is found and the dbuf is a level-0 block.
+ */
+ dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
+
+ mutex_enter(&db->db_mtx);
+ dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
+ if (dr != NULL && db->db_level == 0)
+ dr->dt.dl.dr_rewrite = B_TRUE;
+ mutex_exit(&db->db_mtx);
}
boolean_t
@@ -2862,8 +2876,8 @@ dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx)
dbuf_clear_data(db);
}
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
db->db_state = DB_NOFILL;
DTRACE_SET_STATE(db,
@@ -2872,7 +2886,7 @@ dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx)
DBUF_VERIFY(db);
mutex_exit(&db->db_mtx);
- dbuf_noread(db);
+ dbuf_noread(db, DMU_KEEP_CACHING);
(void) dbuf_dirty(db, tx);
}
@@ -2886,18 +2900,19 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
DTRACE_SET_STATE(db, "allocating NOFILL buffer");
mutex_exit(&db->db_mtx);
- dbuf_noread(db);
+ dbuf_noread(db, DMU_KEEP_CACHING);
(void) dbuf_dirty(db, tx);
}
void
-dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
+dmu_buf_will_fill_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail,
+ dmu_flags_t flags)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT(tx->tx_txg != 0);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
@@ -2913,7 +2928,7 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
*/
if (canfail && dr) {
mutex_exit(&db->db_mtx);
- dmu_buf_will_dirty(db_fake, tx);
+ dmu_buf_will_dirty_flags(db_fake, tx, flags);
return;
}
/*
@@ -2929,10 +2944,16 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
}
mutex_exit(&db->db_mtx);
- dbuf_noread(db);
+ dbuf_noread(db, flags);
(void) dbuf_dirty(db, tx);
}
+void
+dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
+{ /* Thin wrapper: dmu_buf_will_fill_flags() with DMU_READ_NO_PREFETCH. */
+ dmu_buf_will_fill_flags(db_fake, tx, canfail, DMU_READ_NO_PREFETCH);
+}
+
/*
* This function is effectively the same as dmu_buf_will_dirty(), but
* indicates the caller expects raw encrypted data in the db, and provides
@@ -2955,8 +2976,8 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
ASSERT0(db->db_level);
ASSERT(db->db_objset->os_raw_receive);
- dmu_buf_will_dirty_impl(db_fake,
- DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
+ dmu_buf_will_dirty_flags(db_fake, tx,
+ DMU_READ_NO_PREFETCH | DMU_READ_NO_DECRYPT);
dr = dbuf_find_dirty_eq(db, tx->tx_txg);
@@ -3098,11 +3119,12 @@ dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx)
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
*/
void
-dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
+dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
+ dmu_flags_t flags)
{
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
@@ -3112,6 +3134,9 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(arc_released(buf));
mutex_enter(&db->db_mtx);
+ if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING)))
+ db->db_pending_evict = B_FALSE;
+ db->db_partial_read = B_FALSE;
while (db->db_state == DB_READ || db->db_state == DB_FILL)
cv_wait(&db->db_changed, &db->db_mtx);
@@ -3164,7 +3189,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
- ASSERT(db->db_buf == NULL);
+ ASSERT0P(db->db_buf);
dbuf_set_data(db, buf);
db->db_state = DB_FILL;
DTRACE_SET_STATE(db, "filling assigned arcbuf");
@@ -3224,7 +3249,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
}
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT0P(db->db_data_pending);
ASSERT(list_is_empty(&db->db_dirty_records));
db->db_state = DB_EVICTING;
@@ -3276,11 +3301,11 @@ dbuf_destroy(dmu_buf_impl_t *db)
db->db_parent = NULL;
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
- ASSERT(db->db_hash_next == NULL);
- ASSERT(db->db_blkptr == NULL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
+ ASSERT0P(db->db_hash_next);
+ ASSERT0P(db->db_blkptr);
+ ASSERT0P(db->db_data_pending);
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
ASSERT(!multilist_link_active(&db->db_cache_link));
@@ -3366,19 +3391,15 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
if (err)
return (err);
- err = dbuf_read(*parentp, NULL,
- (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL));
+ err = dbuf_read(*parentp, NULL, DB_RF_CANFAIL |
+ DB_RF_HAVESTRUCT | DMU_READ_NO_PREFETCH);
if (err) {
dbuf_rele(*parentp, NULL);
*parentp = NULL;
return (err);
}
- rw_enter(&(*parentp)->db_rwlock, RW_READER);
*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
(blkid & ((1ULL << epbs) - 1));
- if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
- ASSERT(BP_IS_HOLE(*bpp));
- rw_exit(&(*parentp)->db_rwlock);
return (0);
} else {
/* the block is referenced from the dnode */
@@ -3426,7 +3447,8 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_user = NULL;
db->db_user_immediate_evict = FALSE;
db->db_freed_in_flight = FALSE;
- db->db_pending_evict = FALSE;
+ db->db_pending_evict = TRUE;
+ db->db_partial_read = FALSE;
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
@@ -3560,12 +3582,9 @@ dbuf_issue_final_prefetch_done(zio_t *zio, const zbookmark_phys_t *zb,
static void
dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
{
- ASSERT(!BP_IS_REDACTED(bp) ||
- dsl_dataset_feature_is_active(
- dpa->dpa_dnode->dn_objset->os_dsl_dataset,
- SPA_FEATURE_REDACTED_DATASETS));
-
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
+ ASSERT(!BP_IS_HOLE(bp));
+ ASSERT(!BP_IS_REDACTED(bp));
+ if (BP_IS_EMBEDDED(bp))
return (dbuf_prefetch_fini(dpa, B_FALSE));
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
@@ -3640,8 +3659,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
dbuf_prefetch_fini(dpa, B_TRUE);
return;
}
- (void) dbuf_read(db, NULL,
- DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_HAVESTRUCT);
+ (void) dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT |
+ DMU_READ_NO_PREFETCH);
dbuf_rele(db, FTAG);
}
@@ -3651,10 +3670,10 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
- ASSERT(!BP_IS_REDACTED(bp) || (dpa->dpa_dnode &&
+ ASSERT(!BP_IS_REDACTED(bp) || dpa->dpa_dnode == NULL ||
dsl_dataset_feature_is_active(
dpa->dpa_dnode->dn_objset->os_dsl_dataset,
- SPA_FEATURE_REDACTED_DATASETS)));
+ SPA_FEATURE_REDACTED_DATASETS));
if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
arc_buf_destroy(abuf, private);
dbuf_prefetch_fini(dpa, B_TRUE);
@@ -3667,8 +3686,14 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
zbookmark_phys_t zb;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (dpa->dpa_aflags & ARC_FLAG_L2CACHE)
- iter_aflags |= ARC_FLAG_L2CACHE;
+ if (dpa->dpa_dnode) {
+ if (dnode_level_is_l2cacheable(bp, dpa->dpa_dnode,
+ dpa->dpa_curlevel))
+ iter_aflags |= ARC_FLAG_L2CACHE;
+ } else {
+ if (dpa->dpa_aflags & ARC_FLAG_L2CACHE)
+ iter_aflags |= ARC_FLAG_L2CACHE;
+ }
ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
@@ -3807,7 +3832,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
zbookmark_phys_t zb;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (dnode_level_is_l2cacheable(&bp, dn, level))
+ if (dnode_level_is_l2cacheable(&bp, dn, curlevel))
iter_aflags |= ARC_FLAG_L2CACHE;
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
@@ -3851,6 +3876,7 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
{
dbuf_dirty_record_t *dr = db->db_data_pending;
arc_buf_t *data = dr->dt.dl.dr_data;
+ arc_buf_t *db_data;
enum zio_compress compress_type = arc_get_compression(data);
uint8_t complevel = arc_get_complevel(data);
@@ -3861,22 +3887,21 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
uint8_t mac[ZIO_DATA_MAC_LEN];
arc_get_raw_params(data, &byteorder, salt, iv, mac);
- dbuf_set_data(db, arc_alloc_raw_buf(dn->dn_objset->os_spa, db,
+ db_data = arc_alloc_raw_buf(dn->dn_objset->os_spa, db,
dmu_objset_id(dn->dn_objset), byteorder, salt, iv, mac,
dn->dn_type, arc_buf_size(data), arc_buf_lsize(data),
- compress_type, complevel));
+ compress_type, complevel);
} else if (compress_type != ZIO_COMPRESS_OFF) {
- dbuf_set_data(db, arc_alloc_compressed_buf(
+ db_data = arc_alloc_compressed_buf(
dn->dn_objset->os_spa, db, arc_buf_size(data),
- arc_buf_lsize(data), compress_type, complevel));
+ arc_buf_lsize(data), compress_type, complevel);
} else {
- dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db,
- DBUF_GET_BUFC_TYPE(db), db->db.db_size));
+ db_data = arc_alloc_buf(dn->dn_objset->os_spa, db,
+ DBUF_GET_BUFC_TYPE(db), db->db.db_size);
}
+ memcpy(db_data->b_data, data->b_data, arc_buf_size(data));
- rw_enter(&db->db_rwlock, RW_WRITER);
- memcpy(db->db.db_data, data->b_data, arc_buf_size(data));
- rw_exit(&db->db_rwlock);
+ dbuf_set_data(db, db_data);
}
/*
@@ -3900,7 +3925,8 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
- ASSERT3U(dn->dn_nlevels, >, level);
+ if (!fail_sparse)
+ ASSERT3U(dn->dn_nlevels, >, level);
*dbp = NULL;
@@ -3914,7 +3940,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
if (fail_uncached)
return (SET_ERROR(ENOENT));
- ASSERT3P(parent, ==, NULL);
+ ASSERT0P(parent);
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
if (fail_sparse) {
if (err == 0 && bp && BP_IS_HOLE(bp))
@@ -3937,6 +3963,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
if (db->db_buf != NULL) {
arc_buf_access(db->db_buf);
+ ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
}
@@ -4017,9 +4044,10 @@ dbuf_create_bonus(dnode_t *dn)
{
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
- ASSERT(dn->dn_bonus == NULL);
+ ASSERT0P(dn->dn_bonus);
dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL,
dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID));
+ dn->dn_bonus->db_pending_evict = FALSE;
}
int
@@ -4185,8 +4213,11 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting)
* This dbuf has anonymous data associated with it.
*/
dbuf_destroy(db);
- } else if (!(DBUF_IS_CACHEABLE(db) || db->db_partial_read) ||
- db->db_pending_evict) {
+ } else if (!db->db_partial_read && !DBUF_IS_CACHEABLE(db)) {
+ /*
+ * We don't expect more accesses to the dbuf, and it
+ * is either not cacheable or was marked for eviction.
+ */
dbuf_destroy(db);
} else if (!multilist_link_active(&db->db_cache_link)) {
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
@@ -4365,7 +4396,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
* inappropriate to hook it in (i.e., nlevels mismatch).
*/
ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr);
- ASSERT(db->db_parent == NULL);
+ ASSERT0P(db->db_parent);
db->db_parent = dn->dn_dbuf;
db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid];
DBUF_VERIFY(db);
@@ -4426,7 +4457,7 @@ dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
- ASSERT3U(db->db_level, ==, 0);
+ ASSERT0(db->db_level);
if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) {
zbookmark_phys_t zb;
@@ -4537,7 +4568,7 @@ dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
/* ensure that everything is zero after our data */
for (; datap_end < datap_max; datap_end++)
- ASSERT(*datap_end == 0);
+ ASSERT0(*datap_end);
#endif
}
@@ -4545,7 +4576,7 @@ static blkptr_t *
dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
{
/* This must be a lightweight dirty record. */
- ASSERT3P(dr->dr_dbuf, ==, NULL);
+ ASSERT0P(dr->dr_dbuf);
dnode_t *dn = dr->dr_dnode;
if (dn->dn_phys->dn_nlevels == 1) {
@@ -4688,7 +4719,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
*/
if (db->db_state == DB_UNCACHED) {
/* This buffer has been freed since it was dirtied */
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db.db_data);
} else if (db->db_state == DB_FILL) {
/* This buffer was freed and is now being re-filled */
ASSERT(db->db.db_data != dr->dt.dl.dr_data);
@@ -4705,9 +4736,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
*/
dbuf_dirty_record_t *dr_head =
list_head(&db->db_dirty_records);
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
- ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
+ ASSERT0P(dr_head->dt.dl.dr_data);
ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN);
} else {
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL);
@@ -4892,7 +4923,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta;
- if (BP_GET_LOGICAL_BIRTH(bp) != 0) {
+ if (BP_GET_BIRTH(bp) != 0) {
ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
BP_GET_TYPE(bp) == dn->dn_type) ||
(db->db_blkid == DMU_SPILL_BLKID &&
@@ -5179,7 +5210,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)));
drica.drica_os = dn->dn_objset;
- drica.drica_blk_birth = BP_GET_LOGICAL_BIRTH(bp);
+ drica.drica_blk_birth = BP_GET_BIRTH(bp);
drica.drica_tx = tx;
if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback,
&drica)) {
@@ -5194,8 +5225,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
if (dn->dn_objset != spa_meta_objset(spa)) {
dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset);
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) >
- ds->ds_dir->dd_origin_txg) {
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -5313,7 +5343,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
}
ASSERT(db->db_level == 0 || data == db->db_buf);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(db->db_blkptr), <=, txg);
+ ASSERT3U(BP_GET_BIRTH(db->db_blkptr), <=, txg);
ASSERT(pio);
SET_BOOKMARK(&zb, os->os_dsl_dataset ?
@@ -5327,6 +5357,24 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
/*
+ * Set rewrite properties for zfs_rewrite() operations.
+ */
+ if (db->db_level == 0 && dr->dt.dl.dr_rewrite) {
+ zp.zp_rewrite = B_TRUE;
+
+ /*
+ * Mark physical rewrite feature for activation.
+ * This will be activated automatically during dataset sync.
+ */
+ dsl_dataset_t *ds = os->os_dsl_dataset;
+ if (!dsl_dataset_feature_is_active(ds,
+ SPA_FEATURE_PHYSICAL_REWRITE)) {
+ ds->ds_feature_activation[
+ SPA_FEATURE_PHYSICAL_REWRITE] = (void *)B_TRUE;
+ }
+ }
+
+ /*
* We copy the blkptr now (rather than when we instantiate the dirty
* record), because its value can change between open context and
* syncing context. We do not need to hold dn_struct_rwlock to read
@@ -5387,7 +5435,6 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
EXPORT_SYMBOL(dbuf_find);
EXPORT_SYMBOL(dbuf_is_metadata);
EXPORT_SYMBOL(dbuf_destroy);
-EXPORT_SYMBOL(dbuf_loan_arcbuf);
EXPORT_SYMBOL(dbuf_whichblock);
EXPORT_SYMBOL(dbuf_read);
EXPORT_SYMBOL(dbuf_unoverride);
@@ -5397,6 +5444,7 @@ EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty);
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty);
+EXPORT_SYMBOL(dmu_buf_will_rewrite);
EXPORT_SYMBOL(dmu_buf_is_dirty);
EXPORT_SYMBOL(dmu_buf_will_clone_or_dio);
EXPORT_SYMBOL(dmu_buf_will_not_fill);