Diffstat (limited to 'sys/contrib/openzfs/module/zfs/brt.c')
| -rw-r--r-- | sys/contrib/openzfs/module/zfs/brt.c | 118 |
1 file changed, 93 insertions, 25 deletions
diff --git a/sys/contrib/openzfs/module/zfs/brt.c b/sys/contrib/openzfs/module/zfs/brt.c
index 27d9ed7ea2b0..08a6bd52ab31 100644
--- a/sys/contrib/openzfs/module/zfs/brt.c
+++ b/sys/contrib/openzfs/module/zfs/brt.c
@@ -260,8 +260,8 @@ static int brt_zap_prefetch = 1;
 #define	BRT_DEBUG(...)	do { } while (0)
 #endif
 
-static int brt_zap_default_bs = 12;
-static int brt_zap_default_ibs = 12;
+static int brt_zap_default_bs = 13;
+static int brt_zap_default_ibs = 13;
 
 static kstat_t	*brt_ksp;
@@ -454,6 +454,7 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
 	VERIFY(mos_entries != 0);
 	VERIFY0(dnode_hold(spa->spa_meta_objset, mos_entries, brtvd,
 	    &brtvd->bv_mos_entries_dnode));
+	dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP);
 	rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER);
 	brtvd->bv_mos_entries = mos_entries;
 	rw_exit(&brtvd->bv_mos_entries_lock);
@@ -478,6 +479,18 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
 	    sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx));
 	BRT_DEBUG("Pool directory object created, object=%s", name);
 
+	/*
+	 * Activate the endian-fixed feature if this is the first BRT ZAP
+	 * (i.e., BLOCK_CLONING is not yet active) and the feature is enabled.
+	 */
+	if (spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN) &&
+	    !spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
+		spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
+	} else if (spa_feature_is_active(spa,
+	    SPA_FEATURE_BLOCK_CLONING_ENDIAN)) {
+		spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
+	}
+
 	spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
 }
@@ -496,8 +509,8 @@ brt_vdev_realloc(spa_t *spa, brt_vdev_t *brtvd)
 	size = (vdev_get_min_asize(vd) - 1) / spa->spa_brt_rangesize + 1;
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
-	entcount = vmem_zalloc(sizeof (entcount[0]) * size, KM_SLEEP);
 	nblocks = BRT_RANGESIZE_TO_NBLOCKS(size);
+	entcount = vmem_zalloc(nblocks * BRT_BLOCKSIZE, KM_SLEEP);
 	bitmap = kmem_zalloc(BT_SIZEOFMAP(nblocks), KM_SLEEP);
 
 	if (!brtvd->bv_initiated) {
@@ -518,9 +531,8 @@ brt_vdev_realloc(spa_t *spa, brt_vdev_t *brtvd)
 		memcpy(entcount, brtvd->bv_entcount,
 		    sizeof (entcount[0]) * MIN(size, brtvd->bv_size));
-		vmem_free(brtvd->bv_entcount,
-		    sizeof (entcount[0]) * brtvd->bv_size);
 		onblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size);
+		vmem_free(brtvd->bv_entcount, onblocks * BRT_BLOCKSIZE);
 		memcpy(bitmap, brtvd->bv_bitmap, MIN(BT_SIZEOFMAP(nblocks),
 		    BT_SIZEOFMAP(onblocks)));
 		kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(onblocks));
@@ -569,13 +581,14 @@ brt_vdev_load(spa_t *spa, brt_vdev_t *brtvd)
 	 */
 	error = dmu_read(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0,
 	    MIN(brtvd->bv_size, bvphys->bvp_size) * sizeof (uint16_t),
-	    brtvd->bv_entcount, DMU_READ_NO_PREFETCH);
+	    brtvd->bv_entcount, DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO);
 	if (error != 0)
 		return (error);
 
 	ASSERT(bvphys->bvp_mos_entries != 0);
 	VERIFY0(dnode_hold(spa->spa_meta_objset, bvphys->bvp_mos_entries,
 	    brtvd, &brtvd->bv_mos_entries_dnode));
+	dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP);
 	rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER);
 	brtvd->bv_mos_entries = bvphys->bvp_mos_entries;
 	rw_exit(&brtvd->bv_mos_entries_lock);
@@ -601,9 +614,9 @@ brt_vdev_dealloc(brt_vdev_t *brtvd)
 	ASSERT(brtvd->bv_initiated);
 	ASSERT0(avl_numnodes(&brtvd->bv_tree));
 
-	vmem_free(brtvd->bv_entcount, sizeof (uint16_t) * brtvd->bv_size);
-	brtvd->bv_entcount = NULL;
 	uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size);
+	vmem_free(brtvd->bv_entcount, nblocks * BRT_BLOCKSIZE);
+	brtvd->bv_entcount = NULL;
 	kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(nblocks));
 	brtvd->bv_bitmap = NULL;
@@ -658,6 +671,8 @@ brt_vdev_destroy(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
 	rw_exit(&brtvd->bv_lock);
 
 	spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
+	if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN))
+		spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
 }
 
 static void
@@ -793,10 +808,10 @@ brt_vdev_sync(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
 		/*
 		 * TODO: Walk brtvd->bv_bitmap and write only the dirty blocks.
 		 */
-		dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0,
-		    brtvd->bv_size * sizeof (brtvd->bv_entcount[0]),
-		    brtvd->bv_entcount, tx);
 		uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size);
+		dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0,
+		    nblocks * BRT_BLOCKSIZE, brtvd->bv_entcount, tx,
+		    DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO);
 		memset(brtvd->bv_bitmap, 0, BT_SIZEOFMAP(nblocks));
 		brtvd->bv_entcount_dirty = FALSE;
 	}
@@ -855,16 +870,29 @@ brt_entry_fill(const blkptr_t *bp, brt_entry_t *bre, uint64_t *vdevidp)
 	*vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]);
 }
 
+static boolean_t
+brt_has_endian_fixed(spa_t *spa)
+{
+	return (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN));
+}
+
 static int
-brt_entry_lookup(brt_vdev_t *brtvd, brt_entry_t *bre)
+brt_entry_lookup(spa_t *spa, brt_vdev_t *brtvd, brt_entry_t *bre)
 {
 	uint64_t off = BRE_OFFSET(bre);
 
 	if (brtvd->bv_mos_entries == 0)
 		return (SET_ERROR(ENOENT));
 
-	return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
-	    &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), &bre->bre_count));
+	if (brt_has_endian_fixed(spa)) {
+		return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
+		    &off, BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+		    &bre->bre_count));
+	} else {
+		return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
+		    &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+		    &bre->bre_count));
+	}
 }
 
 /*
@@ -1056,7 +1084,7 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp)
 	}
 	rw_exit(&brtvd->bv_lock);
 
-	error = brt_entry_lookup(brtvd, &bre_search);
+	error = brt_entry_lookup(spa, brtvd, &bre_search);
 	/* bre_search now contains correct bre_count */
 	if (error == ENOENT) {
 		BRTSTAT_BUMP(brt_decref_no_entry);
@@ -1118,7 +1146,7 @@ brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
 	bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
 	if (bre == NULL) {
 		rw_exit(&brtvd->bv_lock);
-		error = brt_entry_lookup(brtvd, &bre_search);
+		error = brt_entry_lookup(spa, brtvd, &bre_search);
 		if (error == ENOENT) {
 			refcnt = 0;
 		} else {
@@ -1270,10 +1298,18 @@ brt_pending_apply_vdev(spa_t *spa, brt_vdev_t *brtvd, uint64_t txg)
 		uint64_t off = BRE_OFFSET(bre);
 		if (brtvd->bv_mos_entries != 0 &&
 		    brt_vdev_lookup(spa, brtvd, off)) {
-			int error = zap_lookup_uint64_by_dnode(
-			    brtvd->bv_mos_entries_dnode, &off,
-			    BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
-			    &bre->bre_count);
+			int error;
+			if (brt_has_endian_fixed(spa)) {
+				error = zap_lookup_uint64_by_dnode(
+				    brtvd->bv_mos_entries_dnode, &off,
+				    BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+				    &bre->bre_count);
+			} else {
+				error = zap_lookup_uint64_by_dnode(
+				    brtvd->bv_mos_entries_dnode, &off,
+				    BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+				    &bre->bre_count);
+			}
 			if (error == 0) {
 				BRTSTAT_BUMP(brt_addref_entry_on_disk);
 			} else {
@@ -1326,7 +1362,7 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
 }
 
 static void
-brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
+brt_sync_entry(spa_t *spa, dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
 {
 	uint64_t off = BRE_OFFSET(bre);
 
@@ -1337,9 +1373,15 @@ brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
 		    BRT_KEY_WORDS, tx);
 		VERIFY(error == 0 || error == ENOENT);
 	} else {
-		VERIFY0(zap_update_uint64_by_dnode(dn, &off,
-		    BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
-		    &bre->bre_count, tx));
+		if (brt_has_endian_fixed(spa)) {
+			VERIFY0(zap_update_uint64_by_dnode(dn, &off,
+			    BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+			    &bre->bre_count, tx));
+		} else {
+			VERIFY0(zap_update_uint64_by_dnode(dn, &off,
+			    BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+			    &bre->bre_count, tx));
+		}
 	}
 }
@@ -1368,7 +1410,8 @@ brt_sync_table(spa_t *spa, dmu_tx_t *tx)
 		void *c = NULL;
 		while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
-			brt_sync_entry(brtvd->bv_mos_entries_dnode, bre, tx);
+			brt_sync_entry(spa, brtvd->bv_mos_entries_dnode, bre,
+			    tx);
 			kmem_cache_free(brt_entry_cache, bre);
 		}
 
@@ -1468,6 +1511,31 @@ brt_load(spa_t *spa)
 }
 
 void
+brt_prefetch_all(spa_t *spa)
+{
+	/*
+	 * Load all BRT entries for each vdev. This is intended to perform
+	 * a prefetch on all such blocks. For the same reason that brt_prefetch
+	 * (called from brt_pending_add) isn't locked, this is also not locked.
+	 */
+	brt_rlock(spa);
+	for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
+		brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
+		brt_unlock(spa);
+
+		rw_enter(&brtvd->bv_mos_entries_lock, RW_READER);
+		if (brtvd->bv_mos_entries != 0) {
+			(void) zap_prefetch_object(spa->spa_meta_objset,
+			    brtvd->bv_mos_entries);
+		}
+		rw_exit(&brtvd->bv_mos_entries_lock);
+
+		brt_rlock(spa);
+	}
+	brt_unlock(spa);
+}
+
+void
 brt_unload(spa_t *spa)
 {
 	if (spa->spa_brt_rangesize == 0)
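
Note on the recurring zap_lookup/zap_update argument swap above, which is the heart of the change: the legacy BRT stored each 64-bit reference count as eight 1-byte ZAP integers (integer_size 1, num_integers sizeof (bre->bre_count)), so the bytes landed on disk in the writing host's byte order and the ZAP layer had nothing to byteswap when the pool was imported on a machine of the opposite endianness. With SPA_FEATURE_BLOCK_CLONING_ENDIAN active, the count is stored as a single 8-byte integer (integer_size sizeof (bre->bre_count), num_integers 1), which ZAP byteswaps correctly on a foreign-endian import. Below is a minimal sketch of the two call shapes, assuming the zap_update_uint64_by_dnode() interface used in the hunks above; brt_zap_update_refcount() is a hypothetical helper for illustration only, not part of the patch:

	/*
	 * Hypothetical helper (illustration only, not in the patch): write a
	 * BRT reference count using whichever ZAP encoding the pool's feature
	 * set dictates, mirroring the if/else pattern added throughout brt.c.
	 */
	static int
	brt_zap_update_refcount(spa_t *spa, dnode_t *dn, uint64_t off,
	    uint64_t refcount, dmu_tx_t *tx)
	{
		if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN)) {
			/* One 8-byte integer: byteswapped on foreign-endian import. */
			return (zap_update_uint64_by_dnode(dn, &off, BRT_KEY_WORDS,
			    sizeof (refcount), 1, &refcount, tx));
		} else {
			/* Eight 1-byte integers: raw host-order bytes (legacy layout). */
			return (zap_update_uint64_by_dnode(dn, &off, BRT_KEY_WORDS,
			    1, sizeof (refcount), &refcount, tx));
		}
	}

The allocation changes are related housekeeping: bv_entcount is now allocated, freed, and synced as whole BRT blocks (nblocks * BRT_BLOCKSIZE) rather than bv_size * sizeof (uint16_t), so the dmu_read()/dmu_write() of the entcount array always transfers full blocks.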
