diff options
author | Martin Matuska <mm@FreeBSD.org> | 2022-11-16 20:25:24 +0000 |
---|---|---|
committer | Martin Matuska <mm@FreeBSD.org> | 2022-11-16 20:27:42 +0000 |
commit | dbd5678dca91abcefe8d046aa2f9b66497a95ffb (patch) | |
tree | 23c7cf5ccced42596b6f6da3c8450e86124f5248 /sys/contrib/openzfs/module/zfs | |
parent | 61b146ba43cd3886c81e79b37fdc665d6e1d74b8 (diff) | |
parent | 2163cde450d0898b5f7bac16afb4e238485411ff (diff) | |
download | src-dbd5678dca91abcefe8d046aa2f9b66497a95ffb.tar.gz src-dbd5678dca91abcefe8d046aa2f9b66497a95ffb.zip |
zfs: merge openzfs/zfs@2163cde45
Notable upstream pull request merges:
#13680 Add options to zfs redundant_metadata property
#13758 Allow mounting snapshots in .zfs/snapshot as a regular user
#13838 quota: disable quota check for ZVOL
#13839 quota: extend quota for dataset
#13973 Fix memory leaks in dmu_send()/dmu_send_obj()
#13977 Avoid unnecessary metaslab_check_free calling
#13978 PAM: Fix unchecked return value from zfs_key_config_load()
#13979 Handle possible null pointers from malloc/strdup/strndup()
#13997 zstream: allow decompress to fix metadata for uncompressed
records
#13998 zvol_wait logic may terminate prematurely
#14001 FreeBSD: Fix a pair of bugs in zfs_fhtovp()
#14003 Stop ganging due to past vdev write errors
#14039 Optimize microzaps
#14050 Fix draid2+2s metadata error on simultaneous 2 drive failures
#14062 zed: Avoid core dump if wholedisk property does not exist
#14077 Propagate extent_bytes change to autotrim thread
#14079 FreeBSD: vn_flush_cached_data: observe vnode locking contract
#14093 Fix ARC target collapse when zfs_arc_meta_limit_percent=100
#14106 Add ability to recompress send streams with new compression
algorithm
#14119 Deny receiving into encrypted datasets if the keys are not
loaded
#14120 Fix arc_p aggressive increase
#14129 zed: Prevent special vdev from being replaced by hot spare
#14133 Expose zfs_vdev_open_timeout_ms as a tunable
#14135 FreeBSD: Fix out of bounds read in zfs_ioctl_ozfs_to_legacy()
#14152 Adds the `-p` option to `zfs holds`
#14161 Handle and detect #13709's unlock regression
Obtained from: OpenZFS
OpenZFS commit: 2163cde450d0898b5f7bac16afb4e238485411ff
Diffstat (limited to 'sys/contrib/openzfs/module/zfs')
49 files changed, 959 insertions, 466 deletions
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c index 11a1e5112544..d4921d0ba7db 100644 --- a/sys/contrib/openzfs/module/zfs/abd.c +++ b/sys/contrib/openzfs/module/zfs/abd.c @@ -667,15 +667,15 @@ abd_return_buf(abd_t *abd, void *buf, size_t n) { abd_verify(abd); ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif if (abd_is_linear(abd)) { ASSERT3P(buf, ==, abd_to_buf(abd)); } else { ASSERT0(abd_cmp_buf(abd, buf, n)); zio_buf_free(buf, n); } -#ifdef ZFS_DEBUG - (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); -#endif } void diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 33865f715b0f..f51f427c1bfd 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -419,12 +419,12 @@ boolean_t arc_warm; /* * These tunables are for performance analysis. */ -unsigned long zfs_arc_max = 0; -unsigned long zfs_arc_min = 0; -unsigned long zfs_arc_meta_limit = 0; -unsigned long zfs_arc_meta_min = 0; -static unsigned long zfs_arc_dnode_limit = 0; -static unsigned long zfs_arc_dnode_reduce_percent = 10; +uint64_t zfs_arc_max = 0; +uint64_t zfs_arc_min = 0; +uint64_t zfs_arc_meta_limit = 0; +uint64_t zfs_arc_meta_min = 0; +static uint64_t zfs_arc_dnode_limit = 0; +static uint_t zfs_arc_dnode_reduce_percent = 10; static uint_t zfs_arc_grow_retry = 0; static uint_t zfs_arc_shrink_shift = 0; static uint_t zfs_arc_p_min_shift = 0; @@ -449,17 +449,17 @@ int zfs_compressed_arc_enabled = B_TRUE; * ARC will evict meta buffers that exceed arc_meta_limit. This * tunable make arc_meta_limit adjustable for different workloads. */ -static unsigned long zfs_arc_meta_limit_percent = 75; +static uint64_t zfs_arc_meta_limit_percent = 75; /* * Percentage that can be consumed by dnodes of ARC meta buffers. 
*/ -static unsigned long zfs_arc_dnode_limit_percent = 10; +static uint_t zfs_arc_dnode_limit_percent = 10; /* * These tunables are Linux-specific */ -static unsigned long zfs_arc_sys_free = 0; +static uint64_t zfs_arc_sys_free = 0; static uint_t zfs_arc_min_prefetch_ms = 0; static uint_t zfs_arc_min_prescient_prefetch_ms = 0; static int zfs_arc_p_dampener_disable = 1; @@ -781,12 +781,12 @@ uint64_t zfs_crc64_table[256]; #define L2ARC_FEED_TYPES 4 /* L2ARC Performance Tunables */ -unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ -unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ -unsigned long l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ -unsigned long l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; -unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ -unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ +uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ +uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ +uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ +uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; +uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ +uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ int l2arc_feed_again = B_TRUE; /* turbo warmup */ int l2arc_norw = B_FALSE; /* no reads during writes */ @@ -909,7 +909,7 @@ static int l2arc_mfuonly = 0; * will vary depending of how well the specific device handles * these commands. */ -static unsigned long l2arc_trim_ahead = 0; +static uint64_t l2arc_trim_ahead = 0; /* * Performance tuning of L2ARC persistence: @@ -925,7 +925,7 @@ static unsigned long l2arc_trim_ahead = 0; * not to waste space. 
*/ static int l2arc_rebuild_enabled = B_TRUE; -static unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024; +static uint64_t l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024; /* L2ARC persistence rebuild control routines. */ void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen); @@ -3939,7 +3939,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted) * dropping from L1+L2 cached to L2-only, * realloc to remove the L1 header. */ - hdr = arc_hdr_realloc(hdr, hdr_full_cache, + (void) arc_hdr_realloc(hdr, hdr_full_cache, hdr_l2only_cache); *real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE; } else { @@ -4469,7 +4469,7 @@ restart: * meta buffers. Requests to the upper layers will be made with * increasingly large scan sizes until the ARC is below the limit. */ - if (meta_used > arc_meta_limit) { + if (meta_used > arc_meta_limit || arc_available_memory() < 0) { if (type == ARC_BUFC_DATA) { type = ARC_BUFC_METADATA; } else { @@ -5136,7 +5136,7 @@ arc_adapt(int bytes, arc_state_t *state) if (!zfs_arc_p_dampener_disable) mult = MIN(mult, 10); /* avoid wild arc_p adjustment */ - arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult); + arc_p = MIN(arc_c - arc_p_min, arc_p + (uint64_t)bytes * mult); } else if (state == arc_mfu_ghost) { uint64_t delta; @@ -5173,7 +5173,7 @@ arc_adapt(int bytes, arc_state_t *state) atomic_add_64(&arc_c, (int64_t)bytes); if (arc_c > arc_c_max) arc_c = arc_c_max; - else if (state == arc_anon) + else if (state == arc_anon && arc_p < arc_c >> 1) atomic_add_64(&arc_p, (int64_t)bytes); if (arc_p > arc_c) arc_p = arc_c; @@ -5386,7 +5386,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag, if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c && hdr->b_l1hdr.b_state == arc_anon && (zfs_refcount_count(&arc_anon->arcs_size) + - zfs_refcount_count(&arc_mru->arcs_size) > arc_p)) + zfs_refcount_count(&arc_mru->arcs_size) > arc_p && + arc_p < arc_c >> 1)) arc_p = MIN(arc_c, arc_p + size); } } 
@@ -8539,6 +8540,7 @@ l2arc_dev_get_next(void) else if (next == first) break; + ASSERT3P(next, !=, NULL); } while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild || next->l2ad_trim_all); @@ -11076,20 +11078,20 @@ EXPORT_SYMBOL(arc_add_prune_callback); EXPORT_SYMBOL(arc_remove_prune_callback); ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min, - param_get_ulong, ZMOD_RW, "Minimum ARC size in bytes"); + spl_param_get_u64, ZMOD_RW, "Minimum ARC size in bytes"); ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max, - param_get_ulong, ZMOD_RW, "Maximum ARC size in bytes"); + spl_param_get_u64, ZMOD_RW, "Maximum ARC size in bytes"); -ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long, - param_get_ulong, ZMOD_RW, "Metadata limit for ARC size in bytes"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_u64, + spl_param_get_u64, ZMOD_RW, "Metadata limit for ARC size in bytes"); ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent, - param_set_arc_long, param_get_ulong, ZMOD_RW, + param_set_arc_int, param_get_uint, ZMOD_RW, "Percent of ARC size for ARC meta limit"); -ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long, - param_get_ulong, ZMOD_RW, "Minimum ARC metadata size in bytes"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_u64, + spl_param_get_u64, ZMOD_RW, "Minimum ARC metadata size in bytes"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW, "Meta objects to scan for prune"); @@ -11128,25 +11130,25 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms, param_set_arc_int, param_get_uint, ZMOD_RW, "Min life of prescient prefetched block in ms"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, U64, ZMOD_RW, "Max write bytes per interval"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, U64, 
ZMOD_RW, "Extra write bytes during device warmup"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, U64, ZMOD_RW, "Number of max device writes to precache"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, U64, ZMOD_RW, "Compressed l2arc_headroom multiplier"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, U64, ZMOD_RW, "TRIM ahead L2ARC write size multiplier"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, U64, ZMOD_RW, "Seconds between L2ARC writing"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, U64, ZMOD_RW, "Min feed interval in milliseconds"); ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, noprefetch, INT, ZMOD_RW, @@ -11164,7 +11166,7 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW, "Rebuild the L2ARC when importing a pool"); -ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, U64, ZMOD_RW, "Min size in bytes to write rebuild log blocks in L2ARC"); ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW, @@ -11176,17 +11178,17 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW, ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int, param_get_uint, ZMOD_RW, "System free memory I/O throttle in bytes"); -ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long, - param_get_ulong, ZMOD_RW, "System free memory target size in bytes"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_u64, + spl_param_get_u64, ZMOD_RW, "System free memory target size in bytes"); -ZFS_MODULE_PARAM_CALL(zfs_arc, 
zfs_arc_, dnode_limit, param_set_arc_long, - param_get_ulong, ZMOD_RW, "Minimum bytes of dnodes in ARC"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_u64, + spl_param_get_u64, ZMOD_RW, "Minimum bytes of dnodes in ARC"); ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent, - param_set_arc_long, param_get_ulong, ZMOD_RW, + param_set_arc_int, param_get_uint, ZMOD_RW, "Percent of ARC meta buffers for dnodes"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, UINT, ZMOD_RW, "Percentage of excess dnodes to try to unpin"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c index f0a9222a4308..4c25afaa8199 100644 --- a/sys/contrib/openzfs/module/zfs/btree.c +++ b/sys/contrib/openzfs/module/zfs/btree.c @@ -102,7 +102,7 @@ zfs_btree_poison_node(zfs_btree_t *tree, zfs_btree_hdr_t *hdr) (void) memset(leaf->btl_elems, 0x0f, hdr->bth_first * size); (void) memset(leaf->btl_elems + (hdr->bth_first + hdr->bth_count) * size, 0x0f, - BTREE_LEAF_ESIZE - + tree->bt_leaf_size - offsetof(zfs_btree_leaf_t, btl_elems) - (hdr->bth_first + hdr->bth_count) * size); } #endif @@ -173,16 +173,44 @@ zfs_btree_fini(void) kmem_cache_destroy(zfs_btree_leaf_cache); } +static void * +zfs_btree_leaf_alloc(zfs_btree_t *tree) +{ + if (tree->bt_leaf_size == BTREE_LEAF_SIZE) + return (kmem_cache_alloc(zfs_btree_leaf_cache, KM_SLEEP)); + else + return (kmem_alloc(tree->bt_leaf_size, KM_SLEEP)); +} + +static void +zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr) +{ + if (tree->bt_leaf_size == BTREE_LEAF_SIZE) + return (kmem_cache_free(zfs_btree_leaf_cache, ptr)); + else + return (kmem_free(ptr, tree->bt_leaf_size)); +} + void zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *), size_t size) { - ASSERT3U(size, <=, BTREE_LEAF_ESIZE / 2); + 
zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE); +} + +void +zfs_btree_create_custom(zfs_btree_t *tree, + int (*compar) (const void *, const void *), + size_t size, size_t lsize) +{ + size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems); + ASSERT3U(size, <=, esize / 2); memset(tree, 0, sizeof (*tree)); tree->bt_compar = compar; tree->bt_elem_size = size; - tree->bt_leaf_cap = P2ALIGN(BTREE_LEAF_ESIZE / size, 2); + tree->bt_leaf_size = lsize; + tree->bt_leaf_cap = P2ALIGN(esize / size, 2); tree->bt_height = -1; tree->bt_bulk = NULL; } @@ -290,7 +318,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where) zfs_btree_core_t *node = NULL; uint32_t child = 0; - uint64_t depth = 0; + uint32_t depth = 0; /* * Iterate down the tree, finding which child the value should be in @@ -811,8 +839,7 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, move_count++; } tree->bt_num_nodes++; - zfs_btree_leaf_t *new_leaf = kmem_cache_alloc(zfs_btree_leaf_cache, - KM_SLEEP); + zfs_btree_leaf_t *new_leaf = zfs_btree_leaf_alloc(tree); zfs_btree_hdr_t *new_hdr = &new_leaf->btl_hdr; new_hdr->bth_parent = leaf->btl_hdr.bth_parent; new_hdr->bth_first = (tree->bt_bulk ? 
0 : capacity / 4) + @@ -1078,8 +1105,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value, ASSERT0(where->bti_offset); tree->bt_num_nodes++; - zfs_btree_leaf_t *leaf = kmem_cache_alloc(zfs_btree_leaf_cache, - KM_SLEEP); + zfs_btree_leaf_t *leaf = zfs_btree_leaf_alloc(tree); tree->bt_root = &leaf->btl_hdr; tree->bt_height++; @@ -1378,7 +1404,7 @@ zfs_btree_node_destroy(zfs_btree_t *tree, zfs_btree_hdr_t *node) { tree->bt_num_nodes--; if (!zfs_btree_is_core(node)) { - kmem_cache_free(zfs_btree_leaf_cache, node); + zfs_btree_leaf_free(tree, node); } else { kmem_free(node, sizeof (zfs_btree_core_t) + BTREE_CORE_ELEMS * tree->bt_elem_size); @@ -1991,7 +2017,7 @@ zfs_btree_verify_counts(zfs_btree_t *tree) */ static uint64_t zfs_btree_verify_height_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr, - int64_t height) + int32_t height) { if (!zfs_btree_is_core(hdr)) { VERIFY0(height); @@ -2117,8 +2143,10 @@ zfs_btree_verify_poison_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr) zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr; for (size_t i = 0; i < hdr->bth_first * size; i++) VERIFY3U(leaf->btl_elems[i], ==, 0x0f); + size_t esize = tree->bt_leaf_size - + offsetof(zfs_btree_leaf_t, btl_elems); for (size_t i = (hdr->bth_first + hdr->bth_count) * size; - i < BTREE_LEAF_ESIZE; i++) + i < esize; i++) VERIFY3U(leaf->btl_elems[i], ==, 0x0f); } else { zfs_btree_core_t *node = (zfs_btree_core_t *)hdr; diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c index b63f42a21e44..57b8faf213eb 100644 --- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c +++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c @@ -128,8 +128,13 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) " snprintf() for kstat name returned %d", (unsigned long long)dmu_objset_id(objset), n); return (SET_ERROR(EINVAL)); + } else if (n >= KSTAT_STRLEN) { + zfs_dbgmsg("failed to create dataset kstat for objset %lld: " + "kstat name length (%d) 
exceeds limit (%d)", + (unsigned long long)dmu_objset_id(objset), + n, KSTAT_STRLEN); + return (SET_ERROR(ENAMETOOLONG)); } - ASSERT3U(n, <, KSTAT_STRLEN); kstat_t *kstat = kstat_create(kstat_module_name, 0, kstat_name, "dataset", KSTAT_TYPE_NAMED, diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index db1123d37d98..7982d9702896 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -227,8 +227,8 @@ typedef struct dbuf_cache { dbuf_cache_t dbuf_caches[DB_CACHE_MAX]; /* Size limits for the caches */ -static unsigned long dbuf_cache_max_bytes = ULONG_MAX; -static unsigned long dbuf_metadata_cache_max_bytes = ULONG_MAX; +static uint64_t dbuf_cache_max_bytes = UINT64_MAX; +static uint64_t dbuf_metadata_cache_max_bytes = UINT64_MAX; /* Set the default sizes of the caches to log2 fraction of arc size */ static uint_t dbuf_cache_shift = 5; @@ -1549,7 +1549,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, uint32_t aflags = ARC_FLAG_NOWAIT; int err, zio_flags; - err = zio_flags = 0; DB_DNODE_ENTER(db); dn = DB_DNODE(db); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); @@ -2687,6 +2686,7 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx) dbuf_dirty_record_t *dr; dr = list_head(&db->db_dirty_records); + ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; dl->dr_overridden_by = *bp; @@ -2748,6 +2748,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, dmu_buf_will_not_fill(dbuf, tx); dr = list_head(&db->db_dirty_records); + ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; encode_embedded_bp_compressed(&dl->dr_overridden_by, @@ -5120,7 +5121,7 @@ EXPORT_SYMBOL(dmu_buf_set_user_ie); EXPORT_SYMBOL(dmu_buf_get_user); EXPORT_SYMBOL(dmu_buf_get_blkptr); -ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, U64, ZMOD_RW, 
"Maximum size in bytes of the dbuf cache."); ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW, @@ -5129,7 +5130,7 @@ ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, lowater_pct, UINT, ZMOD_RW, "Percentage below dbuf_cache_max_bytes when dbuf eviction stops."); -ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, U64, ZMOD_RW, "Maximum size in bytes of dbuf metadata cache."); ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, cache_shift, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 9e67eb51f415..45304e7ddf7a 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -28,6 +28,7 @@ * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, Klara Inc. * Copyright (c) 2019, Allan Jude + * Copyright (c) 2022 Hewlett Packard Enterprise Development LP. */ #include <sys/dmu.h> @@ -70,7 +71,7 @@ static int zfs_nopwrite_enabled = 1; * will wait until the next TXG. * A value of zero will disable this throttle. */ -static unsigned long zfs_per_txg_dirty_frees_percent = 30; +static uint_t zfs_per_txg_dirty_frees_percent = 30; /* * Enable/disable forcing txg sync when dirty checking for holes with lseek(). 
@@ -1435,7 +1436,7 @@ dmu_return_arcbuf(arc_buf_t *buf) */ int dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd, - const zio_prop_t *zp, enum zio_flag flags, dmu_tx_t *tx) + const zio_prop_t *zp, zio_flag_t flags, dmu_tx_t *tx) { dbuf_dirty_record_t *dr = dbuf_dirty_lightweight(dn, dbuf_whichblock(dn, 0, offset), tx); @@ -1992,12 +1993,22 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) ZCHECKSUM_FLAG_EMBEDDED)) checksum = ZIO_CHECKSUM_FLETCHER_4; - if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL || - (os->os_redundant_metadata == - ZFS_REDUNDANT_METADATA_MOST && - (level >= zfs_redundant_metadata_most_ditto_level || - DMU_OT_IS_METADATA(type) || (wp & WP_SPILL)))) + switch (os->os_redundant_metadata) { + case ZFS_REDUNDANT_METADATA_ALL: copies++; + break; + case ZFS_REDUNDANT_METADATA_MOST: + if (level >= zfs_redundant_metadata_most_ditto_level || + DMU_OT_IS_METADATA(type) || (wp & WP_SPILL)) + copies++; + break; + case ZFS_REDUNDANT_METADATA_SOME: + if (DMU_OT_IS_CRITICAL(type)) + copies++; + break; + case ZFS_REDUNDANT_METADATA_NONE: + break; + } } else if (wp & WP_NOFILL) { ASSERT(level == 0); @@ -2355,7 +2366,7 @@ EXPORT_SYMBOL(dmu_ot); ZFS_MODULE_PARAM(zfs, zfs_, nopwrite_enabled, INT, ZMOD_RW, "Enable NOP writes"); -ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, UINT, ZMOD_RW, "Percentage of dirtied blocks from frees in one TXG"); ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index 4c20afcdb9c6..c17c829a04d8 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -32,6 +32,7 @@ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. * Copyright (c) 2019, Klara Inc. 
* Copyright (c) 2019, Allan Jude + * Copyright (c) 2022 Hewlett Packard Enterprise Development LP. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -287,7 +288,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval) * Inheritance and range checking should have been done by now. */ ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL || - newval == ZFS_REDUNDANT_METADATA_MOST); + newval == ZFS_REDUNDANT_METADATA_MOST || + newval == ZFS_REDUNDANT_METADATA_SOME || + newval == ZFS_REDUNDANT_METADATA_NONE); os->os_redundant_metadata = newval; } @@ -479,7 +482,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; int size; - enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + zio_flag_t zio_flags = ZIO_FLAG_CANFAIL; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c index a9e4a6745905..339fb149a49f 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c @@ -646,7 +646,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) * so add the DS_HOLD_FLAG_DECRYPT flag only if we are dealing * with a dataset we may encrypt. 
*/ - if (drba->drba_dcp != NULL && + if (drba->drba_dcp == NULL || drba->drba_dcp->cp_crypt != ZIO_CRYPT_OFF) { dsflags |= DS_HOLD_FLAG_DECRYPT; } @@ -1344,7 +1344,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw, dnode_t *dn; abd_t *abd = rrd->abd; zio_cksum_t bp_cksum = bp->blk_cksum; - enum zio_flag flags = ZIO_FLAG_SPECULATIVE | + zio_flag_t flags = ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL; if (rwa->raw) @@ -2186,7 +2186,7 @@ flush_write_batch_impl(struct receive_writer_arg *rwa) zio_prop_t zp; dmu_write_policy(rwa->os, dn, 0, 0, &zp); - enum zio_flag zio_flags = 0; + zio_flag_t zio_flags = 0; if (rwa->raw) { zp.zp_encrypt = B_TRUE; diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index 4ee3ffc352b8..ccb7eb20756d 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -934,7 +934,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); if (BP_GET_TYPE(bp) == DMU_OT_SA) { arc_flags_t aflags = ARC_FLAG_WAIT; - enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + zio_flag_t zioflags = ZIO_FLAG_CANFAIL; if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) { ASSERT(BP_IS_PROTECTED(bp)); @@ -1654,7 +1654,7 @@ issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range) !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) && !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp)); - enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + zio_flag_t zioflags = ZIO_FLAG_CANFAIL; if (srta->featureflags & DMU_BACKUP_FEATURE_RAW) { zioflags |= ZIO_FLAG_RAW; @@ -2511,8 +2511,7 @@ dmu_send_impl(struct dmu_send_params *dspp) } if (featureflags & DMU_BACKUP_FEATURE_RAW) { - uint64_t ivset_guid = (ancestor_zb != NULL) ? 
- ancestor_zb->zbm_ivset_guid : 0; + uint64_t ivset_guid = ancestor_zb->zbm_ivset_guid; nvlist_t *keynvl = NULL; ASSERT(os->os_encrypted); @@ -2716,6 +2715,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED; err = dmu_send_impl(&dspp); } + if (dspp.fromredactsnaps) + kmem_free(dspp.fromredactsnaps, + dspp.numfromredactsnaps * sizeof (uint64_t)); + dsl_dataset_rele(dspp.to_ds, FTAG); return (err); } @@ -2924,6 +2927,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, /* dmu_send_impl will call dsl_pool_rele for us. */ err = dmu_send_impl(&dspp); } else { + if (dspp.fromredactsnaps) + kmem_free(dspp.fromredactsnaps, + dspp.numfromredactsnaps * + sizeof (uint64_t)); dsl_pool_rele(dspp.dp, FTAG); } } else { diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c index 2ed75640f68d..377634c72bba 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c +++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c @@ -111,6 +111,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg, if (claim_txg == 0 || bp->blk_birth < claim_txg) return (0); + ASSERT3U(BP_GET_LSIZE(bp), !=, 0); SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -670,7 +671,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, /* See comment on ZIL traversal in dsl_scan_visitds. 
*/ if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) { - enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + zio_flag_t zio_flags = ZIO_FLAG_CANFAIL; uint32_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c index 101d2ee7b7a2..1d63d7de65a1 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c +++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c @@ -58,7 +58,7 @@ unsigned int zfetch_max_distance = 64 * 1024 * 1024; /* max bytes to prefetch indirects for per stream (default 64MB) */ unsigned int zfetch_max_idistance = 64 * 1024 * 1024; /* max number of bytes in an array_read in which we allow prefetching (1MB) */ -unsigned long zfetch_array_rd_sz = 1024 * 1024; +uint64_t zfetch_array_rd_sz = 1024 * 1024; typedef struct zfetch_stats { kstat_named_t zfetchstat_hits; @@ -565,5 +565,5 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW, "Max bytes to prefetch indirects for per stream"); -ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW, "Number of bytes in a array_read"); diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c index 8ca7ba8957aa..b95c94beff1f 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c @@ -229,7 +229,6 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp, switch (error) { case ESRCH: /* happy path: new bmark doesn't exist, proceed after switch */ - error = 0; break; case 0: error = SET_ERROR(EEXIST); diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index ce2e6ce742a2..382de208b01d 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -2671,6 
+2671,7 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, objset_phys_t *osp = buf; uint8_t portable_mac[ZIO_OBJSET_MAC_LEN]; uint8_t local_mac[ZIO_OBJSET_MAC_LEN]; + const uint8_t zeroed_mac[ZIO_OBJSET_MAC_LEN] = {0}; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); @@ -2696,8 +2697,21 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, if (memcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 || memcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) { - abd_return_buf(abd, buf, datalen); - return (SET_ERROR(ECKSUM)); + /* + * If the MAC is zeroed out, we failed to decrypt it. + * This should only arise, at least on Linux, + * if we hit edge case handling for useraccounting, since we + * shouldn't get here without bailing out on error earlier + * otherwise. + * + * So if we're in that case, we can just fall through and + * special-casing noticing that it's zero will handle it + * elsewhere, since we can just regenerate it. 
+ */ + if (memcmp(local_mac, zeroed_mac, ZIO_OBJSET_MAC_LEN) != 0) { + abd_return_buf(abd, buf, datalen); + return (SET_ERROR(ECKSUM)); + } } abd_return_buf(abd, buf, datalen); diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index 7a066b786cd0..c7577fc584af 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -3421,7 +3421,8 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) conflicting_snaps = B_TRUE; } else if (err == ESRCH) { err = 0; - } else if (err != 0) { + } + if (err != 0) { goto out; } } diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c index 1ecae0fe3865..2b33446e66af 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c @@ -92,7 +92,7 @@ * will be loaded into memory and shouldn't take up an inordinate amount of * space. We settled on ~500000 entries, corresponding to roughly 128M. 
*/ -unsigned long zfs_livelist_max_entries = 500000; +uint64_t zfs_livelist_max_entries = 500000; /* * We can approximate how much of a performance gain a livelist will give us @@ -542,6 +542,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) dle = avl_find(&dl->dl_tree, &dle_tofind, NULL); ASSERT3P(dle, !=, NULL); dle_prev = AVL_PREV(&dl->dl_tree, dle); + ASSERT3P(dle_prev, !=, NULL); dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx); @@ -1039,7 +1040,7 @@ dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t, return (err); } -ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, U64, ZMOD_RW, "Size to start the next sub-livelist in a livelist"); ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, min_percent_shared, INT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c index d93c7f08c1c2..c1afaa6aaf82 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dir.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c @@ -54,6 +54,15 @@ #include "zfs_prop.h" /* + * This controls if we verify the ZVOL quota or not. + * Currently, quotas are not implemented for ZVOLs. + * The quota size is the size of the ZVOL. + * The size of the volume already implies the ZVOL size quota. + * The quota mechanism can introduce a significant performance drop. + */ +static int zvol_enforce_quotas = B_TRUE; + +/* * Filesystem and Snapshot Limits * ------------------------------ * @@ -815,6 +824,18 @@ dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || prop == ZFS_PROP_SNAPSHOT_LIMIT); + if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { + /* + * We don't enforce the limit for temporary snapshots. This is + * indicated by a NULL cred_t argument. 
+ */ + if (cr == NULL) + return (0); + + count_prop = DD_FIELD_SNAPSHOT_COUNT; + } else { + count_prop = DD_FIELD_FILESYSTEM_COUNT; + } /* * If we're allowed to change the limit, don't enforce the limit * e.g. this can happen if a snapshot is taken by an administrative @@ -834,19 +855,6 @@ dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, if (delta == 0) return (0); - if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { - /* - * We don't enforce the limit for temporary snapshots. This is - * indicated by a NULL cred_t argument. - */ - if (cr == NULL) - return (0); - - count_prop = DD_FIELD_SNAPSHOT_COUNT; - } else { - count_prop = DD_FIELD_FILESYSTEM_COUNT; - } - /* * If an ancestor has been provided, stop checking the limit once we * hit that dir. We need this during rename so that we don't overcount @@ -1268,6 +1276,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, uint64_t quota; struct tempreserve *tr; int retval; + uint64_t ext_quota; uint64_t ref_rsrv; top_of_function: @@ -1311,7 +1320,9 @@ top_of_function: * If this transaction will result in a net free of space, * we want to let it through. */ - if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0) + if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0 || + (tx->tx_objset && dmu_objset_type(tx->tx_objset) == DMU_OST_ZVOL && + zvol_enforce_quotas == B_FALSE)) quota = UINT64_MAX; else quota = dsl_dir_phys(dd)->dd_quota; @@ -1343,7 +1354,16 @@ top_of_function: * on-disk is over quota and there are no pending changes * or deferred frees (which may free up space for us). 
*/ - if (used_on_disk + est_inflight >= quota) { + ext_quota = quota >> 5; + if (quota == UINT64_MAX) + ext_quota = 0; + + if (used_on_disk >= quota) { + /* Quota exceeded */ + mutex_exit(&dd->dd_lock); + DMU_TX_STAT_BUMP(dmu_tx_quota); + return (retval); + } else if (used_on_disk + est_inflight >= quota + ext_quota) { if (est_inflight > 0 || used_on_disk < quota) { retval = SET_ERROR(ERESTART); } else { @@ -1390,10 +1410,9 @@ top_of_function: ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0); first = B_FALSE; goto top_of_function; - - } else { - return (0); } + + return (0); } /* @@ -2474,3 +2493,7 @@ dsl_dir_cancel_waiters(dsl_dir_t *dd) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); #endif + +/* CSTYLED */ +ZFS_MODULE_PARAM(zfs, , zvol_enforce_quotas, INT, ZMOD_RW, + "Enable strict ZVOL quota enforcment"); diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c index 4fd3722a051e..5ca918a87ee1 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_pool.c +++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c @@ -99,8 +99,8 @@ * capped at zfs_dirty_data_max_max. It can also be overridden with a module * parameter. */ -unsigned long zfs_dirty_data_max = 0; -unsigned long zfs_dirty_data_max_max = 0; +uint64_t zfs_dirty_data_max = 0; +uint64_t zfs_dirty_data_max_max = 0; uint_t zfs_dirty_data_max_percent = 10; uint_t zfs_dirty_data_max_max_percent = 25; @@ -109,7 +109,7 @@ uint_t zfs_dirty_data_max_max_percent = 25; * when approaching the limit until log data is cleared out after txg sync. * It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY. */ -unsigned long zfs_wrlog_data_max = 0; +uint64_t zfs_wrlog_data_max = 0; /* * If there's at least this much dirty data (as a percentage of @@ -138,7 +138,7 @@ uint_t zfs_delay_min_dirty_percent = 60; * Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the * multiply in dmu_tx_delay(). 
*/ -unsigned long zfs_delay_scale = 1000 * 1000 * 1000 / 2000; +uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000; /* * This determines the number of threads used by the dp_sync_taskq. @@ -331,7 +331,6 @@ dsl_pool_open(dsl_pool_t *dp) /* * We might not have created the remap bpobj yet. */ - err = 0; } else { goto out; } @@ -1465,20 +1464,20 @@ ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max_percent, UINT, ZMOD_RD, ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, UINT, ZMOD_RW, "Transaction delay threshold"); -ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, U64, ZMOD_RW, "Determines the dirty space limit"); -ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, U64, ZMOD_RW, "The size limit of write-transaction zil log data"); /* zfs_dirty_data_max_max only applied at module load in arc_init(). */ -ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD, +ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, U64, ZMOD_RD, "zfs_dirty_data_max upper bound in bytes"); ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_sync_percent, UINT, ZMOD_RW, "Dirty data txg sync threshold as a percentage of zfs_dirty_data_max"); -ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, U64, ZMOD_RW, "How quickly delay approaches infinity"); ZFS_MODULE_PARAM(zfs, zfs_, sync_taskq_batch_pct, INT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c index 610e887b3fba..d1c0059092b1 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_prop.c +++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c @@ -23,6 +23,7 @@ * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright 2019 Joyent, Inc. + * Copyright (c) 2022 Hewlett Packard Enterprise Development LP. 
*/ #include <sys/zfs_context.h> @@ -41,6 +42,7 @@ #define ZPROP_INHERIT_SUFFIX "$inherit" #define ZPROP_RECVD_SUFFIX "$recvd" +#define ZPROP_IUV_SUFFIX "$iuv" static int dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) @@ -69,6 +71,17 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) return (0); } +static int +dsl_prop_known_index(zfs_prop_t prop, uint64_t value) +{ + const char *str = NULL; + if (prop != ZPROP_CONT && prop != ZPROP_INVAL && + zfs_prop_get_type(prop) == PROP_TYPE_INDEX) + return (!zfs_prop_index_to_string(prop, value, &str)); + + return (-1); +} + int dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot) @@ -81,6 +94,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, boolean_t inheriting = B_FALSE; char *inheritstr; char *recvdstr; + char *iuvstr; ASSERT(dsl_pool_config_held(dd->dd_pool)); @@ -91,6 +105,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop)); inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); /* * Note: dd may become NULL, therefore we shouldn't dereference it @@ -105,6 +120,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, inheriting = B_TRUE; } + /* Check for a iuv value. */ + err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, + iuvstr, intsz, numints, buf); + if (dsl_prop_known_index(zfs_name_to_prop(propname), + *(uint64_t *)buf) != 1) + err = ENOENT; + if (err != ENOENT) { + if (setpoint != NULL && err == 0) + dsl_dir_name(dd, setpoint); + break; + } + /* Check for a local value. 
*/ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, propname, intsz, numints, buf); @@ -155,6 +182,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, kmem_strfree(inheritstr); kmem_strfree(recvdstr); + kmem_strfree(iuvstr); return (err); } @@ -647,6 +675,45 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_rele(dd, FTAG); } + +/* + * For newer values in zfs index type properties, we add a new key + * propname$iuv (iuv = Ignore Unknown Values) to the properties zap object + * to store the new property value and store the default value in the + * existing prop key. So that the propname$iuv key is ignored by the older zfs + * versions and the default property value from the existing prop key is + * used. + */ +static void +dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname, + int intsz, int numints, const void *value, dmu_tx_t *tx) +{ + char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); + boolean_t iuv = B_FALSE; + zfs_prop_t prop = zfs_name_to_prop(propname); + + switch (prop) { + case ZFS_PROP_REDUNDANT_METADATA: + if (*(uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME || + *(uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE) + iuv = B_TRUE; + break; + default: + break; + } + + if (iuv) { + VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints, + value, tx)); + uint64_t val = zfs_prop_default_numeric(prop); + VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, + &val, tx)); + } else { + zap_remove(mos, zapobj, iuvstr, tx); + } + kmem_strfree(iuvstr); +} + void dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, zprop_source_t source, int intsz, int numints, const void *value, @@ -659,6 +726,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, const char *valstr = NULL; char *inheritstr; char *recvdstr; + char *iuvstr; char *tbuf = NULL; int err; uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); @@ -692,6 +760,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, 
const char *propname, inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); switch ((int)source) { case ZPROP_SRC_NONE: @@ -709,11 +778,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, /* * remove propname$inherit * set propname -> value + * set propname$iuv -> new property value */ err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, value, tx)); + (void) dsl_prop_set_iuv(mos, zapobj, propname, intsz, + numints, value, tx); break; case ZPROP_SRC_INHERITED: /* @@ -723,6 +795,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, */ err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); + err = zap_remove(mos, zapobj, iuvstr, tx); + ASSERT(err == 0 || err == ENOENT); if (version >= SPA_VERSION_RECVD_PROPS && dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) { dummy = 0; @@ -763,6 +837,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, kmem_strfree(inheritstr); kmem_strfree(recvdstr); + kmem_strfree(iuvstr); /* * If we are left with an empty snap zap we can destroy it. @@ -1012,6 +1087,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, propname = za.za_name; source = setpoint; + + /* Skip if iuv entries are present. */ + valstr = kmem_asprintf("%s%s", propname, + ZPROP_IUV_SUFFIX); + err = zap_contains(mos, propobj, valstr); + kmem_strfree(valstr); + if (err == 0) + continue; } else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) { /* Skip explicitly inherited entries. */ continue; @@ -1044,6 +1127,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, source = ((flags & DSL_PROP_GET_INHERITING) ? 
setpoint : ZPROP_SOURCE_VAL_RECVD); + } else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) { + (void) strlcpy(buf, za.za_name, + MIN(sizeof (buf), suffix - za.za_name + 1)); + propname = buf; + source = setpoint; + prop = zfs_name_to_prop(propname); + + if (dsl_prop_known_index(prop, + za.za_first_integer) != 1) + continue; } else { /* * For backward compatibility, skip suffixes we don't diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index f0cd1feaf55b..03c2aa313af0 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -147,13 +147,13 @@ static int zfs_scan_strict_mem_lim = B_FALSE; * overload the drives with I/O, since that is protected by * zfs_vdev_scrub_max_active. */ -static unsigned long zfs_scan_vdev_limit = 4 << 20; +static uint64_t zfs_scan_vdev_limit = 4 << 20; static uint_t zfs_scan_issue_strategy = 0; /* don't queue & sort zios, go direct */ static int zfs_scan_legacy = B_FALSE; -static unsigned long zfs_scan_max_ext_gap = 2 << 20; /* in bytes */ +static uint64_t zfs_scan_max_ext_gap = 2 << 20; /* in bytes */ /* * fill_weight is non-tunable at runtime, so we copy it at module init from @@ -192,9 +192,9 @@ static int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ static int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */ static const enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; /* max number of blocks to free in a single TXG */ -static unsigned long zfs_async_block_max_blocks = ULONG_MAX; +static uint64_t zfs_async_block_max_blocks = UINT64_MAX; /* max number of dedup blocks to free in a single TXG */ -static unsigned long zfs_max_async_dedup_frees = 100000; +static uint64_t zfs_max_async_dedup_frees = 100000; /* set to disable resilver deferring */ static int zfs_resilver_disable_defer = B_FALSE; @@ -1470,6 +1470,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg, if (claim_txg == 0 || 
bp->blk_birth < claim_txg) return (0); + ASSERT3U(BP_GET_LSIZE(bp), !=, 0); SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -4446,7 +4447,7 @@ dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd) spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER); } -ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, U64, ZMOD_RW, "Max bytes in flight per leaf vdev for scrubs and resilvers"); ZFS_MODULE_PARAM(zfs, zfs_, scrub_min_time_ms, UINT, ZMOD_RW, @@ -4470,10 +4471,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_io, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_prefetch, INT, ZMOD_RW, "Set to disable scrub prefetching"); -ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, U64, ZMOD_RW, "Max number of blocks freed in one txg"); -ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, U64, ZMOD_RW, "Max number of dedup blocks freed in one txg"); ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW, @@ -4494,7 +4495,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, UINT, ZMOD_RW, "Scan progress on-disk checkpointing interval"); -ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, U64, ZMOD_RW, "Max gap in bytes between sequential scrub / resilver I/Os"); ZFS_MODULE_PARAM(zfs, zfs_, scan_mem_lim_soft_fact, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c index 32b5cf8facd1..3f05d759770b 100644 --- a/sys/contrib/openzfs/module/zfs/fm.c +++ b/sys/contrib/openzfs/module/zfs/fm.c @@ -955,6 +955,7 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, } atomic_inc_64( &erpt_kstat_data.fmri_set_failed.value.ui64); + va_end(ap); 
return; } } diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index efcfeecd778e..c624833bc981 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -51,12 +51,12 @@ * operation, we will try to write this amount of data to each disk before * moving on to the next top-level vdev. */ -static unsigned long metaslab_aliquot = 1024 * 1024; +static uint64_t metaslab_aliquot = 1024 * 1024; /* * For testing, make some blocks above a certain size be gang blocks. */ -unsigned long metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1; +uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1; /* * In pools where the log space map feature is not enabled we touch @@ -286,7 +286,7 @@ static const int max_disabled_ms = 3; * Time (in seconds) to respect ms_max_size when the metaslab is not loaded. * To avoid 64-bit overflow, don't set above UINT32_MAX. */ -static unsigned long zfs_metaslab_max_size_cache_sec = 1 * 60 * 60; /* 1 hour */ +static uint64_t zfs_metaslab_max_size_cache_sec = 1 * 60 * 60; /* 1 hour */ /* * Maximum percentage of memory to use on storing loaded metaslabs. If loading @@ -5131,8 +5131,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, if (vd != NULL && vd->vdev_mg != NULL) { mg = vdev_get_mg(vd, mc); - if (flags & METASLAB_HINTBP_AVOID && - mg->mg_next != NULL) + if (flags & METASLAB_HINTBP_AVOID) mg = mg->mg_next; } else { mg = mca->mca_rotor; @@ -5201,12 +5200,11 @@ top: ASSERT(mg->mg_initialized); /* - * Avoid writing single-copy data to a failing, + * Avoid writing single-copy data to an unhealthy, * non-redundant vdev, unless we've already tried all * other vdevs. 
*/ - if ((vd->vdev_stat.vs_write_errors > 0 || - vd->vdev_state < VDEV_STATE_HEALTHY) && + if (vd->vdev_state < VDEV_STATE_HEALTHY && d == 0 && !try_hard && vd->vdev_children == 0) { metaslab_trace_add(zal, mg, NULL, psize, d, TRACE_VDEV_ERROR, allocator); @@ -6203,7 +6201,7 @@ metaslab_unflushed_txg(metaslab_t *ms) return (ms->ms_unflushed_txg); } -ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, U64, ZMOD_RW, "Allocation granularity (a.k.a. stripe size)"); ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_load, INT, ZMOD_RW, @@ -6251,7 +6249,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, segment_weight_enabled, INT, ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW, "Segment-based metaslab selection maximum buckets before switching"); -ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW, "Blocks larger than this size are forced to be gang blocks"); ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW, @@ -6260,7 +6258,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW, "When looking in size tree, use largest segment instead of exact fit"); -ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, ULONG, +ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, U64, ZMOD_RW, "How long to trust the cached max chunk size of a metaslab"); ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, mem_limit, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c index 92fd6c422330..ef0e01df390f 100644 --- a/sys/contrib/openzfs/module/zfs/mmp.c +++ b/sys/contrib/openzfs/module/zfs/mmp.c @@ -156,7 +156,7 @@ * vary with the I/O load and this observed value is the ub_mmp_delay which is * stored in the 
uberblock. The minimum allowed value is 100 ms. */ -ulong_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL; +uint64_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL; /* * Used to control the duration of the activity test on import. Smaller values @@ -303,8 +303,10 @@ mmp_next_leaf(spa_t *spa) do { leaf = list_next(&spa->spa_leaf_list, leaf); - if (leaf == NULL) + if (leaf == NULL) { leaf = list_head(&spa->spa_leaf_list); + ASSERT3P(leaf, !=, NULL); + } /* * We skip unwritable, offline, detached, and dRAID spare @@ -548,11 +550,11 @@ mmp_thread(void *arg) uint32_t mmp_fail_intervals = MMP_FAIL_INTVS_OK( zfs_multihost_fail_intervals); hrtime_t mmp_fail_ns = mmp_fail_intervals * mmp_interval; - boolean_t last_spa_suspended = suspended; - boolean_t last_spa_multihost = multihost; - uint64_t last_mmp_interval = mmp_interval; - uint32_t last_mmp_fail_intervals = mmp_fail_intervals; - hrtime_t last_mmp_fail_ns = mmp_fail_ns; + boolean_t last_spa_suspended; + boolean_t last_spa_multihost; + uint64_t last_mmp_interval; + uint32_t last_mmp_fail_intervals; + hrtime_t last_mmp_fail_ns; callb_cpr_t cpr; int skip_wait = 0; @@ -734,7 +736,7 @@ mmp_signal_all_threads(void) /* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval, - param_set_multihost_interval, param_get_ulong, ZMOD_RW, + param_set_multihost_interval, spl_param_get_u64, ZMOD_RW, "Milliseconds between mmp writes to each leaf"); /* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c index a2923d1664c7..894c30fcae16 100644 --- a/sys/contrib/openzfs/module/zfs/range_tree.c +++ b/sys/contrib/openzfs/module/zfs/range_tree.c @@ -369,6 +369,7 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) * invalid as soon as we do any mutating btree operations. 
*/ rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after); + ASSERT3P(rs_after, !=, NULL); rs_set_start_raw(rs_after, rt, before_start); rs_set_fill(rs_after, rt, after_fill + before_fill + fill); rs = rs_after; diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index cc367745e486..fe7051db2737 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -218,7 +218,7 @@ static int spa_load_print_vdev_tree = B_FALSE; * there are also risks of performing an inadvertent rewind as we might be * missing all the vdevs with the latest uberblocks. */ -unsigned long zfs_max_missing_tvds = 0; +uint64_t zfs_max_missing_tvds = 0; /* * The parameters below are similar to zfs_max_missing_tvds but are only @@ -5267,7 +5267,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, * If we've recovered the pool, pass back any information we * gathered while doing the load. */ - if (state == SPA_LOAD_RECOVER) { + if (state == SPA_LOAD_RECOVER && config != NULL) { fnvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info); } @@ -6803,8 +6803,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, pvd = oldvd->vdev_parent; - if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, - VDEV_ALLOC_ATTACH)) != 0) + if (spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, + VDEV_ALLOC_ATTACH) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); if (newrootvd->vdev_children != 1) @@ -6819,10 +6819,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, return (spa_vdev_exit(spa, newrootvd, txg, error)); /* - * Spares can't replace logs + * log, dedup and special vdevs should not be replaced by spares. 
*/ - if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) + if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE || + oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + } /* * A dRAID spare can only replace a child of its parent dRAID vdev. @@ -7160,7 +7162,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) * it may be that the unwritability of the disk is the reason * it's being detached! */ - error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); + (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); /* * Remove vd from its parent and compact the parent's children. @@ -8867,36 +8869,36 @@ spa_sync_props(void *arg, dmu_tx_t *tx) spa_history_log_internal(spa, "set", tx, "%s=%lld", nvpair_name(elem), (longlong_t)intval); - } else { - ASSERT(0); /* not allowed */ - } - switch (prop) { - case ZPOOL_PROP_DELEGATION: - spa->spa_delegation = intval; - break; - case ZPOOL_PROP_BOOTFS: - spa->spa_bootfs = intval; - break; - case ZPOOL_PROP_FAILUREMODE: - spa->spa_failmode = intval; - break; - case ZPOOL_PROP_AUTOTRIM: - spa->spa_autotrim = intval; - spa_async_request(spa, - SPA_ASYNC_AUTOTRIM_RESTART); - break; - case ZPOOL_PROP_AUTOEXPAND: - spa->spa_autoexpand = intval; - if (tx->tx_txg != TXG_INITIAL) + switch (prop) { + case ZPOOL_PROP_DELEGATION: + spa->spa_delegation = intval; + break; + case ZPOOL_PROP_BOOTFS: + spa->spa_bootfs = intval; + break; + case ZPOOL_PROP_FAILUREMODE: + spa->spa_failmode = intval; + break; + case ZPOOL_PROP_AUTOTRIM: + spa->spa_autotrim = intval; spa_async_request(spa, - SPA_ASYNC_AUTOEXPAND); - break; - case ZPOOL_PROP_MULTIHOST: - spa->spa_multihost = intval; - break; - default: - break; + SPA_ASYNC_AUTOTRIM_RESTART); + break; + case ZPOOL_PROP_AUTOEXPAND: + spa->spa_autoexpand = intval; + if (tx->tx_txg != TXG_INITIAL) + spa_async_request(spa, + SPA_ASYNC_AUTOEXPAND); + break; + case ZPOOL_PROP_MULTIHOST: + spa->spa_multihost = intval; + break; + 
default: + break; + } + } else { + ASSERT(0); /* not allowed */ } } @@ -10016,7 +10018,7 @@ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD, "Number of threads per IO worker taskqueue"); /* BEGIN CSTYLED */ -ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW, "Allow importing pool with up to this number of missing top-level " "vdevs (in read-only mode)"); /* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c index a837b1ce97ec..b588f7041e5c 100644 --- a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c +++ b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c @@ -158,7 +158,7 @@ * amount of checkpointed data that has been freed within them while * the pool had a checkpoint. */ -static unsigned long zfs_spa_discard_memory_limit = 16 * 1024 * 1024; +static uint64_t zfs_spa_discard_memory_limit = 16 * 1024 * 1024; int spa_checkpoint_get_stats(spa_t *spa, pool_checkpoint_stat_t *pcs) @@ -631,7 +631,7 @@ EXPORT_SYMBOL(spa_checkpoint_discard_thread); EXPORT_SYMBOL(spa_checkpoint_discard_thread_check); /* BEGIN CSTYLED */ -ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, U64, ZMOD_RW, "Limit for memory used in prefetching the checkpoint space map done " "on each vdev while discarding the checkpoint"); /* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c index 4ecce8214f6a..2878e68c6e4b 100644 --- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c +++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c @@ -188,13 +188,13 @@ static const unsigned long zfs_log_sm_blksz = 1ULL << 17; * (thus the _ppm suffix; reads as "parts per million"). As an example, * the default of 1000 allows 0.1% of memory to be used. 
*/ -static unsigned long zfs_unflushed_max_mem_ppm = 1000; +static uint64_t zfs_unflushed_max_mem_ppm = 1000; /* * Specific hard-limit in memory that ZFS allows to be used for * unflushed changes. */ -static unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30; +static uint64_t zfs_unflushed_max_mem_amt = 1ULL << 30; /* * The following tunable determines the number of blocks that can be used for @@ -243,33 +243,33 @@ static unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30; * provide upper and lower bounds for the log block limit. * [see zfs_unflushed_log_block_{min,max}] */ -static unsigned long zfs_unflushed_log_block_pct = 400; +static uint_t zfs_unflushed_log_block_pct = 400; /* * If the number of metaslabs is small and our incoming rate is high, we could * get into a situation that we are flushing all our metaslabs every TXG. Thus * we always allow at least this many log blocks. */ -static unsigned long zfs_unflushed_log_block_min = 1000; +static uint64_t zfs_unflushed_log_block_min = 1000; /* * If the log becomes too big, the import time of the pool can take a hit in * terms of performance. Thus we have a hard limit in the size of the log in * terms of blocks. */ -static unsigned long zfs_unflushed_log_block_max = (1ULL << 17); +static uint64_t zfs_unflushed_log_block_max = (1ULL << 17); /* * Also we have a hard limit in the size of the log in terms of dirty TXGs. */ -static unsigned long zfs_unflushed_log_txg_max = 1000; +static uint64_t zfs_unflushed_log_txg_max = 1000; /* * Max # of rows allowed for the log_summary. The tradeoff here is accuracy and * stability of the flushing algorithm (longer summary) vs its runtime overhead * (smaller summary is faster to traverse). */ -static unsigned long zfs_max_logsm_summary_length = 10; +static uint64_t zfs_max_logsm_summary_length = 10; /* * Tunable that sets the lower bound on the metaslabs to flush every TXG. 
@@ -282,7 +282,7 @@ static unsigned long zfs_max_logsm_summary_length = 10; * The point of this tunable is to be used in extreme cases where we really * want to flush more metaslabs than our adaptable heuristic plans to flush. */ -static unsigned long zfs_min_metaslabs_to_flush = 1; +static uint64_t zfs_min_metaslabs_to_flush = 1; /* * Tunable that specifies how far in the past do we want to look when trying to @@ -293,7 +293,7 @@ static unsigned long zfs_min_metaslabs_to_flush = 1; * average over all the blocks that we walk * [see spa_estimate_incoming_log_blocks]. */ -static unsigned long zfs_max_log_walking = 5; +static uint64_t zfs_max_log_walking = 5; /* * This tunable exists solely for testing purposes. It ensures that the log @@ -507,6 +507,7 @@ void spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone) { log_summary_entry_t *e = list_head(&spa->spa_log_summary); + ASSERT3P(e, !=, NULL); if (e->lse_txgcount > 0) e->lse_txgcount--; for (; e != NULL; e = list_head(&spa->spa_log_summary)) { @@ -690,7 +691,8 @@ spa_estimate_metaslabs_to_flush(spa_t *spa) * based on the incoming rate until we exceed it. */ if (available_blocks >= 0 && available_txgs >= 0) { - uint64_t skip_txgs = MIN(available_txgs + 1, + uint64_t skip_txgs = (incoming == 0) ? 
+ available_txgs + 1 : MIN(available_txgs + 1, (available_blocks / incoming) + 1); available_blocks -= (skip_txgs * incoming); available_txgs -= skip_txgs; @@ -1356,34 +1358,34 @@ spa_ld_log_spacemaps(spa_t *spa) } /* BEGIN CSTYLED */ -ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, U64, ZMOD_RW, "Specific hard-limit in memory that ZFS allows to be used for " "unflushed changes"); -ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, U64, ZMOD_RW, "Percentage of the overall system memory that ZFS allows to be " "used for unflushed changes (value is calculated over 1000000 for " "finer granularity)"); -ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, U64, ZMOD_RW, "Hard limit (upper-bound) in the size of the space map log " "in terms of blocks."); -ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, U64, ZMOD_RW, "Lower-bound limit for the maximum amount of blocks allowed in " "log spacemap (see zfs_unflushed_log_block_max)"); -ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, U64, ZMOD_RW, "Hard limit (upper-bound) in the size of the space map log " "in terms of dirty TXGs."); -ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, UINT, ZMOD_RW, "Tunable used to determine the number of blocks that can be used for " "the spacemap log, expressed as a percentage of the total number of " "metaslabs in the pool (e.g. 
400 means the number of log blocks is " "capped at 4 times the number of metaslabs)"); -ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, U64, ZMOD_RW, "The number of past TXGs that the flushing algorithm of the log " "spacemap feature uses to estimate incoming log blocks"); @@ -1392,8 +1394,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, keep_log_spacemaps_at_export, INT, ZMOD_RW, "during pool export/destroy"); /* END CSTYLED */ -ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, U64, ZMOD_RW, "Maximum number of rows allowed in the summary of the spacemap log"); -ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, U64, ZMOD_RW, "Minimum number of metaslabs to flush per dirty TXG"); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 102070013404..ca55d55405d3 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -304,20 +304,20 @@ int zfs_free_leak_on_eio = B_FALSE; * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting * in one of three behaviors controlled by zfs_deadman_failmode. */ -unsigned long zfs_deadman_synctime_ms = 600000UL; /* 10 min. */ +uint64_t zfs_deadman_synctime_ms = 600000UL; /* 10 min. */ /* * This value controls the maximum amount of time zio_wait() will block for an * outstanding IO. By default this is 300 seconds at which point the "hung" * behavior will be applied as described for zfs_deadman_synctime_ms. */ -unsigned long zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */ +uint64_t zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */ /* * Check time in milliseconds. This defines the frequency at which we check * for hung I/O. */ -unsigned long zfs_deadman_checktime_ms = 60000UL; /* 1 min. 
*/ +uint64_t zfs_deadman_checktime_ms = 60000UL; /* 1 min. */ /* * By default the deadman is enabled. @@ -1536,7 +1536,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; } - SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum, + SNPRINTF_BLKPTR(kmem_scnprintf, ' ', buf, buflen, bp, type, checksum, compress); } @@ -2922,7 +2922,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, recover, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, free_leak_on_eio, INT, ZMOD_RW, "Set to ignore IO errors during free and permanently leak the space"); -ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, U64, ZMOD_RW, "Dead I/O check interval in milliseconds"); ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, enabled, INT, ZMOD_RW, @@ -2943,11 +2943,11 @@ ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, failmode, "Failmode for deadman timer"); ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, synctime_ms, - param_set_deadman_synctime, param_get_ulong, ZMOD_RW, + param_set_deadman_synctime, spl_param_get_u64, ZMOD_RW, "Pool sync expiration time in milliseconds"); ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms, - param_set_deadman_ziotime, param_get_ulong, ZMOD_RW, + param_set_deadman_ziotime, spl_param_get_u64, ZMOD_RW, "IO expiration time in milliseconds"); ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 66cec052b669..4520ca31b7d7 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -144,8 +144,8 @@ int zfs_nocacheflush = 0; * be forced by vdev logical ashift or by user via ashift property, but won't * be set automatically as a performance optimization. 
*/ -uint64_t zfs_vdev_max_auto_ashift = 14; -uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN; +uint_t zfs_vdev_max_auto_ashift = 14; +uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN; void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...) @@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd) } } + if (vd == vd->vdev_top && vd->vdev_top_zap != 0) { + spa_t *spa = vd->vdev_spa; + uint64_t failfast; + + error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap, + vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast), + 1, &failfast); + if (error == 0) { + vd->vdev_failfast = failfast & 1; + } else if (error == ENOENT) { + vd->vdev_failfast = vdev_prop_default_numeric( + VDEV_PROP_FAILFAST); + } else { + vdev_dbgmsg(vd, + "vdev_load: zap_lookup(top_zap=%llu) " + "failed [error=%d]", + (u_longlong_t)vd->vdev_top_zap, error); + } + } + /* * Load any rebuild state from the top-level vdev zap. */ @@ -5648,7 +5668,7 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *elem = NULL; uint64_t vdev_guid; nvlist_t *nvprops; - int error; + int error = 0; ASSERT(vd != NULL); @@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) else error = spa_vdev_alloc(spa, vdev_guid); break; + case VDEV_PROP_FAILFAST: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_failfast = intval & 1; + break; default: /* Most processing is done in vdev_props_set_sync */ break; @@ -6022,6 +6049,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) vdev_prop_add_list(outnvl, propname, strval, intval, src); break; + case VDEV_PROP_FAILFAST: + src = ZPROP_SRC_LOCAL; + strval = NULL; + + err = zap_lookup(mos, objid, nvpair_name(elem), + sizeof (uint64_t), 1, &intval); + if (err == ENOENT) { + intval = vdev_prop_default_numeric( + prop); + err = 0; + } else if (err) { + break; + } + if (intval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + + vdev_prop_add_list(outnvl, propname, strval, + intval, src); + 
break; /* Text Properties */ case VDEV_PROP_COMMENT: /* Exists in the ZAP below */ @@ -6078,7 +6124,6 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) strval = NULL; zprop_source_t src = ZPROP_SRC_DEFAULT; propname = za.za_name; - prop = vdev_name_to_prop(propname); switch (za.za_integer_length) { case 8: @@ -6156,11 +6201,11 @@ ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, UINT, ZMOD_RW, /* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift, - param_set_min_auto_ashift, param_get_ulong, ZMOD_RW, + param_set_min_auto_ashift, param_get_uint, ZMOD_RW, "Minimum ashift used when creating new top-level vdevs"); ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift, - param_set_max_auto_ashift, param_get_ulong, ZMOD_RW, + param_set_max_auto_ashift, param_get_uint, ZMOD_RW, "Maximum ashift used when optimizing for logical -> physical sector " "size on new top-level vdevs"); /* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c index 0ca0c245e952..814a1f0efe4c 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c @@ -189,14 +189,14 @@ static uint_t zfs_condense_indirect_obsolete_pct = 25; * consumed by the obsolete space map; the default of 1GB is small enough * that we typically don't mind "wasting" it. */ -static unsigned long zfs_condense_max_obsolete_bytes = 1024 * 1024 * 1024; +static uint64_t zfs_condense_max_obsolete_bytes = 1024 * 1024 * 1024; /* * Don't bother condensing if the mapping uses less than this amount of * memory. The default of 128KB is considered a "trivial" amount of * memory and not worth reducing. 
*/ -static unsigned long zfs_condense_min_mapping_bytes = 128 * 1024; +static uint64_t zfs_condense_min_mapping_bytes = 128 * 1024; /* * This is used by the test suite so that it can ensure that certain @@ -1319,6 +1319,7 @@ vdev_indirect_io_start(zio_t *zio) vdev_indirect_gather_splits, zio); indirect_split_t *first = list_head(&iv->iv_splits); + ASSERT3P(first, !=, NULL); if (first->is_size == zio->io_size) { /* * This is not a split block; we are pointing to the entire @@ -1891,11 +1892,11 @@ ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_obsolete_pct, UINT, "Minimum obsolete percent of bytes in the mapping " "to attempt condensing"); -ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, U64, ZMOD_RW, "Don't bother condensing if the mapping uses less than this amount of " "memory"); -ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, ULONG, +ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, U64, ZMOD_RW, "Minimum size obsolete spacemap to attempt condensing"); diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c index 965fb7ef0593..75beb0cc3d12 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c +++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c @@ -36,17 +36,13 @@ /* * Value that is written to disk during initialization. 
*/ -#ifdef _ILP32 -static unsigned long zfs_initialize_value = 0xdeadbeefUL; -#else -static unsigned long zfs_initialize_value = 0xdeadbeefdeadbeeeULL; -#endif +static uint64_t zfs_initialize_value = 0xdeadbeefdeadbeeeULL; /* maximum number of I/Os outstanding per leaf vdev */ static const int zfs_initialize_limit = 1; /* size of initializing writes; default 1MiB, see zfs_remove_max_segment */ -static unsigned long zfs_initialize_chunk_size = 1024 * 1024; +static uint64_t zfs_initialize_chunk_size = 1024 * 1024; static boolean_t vdev_initialize_should_stop(vdev_t *vd) @@ -261,15 +257,9 @@ vdev_initialize_block_fill(void *buf, size_t len, void *unused) (void) unused; ASSERT0(len % sizeof (uint64_t)); -#ifdef _ILP32 - for (uint64_t i = 0; i < len; i += sizeof (uint32_t)) { - *(uint32_t *)((char *)(buf) + i) = zfs_initialize_value; - } -#else for (uint64_t i = 0; i < len; i += sizeof (uint64_t)) { *(uint64_t *)((char *)(buf) + i) = zfs_initialize_value; } -#endif return (0); } @@ -765,8 +755,8 @@ EXPORT_SYMBOL(vdev_initialize_stop_all); EXPORT_SYMBOL(vdev_initialize_stop_wait); EXPORT_SYMBOL(vdev_initialize_restart); -ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, U64, ZMOD_RW, "Value written during zpool initialize"); -ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, U64, ZMOD_RW, "Size in bytes of writes by zpool initialize"); diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c index 1acb89cea393..ec55674393ce 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_queue.c +++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c @@ -605,7 +605,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) int maxblocksize; boolean_t stretch = B_FALSE; avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type); - enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; + zio_flag_t flags = zio->io_flags & 
ZIO_FLAG_AGG_INHERIT; uint64_t next_offset; abd_t *abd; @@ -725,6 +725,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) * after our span is mandatory. */ dio = AVL_NEXT(t, last); + ASSERT3P(dio, !=, NULL); dio->io_flags &= ~ZIO_FLAG_OPTIONAL; } else { /* do not include the optional i/o */ @@ -756,6 +757,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) do { dio = nio; nio = AVL_NEXT(t, dio); + ASSERT3P(dio, !=, NULL); zio_add_child(dio, aio); vdev_queue_io_remove(vq, dio); diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c index f74a76a8d5ba..2980f8acfbd7 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c @@ -285,17 +285,17 @@ raidz_math_kstat_headers(char *buf, size_t size) { ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN); - ssize_t off = snprintf(buf, size, "%-17s", "implementation"); + ssize_t off = kmem_scnprintf(buf, size, "%-17s", "implementation"); for (int i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) - off += snprintf(buf + off, size - off, "%-16s", + off += kmem_scnprintf(buf + off, size - off, "%-16s", raidz_gen_name[i]); for (int i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) - off += snprintf(buf + off, size - off, "%-16s", + off += kmem_scnprintf(buf + off, size - off, "%-16s", raidz_rec_name[i]); - (void) snprintf(buf + off, size - off, "\n"); + (void) kmem_scnprintf(buf + off, size - off, "\n"); return (0); } @@ -311,34 +311,35 @@ raidz_math_kstat_data(char *buf, size_t size, void *data) ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN); if (cstat == fstat) { - off += snprintf(buf + off, size - off, "%-17s", "fastest"); + off += kmem_scnprintf(buf + off, size - off, "%-17s", + "fastest"); for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) { int id = fstat->gen[i]; - off += snprintf(buf + off, size - off, "%-16s", + off += kmem_scnprintf(buf + off, size - off, "%-16s", raidz_supp_impl[id]->name); } for (i = 0; i < 
ARRAY_SIZE(raidz_rec_name); i++) { int id = fstat->rec[i]; - off += snprintf(buf + off, size - off, "%-16s", + off += kmem_scnprintf(buf + off, size - off, "%-16s", raidz_supp_impl[id]->name); } } else { ptrdiff_t id = cstat - raidz_impl_kstats; - off += snprintf(buf + off, size - off, "%-17s", + off += kmem_scnprintf(buf + off, size - off, "%-17s", raidz_supp_impl[id]->name); for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) - off += snprintf(buf + off, size - off, "%-16llu", + off += kmem_scnprintf(buf + off, size - off, "%-16llu", (u_longlong_t)cstat->gen[i]); for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) - off += snprintf(buf + off, size - off, "%-16llu", + off += kmem_scnprintf(buf + off, size - off, "%-16llu", (u_longlong_t)cstat->rec[i]); } - (void) snprintf(buf + off, size - off, "\n"); + (void) kmem_scnprintf(buf + off, size - off, "\n"); return (0); } diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index 1ce578e228d8..1f56275c853b 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -22,6 +22,7 @@ * * Copyright (c) 2018, Intel Corporation. * Copyright (c) 2020 by Lawrence Livermore National Security, LLC. + * Copyright (c) 2022 Hewlett Packard Enterprise Development LP. */ #include <sys/vdev_impl.h> @@ -103,7 +104,7 @@ * Size of rebuild reads; defaults to 1MiB per data disk and is capped at * SPA_MAXBLOCKSIZE. */ -static unsigned long zfs_rebuild_max_segment = 1024 * 1024; +static uint64_t zfs_rebuild_max_segment = 1024 * 1024; /* * Maximum number of parallelly executed bytes per leaf vdev caused by a @@ -121,7 +122,7 @@ static unsigned long zfs_rebuild_max_segment = 1024 * 1024; * With a value of 32MB the sequential resilver write rate was measured at * 800MB/s sustained while rebuilding to a distributed spare. 
*/ -static unsigned long zfs_rebuild_vdev_limit = 32 << 20; +static uint64_t zfs_rebuild_vdev_limit = 32 << 20; /* * Automatically start a pool scrub when the last active sequential resilver @@ -134,6 +135,7 @@ static int zfs_rebuild_scrub_enabled = 1; * For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync(). */ static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg); +static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx); /* * Clear the per-vdev rebuild bytes value for a vdev tree. @@ -307,6 +309,17 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx) vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; mutex_enter(&vd->vdev_rebuild_lock); + + /* + * Handle a second device failure if it occurs after all rebuild I/O + * has completed but before this sync task has been executed. + */ + if (vd->vdev_rebuild_reset_wanted) { + mutex_exit(&vd->vdev_rebuild_lock); + vdev_rebuild_reset_sync(arg, tx); + return; + } + vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE; vrp->vrp_end_time = gethrestime_sec(); @@ -760,7 +773,6 @@ vdev_rebuild_thread(void *arg) ASSERT(vd->vdev_rebuilding); ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD)); ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE); - ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE); vdev_rebuild_t *vr = &vd->vdev_rebuild_config; vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; @@ -1138,10 +1150,10 @@ vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs) return (error); } -ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, U64, ZMOD_RW, "Max segment size in bytes of rebuild reads"); -ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, U64, ZMOD_RW, "Max bytes in flight per leaf vdev for sequential resilvers"); ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c 
b/sys/contrib/openzfs/module/zfs/vdev_trim.c index 5905d9a07571..5b5076c8722c 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_trim.c +++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c @@ -1188,12 +1188,11 @@ vdev_autotrim_thread(void *arg) mutex_exit(&vd->vdev_autotrim_lock); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - uint64_t extent_bytes_max = zfs_trim_extent_bytes_max; - uint64_t extent_bytes_min = zfs_trim_extent_bytes_min; - while (!vdev_autotrim_should_stop(vd)) { int txgs_per_trim = MAX(zfs_trim_txg_batch, 1); boolean_t issued_trim = B_FALSE; + uint64_t extent_bytes_max = zfs_trim_extent_bytes_max; + uint64_t extent_bytes_min = zfs_trim_extent_bytes_min; /* * All of the metaslabs are divided in to groups of size diff --git a/sys/contrib/openzfs/module/zfs/zap_leaf.c b/sys/contrib/openzfs/module/zfs/zap_leaf.c index 25c2d5163a26..2e8489c7dfcf 100644 --- a/sys/contrib/openzfs/module/zfs/zap_leaf.c +++ b/sys/contrib/openzfs/module/zfs/zap_leaf.c @@ -646,7 +646,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, * form of the name. But all callers have one of these on hand anyway, * so might as well take advantage. A cleaner but slower interface * would accept neither argument, and compute the normalized name as - * needed (using zap_name_alloc(zap_entry_read_name(zeh))). + * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))). 
*/ boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, @@ -667,7 +667,7 @@ zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, continue; if (zn == NULL) { - zn = zap_name_alloc(zap, name, MT_NORMALIZE); + zn = zap_name_alloc_str(zap, name, MT_NORMALIZE); allocdzn = B_TRUE; } if (zap_leaf_array_match(zeh->zeh_leaf, zn, diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c index 58a5c9f600b7..606f426404cc 100644 --- a/sys/contrib/openzfs/module/zfs/zap_micro.c +++ b/sys/contrib/openzfs/module/zfs/zap_micro.c @@ -33,7 +33,7 @@ #include <sys/zap.h> #include <sys/zap_impl.h> #include <sys/zap_leaf.h> -#include <sys/avl.h> +#include <sys/btree.h> #include <sys/arc.h> #include <sys/dmu_objset.h> @@ -92,7 +92,7 @@ zap_hash(zap_name_t *zn) wp++, i++) { uint64_t word = *wp; - for (int j = 0; j < zn->zn_key_intlen; j++) { + for (int j = 0; j < 8; j++) { h = (h >> 8) ^ zfs_crc64_table[(h ^ word) & 0xFF]; word >>= NBBY; @@ -162,18 +162,25 @@ zap_match(zap_name_t *zn, const char *matchname) } } +static zap_name_t * +zap_name_alloc(zap_t *zap) +{ + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + zn->zn_zap = zap; + return (zn); +} + void zap_name_free(zap_name_t *zn) { kmem_free(zn, sizeof (zap_name_t)); } -zap_name_t * -zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) +static int +zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + zap_t *zap = zn->zn_zap; - zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = key; zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; @@ -194,17 +201,13 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) * what the hash is computed from. 
*/ if (zap_normalize(zap, key, zn->zn_normbuf, - zap->zap_normflags) != 0) { - zap_name_free(zn); - return (NULL); - } + zap->zap_normflags) != 0) + return (SET_ERROR(ENOTSUP)); zn->zn_key_norm = zn->zn_normbuf; zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } else { - if (mt != 0) { - zap_name_free(zn); - return (NULL); - } + if (mt != 0) + return (SET_ERROR(ENOTSUP)); zn->zn_key_norm = zn->zn_key_orig; zn->zn_key_norm_numints = zn->zn_key_orig_numints; } @@ -217,13 +220,22 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) * what the matching is based on. (Not the hash!) */ if (zap_normalize(zap, key, zn->zn_normbuf, - zn->zn_normflags) != 0) { - zap_name_free(zn); - return (NULL); - } + zn->zn_normflags) != 0) + return (SET_ERROR(ENOTSUP)); zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } + return (0); +} + +zap_name_t * +zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt) +{ + zap_name_t *zn = zap_name_alloc(zap); + if (zap_name_init_str(zn, key, mt) != 0) { + zap_name_free(zn); + return (NULL); + } return (zn); } @@ -277,45 +289,46 @@ mze_compare(const void *arg1, const void *arg2) const mzap_ent_t *mze1 = arg1; const mzap_ent_t *mze2 = arg2; - int cmp = TREE_CMP(mze1->mze_hash, mze2->mze_hash); - if (likely(cmp)) - return (cmp); - - return (TREE_CMP(mze1->mze_cd, mze2->mze_cd)); + return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd, + (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd)); } static void -mze_insert(zap_t *zap, int chunkid, uint64_t hash) +mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash) { + mzap_ent_t mze; + ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); - mze->mze_chunkid = chunkid; - mze->mze_hash = hash; - mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; - ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); - avl_add(&zap->zap_m.zap_avl, mze); + mze.mze_chunkid = chunkid; + ASSERT0(hash & 0xffffffff); + mze.mze_hash 
= hash >> 32; + ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff); + mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd; + ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0); + zfs_btree_add(&zap->zap_m.zap_tree, &mze); } static mzap_ent_t * -mze_find(zap_name_t *zn) +mze_find(zap_name_t *zn, zfs_btree_index_t *idx) { mzap_ent_t mze_tofind; mzap_ent_t *mze; - avl_index_t idx; - avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; + zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree; ASSERT(zn->zn_zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); - mze_tofind.mze_hash = zn->zn_hash; + ASSERT0(zn->zn_hash & 0xffffffff); + mze_tofind.mze_hash = zn->zn_hash >> 32; mze_tofind.mze_cd = 0; - mze = avl_find(avl, &mze_tofind, &idx); + mze = zfs_btree_find(tree, &mze_tofind, idx); if (mze == NULL) - mze = avl_nearest(avl, idx, AVL_AFTER); - for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { + mze = zfs_btree_next(tree, idx, idx); + for (; mze && mze->mze_hash == mze_tofind.mze_hash; + mze = zfs_btree_next(tree, idx, idx)) { ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) return (mze); @@ -328,18 +341,21 @@ static uint32_t mze_find_unused_cd(zap_t *zap, uint64_t hash) { mzap_ent_t mze_tofind; - avl_index_t idx; - avl_tree_t *avl = &zap->zap_m.zap_avl; + zfs_btree_index_t idx; + zfs_btree_t *tree = &zap->zap_m.zap_tree; ASSERT(zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); + ASSERT0(hash & 0xffffffff); + hash >>= 32; mze_tofind.mze_hash = hash; mze_tofind.mze_cd = 0; uint32_t cd = 0; - for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx); - mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { + for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx); + mze && mze->mze_hash == hash; + mze = zfs_btree_next(tree, &idx, &idx)) { if (mze->mze_cd != cd) break; cd++; @@ -364,16 +380,18 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) { zap_t *zap = zn->zn_zap; 
mzap_ent_t mze_tofind; - mzap_ent_t *mze; - avl_index_t idx; - avl_tree_t *avl = &zap->zap_m.zap_avl; + zfs_btree_index_t idx; + zfs_btree_t *tree = &zap->zap_m.zap_tree; uint32_t mzap_ents = 0; + ASSERT0(hash & 0xffffffff); + hash >>= 32; mze_tofind.mze_hash = hash; mze_tofind.mze_cd = 0; - for (mze = avl_find(avl, &mze_tofind, &idx); - mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { + for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx); + mze && mze->mze_hash == hash; + mze = zfs_btree_next(tree, &idx, &idx)) { mzap_ents++; } @@ -384,24 +402,10 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) } static void -mze_remove(zap_t *zap, mzap_ent_t *mze) -{ - ASSERT(zap->zap_ismicro); - ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - - avl_remove(&zap->zap_m.zap_avl, mze); - kmem_free(mze, sizeof (mzap_ent_t)); -} - -static void mze_destroy(zap_t *zap) { - mzap_ent_t *mze; - void *avlcookie = NULL; - - while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) - kmem_free(mze, sizeof (mzap_ent_t)); - avl_destroy(&zap->zap_m.zap_avl); + zfs_btree_clear(&zap->zap_m.zap_tree); + zfs_btree_destroy(&zap->zap_m.zap_tree); } static zap_t * @@ -448,21 +452,26 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) zap->zap_salt = zap_m_phys(zap)->mz_salt; zap->zap_normflags = zap_m_phys(zap)->mz_normflags; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; - avl_create(&zap->zap_m.zap_avl, mze_compare, - sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); - for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) { + /* + * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove() + * overhead on massive inserts below. It still allows to store + * 62 entries before we have to add 2KB B-tree core node. 
+ */ + zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare, + sizeof (mzap_ent_t), 512); + + zap_name_t *zn = zap_name_alloc(zap); + for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; if (mze->mze_name[0]) { - zap_name_t *zn; - zap->zap_m.zap_num_entries++; - zn = zap_name_alloc(zap, mze->mze_name, 0); + zap_name_init_str(zn, mze->mze_name, 0); mze_insert(zap, i, zn->zn_hash); - zap_name_free(zn); } } + zap_name_free(zn); } else { zap->zap_salt = zap_f_phys(zap)->zap_salt; zap->zap_normflags = zap_f_phys(zap)->zap_normflags; @@ -657,24 +666,25 @@ mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags) dprintf("upgrading obj=%llu with %u chunks\n", (u_longlong_t)zap->zap_object, nchunks); - /* XXX destroy the avl later, so we can use the stored hash value */ + /* XXX destroy the tree later, so we can use the stored hash value */ mze_destroy(zap); fzap_upgrade(zap, tx, flags); + zap_name_t *zn = zap_name_alloc(zap); for (int i = 0; i < nchunks; i++) { mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; if (mze->mze_name[0] == 0) continue; dprintf("adding %s=%llu\n", mze->mze_name, (u_longlong_t)mze->mze_value); - zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0); + zap_name_init_str(zn, mze->mze_name, 0); /* If we fail here, we would end up losing entries */ VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tag, tx)); zap = zn->zn_zap; /* fzap_add_cd() may change zap */ - zap_name_free(zn); } + zap_name_free(zn); vmem_free(mzp, sz); *zapp = zap; return (0); @@ -916,22 +926,23 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) * See also the comment above zap_entry_normalization_conflict(). 
*/ static boolean_t -mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) +mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze, + zfs_btree_index_t *idx) { - int direction = AVL_BEFORE; boolean_t allocdzn = B_FALSE; + mzap_ent_t *other; + zfs_btree_index_t oidx; if (zap->zap_normflags == 0) return (B_FALSE); -again: - for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction); + for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx); other && other->mze_hash == mze->mze_hash; - other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { + other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) { if (zn == NULL) { - zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, - MT_NORMALIZE); + zn = zap_name_alloc_str(zap, + MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); allocdzn = B_TRUE; } if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { @@ -941,9 +952,20 @@ again: } } - if (direction == AVL_BEFORE) { - direction = AVL_AFTER; - goto again; + for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx); + other && other->mze_hash == mze->mze_hash; + other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) { + + if (zn == NULL) { + zn = zap_name_alloc_str(zap, + MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); + allocdzn = B_TRUE; + } + if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { + if (allocdzn) + zap_name_free(zn); + return (B_TRUE); + } } if (allocdzn) @@ -971,7 +993,7 @@ zap_lookup_impl(zap_t *zap, const char *name, { int err = 0; - zap_name_t *zn = zap_name_alloc(zap, name, mt); + zap_name_t *zn = zap_name_alloc_str(zap, name, mt); if (zn == NULL) return (SET_ERROR(ENOTSUP)); @@ -979,7 +1001,8 @@ zap_lookup_impl(zap_t *zap, const char *name, err = fzap_lookup(zn, integer_size, num_integers, buf, realname, rn_len, ncp); } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); } else { @@ -990,11 +1013,13 
@@ zap_lookup_impl(zap_t *zap, const char *name, } else { *(uint64_t *)buf = MZE_PHYS(zap, mze)->mze_value; - (void) strlcpy(realname, - MZE_PHYS(zap, mze)->mze_name, rn_len); + if (realname != NULL) + (void) strlcpy(realname, + MZE_PHYS(zap, mze)->mze_name, + rn_len); if (ncp) { *ncp = mzap_normalization_conflict(zap, - zn, mze); + zn, mze, &idx); } } } @@ -1031,7 +1056,7 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name) err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); if (err) return (err); - zn = zap_name_alloc(zap, name, 0); + zn = zap_name_alloc_str(zap, name, 0); if (zn == NULL) { zap_unlockdir(zap, FTAG); return (SET_ERROR(ENOTSUP)); @@ -1134,7 +1159,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name, zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); if (err != 0) return (err); - zap_name_t *zn = zap_name_alloc(zap, name, 0); + zap_name_t *zn = zap_name_alloc_str(zap, name, 0); if (zn == NULL) { zap_unlockdir(zap, FTAG); return (SET_ERROR(ENOTSUP)); @@ -1142,7 +1167,8 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name, if (!zap->zap_ismicro) { err = fzap_length(zn, integer_size, num_integers); } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); } else { @@ -1182,7 +1208,7 @@ static void mzap_addent(zap_name_t *zn, uint64_t value) { zap_t *zap = zn->zn_zap; - int start = zap->zap_m.zap_alloc_next; + uint16_t start = zap->zap_m.zap_alloc_next; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); @@ -1198,7 +1224,7 @@ mzap_addent(zap_name_t *zn, uint64_t value) ASSERT(cd < zap_maxcd(zap)); again: - for (int i = start; i < zap->zap_m.zap_num_chunks; i++) { + for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; if (mze->mze_name[0] == 0) { mze->mze_value = value; @@ -1229,7 +1255,7 @@ zap_add_impl(zap_t *zap, const char *key, const 
uint64_t *intval = val; int err = 0; - zap_name_t *zn = zap_name_alloc(zap, key, 0); + zap_name_t *zn = zap_name_alloc_str(zap, key, 0); if (zn == NULL) { zap_unlockdir(zap, tag); return (SET_ERROR(ENOTSUP)); @@ -1247,7 +1273,8 @@ zap_add_impl(zap_t *zap, const char *key, } zap = zn->zn_zap; /* fzap_add() may change zap */ } else { - if (mze_find(zn) != NULL) { + zfs_btree_index_t idx; + if (mze_find(zn, &idx) != NULL) { err = SET_ERROR(EEXIST); } else { mzap_addent(zn, *intval); @@ -1327,7 +1354,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); if (err != 0) return (err); - zap_name_t *zn = zap_name_alloc(zap, name, 0); + zap_name_t *zn = zap_name_alloc_str(zap, name, 0); if (zn == NULL) { zap_unlockdir(zap, FTAG); return (SET_ERROR(ENOTSUP)); @@ -1348,7 +1375,8 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, } zap = zn->zn_zap; /* fzap_update() may change zap */ } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze != NULL) { MZE_PHYS(zap, mze)->mze_value = *intval; } else { @@ -1398,20 +1426,20 @@ zap_remove_impl(zap_t *zap, const char *name, { int err = 0; - zap_name_t *zn = zap_name_alloc(zap, name, mt); + zap_name_t *zn = zap_name_alloc_str(zap, name, mt); if (zn == NULL) return (SET_ERROR(ENOTSUP)); if (!zap->zap_ismicro) { err = fzap_remove(zn, tx); } else { - mzap_ent_t *mze = mze_find(zn); + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); } else { zap->zap_m.zap_num_entries--; - memset(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 0, - sizeof (mzap_ent_phys_t)); - mze_remove(zap, mze); + memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t)); + zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx); } } zap_name_free(zn); @@ -1582,29 +1610,30 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) if (!zc->zc_zap->zap_ismicro) { err = 
fzap_cursor_retrieve(zc->zc_zap, zc, za); } else { - avl_index_t idx; + zfs_btree_index_t idx; mzap_ent_t mze_tofind; - mze_tofind.mze_hash = zc->zc_hash; + mze_tofind.mze_hash = zc->zc_hash >> 32; mze_tofind.mze_cd = zc->zc_cd; - mzap_ent_t *mze = - avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); + mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree, + &mze_tofind, &idx); if (mze == NULL) { - mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, - idx, AVL_AFTER); + mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree, + &idx, &idx); } if (mze) { mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); za->za_normalization_conflict = - mzap_normalization_conflict(zc->zc_zap, NULL, mze); + mzap_normalization_conflict(zc->zc_zap, NULL, + mze, &idx); za->za_integer_length = 8; za->za_num_integers = 1; za->za_first_integer = mzep->mze_value; (void) strlcpy(za->za_name, mzep->mze_name, sizeof (za->za_name)); - zc->zc_hash = mze->mze_hash; + zc->zc_hash = (uint64_t)mze->mze_hash << 32; zc->zc_cd = mze->mze_cd; err = 0; } else { diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c index fe90242ca40d..5ebf1bbbc8cc 100644 --- a/sys/contrib/openzfs/module/zfs/zcp.c +++ b/sys/contrib/openzfs/module/zfs/zcp.c @@ -109,8 +109,8 @@ #define ZCP_NVLIST_MAX_DEPTH 20 static const uint64_t zfs_lua_check_instrlimit_interval = 100; -unsigned long zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT; -unsigned long zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT; +uint64_t zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT; +uint64_t zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT; /* * Forward declarations for mutually recursive functions @@ -277,9 +277,9 @@ zcp_table_to_nvlist(lua_State *state, int index, int depth) } break; case LUA_TNUMBER: - VERIFY3U(sizeof (buf), >, - snprintf(buf, sizeof (buf), "%lld", - (longlong_t)lua_tonumber(state, -2))); + (void) snprintf(buf, sizeof (buf), "%lld", + (longlong_t)lua_tonumber(state, -2)); + key = 
buf; if (saw_str_could_collide) { key_could_collide = B_TRUE; @@ -1443,8 +1443,8 @@ zcp_parse_args(lua_State *state, const char *fname, const zcp_arg_t *pargs, } } -ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, U64, ZMOD_RW, "Max instruction limit that can be specified for a channel program"); -ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, U64, ZMOD_RW, "Max memory limit that can be specified for a channel program"); diff --git a/sys/contrib/openzfs/module/zfs/zcp_get.c b/sys/contrib/openzfs/module/zfs/zcp_get.c index cd17374eb422..f28266b8095f 100644 --- a/sys/contrib/openzfs/module/zfs/zcp_get.c +++ b/sys/contrib/openzfs/module/zfs/zcp_get.c @@ -467,7 +467,8 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop) } else { error = dsl_prop_get_ds(ds, prop_name, sizeof (numval), 1, &numval, setpoint); - + if (error != 0) + goto out; #ifdef _KERNEL /* Fill in temporary value for prop, if applicable */ (void) zfs_get_temporary_prop(ds, zfs_prop, &numval, setpoint); @@ -489,6 +490,7 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop) (void) lua_pushnumber(state, numval); } } +out: kmem_free(strval, ZAP_MAXVALUELEN); if (error == 0) get_prop_src(state, setpoint, zfs_prop); diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c index 74b4cb8d2e63..4a9a36d87e66 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_chksum.c +++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c @@ -81,15 +81,15 @@ chksum_kstat_headers(char *buf, size_t size) { ssize_t off = 0; - off += snprintf(buf + off, size, "%-23s", "implementation"); - off += snprintf(buf + off, size - off, "%8s", "1k"); - off += snprintf(buf + off, size - off, "%8s", "4k"); - off += snprintf(buf + off, size - off, "%8s", "16k"); - off += snprintf(buf + off, size - off, "%8s", "64k"); - off += 
snprintf(buf + off, size - off, "%8s", "256k"); - off += snprintf(buf + off, size - off, "%8s", "1m"); - off += snprintf(buf + off, size - off, "%8s", "4m"); - (void) snprintf(buf + off, size - off, "%8s\n", "16m"); + off += kmem_scnprintf(buf + off, size, "%-23s", "implementation"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "1k"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "4k"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "16k"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "64k"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "256k"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "1m"); + off += kmem_scnprintf(buf + off, size - off, "%8s", "4m"); + (void) kmem_scnprintf(buf + off, size - off, "%8s\n", "16m"); return (0); } @@ -102,23 +102,23 @@ chksum_kstat_data(char *buf, size_t size, void *data) char b[24]; cs = (chksum_stat_t *)data; - snprintf(b, 23, "%s-%s", cs->name, cs->impl); - off += snprintf(buf + off, size - off, "%-23s", b); - off += snprintf(buf + off, size - off, "%8llu", + kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl); + off += kmem_scnprintf(buf + off, size - off, "%-23s", b); + off += kmem_scnprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs1k); - off += snprintf(buf + off, size - off, "%8llu", + off += kmem_scnprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs4k); - off += snprintf(buf + off, size - off, "%8llu", + off += kmem_scnprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs16k); - off += snprintf(buf + off, size - off, "%8llu", + off += kmem_scnprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs64k); - off += snprintf(buf + off, size - off, "%8llu", + off += kmem_scnprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs256k); - off += snprintf(buf + off, size - off, "%8llu", + off += kmem_scnprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs1m); - off += snprintf(buf + off, size - off, "%8llu", + off += kmem_scnprintf(buf + 
off, size - off, "%8llu", (u_longlong_t)cs->bs4m); - (void) snprintf(buf + off, size - off, "%8llu\n", + (void) kmem_scnprintf(buf + off, size - off, "%8llu\n", (u_longlong_t)cs->bs16m); return (0); diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c index 06aa1214ace8..fd0dc7d69bf8 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fm.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c @@ -253,7 +253,6 @@ void zfs_ereport_clear(spa_t *spa, vdev_t *vd) { uint64_t vdev_guid, pool_guid; - int cnt = 0; ASSERT(vd != NULL || spa != NULL); if (vd == NULL) { @@ -277,7 +276,6 @@ zfs_ereport_clear(spa_t *spa, vdev_t *vd) avl_remove(&recent_events_tree, entry); list_remove(&recent_events_list, entry); kmem_free(entry, sizeof (*entry)); - cnt++; } } diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index c3266c09306b..a5168b937588 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -229,14 +229,14 @@ static zfsdev_state_t *zfsdev_state_list; * for zc->zc_nvlist_src_size, since we will need to allocate that much memory. * Defaults to 0=auto which is handled by platform code. */ -unsigned long zfs_max_nvlist_src_size = 0; +uint64_t zfs_max_nvlist_src_size = 0; /* * When logging the output nvlist of an ioctl in the on-disk history, limit * the logged size to this many bytes. This must be less than DMU_MAX_ACCESS. * This applies primarily to zfs_ioc_channel_program(). 
*/ -static unsigned long zfs_history_output_max = 1024 * 1024; +static uint64_t zfs_history_output_max = 1024 * 1024; uint_t zfs_fsyncer_key; uint_t zfs_allow_log_key; @@ -7884,8 +7884,8 @@ zfs_kmod_fini(void) tsd_destroy(&zfs_allow_log_key); } -ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW, "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls"); -ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW, "Maximum size in bytes of ZFS ioctl output that will be logged"); diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c index c92044337bce..77bf9140d52d 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_log.c +++ b/sys/contrib/openzfs/module/zfs/zfs_log.c @@ -494,6 +494,29 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +static void +do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, + const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) +{ + itx_t *itx; + lr_rename_t *lr; + size_t snamesize = strlen(sname) + 1; + size_t dnamesize = strlen(dname) + 1; + + if (zil_replaying(zilog, tx)) + return; + + itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); + lr = (lr_rename_t *)&itx->itx_lr; + lr->lr_sdoid = sdzp->z_id; + lr->lr_tdoid = tdzp->z_id; + memcpy((char *)(lr + 1), sname, snamesize); + memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); + itx->itx_oid = szp->z_id; + + zil_itx_assign(zilog, itx, tx); +} + /* * Handles TX_RENAME transactions. 
*/ @@ -501,18 +524,71 @@ void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) { + txtype |= TX_RENAME; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + +/* + * Handles TX_RENAME_EXCHANGE transactions. + */ +void +zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, + znode_t *szp) +{ + txtype |= TX_RENAME_EXCHANGE; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + +/* + * Handles TX_RENAME_WHITEOUT transactions. + * + * Unfortunately we cannot reuse do_zfs_log_rename because we need to call + * zfs_mknode() on replay which requires stashing bits as with TX_CREATE. + */ +void +zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, + znode_t *szp, znode_t *wzp) +{ itx_t *itx; - lr_rename_t *lr; + lr_rename_whiteout_t *lr; size_t snamesize = strlen(sname) + 1; size_t dnamesize = strlen(dname) + 1; if (zil_replaying(zilog, tx)) return; + txtype |= TX_RENAME_WHITEOUT; itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); - lr = (lr_rename_t *)&itx->itx_lr; - lr->lr_sdoid = sdzp->z_id; - lr->lr_tdoid = tdzp->z_id; + lr = (lr_rename_whiteout_t *)&itx->itx_lr; + lr->lr_rename.lr_sdoid = sdzp->z_id; + lr->lr_rename.lr_tdoid = tdzp->z_id; + + /* + * RENAME_WHITEOUT will create an entry at the source znode, so we need + * to store the same data that the equivalent call to zfs_log_create() + * would.
+ */ + lr->lr_wfoid = wzp->z_id; + LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT); + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen, + sizeof (uint64_t)); + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)), + lr->lr_wcrtime, sizeof (uint64_t) * 2); + lr->lr_wmode = wzp->z_mode; + lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp)); + lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp)); + + /* + * This rdev will always be makedevice(0, 0) but because the ZIL log and + * replay code needs to be platform independent (and there is no + * platform independent makedev()) we need to copy the one created + * during the rename operation. + */ + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev, + sizeof (lr->lr_wrdev)); + memcpy((char *)(lr + 1), sname, snamesize); memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); itx->itx_oid = szp->z_id; @@ -525,7 +601,7 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, * called as soon as the write is on stable storage (be it via a DMU sync or a * ZIL commit).
*/ -static long zfs_immediate_write_sz = 32768; +static int64_t zfs_immediate_write_sz = 32768; void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, @@ -815,5 +891,5 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, zil_itx_assign(zilog, itx, tx); } -ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, LONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, S64, ZMOD_RW, "Largest data block to write to zil"); diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c index dfcdeeb5b46f..63acf7ab2e4d 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c +++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c @@ -151,7 +151,7 @@ zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo) */ int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, - uint64_t *action_handle) + uintptr_t *action_handle) { zfs_onexit_t *zo; zfs_onexit_action_node_t *ap; @@ -170,7 +170,7 @@ zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, list_insert_tail(&zo->zo_actions, ap); mutex_exit(&zo->zo_lock); if (action_handle) - *action_handle = (uint64_t)(uintptr_t)ap; + *action_handle = (uintptr_t)ap; return (0); } diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c index 379e1d1a7b57..0293e46d5858 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_replay.c +++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c @@ -386,8 +386,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) lr->lr_uid, lr->lr_gid); } +#if defined(__linux__) error = zfs_create(dzp, name, &xva.xva_vattr, - 0, 0, &zp, kcred, vflg, &vsec); + 0, 0, &zp, kcred, vflg, &vsec, kcred->user_ns); +#else + error = zfs_create(dzp, name, &xva.xva_vattr, + 0, 0, &zp, kcred, vflg, &vsec, NULL); +#endif break; case TX_MKDIR_ACL: aclstart = (caddr_t)(lracl + 1); @@ -416,8 +421,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) (void *)&name, lracl->lr_fuidcnt, 
lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); } +#if defined(__linux__) + error = zfs_mkdir(dzp, name, &xva.xva_vattr, + &zp, kcred, vflg, &vsec, kcred->user_ns); +#else error = zfs_mkdir(dzp, name, &xva.xva_vattr, - &zp, kcred, vflg, &vsec); + &zp, kcred, vflg, &vsec, NULL); +#endif break; default: error = SET_ERROR(ENOTSUP); @@ -527,8 +537,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) if (name == NULL) name = (char *)start; +#if defined(__linux__) + error = zfs_create(dzp, name, &xva.xva_vattr, + 0, 0, &zp, kcred, vflg, NULL, kcred->user_ns); +#else error = zfs_create(dzp, name, &xva.xva_vattr, - 0, 0, &zp, kcred, vflg, NULL); + 0, 0, &zp, kcred, vflg, NULL, NULL); +#endif break; case TX_MKDIR_ATTR: lrattr = (lr_attr_t *)(caddr_t)(lr + 1); @@ -545,8 +560,14 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) if (name == NULL) name = (char *)(lr + 1); +#if defined(__linux__) + error = zfs_mkdir(dzp, name, &xva.xva_vattr, + &zp, kcred, vflg, NULL, kcred->user_ns); +#else error = zfs_mkdir(dzp, name, &xva.xva_vattr, - &zp, kcred, vflg, NULL); + &zp, kcred, vflg, NULL, NULL); +#endif + break; case TX_MKXATTR: error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred); @@ -554,8 +575,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) case TX_SYMLINK: name = (char *)(lr + 1); link = name + strlen(name) + 1; +#if defined(__linux__) + error = zfs_symlink(dzp, name, &xva.xva_vattr, + link, &zp, kcred, vflg, kcred->user_ns); +#else error = zfs_symlink(dzp, name, &xva.xva_vattr, - link, &zp, kcred, vflg); + link, &zp, kcred, vflg, NULL); +#endif break; default: error = SET_ERROR(ENOTSUP); @@ -643,18 +669,21 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) } static int -zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) +do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname, + char *tname, uint64_t rflags, vattr_t *wo_vap) { - zfsvfs_t *zfsvfs = arg1; - lr_rename_t *lr = arg2; - char 
*sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; znode_t *sdzp, *tdzp; - int error; - int vflg = 0; + int error, vflg = 0; - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); + /* Only Linux currently supports RENAME_* flags. */ +#ifdef __linux__ + VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT)); + + /* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */ + VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); +#else + VERIFY0(rflags); +#endif if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) return (error); @@ -667,7 +696,13 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg); +#if defined(__linux__) + error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags, + wo_vap, kcred->user_ns); +#else + error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags, + wo_vap, NULL); +#endif zrele(tdzp); zrele(sdzp); @@ -675,6 +710,86 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) } static int +zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) +{ + zfsvfs_t *zfsvfs = arg1; + lr_rename_t *lr = arg2; + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL)); +} + +static int +zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) +{ +#ifdef __linux__ + zfsvfs_t *zfsvfs = arg1; + lr_rename_t *lr = arg2; + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, + NULL)); +#else + return (SET_ERROR(ENOTSUP)); +#endif +} + 
+static int +zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) +{ +#ifdef __linux__ + zfsvfs_t *zfsvfs = arg1; + lr_rename_whiteout_t *lr = arg2; + int error; + /* sname and tname follow lr_rename_whiteout_t */ + char *sname = (char *)(lr + 1); + char *tname = sname + strlen(sname) + 1; + /* For the whiteout file. */ + xvattr_t xva; + uint64_t objid; + uint64_t dnodesize; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + objid = LR_FOID_GET_OBJ(lr->lr_wfoid); + dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT; + + xva_init(&xva); + zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, + lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid); + + /* + * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which + * assigns the object's creation time, generation number, and dnode + * slot count. The generic zfs_rename() has no concept of these + * attributes, so we smuggle the values inside the vattr's otherwise + * unused va_ctime, va_nblocks, and va_fsid fields. 
+ */ + ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime); + xva.xva_vattr.va_nblocks = lr->lr_wgen; + xva.xva_vattr.va_fsid = dnodesize; + + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) + return (error); + + return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, + RENAME_WHITEOUT, &xva.xva_vattr)); +#else + return (SET_ERROR(ENOTSUP)); +#endif +} + +static int zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; @@ -860,7 +975,11 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); - error = zfs_setattr(zp, vap, 0, kcred); +#if defined(__linux__) + error = zfs_setattr(zp, vap, 0, kcred, kcred->user_ns); +#else + error = zfs_setattr(zp, vap, 0, kcred, NULL); +#endif zfs_fuid_info_free(zfsvfs->z_fuid_replay); zfsvfs->z_fuid_replay = NULL; @@ -1069,4 +1188,6 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ zfs_replay_write2, /* TX_WRITE2 */ zfs_replay_setsaxattr, /* TX_SETSAXATTR */ + zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */ + zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */ }; diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 57f03f116273..45ecb0773260 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -64,7 +64,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) int error = 0; zfsvfs_t *zfsvfs = ZTOZSB(zp); - (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); + (void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt); if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) @@ -168,15 +168,25 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr) return (error); if (flag & V_ACE_MASK) - error = 
zfs_zaccess(zp, mode, flag, B_FALSE, cr); +#if defined(__linux__) + error = zfs_zaccess(zp, mode, flag, B_FALSE, cr, + kcred->user_ns); +#else + error = zfs_zaccess(zp, mode, flag, B_FALSE, cr, + NULL); +#endif else - error = zfs_zaccess_rwx(zp, mode, flag, cr); +#if defined(__linux__) + error = zfs_zaccess_rwx(zp, mode, flag, cr, kcred->user_ns); +#else + error = zfs_zaccess_rwx(zp, mode, flag, cr, NULL); +#endif zfs_exit(zfsvfs, FTAG); return (error); } -static unsigned long zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */ +static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */ /* * Read bytes from specified file into supplied buffer. @@ -991,5 +1001,5 @@ EXPORT_SYMBOL(zfs_write); EXPORT_SYMBOL(zfs_getsecattr); EXPORT_SYMBOL(zfs_setsecattr); -ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW, "Bytes to read per chunk"); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index dc5b8018e16e..02e6f4b83b9c 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -132,7 +132,7 @@ static int zil_nocacheflush = 0; * Any writes above that will be executed with lower (asynchronous) priority * to limit potential SLOG device abuse by single active ZIL writer. 
*/ -static unsigned long zil_slog_bulk = 768 * 1024; +static uint64_t zil_slog_bulk = 768 * 1024; static kmem_cache_t *zil_lwb_cache; static kmem_cache_t *zil_zcw_cache; @@ -237,7 +237,7 @@ static int zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, blkptr_t *nbp, void *dst, char **end) { - enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + zio_flag_t zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; @@ -315,7 +315,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, static int zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) { - enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + zio_flag_t zio_flags = ZIO_FLAG_CANFAIL; const blkptr_t *bp = &lr->lr_blkptr; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; @@ -339,6 +339,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) if (wbuf == NULL) zio_flags |= ZIO_FLAG_RAW; + ASSERT3U(BP_GET_LSIZE(bp), !=, 0); SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -479,8 +480,18 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, lrbuf, &end); - if (error != 0) + if (error != 0) { + if (claimed) { + char name[ZFS_MAX_DATASET_NAME_LEN]; + + dmu_objset_name(zilog->zl_os, name); + + cmn_err(CE_WARN, "ZFS read log block error %d, " + "dataset %s, seq 0x%llx\n", error, name, + (u_longlong_t)blk_seq); + } break; + } for (lrp = lrbuf; lrp < end; lrp += reclen) { lr_t *lr = (lr_t *)lrp; @@ -504,10 +515,6 @@ done: zilog->zl_parse_blk_count = blk_count; zilog->zl_parse_lr_count = lr_count; - ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || - (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) || - (decrypt && error == EIO)); - zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); @@ -758,11 +765,9 @@ 
zil_commit_activate_saxattr_feature(zilog_t *zilog) uint64_t txg = 0; dmu_tx_t *tx = NULL; - if (spa_feature_is_enabled(zilog->zl_spa, - SPA_FEATURE_ZILSAXATTR) && + if (spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL && - !dsl_dataset_feature_is_active(ds, - SPA_FEATURE_ZILSAXATTR)) { + !dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR)) { tx = dmu_tx_create(zilog->zl_os); VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); dsl_dataset_dirty(ds, tx); @@ -882,8 +887,9 @@ zil_create(zilog_t *zilog) * txg_wait_synced() here either when keep_first is set, because both * zil_create() and zil_destroy() will wait for any in-progress destroys * to complete. + * Return B_TRUE if there were any entries to replay. */ -void +boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first) { const zil_header_t *zh = zilog->zl_header; @@ -899,7 +905,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) zilog->zl_old_header = *zh; /* debugging aid */ if (BP_IS_HOLE(&zh->zh_log)) - return; + return (B_FALSE); tx = dmu_tx_create(zilog->zl_os); VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); @@ -932,6 +938,8 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) mutex_exit(&zilog->zl_lock); dmu_tx_commit(tx); + + return (B_TRUE); } void @@ -3844,8 +3852,9 @@ zil_incr_blks(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t claim_txg) /* * If this dataset has a non-empty intent log, replay it and destroy it. + * Return B_TRUE if there were any entries to replay. 
*/ -void +boolean_t zil_replay(objset_t *os, void *arg, zil_replay_func_t *const replay_func[TX_MAX_TYPE]) { @@ -3854,8 +3863,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_arg_t zr; if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { - zil_destroy(zilog, B_TRUE); - return; + return (zil_destroy(zilog, B_TRUE)); } zr.zr_replay = replay_func; @@ -3878,6 +3886,8 @@ zil_replay(objset_t *os, void *arg, zil_destroy(zilog, B_FALSE); txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); zilog->zl_replay = B_FALSE; + + return (B_TRUE); } boolean_t @@ -3945,7 +3955,7 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_zil, zil_, nocacheflush, INT, ZMOD_RW, "Disable ZIL cache flushes"); -ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, U64, ZMOD_RW, "Limit in bytes slog sync writes per commit"); ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index c2e3c6169fa3..928e28813931 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -512,8 +512,9 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) /* * If this is an authenticated block, just check the MAC. It would be - * nice to separate this out into its own flag, but for the moment - * enum zio_flag is out of bits. + * nice to separate this out into its own flag, but when this was done, + * we had run out of bits in what is now zio_flag_t. Future cleanup + * could make this a flag bit. 
*/ if (BP_IS_AUTHENTICATED(bp)) { if (ot == DMU_OT_OBJSET) { @@ -802,7 +803,7 @@ static zio_t * zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, abd_t *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done, void *private, zio_type_t type, zio_priority_t priority, - enum zio_flag flags, vdev_t *vd, uint64_t offset, + zio_flag_t flags, vdev_t *vd, uint64_t offset, const zbookmark_phys_t *zb, enum zio_stage stage, enum zio_stage pipeline) { @@ -901,7 +902,7 @@ zio_destroy(zio_t *zio) zio_t * zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, - void *private, enum zio_flag flags) + void *private, zio_flag_t flags) { zio_t *zio; @@ -913,7 +914,7 @@ zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, } zio_t * -zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) +zio_root(spa_t *spa, zio_done_func_t *done, void *private, zio_flag_t flags) { return (zio_null(NULL, spa, NULL, done, private, flags)); } @@ -1099,7 +1100,7 @@ zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp) zio_t * zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, abd_t *data, uint64_t size, zio_done_func_t *done, void *private, - zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb) + zio_priority_t priority, zio_flag_t flags, const zbookmark_phys_t *zb) { zio_t *zio; @@ -1117,7 +1118,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, zio_done_func_t *physdone, zio_done_func_t *done, - void *private, zio_priority_t priority, enum zio_flag flags, + void *private, zio_priority_t priority, zio_flag_t flags, const zbookmark_phys_t *zb) { zio_t *zio; @@ -1160,7 +1161,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio_t * zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, uint64_t size, zio_done_func_t *done, 
void *private, - zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb) + zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb) { zio_t *zio; @@ -1203,7 +1204,6 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) */ if (BP_IS_EMBEDDED(bp)) return; - metaslab_check_free(spa, bp); /* * Frees that are for the currently-syncing txg, are not going to be @@ -1220,6 +1220,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) txg != spa->spa_syncing_txg || (spa_sync_pass(spa) >= zfs_sync_pass_deferred_free && !spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))) { + metaslab_check_free(spa, bp); bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); } else { VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL); @@ -1233,7 +1234,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) */ zio_t * zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, - enum zio_flag flags) + zio_flag_t flags) { ASSERT(!BP_IS_HOLE(bp)); ASSERT(spa_syncing_txg(spa) == txg); @@ -1266,7 +1267,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio_t * zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, - zio_done_func_t *done, void *private, enum zio_flag flags) + zio_done_func_t *done, void *private, zio_flag_t flags) { zio_t *zio; @@ -1303,7 +1304,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio_t * zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, - zio_done_func_t *done, void *private, enum zio_flag flags) + zio_done_func_t *done, void *private, zio_flag_t flags) { zio_t *zio; int c; @@ -1328,7 +1329,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, zio_t * zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, zio_done_func_t *done, void *private, zio_priority_t priority, - enum zio_flag flags, enum trim_flag trim_flags) + zio_flag_t flags, enum trim_flag trim_flags) { zio_t *zio; @@ -1348,7 +1349,7 @@ zio_trim(zio_t *pio, vdev_t *vd, 
uint64_t offset, uint64_t size, zio_t * zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, abd_t *data, int checksum, zio_done_func_t *done, void *private, - zio_priority_t priority, enum zio_flag flags, boolean_t labels) + zio_priority_t priority, zio_flag_t flags, boolean_t labels) { zio_t *zio; @@ -1369,7 +1370,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, zio_t * zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, abd_t *data, int checksum, zio_done_func_t *done, void *private, - zio_priority_t priority, enum zio_flag flags, boolean_t labels) + zio_priority_t priority, zio_flag_t flags, boolean_t labels) { zio_t *zio; @@ -1406,7 +1407,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, zio_t * zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, int type, zio_priority_t priority, - enum zio_flag flags, zio_done_func_t *done, void *private) + zio_flag_t flags, zio_done_func_t *done, void *private) { enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE; zio_t *zio; @@ -1480,7 +1481,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, zio_t * zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, - zio_type_t type, zio_priority_t priority, enum zio_flag flags, + zio_type_t type, zio_priority_t priority, zio_flag_t flags, zio_done_func_t *done, void *private) { zio_t *zio; @@ -2030,7 +2031,7 @@ zio_deadman_impl(zio_t *pio, int ziodepth) "delta=%llu queued=%llu io=%llu " "path=%s " "last=%llu type=%d " - "priority=%d flags=0x%x stage=0x%x " + "priority=%d flags=0x%llx stage=0x%x " "pipeline=0x%x pipeline-trace=0x%x " "objset=%llu object=%llu " "level=%llu blkid=%llu " @@ -2040,8 +2041,8 @@ zio_deadman_impl(zio_t *pio, int ziodepth) (u_longlong_t)delta, pio->io_delta, pio->io_delay, vd ? vd->vdev_path : "NULL", vq ? 
vq->vq_io_complete_ts : 0, pio->io_type, - pio->io_priority, pio->io_flags, pio->io_stage, - pio->io_pipeline, pio->io_pipeline_trace, + pio->io_priority, (u_longlong_t)pio->io_flags, + pio->io_stage, pio->io_pipeline, pio->io_pipeline_trace, (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid, (u_longlong_t)pio->io_offset, (u_longlong_t)pio->io_size, @@ -3360,7 +3361,7 @@ zio_ddt_write(zio_t *zio) return (zio); } -ddt_entry_t *freedde; /* for debugging */ +static ddt_entry_t *freedde; /* for debugging */ static zio_t * zio_ddt_free(zio_t *zio) diff --git a/sys/contrib/openzfs/module/zfs/zio_compress.c b/sys/contrib/openzfs/module/zfs/zio_compress.c index 4c9cbc962093..0fb91ac81522 100644 --- a/sys/contrib/openzfs/module/zfs/zio_compress.c +++ b/sys/contrib/openzfs/module/zfs/zio_compress.c @@ -44,7 +44,7 @@ * If nonzero, every 1/X decompression attempts will fail, simulating * an undetected memory error. */ -unsigned long zio_decompress_fail_fraction = 0; +static unsigned long zio_decompress_fail_fraction = 0; /* * Compression vectors. diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 2e2860ff0212..20578a8223b2 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -514,6 +514,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ zvol_replay_err, /* TX_WRITE2 */ zvol_replay_err, /* TX_SETSAXATTR */ + zvol_replay_err, /* TX_RENAME_EXCHANGE */ + zvol_replay_err, /* TX_RENAME_WHITEOUT */ }; /* @@ -1026,8 +1028,7 @@ zvol_add_clones(const char *dsname, list_t *minors_list) out: if (dd != NULL) dsl_dir_rele(dd, FTAG); - if (dp != NULL) - dsl_pool_rele(dp, FTAG); + dsl_pool_rele(dp, FTAG); } /* |