author     Martin Matuska <mm@FreeBSD.org>  2022-11-16 20:25:24 +0000
committer  Martin Matuska <mm@FreeBSD.org>  2022-11-16 20:27:42 +0000
commit     dbd5678dca91abcefe8d046aa2f9b66497a95ffb
tree       23c7cf5ccced42596b6f6da3c8450e86124f5248 /sys/contrib/openzfs/module/zfs
parent     61b146ba43cd3886c81e79b37fdc665d6e1d74b8
parent     2163cde450d0898b5f7bac16afb4e238485411ff
zfs: merge openzfs/zfs@2163cde45
Notable upstream pull request merges:
  #13680 Add options to zfs redundant_metadata property
  #13758 Allow mounting snapshots in .zfs/snapshot as a regular user
  #13838 quota: disable quota check for ZVOL
  #13839 quota: extend quota for dataset
  #13973 Fix memory leaks in dmu_send()/dmu_send_obj()
  #13977 Avoid unnecessary metaslab_check_free calling
  #13978 PAM: Fix unchecked return value from zfs_key_config_load()
  #13979 Handle possible null pointers from malloc/strdup/strndup()
  #13997 zstream: allow decompress to fix metadata for uncompressed records
  #13998 zvol_wait logic may terminate prematurely
  #14001 FreeBSD: Fix a pair of bugs in zfs_fhtovp()
  #14003 Stop ganging due to past vdev write errors
  #14039 Optimize microzaps
  #14050 Fix draid2+2s metadata error on simultaneous 2 drive failures
  #14062 zed: Avoid core dump if wholedisk property does not exist
  #14077 Propagate extent_bytes change to autotrim thread
  #14079 FreeBSD: vn_flush_cached_data: observe vnode locking contract
  #14093 Fix ARC target collapse when zfs_arc_meta_limit_percent=100
  #14106 Add ability to recompress send streams with new compression algorithm
  #14119 Deny receiving into encrypted datasets if the keys are not loaded
  #14120 Fix arc_p aggressive increase
  #14129 zed: Prevent special vdev to be replaced by hot spare
  #14133 Expose zfs_vdev_open_timeout_ms as a tunable
  #14135 FreeBSD: Fix out of bounds read in zfs_ioctl_ozfs_to_legacy()
  #14152 Adds the `-p` option to `zfs holds`
  #14161 Handle and detect #13709's unlock regression

Obtained from:  OpenZFS
OpenZFS commit: 2163cde450d0898b5f7bac16afb4e238485411ff
Diffstat (limited to 'sys/contrib/openzfs/module/zfs')
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c |   6
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c |  88
-rw-r--r--  sys/contrib/openzfs/module/zfs/btree.c |  50
-rw-r--r--  sys/contrib/openzfs/module/zfs/dataset_kstats.c |   7
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c |  11
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c |  27
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_objset.c |   7
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_recv.c |   6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_send.c |  15
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_traverse.c |   3
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_zfetch.c |   4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_bookmark.c |   1
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_crypt.c |  18
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dataset.c |   3
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_deadlist.c |   5
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dir.c |  59
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c |  17
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_prop.c |  93
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_scan.c |  17
-rw-r--r--  sys/contrib/openzfs/module/zfs/fm.c |   1
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c |  20
-rw-r--r--  sys/contrib/openzfs/module/zfs/mmp.c |  18
-rw-r--r--  sys/contrib/openzfs/module/zfs/range_tree.c |   1
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c |  74
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_checkpoint.c |   4
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_log_spacemap.c |  40
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c |  14
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c |  57
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_indirect.c |   9
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_initialize.c |  18
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_queue.c |   4
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz_math.c |  23
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_rebuild.c |  22
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_trim.c |   5
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap_leaf.c |   4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap_micro.c | 247
-rw-r--r--  sys/contrib/openzfs/module/zfs/zcp.c |  14
-rw-r--r--  sys/contrib/openzfs/module/zfs/zcp_get.c |   4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_chksum.c |  38
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_fm.c |   2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c |   8
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_log.c |  88
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_onexit.c |   4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_replay.c | 153
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_vnops.c |  20
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c |  46
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c |  43
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_compress.c |   2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c |   5
49 files changed, 959 insertions, 466 deletions
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 11a1e5112544..d4921d0ba7db 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -667,15 +667,15 @@ abd_return_buf(abd_t *abd, void *buf, size_t n)
{
abd_verify(abd);
ASSERT3U(abd->abd_size, >=, n);
+#ifdef ZFS_DEBUG
+ (void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
+#endif
if (abd_is_linear(abd)) {
ASSERT3P(buf, ==, abd_to_buf(abd));
} else {
ASSERT0(abd_cmp_buf(abd, buf, n));
zio_buf_free(buf, n);
}
-#ifdef ZFS_DEBUG
- (void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
-#endif
}
void
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 33865f715b0f..f51f427c1bfd 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -419,12 +419,12 @@ boolean_t arc_warm;
/*
* These tunables are for performance analysis.
*/
-unsigned long zfs_arc_max = 0;
-unsigned long zfs_arc_min = 0;
-unsigned long zfs_arc_meta_limit = 0;
-unsigned long zfs_arc_meta_min = 0;
-static unsigned long zfs_arc_dnode_limit = 0;
-static unsigned long zfs_arc_dnode_reduce_percent = 10;
+uint64_t zfs_arc_max = 0;
+uint64_t zfs_arc_min = 0;
+uint64_t zfs_arc_meta_limit = 0;
+uint64_t zfs_arc_meta_min = 0;
+static uint64_t zfs_arc_dnode_limit = 0;
+static uint_t zfs_arc_dnode_reduce_percent = 10;
static uint_t zfs_arc_grow_retry = 0;
static uint_t zfs_arc_shrink_shift = 0;
static uint_t zfs_arc_p_min_shift = 0;
@@ -449,17 +449,17 @@ int zfs_compressed_arc_enabled = B_TRUE;
* ARC will evict meta buffers that exceed arc_meta_limit. This
* tunable make arc_meta_limit adjustable for different workloads.
*/
-static unsigned long zfs_arc_meta_limit_percent = 75;
+static uint64_t zfs_arc_meta_limit_percent = 75;
/*
* Percentage that can be consumed by dnodes of ARC meta buffers.
*/
-static unsigned long zfs_arc_dnode_limit_percent = 10;
+static uint_t zfs_arc_dnode_limit_percent = 10;
/*
* These tunables are Linux-specific
*/
-static unsigned long zfs_arc_sys_free = 0;
+static uint64_t zfs_arc_sys_free = 0;
static uint_t zfs_arc_min_prefetch_ms = 0;
static uint_t zfs_arc_min_prescient_prefetch_ms = 0;
static int zfs_arc_p_dampener_disable = 1;
@@ -781,12 +781,12 @@ uint64_t zfs_crc64_table[256];
#define L2ARC_FEED_TYPES 4
/* L2ARC Performance Tunables */
-unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
-unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
-unsigned long l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
-unsigned long l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
-unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
-unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
+uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
+uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
+uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */
+uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
+uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
int l2arc_feed_again = B_TRUE; /* turbo warmup */
int l2arc_norw = B_FALSE; /* no reads during writes */
@@ -909,7 +909,7 @@ static int l2arc_mfuonly = 0;
* will vary depending of how well the specific device handles
* these commands.
*/
-static unsigned long l2arc_trim_ahead = 0;
+static uint64_t l2arc_trim_ahead = 0;
/*
* Performance tuning of L2ARC persistence:
@@ -925,7 +925,7 @@ static unsigned long l2arc_trim_ahead = 0;
* not to waste space.
*/
static int l2arc_rebuild_enabled = B_TRUE;
-static unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
+static uint64_t l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
/* L2ARC persistence rebuild control routines. */
void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
@@ -3939,7 +3939,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted)
* dropping from L1+L2 cached to L2-only,
* realloc to remove the L1 header.
*/
- hdr = arc_hdr_realloc(hdr, hdr_full_cache,
+ (void) arc_hdr_realloc(hdr, hdr_full_cache,
hdr_l2only_cache);
*real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE;
} else {
@@ -4469,7 +4469,7 @@ restart:
* meta buffers. Requests to the upper layers will be made with
* increasingly large scan sizes until the ARC is below the limit.
*/
- if (meta_used > arc_meta_limit) {
+ if (meta_used > arc_meta_limit || arc_available_memory() < 0) {
if (type == ARC_BUFC_DATA) {
type = ARC_BUFC_METADATA;
} else {
@@ -5136,7 +5136,7 @@ arc_adapt(int bytes, arc_state_t *state)
if (!zfs_arc_p_dampener_disable)
mult = MIN(mult, 10); /* avoid wild arc_p adjustment */
- arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult);
+ arc_p = MIN(arc_c - arc_p_min, arc_p + (uint64_t)bytes * mult);
} else if (state == arc_mfu_ghost) {
uint64_t delta;
@@ -5173,7 +5173,7 @@ arc_adapt(int bytes, arc_state_t *state)
atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max)
arc_c = arc_c_max;
- else if (state == arc_anon)
+ else if (state == arc_anon && arc_p < arc_c >> 1)
atomic_add_64(&arc_p, (int64_t)bytes);
if (arc_p > arc_c)
arc_p = arc_c;
@@ -5386,7 +5386,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
hdr->b_l1hdr.b_state == arc_anon &&
(zfs_refcount_count(&arc_anon->arcs_size) +
- zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
+ zfs_refcount_count(&arc_mru->arcs_size) > arc_p &&
+ arc_p < arc_c >> 1))
arc_p = MIN(arc_c, arc_p + size);
}
}
@@ -8539,6 +8540,7 @@ l2arc_dev_get_next(void)
else if (next == first)
break;
+ ASSERT3P(next, !=, NULL);
} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
next->l2ad_trim_all);
@@ -11076,20 +11078,20 @@ EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
- param_get_ulong, ZMOD_RW, "Minimum ARC size in bytes");
+ spl_param_get_u64, ZMOD_RW, "Minimum ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
- param_get_ulong, ZMOD_RW, "Maximum ARC size in bytes");
+ spl_param_get_u64, ZMOD_RW, "Maximum ARC size in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "Metadata limit for ARC size in bytes");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "Metadata limit for ARC size in bytes");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent,
- param_set_arc_long, param_get_ulong, ZMOD_RW,
+ param_set_arc_int, param_get_uint, ZMOD_RW,
"Percent of ARC size for ARC meta limit");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "Minimum ARC metadata size in bytes");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "Minimum ARC metadata size in bytes");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW,
"Meta objects to scan for prune");
@@ -11128,25 +11130,25 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms,
param_set_arc_int, param_get_uint, ZMOD_RW,
"Min life of prescient prefetched block in ms");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, U64, ZMOD_RW,
"Max write bytes per interval");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, U64, ZMOD_RW,
"Extra write bytes during device warmup");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, U64, ZMOD_RW,
"Number of max device writes to precache");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, U64, ZMOD_RW,
"Compressed l2arc_headroom multiplier");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, U64, ZMOD_RW,
"TRIM ahead L2ARC write size multiplier");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, U64, ZMOD_RW,
"Seconds between L2ARC writing");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, U64, ZMOD_RW,
"Min feed interval in milliseconds");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, noprefetch, INT, ZMOD_RW,
@@ -11164,7 +11166,7 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
"Rebuild the L2ARC when importing a pool");
-ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, U64, ZMOD_RW,
"Min size in bytes to write rebuild log blocks in L2ARC");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
@@ -11176,17 +11178,17 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_uint, ZMOD_RW, "System free memory I/O throttle in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "System free memory target size in bytes");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "System free memory target size in bytes");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long,
- param_get_ulong, ZMOD_RW, "Minimum bytes of dnodes in ARC");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_u64,
+ spl_param_get_u64, ZMOD_RW, "Minimum bytes of dnodes in ARC");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent,
- param_set_arc_long, param_get_ulong, ZMOD_RW,
+ param_set_arc_int, param_get_uint, ZMOD_RW,
"Percent of ARC meta buffers for dnodes");
-ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, UINT, ZMOD_RW,
"Percentage of excess dnodes to try to unpin");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
index f0a9222a4308..4c25afaa8199 100644
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -102,7 +102,7 @@ zfs_btree_poison_node(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
(void) memset(leaf->btl_elems, 0x0f, hdr->bth_first * size);
(void) memset(leaf->btl_elems +
(hdr->bth_first + hdr->bth_count) * size, 0x0f,
- BTREE_LEAF_ESIZE -
+ tree->bt_leaf_size - offsetof(zfs_btree_leaf_t, btl_elems) -
(hdr->bth_first + hdr->bth_count) * size);
}
#endif
@@ -173,16 +173,44 @@ zfs_btree_fini(void)
kmem_cache_destroy(zfs_btree_leaf_cache);
}
+static void *
+zfs_btree_leaf_alloc(zfs_btree_t *tree)
+{
+ if (tree->bt_leaf_size == BTREE_LEAF_SIZE)
+ return (kmem_cache_alloc(zfs_btree_leaf_cache, KM_SLEEP));
+ else
+ return (kmem_alloc(tree->bt_leaf_size, KM_SLEEP));
+}
+
+static void
+zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)
+{
+ if (tree->bt_leaf_size == BTREE_LEAF_SIZE)
+ return (kmem_cache_free(zfs_btree_leaf_cache, ptr));
+ else
+ return (kmem_free(ptr, tree->bt_leaf_size));
+}
+
void
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
size_t size)
{
- ASSERT3U(size, <=, BTREE_LEAF_ESIZE / 2);
+ zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
+}
+
+void
+zfs_btree_create_custom(zfs_btree_t *tree,
+ int (*compar) (const void *, const void *),
+ size_t size, size_t lsize)
+{
+ size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
+ ASSERT3U(size, <=, esize / 2);
memset(tree, 0, sizeof (*tree));
tree->bt_compar = compar;
tree->bt_elem_size = size;
- tree->bt_leaf_cap = P2ALIGN(BTREE_LEAF_ESIZE / size, 2);
+ tree->bt_leaf_size = lsize;
+ tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
tree->bt_height = -1;
tree->bt_bulk = NULL;
}
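
A minimal usage sketch of the new constructor (illustrative comparator and element type; not part of the patch). zfs_btree_create() keeps its old behavior by delegating with the default BTREE_LEAF_SIZE, while zfs_btree_create_custom() lets a caller pick a leaf size, subject to the ASSERT that an element must still fit twice into the usable leaf space:

static int
u64_cmp(const void *a, const void *b)
{
	const uint64_t *x = a, *y = b;
	return ((*x > *y) - (*x < *y));
}

static void
btree_custom_example(void)
{
	zfs_btree_t t;

	/* Default leaf size, exactly as before this change. */
	zfs_btree_create(&t, u64_cmp, sizeof (uint64_t));
	zfs_btree_destroy(&t);

	/* 64 KiB leaves via the new constructor. */
	zfs_btree_create_custom(&t, u64_cmp, sizeof (uint64_t), 64 * 1024);
	zfs_btree_destroy(&t);
}
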
@@ -290,7 +318,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
zfs_btree_core_t *node = NULL;
uint32_t child = 0;
- uint64_t depth = 0;
+ uint32_t depth = 0;
/*
* Iterate down the tree, finding which child the value should be in
@@ -811,8 +839,7 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
move_count++;
}
tree->bt_num_nodes++;
- zfs_btree_leaf_t *new_leaf = kmem_cache_alloc(zfs_btree_leaf_cache,
- KM_SLEEP);
+ zfs_btree_leaf_t *new_leaf = zfs_btree_leaf_alloc(tree);
zfs_btree_hdr_t *new_hdr = &new_leaf->btl_hdr;
new_hdr->bth_parent = leaf->btl_hdr.bth_parent;
new_hdr->bth_first = (tree->bt_bulk ? 0 : capacity / 4) +
@@ -1078,8 +1105,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
ASSERT0(where->bti_offset);
tree->bt_num_nodes++;
- zfs_btree_leaf_t *leaf = kmem_cache_alloc(zfs_btree_leaf_cache,
- KM_SLEEP);
+ zfs_btree_leaf_t *leaf = zfs_btree_leaf_alloc(tree);
tree->bt_root = &leaf->btl_hdr;
tree->bt_height++;
@@ -1378,7 +1404,7 @@ zfs_btree_node_destroy(zfs_btree_t *tree, zfs_btree_hdr_t *node)
{
tree->bt_num_nodes--;
if (!zfs_btree_is_core(node)) {
- kmem_cache_free(zfs_btree_leaf_cache, node);
+ zfs_btree_leaf_free(tree, node);
} else {
kmem_free(node, sizeof (zfs_btree_core_t) +
BTREE_CORE_ELEMS * tree->bt_elem_size);
@@ -1991,7 +2017,7 @@ zfs_btree_verify_counts(zfs_btree_t *tree)
*/
static uint64_t
zfs_btree_verify_height_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
- int64_t height)
+ int32_t height)
{
if (!zfs_btree_is_core(hdr)) {
VERIFY0(height);
@@ -2117,8 +2143,10 @@ zfs_btree_verify_poison_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
for (size_t i = 0; i < hdr->bth_first * size; i++)
VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
+ size_t esize = tree->bt_leaf_size -
+ offsetof(zfs_btree_leaf_t, btl_elems);
for (size_t i = (hdr->bth_first + hdr->bth_count) * size;
- i < BTREE_LEAF_ESIZE; i++)
+ i < esize; i++)
VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
} else {
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
index b63f42a21e44..57b8faf213eb 100644
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -128,8 +128,13 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
" snprintf() for kstat name returned %d",
(unsigned long long)dmu_objset_id(objset), n);
return (SET_ERROR(EINVAL));
+ } else if (n >= KSTAT_STRLEN) {
+ zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
+ "kstat name length (%d) exceeds limit (%d)",
+ (unsigned long long)dmu_objset_id(objset),
+ n, KSTAT_STRLEN);
+ return (SET_ERROR(ENAMETOOLONG));
}
- ASSERT3U(n, <, KSTAT_STRLEN);
kstat_t *kstat = kstat_create(kstat_module_name, 0, kstat_name,
"dataset", KSTAT_TYPE_NAMED,
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index db1123d37d98..7982d9702896 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -227,8 +227,8 @@ typedef struct dbuf_cache {
dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
/* Size limits for the caches */
-static unsigned long dbuf_cache_max_bytes = ULONG_MAX;
-static unsigned long dbuf_metadata_cache_max_bytes = ULONG_MAX;
+static uint64_t dbuf_cache_max_bytes = UINT64_MAX;
+static uint64_t dbuf_metadata_cache_max_bytes = UINT64_MAX;
/* Set the default sizes of the caches to log2 fraction of arc size */
static uint_t dbuf_cache_shift = 5;
@@ -1549,7 +1549,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
uint32_t aflags = ARC_FLAG_NOWAIT;
int err, zio_flags;
- err = zio_flags = 0;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
@@ -2687,6 +2686,7 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
dbuf_dirty_record_t *dr;
dr = list_head(&db->db_dirty_records);
+ ASSERT3P(dr, !=, NULL);
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
dl = &dr->dt.dl;
dl->dr_overridden_by = *bp;
@@ -2748,6 +2748,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
dmu_buf_will_not_fill(dbuf, tx);
dr = list_head(&db->db_dirty_records);
+ ASSERT3P(dr, !=, NULL);
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
dl = &dr->dt.dl;
encode_embedded_bp_compressed(&dl->dr_overridden_by,
@@ -5120,7 +5121,7 @@ EXPORT_SYMBOL(dmu_buf_set_user_ie);
EXPORT_SYMBOL(dmu_buf_get_user);
EXPORT_SYMBOL(dmu_buf_get_blkptr);
-ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, U64, ZMOD_RW,
"Maximum size in bytes of the dbuf cache.");
ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW,
@@ -5129,7 +5130,7 @@ ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, lowater_pct, UINT, ZMOD_RW,
"Percentage below dbuf_cache_max_bytes when dbuf eviction stops.");
-ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, U64, ZMOD_RW,
"Maximum size in bytes of dbuf metadata cache.");
ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, cache_shift, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 9e67eb51f415..45304e7ddf7a 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -28,6 +28,7 @@
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/dmu.h>
@@ -70,7 +71,7 @@ static int zfs_nopwrite_enabled = 1;
* will wait until the next TXG.
* A value of zero will disable this throttle.
*/
-static unsigned long zfs_per_txg_dirty_frees_percent = 30;
+static uint_t zfs_per_txg_dirty_frees_percent = 30;
/*
* Enable/disable forcing txg sync when dirty checking for holes with lseek().
@@ -1435,7 +1436,7 @@ dmu_return_arcbuf(arc_buf_t *buf)
*/
int
dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
- const zio_prop_t *zp, enum zio_flag flags, dmu_tx_t *tx)
+ const zio_prop_t *zp, zio_flag_t flags, dmu_tx_t *tx)
{
dbuf_dirty_record_t *dr =
dbuf_dirty_lightweight(dn, dbuf_whichblock(dn, 0, offset), tx);
@@ -1992,12 +1993,22 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
ZCHECKSUM_FLAG_EMBEDDED))
checksum = ZIO_CHECKSUM_FLETCHER_4;
- if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
- (os->os_redundant_metadata ==
- ZFS_REDUNDANT_METADATA_MOST &&
- (level >= zfs_redundant_metadata_most_ditto_level ||
- DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
+ switch (os->os_redundant_metadata) {
+ case ZFS_REDUNDANT_METADATA_ALL:
copies++;
+ break;
+ case ZFS_REDUNDANT_METADATA_MOST:
+ if (level >= zfs_redundant_metadata_most_ditto_level ||
+ DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))
+ copies++;
+ break;
+ case ZFS_REDUNDANT_METADATA_SOME:
+ if (DMU_OT_IS_CRITICAL(type))
+ copies++;
+ break;
+ case ZFS_REDUNDANT_METADATA_NONE:
+ break;
+ }
} else if (wp & WP_NOFILL) {
ASSERT(level == 0);
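
The switch above replaces one compound condition with a per-policy decision. A self-contained restatement (hedged: the enum and flags below stand in for the ZFS_REDUNDANT_METADATA_* values and DMU_OT_IS_* macros; they are not the real definitions):

enum rm_policy { RM_ALL, RM_MOST, RM_SOME, RM_NONE };

/* Return 1 when a metadata block should get one extra copy. */
static int
extra_metadata_copy(enum rm_policy p, int level, int most_ditto_level,
    int is_metadata_type, int is_spill, int is_critical_type)
{
	switch (p) {
	case RM_ALL:	/* every metadata block */
		return (1);
	case RM_MOST:	/* deep indirects, metadata types, spill blocks */
		return (level >= most_ditto_level ||
		    is_metadata_type || is_spill);
	case RM_SOME:	/* only types flagged critical (new in #13680) */
		return (is_critical_type);
	case RM_NONE:	/* never (new in #13680) */
		return (0);
	}
	return (0);
}
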
@@ -2355,7 +2366,7 @@ EXPORT_SYMBOL(dmu_ot);
ZFS_MODULE_PARAM(zfs, zfs_, nopwrite_enabled, INT, ZMOD_RW,
"Enable NOP writes");
-ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, UINT, ZMOD_RW,
"Percentage of dirtied blocks from frees in one TXG");
ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index 4c20afcdb9c6..c17c829a04d8 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -32,6 +32,7 @@
* Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -287,7 +288,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
* Inheritance and range checking should have been done by now.
*/
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
- newval == ZFS_REDUNDANT_METADATA_MOST);
+ newval == ZFS_REDUNDANT_METADATA_MOST ||
+ newval == ZFS_REDUNDANT_METADATA_SOME ||
+ newval == ZFS_REDUNDANT_METADATA_NONE);
os->os_redundant_metadata = newval;
}
@@ -479,7 +482,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
arc_flags_t aflags = ARC_FLAG_WAIT;
zbookmark_phys_t zb;
int size;
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index a9e4a6745905..339fb149a49f 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -646,7 +646,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
* so add the DS_HOLD_FLAG_DECRYPT flag only if we are dealing
* with a dataset we may encrypt.
*/
- if (drba->drba_dcp != NULL &&
+ if (drba->drba_dcp == NULL ||
drba->drba_dcp->cp_crypt != ZIO_CRYPT_OFF) {
dsflags |= DS_HOLD_FLAG_DECRYPT;
}
@@ -1344,7 +1344,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
dnode_t *dn;
abd_t *abd = rrd->abd;
zio_cksum_t bp_cksum = bp->blk_cksum;
- enum zio_flag flags = ZIO_FLAG_SPECULATIVE |
+ zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
if (rwa->raw)
@@ -2186,7 +2186,7 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
zio_prop_t zp;
dmu_write_policy(rwa->os, dn, 0, 0, &zp);
- enum zio_flag zio_flags = 0;
+ zio_flag_t zio_flags = 0;
if (rwa->raw) {
zp.zp_encrypt = B_TRUE;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
index 4ee3ffc352b8..ccb7eb20756d 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -934,7 +934,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
if (BP_GET_TYPE(bp) == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
- enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zioflags = ZIO_FLAG_CANFAIL;
if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) {
ASSERT(BP_IS_PROTECTED(bp));
@@ -1654,7 +1654,7 @@ issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range)
!split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
!BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
- enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zioflags = ZIO_FLAG_CANFAIL;
if (srta->featureflags & DMU_BACKUP_FEATURE_RAW) {
zioflags |= ZIO_FLAG_RAW;
@@ -2511,8 +2511,7 @@ dmu_send_impl(struct dmu_send_params *dspp)
}
if (featureflags & DMU_BACKUP_FEATURE_RAW) {
- uint64_t ivset_guid = (ancestor_zb != NULL) ?
- ancestor_zb->zbm_ivset_guid : 0;
+ uint64_t ivset_guid = ancestor_zb->zbm_ivset_guid;
nvlist_t *keynvl = NULL;
ASSERT(os->os_encrypted);
@@ -2716,6 +2715,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED;
err = dmu_send_impl(&dspp);
}
+ if (dspp.fromredactsnaps)
+ kmem_free(dspp.fromredactsnaps,
+ dspp.numfromredactsnaps * sizeof (uint64_t));
+
dsl_dataset_rele(dspp.to_ds, FTAG);
return (err);
}
@@ -2924,6 +2927,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
/* dmu_send_impl will call dsl_pool_rele for us. */
err = dmu_send_impl(&dspp);
} else {
+ if (dspp.fromredactsnaps)
+ kmem_free(dspp.fromredactsnaps,
+ dspp.numfromredactsnaps *
+ sizeof (uint64_t));
dsl_pool_rele(dspp.dp, FTAG);
}
} else {
diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
index 2ed75640f68d..377634c72bba 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
@@ -111,6 +111,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (claim_txg == 0 || bp->blk_birth < claim_txg)
return (0);
+ ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
@@ -670,7 +671,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
uint32_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 101d2ee7b7a2..1d63d7de65a1 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -58,7 +58,7 @@ unsigned int zfetch_max_distance = 64 * 1024 * 1024;
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
-unsigned long zfetch_array_rd_sz = 1024 * 1024;
+uint64_t zfetch_array_rd_sz = 1024 * 1024;
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
@@ -565,5 +565,5 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
"Max bytes to prefetch indirects for per stream");
-ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW,
"Number of bytes in a array_read");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index 8ca7ba8957aa..b95c94beff1f 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -229,7 +229,6 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp,
switch (error) {
case ESRCH:
/* happy path: new bmark doesn't exist, proceed after switch */
- error = 0;
break;
case 0:
error = SET_ERROR(EEXIST);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
index ce2e6ce742a2..382de208b01d 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
@@ -2671,6 +2671,7 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
objset_phys_t *osp = buf;
uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
uint8_t local_mac[ZIO_OBJSET_MAC_LEN];
+ const uint8_t zeroed_mac[ZIO_OBJSET_MAC_LEN] = {0};
/* look up the key from the spa's keystore */
ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
@@ -2696,8 +2697,21 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
if (memcmp(portable_mac, osp->os_portable_mac,
ZIO_OBJSET_MAC_LEN) != 0 ||
memcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
- abd_return_buf(abd, buf, datalen);
- return (SET_ERROR(ECKSUM));
+ /*
+ * If the MAC is zeroed out, we failed to decrypt it.
+ * This should only arise, at least on Linux,
+ * if we hit edge case handling for useraccounting, since we
+ * shouldn't get here without bailing out on error earlier
+ * otherwise.
+ *
+ * So if we're in that case, we can just fall through and
+ * special-casing noticing that it's zero will handle it
+ * elsewhere, since we can just regenerate it.
+ */
+ if (memcmp(local_mac, zeroed_mac, ZIO_OBJSET_MAC_LEN) != 0) {
+ abd_return_buf(abd, buf, datalen);
+ return (SET_ERROR(ECKSUM));
+ }
}
abd_return_buf(abd, buf, datalen);
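
The fall-through above hinges on spotting an all-zero local MAC, which marks the user-accounting edge case rather than corruption. The same test as the inline memcmp() against zeroed_mac, written out as a helper (illustrative only; the patch keeps the memcmp()):

static int
mac_is_zeroed(const uint8_t *mac, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		if (mac[i] != 0)
			return (0);	/* real MAC mismatch: ECKSUM */
	}
	return (1);	/* zeroed: caller may regenerate the MAC */
}
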
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index 7a066b786cd0..c7577fc584af 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -3421,7 +3421,8 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
conflicting_snaps = B_TRUE;
} else if (err == ESRCH) {
err = 0;
- } else if (err != 0) {
+ }
+ if (err != 0) {
goto out;
}
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
index 1ecae0fe3865..2b33446e66af 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
@@ -92,7 +92,7 @@
* will be loaded into memory and shouldn't take up an inordinate amount of
* space. We settled on ~500000 entries, corresponding to roughly 128M.
*/
-unsigned long zfs_livelist_max_entries = 500000;
+uint64_t zfs_livelist_max_entries = 500000;
/*
* We can approximate how much of a performance gain a livelist will give us
@@ -542,6 +542,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
ASSERT3P(dle, !=, NULL);
dle_prev = AVL_PREV(&dl->dl_tree, dle);
+ ASSERT3P(dle_prev, !=, NULL);
dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx);
@@ -1039,7 +1040,7 @@ dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t,
return (err);
}
-ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, U64, ZMOD_RW,
"Size to start the next sub-livelist in a livelist");
ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, min_percent_shared, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
index d93c7f08c1c2..c1afaa6aaf82 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -54,6 +54,15 @@
#include "zfs_prop.h"
/*
+ * This controls if we verify the ZVOL quota or not.
+ * Currently, quotas are not implemented for ZVOLs.
+ * The quota size is the size of the ZVOL.
+ * The size of the volume already implies the ZVOL size quota.
+ * The quota mechanism can introduce a significant performance drop.
+ */
+static int zvol_enforce_quotas = B_TRUE;
+
+/*
* Filesystem and Snapshot Limits
* ------------------------------
*
@@ -815,6 +824,18 @@ dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
prop == ZFS_PROP_SNAPSHOT_LIMIT);
+ if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
+ /*
+ * We don't enforce the limit for temporary snapshots. This is
+ * indicated by a NULL cred_t argument.
+ */
+ if (cr == NULL)
+ return (0);
+
+ count_prop = DD_FIELD_SNAPSHOT_COUNT;
+ } else {
+ count_prop = DD_FIELD_FILESYSTEM_COUNT;
+ }
/*
* If we're allowed to change the limit, don't enforce the limit
* e.g. this can happen if a snapshot is taken by an administrative
@@ -834,19 +855,6 @@ dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
if (delta == 0)
return (0);
- if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
- /*
- * We don't enforce the limit for temporary snapshots. This is
- * indicated by a NULL cred_t argument.
- */
- if (cr == NULL)
- return (0);
-
- count_prop = DD_FIELD_SNAPSHOT_COUNT;
- } else {
- count_prop = DD_FIELD_FILESYSTEM_COUNT;
- }
-
/*
* If an ancestor has been provided, stop checking the limit once we
* hit that dir. We need this during rename so that we don't overcount
@@ -1268,6 +1276,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
uint64_t quota;
struct tempreserve *tr;
int retval;
+ uint64_t ext_quota;
uint64_t ref_rsrv;
top_of_function:
@@ -1311,7 +1320,9 @@ top_of_function:
* If this transaction will result in a net free of space,
* we want to let it through.
*/
- if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0)
+ if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0 ||
+ (tx->tx_objset && dmu_objset_type(tx->tx_objset) == DMU_OST_ZVOL &&
+ zvol_enforce_quotas == B_FALSE))
quota = UINT64_MAX;
else
quota = dsl_dir_phys(dd)->dd_quota;
@@ -1343,7 +1354,16 @@ top_of_function:
* on-disk is over quota and there are no pending changes
* or deferred frees (which may free up space for us).
*/
- if (used_on_disk + est_inflight >= quota) {
+ ext_quota = quota >> 5;
+ if (quota == UINT64_MAX)
+ ext_quota = 0;
+
+ if (used_on_disk >= quota) {
+ /* Quota exceeded */
+ mutex_exit(&dd->dd_lock);
+ DMU_TX_STAT_BUMP(dmu_tx_quota);
+ return (retval);
+ } else if (used_on_disk + est_inflight >= quota + ext_quota) {
if (est_inflight > 0 || used_on_disk < quota) {
retval = SET_ERROR(ERESTART);
} else {
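
The reworked check gives the quota a small soft margin: once used_on_disk alone reaches the quota the reservation fails outright, but in-flight estimates may overshoot by quota >> 5 (quota/32, about 3.1%) before forcing an ERESTART, and ZVOLs skip the check entirely when the new zvol_enforce_quotas tunable is off. A worked example with assumed numbers:

static void
quota_margin_example(void)
{
	uint64_t quota = 10ULL << 30;		/* 10 GiB dataset quota */
	uint64_t ext_quota = quota >> 5;	/* 320 MiB, ~3.1% slack */

	/*
	 * used_on_disk >= 10 GiB                        -> fail (quota)
	 * used_on_disk + est_inflight >= 10 GiB+320 MiB -> ERESTART
	 * below both thresholds                         -> write proceeds
	 */
	(void) ext_quota;
}
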
@@ -1390,10 +1410,9 @@ top_of_function:
ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
first = B_FALSE;
goto top_of_function;
-
- } else {
- return (0);
}
+
+ return (0);
}
/*
@@ -2474,3 +2493,7 @@ dsl_dir_cancel_waiters(dsl_dir_t *dd)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
#endif
+
+/* CSTYLED */
+ZFS_MODULE_PARAM(zfs, , zvol_enforce_quotas, INT, ZMOD_RW,
+ "Enable strict ZVOL quota enforcment");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index 4fd3722a051e..5ca918a87ee1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -99,8 +99,8 @@
* capped at zfs_dirty_data_max_max. It can also be overridden with a module
* parameter.
*/
-unsigned long zfs_dirty_data_max = 0;
-unsigned long zfs_dirty_data_max_max = 0;
+uint64_t zfs_dirty_data_max = 0;
+uint64_t zfs_dirty_data_max_max = 0;
uint_t zfs_dirty_data_max_percent = 10;
uint_t zfs_dirty_data_max_max_percent = 25;
@@ -109,7 +109,7 @@ uint_t zfs_dirty_data_max_max_percent = 25;
* when approaching the limit until log data is cleared out after txg sync.
* It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
*/
-unsigned long zfs_wrlog_data_max = 0;
+uint64_t zfs_wrlog_data_max = 0;
/*
* If there's at least this much dirty data (as a percentage of
@@ -138,7 +138,7 @@ uint_t zfs_delay_min_dirty_percent = 60;
* Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the
* multiply in dmu_tx_delay().
*/
-unsigned long zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
+uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
/*
* This determines the number of threads used by the dp_sync_taskq.
@@ -331,7 +331,6 @@ dsl_pool_open(dsl_pool_t *dp)
/*
* We might not have created the remap bpobj yet.
*/
- err = 0;
} else {
goto out;
}
@@ -1465,20 +1464,20 @@ ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max_percent, UINT, ZMOD_RD,
ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, UINT, ZMOD_RW,
"Transaction delay threshold");
-ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, U64, ZMOD_RW,
"Determines the dirty space limit");
-ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, U64, ZMOD_RW,
"The size limit of write-transaction zil log data");
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
-ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, U64, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_sync_percent, UINT, ZMOD_RW,
"Dirty data txg sync threshold as a percentage of zfs_dirty_data_max");
-ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, U64, ZMOD_RW,
"How quickly delay approaches infinity");
ZFS_MODULE_PARAM(zfs, zfs_, sync_taskq_batch_pct, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c
index 610e887b3fba..d1c0059092b1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_prop.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c
@@ -23,6 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/zfs_context.h>
@@ -41,6 +42,7 @@
#define ZPROP_INHERIT_SUFFIX "$inherit"
#define ZPROP_RECVD_SUFFIX "$recvd"
+#define ZPROP_IUV_SUFFIX "$iuv"
static int
dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
@@ -69,6 +71,17 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
return (0);
}
+static int
+dsl_prop_known_index(zfs_prop_t prop, uint64_t value)
+{
+ const char *str = NULL;
+ if (prop != ZPROP_CONT && prop != ZPROP_INVAL &&
+ zfs_prop_get_type(prop) == PROP_TYPE_INDEX)
+ return (!zfs_prop_index_to_string(prop, value, &str));
+
+ return (-1);
+}
+
int
dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot)
@@ -81,6 +94,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
boolean_t inheriting = B_FALSE;
char *inheritstr;
char *recvdstr;
+ char *iuvstr;
ASSERT(dsl_pool_config_held(dd->dd_pool));
@@ -91,6 +105,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
+ iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
/*
* Note: dd may become NULL, therefore we shouldn't dereference it
@@ -105,6 +120,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
inheriting = B_TRUE;
}
+ /* Check for an iuv value. */
+ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
+ iuvstr, intsz, numints, buf);
+ if (dsl_prop_known_index(zfs_name_to_prop(propname),
+ *(uint64_t *)buf) != 1)
+ err = ENOENT;
+ if (err != ENOENT) {
+ if (setpoint != NULL && err == 0)
+ dsl_dir_name(dd, setpoint);
+ break;
+ }
+
/* Check for a local value. */
err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
propname, intsz, numints, buf);
@@ -155,6 +182,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
kmem_strfree(inheritstr);
kmem_strfree(recvdstr);
+ kmem_strfree(iuvstr);
return (err);
}
@@ -647,6 +675,45 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_rele(dd, FTAG);
}
+
+/*
+ * For newer values in zfs index type properties, we add a new key
+ * propname$iuv (iuv = Ignore Unknown Values) to the properties zap object
+ * to store the new property value and store the default value in the
+ * existing prop key. So that the propname$iuv key is ignored by the older zfs
+ * versions and the default property value from the existing prop key is
+ * used.
+ */
+static void
+dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname,
+ int intsz, int numints, const void *value, dmu_tx_t *tx)
+{
+ char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
+ boolean_t iuv = B_FALSE;
+ zfs_prop_t prop = zfs_name_to_prop(propname);
+
+ switch (prop) {
+ case ZFS_PROP_REDUNDANT_METADATA:
+ if (*(uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME ||
+ *(uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE)
+ iuv = B_TRUE;
+ break;
+ default:
+ break;
+ }
+
+ if (iuv) {
+ VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints,
+ value, tx));
+ uint64_t val = zfs_prop_default_numeric(prop);
+ VERIFY0(zap_update(mos, zapobj, propname, intsz, numints,
+ &val, tx));
+ } else {
+ zap_remove(mos, zapobj, iuvstr, tx);
+ }
+ kmem_strfree(iuvstr);
+}
+
void
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
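
After something like `zfs set redundant_metadata=some fs`, dsl_prop_set_iuv() above leaves two entries in the props ZAP. A sketch of the resulting state, reusing the mos/zapobj/tx variables and the 8-byte/1-integer encoding from the hunk (a hedged illustration for this one property, not new code in the patch):

uint64_t some = ZFS_REDUNDANT_METADATA_SOME;
uint64_t def = zfs_prop_default_numeric(ZFS_PROP_REDUNDANT_METADATA);

/* New readers prefer the $iuv key when it decodes to a known index. */
VERIFY0(zap_update(mos, zapobj, "redundant_metadata$iuv", 8, 1, &some, tx));
/* Old readers never look up "$iuv" and fall back to the default. */
VERIFY0(zap_update(mos, zapobj, "redundant_metadata", 8, 1, &def, tx));
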
@@ -659,6 +726,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
+ char *iuvstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
@@ -692,6 +760,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
+ iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
switch ((int)source) {
case ZPROP_SRC_NONE:
@@ -709,11 +778,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
/*
* remove propname$inherit
* set propname -> value
+ * set propname$iuv -> new property value
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
VERIFY0(zap_update(mos, zapobj, propname,
intsz, numints, value, tx));
+ (void) dsl_prop_set_iuv(mos, zapobj, propname, intsz,
+ numints, value, tx);
break;
case ZPROP_SRC_INHERITED:
/*
@@ -723,6 +795,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
*/
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
+ err = zap_remove(mos, zapobj, iuvstr, tx);
+ ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
@@ -763,6 +837,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
kmem_strfree(inheritstr);
kmem_strfree(recvdstr);
+ kmem_strfree(iuvstr);
/*
* If we are left with an empty snap zap we can destroy it.
@@ -1012,6 +1087,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
propname = za.za_name;
source = setpoint;
+
+ /* Skip if iuv entries are present. */
+ valstr = kmem_asprintf("%s%s", propname,
+ ZPROP_IUV_SUFFIX);
+ err = zap_contains(mos, propobj, valstr);
+ kmem_strfree(valstr);
+ if (err == 0)
+ continue;
} else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) {
/* Skip explicitly inherited entries. */
continue;
@@ -1044,6 +1127,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
source = ((flags & DSL_PROP_GET_INHERITING) ?
setpoint : ZPROP_SOURCE_VAL_RECVD);
+ } else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) {
+ (void) strlcpy(buf, za.za_name,
+ MIN(sizeof (buf), suffix - za.za_name + 1));
+ propname = buf;
+ source = setpoint;
+ prop = zfs_name_to_prop(propname);
+
+ if (dsl_prop_known_index(prop,
+ za.za_first_integer) != 1)
+ continue;
} else {
/*
* For backward compatibility, skip suffixes we don't
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index f0cd1feaf55b..03c2aa313af0 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -147,13 +147,13 @@ static int zfs_scan_strict_mem_lim = B_FALSE;
* overload the drives with I/O, since that is protected by
* zfs_vdev_scrub_max_active.
*/
-static unsigned long zfs_scan_vdev_limit = 4 << 20;
+static uint64_t zfs_scan_vdev_limit = 4 << 20;
static uint_t zfs_scan_issue_strategy = 0;
/* don't queue & sort zios, go direct */
static int zfs_scan_legacy = B_FALSE;
-static unsigned long zfs_scan_max_ext_gap = 2 << 20; /* in bytes */
+static uint64_t zfs_scan_max_ext_gap = 2 << 20; /* in bytes */
/*
* fill_weight is non-tunable at runtime, so we copy it at module init from
@@ -192,9 +192,9 @@ static int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
static int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
static const enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
/* max number of blocks to free in a single TXG */
-static unsigned long zfs_async_block_max_blocks = ULONG_MAX;
+static uint64_t zfs_async_block_max_blocks = UINT64_MAX;
/* max number of dedup blocks to free in a single TXG */
-static unsigned long zfs_max_async_dedup_frees = 100000;
+static uint64_t zfs_max_async_dedup_frees = 100000;
/* set to disable resilver deferring */
static int zfs_resilver_disable_defer = B_FALSE;
@@ -1470,6 +1470,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (claim_txg == 0 || bp->blk_birth < claim_txg)
return (0);
+ ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
@@ -4446,7 +4447,7 @@ dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
}
-ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, U64, ZMOD_RW,
"Max bytes in flight per leaf vdev for scrubs and resilvers");
ZFS_MODULE_PARAM(zfs, zfs_, scrub_min_time_ms, UINT, ZMOD_RW,
@@ -4470,10 +4471,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_io, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_prefetch, INT, ZMOD_RW,
"Set to disable scrub prefetching");
-ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, U64, ZMOD_RW,
"Max number of blocks freed in one txg");
-ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, U64, ZMOD_RW,
"Max number of dedup blocks freed in one txg");
ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW,
@@ -4494,7 +4495,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, UINT, ZMOD_RW,
"Scan progress on-disk checkpointing interval");
-ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, U64, ZMOD_RW,
"Max gap in bytes between sequential scrub / resilver I/Os");
ZFS_MODULE_PARAM(zfs, zfs_, scan_mem_lim_soft_fact, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
index 32b5cf8facd1..3f05d759770b 100644
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -955,6 +955,7 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
}
atomic_inc_64(
&erpt_kstat_data.fmri_set_failed.value.ui64);
+ va_end(ap);
return;
}
}
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index efcfeecd778e..c624833bc981 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -51,12 +51,12 @@
* operation, we will try to write this amount of data to each disk before
* moving on to the next top-level vdev.
*/
-static unsigned long metaslab_aliquot = 1024 * 1024;
+static uint64_t metaslab_aliquot = 1024 * 1024;
/*
* For testing, make some blocks above a certain size be gang blocks.
*/
-unsigned long metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
+uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
/*
* In pools where the log space map feature is not enabled we touch
@@ -286,7 +286,7 @@ static const int max_disabled_ms = 3;
* Time (in seconds) to respect ms_max_size when the metaslab is not loaded.
* To avoid 64-bit overflow, don't set above UINT32_MAX.
*/
-static unsigned long zfs_metaslab_max_size_cache_sec = 1 * 60 * 60; /* 1 hour */
+static uint64_t zfs_metaslab_max_size_cache_sec = 1 * 60 * 60; /* 1 hour */
/*
* Maximum percentage of memory to use on storing loaded metaslabs. If loading
@@ -5131,8 +5131,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
if (vd != NULL && vd->vdev_mg != NULL) {
mg = vdev_get_mg(vd, mc);
- if (flags & METASLAB_HINTBP_AVOID &&
- mg->mg_next != NULL)
+ if (flags & METASLAB_HINTBP_AVOID)
mg = mg->mg_next;
} else {
mg = mca->mca_rotor;
@@ -5201,12 +5200,11 @@ top:
ASSERT(mg->mg_initialized);
/*
- * Avoid writing single-copy data to a failing,
+ * Avoid writing single-copy data to an unhealthy,
* non-redundant vdev, unless we've already tried all
* other vdevs.
*/
- if ((vd->vdev_stat.vs_write_errors > 0 ||
- vd->vdev_state < VDEV_STATE_HEALTHY) &&
+ if (vd->vdev_state < VDEV_STATE_HEALTHY &&
d == 0 && !try_hard && vd->vdev_children == 0) {
metaslab_trace_add(zal, mg, NULL, psize, d,
TRACE_VDEV_ERROR, allocator);
@@ -6203,7 +6201,7 @@ metaslab_unflushed_txg(metaslab_t *ms)
return (ms->ms_unflushed_txg);
}
-ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, U64, ZMOD_RW,
"Allocation granularity (a.k.a. stripe size)");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_load, INT, ZMOD_RW,
@@ -6251,7 +6249,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, segment_weight_enabled, INT,
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
"Segment-based metaslab selection maximum buckets before switching");
-ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW,
"Blocks larger than this size are forced to be gang blocks");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
@@ -6260,7 +6258,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW,
"When looking in size tree, use largest segment instead of exact fit");
-ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, ULONG,
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, U64,
ZMOD_RW, "How long to trust the cached max chunk size of a metaslab");
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, mem_limit, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c
index 92fd6c422330..ef0e01df390f 100644
--- a/sys/contrib/openzfs/module/zfs/mmp.c
+++ b/sys/contrib/openzfs/module/zfs/mmp.c
@@ -156,7 +156,7 @@
* vary with the I/O load and this observed value is the ub_mmp_delay which is
* stored in the uberblock. The minimum allowed value is 100 ms.
*/
-ulong_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL;
+uint64_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL;
/*
* Used to control the duration of the activity test on import. Smaller values
@@ -303,8 +303,10 @@ mmp_next_leaf(spa_t *spa)
do {
leaf = list_next(&spa->spa_leaf_list, leaf);
- if (leaf == NULL)
+ if (leaf == NULL) {
leaf = list_head(&spa->spa_leaf_list);
+ ASSERT3P(leaf, !=, NULL);
+ }
/*
* We skip unwritable, offline, detached, and dRAID spare
@@ -548,11 +550,11 @@ mmp_thread(void *arg)
uint32_t mmp_fail_intervals = MMP_FAIL_INTVS_OK(
zfs_multihost_fail_intervals);
hrtime_t mmp_fail_ns = mmp_fail_intervals * mmp_interval;
- boolean_t last_spa_suspended = suspended;
- boolean_t last_spa_multihost = multihost;
- uint64_t last_mmp_interval = mmp_interval;
- uint32_t last_mmp_fail_intervals = mmp_fail_intervals;
- hrtime_t last_mmp_fail_ns = mmp_fail_ns;
+ boolean_t last_spa_suspended;
+ boolean_t last_spa_multihost;
+ uint64_t last_mmp_interval;
+ uint32_t last_mmp_fail_intervals;
+ hrtime_t last_mmp_fail_ns;
callb_cpr_t cpr;
int skip_wait = 0;
@@ -734,7 +736,7 @@ mmp_signal_all_threads(void)
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval,
- param_set_multihost_interval, param_get_ulong, ZMOD_RW,
+ param_set_multihost_interval, spl_param_get_u64, ZMOD_RW,
"Milliseconds between mmp writes to each leaf");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
index a2923d1664c7..894c30fcae16 100644
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -369,6 +369,7 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
* invalid as soon as we do any mutating btree operations.
*/
rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
+ ASSERT3P(rs_after, !=, NULL);
rs_set_start_raw(rs_after, rt, before_start);
rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
rs = rs_after;
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index cc367745e486..fe7051db2737 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -218,7 +218,7 @@ static int spa_load_print_vdev_tree = B_FALSE;
* there are also risks of performing an inadvertent rewind as we might be
* missing all the vdevs with the latest uberblocks.
*/
-unsigned long zfs_max_missing_tvds = 0;
+uint64_t zfs_max_missing_tvds = 0;
/*
* The parameters below are similar to zfs_max_missing_tvds but are only
@@ -5267,7 +5267,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
* If we've recovered the pool, pass back any information we
* gathered while doing the load.
*/
- if (state == SPA_LOAD_RECOVER) {
+ if (state == SPA_LOAD_RECOVER && config != NULL) {
fnvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
spa->spa_load_info);
}
@@ -6803,8 +6803,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
pvd = oldvd->vdev_parent;
- if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
- VDEV_ALLOC_ATTACH)) != 0)
+ if (spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
+ VDEV_ALLOC_ATTACH) != 0)
return (spa_vdev_exit(spa, NULL, txg, EINVAL));
if (newrootvd->vdev_children != 1)
@@ -6819,10 +6819,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
return (spa_vdev_exit(spa, newrootvd, txg, error));
/*
- * Spares can't replace logs
+	 * Log, dedup, and special vdevs should not be replaced by spares.
*/
- if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
+ if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
+ oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+ }
/*
* A dRAID spare can only replace a child of its parent dRAID vdev.
@@ -7160,7 +7162,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* it may be that the unwritability of the disk is the reason
* it's being detached!
*/
- error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
+ (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
/*
* Remove vd from its parent and compact the parent's children.
@@ -8867,36 +8869,36 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
spa_history_log_internal(spa, "set", tx,
"%s=%lld", nvpair_name(elem),
(longlong_t)intval);
- } else {
- ASSERT(0); /* not allowed */
- }
- switch (prop) {
- case ZPOOL_PROP_DELEGATION:
- spa->spa_delegation = intval;
- break;
- case ZPOOL_PROP_BOOTFS:
- spa->spa_bootfs = intval;
- break;
- case ZPOOL_PROP_FAILUREMODE:
- spa->spa_failmode = intval;
- break;
- case ZPOOL_PROP_AUTOTRIM:
- spa->spa_autotrim = intval;
- spa_async_request(spa,
- SPA_ASYNC_AUTOTRIM_RESTART);
- break;
- case ZPOOL_PROP_AUTOEXPAND:
- spa->spa_autoexpand = intval;
- if (tx->tx_txg != TXG_INITIAL)
+ switch (prop) {
+ case ZPOOL_PROP_DELEGATION:
+ spa->spa_delegation = intval;
+ break;
+ case ZPOOL_PROP_BOOTFS:
+ spa->spa_bootfs = intval;
+ break;
+ case ZPOOL_PROP_FAILUREMODE:
+ spa->spa_failmode = intval;
+ break;
+ case ZPOOL_PROP_AUTOTRIM:
+ spa->spa_autotrim = intval;
spa_async_request(spa,
- SPA_ASYNC_AUTOEXPAND);
- break;
- case ZPOOL_PROP_MULTIHOST:
- spa->spa_multihost = intval;
- break;
- default:
- break;
+ SPA_ASYNC_AUTOTRIM_RESTART);
+ break;
+ case ZPOOL_PROP_AUTOEXPAND:
+ spa->spa_autoexpand = intval;
+ if (tx->tx_txg != TXG_INITIAL)
+ spa_async_request(spa,
+ SPA_ASYNC_AUTOEXPAND);
+ break;
+ case ZPOOL_PROP_MULTIHOST:
+ spa->spa_multihost = intval;
+ break;
+ default:
+ break;
+ }
+ } else {
+ ASSERT(0); /* not allowed */
}
}
@@ -10016,7 +10018,7 @@ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD,
"Number of threads per IO worker taskqueue");
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW,
"Allow importing pool with up to this number of missing top-level "
"vdevs (in read-only mode)");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
index a837b1ce97ec..b588f7041e5c 100644
--- a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
+++ b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
@@ -158,7 +158,7 @@
* amount of checkpointed data that has been freed within them while
* the pool had a checkpoint.
*/
-static unsigned long zfs_spa_discard_memory_limit = 16 * 1024 * 1024;
+static uint64_t zfs_spa_discard_memory_limit = 16 * 1024 * 1024;
int
spa_checkpoint_get_stats(spa_t *spa, pool_checkpoint_stat_t *pcs)
@@ -631,7 +631,7 @@ EXPORT_SYMBOL(spa_checkpoint_discard_thread);
EXPORT_SYMBOL(spa_checkpoint_discard_thread_check);
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, U64, ZMOD_RW,
"Limit for memory used in prefetching the checkpoint space map done "
"on each vdev while discarding the checkpoint");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
index 4ecce8214f6a..2878e68c6e4b 100644
--- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
+++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
@@ -188,13 +188,13 @@ static const unsigned long zfs_log_sm_blksz = 1ULL << 17;
* (thus the _ppm suffix; reads as "parts per million"). As an example,
* the default of 1000 allows 0.1% of memory to be used.
*/
-static unsigned long zfs_unflushed_max_mem_ppm = 1000;
+static uint64_t zfs_unflushed_max_mem_ppm = 1000;
/*
* Specific hard-limit in memory that ZFS allows to be used for
* unflushed changes.
*/
-static unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30;
+static uint64_t zfs_unflushed_max_mem_amt = 1ULL << 30;
/*
* The following tunable determines the number of blocks that can be used for
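
A minimal sketch of how the two limits above combine, assuming the effective cap is the smaller of the absolute limit and the ppm-scaled fraction of physical memory (the helper and its physmem_bytes parameter are illustrative, not upstream code):

#include <stdint.h>

#define	MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Example: with zfs_unflushed_max_mem_ppm = 1000 (0.1%) and 64 GiB of
 * RAM, the ppm term allows ~64 MiB, well under the 1 GiB
 * zfs_unflushed_max_mem_amt hard cap.
 */
static uint64_t
unflushed_mem_cap(uint64_t physmem_bytes, uint64_t ppm, uint64_t amt)
{
	return (MIN(amt, physmem_bytes / 1000000 * ppm));
}
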
@@ -243,33 +243,33 @@ static unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30;
* provide upper and lower bounds for the log block limit.
* [see zfs_unflushed_log_block_{min,max}]
*/
-static unsigned long zfs_unflushed_log_block_pct = 400;
+static uint_t zfs_unflushed_log_block_pct = 400;
/*
* If the number of metaslabs is small and our incoming rate is high, we could
* get into a situation that we are flushing all our metaslabs every TXG. Thus
* we always allow at least this many log blocks.
*/
-static unsigned long zfs_unflushed_log_block_min = 1000;
+static uint64_t zfs_unflushed_log_block_min = 1000;
/*
* If the log becomes too big, the import time of the pool can take a hit in
* terms of performance. Thus we have a hard limit in the size of the log in
* terms of blocks.
*/
-static unsigned long zfs_unflushed_log_block_max = (1ULL << 17);
+static uint64_t zfs_unflushed_log_block_max = (1ULL << 17);
/*
* Also we have a hard limit in the size of the log in terms of dirty TXGs.
*/
-static unsigned long zfs_unflushed_log_txg_max = 1000;
+static uint64_t zfs_unflushed_log_txg_max = 1000;
/*
* Max # of rows allowed for the log_summary. The tradeoff here is accuracy and
* stability of the flushing algorithm (longer summary) vs its runtime overhead
* (smaller summary is faster to traverse).
*/
-static unsigned long zfs_max_logsm_summary_length = 10;
+static uint64_t zfs_max_logsm_summary_length = 10;
/*
* Tunable that sets the lower bound on the metaslabs to flush every TXG.
@@ -282,7 +282,7 @@ static unsigned long zfs_max_logsm_summary_length = 10;
* The point of this tunable is to be used in extreme cases where we really
* want to flush more metaslabs than our adaptable heuristic plans to flush.
*/
-static unsigned long zfs_min_metaslabs_to_flush = 1;
+static uint64_t zfs_min_metaslabs_to_flush = 1;
/*
 * Tunable that specifies how far in the past we want to look when trying to
@@ -293,7 +293,7 @@ static unsigned long zfs_min_metaslabs_to_flush = 1;
* average over all the blocks that we walk
* [see spa_estimate_incoming_log_blocks].
*/
-static unsigned long zfs_max_log_walking = 5;
+static uint64_t zfs_max_log_walking = 5;
/*
* This tunable exists solely for testing purposes. It ensures that the log
@@ -507,6 +507,7 @@ void
spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone)
{
log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ ASSERT3P(e, !=, NULL);
if (e->lse_txgcount > 0)
e->lse_txgcount--;
for (; e != NULL; e = list_head(&spa->spa_log_summary)) {
@@ -690,7 +691,8 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
* based on the incoming rate until we exceed it.
*/
if (available_blocks >= 0 && available_txgs >= 0) {
- uint64_t skip_txgs = MIN(available_txgs + 1,
+ uint64_t skip_txgs = (incoming == 0) ?
+ available_txgs + 1 : MIN(available_txgs + 1,
(available_blocks / incoming) + 1);
available_blocks -= (skip_txgs * incoming);
available_txgs -= skip_txgs;
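
The hunk above guards the estimate against a zero incoming rate: previously available_blocks / incoming could divide by zero when no log blocks were arriving. A minimal sketch of the guarded computation, with the same variable roles as the hunk (the standalone framing is illustrative, not upstream code):

#include <stdint.h>

#define	MIN(a, b) ((a) < (b) ? (a) : (b))

static uint64_t
skip_txgs_estimate(uint64_t available_blocks, uint64_t available_txgs,
    uint64_t incoming)
{
	/* With nothing incoming, only the TXG budget limits the skip. */
	if (incoming == 0)
		return (available_txgs + 1);
	return (MIN(available_txgs + 1, available_blocks / incoming + 1));
}
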
@@ -1356,34 +1358,34 @@ spa_ld_log_spacemaps(spa_t *spa)
}
/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, U64, ZMOD_RW,
"Specific hard-limit in memory that ZFS allows to be used for "
"unflushed changes");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, U64, ZMOD_RW,
"Percentage of the overall system memory that ZFS allows to be "
"used for unflushed changes (value is calculated over 1000000 for "
"finer granularity)");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, U64, ZMOD_RW,
"Hard limit (upper-bound) in the size of the space map log "
"in terms of blocks.");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, U64, ZMOD_RW,
"Lower-bound limit for the maximum amount of blocks allowed in "
"log spacemap (see zfs_unflushed_log_block_max)");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, U64, ZMOD_RW,
"Hard limit (upper-bound) in the size of the space map log "
"in terms of dirty TXGs.");
-ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, UINT, ZMOD_RW,
"Tunable used to determine the number of blocks that can be used for "
"the spacemap log, expressed as a percentage of the total number of "
"metaslabs in the pool (e.g. 400 means the number of log blocks is "
"capped at 4 times the number of metaslabs)");
-ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, U64, ZMOD_RW,
"The number of past TXGs that the flushing algorithm of the log "
"spacemap feature uses to estimate incoming log blocks");
@@ -1392,8 +1394,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, keep_log_spacemaps_at_export, INT, ZMOD_RW,
"during pool export/destroy");
/* END CSTYLED */
-ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, U64, ZMOD_RW,
"Maximum number of rows allowed in the summary of the spacemap log");
-ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, U64, ZMOD_RW,
"Minimum number of metaslabs to flush per dirty TXG");
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 102070013404..ca55d55405d3 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -304,20 +304,20 @@ int zfs_free_leak_on_eio = B_FALSE;
* has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
* in one of three behaviors controlled by zfs_deadman_failmode.
*/
-unsigned long zfs_deadman_synctime_ms = 600000UL; /* 10 min. */
+uint64_t zfs_deadman_synctime_ms = 600000UL; /* 10 min. */
/*
* This value controls the maximum amount of time zio_wait() will block for an
* outstanding IO. By default this is 300 seconds at which point the "hung"
* behavior will be applied as described for zfs_deadman_synctime_ms.
*/
-unsigned long zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */
+uint64_t zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */
/*
* Check time in milliseconds. This defines the frequency at which we check
* for hung I/O.
*/
-unsigned long zfs_deadman_checktime_ms = 60000UL; /* 1 min. */
+uint64_t zfs_deadman_checktime_ms = 60000UL; /* 1 min. */
/*
* By default the deadman is enabled.
@@ -1536,7 +1536,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
- SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
+ SNPRINTF_BLKPTR(kmem_scnprintf, ' ', buf, buflen, bp, type, checksum,
compress);
}
@@ -2922,7 +2922,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, recover, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, free_leak_on_eio, INT, ZMOD_RW,
"Set to ignore IO errors during free and permanently leak the space");
-ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, U64, ZMOD_RW,
"Dead I/O check interval in milliseconds");
ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, enabled, INT, ZMOD_RW,
@@ -2943,11 +2943,11 @@ ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, failmode,
"Failmode for deadman timer");
ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, synctime_ms,
- param_set_deadman_synctime, param_get_ulong, ZMOD_RW,
+ param_set_deadman_synctime, spl_param_get_u64, ZMOD_RW,
"Pool sync expiration time in milliseconds");
ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms,
- param_set_deadman_ziotime, param_get_ulong, ZMOD_RW,
+ param_set_deadman_ziotime, spl_param_get_u64, ZMOD_RW,
"IO expiration time in milliseconds");
ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 66cec052b669..4520ca31b7d7 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -144,8 +144,8 @@ int zfs_nocacheflush = 0;
* be forced by vdev logical ashift or by user via ashift property, but won't
* be set automatically as a performance optimization.
*/
-uint64_t zfs_vdev_max_auto_ashift = 14;
-uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+uint_t zfs_vdev_max_auto_ashift = 14;
+uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
void
vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
@@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd)
}
}
+ if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+ spa_t *spa = vd->vdev_spa;
+ uint64_t failfast;
+
+ error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+ vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
+ 1, &failfast);
+ if (error == 0) {
+ vd->vdev_failfast = failfast & 1;
+ } else if (error == ENOENT) {
+ vd->vdev_failfast = vdev_prop_default_numeric(
+ VDEV_PROP_FAILFAST);
+ } else {
+ vdev_dbgmsg(vd,
+ "vdev_load: zap_lookup(top_zap=%llu) "
+ "failed [error=%d]",
+ (u_longlong_t)vd->vdev_top_zap, error);
+ }
+ }
+
/*
* Load any rebuild state from the top-level vdev zap.
*/
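
The failfast load above is an instance of a pattern that recurs for vdev properties (see also the VDEV_PROP_FAILFAST case in vdev_prop_get() below): a stored ZAP value wins, ENOENT falls back to the property default, and any other error is only logged. A minimal sketch of the pattern, with hypothetical lookup_fn/default_fn stand-ins for zap_lookup() and vdev_prop_default_numeric():

#include <errno.h>
#include <stdint.h>

static int
load_prop_or_default(int (*lookup_fn)(uint64_t *),
    uint64_t (*default_fn)(void), uint64_t *valp)
{
	uint64_t v;
	int error = lookup_fn(&v);

	if (error == 0)
		*valp = v;		/* explicitly stored value */
	else if (error == ENOENT)
		*valp = default_fn();	/* never stored: use the default */
	/* any other error is left for the caller to report */
	return (error == ENOENT ? 0 : error);
}
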
@@ -5648,7 +5668,7 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvpair_t *elem = NULL;
uint64_t vdev_guid;
nvlist_t *nvprops;
- int error;
+ int error = 0;
ASSERT(vd != NULL);
@@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
else
error = spa_vdev_alloc(spa, vdev_guid);
break;
+ case VDEV_PROP_FAILFAST:
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ vd->vdev_failfast = intval & 1;
+ break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6022,6 +6049,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
+ case VDEV_PROP_FAILFAST:
+ src = ZPROP_SRC_LOCAL;
+ strval = NULL;
+
+ err = zap_lookup(mos, objid, nvpair_name(elem),
+ sizeof (uint64_t), 1, &intval);
+ if (err == ENOENT) {
+ intval = vdev_prop_default_numeric(
+ prop);
+ err = 0;
+ } else if (err) {
+ break;
+ }
+ if (intval == vdev_prop_default_numeric(prop))
+ src = ZPROP_SRC_DEFAULT;
+
+ vdev_prop_add_list(outnvl, propname, strval,
+ intval, src);
+ break;
/* Text Properties */
case VDEV_PROP_COMMENT:
/* Exists in the ZAP below */
@@ -6078,7 +6124,6 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
strval = NULL;
zprop_source_t src = ZPROP_SRC_DEFAULT;
propname = za.za_name;
- prop = vdev_name_to_prop(propname);
switch (za.za_integer_length) {
case 8:
@@ -6156,11 +6201,11 @@ ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, UINT, ZMOD_RW,
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
- param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
+ param_set_min_auto_ashift, param_get_uint, ZMOD_RW,
"Minimum ashift used when creating new top-level vdevs");
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
- param_set_max_auto_ashift, param_get_ulong, ZMOD_RW,
+ param_set_max_auto_ashift, param_get_uint, ZMOD_RW,
"Maximum ashift used when optimizing for logical -> physical sector "
"size on new top-level vdevs");
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index 0ca0c245e952..814a1f0efe4c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -189,14 +189,14 @@ static uint_t zfs_condense_indirect_obsolete_pct = 25;
* consumed by the obsolete space map; the default of 1GB is small enough
* that we typically don't mind "wasting" it.
*/
-static unsigned long zfs_condense_max_obsolete_bytes = 1024 * 1024 * 1024;
+static uint64_t zfs_condense_max_obsolete_bytes = 1024 * 1024 * 1024;
/*
* Don't bother condensing if the mapping uses less than this amount of
* memory. The default of 128KB is considered a "trivial" amount of
* memory and not worth reducing.
*/
-static unsigned long zfs_condense_min_mapping_bytes = 128 * 1024;
+static uint64_t zfs_condense_min_mapping_bytes = 128 * 1024;
/*
* This is used by the test suite so that it can ensure that certain
@@ -1319,6 +1319,7 @@ vdev_indirect_io_start(zio_t *zio)
vdev_indirect_gather_splits, zio);
indirect_split_t *first = list_head(&iv->iv_splits);
+ ASSERT3P(first, !=, NULL);
if (first->is_size == zio->io_size) {
/*
* This is not a split block; we are pointing to the entire
@@ -1891,11 +1892,11 @@ ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_obsolete_pct, UINT,
"Minimum obsolete percent of bytes in the mapping "
"to attempt condensing");
-ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, U64, ZMOD_RW,
"Don't bother condensing if the mapping uses less than this amount of "
"memory");
-ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, ULONG,
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, U64,
ZMOD_RW,
"Minimum size obsolete spacemap to attempt condensing");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
index 965fb7ef0593..75beb0cc3d12 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
@@ -36,17 +36,13 @@
/*
* Value that is written to disk during initialization.
*/
-#ifdef _ILP32
-static unsigned long zfs_initialize_value = 0xdeadbeefUL;
-#else
-static unsigned long zfs_initialize_value = 0xdeadbeefdeadbeeeULL;
-#endif
+static uint64_t zfs_initialize_value = 0xdeadbeefdeadbeeeULL;
/* maximum number of I/Os outstanding per leaf vdev */
static const int zfs_initialize_limit = 1;
/* size of initializing writes; default 1MiB, see zfs_remove_max_segment */
-static unsigned long zfs_initialize_chunk_size = 1024 * 1024;
+static uint64_t zfs_initialize_chunk_size = 1024 * 1024;
static boolean_t
vdev_initialize_should_stop(vdev_t *vd)
@@ -261,15 +257,9 @@ vdev_initialize_block_fill(void *buf, size_t len, void *unused)
(void) unused;
ASSERT0(len % sizeof (uint64_t));
-#ifdef _ILP32
- for (uint64_t i = 0; i < len; i += sizeof (uint32_t)) {
- *(uint32_t *)((char *)(buf) + i) = zfs_initialize_value;
- }
-#else
for (uint64_t i = 0; i < len; i += sizeof (uint64_t)) {
*(uint64_t *)((char *)(buf) + i) = zfs_initialize_value;
}
-#endif
return (0);
}
@@ -765,8 +755,8 @@ EXPORT_SYMBOL(vdev_initialize_stop_all);
EXPORT_SYMBOL(vdev_initialize_stop_wait);
EXPORT_SYMBOL(vdev_initialize_restart);
-ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, U64, ZMOD_RW,
"Value written during zpool initialize");
-ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, U64, ZMOD_RW,
"Size in bytes of writes by zpool initialize");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index 1acb89cea393..ec55674393ce 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -605,7 +605,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
int maxblocksize;
boolean_t stretch = B_FALSE;
avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
- enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+ zio_flag_t flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
uint64_t next_offset;
abd_t *abd;
@@ -725,6 +725,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
* after our span is mandatory.
*/
dio = AVL_NEXT(t, last);
+ ASSERT3P(dio, !=, NULL);
dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
} else {
/* do not include the optional i/o */
@@ -756,6 +757,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
do {
dio = nio;
nio = AVL_NEXT(t, dio);
+ ASSERT3P(dio, !=, NULL);
zio_add_child(dio, aio);
vdev_queue_io_remove(vq, dio);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c
index f74a76a8d5ba..2980f8acfbd7 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c
@@ -285,17 +285,17 @@ raidz_math_kstat_headers(char *buf, size_t size)
{
ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
- ssize_t off = snprintf(buf, size, "%-17s", "implementation");
+ ssize_t off = kmem_scnprintf(buf, size, "%-17s", "implementation");
for (int i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_gen_name[i]);
for (int i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_rec_name[i]);
- (void) snprintf(buf + off, size - off, "\n");
+ (void) kmem_scnprintf(buf + off, size - off, "\n");
return (0);
}
@@ -311,34 +311,35 @@ raidz_math_kstat_data(char *buf, size_t size, void *data)
ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
if (cstat == fstat) {
- off += snprintf(buf + off, size - off, "%-17s", "fastest");
+ off += kmem_scnprintf(buf + off, size - off, "%-17s",
+ "fastest");
for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) {
int id = fstat->gen[i];
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_supp_impl[id]->name);
}
for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) {
int id = fstat->rec[i];
- off += snprintf(buf + off, size - off, "%-16s",
+ off += kmem_scnprintf(buf + off, size - off, "%-16s",
raidz_supp_impl[id]->name);
}
} else {
ptrdiff_t id = cstat - raidz_impl_kstats;
- off += snprintf(buf + off, size - off, "%-17s",
+ off += kmem_scnprintf(buf + off, size - off, "%-17s",
raidz_supp_impl[id]->name);
for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
- off += snprintf(buf + off, size - off, "%-16llu",
+ off += kmem_scnprintf(buf + off, size - off, "%-16llu",
(u_longlong_t)cstat->gen[i]);
for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
- off += snprintf(buf + off, size - off, "%-16llu",
+ off += kmem_scnprintf(buf + off, size - off, "%-16llu",
(u_longlong_t)cstat->rec[i]);
}
- (void) snprintf(buf + off, size - off, "\n");
+ (void) kmem_scnprintf(buf + off, size - off, "\n");
return (0);
}
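
The snprintf() to kmem_scnprintf() conversions here (and in zfs_chksum.c below) matter for the off += accumulation pattern: snprintf() returns the length the output would have had, so on truncation off can run past size and the subsequent size - off underflows. An scnprintf-style function returns the bytes actually written, keeping the offset in bounds. A minimal sketch of the distinction, assuming kmem_scnprintf() follows the usual scnprintf semantics:

#include <stdio.h>

/* scnprintf-style wrapper: never reports more than actually fit. */
static size_t
scn_copy(char *buf, size_t size, const char *s)
{
	if (size == 0)
		return (0);
	int n = snprintf(buf, size, "%s", s);
	/* snprintf() may return >= size on truncation; clamp it. */
	return ((size_t)n >= size ? size - 1 : (size_t)n);
}

With the clamped return, a loop like off += scn_copy(buf + off, size - off, name) can never push off beyond size - 1, whereas accumulating raw snprintf() returns can.
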
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
index 1ce578e228d8..1f56275c853b 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -22,6 +22,7 @@
*
* Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/vdev_impl.h>
@@ -103,7 +104,7 @@
* Size of rebuild reads; defaults to 1MiB per data disk and is capped at
* SPA_MAXBLOCKSIZE.
*/
-static unsigned long zfs_rebuild_max_segment = 1024 * 1024;
+static uint64_t zfs_rebuild_max_segment = 1024 * 1024;
/*
 * Maximum number of bytes executed in parallel per leaf vdev caused by a
@@ -121,7 +122,7 @@ static unsigned long zfs_rebuild_max_segment = 1024 * 1024;
* With a value of 32MB the sequential resilver write rate was measured at
* 800MB/s sustained while rebuilding to a distributed spare.
*/
-static unsigned long zfs_rebuild_vdev_limit = 32 << 20;
+static uint64_t zfs_rebuild_vdev_limit = 32 << 20;
/*
* Automatically start a pool scrub when the last active sequential resilver
@@ -134,6 +135,7 @@ static int zfs_rebuild_scrub_enabled = 1;
* For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
*/
static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg);
+static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx);
/*
* Clear the per-vdev rebuild bytes value for a vdev tree.
@@ -307,6 +309,17 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
mutex_enter(&vd->vdev_rebuild_lock);
+
+ /*
+ * Handle a second device failure if it occurs after all rebuild I/O
+ * has completed but before this sync task has been executed.
+ */
+ if (vd->vdev_rebuild_reset_wanted) {
+ mutex_exit(&vd->vdev_rebuild_lock);
+ vdev_rebuild_reset_sync(arg, tx);
+ return;
+ }
+
vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
vrp->vrp_end_time = gethrestime_sec();
@@ -760,7 +773,6 @@ vdev_rebuild_thread(void *arg)
ASSERT(vd->vdev_rebuilding);
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
- ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE);
vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
@@ -1138,10 +1150,10 @@ vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
return (error);
}
-ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, U64, ZMOD_RW,
"Max segment size in bytes of rebuild reads");
-ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, U64, ZMOD_RW,
"Max bytes in flight per leaf vdev for sequential resilvers");
ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c
index 5905d9a07571..5b5076c8722c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_trim.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c
@@ -1188,12 +1188,11 @@ vdev_autotrim_thread(void *arg)
mutex_exit(&vd->vdev_autotrim_lock);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
- uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
- uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
-
while (!vdev_autotrim_should_stop(vd)) {
int txgs_per_trim = MAX(zfs_trim_txg_batch, 1);
boolean_t issued_trim = B_FALSE;
+ uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
+ uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
/*
 * All of the metaslabs are divided into groups of size
diff --git a/sys/contrib/openzfs/module/zfs/zap_leaf.c b/sys/contrib/openzfs/module/zfs/zap_leaf.c
index 25c2d5163a26..2e8489c7dfcf 100644
--- a/sys/contrib/openzfs/module/zfs/zap_leaf.c
+++ b/sys/contrib/openzfs/module/zfs/zap_leaf.c
@@ -646,7 +646,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
* form of the name. But all callers have one of these on hand anyway,
* so might as well take advantage. A cleaner but slower interface
* would accept neither argument, and compute the normalized name as
- * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
+ * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))).
*/
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
@@ -667,7 +667,7 @@ zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
continue;
if (zn == NULL) {
- zn = zap_name_alloc(zap, name, MT_NORMALIZE);
+ zn = zap_name_alloc_str(zap, name, MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_leaf_array_match(zeh->zeh_leaf, zn,
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index 58a5c9f600b7..606f426404cc 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -33,7 +33,7 @@
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
-#include <sys/avl.h>
+#include <sys/btree.h>
#include <sys/arc.h>
#include <sys/dmu_objset.h>
@@ -92,7 +92,7 @@ zap_hash(zap_name_t *zn)
wp++, i++) {
uint64_t word = *wp;
- for (int j = 0; j < zn->zn_key_intlen; j++) {
+ for (int j = 0; j < 8; j++) {
h = (h >> 8) ^
zfs_crc64_table[(h ^ word) & 0xFF];
word >>= NBBY;
@@ -162,18 +162,25 @@ zap_match(zap_name_t *zn, const char *matchname)
}
}
+static zap_name_t *
+zap_name_alloc(zap_t *zap)
+{
+ zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+ zn->zn_zap = zap;
+ return (zn);
+}
+
void
zap_name_free(zap_name_t *zn)
{
kmem_free(zn, sizeof (zap_name_t));
}
-zap_name_t *
-zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
+static int
+zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
{
- zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+ zap_t *zap = zn->zn_zap;
- zn->zn_zap = zap;
zn->zn_key_intlen = sizeof (*key);
zn->zn_key_orig = key;
zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
@@ -194,17 +201,13 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
* what the hash is computed from.
*/
if (zap_normalize(zap, key, zn->zn_normbuf,
- zap->zap_normflags) != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ zap->zap_normflags) != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm = zn->zn_normbuf;
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
} else {
- if (mt != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ if (mt != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm = zn->zn_key_orig;
zn->zn_key_norm_numints = zn->zn_key_orig_numints;
}
@@ -217,13 +220,22 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
* what the matching is based on. (Not the hash!)
*/
if (zap_normalize(zap, key, zn->zn_normbuf,
- zn->zn_normflags) != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ zn->zn_normflags) != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
}
+ return (0);
+}
+
+zap_name_t *
+zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
+{
+ zap_name_t *zn = zap_name_alloc(zap);
+ if (zap_name_init_str(zn, key, mt) != 0) {
+ zap_name_free(zn);
+ return (NULL);
+ }
return (zn);
}
@@ -277,45 +289,46 @@ mze_compare(const void *arg1, const void *arg2)
const mzap_ent_t *mze1 = arg1;
const mzap_ent_t *mze2 = arg2;
- int cmp = TREE_CMP(mze1->mze_hash, mze2->mze_hash);
- if (likely(cmp))
- return (cmp);
-
- return (TREE_CMP(mze1->mze_cd, mze2->mze_cd));
+ return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd,
+ (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
}
static void
-mze_insert(zap_t *zap, int chunkid, uint64_t hash)
+mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
{
+ mzap_ent_t mze;
+
ASSERT(zap->zap_ismicro);
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
- mze->mze_chunkid = chunkid;
- mze->mze_hash = hash;
- mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
- ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
- avl_add(&zap->zap_m.zap_avl, mze);
+ mze.mze_chunkid = chunkid;
+ ASSERT0(hash & 0xffffffff);
+ mze.mze_hash = hash >> 32;
+ ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff);
+ mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd;
+ ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0);
+ zfs_btree_add(&zap->zap_m.zap_tree, &mze);
}
static mzap_ent_t *
-mze_find(zap_name_t *zn)
+mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
{
mzap_ent_t mze_tofind;
mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
+ zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree;
ASSERT(zn->zn_zap->zap_ismicro);
ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
- mze_tofind.mze_hash = zn->zn_hash;
+ ASSERT0(zn->zn_hash & 0xffffffff);
+ mze_tofind.mze_hash = zn->zn_hash >> 32;
mze_tofind.mze_cd = 0;
- mze = avl_find(avl, &mze_tofind, &idx);
+ mze = zfs_btree_find(tree, &mze_tofind, idx);
if (mze == NULL)
- mze = avl_nearest(avl, idx, AVL_AFTER);
- for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
+ mze = zfs_btree_next(tree, idx, idx);
+ for (; mze && mze->mze_hash == mze_tofind.mze_hash;
+ mze = zfs_btree_next(tree, idx, idx)) {
ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
return (mze);
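
Two details of the new layout are worth spelling out: microzap hashes always have zero low 32 bits (hence the ASSERT0 checks and the >> 32 storage trick), and mze_compare() now packs the 32-bit stored hash and the 16-bit collision differentiator into one 64-bit key so a single three-way comparison orders both fields. A minimal standalone sketch (TREE_CMP written out as the usual -1/0/+1 idiom):

#include <stdint.h>

#define	TREE_CMP(a, b) (((a) > (b)) - ((a) < (b)))

/* Order by hash first, then collision differentiator, in one compare. */
static int
mze_cmp_sketch(uint32_t h1, uint16_t cd1, uint32_t h2, uint16_t cd2)
{
	uint64_t k1 = ((uint64_t)h1 << 32) | cd1;
	uint64_t k2 = ((uint64_t)h2 << 32) | cd2;
	return (TREE_CMP(k1, k2));
}
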
@@ -328,18 +341,21 @@ static uint32_t
mze_find_unused_cd(zap_t *zap, uint64_t hash)
{
mzap_ent_t mze_tofind;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
+ zfs_btree_index_t idx;
+ zfs_btree_t *tree = &zap->zap_m.zap_tree;
ASSERT(zap->zap_ismicro);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+ ASSERT0(hash & 0xffffffff);
+ hash >>= 32;
mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;
uint32_t cd = 0;
- for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+ for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+ mze && mze->mze_hash == hash;
+ mze = zfs_btree_next(tree, &idx, &idx)) {
if (mze->mze_cd != cd)
break;
cd++;
@@ -364,16 +380,18 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
{
zap_t *zap = zn->zn_zap;
mzap_ent_t mze_tofind;
- mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
+ zfs_btree_index_t idx;
+ zfs_btree_t *tree = &zap->zap_m.zap_tree;
uint32_t mzap_ents = 0;
+ ASSERT0(hash & 0xffffffff);
+ hash >>= 32;
mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;
- for (mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+ for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+ mze && mze->mze_hash == hash;
+ mze = zfs_btree_next(tree, &idx, &idx)) {
mzap_ents++;
}
@@ -384,24 +402,10 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
}
static void
-mze_remove(zap_t *zap, mzap_ent_t *mze)
-{
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
- avl_remove(&zap->zap_m.zap_avl, mze);
- kmem_free(mze, sizeof (mzap_ent_t));
-}
-
-static void
mze_destroy(zap_t *zap)
{
- mzap_ent_t *mze;
- void *avlcookie = NULL;
-
- while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)))
- kmem_free(mze, sizeof (mzap_ent_t));
- avl_destroy(&zap->zap_m.zap_avl);
+ zfs_btree_clear(&zap->zap_m.zap_tree);
+ zfs_btree_destroy(&zap->zap_m.zap_tree);
}
static zap_t *
@@ -448,21 +452,26 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap->zap_salt = zap_m_phys(zap)->mz_salt;
zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
- avl_create(&zap->zap_m.zap_avl, mze_compare,
- sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
- for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
+ /*
+ * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove()
+	 * overhead on massive inserts below. It still allows storing
+	 * 62 entries before we have to add a 2KB B-tree core node.
+ */
+ zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
+ sizeof (mzap_ent_t), 512);
+
+ zap_name_t *zn = zap_name_alloc(zap);
+ for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
mzap_ent_phys_t *mze =
&zap_m_phys(zap)->mz_chunk[i];
if (mze->mze_name[0]) {
- zap_name_t *zn;
-
zap->zap_m.zap_num_entries++;
- zn = zap_name_alloc(zap, mze->mze_name, 0);
+ zap_name_init_str(zn, mze->mze_name, 0);
mze_insert(zap, i, zn->zn_hash);
- zap_name_free(zn);
}
}
+ zap_name_free(zn);
} else {
zap->zap_salt = zap_f_phys(zap)->zap_salt;
zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
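
The 512-byte leaf comment above checks out arithmetically under the shrunken entry: with mze_chunkid and mze_cd narrowed to 16 bits and only the upper 32 bits of the hash kept, each mzap_ent_t occupies 8 bytes, so (512 - header) / 8 gives the quoted 62 entries if the leaf header is 16 bytes (the header size is an assumption here). A sketch of the layout:

#include <stdint.h>

typedef struct mzap_ent_sketch {
	uint16_t mze_chunkid;	/* chunk index, now 16-bit */
	uint16_t mze_cd;	/* collision differentiator, now 16-bit */
	uint32_t mze_hash;	/* upper 32 bits of the zap hash */
} mzap_ent_sketch_t;		/* 8 bytes total */

/* (512 - 16) / sizeof (mzap_ent_sketch_t) == 62 entries per leaf */
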
@@ -657,24 +666,25 @@ mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
dprintf("upgrading obj=%llu with %u chunks\n",
(u_longlong_t)zap->zap_object, nchunks);
- /* XXX destroy the avl later, so we can use the stored hash value */
+ /* XXX destroy the tree later, so we can use the stored hash value */
mze_destroy(zap);
fzap_upgrade(zap, tx, flags);
+ zap_name_t *zn = zap_name_alloc(zap);
for (int i = 0; i < nchunks; i++) {
mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
if (mze->mze_name[0] == 0)
continue;
dprintf("adding %s=%llu\n",
mze->mze_name, (u_longlong_t)mze->mze_value);
- zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0);
+ zap_name_init_str(zn, mze->mze_name, 0);
/* If we fail here, we would end up losing entries */
VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
tag, tx));
zap = zn->zn_zap; /* fzap_add_cd() may change zap */
- zap_name_free(zn);
}
+ zap_name_free(zn);
vmem_free(mzp, sz);
*zapp = zap;
return (0);
@@ -916,22 +926,23 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
* See also the comment above zap_entry_normalization_conflict().
*/
static boolean_t
-mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
+mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
+ zfs_btree_index_t *idx)
{
- int direction = AVL_BEFORE;
boolean_t allocdzn = B_FALSE;
+ mzap_ent_t *other;
+ zfs_btree_index_t oidx;
if (zap->zap_normflags == 0)
return (B_FALSE);
-again:
- for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
+ for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx);
other && other->mze_hash == mze->mze_hash;
- other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
+ other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) {
if (zn == NULL) {
- zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
- MT_NORMALIZE);
+ zn = zap_name_alloc_str(zap,
+ MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
@@ -941,9 +952,20 @@ again:
}
}
- if (direction == AVL_BEFORE) {
- direction = AVL_AFTER;
- goto again;
+ for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx);
+ other && other->mze_hash == mze->mze_hash;
+ other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) {
+
+ if (zn == NULL) {
+ zn = zap_name_alloc_str(zap,
+ MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
+ allocdzn = B_TRUE;
+ }
+ if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
+ if (allocdzn)
+ zap_name_free(zn);
+ return (B_TRUE);
+ }
}
if (allocdzn)
@@ -971,7 +993,7 @@ zap_lookup_impl(zap_t *zap, const char *name,
{
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, name, mt);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
if (zn == NULL)
return (SET_ERROR(ENOTSUP));
@@ -979,7 +1001,8 @@ zap_lookup_impl(zap_t *zap, const char *name,
err = fzap_lookup(zn, integer_size, num_integers, buf,
realname, rn_len, ncp);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
@@ -990,11 +1013,13 @@ zap_lookup_impl(zap_t *zap, const char *name,
} else {
*(uint64_t *)buf =
MZE_PHYS(zap, mze)->mze_value;
- (void) strlcpy(realname,
- MZE_PHYS(zap, mze)->mze_name, rn_len);
+ if (realname != NULL)
+ (void) strlcpy(realname,
+ MZE_PHYS(zap, mze)->mze_name,
+ rn_len);
if (ncp) {
*ncp = mzap_normalization_conflict(zap,
- zn, mze);
+ zn, mze, &idx);
}
}
}
@@ -1031,7 +1056,7 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
- zn = zap_name_alloc(zap, name, 0);
+ zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1134,7 +1159,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc(zap, name, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1142,7 +1167,8 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
if (!zap->zap_ismicro) {
err = fzap_length(zn, integer_size, num_integers);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
@@ -1182,7 +1208,7 @@ static void
mzap_addent(zap_name_t *zn, uint64_t value)
{
zap_t *zap = zn->zn_zap;
- int start = zap->zap_m.zap_alloc_next;
+ uint16_t start = zap->zap_m.zap_alloc_next;
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
@@ -1198,7 +1224,7 @@ mzap_addent(zap_name_t *zn, uint64_t value)
ASSERT(cd < zap_maxcd(zap));
again:
- for (int i = start; i < zap->zap_m.zap_num_chunks; i++) {
+ for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) {
mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
if (mze->mze_name[0] == 0) {
mze->mze_value = value;
@@ -1229,7 +1255,7 @@ zap_add_impl(zap_t *zap, const char *key,
const uint64_t *intval = val;
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, key, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
if (zn == NULL) {
zap_unlockdir(zap, tag);
return (SET_ERROR(ENOTSUP));
@@ -1247,7 +1273,8 @@ zap_add_impl(zap_t *zap, const char *key,
}
zap = zn->zn_zap; /* fzap_add() may change zap */
} else {
- if (mze_find(zn) != NULL) {
+ zfs_btree_index_t idx;
+ if (mze_find(zn, &idx) != NULL) {
err = SET_ERROR(EEXIST);
} else {
mzap_addent(zn, *intval);
@@ -1327,7 +1354,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc(zap, name, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1348,7 +1375,8 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
}
zap = zn->zn_zap; /* fzap_update() may change zap */
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze != NULL) {
MZE_PHYS(zap, mze)->mze_value = *intval;
} else {
@@ -1398,20 +1426,20 @@ zap_remove_impl(zap_t *zap, const char *name,
{
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, name, mt);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
if (zn == NULL)
return (SET_ERROR(ENOTSUP));
if (!zap->zap_ismicro) {
err = fzap_remove(zn, tx);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
zap->zap_m.zap_num_entries--;
- memset(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 0,
- sizeof (mzap_ent_phys_t));
- mze_remove(zap, mze);
+ memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
+ zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
}
}
zap_name_free(zn);
@@ -1582,29 +1610,30 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
if (!zc->zc_zap->zap_ismicro) {
err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
} else {
- avl_index_t idx;
+ zfs_btree_index_t idx;
mzap_ent_t mze_tofind;
- mze_tofind.mze_hash = zc->zc_hash;
+ mze_tofind.mze_hash = zc->zc_hash >> 32;
mze_tofind.mze_cd = zc->zc_cd;
- mzap_ent_t *mze =
- avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
+ mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
+ &mze_tofind, &idx);
if (mze == NULL) {
- mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
- idx, AVL_AFTER);
+ mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
+ &idx, &idx);
}
if (mze) {
mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
za->za_normalization_conflict =
- mzap_normalization_conflict(zc->zc_zap, NULL, mze);
+ mzap_normalization_conflict(zc->zc_zap, NULL,
+ mze, &idx);
za->za_integer_length = 8;
za->za_num_integers = 1;
za->za_first_integer = mzep->mze_value;
(void) strlcpy(za->za_name, mzep->mze_name,
sizeof (za->za_name));
- zc->zc_hash = mze->mze_hash;
+ zc->zc_hash = (uint64_t)mze->mze_hash << 32;
zc->zc_cd = mze->mze_cd;
err = 0;
} else {
diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c
index fe90242ca40d..5ebf1bbbc8cc 100644
--- a/sys/contrib/openzfs/module/zfs/zcp.c
+++ b/sys/contrib/openzfs/module/zfs/zcp.c
@@ -109,8 +109,8 @@
#define ZCP_NVLIST_MAX_DEPTH 20
static const uint64_t zfs_lua_check_instrlimit_interval = 100;
-unsigned long zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT;
-unsigned long zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT;
+uint64_t zfs_lua_max_instrlimit = ZCP_MAX_INSTRLIMIT;
+uint64_t zfs_lua_max_memlimit = ZCP_MAX_MEMLIMIT;
/*
* Forward declarations for mutually recursive functions
@@ -277,9 +277,9 @@ zcp_table_to_nvlist(lua_State *state, int index, int depth)
}
break;
case LUA_TNUMBER:
- VERIFY3U(sizeof (buf), >,
- snprintf(buf, sizeof (buf), "%lld",
- (longlong_t)lua_tonumber(state, -2)));
+ (void) snprintf(buf, sizeof (buf), "%lld",
+ (longlong_t)lua_tonumber(state, -2));
+
key = buf;
if (saw_str_could_collide) {
key_could_collide = B_TRUE;
@@ -1443,8 +1443,8 @@ zcp_parse_args(lua_State *state, const char *fname, const zcp_arg_t *pargs,
}
}
-ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, U64, ZMOD_RW,
"Max instruction limit that can be specified for a channel program");
-ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, U64, ZMOD_RW,
"Max memory limit that can be specified for a channel program");
diff --git a/sys/contrib/openzfs/module/zfs/zcp_get.c b/sys/contrib/openzfs/module/zfs/zcp_get.c
index cd17374eb422..f28266b8095f 100644
--- a/sys/contrib/openzfs/module/zfs/zcp_get.c
+++ b/sys/contrib/openzfs/module/zfs/zcp_get.c
@@ -467,7 +467,8 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop)
} else {
error = dsl_prop_get_ds(ds, prop_name, sizeof (numval),
1, &numval, setpoint);
-
+ if (error != 0)
+ goto out;
#ifdef _KERNEL
/* Fill in temporary value for prop, if applicable */
(void) zfs_get_temporary_prop(ds, zfs_prop, &numval, setpoint);
@@ -489,6 +490,7 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop)
(void) lua_pushnumber(state, numval);
}
}
+out:
kmem_free(strval, ZAP_MAXVALUELEN);
if (error == 0)
get_prop_src(state, setpoint, zfs_prop);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
index 74b4cb8d2e63..4a9a36d87e66 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_chksum.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
@@ -81,15 +81,15 @@ chksum_kstat_headers(char *buf, size_t size)
{
ssize_t off = 0;
- off += snprintf(buf + off, size, "%-23s", "implementation");
- off += snprintf(buf + off, size - off, "%8s", "1k");
- off += snprintf(buf + off, size - off, "%8s", "4k");
- off += snprintf(buf + off, size - off, "%8s", "16k");
- off += snprintf(buf + off, size - off, "%8s", "64k");
- off += snprintf(buf + off, size - off, "%8s", "256k");
- off += snprintf(buf + off, size - off, "%8s", "1m");
- off += snprintf(buf + off, size - off, "%8s", "4m");
- (void) snprintf(buf + off, size - off, "%8s\n", "16m");
+ off += kmem_scnprintf(buf + off, size, "%-23s", "implementation");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "1k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "4k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "16k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "64k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "256k");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "1m");
+ off += kmem_scnprintf(buf + off, size - off, "%8s", "4m");
+ (void) kmem_scnprintf(buf + off, size - off, "%8s\n", "16m");
return (0);
}
@@ -102,23 +102,23 @@ chksum_kstat_data(char *buf, size_t size, void *data)
char b[24];
cs = (chksum_stat_t *)data;
- snprintf(b, 23, "%s-%s", cs->name, cs->impl);
- off += snprintf(buf + off, size - off, "%-23s", b);
- off += snprintf(buf + off, size - off, "%8llu",
+ kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl);
+ off += kmem_scnprintf(buf + off, size - off, "%-23s", b);
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs1k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs4k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs16k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs64k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs256k);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs1m);
- off += snprintf(buf + off, size - off, "%8llu",
+ off += kmem_scnprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs4m);
- (void) snprintf(buf + off, size - off, "%8llu\n",
+ (void) kmem_scnprintf(buf + off, size - off, "%8llu\n",
(u_longlong_t)cs->bs16m);
return (0);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c
index 06aa1214ace8..fd0dc7d69bf8 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@@ -253,7 +253,6 @@ void
zfs_ereport_clear(spa_t *spa, vdev_t *vd)
{
uint64_t vdev_guid, pool_guid;
- int cnt = 0;
ASSERT(vd != NULL || spa != NULL);
if (vd == NULL) {
@@ -277,7 +276,6 @@ zfs_ereport_clear(spa_t *spa, vdev_t *vd)
avl_remove(&recent_events_tree, entry);
list_remove(&recent_events_list, entry);
kmem_free(entry, sizeof (*entry));
- cnt++;
}
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index c3266c09306b..a5168b937588 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -229,14 +229,14 @@ static zfsdev_state_t *zfsdev_state_list;
* for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
* Defaults to 0=auto which is handled by platform code.
*/
-unsigned long zfs_max_nvlist_src_size = 0;
+uint64_t zfs_max_nvlist_src_size = 0;
/*
* When logging the output nvlist of an ioctl in the on-disk history, limit
* the logged size to this many bytes. This must be less than DMU_MAX_ACCESS.
* This applies primarily to zfs_ioc_channel_program().
*/
-static unsigned long zfs_history_output_max = 1024 * 1024;
+static uint64_t zfs_history_output_max = 1024 * 1024;
uint_t zfs_fsyncer_key;
uint_t zfs_allow_log_key;
@@ -7884,8 +7884,8 @@ zfs_kmod_fini(void)
tsd_destroy(&zfs_allow_log_key);
}
-ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
"Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
-ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
"Maximum size in bytes of ZFS ioctl output that will be logged");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c
index c92044337bce..77bf9140d52d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@@ -494,6 +494,29 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
zil_itx_assign(zilog, itx, tx);
}
+static void
+do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
+ const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
+{
+ itx_t *itx;
+ lr_rename_t *lr;
+ size_t snamesize = strlen(sname) + 1;
+ size_t dnamesize = strlen(dname) + 1;
+
+ if (zil_replaying(zilog, tx))
+ return;
+
+ itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
+ lr = (lr_rename_t *)&itx->itx_lr;
+ lr->lr_sdoid = sdzp->z_id;
+ lr->lr_tdoid = tdzp->z_id;
+ memcpy((char *)(lr + 1), sname, snamesize);
+ memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
+ itx->itx_oid = szp->z_id;
+
+ zil_itx_assign(zilog, itx, tx);
+}
+
/*
* Handles TX_RENAME transactions.
*/
@@ -501,18 +524,71 @@ void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
{
+ txtype |= TX_RENAME;
+ do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
+}
+
+/*
+ * Handles TX_RENAME_EXCHANGE transactions.
+ */
+void
+zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+ znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+ znode_t *szp)
+{
+ txtype |= TX_RENAME_EXCHANGE;
+ do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
+}
+
+/*
+ * Handles TX_RENAME_WHITEOUT transactions.
+ *
+ * Unfortunately we cannot reuse do_zfs_log_rename because we need to call
+ * zfs_mknode() on replay which requires stashing bits as with TX_CREATE.
+ */
+void
+zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+ znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+ znode_t *szp, znode_t *wzp)
+{
itx_t *itx;
- lr_rename_t *lr;
+ lr_rename_whiteout_t *lr;
size_t snamesize = strlen(sname) + 1;
size_t dnamesize = strlen(dname) + 1;
if (zil_replaying(zilog, tx))
return;
+ txtype |= TX_RENAME_WHITEOUT;
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
- lr = (lr_rename_t *)&itx->itx_lr;
- lr->lr_sdoid = sdzp->z_id;
- lr->lr_tdoid = tdzp->z_id;
+ lr = (lr_rename_whiteout_t *)&itx->itx_lr;
+ lr->lr_rename.lr_sdoid = sdzp->z_id;
+ lr->lr_rename.lr_tdoid = tdzp->z_id;
+
+ /*
+ * RENAME_WHITEOUT will create an entry at the source znode, so we need
+ * to store the same data that the equivalent call to zfs_log_create()
+ * would.
+ */
+ lr->lr_wfoid = wzp->z_id;
+ LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT);
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen,
+ sizeof (uint64_t));
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)),
+ lr->lr_wcrtime, sizeof (uint64_t) * 2);
+ lr->lr_wmode = wzp->z_mode;
+ lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp));
+ lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp));
+
+ /*
+	 * This rdev will always be makedevice(0, 0) but because the ZIL log and
+	 * replay code needs to be platform independent (and there is no
+	 * platform independent makedev()) we need to copy the one created
+ * during the rename operation.
+ */
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev,
+ sizeof (lr->lr_wrdev));
+
memcpy((char *)(lr + 1), sname, snamesize);
memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
itx->itx_oid = szp->z_id;
@@ -525,7 +601,7 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
* called as soon as the write is on stable storage (be it via a DMU sync or a
* ZIL commit).
*/
-static long zfs_immediate_write_sz = 32768;
+static int64_t zfs_immediate_write_sz = 32768;
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
@@ -815,5 +891,5 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
zil_itx_assign(zilog, itx, tx);
}
-ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, LONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, S64, ZMOD_RW,
"Largest data block to write to zil");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
index dfcdeeb5b46f..63acf7ab2e4d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
@@ -151,7 +151,7 @@ zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
*/
int
zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
- uint64_t *action_handle)
+ uintptr_t *action_handle)
{
zfs_onexit_t *zo;
zfs_onexit_action_node_t *ap;
@@ -170,7 +170,7 @@ zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
list_insert_tail(&zo->zo_actions, ap);
mutex_exit(&zo->zo_lock);
if (action_handle)
- *action_handle = (uint64_t)(uintptr_t)ap;
+ *action_handle = (uintptr_t)ap;
return (0);
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c
index 379e1d1a7b57..0293e46d5858 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_replay.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c
@@ -386,8 +386,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
lr->lr_uid, lr->lr_gid);
}
+#if defined(__linux__)
error = zfs_create(dzp, name, &xva.xva_vattr,
- 0, 0, &zp, kcred, vflg, &vsec);
+ 0, 0, &zp, kcred, vflg, &vsec, kcred->user_ns);
+#else
+ error = zfs_create(dzp, name, &xva.xva_vattr,
+ 0, 0, &zp, kcred, vflg, &vsec, NULL);
+#endif
break;
case TX_MKDIR_ACL:
aclstart = (caddr_t)(lracl + 1);
@@ -416,8 +421,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
}
+#if defined(__linux__)
+ error = zfs_mkdir(dzp, name, &xva.xva_vattr,
+ &zp, kcred, vflg, &vsec, kcred->user_ns);
+#else
error = zfs_mkdir(dzp, name, &xva.xva_vattr,
- &zp, kcred, vflg, &vsec);
+ &zp, kcred, vflg, &vsec, NULL);
+#endif
break;
default:
error = SET_ERROR(ENOTSUP);
@@ -527,8 +537,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
if (name == NULL)
name = (char *)start;
+#if defined(__linux__)
+ error = zfs_create(dzp, name, &xva.xva_vattr,
+ 0, 0, &zp, kcred, vflg, NULL, kcred->user_ns);
+#else
error = zfs_create(dzp, name, &xva.xva_vattr,
- 0, 0, &zp, kcred, vflg, NULL);
+ 0, 0, &zp, kcred, vflg, NULL, NULL);
+#endif
break;
case TX_MKDIR_ATTR:
lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
@@ -545,8 +560,14 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
if (name == NULL)
name = (char *)(lr + 1);
+#if defined(__linux__)
+ error = zfs_mkdir(dzp, name, &xva.xva_vattr,
+ &zp, kcred, vflg, NULL, kcred->user_ns);
+#else
error = zfs_mkdir(dzp, name, &xva.xva_vattr,
- &zp, kcred, vflg, NULL);
+ &zp, kcred, vflg, NULL, NULL);
+#endif
+
break;
case TX_MKXATTR:
error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred);
@@ -554,8 +575,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
case TX_SYMLINK:
name = (char *)(lr + 1);
link = name + strlen(name) + 1;
+#if defined(__linux__)
+ error = zfs_symlink(dzp, name, &xva.xva_vattr,
+ link, &zp, kcred, vflg, kcred->user_ns);
+#else
error = zfs_symlink(dzp, name, &xva.xva_vattr,
- link, &zp, kcred, vflg);
+ link, &zp, kcred, vflg, NULL);
+#endif
break;
default:
error = SET_ERROR(ENOTSUP);
@@ -643,18 +669,21 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
}
static int
-zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
+do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname,
+ char *tname, uint64_t rflags, vattr_t *wo_vap)
{
- zfsvfs_t *zfsvfs = arg1;
- lr_rename_t *lr = arg2;
- char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
- char *tname = sname + strlen(sname) + 1;
znode_t *sdzp, *tdzp;
- int error;
- int vflg = 0;
+ int error, vflg = 0;
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
+ /* Only Linux currently supports RENAME_* flags. */
+#ifdef __linux__
+ VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT));
+
+ /* wo_vap must be non-NULL iff we're doing RENAME_WHITEOUT. */
+ VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
+#else
+ VERIFY0(rflags);
+#endif
if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
return (error);
@@ -667,7 +696,13 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
- error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg);
+#if defined(__linux__)
+ error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags,
+ wo_vap, kcred->user_ns);
+#else
+ error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags,
+ wo_vap, NULL);
+#endif
zrele(tdzp);
zrele(sdzp);
@@ -675,6 +710,86 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
}
static int
+zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
+{
+ zfsvfs_t *zfsvfs = arg1;
+ lr_rename_t *lr = arg2;
+ char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
+ char *tname = sname + strlen(sname) + 1;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL));
+}
+
+static int
+zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap)
+{
+#ifdef __linux__
+ zfsvfs_t *zfsvfs = arg1;
+ lr_rename_t *lr = arg2;
+ char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
+ char *tname = sname + strlen(sname) + 1;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE,
+ NULL));
+#else
+ return (SET_ERROR(ENOTSUP));
+#endif
+}
+
+static int
+zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
+{
+#ifdef __linux__
+ zfsvfs_t *zfsvfs = arg1;
+ lr_rename_whiteout_t *lr = arg2;
+ int error;
+ /* sname and tname follow lr_rename_whiteout_t */
+ char *sname = (char *)(lr + 1);
+ char *tname = sname + strlen(sname) + 1;
+ /* For the whiteout file. */
+ xvattr_t xva;
+ uint64_t objid;
+ uint64_t dnodesize;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ objid = LR_FOID_GET_OBJ(lr->lr_wfoid);
+ dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT;
+
+ xva_init(&xva);
+ zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
+ lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid);
+
+ /*
+ * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which
+ * assigns the object's creation time, generation number, and dnode
+ * slot count. The generic zfs_rename() has no concept of these
+ * attributes, so we smuggle the values inside the vattr's otherwise
+ * unused va_ctime, va_nblocks, and va_fsid fields.
+ */
+ ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime);
+ xva.xva_vattr.va_nblocks = lr->lr_wgen;
+ xva.xva_vattr.va_fsid = dnodesize;
+
+ error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
+ if (error)
+ return (error);
+
+ return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname,
+ RENAME_WHITEOUT, &xva.xva_vattr));
+#else
+ return (SET_ERROR(ENOTSUP));
+#endif
+}
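The smuggled creation time survives the trip because ZIL records store timestamps as a pair of uint64s and ZFS_TIME_ENCODE/ZFS_TIME_DECODE are thin seconds/nanoseconds converters. A plain-C sketch of that encoding (illustrative types, not the zfs_znode.h macros):

#include <assert.h>
#include <stdint.h>

struct ts { int64_t sec; int64_t nsec; };

static void
time_encode(const struct ts *t, uint64_t out[2])
{
	out[0] = (uint64_t)t->sec;	/* seconds */
	out[1] = (uint64_t)t->nsec;	/* nanoseconds */
}

static void
time_decode(struct ts *t, const uint64_t in[2])
{
	t->sec = (int64_t)in[0];
	t->nsec = (int64_t)in[1];
}

int
main(void)
{
	struct ts in = { 1668630324, 42 }, out;
	uint64_t lr_crtime[2];

	time_encode(&in, lr_crtime);
	time_decode(&out, lr_crtime);
	assert(out.sec == in.sec && out.nsec == in.nsec);
	return (0);
}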
+
+static int
zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
@@ -860,7 +975,11 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
- error = zfs_setattr(zp, vap, 0, kcred);
+#if defined(__linux__)
+ error = zfs_setattr(zp, vap, 0, kcred, kcred->user_ns);
+#else
+ error = zfs_setattr(zp, vap, 0, kcred, NULL);
+#endif
zfs_fuid_info_free(zfsvfs->z_fuid_replay);
zfsvfs->z_fuid_replay = NULL;
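Every replayed VFS call in this file now repeats the same #if: on Linux the entry points take a user namespace argument (kcred->user_ns, since replay runs with kernel credentials) and elsewhere they take NULL. One way the repetition could be factored out, shown as a hypothetical helper that is not part of this change:

/* Hypothetical helper; not in this diff. */
#if defined(__linux__)
#define	ZFS_REPLAY_NS(cr)	((cr)->user_ns)
#else
#define	ZFS_REPLAY_NS(cr)	(NULL)
#endif

/* Each call site would then collapse to one line, e.g.: */
/* error = zfs_setattr(zp, vap, 0, kcred, ZFS_REPLAY_NS(kcred)); */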
@@ -1069,4 +1188,6 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
zfs_replay_setsaxattr, /* TX_SETSAXATTR */
+ zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */
+ zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */
};
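For context, zfs_replay_vector[] is indexed directly by transaction type, which is why the two new handlers must occupy exactly the TX_RENAME_EXCHANGE and TX_RENAME_WHITEOUT slots. The dispatch in zil.c reduces to roughly the following (simplified sketch; the real zil_replay_log_record() adds validation, byteswap handling, and error reporting, and its exact shape is an assumption here):

/* Simplified dispatch sketch. TX_CI flags case-insensitive names and
 * is masked off before the table lookup. */
uint64_t txtype = lr->lrc_txtype & ~TX_CI;
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap);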
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 57f03f116273..45ecb0773260 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -64,7 +64,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
int error = 0;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
- (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
+ (void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
@@ -168,15 +168,25 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
return (error);
if (flag & V_ACE_MASK)
- error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
+#if defined(__linux__)
+ error = zfs_zaccess(zp, mode, flag, B_FALSE, cr,
+ kcred->user_ns);
+#else
+ error = zfs_zaccess(zp, mode, flag, B_FALSE, cr,
+ NULL);
+#endif
else
- error = zfs_zaccess_rwx(zp, mode, flag, cr);
+#if defined(__linux__)
+ error = zfs_zaccess_rwx(zp, mode, flag, cr, kcred->user_ns);
+#else
+ error = zfs_zaccess_rwx(zp, mode, flag, cr, NULL);
+#endif
zfs_exit(zfsvfs, FTAG);
return (error);
}
-static unsigned long zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
+static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
/*
* Read bytes from specified file into supplied buffer.
@@ -991,5 +1001,5 @@ EXPORT_SYMBOL(zfs_write);
EXPORT_SYMBOL(zfs_getsecattr);
EXPORT_SYMBOL(zfs_setsecattr);
-ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
"Bytes to read per chunk");
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index dc5b8018e16e..02e6f4b83b9c 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -132,7 +132,7 @@ static int zil_nocacheflush = 0;
* Any writes above that will be executed with lower (asynchronous) priority
* to limit potential SLOG device abuse by a single active ZIL writer.
*/
-static unsigned long zil_slog_bulk = 768 * 1024;
+static uint64_t zil_slog_bulk = 768 * 1024;
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
@@ -237,7 +237,7 @@ static int
zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
blkptr_t *nbp, void *dst, char **end)
{
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf = NULL;
zbookmark_phys_t zb;
@@ -315,7 +315,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
static int
zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
{
- enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
const blkptr_t *bp = &lr->lr_blkptr;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf = NULL;
@@ -339,6 +339,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
if (wbuf == NULL)
zio_flags |= ZIO_FLAG_RAW;
+ ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
@@ -479,8 +480,18 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
lrbuf, &end);
- if (error != 0)
+ if (error != 0) {
+ if (claimed) {
+ char name[ZFS_MAX_DATASET_NAME_LEN];
+
+ dmu_objset_name(zilog->zl_os, name);
+
+ cmn_err(CE_WARN, "ZFS read log block error %d, "
+ "dataset %s, seq 0x%llx\n", error, name,
+ (u_longlong_t)blk_seq);
+ }
break;
+ }
for (lrp = lrbuf; lrp < end; lrp += reclen) {
lr_t *lr = (lr_t *)lrp;
@@ -504,10 +515,6 @@ done:
zilog->zl_parse_blk_count = blk_count;
zilog->zl_parse_lr_count = lr_count;
- ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) ||
- (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) ||
- (decrypt && error == EIO));
-
zil_bp_tree_fini(zilog);
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
@@ -758,11 +765,9 @@ zil_commit_activate_saxattr_feature(zilog_t *zilog)
uint64_t txg = 0;
dmu_tx_t *tx = NULL;
- if (spa_feature_is_enabled(zilog->zl_spa,
- SPA_FEATURE_ZILSAXATTR) &&
+ if (spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) &&
dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL &&
- !dsl_dataset_feature_is_active(ds,
- SPA_FEATURE_ZILSAXATTR)) {
+ !dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR)) {
tx = dmu_tx_create(zilog->zl_os);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
dsl_dataset_dirty(ds, tx);
@@ -882,8 +887,9 @@ zil_create(zilog_t *zilog)
* txg_wait_synced() here either when keep_first is set, because both
* zil_create() and zil_destroy() will wait for any in-progress destroys
* to complete.
+ * Return B_TRUE if there were any entries to replay.
*/
-void
+boolean_t
zil_destroy(zilog_t *zilog, boolean_t keep_first)
{
const zil_header_t *zh = zilog->zl_header;
@@ -899,7 +905,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
zilog->zl_old_header = *zh; /* debugging aid */
if (BP_IS_HOLE(&zh->zh_log))
- return;
+ return (B_FALSE);
tx = dmu_tx_create(zilog->zl_os);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
@@ -932,6 +938,8 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
mutex_exit(&zilog->zl_lock);
dmu_tx_commit(tx);
+
+ return (B_TRUE);
}
void
@@ -3844,8 +3852,9 @@ zil_incr_blks(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t claim_txg)
/*
* If this dataset has a non-empty intent log, replay it and destroy it.
+ * Return B_TRUE if there were any entries to replay.
*/
-void
+boolean_t
zil_replay(objset_t *os, void *arg,
zil_replay_func_t *const replay_func[TX_MAX_TYPE])
{
@@ -3854,8 +3863,7 @@ zil_replay(objset_t *os, void *arg,
zil_replay_arg_t zr;
if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) {
- zil_destroy(zilog, B_TRUE);
- return;
+ return (zil_destroy(zilog, B_TRUE));
}
zr.zr_replay = replay_func;
@@ -3878,6 +3886,8 @@ zil_replay(objset_t *os, void *arg,
zil_destroy(zilog, B_FALSE);
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
zilog->zl_replay = B_FALSE;
+
+ return (B_TRUE);
}
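With zil_replay() and zil_destroy() now reporting whether a non-empty log was processed, callers can react only when replay actually happened. A hypothetical caller, sketched (not from this diff):

/* Hypothetical caller; illustration only. */
static void
example_setup(objset_t *os, void *arg)
{
	if (zil_replay(os, arg, zfs_replay_vector)) {
		/* A non-empty intent log was replayed (or destroyed). */
	} else {
		/* Nothing to do; the intent log was already empty. */
	}
}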
boolean_t
@@ -3945,7 +3955,7 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_zil, zil_, nocacheflush, INT, ZMOD_RW,
"Disable ZIL cache flushes");
-ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, U64, ZMOD_RW,
"Limit in bytes slog sync writes per commit");
ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index c2e3c6169fa3..928e28813931 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -512,8 +512,9 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
/*
* If this is an authenticated block, just check the MAC. It would be
- * nice to separate this out into its own flag, but for the moment
- * enum zio_flag is out of bits.
+ * nice to separate this out into its own flag, but when this was done,
+ * we had run out of bits in what is now zio_flag_t. Future cleanup
+ * could make this a flag bit.
*/
if (BP_IS_AUTHENTICATED(bp)) {
if (ot == DMU_OT_OBJSET) {
@@ -802,7 +803,7 @@ static zio_t *
zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
abd_t *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done,
void *private, zio_type_t type, zio_priority_t priority,
- enum zio_flag flags, vdev_t *vd, uint64_t offset,
+ zio_flag_t flags, vdev_t *vd, uint64_t offset,
const zbookmark_phys_t *zb, enum zio_stage stage,
enum zio_stage pipeline)
{
@@ -901,7 +902,7 @@ zio_destroy(zio_t *zio)
zio_t *
zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
- void *private, enum zio_flag flags)
+ void *private, zio_flag_t flags)
{
zio_t *zio;
@@ -913,7 +914,7 @@ zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
}
zio_t *
-zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)
+zio_root(spa_t *spa, zio_done_func_t *done, void *private, zio_flag_t flags)
{
return (zio_null(NULL, spa, NULL, done, private, flags));
}
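The enum zio_flag to zio_flag_t conversion threaded through the rest of this file exists because a portable C enum can only represent values that fit in an int, so the flag space topped out around bit 30; a fixed 64-bit typedef removes that ceiling. A sketch of the shape of the change (illustrative names, not the real zio.h definitions):

#include <stdint.h>

/* Hypothetical stand-in for zio_flag_t; names are illustrative. */
typedef uint64_t my_zio_flag_t;

#define	MY_ZIO_FLAG_CANFAIL	(1ULL << 0)
#define	MY_ZIO_FLAG_FUTURE	(1ULL << 33)	/* beyond a portable enum */

static inline int
my_flag_set(my_zio_flag_t flags, my_zio_flag_t bit)
{
	return ((flags & bit) != 0);
}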
@@ -1099,7 +1100,7 @@ zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp)
zio_t *
zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
abd_t *data, uint64_t size, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
+ zio_priority_t priority, zio_flag_t flags, const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1117,7 +1118,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
zio_done_func_t *physdone, zio_done_func_t *done,
- void *private, zio_priority_t priority, enum zio_flag flags,
+ void *private, zio_priority_t priority, zio_flag_t flags,
const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1160,7 +1161,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_t *
zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data,
uint64_t size, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb)
+ zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1203,7 +1204,6 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
*/
if (BP_IS_EMBEDDED(bp))
return;
- metaslab_check_free(spa, bp);
/*
* Frees that are for the currently-syncing txg, are not going to be
@@ -1220,6 +1220,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
txg != spa->spa_syncing_txg ||
(spa_sync_pass(spa) >= zfs_sync_pass_deferred_free &&
!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))) {
+ metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
} else {
VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL);
@@ -1233,7 +1234,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
*/
zio_t *
zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
- enum zio_flag flags)
+ zio_flag_t flags)
{
ASSERT(!BP_IS_HOLE(bp));
ASSERT(spa_syncing_txg(spa) == txg);
@@ -1266,7 +1267,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *
zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
- zio_done_func_t *done, void *private, enum zio_flag flags)
+ zio_done_func_t *done, void *private, zio_flag_t flags)
{
zio_t *zio;
@@ -1303,7 +1304,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *
zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
- zio_done_func_t *done, void *private, enum zio_flag flags)
+ zio_done_func_t *done, void *private, zio_flag_t flags)
{
zio_t *zio;
int c;
@@ -1328,7 +1329,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_t *
zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_done_func_t *done, void *private, zio_priority_t priority,
- enum zio_flag flags, enum trim_flag trim_flags)
+ zio_flag_t flags, enum trim_flag trim_flags)
{
zio_t *zio;
@@ -1348,7 +1349,7 @@ zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
abd_t *data, int checksum, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, boolean_t labels)
+ zio_priority_t priority, zio_flag_t flags, boolean_t labels)
{
zio_t *zio;
@@ -1369,7 +1370,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
abd_t *data, int checksum, zio_done_func_t *done, void *private,
- zio_priority_t priority, enum zio_flag flags, boolean_t labels)
+ zio_priority_t priority, zio_flag_t flags, boolean_t labels)
{
zio_t *zio;
@@ -1406,7 +1407,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
abd_t *data, uint64_t size, int type, zio_priority_t priority,
- enum zio_flag flags, zio_done_func_t *done, void *private)
+ zio_flag_t flags, zio_done_func_t *done, void *private)
{
enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
zio_t *zio;
@@ -1480,7 +1481,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
zio_t *
zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size,
- zio_type_t type, zio_priority_t priority, enum zio_flag flags,
+ zio_type_t type, zio_priority_t priority, zio_flag_t flags,
zio_done_func_t *done, void *private)
{
zio_t *zio;
@@ -2030,7 +2031,7 @@ zio_deadman_impl(zio_t *pio, int ziodepth)
"delta=%llu queued=%llu io=%llu "
"path=%s "
"last=%llu type=%d "
- "priority=%d flags=0x%x stage=0x%x "
+ "priority=%d flags=0x%llx stage=0x%x "
"pipeline=0x%x pipeline-trace=0x%x "
"objset=%llu object=%llu "
"level=%llu blkid=%llu "
@@ -2040,8 +2041,8 @@ zio_deadman_impl(zio_t *pio, int ziodepth)
(u_longlong_t)delta, pio->io_delta, pio->io_delay,
vd ? vd->vdev_path : "NULL",
vq ? vq->vq_io_complete_ts : 0, pio->io_type,
- pio->io_priority, pio->io_flags, pio->io_stage,
- pio->io_pipeline, pio->io_pipeline_trace,
+ pio->io_priority, (u_longlong_t)pio->io_flags,
+ pio->io_stage, pio->io_pipeline, pio->io_pipeline_trace,
(u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid,
(u_longlong_t)pio->io_offset, (u_longlong_t)pio->io_size,
@@ -3360,7 +3361,7 @@ zio_ddt_write(zio_t *zio)
return (zio);
}
-ddt_entry_t *freedde; /* for debugging */
+static ddt_entry_t *freedde; /* for debugging */
static zio_t *
zio_ddt_free(zio_t *zio)
diff --git a/sys/contrib/openzfs/module/zfs/zio_compress.c b/sys/contrib/openzfs/module/zfs/zio_compress.c
index 4c9cbc962093..0fb91ac81522 100644
--- a/sys/contrib/openzfs/module/zfs/zio_compress.c
+++ b/sys/contrib/openzfs/module/zfs/zio_compress.c
@@ -44,7 +44,7 @@
* If nonzero, every 1/X decompression attempts will fail, simulating
* an undetected memory error.
*/
-unsigned long zio_decompress_fail_fraction = 0;
+static unsigned long zio_decompress_fail_fraction = 0;
/*
* Compression vectors.
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 2e2860ff0212..20578a8223b2 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -514,6 +514,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
zvol_replay_err, /* TX_WRITE2 */
zvol_replay_err, /* TX_SETSAXATTR */
+ zvol_replay_err, /* TX_RENAME_EXCHANGE */
+ zvol_replay_err, /* TX_RENAME_WHITEOUT */
};
/*
@@ -1026,8 +1028,7 @@ zvol_add_clones(const char *dsname, list_t *minors_list)
out:
if (dd != NULL)
dsl_dir_rele(dd, FTAG);
- if (dp != NULL)
- dsl_pool_rele(dp, FTAG);
+ dsl_pool_rele(dp, FTAG);
}
/*