aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/include
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/include')
-rw-r--r--sys/contrib/openzfs/include/Makefile.am2
-rw-r--r--sys/contrib/openzfs/include/libzfs.h20
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/Makefile.am2
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h5
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h75
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h5
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/mod.h (renamed from sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h)0
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/policy.h1
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h8
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/simd_powerpc.h4
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h11
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h1
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h1
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h8
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h9
-rw-r--r--sys/contrib/openzfs/include/os/linux/Makefile.am3
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h18
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h26
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h65
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/page_compat.h4
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/pagemap_compat.h36
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h47
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h32
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/debug.h71
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h61
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/misc.h6
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/mod.h (renamed from sys/contrib/openzfs/include/os/linux/spl/sys/mod_os.h)0
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/stat.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h4
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/time.h8
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/uio.h4
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/policy.h1
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h7
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h8
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h6
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h8
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h1
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h36
-rw-r--r--sys/contrib/openzfs/include/sys/arc_impl.h2
-rw-r--r--sys/contrib/openzfs/include/sys/dbuf.h28
-rw-r--r--sys/contrib/openzfs/include/sys/ddt.h4
-rw-r--r--sys/contrib/openzfs/include/sys/dmu.h117
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_impl.h14
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_objset.h2
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_recv.h1
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_traverse.h7
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_tx.h6
-rw-r--r--sys/contrib/openzfs/include/sys/dmu_zfetch.h5
-rw-r--r--sys/contrib/openzfs/include/sys/dnode.h14
-rw-r--r--sys/contrib/openzfs/include/sys/dsl_dataset.h13
-rw-r--r--sys/contrib/openzfs/include/sys/dsl_deleg.h1
-rw-r--r--sys/contrib/openzfs/include/sys/dsl_dir.h4
-rw-r--r--sys/contrib/openzfs/include/sys/fm/fs/zfs.h2
-rw-r--r--sys/contrib/openzfs/include/sys/frame.h8
-rw-r--r--sys/contrib/openzfs/include/sys/fs/zfs.h23
-rw-r--r--sys/contrib/openzfs/include/sys/metaslab.h26
-rw-r--r--sys/contrib/openzfs/include/sys/metaslab_impl.h6
-rw-r--r--sys/contrib/openzfs/include/sys/mod.h36
-rw-r--r--sys/contrib/openzfs/include/sys/nvpair.h2
-rw-r--r--sys/contrib/openzfs/include/sys/range_tree.h14
-rw-r--r--sys/contrib/openzfs/include/sys/spa.h89
-rw-r--r--sys/contrib/openzfs/include/sys/spa_impl.h10
-rw-r--r--sys/contrib/openzfs/include/sys/txg.h30
-rw-r--r--sys/contrib/openzfs/include/sys/vdev.h19
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_draid.h2
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_impl.h14
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_raidz.h3
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_raidz_impl.h2
-rw-r--r--sys/contrib/openzfs/include/sys/xvattr.h1
-rw-r--r--sys/contrib/openzfs/include/sys/zcp.h1
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_context.h28
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_debug.h20
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_file.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_ioctl.h35
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_quota.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_racct.h6
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_vfsops.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_vnops.h1
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_znode.h9
-rw-r--r--sys/contrib/openzfs/include/sys/zil.h37
-rw-r--r--sys/contrib/openzfs/include/sys/zil_impl.h22
-rw-r--r--sys/contrib/openzfs/include/sys/zio.h100
-rw-r--r--sys/contrib/openzfs/include/sys/zvol.h5
-rw-r--r--sys/contrib/openzfs/include/sys/zvol_impl.h43
-rw-r--r--sys/contrib/openzfs/include/zfeature_common.h13
-rw-r--r--sys/contrib/openzfs/include/zfs_crrd.h75
-rw-r--r--sys/contrib/openzfs/include/zfs_deleg.h1
-rw-r--r--sys/contrib/openzfs/include/zfs_valstr.h1
90 files changed, 947 insertions, 571 deletions
diff --git a/sys/contrib/openzfs/include/Makefile.am b/sys/contrib/openzfs/include/Makefile.am
index a9258deabfd7..7588cd0aedc9 100644
--- a/sys/contrib/openzfs/include/Makefile.am
+++ b/sys/contrib/openzfs/include/Makefile.am
@@ -10,6 +10,7 @@ COMMON_H = \
cityhash.h \
zfeature_common.h \
zfs_comutil.h \
+ zfs_crrd.h \
zfs_deleg.h \
zfs_fletcher.h \
zfs_namecheck.h \
@@ -69,7 +70,6 @@ COMMON_H = \
sys/metaslab_impl.h \
sys/mmp.h \
sys/mntent.h \
- sys/mod.h \
sys/multilist.h \
sys/nvpair.h \
sys/nvpair_impl.h \
diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h
index 8774d490f74b..14930fb90622 100644
--- a/sys/contrib/openzfs/include/libzfs.h
+++ b/sys/contrib/openzfs/include/libzfs.h
@@ -30,6 +30,7 @@
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
*/
#ifndef _LIBZFS_H
@@ -288,10 +289,22 @@ typedef struct trimflags {
uint64_t rate;
} trimflags_t;
+typedef struct trim_cbdata {
+ trimflags_t trim_flags;
+ pool_trim_func_t cmd_type;
+} trim_cbdata_t;
+
+typedef struct initialize_cbdata {
+ boolean_t wait;
+ pool_initialize_func_t cmd_type;
+} initialize_cbdata_t;
/*
* Functions to manipulate pool and vdev state
*/
_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
+_LIBZFS_H int zpool_scan_range(zpool_handle_t *, pool_scan_func_t,
+ pool_scrub_cmd_t, time_t, time_t);
+_LIBZFS_H int zpool_initialize_one(zpool_handle_t *, void *);
_LIBZFS_H int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
nvlist_t *);
_LIBZFS_H int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t,
@@ -304,7 +317,9 @@ _LIBZFS_H int zpool_reguid(zpool_handle_t *);
_LIBZFS_H int zpool_set_guid(zpool_handle_t *, const uint64_t *);
_LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);
+_LIBZFS_H void zpool_collect_leaves(zpool_handle_t *, nvlist_t *, nvlist_t *);
_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);
+_LIBZFS_H int zpool_trim_one(zpool_handle_t *, void *);
_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
uint64_t);
@@ -464,6 +479,8 @@ _LIBZFS_H zpool_status_t zpool_import_status(nvlist_t *, const char **,
_LIBZFS_H nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
_LIBZFS_H nvlist_t *zpool_get_features(zpool_handle_t *);
_LIBZFS_H int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
+_LIBZFS_H void zpool_refresh_stats_from_handle(zpool_handle_t *,
+ zpool_handle_t *);
_LIBZFS_H int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
_LIBZFS_H void zpool_add_propname(zpool_handle_t *, const char *);
@@ -591,6 +608,7 @@ _LIBZFS_H int zfs_crypto_attempt_load_keys(libzfs_handle_t *, const char *);
_LIBZFS_H int zfs_crypto_load_key(zfs_handle_t *, boolean_t, const char *);
_LIBZFS_H int zfs_crypto_unload_key(zfs_handle_t *);
_LIBZFS_H int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);
+_LIBZFS_H boolean_t zfs_is_encrypted(zfs_handle_t *);
typedef struct zprop_list {
int pl_prop;
@@ -853,7 +871,7 @@ _LIBZFS_H uint64_t zvol_volsize_to_reservation(zpool_handle_t *, uint64_t,
nvlist_t *);
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
- uid_t rid, uint64_t space);
+ uid_t rid, uint64_t space, uint64_t default_quota);
_LIBZFS_H int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t,
zfs_userspace_cb_t, void *);
diff --git a/sys/contrib/openzfs/include/os/freebsd/Makefile.am b/sys/contrib/openzfs/include/os/freebsd/Makefile.am
index d975c4fe69fa..d6b6923d033f 100644
--- a/sys/contrib/openzfs/include/os/freebsd/Makefile.am
+++ b/sys/contrib/openzfs/include/os/freebsd/Makefile.am
@@ -33,7 +33,7 @@ noinst_HEADERS = \
%D%/spl/sys/list_impl.h \
%D%/spl/sys/lock.h \
%D%/spl/sys/misc.h \
- %D%/spl/sys/mod_os.h \
+ %D%/spl/sys/mod.h \
%D%/spl/sys/mode.h \
%D%/spl/sys/mount.h \
%D%/spl/sys/mutex.h \
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
index 112f7bc32849..51238dd1c8fd 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
@@ -43,10 +43,7 @@ extern "C" {
#endif
#define EXPORT_SYMBOL(x)
-#define module_param(a, b, c)
-#define module_param_call(a, b, c, d, e)
-#define module_param_named(a, b, c, d)
-#define MODULE_PARM_DESC(a, b)
+
#define asm __asm
#ifdef ZFS_DEBUG
#undef NDEBUG
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h
index c1a7cfdeca51..32bc02f3dc86 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h
@@ -69,6 +69,10 @@
#define __maybe_unused __attribute__((unused))
#endif
+#ifndef __must_check
+#define __must_check __attribute__((__warn_unused_result__))
+#endif
+
/*
* Without this, we see warnings from objtool during normal Linux builds when
* the kernel is built with CONFIG_STACK_VALIDATION=y:
@@ -112,14 +116,13 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
} while (0)
#define VERIFY3B(LEFT, OP, RIGHT) do { \
- const boolean_t _verify3_left = (boolean_t)(LEFT); \
- const boolean_t _verify3_right = (boolean_t)(RIGHT); \
+ const boolean_t _verify3_left = (boolean_t)!!(LEFT); \
+ const boolean_t _verify3_right = (boolean_t)!!(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3B(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%d " #OP " %d)\n", \
- (boolean_t)_verify3_left, \
- (boolean_t)_verify3_right); \
+ _verify3_left, _verify3_right); \
} while (0)
#define VERIFY3S(LEFT, OP, RIGHT) do { \
@@ -127,7 +130,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const int64_t _verify3_right = (int64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3S(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%lld " #OP " %lld)\n", \
(long long)_verify3_left, \
(long long)_verify3_right); \
@@ -138,7 +141,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uint64_t _verify3_right = (uint64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3U(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%llu " #OP " %llu)\n", \
(unsigned long long)_verify3_left, \
(unsigned long long)_verify3_right); \
@@ -149,8 +152,8 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uintptr_t _verify3_right = (uintptr_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
- "failed (%px " #OP " %px)\n", \
+ "VERIFY3P(" #LEFT ", " #OP ", " #RIGHT ") " \
+ "failed (%p " #OP " %p)\n", \
(void *)_verify3_left, \
(void *)_verify3_right); \
} while (0)
@@ -159,8 +162,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const int64_t _verify0_right = (int64_t)(RIGHT); \
if (unlikely(!(0 == _verify0_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0(" #RIGHT ") " \
- "failed (0 == %lld)\n", \
+ "VERIFY0(" #RIGHT ") failed (%lld)\n", \
(long long)_verify0_right); \
} while (0)
@@ -168,8 +170,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \
if (unlikely(!(0 == _verify0_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0P(" #RIGHT ") " \
- "failed (NULL == %px)\n", \
+ "VERIFY0P(" #RIGHT ") failed (%p)\n", \
(void *)_verify0_right); \
} while (0)
@@ -182,14 +183,13 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
*/
#define VERIFY3BF(LEFT, OP, RIGHT, STR, ...) do { \
- const boolean_t _verify3_left = (boolean_t)(LEFT); \
- const boolean_t _verify3_right = (boolean_t)(RIGHT); \
+ const boolean_t _verify3_left = (boolean_t)!!(LEFT); \
+ const boolean_t _verify3_right = (boolean_t)!!(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3B(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%d " #OP " %d) " STR "\n", \
- (boolean_t)(_verify3_left), \
- (boolean_t)(_verify3_right), \
+ _verify3_left, _verify3_right, \
__VA_ARGS__); \
} while (0)
@@ -198,10 +198,9 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const int64_t _verify3_right = (int64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3S(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%lld " #OP " %lld) " STR "\n", \
- (long long)(_verify3_left), \
- (long long)(_verify3_right), \
+ (long long)_verify3_left, (long long)_verify3_right,\
__VA_ARGS__); \
} while (0)
@@ -210,10 +209,10 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uint64_t _verify3_right = (uint64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3U(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%llu " #OP " %llu) " STR "\n", \
- (unsigned long long)(_verify3_left), \
- (unsigned long long)(_verify3_right), \
+ (unsigned long long)_verify3_left, \
+ (unsigned long long)_verify3_right, \
__VA_ARGS__); \
} while (0)
@@ -222,32 +221,27 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uintptr_t _verify3_right = (uintptr_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
- "failed (%px " #OP " %px) " STR "\n", \
- (void *) (_verify3_left), \
- (void *) (_verify3_right), \
+ "VERIFY3P(" #LEFT ", " #OP ", " #RIGHT ") " \
+ "failed (%p " #OP " %p) " STR "\n", \
+ (void *)_verify3_left, (void *)_verify3_right, \
__VA_ARGS__); \
} while (0)
#define VERIFY0PF(RIGHT, STR, ...) do { \
- const uintptr_t _verify3_left = (uintptr_t)(0); \
const uintptr_t _verify3_right = (uintptr_t)(RIGHT); \
- if (unlikely(!(_verify3_left == _verify3_right))) \
+ if (unlikely(!(0 == _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0(0 == " #RIGHT ") " \
- "failed (0 == %px) " STR "\n", \
- (long long) (_verify3_right), \
+ "VERIFY0P(" #RIGHT ") failed (%p) " STR "\n", \
+ (void *)_verify3_right, \
__VA_ARGS__); \
} while (0)
#define VERIFY0F(RIGHT, STR, ...) do { \
- const int64_t _verify3_left = (int64_t)(0); \
const int64_t _verify3_right = (int64_t)(RIGHT); \
- if (unlikely(!(_verify3_left == _verify3_right))) \
+ if (unlikely(!(0 == _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0(0 == " #RIGHT ") " \
- "failed (0 == %lld) " STR "\n", \
- (long long) (_verify3_right), \
+ "VERIFY0(" #RIGHT ") failed (%lld) " STR "\n", \
+ (long long)_verify3_right, \
__VA_ARGS__); \
} while (0)
@@ -256,10 +250,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
spl_assert("(" #A ") implies (" #B ")", \
__FILE__, __FUNCTION__, __LINE__)))
-#define VERIFY_EQUIV(A, B) \
- ((void)(likely(!!(A) == !!(B)) || \
- spl_assert("(" #A ") is equivalent to (" #B ")", \
- __FILE__, __FUNCTION__, __LINE__)))
+#define VERIFY_EQUIV(A, B) VERIFY3B(A, ==, B)
/*
* Debugging disabled (--disable-debug)
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h
index 091ebe772810..acce8734b2c5 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h
@@ -56,4 +56,9 @@ struct opensolaris_utsname {
#define task_io_account_read(n)
#define task_io_account_write(n)
+/*
+ * Check if the current thread is a memory reclaim thread.
+ */
+extern int current_is_reclaim_thread(void);
+
#endif /* _OPENSOLARIS_SYS_MISC_H_ */
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod.h
index 4214189c32df..4214189c32df 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod.h
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/policy.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/policy.h
index 639ade831c28..48bc4f3d5b0f 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/policy.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/policy.h
@@ -39,7 +39,6 @@ struct znode;
int secpolicy_nfs(cred_t *cr);
int secpolicy_zfs(cred_t *crd);
-int secpolicy_zfs_proc(cred_t *cr, proc_t *proc);
int secpolicy_sys_config(cred_t *cr, int checkonly);
int secpolicy_zinject(cred_t *cr);
int secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp);
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
index a03b815a22a6..1cbd79ec893f 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
@@ -45,7 +45,9 @@
#ifdef _KERNEL
#define CPU curcpu
#define minclsyspri PRIBIO
-#define defclsyspri minclsyspri
+#define defclsyspri minclsyspri
+/* Write issue taskq priority. */
+#define wtqclsyspri ((PVM + PRIBIO) / 2)
#define maxclsyspri PVM
#define max_ncpus (mp_maxid + 1)
#define boot_max_ncpus (mp_maxid + 1)
@@ -75,8 +77,8 @@ do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
/*
* Be sure there are no surprises.
*/
- ASSERT(stk == NULL);
- ASSERT(len == 0);
+ ASSERT0P(stk);
+ ASSERT0(len);
ASSERT(state == TS_RUN);
if (pp == &p0)
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/simd_powerpc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/simd_powerpc.h
index 0be9257e40cb..5596f35a66d1 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/simd_powerpc.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/simd_powerpc.h
@@ -53,11 +53,11 @@
#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
-#define kfpu_begin() { \
+#define kfpu_begin() { \
if (__predict_false(!is_fpu_kern_thread(0))) \
fpu_kern_enter(PCPU_GET(curthread), NULL, FPU_KERN_NOCTX);\
}
-#define kfpu_end() { \
+#define kfpu_end() { \
if (__predict_false(PCPU_GET(curpcb)->pcb_flags & PCB_KERN_FPU_NOSAVE))\
fpu_kern_leave(PCPU_GET(curthread), NULL); \
}
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h
index 2f5fe4619ef7..14b42f2e7087 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/time.h
@@ -63,6 +63,17 @@ typedef longlong_t hrtime_t;
#define NSEC_TO_TICK(nsec) ((nsec) / (NANOSEC / hz))
static __inline hrtime_t
+getlrtime(void)
+{
+ struct timespec ts;
+ hrtime_t nsec;
+
+ getnanouptime(&ts);
+ nsec = ((hrtime_t)ts.tv_sec * NANOSEC) + ts.tv_nsec;
+ return (nsec);
+}
+
+static __inline hrtime_t
gethrtime(void)
{
struct timespec ts;
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h
index 454078f0fe79..d36bee881d0b 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vm.h
@@ -35,6 +35,7 @@
extern const int zfs_vm_pagerret_bad;
extern const int zfs_vm_pagerret_error;
extern const int zfs_vm_pagerret_ok;
+extern const int zfs_vm_pagerret_pend;
extern const int zfs_vm_pagerput_sync;
extern const int zfs_vm_pagerput_inval;
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h
index 0df3378c23e7..b18836aa563e 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode_impl.h
@@ -227,6 +227,7 @@ struct taskq;
#define LOOKUP_XATTR 0x02 /* lookup up extended attr dir */
#define CREATE_XATTR_DIR 0x04 /* Create extended attr dir */
#define LOOKUP_HAVE_SYSATTR_DIR 0x08 /* Already created virtual GFS dir */
+#define LOOKUP_NAMED_ATTR 0x10 /* Lookup a named attribute */
/*
* Public vnode manipulation functions.
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index 289b64759382..3ed311d49cc6 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -96,6 +96,12 @@ struct zfsvfs {
uint64_t z_groupobjquota_obj;
uint64_t z_projectquota_obj;
uint64_t z_projectobjquota_obj;
+ uint64_t z_defaultuserquota;
+ uint64_t z_defaultgroupquota;
+ uint64_t z_defaultprojectquota;
+ uint64_t z_defaultuserobjquota;
+ uint64_t z_defaultgroupobjquota;
+ uint64_t z_defaultprojectobjquota;
uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
#define ZFS_OBJ_MTX_SZ 64
@@ -226,6 +232,8 @@ extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
extern int zfs_get_temporary_prop(struct dsl_dataset *ds, zfs_prop_t zfs_prop,
uint64_t *val, char *setpoint);
extern int zfs_busy(void);
+extern int zfs_set_default_quota(zfsvfs_t *zfsvfs, zfs_prop_t zfs_prop,
+ uint64_t quota);
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
index b292818750d9..15e3affba0e8 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
@@ -29,6 +29,7 @@
#ifndef _FREEBSD_ZFS_SYS_ZNODE_IMPL_H
#define _FREEBSD_ZFS_SYS_ZNODE_IMPL_H
+#ifdef _KERNEL
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/sa.h>
@@ -42,6 +43,7 @@
#include <sys/zfs_project.h>
#include <vm/vm_object.h>
#include <sys/uio.h>
+#endif
#ifdef __cplusplus
extern "C" {
@@ -54,7 +56,7 @@ extern "C" {
*/
#define ZNODE_OS_FIELDS \
struct zfsvfs *z_zfsvfs; \
- vnode_t *z_vnode; \
+ struct vnode *z_vnode; \
char *z_cached_symlink; \
uint64_t z_uid; \
uint64_t z_gid; \
@@ -62,6 +64,8 @@ extern "C" {
uint64_t z_atime[2]; \
uint64_t z_links;
+#ifdef _KERNEL
+
#define ZFS_LINK_MAX UINT64_MAX
/*
@@ -183,6 +187,9 @@ extern int zfs_znode_parent_and_name(struct znode *zp, struct znode **dzpp,
char *buf, uint64_t buflen);
extern int zfs_rlimit_fsize(off_t fsize);
+
+#endif /* _KERNEL */
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/os/linux/Makefile.am b/sys/contrib/openzfs/include/os/linux/Makefile.am
index b7bdd892ec1d..e156ca183dbd 100644
--- a/sys/contrib/openzfs/include/os/linux/Makefile.am
+++ b/sys/contrib/openzfs/include/os/linux/Makefile.am
@@ -8,6 +8,7 @@ kernel_linux_HEADERS = \
%D%/kernel/linux/mm_compat.h \
%D%/kernel/linux/mod_compat.h \
%D%/kernel/linux/page_compat.h \
+ %D%/kernel/linux/pagemap_compat.h \
%D%/kernel/linux/simd.h \
%D%/kernel/linux/simd_aarch64.h \
%D%/kernel/linux/simd_arm.h \
@@ -74,7 +75,7 @@ kernel_spl_sys_HEADERS = \
%D%/spl/sys/kstat.h \
%D%/spl/sys/list.h \
%D%/spl/sys/misc.h \
- %D%/spl/sys/mod_os.h \
+ %D%/spl/sys/mod.h \
%D%/spl/sys/mutex.h \
%D%/spl/sys/param.h \
%D%/spl/sys/proc.h \
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
index 076dab8ba6dc..214f3ea0e787 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
@@ -542,24 +542,6 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id)
}
#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
-/*
- * All the io_*() helper functions below can operate on a bio, or a rq, but
- * not both. The older submit_bio() codepath will pass a bio, and the
- * newer blk-mq codepath will pass a rq.
- */
-static inline int
-io_data_dir(struct bio *bio, struct request *rq)
-{
- if (rq != NULL) {
- if (op_is_write(req_op(rq))) {
- return (WRITE);
- } else {
- return (READ);
- }
- }
- return (bio_data_dir(bio));
-}
-
static inline int
io_is_flush(struct bio *bio, struct request *rq)
{
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
index 16e8a319a5f8..152e5a606f0e 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h
@@ -61,32 +61,6 @@
#endif
/*
- * 2.6.30 API change,
- * The const keyword was added to the 'struct dentry_operations' in
- * the dentry structure. To handle this we define an appropriate
- * dentry_operations_t typedef which can be used.
- */
-typedef const struct dentry_operations dentry_operations_t;
-
-/*
- * 2.6.38 API addition,
- * Added d_clear_d_op() helper function which clears some flags and the
- * registered dentry->d_op table. This is required because d_set_d_op()
- * issues a warning when the dentry operations table is already set.
- * For the .zfs control directory to work properly we must be able to
- * override the default operations table and register custom .d_automount
- * and .d_revalidate callbacks.
- */
-static inline void
-d_clear_d_op(struct dentry *dentry)
-{
- dentry->d_op = NULL;
- dentry->d_flags &= ~(
- DCACHE_OP_HASH | DCACHE_OP_COMPARE |
- DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
-}
-
-/*
* Walk and invalidate all dentry aliases of an inode
* unless it's a mountpoint
*/
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
index 110cdfa259be..e49ada399694 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
@@ -31,15 +31,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
-/*
- * Despite constifying struct kernel_param_ops, some older kernels define a
- * `__check_old_set_param()` function in their headers that checks for a
- * non-constified `->set()`. This has long been fixed in Linux mainline, but
- * since we support older kernels, we workaround it by using a preprocessor
- * definition to disable it.
- */
-#define __check_old_set_param(_) (0)
-
typedef const struct kernel_param zfs_kernel_param_t;
#define ZMOD_RW 0644
@@ -72,6 +63,7 @@ enum scope_prefix_types {
zfs_vdev_disk,
zfs_vdev_file,
zfs_vdev_mirror,
+ zfs_vol,
zfs_vnops,
zfs_zevent,
zfs_zio,
@@ -79,48 +71,23 @@ enum scope_prefix_types {
};
/*
- * While we define our own s64/u64 types, there is no reason to reimplement the
- * existing Linux kernel types, so we use the preprocessor to remap our
- * "custom" implementations to the kernel ones. This is done because the CPP
- * does not allow us to write conditional definitions. The fourth definition
- * exists because the CPP will not allow us to replace things like INT with int
- * before string concatenation.
+ * Our uint64 params are called U64 in part because we had them before Linux
+ * provided ULLONG param ops. Now it does, and we use them, but we retain the
+ * U64 name to keep many existing tunables working without issue.
*/
+#define spl_param_set_u64 param_set_ullong
+#define spl_param_get_u64 param_get_ullong
+#define spl_param_ops_U64 param_ops_ullong
-#define spl_param_set_int param_set_int
-#define spl_param_get_int param_get_int
-#define spl_param_ops_int param_ops_int
-#define spl_param_ops_INT param_ops_int
-
-#define spl_param_set_long param_set_long
-#define spl_param_get_long param_get_long
-#define spl_param_ops_long param_ops_long
-#define spl_param_ops_LONG param_ops_long
-
-#define spl_param_set_uint param_set_uint
-#define spl_param_get_uint param_get_uint
-#define spl_param_ops_uint param_ops_uint
-#define spl_param_ops_UINT param_ops_uint
-
-#define spl_param_set_ulong param_set_ulong
-#define spl_param_get_ulong param_get_ulong
-#define spl_param_ops_ulong param_ops_ulong
-#define spl_param_ops_ULONG param_ops_ulong
-
-#define spl_param_set_charp param_set_charp
-#define spl_param_get_charp param_get_charp
-#define spl_param_ops_charp param_ops_charp
-#define spl_param_ops_STRING param_ops_charp
-
-int spl_param_set_s64(const char *val, zfs_kernel_param_t *kp);
-extern int spl_param_get_s64(char *buffer, zfs_kernel_param_t *kp);
-extern const struct kernel_param_ops spl_param_ops_s64;
-#define spl_param_ops_S64 spl_param_ops_s64
-
-extern int spl_param_set_u64(const char *val, zfs_kernel_param_t *kp);
-extern int spl_param_get_u64(char *buffer, zfs_kernel_param_t *kp);
-extern const struct kernel_param_ops spl_param_ops_u64;
-#define spl_param_ops_U64 spl_param_ops_u64
+/*
+ * We keep our own names for param ops to make expanding them in
+ * ZFS_MODULE_PARAM easy.
+ */
+#define spl_param_ops_INT param_ops_int
+#define spl_param_ops_LONG param_ops_long
+#define spl_param_ops_UINT param_ops_uint
+#define spl_param_ops_ULONG param_ops_ulong
+#define spl_param_ops_STRING param_ops_charp
/*
* Declare a module parameter / sysctl node
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/page_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/page_compat.h
index 963b96ba6351..7dcf53bbea47 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/page_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/page_compat.h
@@ -4,8 +4,8 @@
/*
* Create our own accessor functions to follow the Linux API changes
*/
-#define nr_file_pages() global_node_page_state(NR_FILE_PAGES)
-#define nr_inactive_anon_pages() global_node_page_state(NR_INACTIVE_ANON)
+#define nr_file_pages() (global_node_page_state(NR_ACTIVE_FILE) + \
+ global_node_page_state(NR_INACTIVE_FILE))
#define nr_inactive_file_pages() global_node_page_state(NR_INACTIVE_FILE)
#endif /* _ZFS_PAGE_COMPAT_H */
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/pagemap_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/pagemap_compat.h
new file mode 100644
index 000000000000..a0465ede0105
--- /dev/null
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/pagemap_compat.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
+ */
+
+#ifndef _ZFS_PAGEMAP_COMPAT_H
+#define _ZFS_PAGEMAP_COMPAT_H
+
+#include <linux/pagemap.h>
+
+#ifndef HAVE_PAGEMAP_READAHEAD_PAGE
+#define readahead_page(ractl) (&(__readahead_folio(ractl)->page))
+#endif
+
+#endif
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h
index cd245a5f0135..326f471d7c9b 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h
@@ -139,15 +139,6 @@
*/
#if defined(HAVE_KERNEL_FPU_INTERNAL)
-/*
- * For kernels not exporting *kfpu_{begin,end} we have to use inline assembly
- * with the XSAVE{,OPT,S} instructions, so we need the toolchain to support at
- * least XSAVE.
- */
-#if !defined(HAVE_XSAVE)
-#error "Toolchain needs to support the XSAVE assembler instruction"
-#endif
-
#ifndef XFEATURE_MASK_XTILE
/*
* For kernels where this doesn't exist yet, we still don't want to break
@@ -335,9 +326,13 @@ kfpu_begin(void)
return;
}
#endif
+#if defined(HAVE_XSAVE)
if (static_cpu_has(X86_FEATURE_XSAVE)) {
kfpu_do_xsave("xsave", state, ~XFEATURE_MASK_XTILE);
- } else if (static_cpu_has(X86_FEATURE_FXSR)) {
+ return;
+ }
+#endif
+ if (static_cpu_has(X86_FEATURE_FXSR)) {
kfpu_save_fxsr(state);
} else {
kfpu_save_fsave(state);
@@ -390,9 +385,13 @@ kfpu_end(void)
goto out;
}
#endif
+#if defined(HAVE_XSAVE)
if (static_cpu_has(X86_FEATURE_XSAVE)) {
kfpu_do_xrstor("xrstor", state, ~XFEATURE_MASK_XTILE);
- } else if (static_cpu_has(X86_FEATURE_FXSR)) {
+ goto out;
+ }
+#endif
+ if (static_cpu_has(X86_FEATURE_FXSR)) {
kfpu_restore_fxsr(state);
} else {
kfpu_restore_fsave(state);
@@ -599,6 +598,32 @@ zfs_movbe_available(void)
}
/*
+ * Check if VAES instruction set is available
+ */
+static inline boolean_t
+zfs_vaes_available(void)
+{
+#if defined(X86_FEATURE_VAES)
+ return (!!boot_cpu_has(X86_FEATURE_VAES));
+#else
+ return (B_FALSE);
+#endif
+}
+
+/*
+ * Check if VPCLMULQDQ instruction set is available
+ */
+static inline boolean_t
+zfs_vpclmulqdq_available(void)
+{
+#if defined(X86_FEATURE_VPCLMULQDQ)
+ return (!!boot_cpu_has(X86_FEATURE_VPCLMULQDQ));
+#else
+ return (B_FALSE);
+#endif
+}
+
+/*
* Check if SHA_NI instruction set is available
*/
static inline boolean_t
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h b/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h
index b2a39d7d6cbf..f4bcd58bd281 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/atomic.h
@@ -71,6 +71,22 @@ atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
return ((void *)atomic_cas_64((volatile uint64_t *)target,
(uint64_t)cmp, (uint64_t)newval));
}
+static __inline__ void *
+atomic_swap_ptr(volatile void *target, void *newval)
+{
+ return ((void *)atomic_swap_64((volatile uint64_t *)target,
+ (uint64_t)newval));
+}
+static __inline__ void *
+atomic_load_ptr(volatile void *target)
+{
+ return ((void *)atomic_load_64((volatile uint64_t *)target));
+}
+static __inline__ void
+atomic_store_ptr(volatile void *target, void *newval)
+{
+ atomic_store_64((volatile uint64_t *)target, (uint64_t)newval);
+}
#else /* _LP64 */
static __inline__ void *
atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
@@ -78,6 +94,22 @@ atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
return ((void *)atomic_cas_32((volatile uint32_t *)target,
(uint32_t)cmp, (uint32_t)newval));
}
+static __inline__ void *
+atomic_swap_ptr(volatile void *target, void *newval)
+{
+ return ((void *)atomic_swap_32((volatile uint32_t *)target,
+ (uint32_t)newval));
+}
+static __inline__ void *
+atomic_load_ptr(volatile void *target)
+{
+ return ((void *)atomic_load_32((volatile uint32_t *)target));
+}
+static __inline__ void
+atomic_store_ptr(volatile void *target, void *newval)
+{
+ atomic_store_32((volatile uint32_t *)target, (uint32_t)newval);
+}
#endif /* _LP64 */
#endif /* _SPL_ATOMIC_H */
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h b/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h
index 700cc85b60b6..85b96e1e23a7 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/debug.h
@@ -69,6 +69,10 @@
#define __maybe_unused __attribute__((unused))
#endif
+#ifndef __must_check
+#define __must_check __attribute__((__warn_unused_result__))
+#endif
+
/*
* Without this, we see warnings from objtool during normal Linux builds when
* the kernel is built with CONFIG_STACK_VALIDATION=y:
@@ -116,14 +120,13 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
} while (0)
#define VERIFY3B(LEFT, OP, RIGHT) do { \
- const boolean_t _verify3_left = (boolean_t)(LEFT); \
- const boolean_t _verify3_right = (boolean_t)(RIGHT); \
+ const boolean_t _verify3_left = (boolean_t)!!(LEFT); \
+ const boolean_t _verify3_right = (boolean_t)!!(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3B(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%d " #OP " %d)\n", \
- (boolean_t)_verify3_left, \
- (boolean_t)_verify3_right); \
+ _verify3_left, _verify3_right); \
} while (0)
#define VERIFY3S(LEFT, OP, RIGHT) do { \
@@ -131,7 +134,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const int64_t _verify3_right = (int64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3S(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%lld " #OP " %lld)\n", \
(long long)_verify3_left, \
(long long)_verify3_right); \
@@ -142,7 +145,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uint64_t _verify3_right = (uint64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3U(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%llu " #OP " %llu)\n", \
(unsigned long long)_verify3_left, \
(unsigned long long)_verify3_right); \
@@ -153,7 +156,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uintptr_t _verify3_right = (uintptr_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3P(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%px " #OP " %px)\n", \
(void *)_verify3_left, \
(void *)_verify3_right); \
@@ -163,8 +166,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const int64_t _verify0_right = (int64_t)(RIGHT); \
if (unlikely(!(0 == _verify0_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0(" #RIGHT ") " \
- "failed (0 == %lld)\n", \
+ "VERIFY0(" #RIGHT ") failed (%lld)\n", \
(long long)_verify0_right); \
} while (0)
@@ -172,8 +174,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \
if (unlikely(!(0 == _verify0_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0P(" #RIGHT ") " \
- "failed (NULL == %px)\n", \
+ "VERIFY0P(" #RIGHT ") failed (%px)\n", \
(void *)_verify0_right); \
} while (0)
@@ -186,14 +187,13 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
*/
#define VERIFY3BF(LEFT, OP, RIGHT, STR, ...) do { \
- const boolean_t _verify3_left = (boolean_t)(LEFT); \
- const boolean_t _verify3_right = (boolean_t)(RIGHT); \
+ const boolean_t _verify3_left = (boolean_t)!!(LEFT); \
+ const boolean_t _verify3_right = (boolean_t)!!(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3B(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%d " #OP " %d) " STR "\n", \
- (boolean_t)(_verify3_left), \
- (boolean_t)(_verify3_right), \
+ _verify3_left, _verify3_right, \
__VA_ARGS__); \
} while (0)
@@ -202,10 +202,9 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const int64_t _verify3_right = (int64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3S(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%lld " #OP " %lld) " STR "\n", \
- (long long)(_verify3_left), \
- (long long)(_verify3_right), \
+ (long long)_verify3_left, (long long)_verify3_right,\
__VA_ARGS__); \
} while (0)
@@ -214,10 +213,10 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uint64_t _verify3_right = (uint64_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3U(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%llu " #OP " %llu) " STR "\n", \
- (unsigned long long)(_verify3_left), \
- (unsigned long long)(_verify3_right), \
+ (unsigned long long)_verify3_left, \
+ (unsigned long long)_verify3_right, \
__VA_ARGS__); \
} while (0)
@@ -226,32 +225,27 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
const uintptr_t _verify3_right = (uintptr_t)(RIGHT); \
if (unlikely(!(_verify3_left OP _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \
+ "VERIFY3P(" #LEFT ", " #OP ", " #RIGHT ") " \
"failed (%px " #OP " %px) " STR "\n", \
- (void *) (_verify3_left), \
- (void *) (_verify3_right), \
+ (void *)_verify3_left, (void *)_verify3_right, \
__VA_ARGS__); \
} while (0)
#define VERIFY0PF(RIGHT, STR, ...) do { \
- const uintptr_t _verify3_left = (uintptr_t)(0); \
const uintptr_t _verify3_right = (uintptr_t)(RIGHT); \
- if (unlikely(!(_verify3_left == _verify3_right))) \
+ if (unlikely(!(0 == _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0(0 == " #RIGHT ") " \
- "failed (0 == %px) " STR "\n", \
- (long long) (_verify3_right), \
+ "VERIFY0P(" #RIGHT ") failed (%px) " STR "\n", \
+ (void *)_verify3_right, \
__VA_ARGS__); \
} while (0)
#define VERIFY0F(RIGHT, STR, ...) do { \
- const int64_t _verify3_left = (int64_t)(0); \
const int64_t _verify3_right = (int64_t)(RIGHT); \
- if (unlikely(!(_verify3_left == _verify3_right))) \
+ if (unlikely(!(0 == _verify3_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
- "VERIFY0(0 == " #RIGHT ") " \
- "failed (0 == %lld) " STR "\n", \
- (long long) (_verify3_right), \
+ "VERIFY0(" #RIGHT ") failed (%lld) " STR "\n", \
+ (long long)_verify3_right, \
__VA_ARGS__); \
} while (0)
@@ -260,10 +254,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
spl_assert("(" #A ") implies (" #B ")", \
__FILE__, __FUNCTION__, __LINE__)))
-#define VERIFY_EQUIV(A, B) \
- ((void)(likely(!!(A) == !!(B)) || \
- spl_assert("(" #A ") is equivalent to (" #B ")", \
- __FILE__, __FUNCTION__, __LINE__)))
+#define VERIFY_EQUIV(A, B) VERIFY3B(A, ==, B)
/*
* Debugging disabled (--disable-debug)
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
index 995236117dd4..fe34de9c179e 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
@@ -61,7 +61,7 @@ void *spl_kvmalloc(size_t size, gfp_t flags);
/*
* Convert a KM_* flags mask to its Linux GFP_* counterpart. The conversion
* function is context aware which means that KM_SLEEP allocations can be
- * safely used in syncing contexts which have set PF_FSTRANS.
+ * safely used in syncing contexts which have set SPL_FSTRANS.
*/
static inline gfp_t
kmem_flags_convert(int flags)
@@ -91,25 +91,11 @@ typedef struct {
} fstrans_cookie_t;
/*
- * Introduced in Linux 3.9, however this cannot be solely relied on before
- * Linux 3.18 as it doesn't turn off __GFP_FS as it should.
+ * SPL_FSTRANS is the set of flags that indicate that the task is in a
+ * filesystem or IO codepath, and so any allocation must not call back into
+ * those codepaths (eg to swap).
*/
-#ifdef PF_MEMALLOC_NOIO
-#define __SPL_PF_MEMALLOC_NOIO (PF_MEMALLOC_NOIO)
-#else
-#define __SPL_PF_MEMALLOC_NOIO (0)
-#endif
-
-/*
- * PF_FSTRANS is removed from Linux 4.12
- */
-#ifdef PF_FSTRANS
-#define __SPL_PF_FSTRANS (PF_FSTRANS)
-#else
-#define __SPL_PF_FSTRANS (0)
-#endif
-
-#define SPL_FSTRANS (__SPL_PF_FSTRANS|__SPL_PF_MEMALLOC_NOIO)
+#define SPL_FSTRANS (PF_MEMALLOC_NOIO)
static inline fstrans_cookie_t
spl_fstrans_mark(void)
@@ -141,43 +127,8 @@ spl_fstrans_check(void)
return (current->flags & SPL_FSTRANS);
}
-/*
- * specifically used to check PF_FSTRANS flag, cannot be relied on for
- * checking spl_fstrans_mark().
- */
-static inline int
-__spl_pf_fstrans_check(void)
-{
- return (current->flags & __SPL_PF_FSTRANS);
-}
-
-/*
- * Kernel compatibility for GFP flags
- */
-/* < 4.13 */
-#ifndef __GFP_RETRY_MAYFAIL
-#define __GFP_RETRY_MAYFAIL __GFP_REPEAT
-#endif
-/* < 4.4 */
-#ifndef __GFP_RECLAIM
-#define __GFP_RECLAIM __GFP_WAIT
-#endif
-
-#ifdef HAVE_ATOMIC64_T
-#define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
-#define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
-#define kmem_alloc_used_read() atomic64_read(&kmem_alloc_used)
-#define kmem_alloc_used_set(size) atomic64_set(&kmem_alloc_used, size)
extern atomic64_t kmem_alloc_used;
-extern unsigned long long kmem_alloc_max;
-#else /* HAVE_ATOMIC64_T */
-#define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used)
-#define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used)
-#define kmem_alloc_used_read() atomic_read(&kmem_alloc_used)
-#define kmem_alloc_used_set(size) atomic_set(&kmem_alloc_used, size)
-extern atomic_t kmem_alloc_used;
-extern unsigned long long kmem_alloc_max;
-#endif /* HAVE_ATOMIC64_T */
+extern uint64_t kmem_alloc_max;
extern unsigned int spl_kmem_alloc_warn;
extern unsigned int spl_kmem_alloc_max;
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h b/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h
index 0b44786f8a6e..fbaaf229bd1a 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/misc.h
@@ -24,7 +24,13 @@
#define _OS_LINUX_SPL_MISC_H
#include <linux/kobject.h>
+#include <linux/swap.h>
extern void spl_signal_kobj_evt(struct block_device *bdev);
+/*
+ * Check if the current thread is a memory reclaim thread.
+ */
+extern int current_is_reclaim_thread(void);
+
#endif
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/linux/spl/sys/mod.h
index eaeb9255039e..eaeb9255039e 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/mod_os.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/mod.h
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h
index f000f53ab9b6..4eca2414fc5b 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h
@@ -111,7 +111,7 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
#undef mutex_destroy
#define mutex_destroy(mp) \
{ \
- VERIFY3P(mutex_owner(mp), ==, NULL); \
+ VERIFY0P(mutex_owner(mp)); \
}
#define mutex_tryenter(mp) \
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
index 563e0a19663d..c883836c2f83 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
@@ -130,7 +130,7 @@ RW_READ_HELD(krwlock_t *rwp)
/*
* The Linux rwsem implementation does not require a matching destroy.
*/
-#define rw_destroy(rwp) ((void) 0)
+#define rw_destroy(rwp) ASSERT(!(RW_LOCK_HELD(rwp)))
/*
* Upgrading a rwsem from a reader to a writer is not supported by the
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
index 087389b57b34..ad2815e46394 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/stat.h
@@ -25,6 +25,6 @@
#ifndef _SPL_STAT_H
#define _SPL_STAT_H
-#include <linux/stat.h>
+#include <sys/stat.h>
#endif /* SPL_STAT_H */
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
index e932ea72f1be..db48222b712a 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
@@ -92,8 +92,10 @@
* Treat shim tasks as SCHED_NORMAL tasks
*/
#define minclsyspri (MAX_PRIO-1)
-#define maxclsyspri (MAX_RT_PRIO)
#define defclsyspri (DEFAULT_PRIO)
+/* Write issue taskq priority. */
+#define wtqclsyspri (MAX_RT_PRIO + 1)
+#define maxclsyspri (MAX_RT_PRIO)
#ifndef NICE_TO_PRIO
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/time.h b/sys/contrib/openzfs/include/os/linux/spl/sys/time.h
index 33b273b53996..4edc42a8aef9 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/time.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/time.h
@@ -80,6 +80,14 @@ gethrestime_sec(void)
}
static inline hrtime_t
+getlrtime(void)
+{
+ inode_timespec_t ts;
+ ktime_get_coarse_ts64(&ts);
+ return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec);
+}
+
+static inline hrtime_t
gethrtime(void)
{
struct timespec64 ts;
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
index f66da5d5af57..26c2c387caa3 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
@@ -174,7 +174,7 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
static inline void
zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
- ssize_t resid, size_t skip)
+ ssize_t resid)
{
uio->uio_iter = iter;
uio->uio_iovcnt = iter->nr_segs;
@@ -184,7 +184,7 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
uio->uio_fmode = 0;
uio->uio_extflg = 0;
uio->uio_resid = resid;
- uio->uio_skip = skip;
+ uio->uio_skip = 0;
uio->uio_soffset = uio->uio_loffset;
memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t));
}
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/policy.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/policy.h
index 77d0cdef5d2f..8fa6ab01d1ad 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/policy.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/policy.h
@@ -52,7 +52,6 @@ int secpolicy_vnode_setids_setgids(const cred_t *, gid_t, zidmap_t *,
struct user_namespace *);
int secpolicy_zinject(const cred_t *);
int secpolicy_zfs(const cred_t *);
-int secpolicy_zfs_proc(const cred_t *, proc_t *);
void secpolicy_setid_clear(vattr_t *, cred_t *);
int secpolicy_setid_setsticky_clear(struct inode *, vattr_t *,
const vattr_t *, cred_t *, zidmap_t *, struct user_namespace *);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h
index 8923657daf02..d88b4937ef08 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h
@@ -59,8 +59,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__field(uint64_t, z_size)
__field(uint64_t, z_pflags)
__field(uint32_t, z_sync_cnt)
- __field(uint32_t, z_sync_writes_cnt)
- __field(uint32_t, z_async_writes_cnt)
__field(mode_t, z_mode)
__field(boolean_t, z_is_sa)
__field(boolean_t, z_is_ctldir)
@@ -92,8 +90,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_size = zn->z_size;
__entry->z_pflags = zn->z_pflags;
__entry->z_sync_cnt = zn->z_sync_cnt;
- __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt;
- __entry->z_async_writes_cnt = zn->z_async_writes_cnt;
__entry->z_mode = zn->z_mode;
__entry->z_is_sa = zn->z_is_sa;
__entry->z_is_ctldir = zn->z_is_ctldir;
@@ -117,7 +113,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
TP_printk("zn { id %llu unlinked %u atime_dirty %u "
"zn_prefetch %u blksz %u seq %u "
"mapcnt %llu size %llu pflags %llu "
- "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
+ "sync_cnt %u "
"mode 0x%x is_sa %d is_ctldir %d "
"inode { uid %u gid %u ino %lu nlink %u size %lli "
"blkbits %u bytes %u mode 0x%x generation %x } } "
@@ -126,7 +122,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_zn_prefetch, __entry->z_blksz,
__entry->z_seq, __entry->z_mapcnt, __entry->z_size,
__entry->z_pflags, __entry->z_sync_cnt,
- __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
__entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
__entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
__entry->i_size, __entry->i_blkbits,
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h
index 85cf8cc20b09..e1b6d61099b9 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_common.h
@@ -45,7 +45,7 @@
__field(zio_flag_t, zio_orig_flags) \
__field(enum zio_stage, zio_orig_stage) \
__field(enum zio_stage, zio_orig_pipeline) \
- __field(uint8_t, zio_reexecute) \
+ __field(uint8_t, zio_post) \
__field(uint64_t, zio_txg) \
__field(int, zio_error) \
__field(uint64_t, zio_ena) \
@@ -74,7 +74,7 @@
__entry->zio_orig_flags = zio->io_orig_flags; \
__entry->zio_orig_stage = zio->io_orig_stage; \
__entry->zio_orig_pipeline = zio->io_orig_pipeline; \
- __entry->zio_reexecute = zio->io_reexecute; \
+ __entry->zio_post = zio->io_post; \
__entry->zio_txg = zio->io_txg; \
__entry->zio_error = zio->io_error; \
__entry->zio_ena = zio->io_ena; \
@@ -92,7 +92,7 @@
"zio { type %u prio %u size %llu orig_size %llu " \
"offset %llu timestamp %llu delta %llu delay %llu " \
"flags 0x%llx stage 0x%x pipeline 0x%x orig_flags 0x%llx " \
- "orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
+ "orig_stage 0x%x orig_pipeline 0x%x post %u " \
"txg %llu error %d ena %llu prop { checksum %u compress %u " \
"type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"
@@ -102,7 +102,7 @@
__entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
__entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
__entry->zio_orig_flags, __entry->zio_orig_stage, \
- __entry->zio_orig_pipeline, __entry->zio_reexecute, \
+ __entry->zio_orig_pipeline, __entry->zio_post, \
__entry->zio_txg, __entry->zio_error, __entry->zio_ena, \
__entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \
__entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
index 955462c85d10..e34ea46b3fe8 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
@@ -139,18 +139,18 @@
#define ZCW_TP_STRUCT_ENTRY \
__field(lwb_t *, zcw_lwb) \
__field(boolean_t, zcw_done) \
- __field(int, zcw_zio_error) \
+ __field(int, zcw_error) \
#define ZCW_TP_FAST_ASSIGN \
__entry->zcw_lwb = zcw->zcw_lwb; \
__entry->zcw_done = zcw->zcw_done; \
- __entry->zcw_zio_error = zcw->zcw_zio_error;
+ __entry->zcw_error = zcw->zcw_error;
#define ZCW_TP_PRINTK_FMT \
"zcw { lwb %p done %u error %u }"
#define ZCW_TP_PRINTK_ARGS \
- __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_zio_error
+ __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_error
/*
* Generic support for two argument tracepoints of the form:
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
index 4a73712e959d..ab46d5f8ca08 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -131,6 +131,12 @@ struct zfsvfs {
uint64_t z_groupobjquota_obj;
uint64_t z_projectquota_obj;
uint64_t z_projectobjquota_obj;
+ uint64_t z_defaultuserquota;
+ uint64_t z_defaultgroupquota;
+ uint64_t z_defaultprojectquota;
+ uint64_t z_defaultuserobjquota;
+ uint64_t z_defaultgroupobjquota;
+ uint64_t z_defaultprojectobjquota;
uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
uint64_t z_hold_size; /* znode hold array size */
@@ -250,6 +256,8 @@ extern int zfs_prune(struct super_block *sb, unsigned long nr_to_scan,
int *objects);
extern int zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop,
uint64_t *val, char *setpoint);
+extern int zfs_set_default_quota(zfsvfs_t *zfsvfs, zfs_prop_t zfs_prop,
+ uint64_t quota);
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
index b38847b20462..6a77e40abe10 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
@@ -157,6 +157,7 @@ struct znode;
extern int zfs_sync(struct super_block *, int, cred_t *);
extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
+extern void zfs_inode_free(struct inode *);
extern void zfs_inode_destroy(struct inode *);
extern void zfs_mark_inode_dirty(struct inode *);
extern boolean_t zfs_relatime_need_update(const struct inode *);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
index 39f1310aadf2..8994aab889fe 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
@@ -55,6 +55,7 @@ extern const struct file_operations zpl_dir_file_operations;
extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);
extern const struct super_operations zpl_super_operations;
+extern const struct dentry_operations zpl_dentry_operations;
extern const struct export_operations zpl_export_operations;
extern struct file_system_type zpl_fs_type;
@@ -123,41 +124,6 @@ extern int zpl_clone_file_range(struct file *src_file, loff_t src_off,
extern int zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, uint64_t len);
-/* compat for FICLONE/FICLONERANGE/FIDEDUPERANGE ioctls */
-typedef struct {
- int64_t fcr_src_fd;
- uint64_t fcr_src_offset;
- uint64_t fcr_src_length;
- uint64_t fcr_dest_offset;
-} zfs_ioc_compat_file_clone_range_t;
-
-typedef struct {
- int64_t fdri_dest_fd;
- uint64_t fdri_dest_offset;
- uint64_t fdri_bytes_deduped;
- int32_t fdri_status;
- uint32_t fdri_reserved;
-} zfs_ioc_compat_dedupe_range_info_t;
-
-typedef struct {
- uint64_t fdr_src_offset;
- uint64_t fdr_src_length;
- uint16_t fdr_dest_count;
- uint16_t fdr_reserved1;
- uint32_t fdr_reserved2;
- zfs_ioc_compat_dedupe_range_info_t fdr_info[];
-} zfs_ioc_compat_dedupe_range_t;
-
-#define ZFS_IOC_COMPAT_FICLONE _IOW(0x94, 9, int)
-#define ZFS_IOC_COMPAT_FICLONERANGE \
- _IOW(0x94, 13, zfs_ioc_compat_file_clone_range_t)
-#define ZFS_IOC_COMPAT_FIDEDUPERANGE \
- _IOWR(0x94, 54, zfs_ioc_compat_dedupe_range_t)
-
-extern long zpl_ioctl_ficlone(struct file *filp, void *arg);
-extern long zpl_ioctl_ficlonerange(struct file *filp, void *arg);
-extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
-
#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
#define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip)
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
index 1b30389107c5..b55d5da3378c 100644
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -954,7 +954,7 @@ typedef struct arc_sums {
wmsum_t arcstat_data_size;
wmsum_t arcstat_metadata_size;
wmsum_t arcstat_dbuf_size;
- wmsum_t arcstat_dnode_size;
+ aggsum_t arcstat_dnode_size;
wmsum_t arcstat_bonus_size;
wmsum_t arcstat_l2_hits;
wmsum_t arcstat_l2_misses;
diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h
index 285e02484c57..baf3b1508335 100644
--- a/sys/contrib/openzfs/include/sys/dbuf.h
+++ b/sys/contrib/openzfs/include/sys/dbuf.h
@@ -46,20 +46,6 @@ extern "C" {
#define IN_DMU_SYNC 2
/*
- * define flags for dbuf_read
- */
-
-#define DB_RF_MUST_SUCCEED (1 << 0)
-#define DB_RF_CANFAIL (1 << 1)
-#define DB_RF_HAVESTRUCT (1 << 2)
-#define DB_RF_NOPREFETCH (1 << 3)
-#define DB_RF_NEVERWAIT (1 << 4)
-#define DB_RF_CACHED (1 << 5)
-#define DB_RF_NO_DECRYPT (1 << 6)
-#define DB_RF_PARTIAL_FIRST (1 << 7)
-#define DB_RF_PARTIAL_MORE (1 << 8)
-
-/*
* The simplified state transition diagram for dbufs looks like:
*
* +-------> READ ------+
@@ -178,6 +164,7 @@ typedef struct dbuf_dirty_record {
boolean_t dr_nopwrite;
boolean_t dr_brtwrite;
boolean_t dr_diowrite;
+ boolean_t dr_rewrite;
boolean_t dr_has_raw_params;
/* Override and raw params are mutually exclusive. */
@@ -389,19 +376,21 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag,
dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
uint64_t blkid, uint64_t *hash_out);
-int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
+int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, dmu_flags_t flags);
void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
+void dmu_buf_will_fill_flags(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail,
+ dmu_flags_t flags);
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
-void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
+void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
+ dmu_flags_t flags);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
dmu_tx_t *tx);
boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
int dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp);
int dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa);
-arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
bp_embedded_type_t etype, enum zio_compress comp,
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
@@ -447,6 +436,7 @@ int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
void dbuf_init(void);
void dbuf_fini(void);
+void dbuf_cache_reduce_target_size(void);
boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
@@ -476,10 +466,10 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
#define DBUF_GET_BUFC_TYPE(_db) \
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
-#define DBUF_IS_CACHEABLE(_db) \
+#define DBUF_IS_CACHEABLE(_db) (!(_db)->db_pending_evict && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
(dbuf_is_metadata(_db) && \
- ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
+ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))))
boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *db_bp);
diff --git a/sys/contrib/openzfs/include/sys/ddt.h b/sys/contrib/openzfs/include/sys/ddt.h
index 8f2cc9c5a99b..f1687d471a0a 100644
--- a/sys/contrib/openzfs/include/sys/ddt.h
+++ b/sys/contrib/openzfs/include/sys/ddt.h
@@ -339,6 +339,8 @@ extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
extern void ddt_phys_extend(ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
const blkptr_t *bp);
+extern void ddt_phys_unextend(ddt_univ_phys_t *cur, ddt_univ_phys_t *orig,
+ ddt_phys_variant_t v);
extern void ddt_phys_copy(ddt_univ_phys_t *dst, const ddt_univ_phys_t *src,
ddt_phys_variant_t v);
extern void ddt_phys_clear(ddt_univ_phys_t *ddp, ddt_phys_variant_t v);
@@ -350,6 +352,8 @@ extern ddt_phys_variant_t ddt_phys_select(const ddt_t *ddt,
const ddt_entry_t *dde, const blkptr_t *bp);
extern uint64_t ddt_phys_birth(const ddt_univ_phys_t *ddp,
ddt_phys_variant_t v);
+extern int ddt_phys_is_gang(const ddt_univ_phys_t *ddp,
+ ddt_phys_variant_t v);
extern int ddt_phys_dva_count(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
boolean_t encrypted);
diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h
index da1fdfd23962..aa5035862def 100644
--- a/sys/contrib/openzfs/include/sys/dmu.h
+++ b/sys/contrib/openzfs/include/sys/dmu.h
@@ -144,9 +144,9 @@ typedef enum dmu_object_byteswap {
#define DMU_OT_IS_DDT(ot) \
((ot) == DMU_OT_DDT_ZAP)
-#define DMU_OT_IS_CRITICAL(ot) \
+#define DMU_OT_IS_CRITICAL(ot, level) \
(DMU_OT_IS_METADATA(ot) && \
- (ot) != DMU_OT_DNODE && \
+ ((ot) != DMU_OT_DNODE || (level) > 0) && \
(ot) != DMU_OT_DIRECTORY_CONTENTS && \
(ot) != DMU_OT_SA)
@@ -281,9 +281,30 @@ typedef enum dmu_object_type {
* the transaction is full. See the comment above dmu_tx_assign() for more
* details on the meaning of these flags.
*/
-#define DMU_TX_NOWAIT (0ULL)
-#define DMU_TX_WAIT (1ULL<<0)
-#define DMU_TX_NOTHROTTLE (1ULL<<1)
+typedef enum {
+ /*
+ * If the tx cannot be assigned to a transaction for any reason, do
+ * not block but return immediately.
+ */
+ DMU_TX_NOWAIT = 0,
+
+ /*
+ * Assign the tx to the open transaction. If the open transaction is
+ * full, or the write throttle is active, block until the next
+ * transaction and try again. If the pool suspends while waiting
+ * and failmode=continue, return an error.
+ */
+ DMU_TX_WAIT = (1 << 0),
+
+ /* If the write throttle would prevent the assignment, ignore it. */
+ DMU_TX_NOTHROTTLE = (1 << 1),
+
+ /*
+ * With DMU_TX_WAIT, always block if the pool suspends during
+ * assignment, regardless of the value of the failmode= property.
+ */
+ DMU_TX_SUSPEND = (1 << 2),
+} dmu_tx_flag_t;
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
@@ -339,7 +360,6 @@ void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
void *arg);
-int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
@@ -394,6 +414,9 @@ typedef struct dmu_buf {
#define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint"
#define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap"
#define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones"
+#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klarasystems:txg_log_time:minutes"
+#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klarasystems:txg_log_time:days"
+#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klarasystems:txg_log_time:months"
/*
* Allocate an object from this objset. The range of object numbers
@@ -533,6 +556,26 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
/*
+ * DB_RF_* are to be used for dbuf_read() or in limited other cases.
+ */
+typedef enum dmu_flags {
+ DB_RF_MUST_SUCCEED = 0, /* Suspend on I/O errors. */
+ DB_RF_CANFAIL = 1 << 0, /* Return on I/O errors. */
+ DB_RF_HAVESTRUCT = 1 << 1, /* dn_struct_rwlock is locked. */
+ DB_RF_NEVERWAIT = 1 << 2,
+ DMU_READ_PREFETCH = 0, /* Try speculative prefetch. */
+ DMU_READ_NO_PREFETCH = 1 << 3, /* Don't prefetch speculatively. */
+ DB_RF_NOPREFETCH = DMU_READ_NO_PREFETCH,
+ DMU_READ_NO_DECRYPT = 1 << 4, /* Don't decrypt. */
+ DB_RF_NO_DECRYPT = DMU_READ_NO_DECRYPT,
+ DMU_DIRECTIO = 1 << 5, /* Bypass ARC. */
+ DMU_UNCACHEDIO = 1 << 6, /* Reduce caching. */
+ DMU_PARTIAL_FIRST = 1 << 7, /* First partial access. */
+ DMU_PARTIAL_MORE = 1 << 8, /* Following partial access. */
+ DMU_KEEP_CACHING = 1 << 9, /* Don't affect caching. */
+} dmu_flags_t;
+
+/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
* dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
@@ -547,7 +590,7 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
int dmu_bonus_hold(objset_t *os, uint64_t object, const void *tag,
dmu_buf_t **dbp);
int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
- uint32_t flags);
+ dmu_flags_t flags);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
@@ -558,9 +601,9 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
* Special spill buffer support used by "SA" framework
*/
-int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag,
- dmu_buf_t **dbp);
-int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
+int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, dmu_flags_t flags,
+ const void *tag, dmu_buf_t **dbp);
+int dmu_spill_hold_by_dnode(dnode_t *dn, dmu_flags_t flags,
const void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
@@ -579,17 +622,17 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
* The object number must be a valid, allocated object number.
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
- const void *tag, dmu_buf_t **, int flags);
+ const void *tag, dmu_buf_t **, dmu_flags_t flags);
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
uint64_t length, int read, const void *tag, int *numbufsp,
dmu_buf_t ***dbpp);
int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
const void *tag, dmu_buf_t **dbp);
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
- const void *tag, dmu_buf_t **dbp, int flags);
+ const void *tag, dmu_buf_t **dbp, dmu_flags_t flags);
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
- dmu_buf_t ***dbpp, uint32_t flags);
+ dmu_buf_t ***dbpp, dmu_flags_t flags);
int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
dmu_buf_t **dbp);
@@ -699,8 +742,8 @@ dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
dmu_buf_evict_func_t *evict_func_async,
dmu_buf_t **clear_on_evict_dbufp __maybe_unused)
{
- ASSERT(dbu->dbu_evict_func_sync == NULL);
- ASSERT(dbu->dbu_evict_func_async == NULL);
+ ASSERT0P(dbu->dbu_evict_func_sync);
+ ASSERT0P(dbu->dbu_evict_func_async);
/* must have at least one evict func */
IMPLY(evict_func_sync == NULL, evict_func_async != NULL);
@@ -781,6 +824,8 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_will_dirty_flags(dmu_buf_t *db, dmu_tx_t *tx, dmu_flags_t flags);
+void dmu_buf_will_rewrite(dmu_buf_t *db, dmu_tx_t *tx);
boolean_t dmu_buf_is_dirty(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
@@ -814,7 +859,7 @@ void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
int len);
void dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
- int len);
+ uint64_t len, uint_t blksz);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
@@ -828,7 +873,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t flags);
+int dmu_tx_assign(dmu_tx_t *tx, dmu_tx_flag_t flags);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_mark_netfree(dmu_tx_t *tx);
@@ -874,40 +919,36 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
* Canfail routines will return 0 on success, or an errno if there is a
* nonrecoverable I/O error.
*/
-#define DMU_READ_PREFETCH 0 /* prefetch */
-#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
-#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
-#define DMU_DIRECTIO 4 /* use Direct I/O */
-
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- void *buf, uint32_t flags);
+ void *buf, dmu_flags_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
- uint32_t flags);
+ dmu_flags_t flags);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
int dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx);
-int dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx, uint32_t flags);
+ const void *buf, dmu_tx_t *tx, dmu_flags_t flags);
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx);
#ifdef _KERNEL
-int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size);
-int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size);
-int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size);
+int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
+ dmu_flags_t flags);
+int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
+ dmu_flags_t flags);
+int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
+ dmu_flags_t flags);
int dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
- dmu_tx_t *tx);
+ dmu_tx_t *tx, dmu_flags_t flags);
int dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
- dmu_tx_t *tx);
+ dmu_tx_t *tx, dmu_flags_t flags);
int dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
- dmu_tx_t *tx);
+ dmu_tx_t *tx, dmu_flags_t flags);
#endif
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
- struct arc_buf *buf, dmu_tx_t *tx);
+ struct arc_buf *buf, dmu_tx_t *tx, dmu_flags_t flags);
int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
- struct arc_buf *buf, dmu_tx_t *tx);
+ struct arc_buf *buf, dmu_tx_t *tx, dmu_flags_t flags);
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
extern uint_t zfs_max_recordsize;
@@ -980,6 +1021,11 @@ void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
void dmu_object_dnsize_from_db(dmu_buf_t *db, int *dnsize);
+typedef enum {
+ DDS_FLAG_ENCRYPTED = (1<<0),
+ DDS_FLAG_HAS_ENCRYPTED = (1<<7),
+} dmu_objset_flag_t;
+
typedef struct dmu_objset_stats {
uint64_t dds_num_clones; /* number of clones of this */
uint64_t dds_creation_txg;
@@ -989,6 +1035,7 @@ typedef struct dmu_objset_stats {
uint8_t dds_inconsistent;
uint8_t dds_redacted;
char dds_origin[ZFS_MAX_DATASET_NAME_LEN];
+ uint8_t dds_flags; /* dmu_objset_flag_t */
} dmu_objset_stats_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/dmu_impl.h b/sys/contrib/openzfs/include/sys/dmu_impl.h
index dc2b66d06e7c..bae872bd1907 100644
--- a/sys/contrib/openzfs/include/sys/dmu_impl.h
+++ b/sys/contrib/openzfs/include/sys/dmu_impl.h
@@ -168,12 +168,10 @@ extern "C" {
* dn_allocated_txg
* dn_free_txg
* dn_assigned_txg
- * dn_dirty_txg
+ * dn_dirtycnt
* dd_assigned_tx
* dn_notxholds
* dn_nodnholds
- * dn_dirtyctx
- * dn_dirtyctx_firstset
* (dn_phys copy fields?)
* (dn_phys contents?)
* held from:
@@ -270,11 +268,13 @@ void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
int dmu_write_direct(zio_t *, dmu_buf_impl_t *, abd_t *, dmu_tx_t *);
-int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t flags);
-int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t, dmu_tx_t *);
+int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, dmu_flags_t);
+int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, dmu_flags_t,
+ dmu_tx_t *);
#if defined(_KERNEL)
-int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t);
-int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_tx_t *);
+int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_flags_t);
+int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_flags_t,
+ dmu_tx_t *);
#endif
#ifdef __cplusplus
diff --git a/sys/contrib/openzfs/include/sys/dmu_objset.h b/sys/contrib/openzfs/include/sys/dmu_objset.h
index 288ad30166df..492be29200e4 100644
--- a/sys/contrib/openzfs/include/sys/dmu_objset.h
+++ b/sys/contrib/openzfs/include/sys/dmu_objset.h
@@ -152,7 +152,7 @@ struct objset {
* The largest zpl file block allowed in special class.
* cached here instead of zfsvfs for easier access.
*/
- int os_zpl_special_smallblock;
+ uint64_t os_zpl_special_smallblock;
/*
* Pointer is constant; the blkptr it points to is protected by
diff --git a/sys/contrib/openzfs/include/sys/dmu_recv.h b/sys/contrib/openzfs/include/sys/dmu_recv.h
index cd292d9244b0..ffb2b602d73f 100644
--- a/sys/contrib/openzfs/include/sys/dmu_recv.h
+++ b/sys/contrib/openzfs/include/sys/dmu_recv.h
@@ -60,7 +60,6 @@ typedef struct dmu_recv_cookie {
uint64_t drc_ivset_guid;
void *drc_owner;
cred_t *drc_cred;
- proc_t *drc_proc;
nvlist_t *drc_begin_nvl;
objset_t *drc_os;
diff --git a/sys/contrib/openzfs/include/sys/dmu_traverse.h b/sys/contrib/openzfs/include/sys/dmu_traverse.h
index 3196b2addeee..70cafa4c74f1 100644
--- a/sys/contrib/openzfs/include/sys/dmu_traverse.h
+++ b/sys/contrib/openzfs/include/sys/dmu_traverse.h
@@ -59,6 +59,13 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
#define TRAVERSE_NO_DECRYPT (1<<5)
+/*
+ * Always use logical birth time for birth time comparisons. This is useful
+ * for operations that care about user data changes rather than physical
+ * block rewrites (e.g., incremental replication).
+ */
+#define TRAVERSE_LOGICAL (1<<6)
+
/* Special traverse error return value to indicate skipping of children */
#define TRAVERSE_VISIT_NO_CHILDREN -1
diff --git a/sys/contrib/openzfs/include/sys/dmu_tx.h b/sys/contrib/openzfs/include/sys/dmu_tx.h
index b87836ecc2d9..ce49a0c49044 100644
--- a/sys/contrib/openzfs/include/sys/dmu_tx.h
+++ b/sys/contrib/openzfs/include/sys/dmu_tx.h
@@ -25,6 +25,7 @@
*/
/*
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2025, Klara, Inc.
*/
#ifndef _SYS_DMU_TX_H
@@ -80,6 +81,9 @@ struct dmu_tx {
/* has this transaction already been delayed? */
boolean_t tx_dirty_delayed;
+ /* whether dmu_tx_wait() should return on suspend */
+ boolean_t tx_break_on_suspend;
+
int tx_err;
};
@@ -143,7 +147,7 @@ extern dmu_tx_stats_t dmu_tx_stats;
* These routines are defined in dmu.h, and are called by the user.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, dmu_tx_flag_t flags);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/include/sys/dmu_zfetch.h b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
index 963e841a4882..a5ddd28026ce 100644
--- a/sys/contrib/openzfs/include/sys/dmu_zfetch.h
+++ b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
@@ -81,9 +81,10 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);
-void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
-void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
+void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t,
boolean_t);
+void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
+ boolean_t, boolean_t);
#ifdef __cplusplus
diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h
index 76218c8b09ca..8bd1db5b7165 100644
--- a/sys/contrib/openzfs/include/sys/dnode.h
+++ b/sys/contrib/openzfs/include/sys/dnode.h
@@ -141,12 +141,6 @@ struct dmu_buf_impl;
struct objset;
struct zio;
-enum dnode_dirtycontext {
- DN_UNDIRTIED,
- DN_DIRTY_OPEN,
- DN_DIRTY_SYNC
-};
-
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
#define DNODE_FLAG_USED_BYTES (1 << 0)
#define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1)
@@ -340,11 +334,9 @@ struct dnode {
uint64_t dn_allocated_txg;
uint64_t dn_free_txg;
uint64_t dn_assigned_txg;
- uint64_t dn_dirty_txg; /* txg dnode was last dirtied */
+ uint8_t dn_dirtycnt;
kcondvar_t dn_notxholds;
kcondvar_t dn_nodnholds;
- enum dnode_dirtycontext dn_dirtyctx;
- const void *dn_dirtyctx_firstset; /* dbg: contents meaningless */
/* protected by own devices */
zfs_refcount_t dn_tx_holds;
@@ -440,7 +432,6 @@ void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting);
int dnode_try_claim(objset_t *os, uint64_t object, int slots);
boolean_t dnode_is_dirty(dnode_t *dn);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
-void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
@@ -468,9 +459,6 @@ void dnode_free_interior_slots(dnode_t *dn);
void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type);
-#define DNODE_IS_DIRTY(_dn) \
- ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))
-
#define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \
((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
(((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \
diff --git a/sys/contrib/openzfs/include/sys/dsl_dataset.h b/sys/contrib/openzfs/include/sys/dsl_dataset.h
index 624f3ddde9f0..2e1f9847f34c 100644
--- a/sys/contrib/openzfs/include/sys/dsl_dataset.h
+++ b/sys/contrib/openzfs/include/sys/dsl_dataset.h
@@ -276,6 +276,12 @@ dsl_dataset_phys(dsl_dataset_t *ds)
return ((dsl_dataset_phys_t *)ds->ds_dbuf->db_data);
}
+typedef struct dsl_dataset_clone_arg_t {
+ const char *ddca_clone;
+ const char *ddca_origin;
+ cred_t *ddca_cred;
+} dsl_dataset_clone_arg_t;
+
typedef struct dsl_dataset_promote_arg {
const char *ddpa_clonename;
dsl_dataset_t *ddpa_clone;
@@ -284,7 +290,6 @@ typedef struct dsl_dataset_promote_arg {
uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
nvlist_t *err_ds;
cred_t *cr;
- proc_t *proc;
} dsl_dataset_promote_arg_t;
typedef struct dsl_dataset_rollback_arg {
@@ -299,7 +304,6 @@ typedef struct dsl_dataset_snapshot_arg {
nvlist_t *ddsa_props;
nvlist_t *ddsa_errors;
cred_t *ddsa_cr;
- proc_t *ddsa_proc;
} dsl_dataset_snapshot_arg_t;
typedef struct dsl_dataset_rename_snapshot_arg {
@@ -366,6 +370,9 @@ uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx);
int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
+void dsl_dataset_clone_sync(void *arg, dmu_tx_t *tx);
+int dsl_dataset_clone_check(void *arg, dmu_tx_t *tx);
+int dsl_dataset_clone(const char *clone, const char *origin);
void dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx);
int dsl_dataset_promote_check(void *arg, dmu_tx_t *tx);
int dsl_dataset_promote(const char *name, char *conflsnap);
@@ -459,7 +466,7 @@ int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, dmu_tx_t *tx);
int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
- dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr, proc_t *proc);
+ dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr);
void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/include/sys/dsl_deleg.h b/sys/contrib/openzfs/include/sys/dsl_deleg.h
index ae729b9f32ff..36dd6211219d 100644
--- a/sys/contrib/openzfs/include/sys/dsl_deleg.h
+++ b/sys/contrib/openzfs/include/sys/dsl_deleg.h
@@ -46,6 +46,7 @@ extern "C" {
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_SEND_RAW "send:raw"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append"
#define ZFS_DELEG_PERM_ALLOW "allow"
diff --git a/sys/contrib/openzfs/include/sys/dsl_dir.h b/sys/contrib/openzfs/include/sys/dsl_dir.h
index 6135835fca48..d2e9e2282975 100644
--- a/sys/contrib/openzfs/include/sys/dsl_dir.h
+++ b/sys/contrib/openzfs/include/sys/dsl_dir.h
@@ -185,11 +185,11 @@ int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation);
int dsl_dir_activate_fs_ss_limit(const char *);
int dsl_fs_ss_limit_check(dsl_dir_t *, uint64_t, zfs_prop_t, dsl_dir_t *,
- cred_t *, proc_t *);
+ cred_t *);
void dsl_fs_ss_count_adjust(dsl_dir_t *, int64_t, const char *, dmu_tx_t *);
int dsl_dir_rename(const char *oldname, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
- uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *, proc_t *);
+ uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
index 30ced55da138..a771b11420fd 100644
--- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
@@ -58,6 +58,7 @@ extern "C" {
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write"
+#define FM_EREPORT_ZFS_SITOUT "sitout"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
@@ -103,6 +104,7 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS "zio_flags"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY "zio_priority"
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TYPE "zio_type"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
diff --git a/sys/contrib/openzfs/include/sys/frame.h b/sys/contrib/openzfs/include/sys/frame.h
index dbcf1087bed8..fe1db28b7077 100644
--- a/sys/contrib/openzfs/include/sys/frame.h
+++ b/sys/contrib/openzfs/include/sys/frame.h
@@ -31,8 +31,16 @@ extern "C" {
#else
#include <linux/frame.h>
#endif
+#if defined(_ASM) && ! defined(HAVE_STACK_FRAME_NON_STANDARD_ASM)
+.macro STACK_FRAME_NON_STANDARD func:req
+.endm
+#endif
#else
#define STACK_FRAME_NON_STANDARD(func)
+#if defined(_ASM)
+.macro STACK_FRAME_NON_STANDARD func:req
+.endm
+#endif
#endif
#ifdef __cplusplus
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index 2d27aee217e0..662fd81c5ee1 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -197,6 +197,12 @@ typedef enum {
ZFS_PROP_VOLTHREADING,
ZFS_PROP_DIRECT,
ZFS_PROP_LONGNAME,
+ ZFS_PROP_DEFAULTUSERQUOTA,
+ ZFS_PROP_DEFAULTGROUPQUOTA,
+ ZFS_PROP_DEFAULTPROJECTQUOTA,
+ ZFS_PROP_DEFAULTUSEROBJQUOTA,
+ ZFS_PROP_DEFAULTGROUPOBJQUOTA,
+ ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -379,6 +385,8 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
+ VDEV_PROP_SIT_OUT,
+ VDEV_PROP_AUTOSIT,
VDEV_NUM_PROPS
} vdev_prop_t;
@@ -740,6 +748,8 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift"
#define ZPOOL_CONFIG_ASHIFT "ashift"
#define ZPOOL_CONFIG_ASIZE "asize"
+#define ZPOOL_CONFIG_MIN_ALLOC "min_alloc"
+#define ZPOOL_CONFIG_MAX_ALLOC "max_alloc"
#define ZPOOL_CONFIG_DTL "DTL"
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
#define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */
@@ -1614,6 +1624,18 @@ typedef enum zfs_ioc {
#endif
+typedef struct zfs_rewrite_args {
+ uint64_t off;
+ uint64_t len;
+ uint64_t flags;
+ uint64_t arg;
+} zfs_rewrite_args_t;
+
+/* zfs_rewrite_args flags */
+#define ZFS_REWRITE_PHYSICAL 0x1 /* Preserve logical birth time. */
+
+#define ZFS_IOC_REWRITE _IOW(0x83, 3, zfs_rewrite_args_t)
+
/*
* ZFS-specific error codes used for returning descriptive errors
* to the userland through zfs ioctls.
@@ -1655,6 +1677,7 @@ typedef enum {
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
ZFS_ERR_ASHIFT_MISMATCH,
ZFS_ERR_STREAM_LARGE_MICROZAP,
+ ZFS_ERR_TOO_MANY_SITOUTS,
} zfs_errno_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/metaslab.h b/sys/contrib/openzfs/include/sys/metaslab.h
index c0844dac9187..36cbe06bacce 100644
--- a/sys/contrib/openzfs/include/sys/metaslab.h
+++ b/sys/contrib/openzfs/include/sys/metaslab.h
@@ -41,7 +41,7 @@ extern "C" {
typedef struct metaslab_ops {
const char *msop_name;
- uint64_t (*msop_alloc)(metaslab_t *, uint64_t);
+ uint64_t (*msop_alloc)(metaslab_t *, uint64_t, uint64_t, uint64_t *);
} metaslab_ops_t;
@@ -81,9 +81,12 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
#define METASLAB_ASYNC_ALLOC 0x8
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t, blkptr_t *, int,
- uint64_t, blkptr_t *, int, zio_alloc_list_t *, int, const void *);
+ uint64_t, const blkptr_t *, int, zio_alloc_list_t *, int, const void *);
+int metaslab_alloc_range(spa_t *, metaslab_class_t *, uint64_t, uint64_t,
+ blkptr_t *, int, uint64_t, const blkptr_t *, int, zio_alloc_list_t *,
+ int, const void *, uint64_t *);
int metaslab_alloc_dva(spa_t *, metaslab_class_t *, uint64_t,
- dva_t *, int, dva_t *, uint64_t, int, zio_alloc_list_t *, int);
+ dva_t *, int, const dva_t *, uint64_t, int, zio_alloc_list_t *, int);
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
void metaslab_free_concrete(vdev_t *, uint64_t, uint64_t, boolean_t);
void metaslab_free_dva(spa_t *, const dva_t *, boolean_t);
@@ -95,21 +98,24 @@ void metaslab_check_free(spa_t *, const blkptr_t *);
void metaslab_stat_init(void);
void metaslab_stat_fini(void);
+void metaslab_trace_move(zio_alloc_list_t *, zio_alloc_list_t *);
void metaslab_trace_init(zio_alloc_list_t *);
void metaslab_trace_fini(zio_alloc_list_t *);
-metaslab_class_t *metaslab_class_create(spa_t *, const metaslab_ops_t *,
- boolean_t);
+metaslab_class_t *metaslab_class_create(spa_t *, const char *,
+ const metaslab_ops_t *, boolean_t);
void metaslab_class_destroy(metaslab_class_t *);
void metaslab_class_validate(metaslab_class_t *);
void metaslab_class_balance(metaslab_class_t *mc, boolean_t onsync);
void metaslab_class_histogram_verify(metaslab_class_t *);
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
uint64_t metaslab_class_expandable_space(metaslab_class_t *);
-boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, zio_t *,
- boolean_t, boolean_t *);
-boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *);
+boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int,
+ uint64_t, boolean_t, boolean_t *);
+boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, int,
+ uint64_t);
void metaslab_class_evict_old(metaslab_class_t *, uint64_t);
+const char *metaslab_class_get_name(metaslab_class_t *);
uint64_t metaslab_class_get_alloc(metaslab_class_t *);
uint64_t metaslab_class_get_space(metaslab_class_t *);
uint64_t metaslab_class_get_dspace(metaslab_class_t *);
@@ -118,7 +124,7 @@ uint64_t metaslab_class_get_deferred(metaslab_class_t *);
void metaslab_space_update(vdev_t *, metaslab_class_t *,
int64_t, int64_t, int64_t);
-metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *, int);
+metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *);
void metaslab_group_destroy(metaslab_group_t *);
void metaslab_group_activate(metaslab_group_t *);
void metaslab_group_passivate(metaslab_group_t *);
@@ -127,6 +133,8 @@ uint64_t metaslab_group_get_space(metaslab_group_t *);
void metaslab_group_histogram_verify(metaslab_group_t *);
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);
+void metaslab_group_alloc_increment_all(spa_t *, blkptr_t *, int, int,
+ uint64_t, const void *);
void metaslab_group_alloc_decrement(spa_t *, uint64_t, int, int, uint64_t,
const void *);
void metaslab_recalculate_weight_and_sort(metaslab_t *);
diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h
index 4408dcfddd4a..6ce995d0a086 100644
--- a/sys/contrib/openzfs/include/sys/metaslab_impl.h
+++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h
@@ -181,7 +181,8 @@ typedef struct metaslab_class_allocator {
struct metaslab_class {
kmutex_t mc_lock;
spa_t *mc_spa;
- const metaslab_ops_t *mc_ops;
+ const char *mc_name;
+ const metaslab_ops_t *mc_ops;
/*
* Track the number of metaslab groups that have been initialized
@@ -269,7 +270,6 @@ struct metaslab_group {
kmutex_t mg_ms_disabled_lock;
kcondvar_t mg_ms_disabled_cv;
- int mg_allocators;
metaslab_group_allocator_t mg_allocator[];
};
@@ -539,6 +539,8 @@ typedef struct metaslab_unflushed_phys {
uint64_t msp_unflushed_txg;
} metaslab_unflushed_phys_t;
+char *metaslab_rt_name(metaslab_group_t *, metaslab_t *, const char *);
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/sys/mod.h b/sys/contrib/openzfs/include/sys/mod.h
deleted file mode 100644
index 4122889ab758..000000000000
--- a/sys/contrib/openzfs/include/sys/mod.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2007 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- * UCRL-CODE-235197
- *
- * This file is part of the SPL, Solaris Porting Layer.
- *
- * The SPL is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * The SPL is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with the SPL. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef _SYS_MOD_H
-#define _SYS_MOD_H
-
-#ifdef _KERNEL
-#include <sys/mod_os.h>
-#else
-/*
- * Exported symbols
- */
-#define EXPORT_SYMBOL(x)
-#endif
-
-#endif /* SYS_MOD_H */
diff --git a/sys/contrib/openzfs/include/sys/nvpair.h b/sys/contrib/openzfs/include/sys/nvpair.h
index 66362a9dbd41..6bec9702eb16 100644
--- a/sys/contrib/openzfs/include/sys/nvpair.h
+++ b/sys/contrib/openzfs/include/sys/nvpair.h
@@ -267,6 +267,8 @@ _SYS_NVPAIR_H int nvlist_lookup_double(const nvlist_t *, const char *,
double *);
#endif
+_SYS_NVPAIR_H int nvlist_snprintf(char *, size_t, nvlist_t *, int);
+
_SYS_NVPAIR_H int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **);
_SYS_NVPAIR_H int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *,
nvpair_t **, int *, const char **);
diff --git a/sys/contrib/openzfs/include/sys/range_tree.h b/sys/contrib/openzfs/include/sys/range_tree.h
index 23e80f64284b..0f6def36f9f6 100644
--- a/sys/contrib/openzfs/include/sys/range_tree.h
+++ b/sys/contrib/openzfs/include/sys/range_tree.h
@@ -49,6 +49,9 @@ typedef enum zfs_range_seg_type {
ZFS_RANGE_SEG_NUM_TYPES,
} zfs_range_seg_type_t;
+#define ZFS_RT_NAME(rt) (((rt)->rt_name != NULL) ? (rt)->rt_name : "")
+#define ZFS_RT_F_DYN_NAME (1ULL << 0) /* if rt_name must be freed */
+
/*
* Note: the range_tree may not be accessed concurrently; consumers
* must provide external locking if required.
@@ -68,6 +71,9 @@ typedef struct zfs_range_tree {
void *rt_arg;
uint64_t rt_gap; /* allowable inter-segment gap */
+ uint64_t rt_flags;
+ const char *rt_name; /* details for debugging */
+
/*
* The rt_histogram maintains a histogram of ranges. Each bucket,
* rt_histogram[i], contains the number of ranges whose size is:
@@ -232,8 +238,7 @@ zfs_rs_set_end_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t end)
}
static inline void
-zfs_zfs_rs_set_fill_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt,
- uint64_t fill)
+zfs_rs_set_fill_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t fill)
{
ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES);
switch (rt->rt_type) {
@@ -271,7 +276,7 @@ static inline void
zfs_rs_set_fill(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t fill)
{
ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift));
- zfs_zfs_rs_set_fill_raw(rs, rt, fill >> rt->rt_shift);
+ zfs_rs_set_fill_raw(rs, rt, fill >> rt->rt_shift);
}
typedef void zfs_range_tree_func_t(void *arg, uint64_t start, uint64_t size);
@@ -281,6 +286,9 @@ zfs_range_tree_t *zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
uint64_t gap);
zfs_range_tree_t *zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
+zfs_range_tree_t *zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops,
+ zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+ uint64_t flags, const char *name);
void zfs_range_tree_destroy(zfs_range_tree_t *rt);
boolean_t zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start,
uint64_t size);
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index 439cd461b710..f172f2af6f07 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -140,7 +140,7 @@ typedef struct zio_cksum_salt {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 7 | padding |
+ * 7 |R| padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -175,6 +175,7 @@ typedef struct zio_cksum_salt {
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
+ * R rewrite (reallocated/rewritten at phys birth TXG)
* phys birth txg when dva[0] was written; zero if same as logical birth txg
* note that typically all the dva's would be written in this
* txg, but they could be different if they were moved by
@@ -190,11 +191,11 @@ typedef struct zio_cksum_salt {
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | vdev1 | pad | ASIZE |
+ * 0 | pad | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | vdev2 | pad | ASIZE |
+ * 2 | pad | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -204,7 +205,7 @@ typedef struct zio_cksum_salt {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 7 | padding |
+ * 7 |R| padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -373,7 +374,8 @@ typedef enum bp_embedded_type {
typedef struct blkptr {
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */
- uint64_t blk_pad[2]; /* Extra space for the future */
+ uint64_t blk_prop2; /* additional properties */
+ uint64_t blk_pad; /* Extra space for the future */
uint64_t blk_birth_word[2];
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
@@ -476,32 +478,51 @@ typedef struct blkptr {
#define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1)
#define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x)
+/*
+ * Block birth time macros for different use cases:
+ * - BP_GET_LOGICAL_BIRTH(): When the block was logically modified by user.
+ * To be used with a focus on user data, like incremental replication.
+ * - BP_GET_PHYSICAL_BIRTH(): When the block was physically written to disks.
+ * For regular writes is equal to logical birth. For dedup and block cloning
+ * can be smaller than logical birth. For remapped and rewritten blocks can
+ * be bigger. To be used with focus on physical disk content: ARC, DDT, scrub.
+ * - BP_GET_RAW_PHYSICAL_BIRTH(): Raw physical birth value. Zero if equal
+ * to logical birth. Should only be used for BP copying and debugging.
+ * - BP_GET_BIRTH(): When the block was allocated, which is a physical birth
+ * for rewritten blocks (rewrite flag set) or logical birth otherwise.
+ */
#define BP_GET_LOGICAL_BIRTH(bp) (bp)->blk_birth_word[1]
#define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x))
-#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0]
+#define BP_GET_RAW_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0]
#define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x))
-#define BP_GET_BIRTH(bp) \
- (BP_IS_EMBEDDED(bp) ? 0 : \
- BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \
+#define BP_GET_PHYSICAL_BIRTH(bp) \
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ BP_GET_RAW_PHYSICAL_BIRTH(bp) ? BP_GET_RAW_PHYSICAL_BIRTH(bp) : \
BP_GET_LOGICAL_BIRTH(bp))
-#define BP_SET_BIRTH(bp, logical, physical) \
-{ \
- ASSERT(!BP_IS_EMBEDDED(bp)); \
- BP_SET_LOGICAL_BIRTH(bp, logical); \
- BP_SET_PHYSICAL_BIRTH(bp, \
- ((logical) == (physical) ? 0 : (physical))); \
+#define BP_GET_BIRTH(bp) \
+ ((BP_IS_EMBEDDED(bp) || !BP_GET_REWRITE(bp)) ? \
+ BP_GET_LOGICAL_BIRTH(bp) : BP_GET_PHYSICAL_BIRTH(bp))
+
+#define BP_SET_BIRTH(bp, logical, physical) \
+{ \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ BP_SET_LOGICAL_BIRTH(bp, logical); \
+ BP_SET_PHYSICAL_BIRTH(bp, \
+ ((logical) == (physical) ? 0 : (physical))); \
}
#define BP_GET_FILL(bp) \
- ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
- ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
+ (BP_IS_EMBEDDED(bp) ? 1 : \
+ BP_IS_ENCRYPTED(bp) ? BF64_GET((bp)->blk_fill, 0, 32) : \
+ (bp)->blk_fill)
#define BP_SET_FILL(bp, fill) \
{ \
- if (BP_IS_ENCRYPTED(bp)) \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ if (BP_IS_ENCRYPTED(bp)) \
BF64_SET((bp)->blk_fill, 0, 32, fill); \
else \
(bp)->blk_fill = fill; \
@@ -516,6 +537,15 @@ typedef struct blkptr {
BF64_SET((bp)->blk_fill, 32, 32, iv2); \
}
+#define BP_GET_REWRITE(bp) \
+ (BP_IS_EMBEDDED(bp) ? 0 : BF64_GET((bp)->blk_prop2, 63, 1))
+
+#define BP_SET_REWRITE(bp, x) \
+{ \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ BF64_SET((bp)->blk_prop2, 63, 1, x); \
+}
+
#define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
@@ -545,7 +575,7 @@ typedef struct blkptr {
(dva1)->dva_word[0] == (dva2)->dva_word[0])
#define BP_EQUAL(bp1, bp2) \
- (BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \
+ (BP_GET_PHYSICAL_BIRTH(bp1) == BP_GET_PHYSICAL_BIRTH(bp2) && \
BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
@@ -588,8 +618,8 @@ typedef struct blkptr {
{ \
BP_ZERO_DVAS(bp); \
(bp)->blk_prop = 0; \
- (bp)->blk_pad[0] = 0; \
- (bp)->blk_pad[1] = 0; \
+ (bp)->blk_prop2 = 0; \
+ (bp)->blk_pad = 0; \
(bp)->blk_birth_word[0] = 0; \
(bp)->blk_birth_word[1] = 0; \
(bp)->blk_fill = 0; \
@@ -696,7 +726,7 @@ typedef struct blkptr {
(u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)BP_GET_PSIZE(bp), \
(u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \
- (u_longlong_t)BP_GET_BIRTH(bp), \
+ (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp), \
(u_longlong_t)BP_GET_FILL(bp), \
ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \
@@ -784,6 +814,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
#define SPA_ASYNC_REBUILD_DONE 0x2000
#define SPA_ASYNC_DETACH_SPARE 0x4000
+#define SPA_ASYNC_REMOVE_BY_USER 0x8000
/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
@@ -849,7 +880,6 @@ extern kcondvar_t spa_namespace_cv;
#define SPA_CONFIG_UPDATE_VDEVS 1
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
-extern void spa_config_load(void);
extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
@@ -980,9 +1010,9 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
uint64_t extents_skipped, uint64_t bytes_skipped,
uint64_t extents_failed, uint64_t bytes_failed);
extern void spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops,
- uint32_t flags);
+ dmu_flags_t flags);
extern void spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops,
- uint32_t flags);
+ dmu_flags_t flags);
extern void spa_import_progress_add(spa_t *spa);
extern void spa_import_progress_remove(uint64_t spa_guid);
extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,
@@ -1000,7 +1030,7 @@ extern void spa_import_progress_set_notes_nolog(spa_t *spa,
extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag,
krw_t rw);
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
-extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
+extern void spa_config_enter_priority(spa_t *spa, int locks, const void *tag,
krw_t rw);
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
@@ -1054,6 +1084,7 @@ extern pool_state_t spa_state(spa_t *spa);
extern spa_load_state_t spa_load_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize);
+extern void spa_get_min_alloc_range(spa_t *spa, uint64_t *min, uint64_t *max);
extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_checkpoint_space(spa_t *spa);
extern uint64_t spa_get_slop_space(spa_t *spa);
@@ -1064,6 +1095,7 @@ extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
extern metaslab_class_t *spa_embedded_log_class(spa_t *spa);
extern metaslab_class_t *spa_special_class(spa_t *spa);
+extern metaslab_class_t *spa_special_embedded_log_class(spa_t *spa);
extern metaslab_class_t *spa_dedup_class(spa_t *spa);
extern metaslab_class_t *spa_preferred_class(spa_t *spa, const zio_t *zio);
extern boolean_t spa_special_has_ddt(spa_t *spa);
@@ -1115,7 +1147,9 @@ extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
+extern boolean_t spa_has_dedup(spa_t *spa);
extern boolean_t spa_has_slogs(spa_t *spa);
+extern boolean_t spa_has_special(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
extern boolean_t spa_writeable(spa_t *spa);
extern boolean_t spa_has_pending_synctask(spa_t *spa);
@@ -1179,7 +1213,7 @@ extern void zfs_ereport_taskq_fini(void);
extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
-extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
+extern void zfs_post_remove(spa_t *spa, vdev_t *vd, boolean_t by_kernel);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_approx_errlog_size(spa_t *spa);
@@ -1210,7 +1244,6 @@ extern void vdev_mirror_stat_fini(void);
/* Initialization and termination */
extern void spa_init(spa_mode_t mode);
extern void spa_fini(void);
-extern void spa_boot_init(void *);
/* properties */
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h
index 8c52f751a819..62b062984d36 100644
--- a/sys/contrib/openzfs/include/sys/spa_impl.h
+++ b/sys/contrib/openzfs/include/sys/spa_impl.h
@@ -55,6 +55,8 @@
#include <sys/dsl_deadlist.h>
#include <zfeature_common.h>
+#include "zfs_crrd.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -246,6 +248,7 @@ struct spa {
metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
metaslab_class_t *spa_special_class; /* special allocation class */
+ metaslab_class_t *spa_special_embedded_log_class; /* log on special */
metaslab_class_t *spa_dedup_class; /* dedup allocation class */
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
@@ -262,6 +265,7 @@ struct spa {
uint64_t spa_min_ashift; /* of vdevs in normal class */
uint64_t spa_max_ashift; /* of vdevs in normal class */
uint64_t spa_min_alloc; /* of vdevs in normal class */
+ uint64_t spa_max_alloc; /* of vdevs in normal class */
uint64_t spa_gcd_alloc; /* of vdevs in normal class */
uint64_t spa_config_guid; /* config pool guid */
uint64_t spa_load_guid; /* spa_load initialized guid */
@@ -343,6 +347,12 @@ struct spa {
spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
zthr_t *spa_checkpoint_discard_zthr;
+ kmutex_t spa_txg_log_time_lock; /* for spa_txg_log_time */
+ dbrrd_t spa_txg_log_time;
+ uint64_t spa_last_noted_txg;
+ uint64_t spa_last_noted_txg_time;
+ uint64_t spa_last_flush_txg_time;
+
space_map_t *spa_syncing_log_sm; /* current log space map */
avl_tree_t spa_sm_logs_by_txg;
kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */
diff --git a/sys/contrib/openzfs/include/sys/txg.h b/sys/contrib/openzfs/include/sys/txg.h
index 70ba89c8ac09..eabb6f7aab4e 100644
--- a/sys/contrib/openzfs/include/sys/txg.h
+++ b/sys/contrib/openzfs/include/sys/txg.h
@@ -25,6 +25,7 @@
*/
/*
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2025, Klara, Inc.
*/
#ifndef _SYS_TXG_H
@@ -66,6 +67,23 @@ typedef struct txg_list {
txg_node_t *tl_head[TXG_SIZE];
} txg_list_t;
+/*
+ * Wait flags for txg_wait_synced_flags(). By default (TXG_WAIT_NONE), it will
+ * wait until the wanted txg is reached, or block forever. Additional flags
+ * indicate other conditions that the caller is interested in, that will cause
+ * the wait to break and return an error code describing the condition.
+ */
+typedef enum {
+ /* No special flags. Guaranteed to block forever or return 0 */
+ TXG_WAIT_NONE = 0,
+
+ /* If a signal arrives while waiting, abort and return EINTR */
+ TXG_WAIT_SIGNAL = (1 << 0),
+
+ /* If the pool suspends while waiting, abort and return ESHUTDOWN. */
+ TXG_WAIT_SUSPEND = (1 << 1),
+} txg_wait_flag_t;
+
struct dsl_pool;
extern void txg_init(struct dsl_pool *dp, uint64_t txg);
@@ -86,13 +104,21 @@ extern void txg_kick(struct dsl_pool *dp, uint64_t txg);
* Try to make this happen as soon as possible (eg. kick off any
* necessary syncs immediately). If txg==0, wait for the currently open
* txg to finish syncing.
+ * See txg_wait_flag_t above for a description of how the flags affect the wait.
+ */
+extern int txg_wait_synced_flags(struct dsl_pool *dp, uint64_t txg,
+ txg_wait_flag_t flags);
+
+/*
+ * Traditional form of txg_wait_synced_flags, waits forever.
+ * Shorthand for VERIFY0(txg_wait_synced_flags(dp, txg, TXG_WAIT_NONE))
*/
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
/*
- * Wait as above. Returns true if the thread was signaled while waiting.
+ * Wake all threads waiting in txg_wait_synced_flags() so they can reevaluate.
*/
-extern boolean_t txg_wait_synced_sig(struct dsl_pool *dp, uint64_t txg);
+extern void txg_wait_kick(struct dsl_pool *dp);
/*
* Wait until the given transaction group, or one after it, is
diff --git a/sys/contrib/openzfs/include/sys/vdev.h b/sys/contrib/openzfs/include/sys/vdev.h
index a6a41882d3cf..510474d6c085 100644
--- a/sys/contrib/openzfs/include/sys/vdev.h
+++ b/sys/contrib/openzfs/include/sys/vdev.h
@@ -100,6 +100,7 @@ extern boolean_t vdev_replace_in_progress(vdev_t *vdev);
extern void vdev_hold(vdev_t *);
extern void vdev_rele(vdev_t *);
+void vdev_update_nonallocating_space(vdev_t *vd, boolean_t add);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
@@ -133,9 +134,12 @@ extern void vdev_space_update(vdev_t *vd,
extern int64_t vdev_deflated_space(vdev_t *vd, int64_t space);
+extern uint64_t vdev_asize_to_psize_txg(vdev_t *vd, uint64_t asize,
+ uint64_t txg);
extern uint64_t vdev_psize_to_asize_txg(vdev_t *vd, uint64_t psize,
uint64_t txg);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
+extern uint64_t vdev_get_min_alloc(vdev_t *vd);
/*
* Return the amount of space allocated for a gang block header. Note that
@@ -145,7 +149,20 @@ extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
static inline uint64_t
vdev_gang_header_asize(vdev_t *vd)
{
- return (vdev_psize_to_asize_txg(vd, SPA_GANGBLOCKSIZE, 0));
+ return (vdev_psize_to_asize_txg(vd, SPA_OLD_GANGBLOCKSIZE, 0));
+}
+
+/*
+ * Return the amount of data that can be stored in a gang header. Because we
+ * need to ensure gang headers can always be allocated (as long as there is
+ * space available), this is the minimum allocatable size on the vdev. Note that
+ * since the physical birth txg is not provided, this must be constant for
+ * a given vdev. (e.g. raidz expansion can't change this)
+ */
+static inline uint64_t
+vdev_gang_header_psize(vdev_t *vd)
+{
+ return (vdev_get_min_alloc(vd));
}
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
diff --git a/sys/contrib/openzfs/include/sys/vdev_draid.h b/sys/contrib/openzfs/include/sys/vdev_draid.h
index d44ab6681db9..e923092a39ad 100644
--- a/sys/contrib/openzfs/include/sys/vdev_draid.h
+++ b/sys/contrib/openzfs/include/sys/vdev_draid.h
@@ -95,7 +95,7 @@ extern int vdev_draid_generate_perms(const draid_map_t *, uint8_t **);
*/
extern boolean_t vdev_draid_readable(vdev_t *, uint64_t);
extern boolean_t vdev_draid_missing(vdev_t *, uint64_t, uint64_t, uint64_t);
-extern uint64_t vdev_draid_asize_to_psize(vdev_t *, uint64_t);
+extern uint64_t vdev_draid_asize_to_psize(vdev_t *, uint64_t, uint64_t);
extern void vdev_draid_map_alloc_empty(zio_t *, struct raidz_row *);
extern int vdev_draid_map_verify_empty(zio_t *, struct raidz_row *);
extern nvlist_t *vdev_draid_read_config_spare(vdev_t *);
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index a2a3e25d14cc..5a8c2f846be2 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -103,7 +103,8 @@ typedef const struct vdev_ops {
vdev_fini_func_t *vdev_op_fini;
vdev_open_func_t *vdev_op_open;
vdev_close_func_t *vdev_op_close;
- vdev_asize_func_t *vdev_op_asize;
+ vdev_asize_func_t *vdev_op_psize_to_asize;
+ vdev_asize_func_t *vdev_op_asize_to_psize;
vdev_min_asize_func_t *vdev_op_min_asize;
vdev_min_alloc_func_t *vdev_op_min_alloc;
vdev_io_start_func_t *vdev_op_io_start;
@@ -278,10 +279,12 @@ struct vdev {
uint64_t vdev_noalloc; /* device is passivated? */
uint64_t vdev_removing; /* device is being removed? */
uint64_t vdev_failfast; /* device failfast setting */
+ boolean_t vdev_autosit; /* automatic sitout management */
boolean_t vdev_rz_expanding; /* raidz is being expanded? */
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
+ uint64_t vdev_last_latency_check;
/* pool checkpoint related */
space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */
@@ -430,6 +433,10 @@ struct vdev {
hrtime_t vdev_mmp_pending; /* 0 if write finished */
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
uint64_t vdev_expansion_time; /* vdev's last expansion time */
+ /* used to calculate average read latency */
+ uint64_t *vdev_prev_histo;
+ int64_t vdev_outlier_count; /* read outlier amongst peers */
+ hrtime_t vdev_read_sit_out_expire; /* end of sit out period */
list_node_t vdev_leaf_node; /* leaf vdev list */
/*
@@ -615,11 +622,11 @@ extern vdev_ops_t vdev_indirect_ops;
*/
extern void vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs,
zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs);
+extern uint64_t vdev_default_psize(vdev_t *vd, uint64_t asize, uint64_t txg);
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg);
extern uint64_t vdev_default_min_asize(vdev_t *vd);
extern uint64_t vdev_get_min_asize(vdev_t *vd);
extern void vdev_set_min_asize(vdev_t *vd);
-extern uint64_t vdev_get_min_alloc(vdev_t *vd);
extern uint64_t vdev_get_nparity(vdev_t *vd);
extern uint64_t vdev_get_ndisks(vdev_t *vd);
@@ -643,10 +650,11 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise);
int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
void vdev_metaslab_group_create(vdev_t *vd);
uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b);
-#if defined(__linux__)
+#if defined(__linux__) && defined(_KERNEL)
int param_get_raidz_impl(char *buf, zfs_kernel_param_t *kp);
#endif
int param_set_raidz_impl(ZFS_MODULE_PARAM_ARGS);
+char *vdev_rt_name(vdev_t *vd, const char *name);
/*
* Vdev ashift optimization tunables
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz.h b/sys/contrib/openzfs/include/sys/vdev_raidz.h
index 3b02728cdbf3..df8c2aed4045 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h
@@ -61,6 +61,9 @@ void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
+boolean_t vdev_sit_out_reads(vdev_t *, zio_flag_t);
+void vdev_raidz_sit_child(vdev_t *, uint64_t);
+void vdev_raidz_unsit_child(vdev_t *);
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
index debce6f09a22..8c8dcfb077f6 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
@@ -119,6 +119,7 @@ typedef struct raidz_col {
uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */
uint8_t rc_force_repair:1; /* Write good data to this column */
uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */
+ uint8_t rc_latency_outlier:1; /* Latency outlier for this device */
int rc_shadow_devidx; /* for double write during expansion */
int rc_shadow_error; /* for double write during expansion */
uint64_t rc_shadow_offset; /* for double write during expansion */
@@ -133,6 +134,7 @@ typedef struct raidz_row {
int rr_firstdatacol; /* First data column/parity count */
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
int rr_nempty; /* empty sectors included in parity */
+ int rr_outlier_cnt; /* Count of latency outlier devices */
#ifdef ZFS_DEBUG
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
diff --git a/sys/contrib/openzfs/include/sys/xvattr.h b/sys/contrib/openzfs/include/sys/xvattr.h
index 447842d269b3..5dadbdb4c619 100644
--- a/sys/contrib/openzfs/include/sys/xvattr.h
+++ b/sys/contrib/openzfs/include/sys/xvattr.h
@@ -311,6 +311,7 @@ xva_getxoptattr(xvattr_t *xvap)
*/
#define V_ACE_MASK 0x1 /* mask represents NFSv4 ACE permissions */
#define V_APPEND 0x2 /* want to do append only check */
+#define V_NAMEDATTR 0x4 /* is a named attribute check */
/*
* Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations
diff --git a/sys/contrib/openzfs/include/sys/zcp.h b/sys/contrib/openzfs/include/sys/zcp.h
index 96279deaee75..5fcfb6219870 100644
--- a/sys/contrib/openzfs/include/sys/zcp.h
+++ b/sys/contrib/openzfs/include/sys/zcp.h
@@ -76,7 +76,6 @@ typedef struct zcp_run_info {
* rather than the 'current' thread's.
*/
cred_t *zri_cred;
- proc_t *zri_proc;
/*
* The tx in which this channel program is running.
diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h
index 549f54c09383..7112d3ef5c99 100644
--- a/sys/contrib/openzfs/include/sys/zfs_context.h
+++ b/sys/contrib/openzfs/include/sys/zfs_context.h
@@ -205,18 +205,6 @@ extern void vpanic(const char *, va_list)
#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i)
/*
- * Tunables.
- */
-typedef struct zfs_kernel_param {
- const char *name; /* unused stub */
-} zfs_kernel_param_t;
-
-#define ZFS_MODULE_PARAM(scope_prefix, name_prefix, name, type, perm, desc)
-#define ZFS_MODULE_PARAM_ARGS void
-#define ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, \
- getfunc, perm, desc)
-
-/*
* Threads.
*/
typedef pthread_t kthread_t;
@@ -236,6 +224,11 @@ typedef pthread_t kthread_t;
#define thread_join(t) pthread_join((pthread_t)(t), NULL)
#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS)
+/*
+ * Check if the current thread is a memory reclaim thread.
+ * Always returns false in userspace (no memory reclaim thread).
+ */
+#define current_is_reclaim_thread() (0)
/* in libzpool, p0 exists only to have its address taken */
typedef struct proc {
@@ -623,8 +616,10 @@ extern void delay(clock_t ticks);
* Process priorities as defined by setpriority(2) and getpriority(2).
*/
#define minclsyspri 19
-#define maxclsyspri -20
#define defclsyspri 0
+/* Write issue taskq priority. */
+#define wtqclsyspri -19
+#define maxclsyspri -20
#define CPU_SEQID ((uintptr_t)pthread_self() & (max_ncpus - 1))
#define CPU_SEQID_UNSTABLE CPU_SEQID
@@ -632,6 +627,9 @@ extern void delay(clock_t ticks);
#define kcred NULL
#define CRED() NULL
+#define crhold(cr) ((void)cr)
+#define crfree(cr) ((void)cr)
+
#define ptob(x) ((x) * PAGESIZE)
#define NN_DIVISOR_1000 (1U << 0)
@@ -668,7 +666,7 @@ extern void random_fini(void);
struct spa;
extern void show_pool_stats(struct spa *);
-extern int set_global_var(char const *arg);
+extern int handle_tunable_option(const char *, boolean_t);
typedef struct callb_cpr {
kmutex_t *cc_lockp;
@@ -744,7 +742,6 @@ extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int secpolicy_zfs(const cred_t *cr);
-extern int secpolicy_zfs_proc(const cred_t *cr, proc_t *proc);
extern zoneid_t getzoneid(void);
/* SID stuff */
@@ -774,7 +771,6 @@ typedef int fstrans_cookie_t;
extern fstrans_cookie_t spl_fstrans_mark(void);
extern void spl_fstrans_unmark(fstrans_cookie_t);
-extern int __spl_pf_fstrans_check(void);
extern int kmem_cache_reap_active(void);
diff --git a/sys/contrib/openzfs/include/sys/zfs_debug.h b/sys/contrib/openzfs/include/sys/zfs_debug.h
index 871936da15f6..4d4cd4c39e97 100644
--- a/sys/contrib/openzfs/include/sys/zfs_debug.h
+++ b/sys/contrib/openzfs/include/sys/zfs_debug.h
@@ -39,6 +39,8 @@ extern "C" {
#define FALSE 0
#endif
+#include <sys/nvpair.h>
+
extern int zfs_flags;
extern int zfs_recover;
extern int zfs_free_leak_on_eio;
@@ -104,6 +106,24 @@ extern void zfs_panic_recover(const char *fmt, ...);
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);
+/*
+ * When printing an nvlist, print one beginning line with the file/func/line
+ * number and the text "nvlist <var name>:" followed by all the nvlist lines
+ * without the file/func/line number. This makes the nvlist lines easy to read.
+ */
+#define zfs_dbgmsg_nvlist(nv) \
+ if (zfs_dbgmsg_enable) { \
+ zfs_dbgmsg("nvlist "#nv":"); \
+ __zfs_dbgmsg_nvlist(nv); \
+ }
+
+#define zfs_dbgmsg(...) \
+ if (zfs_dbgmsg_enable) \
+ __dprintf(B_FALSE, __FILE__, __func__, __LINE__, __VA_ARGS__)
+
+
+extern void __zfs_dbgmsg_nvlist(nvlist_t *nv);
+
#ifndef _KERNEL
extern int dprintf_find_string(const char *string);
extern void zfs_dbgmsg_print(int fd, const char *tag);
diff --git a/sys/contrib/openzfs/include/sys/zfs_file.h b/sys/contrib/openzfs/include/sys/zfs_file.h
index a1f344c2bb79..67abe9988aaa 100644
--- a/sys/contrib/openzfs/include/sys/zfs_file.h
+++ b/sys/contrib/openzfs/include/sys/zfs_file.h
@@ -46,7 +46,7 @@ void zfs_file_close(zfs_file_t *fp);
int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid);
int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off,
- ssize_t *resid);
+ uint8_t ashift, ssize_t *resid);
int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid);
int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off,
ssize_t *resid);
diff --git a/sys/contrib/openzfs/include/sys/zfs_ioctl.h b/sys/contrib/openzfs/include/sys/zfs_ioctl.h
index 1805028024e6..cfe11f43bb8e 100644
--- a/sys/contrib/openzfs/include/sys/zfs_ioctl.h
+++ b/sys/contrib/openzfs/include/sys/zfs_ioctl.h
@@ -455,6 +455,7 @@ typedef enum zinject_type {
ZINJECT_DECRYPT_FAULT,
ZINJECT_DELAY_IMPORT,
ZINJECT_DELAY_EXPORT,
+ ZINJECT_DELAY_READY,
} zinject_type_t;
typedef enum zinject_iotype {
@@ -534,10 +535,22 @@ typedef struct zfs_cmd {
zfs_share_t zc_share;
dmu_objset_stats_t zc_objset_stats;
struct drr_begin zc_begin_record;
- zinject_record_t zc_inject_record;
- uint32_t zc_defer_destroy;
- uint32_t zc_flags;
- uint64_t zc_action_handle;
+
+ /*
+ * zinject_record_t grew past its original size, which would push out
+ * the size of zfs_cmd_t. To adjust for this, we allow it to use the
+ * space after it, since those fields aren't used with ZFS_IOC_INJECT.
+ */
+ union {
+ zinject_record_t zc_inject_record;
+ struct {
+ char zc_pad1[sizeof (zinject_record_t) - 16];
+ uint32_t zc_defer_destroy;
+ uint32_t zc_flags;
+ uint64_t zc_action_handle;
+ };
+ };
+
int zc_cleanup_fd;
uint8_t zc_simple;
uint8_t zc_pad[3]; /* alignment */
@@ -548,6 +561,20 @@ typedef struct zfs_cmd {
uint64_t zc_zoneid;
} zfs_cmd_t;
+/*
+ * zfs_cmd_t (and by extension, its member structs) must always be the same
+ * size. Changing it will break compatibility between the kernel module and the
+ * userspace tools.
+ *
+ * This test is convoluted because MAXPATHLEN and MAXNAMELEN can vary across
+ * platforms. We include them directly here, which means it won't trip if those
+ * ever change, but if that happens we likely have other things to worry about.
+ */
+#define _expected_zfs_cmd_size ((MAXPATHLEN*3)+MAXNAMELEN+1200)
+_Static_assert(sizeof (zfs_cmd_t) == _expected_zfs_cmd_size,
+ "zfs_cmd_t has wrong size");
+#undef _expected_zfs_cmd_size
+
typedef struct zfs_useracct {
char zu_domain[256];
uid_t zu_rid;
diff --git a/sys/contrib/openzfs/include/sys/zfs_quota.h b/sys/contrib/openzfs/include/sys/zfs_quota.h
index f12a0f2db394..62389cd2f3b2 100644
--- a/sys/contrib/openzfs/include/sys/zfs_quota.h
+++ b/sys/contrib/openzfs/include/sys/zfs_quota.h
@@ -35,7 +35,7 @@ extern int zpl_get_file_info(dmu_object_type_t,
extern int zfs_userspace_one(struct zfsvfs *, zfs_userquota_prop_t,
const char *, uint64_t, uint64_t *);
extern int zfs_userspace_many(struct zfsvfs *, zfs_userquota_prop_t,
- uint64_t *, void *, uint64_t *);
+ uint64_t *, void *, uint64_t *, uint64_t *);
extern int zfs_set_userquota(struct zfsvfs *, zfs_userquota_prop_t,
const char *, uint64_t, uint64_t);
diff --git a/sys/contrib/openzfs/include/sys/zfs_racct.h b/sys/contrib/openzfs/include/sys/zfs_racct.h
index 939e8fa666e9..562029d4114d 100644
--- a/sys/contrib/openzfs/include/sys/zfs_racct.h
+++ b/sys/contrib/openzfs/include/sys/zfs_racct.h
@@ -33,7 +33,9 @@
/*
* Platform-dependent resource accounting hooks
*/
-void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags);
-void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags);
+void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops,
+ dmu_flags_t flags);
+void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops,
+ dmu_flags_t flags);
#endif /* _SYS_ZFS_RACCT_H */
diff --git a/sys/contrib/openzfs/include/sys/zfs_vfsops.h b/sys/contrib/openzfs/include/sys/zfs_vfsops.h
index 18cc31e7183f..8b8f73cf3540 100644
--- a/sys/contrib/openzfs/include/sys/zfs_vfsops.h
+++ b/sys/contrib/openzfs/include/sys/zfs_vfsops.h
@@ -27,7 +27,7 @@
#ifndef _SYS_ZFS_VFSOPS_H
#define _SYS_ZFS_VFSOPS_H
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_WANT_ZNODE)
#include <sys/zfs_vfsops_os.h>
#endif
diff --git a/sys/contrib/openzfs/include/sys/zfs_vnops.h b/sys/contrib/openzfs/include/sys/zfs_vnops.h
index 21f0da4fe6b4..08cf0e2a6e48 100644
--- a/sys/contrib/openzfs/include/sys/zfs_vnops.h
+++ b/sys/contrib/openzfs/include/sys/zfs_vnops.h
@@ -40,6 +40,7 @@ extern int zfs_clone_range(znode_t *, uint64_t *, znode_t *, uint64_t *,
uint64_t *, cred_t *);
extern int zfs_clone_range_replay(znode_t *, uint64_t, uint64_t, uint64_t,
const blkptr_t *, size_t);
+extern int zfs_rewrite(znode_t *, uint64_t, uint64_t, uint64_t, uint64_t);
extern int zfs_getsecattr(znode_t *, vsecattr_t *, int, cred_t *);
extern int zfs_setsecattr(znode_t *, vsecattr_t *, int, cred_t *);
diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h
index b3a267e16f3e..79b845a672a8 100644
--- a/sys/contrib/openzfs/include/sys/zfs_znode.h
+++ b/sys/contrib/openzfs/include/sys/zfs_znode.h
@@ -73,7 +73,7 @@ extern "C" {
pflags |= attr; \
else \
pflags &= ~attr; \
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \
+ VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \
&pflags, sizeof (pflags), tx)); \
}
@@ -163,8 +163,9 @@ extern int zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl,
sa_attr_type_t *sa_table, uint64_t *pobjp, int *is_xattrdir);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_WANT_ZNODE)
#include <sys/zfs_znode_impl.h>
+#include <sys/zfs_rlock.h>
/*
* Directory entry locks control access to directory entries.
@@ -201,8 +202,6 @@ typedef struct znode {
uint64_t z_size; /* file size (cached) */
uint64_t z_pflags; /* pflags (cached) */
uint32_t z_sync_cnt; /* synchronous open count */
- uint32_t z_sync_writes_cnt; /* synchronous write count */
- uint32_t z_async_writes_cnt; /* asynchronous write count */
mode_t z_mode; /* mode (cached) */
kmutex_t z_acl_lock; /* acl data lock */
zfs_acl_t *z_acl_cached; /* cached acl */
@@ -219,7 +218,9 @@ typedef struct znode {
*/
ZNODE_OS_FIELDS;
} znode_t;
+#endif
+#ifdef _KERNEL
/* Verifies the znode is valid. */
static inline int
zfs_verify_zp(znode_t *zp)
diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h
index fa7945d8ab8b..da085998879b 100644
--- a/sys/contrib/openzfs/include/sys/zil.h
+++ b/sys/contrib/openzfs/include/sys/zil.h
@@ -456,7 +456,7 @@ typedef enum {
WR_NUM_STATES /* number of states */
} itx_wr_state_t;
-typedef void (*zil_callback_t)(void *data);
+typedef void (*zil_callback_t)(void *data, int err);
typedef struct itx {
list_node_t itx_node; /* linkage on zl_itx_list */
@@ -498,10 +498,13 @@ typedef struct zil_stats {
* (see zil_commit_writer_stall())
* - suspend: ZIL suspended
* (see zil_commit(), zil_get_commit_list())
+ * - crash: ZIL crashed
+ * (see zil_crash(), zil_commit(), ...)
*/
kstat_named_t zil_commit_error_count;
kstat_named_t zil_commit_stall_count;
kstat_named_t zil_commit_suspend_count;
+ kstat_named_t zil_commit_crash_count;
/*
* Number of transactions (reads, writes, renames, etc.)
@@ -549,6 +552,7 @@ typedef struct zil_sums {
wmsum_t zil_commit_error_count;
wmsum_t zil_commit_stall_count;
wmsum_t zil_commit_suspend_count;
+ wmsum_t zil_commit_crash_count;
wmsum_t zil_itx_count;
wmsum_t zil_itx_indirect_count;
wmsum_t zil_itx_indirect_bytes;
@@ -577,6 +581,25 @@ typedef struct zil_sums {
#define ZIL_STAT_BUMP(zil, stat) \
ZIL_STAT_INCR(zil, stat, 1);
+/*
+ * Flags for zil_commit_flags(). zil_commit() is a shortcut for
+ * zil_commit_flags(ZIL_COMMIT_FAILMODE), which is the most common use.
+ */
+typedef enum {
+ /*
+ * Try to commit the ZIL. If it fails, fall back to txg_wait_synced().
+ * If that fails, return EIO.
+ */
+ ZIL_COMMIT_NOW = 0,
+
+ /*
+ * Like ZIL_COMMIT_NOW, but if the ZIL commit fails because the pool
+ * suspended, act according to the pool's failmode= setting (wait for
+ * the pool to resume, or return EIO).
+ */
+ ZIL_COMMIT_FAILMODE = (1 << 1),
+} zil_commit_flag_t;
+
typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
uint64_t txg);
typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg,
@@ -606,14 +629,16 @@ extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
-extern void zil_itx_destroy(itx_t *itx);
+extern void zil_itx_destroy(itx_t *itx, int err);
extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_async_to_sync(zilog_t *zilog, uint64_t oid);
-extern void zil_commit(zilog_t *zilog, uint64_t oid);
-extern void zil_commit_impl(zilog_t *zilog, uint64_t oid);
extern void zil_remove_async(zilog_t *zilog, uint64_t oid);
+extern int zil_commit_flags(zilog_t *zilog, uint64_t oid,
+ zil_commit_flag_t flags);
+extern int __must_check zil_commit(zilog_t *zilog, uint64_t oid);
+
extern int zil_reset(const char *osname, void *txarg);
extern int zil_claim(struct dsl_pool *dp,
struct dsl_dataset *ds, void *txarg);
@@ -635,6 +660,8 @@ extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval);
extern uint64_t zil_max_copied_data(zilog_t *zilog);
extern uint64_t zil_max_log_data(zilog_t *zilog, size_t hdrsize);
+extern itx_wr_state_t zil_write_state(zilog_t *zilog, uint64_t size,
+ uint32_t blocksize, boolean_t o_direct, boolean_t commit);
extern void zil_sums_init(zil_sums_t *zs);
extern void zil_sums_fini(zil_sums_t *zs);
@@ -642,6 +669,8 @@ extern void zil_kstat_values_update(zil_kstat_values_t *zs,
zil_sums_t *zil_sums);
extern int zil_replay_disable;
+extern uint_t zfs_immediate_write_sz;
+extern int zil_special_is_slog;
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/include/sys/zil_impl.h b/sys/contrib/openzfs/include/sys/zil_impl.h
index 252264b9eae9..ea1364a7e35a 100644
--- a/sys/contrib/openzfs/include/sys/zil_impl.h
+++ b/sys/contrib/openzfs/include/sys/zil_impl.h
@@ -41,8 +41,8 @@ extern "C" {
*
* An lwb will start out in the "new" state, and transition to the "opened"
* state via a call to zil_lwb_write_open() on first itx assignment. When
- * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be
- * held.
+ * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" and
+ * LWB's "lwb_lock" must be held.
*
* After the lwb is "opened", it can be assigned number of itxs and transition
* into the "closed" state via zil_lwb_write_close() when full or on timeout.
@@ -100,16 +100,22 @@ typedef enum {
* holding the "zl_issuer_lock". After the lwb is issued, the zilog's
* "zl_lock" is used to protect the lwb against concurrent access.
*/
+typedef enum {
+ LWB_FLAG_SLIM = (1<<0), /* log block has slim format */
+ LWB_FLAG_SLOG = (1<<1), /* lwb_blk is on SLOG device */
+ LWB_FLAG_CRASHED = (1<<2), /* lwb is on the crash list */
+} lwb_flag_t;
+
typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
- boolean_t lwb_slim; /* log block has slim format */
- boolean_t lwb_slog; /* lwb_blk is on SLOG device */
+ lwb_flag_t lwb_flags; /* extra info about this lwb */
int lwb_error; /* log block allocation error */
int lwb_nmax; /* max bytes in the buffer */
int lwb_nused; /* # used bytes in buffer */
int lwb_nfilled; /* # filled bytes in buffer */
int lwb_sz; /* size of block and buffer */
+ int lwb_min_sz; /* min size for range allocation */
lwb_state_t lwb_state; /* the state of this lwb */
char *lwb_buf; /* log write buffer */
zio_t *lwb_child_zio; /* parent zio for children */
@@ -124,7 +130,7 @@ typedef struct lwb {
list_t lwb_itxs; /* list of itx's */
list_t lwb_waiters; /* list of zil_commit_waiter's */
avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */
- kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */
+ kmutex_t lwb_lock; /* protects lwb_vdev_tree and size */
} lwb_t;
/*
@@ -149,7 +155,7 @@ typedef struct zil_commit_waiter {
list_node_t zcw_node; /* linkage in lwb_t:lwb_waiter list */
lwb_t *zcw_lwb; /* back pointer to lwb when linked */
boolean_t zcw_done; /* B_TRUE when "done", else B_FALSE */
- int zcw_zio_error; /* contains the zio io_error value */
+ int zcw_error; /* result to return from zil_commit() */
} zil_commit_waiter_t;
/*
@@ -221,6 +227,7 @@ struct zilog {
uint64_t zl_cur_left; /* current burst remaining size */
uint64_t zl_cur_max; /* biggest record in current burst */
list_t zl_lwb_list; /* in-flight log write list */
+ list_t zl_lwb_crash_list; /* log writes in-flight at crash */
avl_tree_t zl_bp_tree; /* track bps during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
@@ -245,6 +252,9 @@ struct zilog {
*/
uint64_t zl_max_block_size;
+ /* After crash, txg to restart zil */
+ uint64_t zl_restart_txg;
+
/* Pointer for per dataset zil sums */
zil_sums_t *zl_sums;
};
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index 78adca4d7d00..acb0a03a36b2 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -59,21 +59,37 @@ typedef struct zio_eck {
/*
* Gang block headers are self-checksumming and contain an array
- * of block pointers.
+ * of block pointers. The old gang block size has enough room for 3 blkptrs,
+ * while new gang blocks can store more.
+ *
+ * Layout:
+ * +--------+--------+--------+-----+---------+-----------+
+ * |        |        |        |     |         |           |
+ * | blkptr | blkptr | blkptr | ... | padding | zio_eck_t |
+ * |   1    |   2    |   3    |     |         |           |
+ * +--------+--------+--------+-----+---------+-----------+
+ *    128B     128B     128B            88B        40B
*/
-#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
-#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
- sizeof (zio_eck_t)) / sizeof (blkptr_t))
-#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
- sizeof (zio_eck_t) - \
- (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
- sizeof (uint64_t))
-
-typedef struct zio_gbh {
- blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
- uint64_t zg_filler[SPA_GBH_FILLER];
- zio_eck_t zg_tail;
-} zio_gbh_phys_t;
+#define SPA_OLD_GANGBLOCKSIZE SPA_MINBLOCKSIZE
+typedef void zio_gbh_phys_t;
+
+static inline uint64_t
+gbh_nblkptrs(uint64_t size) {
+ ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t)));
+ return ((size - sizeof (zio_eck_t)) / sizeof (blkptr_t));
+}
+
+static inline zio_eck_t *
+gbh_eck(zio_gbh_phys_t *gbh, uint64_t size) {
+ ASSERT(IS_P2ALIGNED(size, sizeof (blkptr_t)));
+ return ((zio_eck_t *)((uintptr_t)gbh + (size_t)size -
+ sizeof (zio_eck_t)));
+}
+
+static inline blkptr_t *
+gbh_bp(zio_gbh_phys_t *gbh, int bp) {
+ return (&((blkptr_t *)gbh)[bp]);
+}
enum zio_checksum {
ZIO_CHECKSUM_INHERIT = 0,
@@ -196,7 +212,7 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_DONT_RETRY (1ULL << 10)
#define ZIO_FLAG_NODATA (1ULL << 12)
#define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13)
-#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14)
+#define ZIO_FLAG_ALLOC_THROTTLED (1ULL << 14)
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
@@ -226,7 +242,7 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_NOPWRITE (1ULL << 29)
#define ZIO_FLAG_REEXECUTED (1ULL << 30)
#define ZIO_FLAG_DELEGATED (1ULL << 31)
-#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
+#define ZIO_FLAG_PREALLOCATED (1ULL << 32)
#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
@@ -345,25 +361,26 @@ struct zbookmark_err_phys {
(zb)->zb_blkid == ZB_ROOT_BLKID)
typedef struct zio_prop {
- enum zio_checksum zp_checksum;
- enum zio_compress zp_compress;
+ enum zio_checksum zp_checksum:8;
+ enum zio_compress zp_compress:8;
uint8_t zp_complevel;
uint8_t zp_level;
uint8_t zp_copies;
uint8_t zp_gang_copies;
- dmu_object_type_t zp_type;
- boolean_t zp_dedup;
- boolean_t zp_dedup_verify;
- boolean_t zp_nopwrite;
- boolean_t zp_brtwrite;
- boolean_t zp_encrypt;
- boolean_t zp_byteorder;
- boolean_t zp_direct_write;
+ dmu_object_type_t zp_type:8;
+ dmu_object_type_t zp_storage_type:8;
+ boolean_t zp_dedup:1;
+ boolean_t zp_dedup_verify:1;
+ boolean_t zp_nopwrite:1;
+ boolean_t zp_brtwrite:1;
+ boolean_t zp_encrypt:1;
+ boolean_t zp_byteorder:1;
+ boolean_t zp_direct_write:1;
+ boolean_t zp_rewrite:1;
+ uint32_t zp_zpl_smallblk;
uint8_t zp_salt[ZIO_DATA_SALT_LEN];
uint8_t zp_iv[ZIO_DATA_IV_LEN];
uint8_t zp_mac[ZIO_DATA_MAC_LEN];
- uint32_t zp_zpl_smallblk;
- dmu_object_type_t zp_storage_type;
} zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t;
@@ -398,7 +415,9 @@ typedef struct zio_vsd_ops {
typedef struct zio_gang_node {
zio_gbh_phys_t *gn_gbh;
- struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
+ uint64_t gn_gangblocksize;
+ uint64_t gn_allocsize;
+ struct zio_gang_node *gn_child[];
} zio_gang_node_t;
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
@@ -417,14 +436,16 @@ typedef struct zio_transform {
typedef zio_t *zio_pipe_stage_t(zio_t *zio);
/*
- * The io_reexecute flags are distinct from io_flags because the child must
- * be able to propagate them to the parent. The normal io_flags are local
- * to the zio, not protected by any lock, and not modifiable by children;
- * the reexecute flags are protected by io_lock, modifiable by children,
- * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set.
+ * The io_post flags describe additional actions that a parent IO should
+ * consider or perform on behalf of a child. They are distinct from io_flags
+ * because the child must be able to propagate them to the parent. The normal
+ * io_flags are local to the zio, not protected by any lock, and not modifiable
+ * by children; the io_post flags are protected by io_lock, modifiable by
+ * children, and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set.
*/
-#define ZIO_REEXECUTE_NOW 0x01
-#define ZIO_REEXECUTE_SUSPEND 0x02
+#define ZIO_POST_REEXECUTE (1 << 0)
+#define ZIO_POST_SUSPEND (1 << 1)
+#define ZIO_POST_DIO_CHKSUM_ERR (1 << 2)
/*
* The io_trim flags are used to specify the type of TRIM to perform. They
@@ -460,7 +481,7 @@ struct zio {
enum zio_child io_child_type;
enum trim_flag io_trim_flags;
zio_priority_t io_priority;
- uint8_t io_reexecute;
+ uint8_t io_post;
uint8_t io_state[ZIO_WAIT_TYPES];
uint64_t io_txg;
spa_t *io_spa;
@@ -602,7 +623,8 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, zio_flag_t flags);
extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
- blkptr_t *new_bp, uint64_t size, boolean_t *slog);
+ blkptr_t *new_bp, uint64_t min_size, uint64_t max_size, boolean_t *slog,
+ boolean_t allow_larger);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
@@ -620,7 +642,6 @@ extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
-extern void zio_add_child_first(zio_t *pio, zio_t *cio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
@@ -697,6 +718,7 @@ extern void zio_handle_ignored_writes(zio_t *zio);
extern hrtime_t zio_handle_io_delay(zio_t *zio);
extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed);
extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed);
+extern hrtime_t zio_handle_ready_delay(zio_t *zio);
/*
* Checksum ereport functions
diff --git a/sys/contrib/openzfs/include/sys/zvol.h b/sys/contrib/openzfs/include/sys/zvol.h
index 32e703650935..5791246e99e4 100644
--- a/sys/contrib/openzfs/include/sys/zvol.h
+++ b/sys/contrib/openzfs/include/sys/zvol.h
@@ -36,8 +36,7 @@
#define SPEC_MAXOFFSET_T ((1LL << ((NBBY * sizeof (daddr32_t)) + \
DEV_BSHIFT - 1)) - 1)
-extern void zvol_create_minor(const char *);
-extern void zvol_create_minors_recursive(const char *);
+extern void zvol_create_minors(const char *);
extern void zvol_remove_minors(spa_t *, const char *, boolean_t);
extern void zvol_rename_minors(spa_t *, const char *, const char *, boolean_t);
@@ -54,7 +53,7 @@ extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volthreading(const char *, boolean_t);
extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
extern int zvol_set_ro(const char *, boolean_t);
-extern zvol_state_handle_t *zvol_suspend(const char *);
+extern int zvol_suspend(const char *, zvol_state_handle_t **);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);
diff --git a/sys/contrib/openzfs/include/sys/zvol_impl.h b/sys/contrib/openzfs/include/sys/zvol_impl.h
index 3a40b40f7f3d..5422e66832c0 100644
--- a/sys/contrib/openzfs/include/sys/zvol_impl.h
+++ b/sys/contrib/openzfs/include/sys/zvol_impl.h
@@ -20,7 +20,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2024, Klara, Inc.
+ * Copyright (c) 2024, 2025, Klara, Inc.
*/
#ifndef _SYS_ZVOL_IMPL_H
@@ -56,10 +56,37 @@ typedef struct zvol_state {
atomic_t zv_suspend_ref; /* refcount for suspend */
krwlock_t zv_suspend_lock; /* suspend lock */
kcondvar_t zv_removing_cv; /* ready to remove minor */
+ list_node_t zv_remove_node; /* node on removal list */
struct zvol_state_os *zv_zso; /* private platform state */
boolean_t zv_threading; /* volthreading property */
} zvol_state_t;
+/*
+ * zvol taskqs
+ */
+typedef struct zv_taskq {
+ uint_t tqs_cnt;
+ taskq_t **tqs_taskq;
+} zv_taskq_t;
+
+typedef struct zv_request_stack {
+ zvol_state_t *zv;
+ struct bio *bio;
+#ifdef __linux__
+ struct request *rq;
+#endif
+} zv_request_t;
+
+typedef struct zv_request_task {
+ zv_request_t zvr;
+ taskq_ent_t ent;
+} zv_request_task_t;
+
+/*
+ * Switch taskq at multiple of 512 MB offset. This can be set to a lower value
+ * to utilize more threads for small files but may affect prefetch hits.
+ */
+#define ZVOL_TASKQ_OFFSET_SHIFT 29
extern krwlock_t zvol_state_lock;
#define ZVOL_HT_SIZE 1024
@@ -67,8 +94,13 @@ extern struct hlist_head *zvol_htable;
#define ZVOL_HT_HEAD(hash) (&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
extern zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE];
-extern unsigned int zvol_volmode;
extern unsigned int zvol_inhibit_dev;
+extern unsigned int zvol_prefetch_bytes;
+extern unsigned int zvol_volmode;
+extern unsigned int zvol_threads;
+extern unsigned int zvol_num_taskqs;
+extern unsigned int zvol_request_sync;
+extern zv_taskq_t zvol_taskqs;
/*
* platform independent functions exported to platform code
@@ -77,7 +109,6 @@ zvol_state_t *zvol_find_by_name_hash(const char *name,
uint64_t hash, int mode);
int zvol_first_open(zvol_state_t *zv, boolean_t readonly);
uint64_t zvol_name_hash(const char *name);
-void zvol_remove_minors_impl(const char *name);
void zvol_last_close(zvol_state_t *zv);
void zvol_insert(zvol_state_t *zv);
void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
@@ -94,16 +125,18 @@ int zvol_clone_range(zvol_state_handle_t *, uint64_t,
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
size_t nbps);
+zv_request_task_t *zv_request_task_create(zv_request_t zvr);
+void zv_request_task_free(zv_request_task_t *task);
/*
* platform dependent functions exported to platform independent code
*/
void zvol_os_free(zvol_state_t *zv);
-void zvol_os_rename_minor(zvol_state_t *zv, const char *newname);
+int zvol_os_rename_minor(zvol_state_t *zv, const char *newname);
int zvol_os_create_minor(const char *name);
int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize);
boolean_t zvol_os_is_zvol(const char *path);
-void zvol_os_clear_private(zvol_state_t *zv);
+void zvol_os_remove_minor(zvol_state_t *zv);
void zvol_os_set_disk_ro(zvol_state_t *zv, int flags);
void zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity);
diff --git a/sys/contrib/openzfs/include/zfeature_common.h b/sys/contrib/openzfs/include/zfeature_common.h
index 85537c1ae96e..56382ca85b55 100644
--- a/sys/contrib/openzfs/include/zfeature_common.h
+++ b/sys/contrib/openzfs/include/zfeature_common.h
@@ -87,6 +87,9 @@ typedef enum spa_feature {
SPA_FEATURE_FAST_DEDUP,
SPA_FEATURE_LONGNAME,
SPA_FEATURE_LARGE_MICROZAP,
+ SPA_FEATURE_DYNAMIC_GANG_HEADER,
+ SPA_FEATURE_BLOCK_CLONING_ENDIAN,
+ SPA_FEATURE_PHYSICAL_REWRITE,
SPA_FEATURES
} spa_feature_t;
@@ -103,7 +106,15 @@ typedef enum zfeature_flags {
/* Activate this feature at the same time it is enabled. */
ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2),
/* Each dataset has a field set if it has ever used this feature. */
- ZFEATURE_FLAG_PER_DATASET = (1 << 3)
+ ZFEATURE_FLAG_PER_DATASET = (1 << 3),
+ /*
+ * This feature isn't enabled by zpool upgrade; it must be explicitly
+ * listed to be enabled. It will also be applied if listed in an
+ * explicitly provided compatibility list. This flag can be removed
+ * from a given feature once support is sufficiently widespread, or
+ * worries about backwards compatibility are no longer relevant.
+ */
+ ZFEATURE_FLAG_NO_UPGRADE = (1 << 4)
} zfeature_flags_t;
typedef enum zfeature_type {
diff --git a/sys/contrib/openzfs/include/zfs_crrd.h b/sys/contrib/openzfs/include/zfs_crrd.h
new file mode 100644
index 000000000000..ba192a2062ea
--- /dev/null
+++ b/sys/contrib/openzfs/include/zfs_crrd.h
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2024 Klara Inc.
+ *
+ * This software was developed by
+ * Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
+ * Fred Weigel <fred.weigel@klarasystems.com>
+ * under sponsorship from Wasabi Technology, Inc. and Klara Inc.
+ */
+
+#ifndef _CRRD_H_
+#define _CRRD_H_
+
+#define RRD_MAX_ENTRIES 256
+
+#define RRD_ENTRY_SIZE sizeof (uint64_t)
+#define RRD_STRUCT_ELEM (sizeof (rrd_t) / RRD_ENTRY_SIZE)
+
+typedef enum {
+ DBRRD_FLOOR,
+ DBRRD_CEILING
+} dbrrd_rounding_t;
+
+typedef struct {
+ uint64_t rrdd_time;
+ uint64_t rrdd_txg;
+} rrd_data_t;
+
+typedef struct {
+ uint64_t rrd_head; /* head (beginning) */
+ uint64_t rrd_tail; /* tail (end) */
+ uint64_t rrd_length;
+
+ rrd_data_t rrd_entries[RRD_MAX_ENTRIES];
+} rrd_t;
+
+typedef struct {
+ rrd_t dbr_minutes;
+ rrd_t dbr_days;
+ rrd_t dbr_months;
+} dbrrd_t;
+
+size_t rrd_len(rrd_t *rrd);
+
+const rrd_data_t *rrd_entry(rrd_t *r, size_t i);
+rrd_data_t *rrd_tail_entry(rrd_t *rrd);
+uint64_t rrd_tail(rrd_t *rrd);
+uint64_t rrd_get(rrd_t *rrd, size_t i);
+
+void rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg);
+
+void dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg);
+uint64_t dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding);
+
+#endif
diff --git a/sys/contrib/openzfs/include/zfs_deleg.h b/sys/contrib/openzfs/include/zfs_deleg.h
index f80fe46d35f8..a7bbf1620ad5 100644
--- a/sys/contrib/openzfs/include/zfs_deleg.h
+++ b/sys/contrib/openzfs/include/zfs_deleg.h
@@ -55,6 +55,7 @@ typedef enum {
ZFS_DELEG_NOTE_PROMOTE,
ZFS_DELEG_NOTE_RENAME,
ZFS_DELEG_NOTE_SEND,
+ ZFS_DELEG_NOTE_SEND_RAW,
ZFS_DELEG_NOTE_RECEIVE,
ZFS_DELEG_NOTE_ALLOW,
ZFS_DELEG_NOTE_USERPROP,
diff --git a/sys/contrib/openzfs/include/zfs_valstr.h b/sys/contrib/openzfs/include/zfs_valstr.h
index 295449396c51..27b4c9ebb239 100644
--- a/sys/contrib/openzfs/include/zfs_valstr.h
+++ b/sys/contrib/openzfs/include/zfs_valstr.h
@@ -73,6 +73,7 @@ extern "C" {
_ZFS_VALSTR_DECLARE_BITFIELD(zio_flag)
_ZFS_VALSTR_DECLARE_BITFIELD(zio_stage)
+_ZFS_VALSTR_DECLARE_ENUM(zio_type)
_ZFS_VALSTR_DECLARE_ENUM(zio_priority)
#undef _ZFS_VALSTR_DECLARE_BITFIELD